Ejemplo n.º 1
0
def test_stats():
    """Exercise ``stats`` on the 'bar' column of a small, irregular table.

    The 'bar' values mix an int, numeric strings (plain and ``u``-prefixed),
    a non-numeric string and ``None``; one row is longer than the header and
    one is shorter.  The assertions expect three values to be counted
    (summing to 6.0) and two to be reported as errors -- presumably the
    'xyz' and ``None`` entries.
    """
    header = ('foo', 'bar', 'baz')
    rows = (
        ('A', 1, 2),
        ('B', '2', '3.4'),
        (u'B', u'3', u'7.8', True),
        ('D', 'xyz', 9.0),
        ('E', None),
    )
    table = (header,) + rows

    result = stats(table, 'bar')

    # Checked in the same order as the individual statistics are listed.
    expected = (
        ('min', 1.0),
        ('max', 3.0),
        ('sum', 6.0),
        ('count', 3),
        ('errors', 2),
        ('mean', 2.0),
    )
    for key, value in expected:
        assert result[key] == value
Ejemplo n.º 2
0
def test_stats():
    """Verify the summary ``stats`` produces for the 'bar' column.

    Only three of the five data rows are expected to contribute a number
    (1, '2' and '3'); the remaining two are expected to be tallied under
    'errors'.
    """
    heading = ('foo', 'bar', 'baz')
    row_a = ('A', 1, 2)
    row_b_short = ('B', '2', '3.4')
    row_b_long = ('B', '3', '7.8', True)   # one value more than the header
    row_d = ('D', 'xyz', 9.0)
    row_e = ('E', None)                    # one value fewer than the header
    table = (heading, row_a, row_b_short, row_b_long, row_d, row_e)

    summary = stats(table, 'bar')

    # Pick out just the statistics under test and compare them in one go.
    checked = ('min', 'max', 'sum', 'count', 'errors', 'mean')
    observed = dict((name, summary[name]) for name in checked)
    assert observed == {
        'min': 1.0,
        'max': 3.0,
        'sum': 6.0,
        'count': 3,
        'errors': 2,
        'mean': 2.0,
    }
Ejemplo n.º 3
0
def test_stats():
    """Check the statistics ``stats`` reports for column "bar".

    The table is a tuple of row tuples whose first row is the header.  The
    "bar" column holds 1, "2", u"3", "xyz" and None; the assertions expect a
    count of 3, a sum of 6.0 and 2 errors.
    """
    fields = ("foo", "bar", "baz")
    records = [
        ("A", 1, 2),
        ("B", "2", "3.4"),
        (u"B", u"3", u"7.8", True),
        ("D", "xyz", 9.0),
        ("E", None),
    ]
    table = tuple([fields] + records)

    result = stats(table, "bar")

    # Range and total of the values that were accepted as numbers.
    assert (result["min"], result["max"], result["sum"]) == (1.0, 3.0, 6.0)
    # How many values were accepted and how many were rejected.
    assert (result["count"], result["errors"]) == (3, 2)
    assert result["mean"] == 2.0
Ejemplo n.º 4
0
    def upload_scores(self,
                      tbl,
                      config,
                      url_type,
                      id_type='vanid',
                      email=None,
                      auto_approve=True,
                      approve_tolerance=.1,
                      **url_kwargs):
        """
        Upload scores. Use to create or overwrite scores. Multiple score loads
        should be configured in a single call. [1]_

        `Args:`
            tbl: object
                A parsons.Table object. The table must contain the scores and the first
                column in the table must contain the primary key (e.g. vanid).
            config: list
                The score configuration. A list of dictionaries, one per score to load,
                in which you specify the following

                .. list-table::
                    :widths: 20 80
                    :header-rows: 0

                    * - ``score_column``
                      - The name of the column where the score is housed.
                    * - ``score_id``
                      - The score slot id.

                Example:

                .. highlight:: python
                .. code-block:: python

                  [{'score_id': int, 'score_column': str},
                   {'score_id': int, 'score_column': str}]

            url_type: str
                The cloud file storage to use to post the file (``S3`` or ``GCS``).
                See :ref:`Cloud Storage <cloud-storage>` for more details.
            id_type: str
                The type of id held in the first column of the table. It is sent as
                the ``personIdType`` of every score action. Defaults to ``vanid``.
            email: str
                An email address to send job load status updates.
            auto_approve: boolean
                If the scores are within the expected tolerance of deviation from the
                average values provided, then score will be automatically approved.
                When set, the mean of each score column is computed and sent as the
                approval average.
            approve_tolerance: float
                The deviation from the average scores allowed in order to automatically
                approve the score. Maximum of .1. The value is passed through as given;
                it is not validated here.
            **url_kwargs: kwargs
                Arguments to configure your cloud storage url type. See
                :ref:`Cloud Storage <cloud-storage>` for more details.
        `Returns:`
            int
               The score load job id.

        .. [1] NGPVAN asks that you load multiple scores in a single call to reduce the load
           on their servers.
        """

        # Move to cloud storage. The table is posted under a freshly generated
        # name with a ``.zip`` suffix; the request below names the ``.csv``.
        file_name = str(uuid.uuid1())
        url = cloud_storage.post_file(tbl,
                                      url_type,
                                      file_path=file_name + '.zip',
                                      **url_kwargs)
        logger.info(f'Table uploaded to {url_type}.')

        # Generate shell request. Called ``payload`` rather than ``json`` so the
        # local name does not shadow the standard-library module of that name.
        payload = {
            "description": 'A description',
            "file": {
                "columnDelimiter": 'csv',
                "columns": [{'name': c} for c in tbl.columns],
                "fileName": file_name + '.csv',
                "hasHeader": "True",
                "hasQuotes": "False",
                "sourceUrl": url
            },
            "actions": []
        }

        # Configure each score: one action per entry in ``config``
        for score in config:
            action = {
                "actionType": "score",
                # The first column of the table is used as the person id
                "personIdColumn": tbl.columns[0],
                "personIdType": id_type,
                "scoreColumn": score['score_column'],
                "scoreId": score['score_id']
            }

            if auto_approve:
                # Approval criteria use the mean of the uploaded score column
                average = petl.stats(tbl.table, score['score_column']).mean
                action['approvalCriteria'] = {
                    "average": average,
                    "tolerance": approve_tolerance
                }

            payload['actions'].append(action)

        # Add email listener
        if email:
            payload['listeners'] = [{"type": "EMAIL", 'value': email}]

        # Upload scores
        r = self.connection.post_request('fileLoadingJobs', json=payload)
        logger.info(f"Scores job {r['jobId']} created.")
        return r['jobId']
Ejemplo n.º 5
0
from __future__ import division, print_function, absolute_import


# limits()
##########
# Example: call etl.limits() on the 'bar' field of a small in-memory table
# (a list of rows whose first row is the header) and unpack the result
# into two names.

import petl as etl
table = [['foo', 'bar'], ['a', 1], ['b', 2], ['b', 3]]
minv, maxv = etl.limits(table, 'bar')
# Bare expression statements: the values are evaluated but not used here --
# presumably so they are echoed when the example is run interactively or
# rendered by documentation tooling (TODO confirm).
minv
maxv


# stats()
#########
# Example: call etl.stats() on the 'bar' field of a deliberately messy table.
# The 'bar' values mix an int, numeric strings, a non-numeric string and
# None, and the rows are ragged (one has an extra value, one is short).

# Re-imported so this example section is self-contained.
import petl as etl
table = [['foo', 'bar', 'baz'],
         ['A', 1, 2],
         ['B', '2', '3.4'],
         [u'B', u'3', u'7.8', True],
         ['D', 'xyz', 9.0],
         ['E', None]]
# The return value is not bound to a name; presumably it is shown when the
# example is run interactively or rendered into documentation (TODO confirm).
etl.stats(table, 'bar')