コード例 #1
0
    def to_s3_csv(self, bucket, key, aws_access_key_id=None,
                  aws_secret_access_key=None, compression=None, encoding=None,
                  errors='strict', write_header=True, public_url=False,
                  public_url_expires=3600, **csvargs):
        """
        Writes the table to an s3 object as a CSV

        `Args:`
            bucket: str
                The s3 bucket to upload to
            key: str
                The s3 key to name the file. If it ends in '.gz' or '.zip', the file will be
                compressed.
            aws_access_key_id: str
                Required if not included as environmental variable
            aws_secret_access_key: str
                Required if not included as environmental variable
            compression: str
                The compression type for the s3 object. Currently "None", "zip" and "gzip" are
                supported. If specified, will override the key suffix.
            encoding: str
                The CSV encoding type for `csv.writer()
                <https://docs.python.org/2/library/csv.html#csv.writer/>`_
            errors: str
                Raise an Error if encountered
            write_header: boolean
                Include header in output
            public_url: boolean
                Create a public link to the file
            public_url_expires: int
                The time, in seconds, until the url expires if ``public_url`` set to
                ``True``. Defaults to 3600 (one hour).
            \**csvargs: kwargs
                ``csv_writer`` optional arguments
        `Returns:`
            str or None
                The public url if ``public_url`` is ``True``, otherwise ``None``.
        """  # noqa: W605

        # Infer compression from the key suffix ('.gz'/'.zip') unless the
        # caller specified it explicitly.
        compression = compression or files.compression_type_for_path(key)

        # Name the csv inside any archive after the key's base name.
        csv_name = files.extract_file_name(key, include_suffix=False) + '.csv'

        # Save the CSV as a temp file
        local_path = self.to_csv(temp_file_compression=compression,
                                 encoding=encoding,
                                 errors=errors,
                                 write_header=write_header,
                                 csv_name=csv_name,
                                 **csvargs)

        # Put the file on S3. Imported locally to avoid a circular import
        # at module load time.
        from parsons import S3
        self.s3 = S3(aws_access_key_id=aws_access_key_id,
                     aws_secret_access_key=aws_secret_access_key)
        self.s3.put_file(bucket, key, local_path)

        if public_url:
            return self.s3.get_url(bucket, key, expires_in=public_url_expires)
        else:
            return None
コード例 #2
0
ファイル: tofrom.py プロジェクト: margotw40/parsons
    def to_zip_csv(self,
                   archive_path=None,
                   csv_name=None,
                   encoding=None,
                   errors='strict',
                   write_header=True,
                   if_exists='replace',
                   **csvargs):
        """
        Outputs table to a CSV in a zip archive. Additional key word arguments are passed to
        ``csv.writer()``. So, e.g., to override the delimiter from the default CSV dialect,
        provide the delimiter keyword argument. Use this method if you would like to write
        multiple csv files to the same archive.

        .. warning::
                If a file already exists in the archive, it will be overwritten.

        `Args:`
            archive_path: str
                The path to zip achive. If not specified, a temporary file will be created and
                returned, and that file will be removed automatically when the script is done
                running.
            csv_name: str
                The name of the csv file to be stored in the archive. If ``None`` will use
                the archive name.
            encoding: str
                The CSV encoding type for `csv.writer()
                <https://docs.python.org/2/library/csv.html#csv.writer/>`_
            errors: str
                Raise an Error if encountered
            write_header: boolean
                Include header in output
            if_exists: str
                If archive already exists, one of 'replace' or 'append'
            \**csvargs: kwargs
                ``csv_writer`` optional arguments

        `Returns:`
            str
                The path of the archive
        """  # noqa: W605

        # No destination given: write to an auto-cleaned temp file.
        if not archive_path:
            archive_path = files.create_temp_file(suffix='.zip')

        # Write the table to an (uncompressed) temp csv first; the archive
        # step below handles the zipping.
        cf = self.to_csv(encoding=encoding,
                         errors=errors,
                         write_header=write_header,
                         **csvargs)

        # Default the inner csv name to the archive's base name.
        if not csv_name:
            csv_name = files.extract_file_name(archive_path,
                                               include_suffix=False) + '.csv'

        return zip_archive.create_archive(archive_path,
                                          cf,
                                          file_name=csv_name,
                                          if_exists=if_exists)
コード例 #3
0
    def upload_scores(self,
                      tbl,
                      config,
                      url_type,
                      id_type='vanid',
                      email=None,
                      auto_approve=True,
                      approve_tolerance=.1,
                      **url_kwargs):
        """
        Upload scores. Use to create or overwrite scores. Multiple score loads
        should be configured in a single call. [1]_

        `Args:`
            tbl: object
                A parsons.Table object. The table must contain the scores and first column in the
                table must contain the primary key (e.g. vanid).
            config: list
                The score configuration. A list of dictionaries in which you specify the following

                .. list-table::
                    :widths: 20 80
                    :header-rows: 0

                    * - ``score_column``
                      - The name of the column where the score is housed.
                    * - ``score_id``
                      - The score slot id.

                Example:

                .. highlight:: python
                .. code-block:: python

                  [{'score1_id' : int, 'score1_column': str},
                   {'score2_id' : int, 'score2_column': str}]

            url_type: str
                The cloud file storage to use to post the file. Currently only ``S3``.
            id_type: str
                The type of the primary key in the first column (e.g. ``vanid``).
            email: str
                An email address to send job load status updates.
            auto_approve: boolean
                If the scores are within the expected tolerance of deviation from the
                average values provided, then score will be automatically approved.
            approve_tolerance: float
                The deviation from the average scores allowed in order to automatically
                approve the score. Maximum of .1.
            **url_kwargs: kwargs
                Arguments to configure your cloud storage url type.
                    * S3 requires ``bucket`` argument and, if not stored as env variables
                      ``aws_access_key`` and ``aws_secret_access_key``.
        `Returns:`
            int
               The score load job id.

        .. [1] NGPVAN asks that you load multiple scores in a single call to reduce the load
           on their servers.
        """

        # Move to cloud storage; the uuid keeps concurrent uploads from colliding.
        file_name = str(uuid.uuid1()) + '.zip'
        public_url = cloud_storage.post_file(tbl,
                                             url_type,
                                             file_path=file_name,
                                             **url_kwargs)
        csv_name = files.extract_file_name(file_name,
                                           include_suffix=False) + '.csv'
        logger.info(f'Table uploaded to {url_type}.')

        # Generate shell request
        json = {
            "description": 'A description',
            "file": {
                "columnDelimiter": 'csv',
                "columns": [{
                    'name': c
                } for c in tbl.columns],
                "fileName": csv_name,
                "hasHeader": "True",
                "hasQuotes": "False",
                "sourceUrl": public_url
            },
            "actions": []
        }

        # Configure each score
        for i in config:
            action = {
                "actionType": "score",
                "personIdColumn": tbl.columns[0],
                "personIdType": id_type,
                "scoreColumn": i['score_column'],
                "scoreId": i['score_id']
            }

            # Ask VAN to auto-approve when the uploaded scores' mean is within
            # approve_tolerance of the mean we compute here.
            if auto_approve:
                average = petl.stats(tbl.table, i['score_column']).mean
                action['approvalCriteria'] = {
                    "average": average,
                    "tolerance": approve_tolerance
                }

            json['actions'].append(action)

        # Add email listener
        if email:
            json['listeners'] = [{"type": "EMAIL", 'value': email}]

        # Upload scores
        r = self.connection.post_request('fileLoadingJobs', json=json)
        logger.info(f"Scores job {r['jobId']} created.")
        return r['jobId']
コード例 #4
0
ファイル: tofrom.py プロジェクト: rdhyee/parsons
    def to_gcs_csv(self,
                   bucket_name,
                   blob_name,
                   app_creds=None,
                   project=None,
                   compression=None,
                   encoding=None,
                   errors='strict',
                   write_header=True,
                   public_url=False,
                   public_url_expires=60,
                   **csvargs):
        """
        Writes the table to a Google Cloud Storage blob as a CSV.

        `Args:`
            bucket_name: str
                The bucket to upload to
            blob_name: str
                The blob to name the file. If it ends in '.gz' or '.zip', the file will be
                compressed.
            app_creds: str
                A credentials json string or a path to a json file. Not required
                if ``GOOGLE_APPLICATION_CREDENTIALS`` env variable set.
            project: str
                The project which the client is acting on behalf of. If not passed
                then will use the default inferred environment.
            compression: str
                The compression type for the csv. Currently "None", "zip" and "gzip" are
                supported. If specified, will override the key suffix.
            encoding: str
                The CSV encoding type for `csv.writer()
                <https://docs.python.org/2/library/csv.html#csv.writer/>`_
            errors: str
                Raise an Error if encountered
            write_header: boolean
                Include header in output
            public_url: boolean
                Create a public link to the file
            public_url_expires: int
                The time, in minutes, until the url expires if ``public_url`` set to
                ``True``. Defaults to 60 (one hour).
            \**csvargs: kwargs
                ``csv_writer`` optional arguments
        `Returns:`
            str or None
                The public url if ``public_url`` is ``True``, otherwise ``None``.
        """  # noqa: W605

        # Infer compression from the blob suffix ('.gz'/'.zip') unless the
        # caller specified it explicitly.
        compression = compression or files.compression_type_for_path(blob_name)

        # Name the csv inside any archive after the blob's base name.
        csv_name = files.extract_file_name(blob_name,
                                           include_suffix=False) + '.csv'

        # Save the CSV as a temp file
        local_path = self.to_csv(temp_file_compression=compression,
                                 encoding=encoding,
                                 errors=errors,
                                 write_header=write_header,
                                 csv_name=csv_name,
                                 **csvargs)

        # Imported locally to avoid a circular import at module load time.
        from parsons.google.google_cloud_storage import GoogleCloudStorage
        gcs = GoogleCloudStorage(app_creds=app_creds, project=project)
        gcs.put_blob(bucket_name, blob_name, local_path)

        if public_url:
            return gcs.get_url(bucket_name,
                               blob_name,
                               expires_in=public_url_expires)
        else:
            return None