def to_s3_csv(self, bucket, key, aws_access_key_id=None,
              aws_secret_access_key=None, compression=None, encoding=None,
              errors='strict', write_header=True, public_url=False,
              public_url_expires=3600, **csvargs):
    """
    Writes the table to an s3 object as a CSV

    `Args:`
        bucket: str
            The s3 bucket to upload to
        key: str
            The s3 key to name the file. If it ends in '.gz' or '.zip', the
            file will be compressed.
        aws_access_key_id: str
            Required if not included as environmental variable
        aws_secret_access_key: str
            Required if not included as environmental variable
        compression: str
            The compression type for the s3 object. Currently "None", "zip" and
            "gzip" are supported. If specified, will override the key suffix.
        encoding: str
            The CSV encoding type for `csv.writer()
            <https://docs.python.org/3/library/csv.html#csv.writer>`_
        errors: str
            The encoding error handling scheme; ``'strict'`` (the default)
            raises an error on any encoding problem.
        write_header: boolean
            Include header in output
        public_url: boolean
            Create a public link to the file
        public_url_expires: int
            The time, in seconds, until the url expires if ``public_url`` set
            to ``True``. Defaults to 3600.
        \**csvargs: kwargs
            ``csv.writer`` optional arguments

    `Returns:`
        Public url if ``public_url`` is set to ``True``; otherwise ``None``.
    """  # noqa: W605

    compression = compression or files.compression_type_for_path(key)

    csv_name = files.extract_file_name(key, include_suffix=False) + '.csv'

    # Save the CSV as a temp file
    local_path = self.to_csv(temp_file_compression=compression, encoding=encoding,
                             errors=errors, write_header=write_header,
                             csv_name=csv_name, **csvargs)

    # Put the file on S3
    from parsons import S3
    self.s3 = S3(aws_access_key_id=aws_access_key_id,
                 aws_secret_access_key=aws_secret_access_key)
    self.s3.put_file(bucket, key, local_path)

    if public_url:
        return self.s3.get_url(bucket, key, expires_in=public_url_expires)
    else:
        return None
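# A minimal usage sketch for ``to_s3_csv`` (illustrative, not part of the
# source). The bucket and key names below are hypothetical, and credentials
# are assumed to come from the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY env
# variables:
#
#     from parsons import Table
#
#     tbl = Table([{'vanid': 101, 'score': 0.87}])
#     # The '.gz' suffix triggers gzip compression of the uploaded CSV.
#     url = tbl.to_s3_csv('my-bucket', 'exports/scores.csv.gz',
#                         public_url=True, public_url_expires=3600)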
def to_zip_csv(self, archive_path=None, csv_name=None, encoding=None,
               errors='strict', write_header=True, if_exists='replace',
               **csvargs):
    """
    Outputs table to a CSV in a zip archive. Additional keyword arguments are
    passed to ``csv.writer()``. So, e.g., to override the delimiter from the
    default CSV dialect, provide the delimiter keyword argument. Use this
    method if you would like to write multiple csv files to the same archive.

    .. warning::
        If a file already exists in the archive, it will be overwritten.

    `Args:`
        archive_path: str
            The path to the zip archive. If not specified, a temporary file
            will be created and returned, and that file will be removed
            automatically when the script is done running.
        csv_name: str
            The name of the csv file to be stored in the archive. If ``None``,
            will use the archive name.
        encoding: str
            The CSV encoding type for `csv.writer()
            <https://docs.python.org/3/library/csv.html#csv.writer>`_
        errors: str
            The encoding error handling scheme; ``'strict'`` (the default)
            raises an error on any encoding problem.
        write_header: boolean
            Include header in output
        if_exists: str
            If the archive already exists, one of 'replace' or 'append'
        \**csvargs: kwargs
            ``csv.writer`` optional arguments

    `Returns:`
        str
            The path of the archive
    """  # noqa: W605

    if not archive_path:
        archive_path = files.create_temp_file(suffix='.zip')

    cf = self.to_csv(encoding=encoding, errors=errors,
                     write_header=write_header, **csvargs)

    if not csv_name:
        csv_name = files.extract_file_name(archive_path, include_suffix=False) + '.csv'

    return zip_archive.create_archive(archive_path, cf, file_name=csv_name,
                                      if_exists=if_exists)
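# A hedged sketch of writing two tables into the same archive (file names are
# hypothetical). Calling the method again with the returned path and
# ``if_exists='append'`` adds a second csv to the existing zip:
#
#     from parsons import Table
#
#     people = Table([{'id': 1, 'name': 'Jane'}])
#     events = Table([{'id': 7, 'event': 'Rally'}])
#
#     path = people.to_zip_csv(archive_path='data.zip', csv_name='people.csv')
#     events.to_zip_csv(archive_path=path, csv_name='events.csv',
#                       if_exists='append')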
def upload_scores(self, tbl, config, url_type, id_type='vanid', email=None,
                  auto_approve=True, approve_tolerance=.1, **url_kwargs):
    """
    Upload scores. Use to create or overwrite scores. Multiple score loads
    should be configured in a single call. [1]_

    `Args:`
        tbl: object
            A parsons.Table object. The table must contain the scores and the
            first column in the table must contain the primary key (e.g. vanid).
        config: list
            The score configuration. A list of dictionaries in which you
            specify the following

            .. list-table::
                :widths: 20 80
                :header-rows: 0

                * - ``score_column``
                  - The name of the column where the score is housed.
                * - ``score_id``
                  - The score slot id.

            Example:

            .. highlight:: python
            .. code-block:: python

                [{'score_id': int, 'score_column': str},
                 {'score_id': int, 'score_column': str}]
        url_type: str
            The cloud file storage to use to post the file. Currently only ``S3``.
        id_type: str
            The primary key type of the table's first column. Defaults to
            ``vanid``.
        email: str
            An email address to send job load status updates.
        auto_approve: boolean
            If the scores are within the expected tolerance of deviation from
            the average values provided, the scores will be automatically
            approved.
        approve_tolerance: float
            The deviation from the average scores allowed in order to
            automatically approve the score. Maximum of .1.
        **url_kwargs: kwargs
            Arguments to configure your cloud storage url type.

            * S3 requires the ``bucket`` argument and, if not stored as env
              variables, ``aws_access_key`` and ``aws_secret_access_key``.

    `Returns:`
        int
            The score load job id.

    .. [1] NGPVAN asks that you load multiple scores in a single call to
           reduce the load on their servers.
    """

    # Move to cloud storage
    file_name = str(uuid.uuid1()) + '.zip'
    public_url = cloud_storage.post_file(tbl, url_type, file_path=file_name, **url_kwargs)
    csv_name = files.extract_file_name(file_name, include_suffix=False) + '.csv'
    logger.info(f'Table uploaded to {url_type}.')

    # Generate shell request
    json = {
        "description": 'A description',
        "file": {
            "columnDelimiter": 'csv',
            "columns": [{'name': c} for c in tbl.columns],
            "fileName": csv_name,
            "hasHeader": "True",
            "hasQuotes": "False",
            "sourceUrl": public_url
        },
        "actions": []
    }

    # Configure each score
    for i in config:
        action = {
            "actionType": "score",
            "personIdColumn": tbl.columns[0],
            "personIdType": id_type,
            "scoreColumn": i['score_column'],
            "scoreId": i['score_id']
        }

        # If auto approving, set the approval criteria to the table's average
        # score, within the given tolerance.
        if auto_approve:
            average = petl.stats(tbl.table, i['score_column']).mean
            action['approvalCriteria'] = {"average": average, "tolerance": approve_tolerance}

        json['actions'].append(action)

    # Add email listener
    if email:
        json['listeners'] = [{"type": "EMAIL", 'value': email}]

    # Upload scores
    r = self.connection.post_request('fileLoadingJobs', json=json)
    logger.info(f"Scores job {r['jobId']} created.")

    return r['jobId']
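# An illustrative call (the score id, bucket, and column names are
# hypothetical; AWS credentials are assumed to be set as env variables and
# ``van`` to be an authenticated parsons VAN client). The first column holds
# the vanid primary key, and ``bucket`` is passed through ``**url_kwargs``:
#
#     from parsons import Table
#
#     tbl = Table([{'vanid': 101, 'model_score': 0.82}])
#     job_id = van.upload_scores(
#         tbl,
#         config=[{'score_id': 1234, 'score_column': 'model_score'}],
#         url_type='S3',
#         email='analyst@example.org',
#         bucket='my-van-uploads')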
def to_gcs_csv(self, bucket_name, blob_name, app_creds=None, project=None,
               compression=None, encoding=None, errors='strict',
               write_header=True, public_url=False, public_url_expires=60,
               **csvargs):
    """
    Writes the table to a Google Cloud Storage blob as a CSV.

    `Args:`
        bucket_name: str
            The bucket to upload to
        blob_name: str
            The blob to name the file. If it ends in '.gz' or '.zip', the file
            will be compressed.
        app_creds: str
            A credentials json string or a path to a json file. Not required
            if the ``GOOGLE_APPLICATION_CREDENTIALS`` env variable is set.
        project: str
            The project which the client is acting on behalf of. If not
            passed, will use the default inferred environment.
        compression: str
            The compression type for the csv. Currently "None", "zip" and
            "gzip" are supported. If specified, will override the blob suffix.
        encoding: str
            The CSV encoding type for `csv.writer()
            <https://docs.python.org/3/library/csv.html#csv.writer>`_
        errors: str
            The encoding error handling scheme; ``'strict'`` (the default)
            raises an error on any encoding problem.
        write_header: boolean
            Include header in output
        public_url: boolean
            Create a public link to the file
        public_url_expires: int
            The time, in minutes, until the url expires if ``public_url`` set
            to ``True``. Defaults to 60.
        \**csvargs: kwargs
            ``csv.writer`` optional arguments

    `Returns:`
        Public url if ``public_url`` is set to ``True``; otherwise ``None``.
    """  # noqa: W605

    compression = compression or files.compression_type_for_path(blob_name)

    csv_name = files.extract_file_name(blob_name, include_suffix=False) + '.csv'

    # Save the CSV as a temp file
    local_path = self.to_csv(temp_file_compression=compression, encoding=encoding,
                             errors=errors, write_header=write_header,
                             csv_name=csv_name, **csvargs)

    # Put the file on Google Cloud Storage
    from parsons.google.google_cloud_storage import GoogleCloudStorage
    gcs = GoogleCloudStorage(app_creds=app_creds, project=project)
    gcs.put_blob(bucket_name, blob_name, local_path)

    if public_url:
        return gcs.get_url(bucket_name, blob_name, expires_in=public_url_expires)
    else:
        return None
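# A minimal sketch (bucket and blob names are hypothetical; credentials are
# assumed to come from the GOOGLE_APPLICATION_CREDENTIALS env variable). Note
# that ``public_url_expires`` is in minutes here, unlike the seconds used by
# ``to_s3_csv``:
#
#     from parsons import Table
#
#     tbl = Table([{'id': 1, 'name': 'Jane'}])
#     # The '.zip' suffix triggers zip compression of the uploaded CSV.
#     url = tbl.to_gcs_csv('my-bucket', 'exports/people.csv.zip',
#                          public_url=True, public_url_expires=60)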