def setUp(self):

    self.temp_schema = TEMP_SCHEMA

    self.rs = Redshift()

    self.tbl = Table([['ID', 'Name'],
                      [1, 'Jim'],
                      [2, 'John'],
                      [3, 'Sarah']])

    # Create a schema, create a table, create a view
    setup_sql = f"""
                drop schema if exists {self.temp_schema} cascade;
                create schema {self.temp_schema};
                """

    other_sql = f"""
                create table {self.temp_schema}.test (id int, name varchar(5));
                create view {self.temp_schema}.test_view as (
                    select * from {self.temp_schema}.test
                );
                """

    self.rs.query(setup_sql)
    self.rs.query(other_sql)

    self.s3 = S3()
    self.temp_s3_bucket = os.environ['S3_TEMP_BUCKET']
    self.temp_s3_prefix = 'test/'
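# A matching tearDown, as a minimal sketch of the cleanup this setUp implies:
# drop the temp schema and remove anything written under the temp S3 prefix.
# The S3 helpers used (list_keys, remove_file) exist on parsons' S3 wrapper,
# but treat the exact cleanup logic as an assumption, not the project's
# actual teardown.
def tearDown(self):
    # Drop the temp schema and everything in it
    self.rs.query(f"drop schema if exists {self.temp_schema} cascade;")

    # Remove any test objects left under the temp prefix
    for key in self.s3.list_keys(self.temp_s3_bucket, prefix=self.temp_s3_prefix):
        self.s3.remove_file(self.temp_s3_bucket, key)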
def from_s3_csv(cls, bucket, key, aws_access_key_id=None, aws_secret_access_key=None,
                **csvargs):
    """
    Create a ``parsons table`` from a key in an S3 bucket.

    `Args:`
        bucket: str
            The S3 bucket.
        key: str
            The S3 key.
        aws_access_key_id: str
            Required if not included as an environment variable.
        aws_secret_access_key: str
            Required if not included as an environment variable.
        \**csvargs: kwargs
            ``csv_reader`` optional arguments
    `Returns:`
        `parsons.Table` object
    """  # noqa: W605

    from parsons.aws import S3
    s3 = S3(aws_access_key_id, aws_secret_access_key)
    file_obj = s3.get_file(bucket, key)

    if files.compression_type_for_path(key) == 'zip':
        file_obj = files.zip_archive.unzip_archive(file_obj)

    return cls(petl.fromcsv(file_obj, **csvargs))
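# Usage sketch for the simple (non-manifest) path. The bucket and key names
# are hypothetical; credentials are assumed to come from the environment
# (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY).
from parsons import Table

tbl = Table.from_s3_csv('my-bucket', 'exports/people.csv')
print(tbl.num_rows)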
def from_s3_csv(cls, bucket, key, from_manifest=False, aws_access_key_id=None,
                aws_secret_access_key=None, **csvargs):
    """
    Create a ``parsons table`` from a key in an S3 bucket.

    `Args:`
        bucket: str
            The S3 bucket.
        key: str
            The S3 key.
        from_manifest: bool
            If True, treats ``key`` as a manifest file and loads all of its
            URLs into a `parsons.Table`. Defaults to False.
        aws_access_key_id: str
            Required if not included as an environment variable.
        aws_secret_access_key: str
            Required if not included as an environment variable.
        \**csvargs: kwargs
            ``csv_reader`` optional arguments
    `Returns:`
        `parsons.Table` object
    """  # noqa: W605

    import json

    from parsons.aws import S3
    s3 = S3(aws_access_key_id, aws_secret_access_key)

    if from_manifest:
        with open(s3.get_file(bucket, key)) as fd:
            manifest = json.load(fd)
        s3_keys = [x["url"] for x in manifest["entries"]]
    else:
        s3_keys = [f"s3://{bucket}/{key}"]

    tbls = []
    # Use a distinct loop variable so we don't shadow the `key` parameter
    for s3_key in s3_keys:
        # TODO handle urls that end with '/', i.e. urls that point to "folders"
        _, _, bucket_, key_ = s3_key.split("/", 3)
        file_ = s3.get_file(bucket_, key_)
        if files.compression_type_for_path(key_) == 'zip':
            file_ = files.zip_archive.unzip_archive(file_)
        tbls.append(petl.fromcsv(file_, **csvargs))

    return cls(petl.cat(*tbls))
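# Sketch of the manifest path. A Redshift UNLOAD-style manifest is a JSON
# object with an "entries" list of {"url": "s3://..."} items; all parts are
# read and concatenated into one table. Bucket and key names below are
# hypothetical.
from parsons import Table

# unload/manifest.json might look like:
# {"entries": [{"url": "s3://my-bucket/unload/part-000"},
#              {"url": "s3://my-bucket/unload/part-001"}]}
tbl = Table.from_s3_csv('my-bucket', 'unload/manifest.json', from_manifest=True)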
def to_s3_csv(self, bucket, key, aws_access_key_id=None,
              aws_secret_access_key=None, compression=None, encoding=None,
              errors='strict', write_header=True,
              acl='bucket-owner-full-control', public_url=False,
              public_url_expires=3600, **csvargs):
    """
    Writes the table to an s3 object as a CSV.

    `Args:`
        bucket: str
            The s3 bucket to upload to.
        key: str
            The s3 key to name the file. If it ends in '.gz' or '.zip', the
            file will be compressed.
        aws_access_key_id: str
            Required if not included as an environment variable.
        aws_secret_access_key: str
            Required if not included as an environment variable.
        compression: str
            The compression type for the s3 object. Currently ``None``,
            ``zip`` and ``gzip`` are supported. If specified, will override
            the key suffix.
        encoding: str
            The CSV encoding type for `csv.writer()
            <https://docs.python.org/3/library/csv.html#csv.writer>`_
        errors: str
            How encoding errors are handled (e.g. ``'strict'``, ``'ignore'``).
        write_header: boolean
            Include the header row in the output.
        acl: str
            The S3 permissions on the file.
        public_url: boolean
            Create a public link to the file.
        public_url_expires: int
            The time, in seconds, until the url expires if ``public_url``
            is set to ``True``. Defaults to 3600.
        \**csvargs: kwargs
            ``csv_writer`` optional arguments
    `Returns:`
        The public url as a string if ``public_url`` is set to ``True``;
        otherwise ``None``.
    """  # noqa: W605

    compression = compression or files.compression_type_for_path(key)

    csv_name = files.extract_file_name(key, include_suffix=False) + '.csv'

    # Save the CSV as a temp file
    local_path = self.to_csv(temp_file_compression=compression,
                             encoding=encoding,
                             errors=errors,
                             write_header=write_header,
                             csv_name=csv_name,
                             **csvargs)

    # Put the file on S3
    from parsons.aws import S3
    self.s3 = S3(aws_access_key_id=aws_access_key_id,
                 aws_secret_access_key=aws_secret_access_key)
    self.s3.put_file(bucket, key, local_path, acl=acl)

    if public_url:
        return self.s3.get_url(bucket, key, expires_in=public_url_expires)
    else:
        return None
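# Round-trip sketch: write a table to S3 as gzip-compressed CSV and get a
# temporary public URL back. The bucket and key are hypothetical; compression
# is inferred from the '.gz' suffix via compression_type_for_path.
from parsons import Table

tbl = Table([['id', 'name'], [1, 'Jim']])
url = tbl.to_s3_csv('my-bucket', 'exports/people.csv.gz',
                    public_url=True, public_url_expires=600)
print(url)  # expires after 600 seconds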