Example 1
    def setUp(self):

        self.temp_schema = TEMP_SCHEMA

        self.rs = Redshift()

        self.tbl = Table([['ID', 'Name'],
                          [1, 'Jim'],
                          [2, 'John'],
                          [3, 'Sarah']])

        # Create a schema, create a table, create a view
        setup_sql = f"""
                    drop schema if exists {self.temp_schema} cascade;
                    create schema {self.temp_schema};
                    """

        other_sql = f"""
                    create table {self.temp_schema}.test (id int, name varchar(5));
                    create view {self.temp_schema}.test_view as (
                        select * from {self.temp_schema}.test
                    );
        """

        self.rs.query(setup_sql)

        self.rs.query(other_sql)

        self.s3 = S3()

        self.temp_s3_bucket = os.environ['S3_TEMP_BUCKET']
        self.temp_s3_prefix = 'test/'
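
A matching tearDown is not shown above. A minimal sketch, assuming the Parsons ``Redshift.query``, ``S3.list_keys``, and ``S3.remove_file`` methods and reusing the same schema, bucket, and prefix from setUp:

    def tearDown(self):

        # Drop the temporary schema along with the table and view created in setUp
        self.rs.query(f"drop schema if exists {self.temp_schema} cascade;")

        # Delete any keys the tests wrote under the temporary prefix
        # (assumes list_keys yields the matching key names)
        for key in self.s3.list_keys(self.temp_s3_bucket, prefix=self.temp_s3_prefix):
            self.s3.remove_file(self.temp_s3_bucket, key)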
Example 2
    def from_s3_csv(cls, bucket, key, aws_access_key_id=None, aws_secret_access_key=None,
                    **csvargs):
        """
        Create a ``parsons table`` from a key in an S3 bucket.

        `Args:`
            bucket: str
                The S3 bucket.
            key: str
                The S3 key.
            aws_access_key_id: str
                Required if not included as an environment variable.
            aws_secret_access_key: str
                Required if not included as an environment variable.
            \**csvargs: kwargs
                ``csv_reader`` optional arguments
        `Returns:`
            `parsons.Table` object
        """  # noqa: W605

        from parsons.aws import S3
        s3 = S3(aws_access_key_id, aws_secret_access_key)
        file_obj = s3.get_file(bucket, key)

        if files.compression_type_for_path(key) == 'zip':
            file_obj = files.zip_archive.unzip_archive(file_obj)

        return cls(petl.fromcsv(file_obj, **csvargs))
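
A usage sketch (bucket and key are placeholders; credentials fall back to the AWS environment variables):

    from parsons import Table

    # Load s3://my-bucket/path/data.csv into a Parsons Table
    tbl = Table.from_s3_csv('my-bucket', 'path/data.csv')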
Example 3
    def from_s3_csv(cls,
                    bucket,
                    key,
                    from_manifest=False,
                    aws_access_key_id=None,
                    aws_secret_access_key=None,
                    **csvargs):
        """
        Create a ``parsons table`` from a key in an S3 bucket.

        `Args:`
            bucket: str
                The S3 bucket.
            key: str
                The S3 key.
            from_manifest: bool
                If True, treat `key` as a manifest file and load every URL it lists into a
                `parsons.Table`. Defaults to False.
            aws_access_key_id: str
                Required if not included as an environment variable.
            aws_secret_access_key: str
                Required if not included as an environment variable.
            \**csvargs: kwargs
                ``csv_reader`` optional arguments
        `Returns:`
            `parsons.Table` object
        """  # noqa: W605

        from parsons.aws import S3
        s3 = S3(aws_access_key_id, aws_secret_access_key)

        if from_manifest:
            with open(s3.get_file(bucket, key)) as fd:
                manifest = json.load(fd)

            s3_keys = [x["url"] for x in manifest["entries"]]

        else:
            s3_keys = [f"s3://{bucket}/{key}"]

        tbls = []
        for key in s3_keys:
            # TODO handle urls that end with '/', i.e. urls that point to "folders"
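            # "s3://bucket/path/to/key".split("/", 3) -> ["s3:", "", bucket, "path/to/key"]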
            _, _, bucket_, key_ = key.split("/", 3)
            file_ = s3.get_file(bucket_, key_)
            if files.compression_type_for_path(key_) == 'zip':
                file_ = files.zip_archive.unzip_archive(file_)

            tbls.append(petl.fromcsv(file_, **csvargs))

        return cls(petl.cat(*tbls))
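
A usage sketch for manifest mode (the manifest is assumed to follow the Redshift UNLOAD format, i.e. a JSON object with an "entries" list of {"url": "s3://..."} items; bucket and key are placeholders):

    from parsons import Table

    # Concatenate every CSV listed in the manifest into a single Parsons Table
    tbl = Table.from_s3_csv('my-bucket', 'unload/manifest', from_manifest=True)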
Example 4
    def to_s3_csv(self,
                  bucket,
                  key,
                  aws_access_key_id=None,
                  aws_secret_access_key=None,
                  compression=None,
                  encoding=None,
                  errors='strict',
                  write_header=True,
                  acl='bucket-owner-full-control',
                  public_url=False,
                  public_url_expires=3600,
                  **csvargs):
        """
        Writes the table to an S3 object as a CSV.

        `Args:`
            bucket: str
                The s3 bucket to upload to
            key: str
                The s3 key to name the file. If it ends in '.gz' or '.zip', the file will be
                compressed.
            aws_access_key_id: str
                Required if not included as an environment variable.
            aws_secret_access_key: str
                Required if not included as an environment variable.
            compression: str
                The compression type for the s3 object. Currently "None", "zip" and "gzip" are
                supported. If specified, will override the key suffix.
            encoding: str
                The CSV encoding type for `csv.writer()
                <https://docs.python.org/3/library/csv.html#csv.writer>`_
            errors: str
                How encoding errors are handled; 'strict' (the default) raises an error
                if one is encountered.
            write_header: boolean
                Include header in output
            public_url: boolean
                Create a public link to the file
            public_url_expires: int
                The time, in seconds, until the url expires if ``public_url`` is set to
                ``True``. Defaults to 3600 (one hour).
            acl: str
                The S3 permissions on the file
            \**csvargs: kwargs
                ``csv_writer`` optional arguments
        `Returns:`
            Public url if ``public_url`` is set to ``True``; otherwise ``None``.
        """  # noqa: W605

        compression = compression or files.compression_type_for_path(key)

        csv_name = files.extract_file_name(key, include_suffix=False) + '.csv'

        # Save the CSV as a temp file
        local_path = self.to_csv(temp_file_compression=compression,
                                 encoding=encoding,
                                 errors=errors,
                                 write_header=write_header,
                                 csv_name=csv_name,
                                 **csvargs)

        # Put the file on S3
        from parsons.aws import S3
        self.s3 = S3(aws_access_key_id=aws_access_key_id,
                     aws_secret_access_key=aws_secret_access_key)
        self.s3.put_file(bucket, key, local_path, acl=acl)

        if public_url:
            return self.s3.get_url(bucket, key, expires_in=public_url_expires)
        else:
            return None
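
A usage sketch (bucket and key are placeholders; the '.gz' suffix selects gzip compression when ``compression`` is not passed):

    # Write the table as a gzipped CSV and get a temporary public link
    url = tbl.to_s3_csv('my-bucket', 'exports/people.csv.gz',
                        public_url=True, public_url_expires=7200)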