Exemple #1
0
def test_encrypt_decrypt_file():
    """
    Round-trip a small text file through encrypt_file and decrypt_file
    and check that the decrypted contents equal the original text.
    """
    material = SnowflakeFileEncryptionMaterial(
        query_stage_master_key='ztke8tIdVt1zmlQIZm0BMA==',
        query_id='123873c7-3a66-40c4-ab89-e3722fbccce1',
        smk_id=3112)
    expected = 'test data'
    handle, plain_path = tempfile.mkstemp()
    cipher_path = None
    restored_path = None
    try:
        with codecs.open(plain_path, 'w', encoding=UTF8) as sink:
            sink.write(expected)

        metadata, cipher_path = SnowflakeEncryptionUtil.encrypt_file(
            material, plain_path)
        restored_path = SnowflakeEncryptionUtil.decrypt_file(
            metadata, material, cipher_path)

        with codecs.open(restored_path, 'r', encoding=UTF8) as source:
            actual = ''.join(source)
        assert expected == actual, "encrypted and decrypted contents"
    finally:
        # The mkstemp descriptor is still open here; close it before removal.
        os.close(handle)
        os.remove(plain_path)
        # Paths stay None when the corresponding step never produced a file.
        for leftover in (cipher_path, restored_path):
            if leftover:
                os.remove(leftover)
Exemple #2
0
def test_encrypt_decrypt_file(tmp_path):
    """Round-trip a small file through encrypt_file and decrypt_file."""
    material = SnowflakeFileEncryptionMaterial(
        query_stage_master_key="ztke8tIdVt1zmlQIZm0BMA==",
        query_id="123873c7-3a66-40c4-ab89-e3722fbccce1",
        smk_id=3112,
    )
    expected = "test data"
    input_file = tmp_path / "test_encrypt_decrypt_file"
    cipher_path = None
    restored_path = None
    try:
        with input_file.open("w", encoding=UTF8) as sink:
            sink.write(expected)

        metadata, cipher_path = SnowflakeEncryptionUtil.encrypt_file(
            material, input_file)
        restored_path = SnowflakeEncryptionUtil.decrypt_file(
            metadata, material, cipher_path)

        with codecs.open(restored_path, "r", encoding=UTF8) as source:
            actual = "".join(source)
        assert expected == actual, "encrypted and decrypted contents"
    finally:
        input_file.unlink()
        # Paths stay None when the corresponding step never produced a file.
        for leftover in (cipher_path, restored_path):
            if leftover:
                os.remove(leftover)
def test_encrypt_decrypt_large_file(tmpdir):
    """Encrypts and Decrypts a large file.

    Generates one file of ``number_of_lines`` lines, round-trips it through
    encrypt_file / decrypt_file and checks that the decrypted file has the
    same number of lines.
    """
    encryption_material = SnowflakeFileEncryptionMaterial(
        query_stage_master_key='ztke8tIdVt1zmlQIZm0BMA==',
        query_id='123873c7-3a66-40c4-ab89-e3722fbccce1',
        smk_id=3112)

    # generates N files
    number_of_files = 1
    number_of_lines = 10000
    tmp_dir = generate_k_lines_of_n_files(
        number_of_lines, number_of_files, tmp_dir=str(tmpdir.mkdir('data')))

    files = glob.glob(os.path.join(tmp_dir, 'file*'))
    input_file = files[0]
    encrypted_file = None
    decrypted_file = None
    try:
        (metadata, encrypted_file) = SnowflakeEncryptionUtil.encrypt_file(
            encryption_material, input_file)
        decrypted_file = SnowflakeEncryptionUtil.decrypt_file(
            metadata, encryption_material, encrypted_file)

        # Only the line count is asserted, so count while streaming instead
        # of accumulating the whole file into a string that is never read.
        with codecs.open(decrypted_file, 'r', encoding=UTF8) as fd:
            cnt = sum(1 for _ in fd)
        assert cnt == number_of_lines, "number of lines"
    finally:
        os.remove(input_file)
        if encrypted_file:
            os.remove(encrypted_file)
        if decrypted_file:
            os.remove(decrypted_file)
    def put_to_stage(self, file, stream, count, temp_dir=None):
        """Upload a local file to the S3 bucket configured in ``connection_config``.

        The S3 key is ``s3_key_prefix`` followed by the ``s3_file_naming_scheme``
        template with ``{stream}``, ``{timecode}`` and ``{ext}`` substituted.
        When ``client_side_encryption_master_key`` is set, the file is encrypted
        first and the encrypted copy is uploaded with its key and IV attached as
        object metadata; the encrypted copy is always removed afterwards.

        Args:
            file: Path of the local file to upload.
            stream: Value substituted for ``{stream}`` in the naming scheme.
            count: Row count; only used in the log message.
            temp_dir: Passed to the encryption helper as ``tmp_dir``.

        Returns:
            The S3 key the file was uploaded under.
        """
        self.logger.info("Uploading {} rows to external snowflake stage on S3".format(count))

        # Generating key in S3 bucket
        bucket = self.connection_config['s3_bucket']
        s3_acl = self.connection_config.get('s3_acl')
        s3_key_prefix = self.connection_config.get('s3_key_prefix', '')
        s3_file_name = self.connection_config.get(
            's3_file_naming_scheme', "pipelinewise_{stream}_{timecode}.{ext}"
        )

        # Base name with both "/" and "\" treated as path separators; everything
        # after its first dot is used as the extension (e.g. "csv.gz").
        base_name = file.replace("\\", "/").split("/")[-1]
        placeholders = {
            "{stream}": stream,
            "{timecode}": datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f"),
            "{ext}": ".".join(base_name.split(".")[1:]),
        }
        for placeholder, value in placeholders.items():
            if placeholder in s3_file_name:
                s3_file_name = s3_file_name.replace(placeholder, value)
        s3_key = "{}{}".format(s3_key_prefix, s3_file_name)

        self.logger.info("Target S3 bucket: {}, local file: {}, S3 key: {}".format(bucket, file, s3_key))

        # Encrypt csv if client side encryption enabled
        master_key = self.connection_config.get('client_side_encryption_master_key', '')
        if master_key != '':
            # Encrypt the file
            encryption_material = SnowflakeFileEncryptionMaterial(
                query_stage_master_key=master_key,
                query_id='',
                smk_id=0
            )
            encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
                encryption_material,
                file,
                tmp_dir=temp_dir
            )

            try:
                extra_args = {'ACL': s3_acl} if s3_acl else {}

                # Send key and iv in the metadata, that will be required to decrypt the uploaded file
                extra_args['Metadata'] = {
                    'x-amz-key': encryption_metadata.key,
                    'x-amz-iv': encryption_metadata.iv
                }
                self.s3.upload_file(encrypted_file, bucket, s3_key, ExtraArgs=extra_args)
            finally:
                # Remove the encrypted copy even when the upload fails, so
                # failed uploads do not leave temp files behind.
                os.remove(encrypted_file)

        # Upload to S3 without encrypting
        else:
            extra_args = {'ACL': s3_acl} if s3_acl else None
            self.s3.upload_file(file, bucket, s3_key, ExtraArgs=extra_args)

        return s3_key
    def upload_file(self, file, stream, temp_dir=None):
        """Upload file to an external snowflake stage on s3.

        The S3 key is ``s3_key_prefix`` followed by
        ``pipelinewise_<stream>_<timestamp>_<basename of file>``. When
        ``client_side_encryption_master_key`` is set in the connection config,
        the file is encrypted first and the encrypted copy is uploaded with its
        key and IV as object metadata; the encrypted copy is always removed
        afterwards.

        Args:
            file: Path of the local file to upload.
            stream: Stream name embedded in the S3 key.
            temp_dir: Passed to the encryption helper as ``tmp_dir``.

        Returns:
            The S3 key the file was uploaded under.
        """
        # Generating key in S3 bucket
        bucket = self.connection_config['s3_bucket']
        s3_acl = self.connection_config.get('s3_acl')
        s3_key_prefix = self.connection_config.get('s3_key_prefix', '')
        s3_key = "{}pipelinewise_{}_{}_{}".format(
            s3_key_prefix, stream,
            datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f"),
            os.path.basename(file))

        self.logger.info('Target S3 bucket: %s, local file: %s, S3 key: %s',
                         bucket, file, s3_key)

        # Encrypt csv if client side encryption enabled
        master_key = self.connection_config.get(
            'client_side_encryption_master_key', '')
        if master_key != '':
            # Encrypt the file
            encryption_material = SnowflakeFileEncryptionMaterial(
                query_stage_master_key=master_key, query_id='', smk_id=0)
            encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
                encryption_material, file, tmp_dir=temp_dir)

            try:
                extra_args = {'ACL': s3_acl} if s3_acl else {}

                # Send key and iv in the metadata, that will be required to decrypt the uploaded file
                extra_args['Metadata'] = {
                    'x-amz-key': encryption_metadata.key,
                    'x-amz-iv': encryption_metadata.iv
                }
                self.s3_client.upload_file(encrypted_file,
                                           bucket,
                                           s3_key,
                                           ExtraArgs=extra_args)
            finally:
                # Remove the encrypted copy even when the upload fails, so
                # failed uploads do not leave temp files behind.
                os.remove(encrypted_file)

        # Upload to S3 without encrypting
        else:
            extra_args = {'ACL': s3_acl} if s3_acl else None
            self.s3_client.upload_file(file,
                                       bucket,
                                       s3_key,
                                       ExtraArgs=extra_args)

        return s3_key
    def upload_to_s3(self, file, tmp_dir=None):
        """Upload a local file to the S3 bucket configured in ``connection_config``.

        The S3 key is ``s3_key_prefix`` followed by the base name of ``file``.
        When ``client_side_encryption_master_key`` is set, the file is encrypted
        first and the encrypted copy is uploaded with its key and IV as object
        metadata; the encrypted copy is always removed afterwards.

        Args:
            file: Path of the local file to upload.
            tmp_dir: Passed to the encryption helper as ``tmp_dir``.

        Returns:
            The S3 key the file was uploaded under.
        """
        bucket = self.connection_config['s3_bucket']
        s3_acl = self.connection_config.get('s3_acl')
        s3_key_prefix = self.connection_config.get('s3_key_prefix', '')
        s3_key = '{}{}'.format(s3_key_prefix, os.path.basename(file))

        LOGGER.info(
            'Uploading to S3 bucket: %s, local file: %s, S3 key: %s',
            bucket,
            file,
            s3_key,
        )

        # Encrypt csv if client side encryption enabled
        master_key = self.connection_config.get(
            'client_side_encryption_master_key', '')
        if master_key != '':
            # Encrypt the file
            LOGGER.info('Encrypting file %s...', file)
            encryption_material = SnowflakeFileEncryptionMaterial(
                query_stage_master_key=master_key, query_id='', smk_id=0)
            encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
                encryption_material, file, tmp_dir=tmp_dir)

            try:
                extra_args = {'ACL': s3_acl} if s3_acl else {}

                # Send key and iv in the metadata, that will be required to decrypt the uploaded file
                extra_args['Metadata'] = {
                    'x-amz-key': encryption_metadata.key,
                    'x-amz-iv': encryption_metadata.iv,
                }
                self.s3.upload_file(encrypted_file,
                                    bucket,
                                    s3_key,
                                    ExtraArgs=extra_args)
            finally:
                # Remove the encrypted copy even when the upload fails, so
                # failed uploads do not leave temp files behind.
                os.remove(encrypted_file)

        # Upload to S3 without encrypting
        else:
            extra_args = {'ACL': s3_acl} if s3_acl else None
            self.s3.upload_file(file, bucket, s3_key, ExtraArgs=extra_args)

        return s3_key
    def put_to_stage(self, file, stream, count, temp_dir=None):
        """Upload a local file to the S3 bucket configured in ``connection_config``.

        The S3 key is ``s3_key_prefix`` followed by
        ``pipelinewise_<stream>_<timestamp>.csv``. When
        ``client_side_encryption_master_key`` is set, the file is encrypted
        first and the encrypted copy is uploaded with its key and IV as object
        metadata; the encrypted copy is always removed afterwards.

        Args:
            file: Path of the local file to upload.
            stream: Stream name embedded in the S3 key.
            count: Row count; only used in the log message.
            temp_dir: Passed to the encryption helper as ``tmp_dir``.

        Returns:
            The S3 key the file was uploaded under.
        """
        self.logger.info(
            "Uploading {} rows to external snowflake stage on S3".format(
                count))

        # Generating key in S3 bucket
        bucket = self.connection_config['s3_bucket']
        s3_key_prefix = self.connection_config.get('s3_key_prefix', '')
        s3_key = "{}pipelinewise_{}_{}.csv".format(
            s3_key_prefix, stream,
            datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f"))

        self.logger.info(
            "Target S3 bucket: {}, local file: {}, S3 key: {}".format(
                bucket, file, s3_key))

        # Encrypt csv if client side encryption enabled
        master_key = self.connection_config.get(
            'client_side_encryption_master_key', '')
        if master_key != '':
            # Encrypt the file
            encryption_material = SnowflakeFileEncryptionMaterial(
                query_stage_master_key=master_key, query_id='', smk_id=0)
            encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
                encryption_material, file, tmp_dir=temp_dir)

            try:
                # Send key and iv in the metadata, that will be required to decrypt the uploaded file
                metadata = {
                    'x-amz-key': encryption_metadata.key,
                    'x-amz-iv': encryption_metadata.iv
                }
                self.s3.upload_file(encrypted_file,
                                    bucket,
                                    s3_key,
                                    ExtraArgs={'Metadata': metadata})
            finally:
                # Remove the encrypted copy even when the upload fails, so
                # failed uploads do not leave temp files behind.
                os.remove(encrypted_file)

        # Upload to S3 without encrypting
        else:
            self.s3.upload_file(file, bucket, s3_key)

        return s3_key
    def put_to_stage(self, file, stream, count):
        """Upload a local file to the S3 bucket configured in ``connection_config``.

        The S3 key is ``s3_key_prefix`` followed by
        ``pipelinewise_<stream>_<timestamp>.csv``. When
        ``client_side_encryption_master_key`` is set, the file is encrypted
        first and the encrypted copy is uploaded with its key and IV as object
        metadata; the encrypted copy is always removed afterwards.

        Args:
            file: Path of the local file to upload.
            stream: Stream name embedded in the S3 key and the log message.
            count: Row count; only used in the log message.

        Returns:
            The S3 key the file was uploaded under.
        """
        logger.info(f"Uploading {count} rows to stage from {stream} on S3")

        # Generating key in S3 bucket
        bucket = self.connection_config["s3_bucket"]
        s3_key_prefix = self.connection_config.get("s3_key_prefix", "")
        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f")
        s3_key = f"{s3_key_prefix}pipelinewise_{stream}_{timestamp}.csv"

        logger.info(
            f"Target S3 bucket: {bucket}, local file: {file}, S3 key: {s3_key}")

        # Encrypt csv if client side encryption enabled
        master_key = self.connection_config.get(
            "client_side_encryption_master_key", "")
        if master_key != "":
            # Encrypt the file
            encryption_material = SnowflakeFileEncryptionMaterial(
                query_stage_master_key=master_key, query_id="", smk_id=0)
            encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
                encryption_material, file)

            try:
                # Send key and iv in the metadata, that will be required to decrypt the uploaded file
                metadata = {
                    "x-amz-key": encryption_metadata.key,
                    "x-amz-iv": encryption_metadata.iv
                }
                self.s3.upload_file(encrypted_file,
                                    bucket,
                                    s3_key,
                                    ExtraArgs={"Metadata": metadata})
            finally:
                # Remove the encrypted copy even when the upload fails, so
                # failed uploads do not leave temp files behind.
                os.remove(encrypted_file)

        # Upload to S3 without encrypting
        else:
            self.s3.upload_file(file, bucket, s3_key)

        return s3_key
Exemple #9
0
    def upload_to_s3(self, file, table):
        """Upload a local file to the S3 bucket configured in ``connection_config``.

        The S3 key is ``s3_key_prefix`` followed by
        ``pipelinewise_<table>_<timestamp>.csv.gz``. When
        ``client_side_encryption_master_key`` is set, the file is encrypted
        first and the encrypted copy is uploaded with its key and IV as object
        metadata; the encrypted copy is always removed afterwards.

        Args:
            file: Path of the local file to upload.
            table: Table name embedded in the S3 key.

        Returns:
            The S3 key the file was uploaded under.
        """
        bucket = self.connection_config['s3_bucket']
        s3_key_prefix = self.connection_config.get('s3_key_prefix', '')
        s3_key = "{}pipelinewise_{}_{}.csv.gz".format(
            s3_key_prefix, table, time.strftime("%Y%m%d-%H%M%S"))

        utils.log(
            "SNOWFLAKE - Uploading to S3 bucket: {}, local file: {}, S3 key: {}"
            .format(bucket, file, s3_key))

        # Encrypt csv if client side encryption enabled
        master_key = self.connection_config.get(
            'client_side_encryption_master_key', '')
        if master_key != '':
            # Encrypt the file
            utils.log("Encrypting file {}...".format(file))
            encryption_material = SnowflakeFileEncryptionMaterial(
                query_stage_master_key=master_key, query_id='', smk_id=0)
            encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
                encryption_material, file)

            try:
                # Send key and iv in the metadata, that will be required to decrypt the uploaded file
                metadata = {
                    'x-amz-key': encryption_metadata.key,
                    'x-amz-iv': encryption_metadata.iv
                }
                self.s3.upload_file(encrypted_file,
                                    bucket,
                                    s3_key,
                                    ExtraArgs={'Metadata': metadata})
            finally:
                # Remove the encrypted copy even when the upload fails, so
                # failed uploads do not leave temp files behind.
                os.remove(encrypted_file)

        # Upload to S3 without encrypting
        else:
            self.s3.upload_file(file, bucket, s3_key)

        return s3_key
Exemple #10
0
    def put_to_stage(self, file, stream, count):
        """Upload a local file to a Snowflake stage or to S3.

        The target key is ``s3_key_prefix`` followed by
        ``pipelinewise_<stream>_<timestamp>.csv``. When ``aws_access_key_id``
        is absent from the connection config, the file is sent with a ``PUT``
        statement to the stage named by the ``stage`` setting. Otherwise it is
        uploaded to the configured S3 bucket, encrypted first when
        ``client_side_encryption_master_key`` is set; the encrypted copy is
        always removed afterwards.

        Args:
            file: Path of the local file to upload.
            stream: Stream name embedded in the key.
            count: Row count; only used in the log message.

        Returns:
            The key the file was uploaded under.
        """
        logger.info(
            "Uploading {} rows to external snowflake stage on S3".format(
                count))

        s3_key_prefix = self.connection_config.get('s3_key_prefix', '')
        s3_key = "{}pipelinewise_{}_{}.csv".format(
            s3_key_prefix, stream,
            datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f"))

        # internal staging
        if 'aws_access_key_id' not in self.connection_config:
            stage = self.connection_config['stage']
            logger.info("Target internal staging: {} ".format(stage))

            with self.open_connection() as connection:
                with connection.cursor(snowflake.connector.DictCursor) as cur:
                    cur.execute("USE SCHEMA {}".format(
                        self.connection_config['default_target_schema']))
                    logger.info("file: {}, stage: {}, s3_key: {}".format(
                        file, stage, s3_key))
                    put_sql = "PUT file:///{} @{}/{}".format(
                        file, stage, s3_key)
                    logger.info("SNOWFLAKE - {}".format(put_sql))
                    cur.execute(put_sql)
                    logger.info("PUT complete")

        else:
            # Generating key in S3 bucket
            bucket = self.connection_config['s3_bucket']
            # endpoint and region are only reported in the log line below
            endpoint = self.connection_config['s3_endpoint']
            region = self.connection_config['s3_region']

            logger.info(
                "Target S3 bucket: {}, local file: {}, S3 key: {}, endpoint: {}, region: {} "
                .format(bucket, file, s3_key, endpoint, region))

            # Encrypt csv if client side encryption enabled
            master_key = self.connection_config.get(
                'client_side_encryption_master_key', '')
            if master_key != '':
                # Encrypt the file
                encryption_material = SnowflakeFileEncryptionMaterial(
                    query_stage_master_key=master_key, query_id='', smk_id=0)
                encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
                    encryption_material, file)

                try:
                    # Send key and iv in the metadata, that will be required to decrypt the uploaded file
                    metadata = {
                        'x-amz-key': encryption_metadata.key,
                        'x-amz-iv': encryption_metadata.iv
                    }
                    self.s3.upload_file(encrypted_file,
                                        bucket,
                                        s3_key,
                                        ExtraArgs={'Metadata': metadata})
                finally:
                    # Remove the encrypted copy even when the upload fails, so
                    # failed uploads do not leave temp files behind.
                    os.remove(encrypted_file)

            # Upload to S3 without encrypting
            else:
                self.s3.upload_file(file, bucket, s3_key)

        return s3_key