def test_encrypt_decrypt_large_file(tmpdir, test_files):
    """
    Encrypt and decrypt a large file and check that no lines are lost.

    The ``test_files`` fixture is asked for one file of ``number_of_lines``
    lines. That file is encrypted with
    ``SnowflakeEncryptionUtil.encrypt_file``, decrypted again with
    ``SnowflakeEncryptionUtil.decrypt_file``, and the number of lines read
    back from the decrypted file is compared with ``number_of_lines``.

    The input, encrypted and decrypted files are removed afterwards, whether
    or not the assertion passes.
    """
    encryption_material = SnowflakeFileEncryptionMaterial(
        query_stage_master_key='ztke8tIdVt1zmlQIZm0BMA==',
        query_id='123873c7-3a66-40c4-ab89-e3722fbccce1',
        smk_id=3112)

    # generates N files
    number_of_files = 1
    number_of_lines = 10000
    tmp_dir = test_files(tmpdir, number_of_lines, number_of_files)

    files = glob.glob(os.path.join(tmp_dir, 'file*'))
    input_file = files[0]
    # Stay None until the corresponding step succeeds, so the cleanup below
    # only removes files that were actually produced.
    encrypted_file = None
    decrypted_file = None
    try:
        (metadata, encrypted_file) = SnowflakeEncryptionUtil.encrypt_file(
            encryption_material, input_file)
        decrypted_file = SnowflakeEncryptionUtil.decrypt_file(
            metadata, encryption_material, encrypted_file)

        # Close the handle before the cleanup removes the file; only the
        # line count matters, so the content itself is not accumulated.
        with codecs.open(decrypted_file, 'r', encoding=UTF8) as fd:
            cnt = sum(1 for _ in fd)
        assert cnt == number_of_lines, "number of lines"
    finally:
        os.remove(input_file)
        if encrypted_file:
            os.remove(encrypted_file)
        if decrypted_file:
            os.remove(decrypted_file)
    def put_to_stage(self, file, stream, count):
        """Upload a local file to the configured S3 bucket and return its key.

        The S3 key is built from the optional ``s3_key_prefix`` setting, the
        stream name and the current local timestamp. When
        ``client_side_encryption_master_key`` is set to a non-empty value in
        ``self.connection_config``, the file is first encrypted with
        ``SnowflakeEncryptionUtil.encrypt_file`` and the encrypted copy is
        uploaded together with the key and IV as object metadata; otherwise
        the file is uploaded as-is.

        Args:
            file: Path of the local file to upload.
            stream: Stream name, used in the S3 key and in log messages.
            count: Number of rows in the file; only used for logging.

        Returns:
            The S3 key the file was uploaded to.

        Raises:
            KeyError: If ``s3_bucket`` is missing from the connection config.
        """
        logger.info(f"Uploading {count} rows to stage from {stream} on S3")

        # Generating key in S3 bucket
        bucket = self.connection_config["s3_bucket"]
        s3_key_prefix = self.connection_config.get("s3_key_prefix", "")
        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f")
        s3_key = "{}pipelinewise_{}_{}.csv".format(s3_key_prefix, stream, timestamp)

        logger.info("Target S3 bucket: {}, local file: {}, S3 key: {}".format(bucket, file, s3_key))

        master_key = self.connection_config.get("client_side_encryption_master_key", "")
        if master_key == "":
            # Client side encryption disabled: upload to S3 without encrypting
            self.s3.upload_file(file, bucket, s3_key)
            return s3_key

        # Encrypt csv, client side encryption is enabled
        encryption_material = SnowflakeFileEncryptionMaterial(
            query_stage_master_key=master_key, query_id="", smk_id=0
        )
        encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
            encryption_material, file
        )

        try:
            # Send key and iv in the metadata, that will be required to decrypt the uploaded file
            metadata = {"x-amz-key": encryption_metadata.key, "x-amz-iv": encryption_metadata.iv}
            self.s3.upload_file(encrypted_file, bucket, s3_key, ExtraArgs={"Metadata": metadata})
        finally:
            # Remove the temporary encrypted file even when the upload fails,
            # so failed uploads do not leave encrypted copies on disk
            os.remove(encrypted_file)

        return s3_key
Example #3
0
    def put_to_stage(self, file, stream, count):
        """Upload a local file to a Snowflake stage and return its key.

        The target depends on ``self.connection_config``:

        * Without ``aws_access_key_id`` the file is sent to the stage named
          by the ``stage`` setting with a ``PUT`` statement, after switching
          to ``default_target_schema``.
        * With ``aws_access_key_id`` the file is uploaded to ``s3_bucket``
          through ``self.s3``. When ``client_side_encryption_master_key`` is
          set to a non-empty value, the file is first encrypted with
          ``SnowflakeEncryptionUtil.encrypt_file`` and the key and IV are
          attached as object metadata.

        Args:
            file: Path of the local file to upload.
            stream: Stream name, used to build the key.
            count: Number of rows in the file; only used for logging.

        Returns:
            The key (``<prefix>pipelinewise_<stream>_<timestamp>.csv``) the
            file was stored under.

        Raises:
            KeyError: If a required setting for the chosen path (``stage``
                and ``default_target_schema``, or ``s3_bucket``,
                ``s3_endpoint`` and ``s3_region``) is missing.
        """
        logger.info(
            "Uploading {} rows to external snowflake stage on S3".format(
                count))

        s3_key_prefix = self.connection_config.get('s3_key_prefix', '')
        s3_key = "{}pipelinewise_{}_{}.csv".format(
            s3_key_prefix, stream,
            datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f"))

        # internal staging
        if 'aws_access_key_id' not in self.connection_config:
            stage = self.connection_config['stage']
            logger.info("Target internal staging: {} ".format(stage))

            with self.open_connection() as connection:
                with connection.cursor(snowflake.connector.DictCursor) as cur:
                    cur.execute("USE SCHEMA {}".format(
                        self.connection_config['default_target_schema']))
                    logger.info("file: {}, stage: {}, s3_key: {}".format(
                        file, stage, s3_key))
                    put_sql = "PUT file:///{} @{}/{}".format(
                        file, stage, s3_key)
                    logger.info("SNOWFLAKE - {}".format(put_sql))
                    cur.execute(put_sql)
                    logger.info("PUT complete")

            return s3_key

        # Generating key in S3 bucket
        bucket = self.connection_config['s3_bucket']
        # endpoint and region are only logged here, but both are required keys
        endpoint = self.connection_config['s3_endpoint']
        region = self.connection_config['s3_region']

        logger.info(
            "Target S3 bucket: {}, local file: {}, S3 key: {}, endpoint: {}, region: {} "
            .format(bucket, file, s3_key, endpoint, region))

        master_key = self.connection_config.get(
            'client_side_encryption_master_key', '')
        if master_key == '':
            # Client side encryption disabled: upload to S3 without encrypting
            self.s3.upload_file(file, bucket, s3_key)
            return s3_key

        # Encrypt csv, client side encryption is enabled
        encryption_material = SnowflakeFileEncryptionMaterial(
            query_stage_master_key=master_key, query_id='', smk_id=0)
        encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
            encryption_material, file)

        try:
            # Send key and iv in the metadata, that will be required to decrypt the uploaded file
            metadata = {
                'x-amz-key': encryption_metadata.key,
                'x-amz-iv': encryption_metadata.iv
            }
            self.s3.upload_file(encrypted_file,
                                bucket,
                                s3_key,
                                ExtraArgs={'Metadata': metadata})
        finally:
            # Remove the temporary encrypted file even when the upload fails,
            # so failed uploads do not leave encrypted copies on disk
            os.remove(encrypted_file)

        return s3_key