def test_encrypt_decrypt_file(tmp_path):
    """Round-trip a small text file through encrypt_file and decrypt_file."""
    material = SnowflakeFileEncryptionMaterial(
        query_stage_master_key="ztke8tIdVt1zmlQIZm0BMA==",
        query_id="123873c7-3a66-40c4-ab89-e3722fbccce1",
        smk_id=3112,
    )
    expected = "test data"
    source_path = tmp_path / "test_encrypt_decrypt_file"
    encrypted_path = None
    decrypted_path = None
    try:
        # Write the plaintext fixture.
        with source_path.open("w", encoding=UTF8) as writer:
            writer.write(expected)

        metadata, encrypted_path = SnowflakeEncryptionUtil.encrypt_file(
            material, source_path)
        decrypted_path = SnowflakeEncryptionUtil.decrypt_file(
            metadata, material, encrypted_path)

        # Read the decrypted output back line by line and stitch it together.
        with codecs.open(decrypted_path, "r", encoding=UTF8) as reader:
            actual = "".join(reader)
        assert expected == actual, "encrypted and decrypted contents"
    finally:
        source_path.unlink()
        # Only remove the artifacts that were actually produced.
        for leftover in (encrypted_path, decrypted_path):
            if leftover:
                os.remove(leftover)
def test_encrypt_decrypt_file():
    """Round-trip a small temporary text file through encryption and decryption."""
    material = SnowflakeFileEncryptionMaterial(
        query_stage_master_key='ztke8tIdVt1zmlQIZm0BMA==',
        query_id='123873c7-3a66-40c4-ab89-e3722fbccce1',
        smk_id=3112)
    expected = 'test data'
    source_fd, source_path = tempfile.mkstemp()
    encrypted_path = None
    decrypted_path = None
    try:
        # Write the plaintext fixture through a separate handle on the path.
        with codecs.open(source_path, 'w', encoding=UTF8) as writer:
            writer.write(expected)

        metadata, encrypted_path = SnowflakeEncryptionUtil.encrypt_file(
            material, source_path)
        decrypted_path = SnowflakeEncryptionUtil.decrypt_file(
            metadata, material, encrypted_path)

        # Read the decrypted output back line by line and stitch it together.
        with codecs.open(decrypted_path, 'r', encoding=UTF8) as reader:
            actual = ''.join(reader)
        assert expected == actual, "encrypted and decrypted contents"
    finally:
        # Release the descriptor returned by mkstemp before deleting the file.
        os.close(source_fd)
        os.remove(source_path)
        for leftover in (encrypted_path, decrypted_path):
            if leftover:
                os.remove(leftover)
def test_encrypt_decrypt_large_file(tmpdir):
    """Encrypts and decrypts a large file and checks the line count survives."""
    encryption_material = SnowflakeFileEncryptionMaterial(
        query_stage_master_key='ztke8tIdVt1zmlQIZm0BMA==',
        query_id='123873c7-3a66-40c4-ab89-e3722fbccce1',
        smk_id=3112)

    # generates N files
    number_of_files = 1
    number_of_lines = 10000
    tmp_dir = generate_k_lines_of_n_files(
        number_of_lines, number_of_files, tmp_dir=str(tmpdir.mkdir('data')))

    files = glob.glob(os.path.join(tmp_dir, 'file*'))
    input_file = files[0]
    encrypted_file = None
    decrypted_file = None
    try:
        (metadata, encrypted_file) = SnowflakeEncryptionUtil.encrypt_file(
            encryption_material, input_file)
        decrypted_file = SnowflakeEncryptionUtil.decrypt_file(
            metadata, encryption_material, encrypted_file)

        # Only the number of lines is asserted, so count them while streaming
        # instead of concatenating the whole file into an unused string.
        with codecs.open(decrypted_file, 'r', encoding=UTF8) as fd:
            cnt = sum(1 for _ in fd)
        assert cnt == number_of_lines, "number of lines"
    finally:
        os.remove(input_file)
        if encrypted_file:
            os.remove(encrypted_file)
        if decrypted_file:
            os.remove(decrypted_file)
def put_to_stage(self, file, stream, count, temp_dir=None):
    """Upload a local file to the S3 bucket used as the external Snowflake stage.

    The S3 key is ``s3_key_prefix`` followed by the ``s3_file_naming_scheme``
    template with ``{stream}``, ``{timecode}`` and ``{ext}`` substituted. When
    ``client_side_encryption_master_key`` is configured with a non-empty value,
    the file is encrypted first and the key and IV are attached as object
    metadata.

    Args:
        file: Path of the local file to upload.
        stream: Stream name substituted for ``{stream}`` in the naming scheme.
        count: Row count, used only in the log message.
        temp_dir: Passed to ``SnowflakeEncryptionUtil.encrypt_file`` as ``tmp_dir``.

    Returns:
        The S3 key the file was uploaded under.
    """
    self.logger.info("Uploading %s rows to external snowflake stage on S3", count)

    # Generating key in S3 bucket
    bucket = self.connection_config['s3_bucket']
    s3_acl = self.connection_config.get('s3_acl')
    s3_key_prefix = self.connection_config.get('s3_key_prefix', '')
    s3_file_naming_scheme = self.connection_config.get(
        's3_file_naming_scheme', "pipelinewise_{stream}_{timecode}.{ext}"
    )

    # Everything after the first dot of the base name, e.g. "csv.gz".
    base_name = file.replace("\\", "/").split("/")[-1]
    substitutions = {
        "{stream}": stream,
        "{timecode}": datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f"),
        "{ext}": ".".join(base_name.split(".")[1:]),
    }
    s3_file_name = s3_file_naming_scheme
    # str.replace is a no-op when the placeholder is absent, so no guard needed.
    for placeholder, value in substitutions.items():
        s3_file_name = s3_file_name.replace(placeholder, value)
    s3_key = "{}{}".format(s3_key_prefix, s3_file_name)

    self.logger.info("Target S3 bucket: %s, local file: %s, S3 key: %s", bucket, file, s3_key)

    # Encrypt csv if client side encryption enabled
    master_key = self.connection_config.get('client_side_encryption_master_key', '')
    if master_key != '':
        # Encrypt the file
        encryption_material = SnowflakeFileEncryptionMaterial(
            query_stage_master_key=master_key,
            query_id='',
            smk_id=0
        )
        encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
            encryption_material,
            file,
            tmp_dir=temp_dir
        )
        try:
            # Upload to s3
            extra_args = {'ACL': s3_acl} if s3_acl else {}

            # Send key and iv in the metadata, that will be required to decrypt
            # and upload the encrypted file
            extra_args['Metadata'] = {
                'x-amz-key': encryption_metadata.key,
                'x-amz-iv': encryption_metadata.iv
            }
            self.s3.upload_file(encrypted_file, bucket, s3_key, ExtraArgs=extra_args)
        finally:
            # Remove the encrypted temp file even when the upload fails,
            # otherwise every failed upload leaves a file behind.
            os.remove(encrypted_file)

    # Upload to S3 without encrypting
    else:
        extra_args = {'ACL': s3_acl} if s3_acl else None
        self.s3.upload_file(file, bucket, s3_key, ExtraArgs=extra_args)

    return s3_key
def test_encrypt_decrypt_large_file(tmpdir):
    """Encrypts and decrypts a large file, comparing digest and size.

    The round trip runs twice: once on the generated file and once after
    shortening it, so that sizes that are and are not a multiple of the
    cipher block size are both exercised.
    """
    encryption_material = SnowflakeFileEncryptionMaterial(
        query_stage_master_key="ztke8tIdVt1zmlQIZm0BMA==",
        query_id="123873c7-3a66-40c4-ab89-e3722fbccce1",
        smk_id=3112,
    )

    # generates N files
    number_of_files = 1
    number_of_lines = 100_000
    tmp_dir = generate_k_lines_of_n_files(
        number_of_lines, number_of_files, tmp_dir=str(tmpdir.mkdir("data")))

    files = glob.glob(os.path.join(tmp_dir, "file*"))
    input_file = files[0]
    # Every encrypted/decrypted artifact from both runs, so none is leaked.
    produced_files = []
    try:
        digest_in, size_in = SnowflakeFileUtil.get_digest_and_size_for_file(
            input_file)
        for run_count in range(2):
            # Test padding cases when size is and is not multiple of block_size
            if run_count == 1:
                # Second run: shorten the file to test a different padding case.
                # "r+b" keeps the existing bytes; "wb" would empty the file first
                # and truncate() would then extend it with platform-dependent
                # filler instead of shortening the original data.
                with open(input_file, "r+b") as f_in:
                    if size_in % encryption_util.block_size == 0:
                        size_in -= 3
                    else:
                        size_in -= size_in % encryption_util.block_size
                    f_in.truncate(size_in)
                digest_in, size_in = SnowflakeFileUtil.get_digest_and_size_for_file(
                    input_file)

            (metadata, encrypted_file) = SnowflakeEncryptionUtil.encrypt_file(
                encryption_material, input_file)
            produced_files.append(encrypted_file)
            decrypted_file = SnowflakeEncryptionUtil.decrypt_file(
                metadata, encryption_material, encrypted_file)
            produced_files.append(decrypted_file)

            digest_dec, size_dec = SnowflakeFileUtil.get_digest_and_size_for_file(
                decrypted_file)
            assert size_in == size_dec
            assert digest_in == digest_dec
    finally:
        os.remove(input_file)
        for produced in produced_files:
            if produced:
                os.remove(produced)
def upload_file(self, file, stream, temp_dir=None):
    """Upload file to an external snowflake stage on s3.

    The S3 key is ``s3_key_prefix`` followed by
    ``pipelinewise_<stream>_<timestamp>_<file base name>``. When
    ``client_side_encryption_master_key`` is configured with a non-empty value,
    the file is encrypted first and the key and IV are attached as object
    metadata.

    Args:
        file: Path of the local file to upload.
        stream: Stream name embedded in the S3 key.
        temp_dir: Passed to ``SnowflakeEncryptionUtil.encrypt_file`` as ``tmp_dir``.

    Returns:
        The S3 key the file was uploaded under.
    """
    # Generating key in S3 bucket
    bucket = self.connection_config['s3_bucket']
    s3_acl = self.connection_config.get('s3_acl')
    s3_key_prefix = self.connection_config.get('s3_key_prefix', '')
    s3_key = "{}pipelinewise_{}_{}_{}".format(
        s3_key_prefix,
        stream,
        datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f"),
        os.path.basename(file))

    self.logger.info('Target S3 bucket: %s, local file: %s, S3 key: %s', bucket, file, s3_key)

    # Encrypt csv if client side encryption enabled
    master_key = self.connection_config.get('client_side_encryption_master_key', '')
    if master_key != '':
        # Encrypt the file
        encryption_material = SnowflakeFileEncryptionMaterial(
            query_stage_master_key=master_key, query_id='', smk_id=0)
        encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
            encryption_material, file, tmp_dir=temp_dir)
        try:
            # Upload to s3
            extra_args = {'ACL': s3_acl} if s3_acl else {}

            # Send key and iv in the metadata, that will be required to decrypt
            # and upload the encrypted file
            extra_args['Metadata'] = {
                'x-amz-key': encryption_metadata.key,
                'x-amz-iv': encryption_metadata.iv
            }
            self.s3_client.upload_file(encrypted_file, bucket, s3_key, ExtraArgs=extra_args)
        finally:
            # Remove the encrypted temp file even when the upload fails,
            # otherwise every failed upload leaves a file behind.
            os.remove(encrypted_file)

    # Upload to S3 without encrypting
    else:
        extra_args = {'ACL': s3_acl} if s3_acl else None
        self.s3_client.upload_file(file, bucket, s3_key, ExtraArgs=extra_args)

    return s3_key
def upload_to_s3(self, file, tmp_dir=None):
    """Upload a local file to the configured S3 bucket and return its key.

    The S3 key is ``s3_key_prefix`` followed by the file's base name. When
    ``client_side_encryption_master_key`` is configured with a non-empty value,
    the file is encrypted first and the key and IV are attached as object
    metadata.

    Args:
        file: Path of the local file to upload.
        tmp_dir: Passed to ``SnowflakeEncryptionUtil.encrypt_file`` as ``tmp_dir``.

    Returns:
        The S3 key the file was uploaded under.
    """
    bucket = self.connection_config['s3_bucket']
    s3_acl = self.connection_config.get('s3_acl')
    s3_key_prefix = self.connection_config.get('s3_key_prefix', '')
    s3_key = '{}{}'.format(s3_key_prefix, os.path.basename(file))

    LOGGER.info(
        'Uploading to S3 bucket: %s, local file: %s, S3 key: %s',
        bucket,
        file,
        s3_key,
    )

    # Encrypt csv if client side encryption enabled
    master_key = self.connection_config.get('client_side_encryption_master_key', '')
    if master_key != '':
        # Encrypt the file
        LOGGER.info('Encrypting file %s...', file)
        encryption_material = SnowflakeFileEncryptionMaterial(
            query_stage_master_key=master_key, query_id='', smk_id=0)
        encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
            encryption_material, file, tmp_dir=tmp_dir)
        try:
            # Upload to s3
            extra_args = {'ACL': s3_acl} if s3_acl else {}

            # Send key and iv in the metadata, that will be required to decrypt
            # and upload the encrypted file
            extra_args['Metadata'] = {
                'x-amz-key': encryption_metadata.key,
                'x-amz-iv': encryption_metadata.iv,
            }
            self.s3.upload_file(encrypted_file, bucket, s3_key, ExtraArgs=extra_args)
        finally:
            # Remove the encrypted temp file even when the upload fails,
            # otherwise every failed upload leaves a file behind.
            os.remove(encrypted_file)

    # Upload to S3 without encrypting
    else:
        extra_args = {'ACL': s3_acl} if s3_acl else None
        self.s3.upload_file(file, bucket, s3_key, ExtraArgs=extra_args)

    return s3_key
def put_to_stage(self, file, stream, count, temp_dir=None):
    """Upload a local file to the S3 bucket used as the external Snowflake stage.

    The S3 key is ``s3_key_prefix`` followed by
    ``pipelinewise_<stream>_<timestamp>.csv``. When
    ``client_side_encryption_master_key`` is configured with a non-empty value,
    the file is encrypted first and the key and IV are attached as object
    metadata.

    Args:
        file: Path of the local file to upload.
        stream: Stream name embedded in the S3 key.
        count: Row count, used only in the log message.
        temp_dir: Passed to ``SnowflakeEncryptionUtil.encrypt_file`` as ``tmp_dir``.

    Returns:
        The S3 key the file was uploaded under.
    """
    self.logger.info("Uploading %s rows to external snowflake stage on S3", count)

    # Generating key in S3 bucket
    bucket = self.connection_config['s3_bucket']
    s3_key_prefix = self.connection_config.get('s3_key_prefix', '')
    s3_key = "{}pipelinewise_{}_{}.csv".format(
        s3_key_prefix, stream,
        datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f"))

    self.logger.info("Target S3 bucket: %s, local file: %s, S3 key: %s", bucket, file, s3_key)

    # Encrypt csv if client side encryption enabled
    master_key = self.connection_config.get('client_side_encryption_master_key', '')
    if master_key != '':
        # Encrypt the file
        encryption_material = SnowflakeFileEncryptionMaterial(
            query_stage_master_key=master_key, query_id='', smk_id=0)
        encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
            encryption_material, file, tmp_dir=temp_dir)
        try:
            # Upload to s3
            # Send key and iv in the metadata, that will be required to decrypt
            # and upload the encrypted file
            metadata = {
                'x-amz-key': encryption_metadata.key,
                'x-amz-iv': encryption_metadata.iv
            }
            self.s3.upload_file(encrypted_file, bucket, s3_key,
                                ExtraArgs={'Metadata': metadata})
        finally:
            # Remove the encrypted temp file even when the upload fails,
            # otherwise every failed upload leaves a file behind.
            os.remove(encrypted_file)

    # Upload to S3 without encrypting
    else:
        self.s3.upload_file(file, bucket, s3_key)

    return s3_key
def put_to_stage(self, file, stream, count):
    """Upload a local file to the configured S3 bucket and return its key.

    The S3 key is ``s3_key_prefix`` followed by
    ``pipelinewise_<stream>_<timestamp>.csv``. When
    ``client_side_encryption_master_key`` is configured with a non-empty value,
    the file is encrypted first and the key and IV are attached as object
    metadata.

    Args:
        file: Path of the local file to upload.
        stream: Stream name embedded in the S3 key and the log message.
        count: Row count, used only in the log message.

    Returns:
        The S3 key the file was uploaded under.
    """
    logger.info(f"Uploading {count} rows to stage from {stream} on S3")

    # Generating key in S3 bucket
    bucket = self.connection_config["s3_bucket"]
    s3_key_prefix = self.connection_config.get("s3_key_prefix", "")
    s3_key = "{}pipelinewise_{}_{}.csv".format(
        s3_key_prefix, stream,
        datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f"))

    logger.info("Target S3 bucket: {}, local file: {}, S3 key: {}".format(
        bucket, file, s3_key))

    # Encrypt csv if client side encryption enabled
    master_key = self.connection_config.get("client_side_encryption_master_key", "")
    if master_key != "":
        # Encrypt the file
        encryption_material = SnowflakeFileEncryptionMaterial(
            query_stage_master_key=master_key, query_id="", smk_id=0)
        encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
            encryption_material, file)
        try:
            # Upload to s3
            # Send key and iv in the metadata, that will be required to decrypt
            # and upload the encrypted file
            metadata = {
                "x-amz-key": encryption_metadata.key,
                "x-amz-iv": encryption_metadata.iv
            }
            self.s3.upload_file(encrypted_file, bucket, s3_key,
                                ExtraArgs={"Metadata": metadata})
        finally:
            # Remove the encrypted temp file even when the upload fails,
            # otherwise every failed upload leaves a file behind.
            os.remove(encrypted_file)

    # Upload to S3 without encrypting
    else:
        self.s3.upload_file(file, bucket, s3_key)

    return s3_key
def upload_to_s3(self, file, table):
    """Upload a local file to the configured S3 bucket and return its key.

    The S3 key is ``s3_key_prefix`` followed by
    ``pipelinewise_<table>_<timestamp>.csv.gz``. When
    ``client_side_encryption_master_key`` is configured with a non-empty value,
    the file is encrypted first and the key and IV are attached as object
    metadata.

    Args:
        file: Path of the local file to upload.
        table: Table name embedded in the S3 key.

    Returns:
        The S3 key the file was uploaded under.
    """
    bucket = self.connection_config['s3_bucket']
    s3_key_prefix = self.connection_config.get('s3_key_prefix', '')
    s3_key = "{}pipelinewise_{}_{}.csv.gz".format(
        s3_key_prefix, table, time.strftime("%Y%m%d-%H%M%S"))

    utils.log(
        "SNOWFLAKE - Uploading to S3 bucket: {}, local file: {}, S3 key: {}"
        .format(bucket, file, s3_key))

    # Encrypt csv if client side encryption enabled
    master_key = self.connection_config.get('client_side_encryption_master_key', '')
    if master_key != '':
        # Encrypt the file
        utils.log("Encrypting file {}...".format(file))
        encryption_material = SnowflakeFileEncryptionMaterial(
            query_stage_master_key=master_key, query_id='', smk_id=0)
        encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
            encryption_material, file)
        try:
            # Upload to s3
            # Send key and iv in the metadata, that will be required to decrypt
            # and upload the encrypted file
            metadata = {
                'x-amz-key': encryption_metadata.key,
                'x-amz-iv': encryption_metadata.iv
            }
            self.s3.upload_file(encrypted_file, bucket, s3_key,
                                ExtraArgs={'Metadata': metadata})
        finally:
            # Remove the encrypted temp file even when the upload fails,
            # otherwise every failed upload leaves a file behind.
            os.remove(encrypted_file)

    # Upload to S3 without encrypting
    else:
        self.s3.upload_file(file, bucket, s3_key)

    return s3_key
def put_to_stage(self, file, stream, count):
    """Upload a local file to a Snowflake internal stage or to S3.

    When ``aws_access_key_id`` is absent from the connection config the file
    is sent with a ``PUT`` statement to the stage named by the ``stage``
    setting. Otherwise it is uploaded to the ``s3_bucket``; in that case, if
    ``client_side_encryption_master_key`` is configured with a non-empty
    value, the file is encrypted first and the key and IV are attached as
    object metadata.

    Args:
        file: Path of the local file to upload.
        stream: Stream name embedded in the generated key.
        count: Row count, used only in the log message.

    Returns:
        The generated key, ``<s3_key_prefix>pipelinewise_<stream>_<timestamp>.csv``.
    """
    logger.info(
        "Uploading {} rows to external snowflake stage on S3".format(count))

    s3_key_prefix = self.connection_config.get('s3_key_prefix', '')
    s3_key = "{}pipelinewise_{}_{}.csv".format(
        s3_key_prefix, stream,
        datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f"))

    # internal staging
    if 'aws_access_key_id' not in self.connection_config:
        stage = self.connection_config['stage']
        logger.info("Target internal staging: {} ".format(stage))
        with self.open_connection() as connection:
            with connection.cursor(snowflake.connector.DictCursor) as cur:
                cur.execute("USE SCHEMA {}".format(
                    self.connection_config['default_target_schema']))
                logger.info("file: {}, stage: {}, s3_key: {}".format(
                    file, stage, s3_key))
                put_sql = "PUT file:///{} @{}/{}".format(file, stage, s3_key)
                logger.info("SNOWFLAKE - {}".format(put_sql))
                cur.execute(put_sql)
                logger.info("PUT complete")
    else:
        # Generating key in S3 bucket
        bucket = self.connection_config['s3_bucket']
        endpoint = self.connection_config['s3_endpoint']
        region = self.connection_config['s3_region']

        logger.info(
            "Target S3 bucket: {}, local file: {}, S3 key: {}, endpoint: {}, region: {} "
            .format(bucket, file, s3_key, endpoint, region))

        # Encrypt csv if client side encryption enabled
        master_key = self.connection_config.get(
            'client_side_encryption_master_key', '')
        if master_key != '':
            # Encrypt the file
            encryption_material = SnowflakeFileEncryptionMaterial(
                query_stage_master_key=master_key, query_id='', smk_id=0)
            encryption_metadata, encrypted_file = SnowflakeEncryptionUtil.encrypt_file(
                encryption_material, file)
            try:
                # Upload to s3
                # Send key and iv in the metadata, that will be required to
                # decrypt and upload the encrypted file
                metadata = {
                    'x-amz-key': encryption_metadata.key,
                    'x-amz-iv': encryption_metadata.iv
                }
                self.s3.upload_file(encrypted_file, bucket, s3_key,
                                    ExtraArgs={'Metadata': metadata})
            finally:
                # Remove the encrypted temp file even when the upload fails,
                # otherwise every failed upload leaves a file behind.
                os.remove(encrypted_file)

        # Upload to S3 without encrypting
        else:
            self.s3.upload_file(file, bucket, s3_key)

    return s3_key