def test_encrypted_parquet_write_read_plain_footer_single_wrapping(
        tempdir, data_table):
    """Write an encrypted parquet file with a plaintext footer and
    single wrapping.

    Only the write path is exercised in this body; the file is not
    read back afterwards.
    """
    target = tempdir / PARQUET_NAME

    # Columns `a` and `b` share one column key; no key is assigned to
    # any other column. The footer is requested in plaintext and double
    # wrapping is switched off.
    column_keys = {COL_KEY_NAME: ["a", "b"]}
    encryption_config = pe.EncryptionConfiguration(
        footer_key=FOOTER_KEY_NAME,
        column_keys=column_keys,
        plaintext_footer=True,
        double_wrapping=False)

    # Master keys handed to the in-memory KMS client, by key name.
    master_keys = {
        FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"),
        COL_KEY_NAME: COL_KEY.decode("UTF-8"),
    }
    kms_connection_config = pe.KmsConnectionConfig(
        custom_kms_conf=master_keys)

    def make_kms_client(connection_config):
        return InMemoryKmsClient(connection_config)

    crypto_factory = pe.CryptoFactory(make_kms_client)

    # Write with encryption properties
    write_encrypted_parquet(target, data_table, encryption_config,
                            kms_connection_config, crypto_factory)
def test_encrypted_parquet_write_kms_specific_error(tempdir, data_table,
                                                    basic_encryption_config):
    """Check that a ValueError raised by the KmsClient's wrap_key
    surfaces from the encrypted parquet write."""
    target = tempdir / 'encrypted_table_kms_error.in_mem.parquet'

    class ThrowingKmsClient(pe.KmsClient):
        """A KmsClient whose wrap/unwrap calls always raise ValueError."""

        def __init__(self, config):
            """Initialise the base KmsClient and keep the given config."""
            super().__init__()
            self.config = config

        def wrap_key(self, key_bytes, master_key_identifier):
            raise ValueError("Cannot Wrap Key")

        def unwrap_key(self, wrapped_key, master_key_identifier):
            raise ValueError("Cannot Unwrap Key")

    def make_throwing_client(connection_config):
        # The returned client raises on every wrap/unwrap call
        return ThrowingKmsClient(connection_config)

    # Connection config built without any arguments (no master keys)
    kms_connection_config = pe.KmsConnectionConfig()
    crypto_factory = pe.CryptoFactory(make_throwing_client)

    with pytest.raises(ValueError, match="Cannot Wrap Key"):
        # Write with encryption properties
        write_encrypted_parquet(target, data_table,
                                basic_encryption_config,
                                kms_connection_config, crypto_factory)
def test_encrypted_parquet_write_kms_factory_type_error(
        tempdir, data_table, basic_encryption_config):
    """Check that a TypeError is raised when the KMS factory returns an
    object that does not derive from KmsClient."""
    target = tempdir / 'encrypted_table_kms_factory_error.in_mem.parquet'

    class WrongTypeKmsClient:
        """Looks like a KMS client but does not subclass KmsClient."""

        def __init__(self, config):
            self.master_keys_map = config.custom_kms_conf

        def wrap_key(self, key_bytes, master_key_identifier):
            return None

        def unwrap_key(self, wrapped_key, master_key_identifier):
            return None

    def make_wrong_client(connection_config):
        return WrongTypeKmsClient(connection_config)

    # Connection config built without any arguments (no master keys)
    kms_connection_config = pe.KmsConnectionConfig()
    crypto_factory = pe.CryptoFactory(make_wrong_client)

    with pytest.raises(TypeError):
        # Write with encryption properties
        write_encrypted_parquet(target, data_table,
                                basic_encryption_config,
                                kms_connection_config, crypto_factory)
def parquet_write_read_with_vault(parquet_filename):
    """Example: write and then read back an encrypted parquet file
    whose master keys are managed by Hashicorp Vault KMS.

    Note that this implementation needs the requests dependency, and
    the environment properties VAULT_URL and VAULT_TOKEN should be set.
    Please enable the transit engine.
    """
    source_table = pa.Table.from_pydict({
        'a': pa.array([1, 2, 3]),
        'b': pa.array(['a', 'b', 'c']),
        'c': pa.array(['x', 'y', 'z'])
    })

    # One key for the footer, one key for column `a`, another key for
    # column `b`; column `c` gets no key.
    footer_key_name = "footer_key"
    col_a_key_name = "col_a_key"
    col_b_key_name = "col_b_key"
    column_keys = {
        col_a_key_name: ["a"],
        col_b_key_name: ["b"],
    }
    encryption_config = pe.EncryptionConfiguration(
        footer_key=footer_key_name, column_keys=column_keys)

    # Missing environment variables fall back to empty strings.
    vault_url = os.environ.get('VAULT_URL', '')
    vault_token = os.environ.get('VAULT_TOKEN', '')
    kms_connection_config = pe.KmsConnectionConfig(
        kms_instance_url=vault_url,
        key_access_token=vault_token,
    )

    def make_vault_client(connection_config):
        return VaultClient(connection_config)

    crypto_factory = pe.CryptoFactory(make_vault_client)

    # Write with encryption properties
    file_encryption_properties = crypto_factory.file_encryption_properties(
        kms_connection_config, encryption_config)
    with pq.ParquetWriter(
            parquet_filename, source_table.schema,
            encryption_properties=file_encryption_properties) as writer:
        writer.write_table(source_table)

    # Read with decryption properties
    file_decryption_properties = crypto_factory.file_decryption_properties(
        kms_connection_config)
    parquet_file = pq.ParquetFile(
        parquet_filename, decryption_properties=file_decryption_properties)
    round_tripped = parquet_file.read()
    assert source_table.equals(round_tripped)
def test_encrypted_parquet_write_kms_factory_error(tempdir, data_table,
                                                   basic_encryption_config):
    """Check that a ValueError raised by the KMS factory itself
    surfaces from the encrypted parquet write."""
    target = tempdir / 'encrypted_table_kms_factory_error.in_mem.parquet'

    def failing_factory(connection_config):
        # Never produces a client
        raise ValueError('Cannot create KmsClient')

    # Connection config built without any arguments (no master keys)
    kms_connection_config = pe.KmsConnectionConfig()
    crypto_factory = pe.CryptoFactory(failing_factory)

    with pytest.raises(ValueError, match="Cannot create KmsClient"):
        # Write with encryption properties
        write_encrypted_parquet(target, data_table,
                                basic_encryption_config,
                                kms_connection_config, crypto_factory)
def test_encrypted_parquet_write_read_wrong_key(tempdir, data_table):
    """Write an encrypted parquet file, verify it is encrypted, then
    check that reading it with swapped master keys raises ValueError."""
    target = tempdir / PARQUET_NAME

    # The footer uses the footer key; columns `a` and `b` share the
    # column key; no key is assigned to any other column.
    cache_lifetime = timedelta(minutes=5.0)
    encryption_config = pe.EncryptionConfiguration(
        footer_key=FOOTER_KEY_NAME,
        column_keys={COL_KEY_NAME: ["a", "b"]},
        encryption_algorithm="AES_GCM_V1",
        cache_lifetime=cache_lifetime,
        data_key_length_bits=256)

    correct_keys = {
        FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"),
        COL_KEY_NAME: COL_KEY.decode("UTF-8"),
    }
    kms_connection_config = pe.KmsConnectionConfig(
        custom_kms_conf=correct_keys)

    def make_kms_client(connection_config):
        return InMemoryKmsClient(connection_config)

    crypto_factory = pe.CryptoFactory(make_kms_client)

    # Write with encryption properties
    write_encrypted_parquet(target, data_table, encryption_config,
                            kms_connection_config, crypto_factory)
    verify_file_encrypted(target)

    # Wrong keys - the two master keys are registered under each
    # other's name
    swapped_keys = {
        FOOTER_KEY_NAME: COL_KEY.decode("UTF-8"),
        COL_KEY_NAME: FOOTER_KEY.decode("UTF-8"),
    }
    wrong_kms_connection_config = pe.KmsConnectionConfig(
        custom_kms_conf=swapped_keys)
    decryption_config = pe.DecryptionConfiguration(
        cache_lifetime=timedelta(minutes=5.0))

    # Read with decryption properties
    with pytest.raises(ValueError, match=r"Incorrect master key used"):
        read_encrypted_parquet(target, decryption_config,
                               wrong_kms_connection_config, crypto_factory)
def test_encrypted_parquet_write_kms_error(tempdir, data_table,
                                           basic_encryption_config):
    """Check that writing an encrypted parquet file raises KeyError
    when the KMS client was given no master keys."""
    target = tempdir / 'encrypted_table_kms_error.in_mem.parquet'

    def make_kms_client(connection_config):
        # With an empty master keys map, wrap/unwrap calls will raise
        # KeyError
        return InMemoryKmsClient(connection_config)

    # Empty master_keys_map
    kms_connection_config = pe.KmsConnectionConfig()
    crypto_factory = pe.CryptoFactory(make_kms_client)

    with pytest.raises(KeyError, match="footer_key"):
        # Write with encryption properties
        write_encrypted_parquet(target, data_table,
                                basic_encryption_config,
                                kms_connection_config, crypto_factory)
def test_encrypted_parquet_loop(tempdir, data_table, basic_encryption_config):
    """Write an encrypted parquet file, verify it is encrypted, then
    read it back 50 times with use_threads=True."""
    target = tempdir / PARQUET_NAME

    master_keys = {
        FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"),
        COL_KEY_NAME: COL_KEY.decode("UTF-8"),
    }
    kms_connection_config = pe.KmsConnectionConfig(
        custom_kms_conf=master_keys)

    def make_kms_client(connection_config):
        return InMemoryKmsClient(connection_config)

    crypto_factory = pe.CryptoFactory(make_kms_client)

    # Write with encryption properties
    write_encrypted_parquet(target, data_table, basic_encryption_config,
                            kms_connection_config, crypto_factory)
    verify_file_encrypted(target)

    decryption_config = pe.DecryptionConfiguration(
        cache_lifetime=timedelta(minutes=5.0))

    for _ in range(50):
        # Fresh decryption properties are requested on every iteration
        file_decryption_properties = crypto_factory.file_decryption_properties(
            kms_connection_config, decryption_config)
        assert file_decryption_properties is not None

        parquet_file = pq.ParquetFile(
            target, decryption_properties=file_decryption_properties)
        round_tripped = parquet_file.read(use_threads=True)
        assert data_table.equals(round_tripped)
def test_encrypted_parquet_write_external(tempdir, data_table):
    """Write an encrypted parquet file configured for external key
    material (internal_key_material=False).

    NOTE(review): the previous docstring said external key material is
    not implemented and the write should throw, yet nothing in this
    body asserts an exception -- confirm the expectation is enforced
    elsewhere (e.g. by a marker applied to this test).
    """
    target = tempdir / PARQUET_NAME

    # Only a footer key is configured; the column key mapping is empty
    encryption_config = pe.EncryptionConfiguration(
        footer_key=FOOTER_KEY_NAME,
        column_keys={},
        internal_key_material=False)

    master_keys = {FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8")}
    kms_connection_config = pe.KmsConnectionConfig(
        custom_kms_conf=master_keys)

    def make_kms_client(connection_config):
        return InMemoryKmsClient(connection_config)

    crypto_factory = pe.CryptoFactory(make_kms_client)

    # Write with encryption properties
    write_encrypted_parquet(target, data_table, encryption_config,
                            kms_connection_config, crypto_factory)
def test_encrypted_parquet_write_read(tempdir, data_table):
    """Round-trip test: write an encrypted parquet file, verify it is
    encrypted, read it back and compare with the original table."""
    target = tempdir / PARQUET_NAME

    # The footer uses the footer key; columns `a` and `b` share the
    # column key; no key is assigned to any other column.
    cache_lifetime = timedelta(minutes=5.0)
    encryption_config = pe.EncryptionConfiguration(
        footer_key=FOOTER_KEY_NAME,
        column_keys={COL_KEY_NAME: ["a", "b"]},
        encryption_algorithm="AES_GCM_V1",
        cache_lifetime=cache_lifetime,
        data_key_length_bits=256)

    master_keys = {
        FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"),
        COL_KEY_NAME: COL_KEY.decode("UTF-8"),
    }
    kms_connection_config = pe.KmsConnectionConfig(
        custom_kms_conf=master_keys)

    def make_kms_client(connection_config):
        return InMemoryKmsClient(connection_config)

    crypto_factory = pe.CryptoFactory(make_kms_client)

    # Write with encryption properties
    write_encrypted_parquet(target, data_table, encryption_config,
                            kms_connection_config, crypto_factory)
    verify_file_encrypted(target)

    # Read with decryption properties
    decryption_config = pe.DecryptionConfiguration(
        cache_lifetime=timedelta(minutes=5.0))
    round_tripped = read_encrypted_parquet(
        target, decryption_config, kms_connection_config, crypto_factory)
    assert data_table.equals(round_tripped)
def test_encrypted_parquet_write_no_col_key(tempdir, data_table):
    """Check that writing with only a footer key configured (no column
    keys) is rejected with an OSError."""
    target = tempdir / 'encrypted_table_no_col_key.in_mem.parquet'

    # Only the footer key is configured; column_keys is left unset
    encryption_config = pe.EncryptionConfiguration(
        footer_key=FOOTER_KEY_NAME)

    master_keys = {
        FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"),
        COL_KEY_NAME: COL_KEY.decode("UTF-8"),
    }
    kms_connection_config = pe.KmsConnectionConfig(
        custom_kms_conf=master_keys)

    def make_kms_client(connection_config):
        return InMemoryKmsClient(connection_config)

    crypto_factory = pe.CryptoFactory(make_kms_client)

    expected_message = ("Either column_keys or uniform_encryption "
                        "must be set")
    with pytest.raises(OSError, match=expected_message):
        # Write with encryption properties
        write_encrypted_parquet(target, data_table, encryption_config,
                                kms_connection_config, crypto_factory)