예제 #1
0
def test_encrypted_parquet_encryption_configuration():
    """Check that EncryptionConfiguration reports the values it was given.

    The same option values are applied twice: once as constructor keyword
    arguments and once through attribute assignment on an object that was
    built with only a footer key. Both objects must pass the same checks.
    """
    def make_options():
        # Build fresh objects on every call so the two configurations
        # under test do not share any mutable state.
        return {
            "column_keys": {COL_KEY_NAME: ["a", "b"]},
            "encryption_algorithm": "AES_GCM_CTR_V1",
            "plaintext_footer": True,
            "double_wrapping": False,
            "cache_lifetime": timedelta(minutes=10.0),
            "internal_key_material": False,
            "data_key_length_bits": 192,
        }

    def check(config):
        # Every attribute must read back exactly as it was set.
        assert FOOTER_KEY_NAME == config.footer_key
        assert ["a", "b"] == config.column_keys[COL_KEY_NAME]
        assert "AES_GCM_CTR_V1" == config.encryption_algorithm
        assert config.plaintext_footer
        assert not config.double_wrapping
        assert timedelta(minutes=10.0) == config.cache_lifetime
        assert not config.internal_key_material
        assert 192 == config.data_key_length_bits

    # Variant 1: everything passed to the constructor.
    via_constructor = pe.EncryptionConfiguration(
        footer_key=FOOTER_KEY_NAME, **make_options())
    check(via_constructor)

    # Variant 2: only the footer key up front, the rest assigned afterwards
    # in the same order as the options are listed above.
    via_attributes = pe.EncryptionConfiguration(footer_key=FOOTER_KEY_NAME)
    for attribute, value in make_options().items():
        setattr(via_attributes, attribute, value)
    check(via_attributes)
예제 #2
0
def test_encrypted_parquet_write_read_plain_footer_single_wrapping(
        tempdir, data_table):
    """Write an encrypted parquet with a plaintext footer and single
    wrapping.

    Columns `a` and `b` are assigned to the column key; no other column is
    listed. The visible test body ends once the write call returns - it
    performs no read-back.
    """
    def kms_factory(kms_connection_configuration):
        return InMemoryKmsClient(kms_connection_configuration)

    # Master keys handed to the in-memory KMS client, indexed by key name.
    master_keys = {
        FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"),
        COL_KEY_NAME: COL_KEY.decode("UTF-8"),
    }
    kms_connection_config = pe.KmsConnectionConfig(
        custom_kms_conf=master_keys)

    # plaintext_footer=True and double_wrapping=False are the two settings
    # this test is named after.
    encryption_config = pe.EncryptionConfiguration(
        footer_key=FOOTER_KEY_NAME,
        column_keys={COL_KEY_NAME: ["a", "b"]},
        plaintext_footer=True,
        double_wrapping=False,
    )

    crypto_factory = pe.CryptoFactory(kms_factory)
    target = tempdir / PARQUET_NAME

    # Write with encryption properties
    write_encrypted_parquet(target, data_table, encryption_config,
                            kms_connection_config, crypto_factory)
예제 #3
0
def basic_encryption_config():
    """Return an EncryptionConfiguration with the footer key and a single
    column key covering columns `a` and `b`; all other options are left
    at whatever the constructor defaults to."""
    return pe.EncryptionConfiguration(
        footer_key=FOOTER_KEY_NAME,
        column_keys={COL_KEY_NAME: ["a", "b"]},
    )
예제 #4
0
def test_encrypted_parquet_write_no_col_key(tempdir, data_table):
    """Writing with only a footer key configured must be rejected.

    The configuration sets neither column_keys nor uniform_encryption, and
    the write is expected to raise OSError with a message saying so.
    """
    def kms_factory(kms_connection_configuration):
        return InMemoryKmsClient(kms_connection_configuration)

    target = tempdir / 'encrypted_table_no_col_key.in_mem.parquet'

    # Footer key only - deliberately no column keys.
    footer_only_config = pe.EncryptionConfiguration(
        footer_key=FOOTER_KEY_NAME)

    master_keys = {
        FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"),
        COL_KEY_NAME: COL_KEY.decode("UTF-8"),
    }
    kms_connection_config = pe.KmsConnectionConfig(
        custom_kms_conf=master_keys)
    crypto_factory = pe.CryptoFactory(kms_factory)

    expected_failure = pytest.raises(
        OSError,
        match="Either column_keys or uniform_encryption "
        "must be set")
    with expected_failure:
        # Write with encryption properties
        write_encrypted_parquet(target, data_table, footer_only_config,
                                kms_connection_config, crypto_factory)
예제 #5
0
def parquet_write_read_with_vault(parquet_filename):
    """Round-trip a small table through an encrypted parquet file using
    master keys managed by Hashicorp Vault KMS.

    The KMS instance URL and access token are read from the environment
    variables VAULT_URL and VAULT_TOKEN (an empty string is used when a
    variable is unset). Per the original author's note, the `requests`
    dependency is needed and the Vault transit engine should be enabled.

    The function asserts that the table read back equals the one written.
    """
    def kms_factory(kms_connection_configuration):
        return VaultClient(kms_connection_configuration)

    columns = {
        'a': pa.array([1, 2, 3]),
        'b': pa.array(['a', 'b', 'c']),
        'c': pa.array(['x', 'y', 'z'])
    }
    table = pa.Table.from_pydict(columns)

    # Column `a` and column `b` each get their own key; column `c` is not
    # listed under any column key.
    encryption_config = pe.EncryptionConfiguration(
        footer_key="footer_key",
        column_keys={
            "col_a_key": ["a"],
            "col_b_key": ["b"],
        })

    vault_url = os.environ.get('VAULT_URL', '')
    vault_token = os.environ.get('VAULT_TOKEN', '')
    kms_connection_config = pe.KmsConnectionConfig(
        kms_instance_url=vault_url,
        key_access_token=vault_token,
    )

    crypto_factory = pe.CryptoFactory(kms_factory)

    # Write with encryption properties
    encryption_properties = crypto_factory.file_encryption_properties(
        kms_connection_config, encryption_config)
    with pq.ParquetWriter(
            parquet_filename, table.schema,
            encryption_properties=encryption_properties) as writer:
        writer.write_table(table)

    # Read with decryption properties
    decryption_properties = crypto_factory.file_decryption_properties(
        kms_connection_config)
    parquet_file = pq.ParquetFile(
        parquet_filename, decryption_properties=decryption_properties)
    round_tripped = parquet_file.read()
    assert table.equals(round_tripped)
예제 #6
0
def test_encrypted_parquet_write_read_wrong_key(tempdir, data_table):
    """Write an encrypted parquet, check it is encrypted, then confirm
    that reading it with swapped master keys raises ValueError."""
    def kms_factory(kms_connection_configuration):
        return InMemoryKmsClient(kms_connection_configuration)

    target = tempdir / PARQUET_NAME
    footer_master_key = FOOTER_KEY.decode("UTF-8")
    column_master_key = COL_KEY.decode("UTF-8")

    # Columns `a` and `b` share the column key; no other column is listed.
    encryption_config = pe.EncryptionConfiguration(
        footer_key=FOOTER_KEY_NAME,
        column_keys={COL_KEY_NAME: ["a", "b"]},
        encryption_algorithm="AES_GCM_V1",
        cache_lifetime=timedelta(minutes=5.0),
        data_key_length_bits=256,
    )
    correct_kms_config = pe.KmsConnectionConfig(
        custom_kms_conf={
            FOOTER_KEY_NAME: footer_master_key,
            COL_KEY_NAME: column_master_key,
        }
    )
    crypto_factory = pe.CryptoFactory(kms_factory)

    # Write with encryption properties
    write_encrypted_parquet(target, data_table, encryption_config,
                            correct_kms_config, crypto_factory)
    verify_file_encrypted(target)

    # Same key names, but each one now maps to the other key's material.
    swapped_kms_config = pe.KmsConnectionConfig(
        custom_kms_conf={
            FOOTER_KEY_NAME: column_master_key,
            COL_KEY_NAME: footer_master_key,
        }
    )
    decryption_config = pe.DecryptionConfiguration(
        cache_lifetime=timedelta(minutes=5.0))

    # Read with decryption properties - must fail on the wrong keys.
    with pytest.raises(ValueError, match=r"Incorrect master key used"):
        read_encrypted_parquet(
            target, decryption_config, swapped_kms_config, crypto_factory)
예제 #7
0
def test_encrypted_parquet_write_external(tempdir, data_table):
    """Write an encrypted parquet requesting external key material
    (internal_key_material=False) with an empty column_keys mapping.

    NOTE(review): the original description says this mode is not
    implemented and the write should throw, yet nothing in this function
    checks for an exception - presumably the expected failure is declared
    outside the function body (e.g. by a test marker); confirm.
    """
    def kms_factory(kms_connection_configuration):
        return InMemoryKmsClient(kms_connection_configuration)

    target = tempdir / PARQUET_NAME

    # Only the footer key is registered with the in-memory KMS client.
    kms_connection_config = pe.KmsConnectionConfig(
        custom_kms_conf={FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8")})

    external_material_config = pe.EncryptionConfiguration(
        footer_key=FOOTER_KEY_NAME,
        column_keys={},
        internal_key_material=False,
    )

    crypto_factory = pe.CryptoFactory(kms_factory)

    # Write with encryption properties
    write_encrypted_parquet(target, data_table, external_material_config,
                            kms_connection_config, crypto_factory)
예제 #8
0
def test_encrypted_parquet_write_read(tempdir, data_table):
    """Round-trip test: write an encrypted parquet, check it is encrypted,
    read it back with the same keys and compare with the input table."""
    def kms_factory(kms_connection_configuration):
        return InMemoryKmsClient(kms_connection_configuration)

    target = tempdir / PARQUET_NAME

    # The same KMS connection settings are used for writing and reading.
    master_keys = {
        FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"),
        COL_KEY_NAME: COL_KEY.decode("UTF-8"),
    }
    kms_connection_config = pe.KmsConnectionConfig(
        custom_kms_conf=master_keys)

    # Columns `a` and `b` share the column key; no other column is listed.
    encryption_config = pe.EncryptionConfiguration(
        footer_key=FOOTER_KEY_NAME,
        column_keys={COL_KEY_NAME: ["a", "b"]},
        encryption_algorithm="AES_GCM_V1",
        cache_lifetime=timedelta(minutes=5.0),
        data_key_length_bits=256,
    )

    crypto_factory = pe.CryptoFactory(kms_factory)

    # Write with encryption properties
    write_encrypted_parquet(target, data_table, encryption_config,
                            kms_connection_config, crypto_factory)
    verify_file_encrypted(target)

    # Read with decryption properties
    decryption_config = pe.DecryptionConfiguration(
        cache_lifetime=timedelta(minutes=5.0))
    round_tripped = read_encrypted_parquet(
        target, decryption_config, kms_connection_config, crypto_factory)
    assert data_table.equals(round_tripped)