Exemple #1
0
def test_drop_tables_purge(client, current_ops, refresh_call, tmpdir):
    """Drop a table with ``purge=True`` and check what ends up in ``ops.deleted``.

    Two mock snapshots are set up; ``b-manifest.avro`` (with ``c.parquet`` and
    ``d.parquet``) is listed by both of them. After the drop, every snapshot
    location, manifest name, data file name and the ``"a.json"`` value handed
    to ``MockTableOperations`` must appear in ``ops.deleted``, with no path
    recorded more than once.
    """

    def reader_for(*data_files):
        # Each call yields its own MockReader instance, one entry per file name.
        return MockReader([MockManifestEntry(name) for name in data_files])

    snap_a = MockSnapshot(
        location="snap-a.avro",
        manifests=[MockManifest("a-manifest.avro"),
                   MockManifest("b-manifest.avro")],
        manifest_to_entries={
            "a-manifest.avro": reader_for("a.parquet", "b.parquet"),
            "b-manifest.avro": reader_for("c.parquet", "d.parquet"),
        },
    )
    snap_b = MockSnapshot(
        location="snap-b.avro",
        manifests=[MockManifest("b-manifest.avro"),
                   MockManifest("c-manifest.avro"),
                   MockManifest("d-manifest.avro")],
        manifest_to_entries={
            "b-manifest.avro": reader_for("c.parquet", "d.parquet"),
            "c-manifest.avro": reader_for("e.parquet", "f.parquet"),
            "d-manifest.avro": reader_for("g.parquet", "h.parquet"),
        },
    )

    ops = MockTableOperations(MockMetadata([snap_a, snap_b]), "a.json")
    current_ops.return_value = ops

    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.return_value = MockHMSTable({
        "table_type": "ICEBERG",
        "partition_spec": [],
        "metadata_location": "s3://path/to/iceberg.metadata.json",
    })

    hive_tables = HiveTables({
        "hive.metastore.uris": "thrift://hms:port",
        "hive.metastore.warehouse.dir": tmpdir,
    })
    hive_tables.drop("test", "test_123", purge=True)

    # The shared manifest and its data files must not be deleted twice.
    assert len(ops.deleted) == len(set(ops.deleted)), "Paths should only be deleted once"

    expected_paths = (
        "a.json",
        "snap-a.avro", "snap-b.avro",
        "a-manifest.avro", "b-manifest.avro", "c-manifest.avro", "d-manifest.avro",
        "a.parquet", "b.parquet", "c.parquet", "d.parquet",
        "e.parquet", "f.parquet", "g.parquet", "h.parquet",
    )
    for path in expected_paths:
        assert path in ops.deleted
Exemple #2
0
def test_load_tables_check_no_location(client, current_call, refresh_call):
    """Expect ``RuntimeError`` when the HMS table parameters carry no ``metadata_location``."""
    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.return_value = MockHMSTable(
        {"table_type": "ICEBERG", "partition_spec": []})

    hive_tables = HiveTables({"hive.metastore.uris": "thrift://hms:port"})

    with raises(RuntimeError):
        hive_tables.load("test.test_123")
Exemple #3
0
def test_load_tables_check_missing_iceberg_type(client, current_call, refresh_call):
    """Expect ``RuntimeError`` when the HMS table parameters carry no ``table_type``."""
    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.return_value = MockHMSTable({
        "partition_spec": [],
        "metadata_location": "s3://path/to/iceberg.metdata.json",
    })

    hive_tables = HiveTables({"hive.metastore.uris": "thrift://hms:port"})

    with raises(RuntimeError):
        hive_tables.load("test.test_123")
Exemple #4
0
def test_load_tables_check_valid_props(client, current_call, refresh_call):
    """Load a table whose HMS parameters include both ``table_type`` and ``metadata_location``.

    There is no explicit assertion; the test passes as long as ``load`` does
    not raise.
    """
    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.return_value = MockHMSTable({
        "table_type": "ICEBERG",
        "partition_spec": [],
        "metadata_location": "s3://path/to/iceberg.metadata.json",
    })

    hive_tables = HiveTables({"hive.metastore.uris": "thrift://hms:port"})
    hive_tables.load("test.test_123")
Exemple #5
0
def test_drop_tables(client, metadata, refresh_call, tmpdir):
    """Drop a table with ``purge=False`` and check the call forwarded to the HMS client.

    The mocked client's ``drop_table`` must have been last called with the
    database name, the table name and ``deleteData=False``.
    """
    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.return_value = MockHMSTable({
        "table_type": "ICEBERG",
        "partition_spec": [],
        "metadata_location": "s3://path/to/iceberg.metadata.json",
    })

    hive_tables = HiveTables({
        "hive.metastore.uris": "thrift://hms:port",
        "hive.metastore.warehouse.dir": tmpdir,
    })
    hive_tables.drop("test", "test_123", purge=False)

    hms_client.drop_table.assert_called_with("test", "test_123", deleteData=False)
Exemple #6
0
def test_create_tables(client, current_call, base_scan_schema, base_scan_partition, tmpdir):
    """Create a table through ``HiveTables`` and then load it back.

    Before creation the mocked client's ``get_table`` raises
    ``NoSuchObjectException`` and ``current_call`` returns ``None``. After
    creation, the first positional argument recorded for ``create_table`` is
    used to answer ``get_table`` so that the following ``load`` sees the
    parameters that were just written.
    """
    hms_client = client.return_value.__enter__.return_value

    hms_client.get_table.side_effect = NoSuchObjectException()
    current_call.return_value = None
    # The lock request reports WAITING; the follow-up check reports ACQUIRED.
    hms_client.lock.return_value = LockResponse("x", LockState.WAITING)
    hms_client.check_lock.return_value = LockResponse("x", LockState.ACQUIRED)
    # call_args_list is a live list on the mock: it is filled in by create() below.
    create_calls = hms_client.create_table.call_args_list

    hive_tables = HiveTables({
        "hive.metastore.uris": "thrift://hms:port",
        "hive.metastore.warehouse.dir": tmpdir,
    })
    hive_tables.create(base_scan_schema, "test.test_123", base_scan_partition)

    created_params = create_calls[0].args[0].parameters
    hms_client.get_table.return_value = MockHMSTable(created_params)
    # Clear the exception so get_table falls back to return_value.
    hms_client.get_table.side_effect = None
    current_call.return_value = created_params['metadata_location']

    hive_tables.load("test.test_123")