예제 #1
0
def test_drop_tables_purge(client, current_ops, refresh_call, tmpdir):
    """Check that ``drop(..., purge=True)`` records every expected path as deleted.

    Two mock snapshots are built; both list ``b-manifest.avro``, so the test also
    verifies that no path appears more than once in ``ops.deleted``.
    """

    def reader_for(*data_files):
        # Wrap one MockManifestEntry per data file in a MockReader.
        return MockReader([MockManifestEntry(path) for path in data_files])

    first_snapshot = MockSnapshot(
        location="snap-a.avro",
        manifests=[MockManifest("a-manifest.avro"),
                   MockManifest("b-manifest.avro")],
        manifest_to_entries={
            "a-manifest.avro": reader_for("a.parquet", "b.parquet"),
            "b-manifest.avro": reader_for("c.parquet", "d.parquet"),
        })
    second_snapshot = MockSnapshot(
        location="snap-b.avro",
        manifests=[MockManifest("b-manifest.avro"),
                   MockManifest("c-manifest.avro"),
                   MockManifest("d-manifest.avro")],
        manifest_to_entries={
            "b-manifest.avro": reader_for("c.parquet", "d.parquet"),
            "c-manifest.avro": reader_for("e.parquet", "f.parquet"),
            "d-manifest.avro": reader_for("g.parquet", "h.parquet"),
        })

    ops = MockTableOperations(MockMetadata([first_snapshot, second_snapshot]), "a.json")
    current_ops.return_value = ops

    # Table returned by the mocked client's get_table call.
    hms_table = MockHMSTable({"table_type": "ICEBERG",
                              "partition_spec": [],
                              "metadata_location": "s3://path/to/iceberg.metadata.json"})
    client.return_value.__enter__.return_value.get_table.return_value = hms_table

    tables = HiveTables({"hive.metastore.uris": 'thrift://hms:port',
                         "hive.metastore.warehouse.dir": tmpdir})
    tables.drop("test", "test_123", purge=True)

    # Shared manifests / data files must not be deleted twice.
    assert len(ops.deleted) == len(set(ops.deleted)), "Paths should only be deleted once"

    expected_deleted = (
        "a.json",
        "snap-a.avro", "snap-b.avro",
        "a-manifest.avro", "b-manifest.avro", "c-manifest.avro", "d-manifest.avro",
        "a.parquet", "b.parquet", "c.parquet", "d.parquet",
        "e.parquet", "f.parquet", "g.parquet", "h.parquet",
    )
    for path in expected_deleted:
        assert path in ops.deleted
예제 #2
0
def test_drop_tables(client, metadata, refresh_call, tmpdir):
    """Check that ``drop(..., purge=False)`` calls ``drop_table`` with ``deleteData=False``.

    The mocked client's ``get_table`` is primed with a ``MockHMSTable`` before the
    drop, and the final assertion inspects the most recent ``drop_table`` call.
    """
    # Object yielded when the mocked client is used as a context manager.
    entered_client = client.return_value.__enter__.return_value
    entered_client.get_table.return_value = MockHMSTable({
        "table_type": "ICEBERG",
        "partition_spec": [],
        "metadata_location": "s3://path/to/iceberg.metadata.json",
    })

    hive_tables = HiveTables({
        "hive.metastore.uris": 'thrift://hms:port',
        "hive.metastore.warehouse.dir": tmpdir,
    })
    hive_tables.drop("test", "test_123", purge=False)

    entered_client.drop_table.assert_called_with("test", "test_123", deleteData=False)