def test_drop_tables_purge(client, current_ops, refresh_call, tmpdir):
    """Drop a table with ``purge=True`` and check what the mock ops deleted.

    Two mock snapshots share ``b-manifest.avro``. After the drop, the mock
    table operations' ``deleted`` record must contain the metadata file, both
    snapshot files, all four manifests and all eight data files, with no
    path recorded more than once.
    """

    def manifest_list(*names):
        # One MockManifest per manifest file name, in the given order.
        return [MockManifest(name) for name in names]

    def reader_for(*data_files):
        # Fresh MockReader over one MockManifestEntry per data file path.
        return MockReader([MockManifestEntry(path) for path in data_files])

    snapshot_a = MockSnapshot(
        location="snap-a.avro",
        manifests=manifest_list("a-manifest.avro", "b-manifest.avro"),
        manifest_to_entries={
            "a-manifest.avro": reader_for("a.parquet", "b.parquet"),
            "b-manifest.avro": reader_for("c.parquet", "d.parquet"),
        },
    )
    snapshot_b = MockSnapshot(
        location="snap-b.avro",
        manifests=manifest_list("b-manifest.avro", "c-manifest.avro", "d-manifest.avro"),
        manifest_to_entries={
            "b-manifest.avro": reader_for("c.parquet", "d.parquet"),
            "c-manifest.avro": reader_for("e.parquet", "f.parquet"),
            "d-manifest.avro": reader_for("g.parquet", "h.parquet"),
        },
    )

    ops = MockTableOperations(MockMetadata([snapshot_a, snapshot_b]), "a.json")
    current_ops.return_value = ops

    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.return_value = MockHMSTable({
        "table_type": "ICEBERG",
        "partition_spec": [],
        "metadata_location": "s3://path/to/iceberg.metadata.json",
    })

    tables = HiveTables({
        "hive.metastore.uris": "thrift://hms:port",
        "hive.metastore.warehouse.dir": tmpdir,
    })
    tables.drop("test", "test_123", purge=True)

    # The shared manifest and its data files must not be deleted twice.
    assert len(set(ops.deleted)) == len(ops.deleted), "Paths should only be deleted once"

    expected_paths = (
        "a.json",
        "snap-a.avro", "snap-b.avro",
        "a-manifest.avro", "b-manifest.avro", "c-manifest.avro", "d-manifest.avro",
        "a.parquet", "b.parquet", "c.parquet", "d.parquet",
        "e.parquet", "f.parquet", "g.parquet", "h.parquet",
    )
    for path in expected_paths:
        assert path in ops.deleted
def test_load_tables_check_no_location(client, current_call, refresh_call):
    """Expect ``RuntimeError`` from ``load`` when ``metadata_location`` is absent.

    The mocked HMS table carries ``table_type`` and ``partition_spec`` but no
    ``metadata_location`` parameter.
    """
    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.return_value = MockHMSTable({
        "table_type": "ICEBERG",
        "partition_spec": [],
    })

    tables = HiveTables({"hive.metastore.uris": "thrift://hms:port"})

    with raises(RuntimeError):
        tables.load("test.test_123")
def test_load_tables_check_missing_iceberg_type(client, current_call, refresh_call):
    """Expect ``RuntimeError`` from ``load`` when ``table_type`` is absent.

    The mocked HMS table carries ``partition_spec`` and ``metadata_location``
    but no ``table_type`` parameter.
    """
    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.return_value = MockHMSTable({
        "partition_spec": [],
        # NOTE(review): "metdata" spelling is kept exactly as in the existing
        # test data; the value is not what this test exercises.
        "metadata_location": "s3://path/to/iceberg.metdata.json",
    })

    tables = HiveTables({"hive.metastore.uris": "thrift://hms:port"})

    with raises(RuntimeError):
        tables.load("test.test_123")
def test_load_tables_check_valid_props(client, current_call, refresh_call):
    """Call ``load`` against a mocked HMS table that has all three parameters.

    There is no explicit assertion; the test passes as long as ``load`` does
    not raise.
    """
    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.return_value = MockHMSTable({
        "table_type": "ICEBERG",
        "partition_spec": [],
        "metadata_location": "s3://path/to/iceberg.metadata.json",
    })

    tables = HiveTables({"hive.metastore.uris": "thrift://hms:port"})
    tables.load("test.test_123")
def test_drop_tables(client, metadata, refresh_call, tmpdir):
    """Drop a table with ``purge=False`` and check the metastore client call.

    The mocked client's ``drop_table`` must have been called with the
    database name, the table name and ``deleteData=False``.
    """
    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.return_value = MockHMSTable({
        "table_type": "ICEBERG",
        "partition_spec": [],
        "metadata_location": "s3://path/to/iceberg.metadata.json",
    })

    tables = HiveTables({
        "hive.metastore.uris": "thrift://hms:port",
        "hive.metastore.warehouse.dir": tmpdir,
    })
    tables.drop("test", "test_123", purge=False)

    hms_client.drop_table.assert_called_with("test", "test_123", deleteData=False)
def test_create_tables(client, current_call, base_scan_schema, base_scan_partition, tmpdir):
    """Create a table through mocked HMS calls, then load it back.

    ``get_table`` first raises ``NoSuchObjectException`` while ``create``
    runs. The parameters of the table object passed to the mocked
    ``create_table`` are then fed back through ``get_table`` and
    ``current_call`` so that ``load`` can be called on the same identifier.
    """
    hms_client = client.return_value.__enter__.return_value

    # Before creation: table lookup fails and there is no current metadata.
    hms_client.get_table.side_effect = NoSuchObjectException()
    current_call.return_value = None

    # The lock is reported as WAITING on request and ACQUIRED on check.
    hms_client.lock.return_value = LockResponse("x", LockState.WAITING)
    hms_client.check_lock.return_value = LockResponse("x", LockState.ACQUIRED)

    # Live reference to the recorded calls; it is populated by create() below.
    create_calls = hms_client.create_table.call_args_list

    tables = HiveTables({
        "hive.metastore.uris": "thrift://hms:port",
        "hive.metastore.warehouse.dir": tmpdir,
    })
    tables.create(base_scan_schema, "test.test_123", base_scan_partition)

    # Replay the parameters of the table handed to create_table.
    created_parameters = create_calls[0].args[0].parameters
    hms_client.get_table.return_value = MockHMSTable(created_parameters)
    hms_client.get_table.side_effect = None
    current_call.return_value = created_parameters['metadata_location']

    tables.load("test.test_123")