Exemple #1
0
def test_drop_tables_purge(client, current_ops, refresh_call, tmpdir):
    """Check that ``drop(..., purge=True)`` deletes each expected path once.

    Two mock snapshots are registered; both list ``b-manifest.avro``. After
    the drop, ``ops.deleted`` must contain no duplicates and must include the
    location handed to ``MockTableOperations``, both snapshot locations, all
    four manifests and all eight parquet entries.
    """

    def reader_for(*entry_names):
        # One fresh reader per manifest mapping, as in the hand-written fixture.
        return MockReader([MockManifestEntry(entry) for entry in entry_names])

    snapshot_a = MockSnapshot(
        location="snap-a.avro",
        manifests=[MockManifest("a-manifest.avro"),
                   MockManifest("b-manifest.avro")],
        manifest_to_entries={
            "a-manifest.avro": reader_for("a.parquet", "b.parquet"),
            "b-manifest.avro": reader_for("c.parquet", "d.parquet"),
        })
    snapshot_b = MockSnapshot(
        location="snap-b.avro",
        manifests=[MockManifest("b-manifest.avro"),
                   MockManifest("c-manifest.avro"),
                   MockManifest("d-manifest.avro")],
        manifest_to_entries={
            "b-manifest.avro": reader_for("c.parquet", "d.parquet"),
            "c-manifest.avro": reader_for("e.parquet", "f.parquet"),
            "d-manifest.avro": reader_for("g.parquet", "h.parquet"),
        })

    ops = MockTableOperations(MockMetadata([snapshot_a, snapshot_b]), "a.json")
    current_ops.return_value = ops

    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.return_value = MockHMSTable({
        "table_type": "ICEBERG",
        "partition_spec": [],
        "metadata_location": "s3://path/to/iceberg.metadata.json",
    })

    HiveTables({
        "hive.metastore.uris": "thrift://hms:port",
        "hive.metastore.warehouse.dir": tmpdir,
    }).drop("test", "test_123", purge=True)

    assert len(ops.deleted) == len(set(ops.deleted)), "Paths should only be deleted once"

    # Checked in the same order as the original one-assert-per-path listing.
    expected_paths = (
        "a.json",
        "snap-a.avro", "snap-b.avro",
        "a-manifest.avro", "b-manifest.avro", "c-manifest.avro", "d-manifest.avro",
        "a.parquet", "b.parquet", "c.parquet", "d.parquet",
        "e.parquet", "f.parquet", "g.parquet", "h.parquet",
    )
    for path in expected_paths:
        assert path in ops.deleted
Exemple #2
0
def test_create_tables_failed(client, current_call, refresh_call, base_scan_schema, base_scan_partition, tmpdir):
    """Check that a failing ``create`` raises and leaves no metadata files.

    ``get_table`` on the mocked client raises ``NoSuchObjectException`` and
    ``current_call`` returns ``None``. The test expects ``create`` to raise
    ``AlreadyExistsException`` and the table's ``metadata`` directory under
    the warehouse dir to be empty afterwards.
    """
    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.side_effect = NoSuchObjectException()
    current_call.return_value = None

    tables = HiveTables({
        "hive.metastore.uris": "thrift://hms:port",
        "hive.metastore.warehouse.dir": tmpdir,
    })

    with pytest.raises(AlreadyExistsException):
        tables.create(base_scan_schema, "test.test_123", base_scan_partition)

    metadata_dir = os.path.join(tmpdir, "test.db", "test_123", "metadata")
    assert not os.listdir(metadata_dir)
def test_load_tables_check_no_location(client, current_call, refresh_call):
    """Check that ``load`` raises ``RuntimeError`` without ``metadata_location``.

    The mocked HMS table carries ``table_type`` and ``partition_spec``
    parameters only.
    """
    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.return_value = MockHMSTable({
        "table_type": "ICEBERG",
        "partition_spec": [],
    })

    tables = HiveTables({"hive.metastore.uris": "thrift://hms:port"})

    with raises(RuntimeError):
        tables.load("test.test_123")
Exemple #4
0
def test_load_tables_check_missing_iceberg_type(client, current_call, refresh_call):
    """Check that ``load`` raises ``RuntimeError`` without ``table_type``.

    The mocked HMS table carries ``partition_spec`` and ``metadata_location``
    parameters only.
    """
    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.return_value = MockHMSTable({
        "partition_spec": [],
        "metadata_location": "s3://path/to/iceberg.metdata.json",
    })

    tables = HiveTables({"hive.metastore.uris": "thrift://hms:port"})

    with raises(RuntimeError):
        tables.load("test.test_123")
Exemple #5
0
def test_load_tables_check_valid_props(client, current_call, refresh_call):
    """Check that ``load`` completes when all three table parameters are set.

    The mocked HMS table carries ``table_type``, ``partition_spec`` and
    ``metadata_location``. The test makes no explicit assertion; it only
    requires that ``load`` does not raise.
    """
    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.return_value = MockHMSTable({
        "table_type": "ICEBERG",
        "partition_spec": [],
        "metadata_location": "s3://path/to/iceberg.metadata.json",
    })

    HiveTables({"hive.metastore.uris": "thrift://hms:port"}).load("test.test_123")
Exemple #6
0
def test_drop_tables(client, metadata, refresh_call, tmpdir):
    """Check that ``drop(..., purge=False)`` forwards to the client's ``drop_table``.

    The mocked client must have been called last with the database name, the
    table name and ``deleteData=False``.
    """
    hms_client = client.return_value.__enter__.return_value
    hms_client.get_table.return_value = MockHMSTable({
        "table_type": "ICEBERG",
        "partition_spec": [],
        "metadata_location": "s3://path/to/iceberg.metadata.json",
    })

    tables = HiveTables({
        "hive.metastore.uris": "thrift://hms:port",
        "hive.metastore.warehouse.dir": tmpdir,
    })
    tables.drop("test", "test_123", purge=False)

    hms_client.drop_table.assert_called_with("test", "test_123", deleteData=False)
Exemple #7
0
def test_get_client(mock_hmsclient):
    """Check the keyword arguments ``get_client`` passes to ``HMSClient``.

    Covers two configurations: a metastore URI only, and explicit
    input/output protocol objects only.
    """
    # Case 1: URI only -- host and port are expected, protocols are None.
    HiveTables({"hive.metastore.uris": "thrift://hms:123"}).get_client()
    mock_hmsclient.HMSClient.assert_called_with(iprot=None, oprot=None, host="hms", port=123)

    # Case 2: protocols only -- host and port are expected to be None.
    in_protocol = mock.Mock()
    out_protocol = mock.Mock()
    HiveTables({HiveTables.IPROT: in_protocol, HiveTables.OPROT: out_protocol}).get_client()
    mock_hmsclient.HMSClient.assert_called_with(iprot=in_protocol, oprot=out_protocol, host=None, port=None)
Exemple #8
0
def test_create_tables(client, current_call, base_scan_schema, base_scan_partition, tmpdir):
    """Create a table through the mocked client, then load it back.

    Before ``create``, ``get_table`` raises ``NoSuchObjectException``, ``lock``
    returns a WAITING response and ``check_lock`` returns an ACQUIRED one.
    After ``create``, the parameters of the object passed to the first
    ``create_table`` call are fed back through ``get_table`` and
    ``current_call`` so that ``load`` can run.
    """
    hms_client = client.return_value.__enter__.return_value

    hms_client.get_table.side_effect = NoSuchObjectException()
    current_call.return_value = None
    hms_client.lock.return_value = LockResponse("x", LockState.WAITING)
    hms_client.check_lock.return_value = LockResponse("x", LockState.ACQUIRED)
    # Grab the call list up front, as the original does; it is read after create().
    create_calls = hms_client.create_table.call_args_list

    tables = HiveTables({
        "hive.metastore.uris": "thrift://hms:port",
        "hive.metastore.warehouse.dir": tmpdir,
    })
    tables.create(base_scan_schema, "test.test_123", base_scan_partition)

    created_params = create_calls[0].args[0].parameters
    hms_client.get_table.return_value = MockHMSTable(created_params)
    # Clear the side effect so get_table returns the value set above.
    hms_client.get_table.side_effect = None
    current_call.return_value = created_params['metadata_location']

    tables.load("test.test_123")