Beispiel #1
0
def test_read_simple_table_update_incremental():
    """Open simple_table at version 0, then re-read it after update_incremental()."""
    dt = DeltaTable("../rust/tests/data/simple_table", version=0)

    rows_at_v0 = dt.to_pyarrow_dataset().to_table().to_pydict()
    assert rows_at_v0 == {"id": [0, 1, 2, 3, 4]}

    # The same handle must expose different rows once it has been updated.
    dt.update_incremental()
    rows_after_update = dt.to_pyarrow_dataset().to_table().to_pydict()
    assert rows_after_update == {"id": [5, 7, 9]}
Beispiel #2
0
def test_write_recordbatchreader(
    tmp_path: pathlib.Path, existing_table: DeltaTable, sample_data: pa.Table
):
    """Overwrite the table at tmp_path from a RecordBatchReader and read it back.

    The reader is built from the batches of ``existing_table`` using the schema
    of ``sample_data``; the table read back must compare equal to
    ``sample_data``.
    """
    target = str(tmp_path)
    source_batches = existing_table.to_pyarrow_dataset().to_batches()
    batch_reader = RecordBatchReader.from_batches(sample_data.schema, source_batches)

    write_deltalake(target, batch_reader, mode="overwrite")

    round_tripped = DeltaTable(target).to_pyarrow_table()
    assert round_tripped == sample_data
Beispiel #3
0
def test_read_table_with_column_subset():
    """Read only the "value" and "day" columns of the partitioned table."""
    dt = DeltaTable("../rust/tests/data/delta-0.8.0-partitioned")
    wanted_columns = ["value", "day"]

    projected = dt.to_pyarrow_dataset().to_table(columns=wanted_columns)

    assert projected.to_pydict() == {
        "value": ["1", "2", "3", "6", "7", "5", "4"],
        "day": ["1", "3", "5", "20", "20", "4", "5"],
    }
Beispiel #4
0
def test_read_partitioned_table_to_dict():
    """Read every column of the partitioned table and compare with known rows."""
    dt = DeltaTable("../rust/tests/data/delta-0.8.0-partitioned")

    contents = dt.to_pyarrow_dataset().to_table().to_pydict()

    assert contents == {
        "value": ["1", "2", "3", "6", "7", "5", "4"],
        "year": ["2020", "2020", "2020", "2021", "2021", "2021", "2021"],
        "month": ["1", "2", "2", "12", "12", "12", "4"],
        "day": ["1", "3", "5", "20", "20", "4", "5"],
    }
Beispiel #5
0
def test_read_table_with_edge_timestamps():
    """Read a table containing year-9999 dates with coerce_int96_timestamp_unit="ms"."""
    dt = DeltaTable("../rust/tests/data/table_with_edge_timestamps")
    read_options = ParquetReadOptions(coerce_int96_timestamp_unit="ms")
    dataset = dt.to_pyarrow_dataset(parquet_read_options=read_options)

    expected = {
        "BIG_DATE": [datetime(9999, 12, 31), datetime(9999, 12, 30)],
        "NORMAL_DATE": [datetime(2022, 1, 1), datetime(2022, 2, 1)],
        "SOME_VALUE": [1, 2],
    }
    assert dataset.to_table().to_pydict() == expected
Beispiel #6
0
def test_write_iterator(
    tmp_path: pathlib.Path, existing_table: DeltaTable, sample_data: pa.Table
):
    """Write from a plain batch iterator, which needs an explicit schema.

    Without ``schema`` the call must raise ``ValueError``; with
    ``schema=sample_data.schema`` the overwrite succeeds and the table read
    back must compare equal to ``sample_data``.
    """
    target = str(tmp_path)
    batch_iter = existing_table.to_pyarrow_dataset().to_batches()

    # The same iterator object is reused below, as in the schema-less attempt.
    with pytest.raises(ValueError):
        write_deltalake(target, batch_iter, mode="overwrite")

    write_deltalake(target, batch_iter, schema=sample_data.schema, mode="overwrite")

    round_tripped = DeltaTable(target).to_pyarrow_table()
    assert round_tripped == sample_data
Beispiel #7
0
def test_read_table_with_filter():
    """Filter the partitioned table on year == "2021" and month == "12".

    Checks both the number of fragments that match the filter (2) and the
    rows returned when the filter is applied to the read.
    """
    dataset = DeltaTable(
        "../rust/tests/data/delta-0.8.0-partitioned"
    ).to_pyarrow_dataset()

    in_2021 = ds.field("year") == "2021"
    in_december = ds.field("month") == "12"
    predicate = in_2021 & in_december

    matching_fragments = list(dataset.get_fragments(filter=predicate))
    assert len(matching_fragments) == 2

    filtered_rows = dataset.to_table(filter=predicate).to_pydict()
    assert filtered_rows == {
        "value": ["6", "7", "5"],
        "year": ["2021", "2021", "2021"],
        "month": ["12", "12", "12"],
        "day": ["20", "20", "4"],
    }
Beispiel #8
0
def test_read_table_with_stats():
    """Check fragment selection and row counts for two filters on COVID-19_NYT.

    A filter on ``date`` must select two fragments and fewer than
    147181 + 47559 rows; a filter on ``cases < 0`` must select no fragments
    and no rows.
    """
    dataset = DeltaTable("../rust/tests/data/COVID-19_NYT").to_pyarrow_dataset()

    # Filter that is expected to match two fragments.
    recent_dates = ds.field("date") > "2021-02-20"
    recent_fragments = list(dataset.get_fragments(filter=recent_dates))
    assert len(recent_fragments) == 2

    # NOTE(review): the two addends look like per-file row counts; confirm
    # against the fixture data.
    recent_rows = dataset.to_table(filter=recent_dates)
    assert recent_rows.num_rows < 147181 + 47559

    # Filter that is expected to match nothing at all.
    negative_cases = ds.field("cases") < 0
    negative_fragments = list(dataset.get_fragments(filter=negative_cases))
    assert len(negative_fragments) == 0

    negative_rows = dataset.to_table(filter=negative_cases)
    assert negative_rows.num_rows == 0
def test_read_simple_table_to_dict():
    """Read simple_table without a version argument and compare its ids."""
    dt = DeltaTable("../rust/tests/data/simple_table")

    contents = dt.to_pyarrow_dataset().to_table().to_pydict()

    assert contents == {"id": [5, 7, 9]}
Beispiel #10
0
def test_read_simple_table_by_version_to_dict():
    """Read the delta-0.2.0 table pinned to version 2."""
    dt = DeltaTable("../rust/tests/data/delta-0.2.0", version=2)

    contents = dt.to_pyarrow_dataset().to_table().to_pydict()

    assert contents == {"value": [1, 2, 3]}
Beispiel #11
0
def test_read_simple_table_using_options_to_dict():
    """Read the delta-0.2.0 table at version 2 with an empty storage_options dict."""
    dt = DeltaTable(
        "../rust/tests/data/delta-0.2.0",
        version=2,
        storage_options={},
    )

    contents = dt.to_pyarrow_dataset().to_table().to_pydict()

    assert contents == {"value": [1, 2, 3]}
Beispiel #12
0
def test_read_empty_delta_table_after_delete():
    """Read the delta-0.8-empty table: the column is present but has no rows."""
    dt = DeltaTable("../rust/tests/data/delta-0.8-empty")

    contents = dt.to_pyarrow_dataset().to_table().to_pydict()

    assert contents == {"column": []}