Example #1
0
def test_df2pyarrow_500years():
    """Check that dates far outside the nanosecond range survive conversion.

    Summary files can have DATE columns with timespans outside the
    Pandas dataframe nanosecond limitation. This should not present
    a problem to the PyArrow conversion."""
    dateindex = [dt(1000, 1, 1, 0, 0, 0), dt(3000, 1, 1, 0, 0, 0)]
    dframe = pd.DataFrame(
        columns=["FOO", "BAR"], index=dateindex, data=[[1, 2], [3, 4]]
    ).astype("int32")

    # The index name should be ignored:
    dframe.index.name = "BOGUS"
    pyat = _df2pyarrow(dframe)

    with pytest.raises(pyarrow.lib.ArrowInvalid):
        # We cannot convert this back to Pandas, since it will bail on failing
        # to use nanosecond timestamps in the dataframe object for these dates.
        # This is maybe a PyArrow bug/limitation that we must be aware of.
        pyat.to_pandas()

    # The expected values are stated at millisecond resolution on purpose:
    # years 1000 and 3000 do not fit in an int64 count of nanoseconds, so
    # nanosecond-precision literals would depend on how NumPy treats the
    # out-of-range value instead of pinning the actual dates.
    expected_dates = [
        np.datetime64("1000-01-01T00:00:00", "ms"),
        np.datetime64("3000-01-01T00:00:00", "ms"),
    ]
    assert (np.array(pyat.column(0)) == expected_dates).all()
Example #2
0
def test_df2pyarrow_mix_int_float():
    """Verify that a frame holding one integer and one float column keeps
    both column types through the PyArrow conversion"""
    source = pd.DataFrame(data=[[1, 2], [3, 4]], columns=["FOO", "BAR"])
    source = source.astype("int32")

    # Scaling by a non-integer factor turns BAR into a float column.
    source["BAR"] *= 1.1

    table = _df2pyarrow(source)
    roundtrip = table.to_pandas()

    # The frame compared against is expected to carry BAR as float32,
    # so cast the source column before comparing.
    source["BAR"] = source["BAR"].astype("float32")

    converted_columns = roundtrip[["FOO", "BAR"]]
    pd.testing.assert_frame_equal(source, converted_columns)
Example #3
0
def test_df2pyarrow_ints():
    """Round-trip a small all-integer dataframe through PyArrow"""
    source = pd.DataFrame(data=[[1, 2], [3, 4]], columns=["FOO", "BAR"])
    source = source.astype("int32")

    table = _df2pyarrow(source)
    roundtrip = table.to_pandas()

    pd.testing.assert_frame_equal(source, roundtrip[["FOO", "BAR"]])

    # The DATE column is expected to hold the epoch and one millisecond
    # after it (presumably derived from the default 0/1 integer index).
    expected_dates = [
        np.datetime64("1970-01-01T00:00:00.000000000"),
        np.datetime64("1970-01-01T00:00:00.001000000"),  # Milliseconds
    ]
    actual_dates = roundtrip["DATE"].to_numpy()
    assert (actual_dates == expected_dates).all()
Example #4
0
def test_df2pyarrow_meta():
    """Verify that per-column metadata stored in dframe.attrs["meta"] ends up
    as field metadata on the resulting pyarrow table"""
    frame = pd.DataFrame(data=[[1, 2], [3, 4]], columns=["FOO", "BAR"])
    frame = frame.astype("int32")

    column_meta = {
        "FOO": {"unit": "barf", "is_interesting": False},
        "ignoreme": "ignored",
    }
    frame.attrs["meta"] = column_meta

    table = _df2pyarrow(frame)

    # Metadata given for FOO is expected back as bytes keys and values.
    foo_field = table.select(["FOO"]).schema[0]
    assert foo_field.metadata == {
        b"unit": b"barf",
        b"is_interesting": b"False",
    }

    # BAR had no metadata entry, so its field metadata is expected empty.
    bar_field = table.select(["BAR"]).schema[0]
    assert bar_field.metadata == {}

    # The entry that does not name a column must not appear in the schema.
    schema_text = table.schema.to_string()
    assert "is_interesting" in schema_text
    assert "ignored" not in schema_text
Example #5
0
def test_df2pyarrow_strings():
    """Verify that string-valued columns survive a round trip via PyArrow"""
    source = pd.DataFrame(data=[["hei", "hopp"]], columns=["FOO", "BAR"])

    table = _df2pyarrow(source)
    roundtrip = table.to_pandas()

    string_columns = roundtrip[["FOO", "BAR"]]
    pd.testing.assert_frame_equal(source, string_columns)