Beispiel #1
0
def test_columnar_convert_rowframe():
    """Convert ``IndexedArray`` inputs, both alone and inside a dict.

    Verifies the returned ids, that no columns are produced, and that the
    arrays reported in the type info are the same objects as the inputs'
    ``values`` (i.e. nothing was copied).
    """
    converter = ColumnarConverter(
        "some_name",
        "foo",
        None,
        column_defaults={},
        selected_columns={},
        transform_columns={},
    )

    explicit_index = IndexedArray(
        np.random.rand(3, 4, 5), index=[1111, -222, 33]
    )
    implicit_index = IndexedArray(np.random.rand(6, 7))

    # A single frame: its data is expected under the type name "foo".
    ids, columns, type_info = converter.convert(explicit_index)

    assert ids == [1111, -222, 33]
    assert columns == {}
    _check_type_info(type_info, [("foo", explicit_index.values)])
    # `is`, not `==`: the array must be passed through without a copy
    assert type_info[0][1] is explicit_index.values

    # A dict of frames: the keys are expected as the type names.
    by_type = {"a": explicit_index, "b": implicit_index}
    ids, columns, type_info = converter.convert(by_type)

    np.testing.assert_array_equal(
        ids, [*explicit_index.index, *implicit_index.index]
    )
    assert columns == {}
    _check_type_info(
        type_info,
        [("a", explicit_index.values), ("b", implicit_index.values)],
    )
    assert type_info[0][1] is explicit_index.values
    assert type_info[1][1] is implicit_index.values
Beispiel #2
0
def test_columnar_convert_features():
    """Selected column ``x`` lands in the shared data; ``a``/``b`` become features."""
    converter = ColumnarConverter("some_name", "foo", {}, {"x": "x"}, True)
    frame = _EMPTY_DF.assign(a=[1, 2], b=[100, 200], x=123)

    shared, features = converter.convert(frame)

    # the selected column is kept with its constant value
    foo_shared = shared["foo"]
    assert all(foo_shared["x"] == 123)

    # the remaining columns form the feature matrix, one row per input row
    expected_features = [[1, 100], [2, 200]]
    assert np.array_equal(features["foo"], expected_features)
Beispiel #3
0
def test_columnar_convert_selected_columns_missing():
    """Converting a frame that lacks the selected columns raises ``ValueError``."""
    selected = {"before": "after", "same": "same"}
    converter = ColumnarConverter("some_name", "foo", {}, selected, False)

    # the message is expected to name the offending input and the missing columns
    expected_message = (
        r"some_name\['x'\]: expected 'before', 'same' columns, found:"
    )
    with pytest.raises(ValueError, match=expected_message):
        converter.convert({"x": _EMPTY_DF})
Beispiel #4
0
def test_columnar_convert_column_default():
    """A defaulted column shows up with its default value in every type's frame."""
    defaults = {"before": 123}
    converter = ColumnarConverter("some_name", "foo", defaults, {}, False)

    inputs = {"x": _EMPTY_DF, "y": _EMPTY_DF}
    shared, features = converter.convert(inputs)

    # both input types must be present in the result
    for type_name in ("x", "y"):
        assert type_name in shared

    for frame in shared.values():
        assert all(frame["before"] == 123)
Beispiel #5
0
def test_columnar_convert_type_column():
    """Split a single frame into types using the values of ``type_column``.

    Also checks that two invalid constructor configurations raise
    ``ValueError`` with the expected messages.
    """
    renames = {"type_column": "TC", "data": "D"}
    converter = ColumnarConverter(
        "some_name", "foo", "type_column", {}, renames, False, {}
    )

    frame = pd.DataFrame(
        {"type_column": ["c", "a", "a", "c", "b"], "data": [1, 2, 3, 4, 5]},
        index=[1, 10, 100, 1000, 10000],
    )
    shared, type_starts, features = converter.convert(frame)

    # only the renamed data column remains; rows are expected grouped by type
    # in the order a, b, c
    assert set(shared.columns) == {"D"}
    assert list(shared.index) == [10, 100, 10000, 1, 1000]
    assert list(shared["D"]) == [2, 3, 5, 1, 4]
    assert type_starts == [("a", 0), ("b", 2), ("c", 3)]
    assert features == {"a": None, "b": None, "c": None}

    # invalid configurations
    features_message = (
        r"allow_features: expected no features .* \('type_column'\)"
    )
    with pytest.raises(ValueError, match=features_message):
        ColumnarConverter(
            "some_name", "foo", "type_column", {}, {"type_column": "TC"}, True, {}
        )

    # here "type_column" appears only as a rename target, not as a selected key
    missing_type_message = r"selected_columns: expected type column \('type_column'\) .* found only 'TC', 'data'"
    swapped_renames = {"TC": "type_column", "data": "D"}
    with pytest.raises(ValueError, match=missing_type_message):
        ColumnarConverter(
            "some_name", "foo", "type_column", {}, swapped_renames, False, {}
        )
Beispiel #6
0
def test_columnar_convert_column_default():
    """A defaulted column is filled with its default across all converted rows."""
    defaults = {"before": 123}
    converter = ColumnarConverter(
        "some_name", "foo", None, defaults, {}, False, {}
    )

    inputs = {"x": _EMPTY_DF, "y": _EMPTY_DF}
    shared, type_starts, features = converter.convert(inputs)

    # the two types are expected back to back, "y" starting at offset 2
    assert type_starts == [("x", 0), ("y", 2)]

    assert all(shared["before"] == 123)
Beispiel #7
0
def test_columnar_convert_column_default_selected_columns():
    """A defaulted column is renamed by the column selection in every type's frame."""
    # the defaulting happens before the renaming
    defaults = {"before": 123}
    renames = {"before": "after"}
    converter = ColumnarConverter("x", "foo", defaults, renames, False)

    shared, features = converter.convert({"x": _EMPTY_DF, "y": _EMPTY_DF})

    for type_name in ("x", "y"):
        assert type_name in shared

    for frame in shared.values():
        # only the renamed column survives, carrying the default value
        assert "before" not in frame
        assert all(frame["after"] == 123)
Beispiel #8
0
def test_columnar_convert_type_default():
    """A bare frame (not wrapped in a dict) is reported under ``default_type``."""
    settings = dict(
        name="some_name",
        default_type="foo",
        type_column=None,
        column_defaults={},
        selected_columns={},
        transform_columns={},
    )
    converter = ColumnarConverter(**settings)

    ids, columns, type_info = converter.convert(_EMPTY_DF)

    np.testing.assert_array_equal(ids, [1, 2])
    assert columns == {}
    expected_type_info = [("foo", _empty_array(2))]
    _check_type_info(type_info, expected_type_info)
Beispiel #9
0
def test_columnar_convert_column_default_selected_columns():
    """A defaulted column is exposed only under its renamed label."""
    # the defaulting happens before the renaming
    defaults = {"before": 123}
    renames = {"before": "after"}
    converter = ColumnarConverter(
        "x", "foo", None, defaults, renames, False, {}
    )

    inputs = {"x": _EMPTY_DF, "y": _EMPTY_DF}
    shared, type_starts, features = converter.convert(inputs)

    assert type_starts == [("x", 0), ("y", 2)]

    # the original name is gone; the new name carries the default value
    assert "before" not in shared
    assert all(shared["after"] == 123)
Beispiel #10
0
def test_columnar_convert_selected_columns():
    """Selected columns are renamed (or kept) as mapped by ``selected_columns``."""
    frame = _EMPTY_DF.assign(before="abc", same=10)

    renames = {"before": "after", "same": "same"}
    converter = ColumnarConverter(
        "some_name", "foo", None, {}, renames, False, {}
    )
    shared, type_starts, features = converter.convert({"x": frame, "y": frame})

    assert type_starts == [("x", 0), ("y", 2)]

    # "before" is renamed to "after"; "same" keeps its name
    assert "before" not in shared
    assert all(shared["after"] == "abc")
    assert all(shared["same"] == 10)
Beispiel #11
0
def test_columnar_convert_features():
    """Unselected columns ``a``/``b`` become features; selected ``x`` is a column."""
    settings = dict(
        name="some_name",
        default_type="foo",
        type_column=None,
        column_defaults={},
        selected_columns={"x": "x"},
        transform_columns={},
    )
    converter = ColumnarConverter(**settings)

    frame = _EMPTY_DF.assign(a=[1, 2], b=[100, 200], x=123)
    ids, columns, type_info = converter.convert(frame)

    expected_features = [[1, 100], [2, 200]]
    _check_type_info(type_info, [("foo", expected_features)])
    np.testing.assert_array_equal(columns["x"], 123)
Beispiel #12
0
def test_columnar_convert_selected_columns():
    """Selected columns are renamed (or kept) in each type's resulting frame."""
    source = _EMPTY_DF.assign(before="abc", same=10)

    renames = {"before": "after", "same": "same"}
    converter = ColumnarConverter("some_name", "foo", {}, renames, False)
    shared, features = converter.convert({"x": source, "y": source})

    for type_name in ("x", "y"):
        assert type_name in shared

    for converted in shared.values():
        # "before" is renamed to "after"; "same" keeps its name
        assert "before" not in converted
        assert all(converted["after"] == "abc")
        assert all(converted["same"] == 10)
Beispiel #13
0
def test_columnar_convert_type_column():
    """Split a single frame into types using the values of ``type_column``.

    Also checks that selecting columns without the type column among the
    selected keys is rejected at construction time.
    """
    renames = {"type_column": "TC", "data": "D"}
    converter = ColumnarConverter(
        name="some_name",
        default_type="foo",
        type_column="type_column",
        column_defaults={},
        selected_columns=renames,
        transform_columns={},
    )

    frame = pd.DataFrame(
        {"type_column": ["c", "a", "a", "c", "b"], "data": [1, 2, 3, 4, 5]},
        index=[1, 10, 100, 1000, 10000],
    )
    ids, columns, type_info = converter.convert(frame)

    # only the renamed data column remains; rows are expected grouped by type
    # in the order a, b, c
    assert columns.keys() == {"D"}
    np.testing.assert_array_equal(ids, [10, 100, 10000, 1, 1000])
    np.testing.assert_array_equal(columns["D"], [2, 3, 5, 1, 4])
    expected_type_info = [
        ("a", _empty_array(2)),
        ("b", _empty_array(1)),
        ("c", _empty_array(2)),
    ]
    _check_type_info(type_info, expected_type_info)

    # invalid configuration
    # here "type_column" appears only as a rename target, not as a selected key
    expected_message = r"selected_columns: expected type column \('type_column'\) .* found only 'TC', 'data'"
    swapped_renames = {"TC": "type_column", "data": "D"}
    with pytest.raises(ValueError, match=expected_message):
        ColumnarConverter(
            name="some_name",
            default_type="foo",
            type_column="type_column",
            column_defaults={},
            selected_columns=swapped_renames,
            transform_columns={},
        )
Beispiel #14
0
def test_columnar_convert_column_default():
    """A selected column absent from the input is filled from ``column_defaults``."""
    settings = dict(
        name="some_name",
        default_type="foo",
        type_column=None,
        column_defaults={"before": 123},
        selected_columns={"before": "before"},
        transform_columns={},
    )
    converter = ColumnarConverter(**settings)

    inputs = {"x": _EMPTY_DF, "y": _EMPTY_DF}
    ids, columns, type_info = converter.convert(inputs)

    expected_type_info = [("x", _empty_array(2)), ("y", _empty_array(2))]
    _check_type_info(type_info, expected_type_info)
    np.testing.assert_array_equal(columns["before"], 123)
Beispiel #15
0
def test_columnar_convert_selected_columns_missing():
    """Converting a frame that lacks the selected columns raises ``ValueError``."""
    settings = dict(
        name="some_name",
        default_type="foo",
        type_column=None,
        column_defaults={},
        selected_columns={"before": "after", "same": "same"},
        transform_columns={},
    )
    converter = ColumnarConverter(**settings)

    # the message is expected to name the offending input and the missing columns
    expected_message = (
        r"some_name\['x'\]: expected 'before', 'same' columns, found:"
    )
    with pytest.raises(ValueError, match=expected_message):
        converter.convert({"x": _EMPTY_DF})
Beispiel #16
0
def test_columnar_convert_transform_columns():
    """``transform_columns`` functions are applied to the matching input column.

    Each type supplies a one-row frame whose ``w`` value is a NumPy scalar of
    a different dtype; the ``w`` transform adds one, and the result is read
    back under the renamed label ``ww``.
    """
    weights = {"x": np.complex128(1), "y": np.uint16(2), "z": np.float32(3.0)}

    # one single-row frame per type, indexed 0, 1, 2 in insertion order
    dfs = {}
    for position, (type_name, weight) in enumerate(weights.items()):
        dfs[type_name] = pd.DataFrame(
            {"s": [0], "t": [1], "w": [weight]}, index=[position]
        )

    converter = ColumnarConverter(
        name="some_name",
        default_type="foo",
        type_column=None,
        column_defaults={},
        selected_columns={"s": "ss", "t": "tt", "w": "ww"},
        transform_columns={"w": lambda x: x + 1},
    )

    ids, columns, type_info = converter.convert(dfs)

    # original weights 1, 2, 3 each incremented by the transform
    transformed = columns["ww"]
    assert transformed[0] == 2
    assert transformed[1] == 3
    assert transformed[2] == 4

    expected_type_info = [
        ("x", _empty_array(1)),
        ("y", _empty_array(1)),
        ("z", _empty_array(1)),
    ]
    _check_type_info(type_info, expected_type_info)

    # untransformed columns pass through with only the rename applied
    np.testing.assert_array_equal(columns["ss"], 0)
    np.testing.assert_array_equal(columns["tt"], 1)
Beispiel #17
0
def test_columnar_convert_column_default_selected_columns():
    """A defaulted column is exposed only under its renamed label."""
    # the defaulting happens before the renaming
    settings = dict(
        name="x",
        default_type="foo",
        type_column=None,
        column_defaults={"before": 123},
        selected_columns={"before": "after"},
        transform_columns={},
    )
    converter = ColumnarConverter(**settings)

    inputs = {"x": _EMPTY_DF, "y": _EMPTY_DF}
    ids, columns, type_info = converter.convert(inputs)

    expected_type_info = [("x", _empty_array(2)), ("y", _empty_array(2))]
    _check_type_info(type_info, expected_type_info)

    # the original name is gone; the new name carries the default value
    assert "before" not in columns
    np.testing.assert_array_equal(columns["after"], 123)
Beispiel #18
0
def test_columnar_convert_rowframe_ndarray_invalid():
    """Selecting columns from array-like input (not a DataFrame) raises ``ValueError``.

    Exercised for both an ``IndexedArray`` and the raw array held in its
    ``values`` attribute; the message is expected to name the offending type.
    """
    converter = ColumnarConverter(
        "some_name",
        "foo",
        None,
        column_defaults={},
        selected_columns={"bar": "baz"},
        transform_columns={},
    )

    frame = IndexedArray(np.random.rand(3, 4, 5))

    indexed_array_message = r"some_name\['foo'\]: expected a Pandas DataFrame when selecting columns 'bar', found IndexedArray"
    with pytest.raises(ValueError, match=indexed_array_message):
        converter.convert(frame)

    ndarray_message = r"some_name\['foo'\]: expected a Pandas DataFrame when selecting columns 'bar', found ndarray"
    with pytest.raises(ValueError, match=ndarray_message):
        converter.convert(frame.values)
Beispiel #19
0
def test_columnar_convert_transform_columns():
    """``transform_columns`` functions are applied to the matching input column.

    Each type supplies a one-row frame whose ``w`` value is a NumPy scalar of
    a different dtype; the ``w`` transform adds one, and the result is read
    back under the renamed label ``ww``. Rows of each type are located via
    the start offsets in ``type_starts``.
    """
    weights = {"x": np.complex128(1), "y": np.uint16(2), "z": np.float32(3.0)}

    # one single-row frame per type, indexed 0, 1, 2 in insertion order
    dfs = {
        name: pd.DataFrame({"s": [0], "t": [1], "w": [w]}, index=[i])
        for i, (name, w) in enumerate(weights.items())
    }

    # NOTE(review): `float` is passed as the second argument where sibling
    # tests pass a string type name -- confirm this is intentional.
    converter = ColumnarConverter(
        "some_name",
        float,
        None,
        column_defaults={},
        selected_columns={
            "s": "ss",
            "t": "tt",
            "w": "ww",
        },
        transform_columns={
            "w": lambda x: x + 1,
        },
        allow_features=False,
    )

    converted, type_starts, _ = converter.convert(dfs)

    # Slice rows positionally with `.iloc` throughout, so that the slices do
    # not depend on how plain `[]` slicing interprets the frame's index.
    first_start = type_starts[0][1]
    second_start = type_starts[1][1]
    third_start = type_starts[2][1]

    assert (converted.iloc[first_start:second_start]["ww"] == 2).all()
    assert (converted.iloc[second_start:third_start]["ww"] == 3).all()
    assert (converted.iloc[third_start:]["ww"] == 4).all()

    # untransformed columns pass through with only the rename applied
    assert (converted["ss"] == 0).all()
    assert (converted["tt"] == 1).all()
Beispiel #20
0
def test_columnar_convert_selected_columns():
    """Selected columns are renamed (or kept) as mapped by ``selected_columns``."""
    frame = _EMPTY_DF.assign(before="abc", same=10)

    settings = dict(
        name="some_name",
        default_type="foo",
        type_column=None,
        column_defaults={},
        selected_columns={"before": "after", "same": "same"},
        transform_columns={},
    )
    converter = ColumnarConverter(**settings)
    ids, columns, type_info = converter.convert({"x": frame, "y": frame})

    # the same frame is supplied twice, so its ids are expected to repeat
    np.testing.assert_array_equal(ids, [1, 2, 1, 2])
    expected_type_info = [("x", _empty_array(2)), ("y", _empty_array(2))]
    _check_type_info(type_info, expected_type_info)

    # "before" is renamed to "after"; "same" keeps its name
    assert "before" not in columns
    np.testing.assert_array_equal(columns["after"], "abc")
    np.testing.assert_array_equal(columns["same"], 10)
Beispiel #21
0
def test_columnar_convert_invalid_input():
    """Non-dict / non-DataFrame inputs are rejected with a ``TypeError``."""
    converter = ColumnarConverter("some_name", "foo", None, {}, {}, False, {})

    # top-level input of the wrong type
    top_level_message = "some_name: expected dict, found int"
    with pytest.raises(TypeError, match=top_level_message):
        converter.convert(1)

    # a dict whose value has the wrong type; the message names the key
    nested_message = r"some_name\['x'\]: expected pandas DataFrame, found int"
    with pytest.raises(TypeError, match=nested_message):
        converter.convert({"x": 1})
Beispiel #22
0
def test_columnar_convert_invalid_input():
    """Inputs of an unsupported type are rejected with a ``TypeError``."""
    settings = dict(
        name="some_name",
        default_type="foo",
        type_column=None,
        column_defaults={},
        selected_columns={},
        transform_columns={},
    )
    converter = ColumnarConverter(**settings)

    # top-level input of the wrong type
    top_level_message = "some_name: expected dict, found int"
    with pytest.raises(TypeError, match=top_level_message):
        converter.convert(1)

    # a dict whose value has the wrong type; the message names the key
    nested_message = r"some_name\['x'\]: expected IndexedArray or pandas DataFrame, found int"
    with pytest.raises(TypeError, match=nested_message):
        converter.convert({"x": 1})
Beispiel #23
0
def test_columnar_convert_ndarray():
    """Convert raw NumPy arrays, both alone and inside a dict.

    Verifies the generated ids, that no columns are produced, that the arrays
    are passed through without copying, and that a 1-D array is rejected
    with a message naming the type.
    """
    converter = ColumnarConverter(
        "some_name",
        "foo",
        None,
        column_defaults={},
        selected_columns={},
        transform_columns={},
    )

    three_dim = np.random.rand(3, 4, 5)
    two_dim = np.random.rand(6, 7)

    # single array, default type
    ids, columns, type_info = converter.convert(three_dim)

    assert ids == range(3)
    assert columns == {}
    _check_type_info(type_info, [("foo", three_dim)])
    assert type_info[0][1] is three_dim

    # multiple arrays, explicit types; the IDs are wrong (duplicated) here, but that's detected
    # elsewhere
    by_type = {"a": three_dim, "b": two_dim}
    ids, columns, type_info = converter.convert(by_type)

    np.testing.assert_array_equal(ids, [*range(3), *range(6)])
    assert columns == {}
    _check_type_info(type_info, [("a", three_dim), ("b", two_dim)])
    assert type_info[0][1] is three_dim
    assert type_info[1][1] is two_dim

    # check it says which type
    expected_message = r"some_name\['foo'\]: could not convert NumPy array"
    with pytest.raises(ValueError, match=expected_message):
        converter.convert(np.zeros(123))
Beispiel #24
0
def test_columnar_convert_type_default():
    """A bare frame (not wrapped in a dict) is keyed by the type name ``"foo"``."""
    converter = ColumnarConverter("some_name", "foo", {}, {}, False)

    shared, features = converter.convert(_EMPTY_DF)

    # both outputs must be keyed by "foo"
    for result in (shared, features):
        assert "foo" in result
Beispiel #25
0
def test_columnar_convert_type_default():
    """A bare frame (not wrapped in a dict) is reported under the type ``"foo"``."""
    converter = ColumnarConverter("some_name", "foo", None, {}, {}, False, {})

    shared, type_starts, features = converter.convert(_EMPTY_DF)

    # a single type, starting at offset 0
    expected_starts = [("foo", 0)]
    assert type_starts == expected_starts
    assert "foo" in features
Beispiel #26
0
def test_columnar_convert_disallow_features():
    """An unselected column in the input raises ``ValueError`` for this converter.

    The frame carries an extra column ``a`` that is not among the selected
    columns; the error message is expected to name it.
    """
    converter = ColumnarConverter("some_name", "foo", None, {}, {}, False, {})
    df = _EMPTY_DF.assign(a=1)

    # The call is expected to raise, so its result is deliberately not bound.
    with pytest.raises(ValueError,
                       match="expected zero feature columns, found 'a'"):
        converter.convert(df)