Esempio n. 1
0
def test_filter_table():
    """Filter a base table by a boolean column, then verify data and topology.

    The base table holds three rows. The filter column is true only where
    ``A > 1.5`` and ``len(B) < 3``, i.e. for the single row (2.0, 'yy'), so
    the filtered table must contain exactly one record whose ``super`` link
    attribute equals 1.
    """
    populate_func = "lambda **m: pd.DataFrame({'A': [1.0, 2.0, 3.0], 'B': ['x', 'yy', 'zzz']})"
    # Produces a boolean Series; it becomes the filtering criterion below
    predicate_func = "lambda x, param: (x['A'] > param) & (x['B'].str.len() < 3)"

    prosto = Prosto("My Prosto")

    base_table = prosto.populate(
        table_name="Base table",
        attributes=["A", "B"],
        func=populate_func,
        tables=[],
    )

    filter_clm = prosto.compute(
        name="filter_column",
        table=base_table.id,
        func=predicate_func,
        columns=["A", "B"],
        model={"param": 1.5},
    )

    # The base table and its boolean column have to exist before filtering
    base_table.evaluate()
    filter_clm.evaluate()

    filtered_table = prosto.filter(
        table_name="Filtered table",
        attributes=["super"],
        func=None,
        tables=["Base table"],
        columns=["filter_column"],
    )
    filtered_table.evaluate()

    filtered_df = filtered_table.get_df()
    assert len(filtered_df.columns) == 1  # The link attribute is the only column
    assert len(filtered_df) == 1
    assert filtered_df['super'][0] == 1

    #
    # Test topology: one element is expected in each of three layers
    #
    topology = Topology(prosto)
    topology.translate()
    layers = topology.elem_layers

    expected_ids_per_layer = [{"Base table"}, {"filter_column"}, {"Filtered table"}]
    assert len(layers) == 3

    for layer_no, expected_ids in enumerate(expected_ids_per_layer):
        assert {elem.id for elem in layers[layer_no]} == expected_ids
Esempio n. 2
0
def test_filter_inheritance():
    """Check that a filtered table can use columns defined only in its base table.

    A calculate column on the filtered table refers to column ``A``, which is
    declared on the base table only. The test expects topology augmentation to
    add a merge operation automatically so that ``A`` becomes available in the
    filtered table.
    """
    populate_func = "lambda **m: pd.DataFrame({'A': [1.0, 2.0, 3.0], 'B': ['x', 'yy', 'zzz']})"
    # Produces a boolean Series; it becomes the filtering criterion below
    predicate_func = "lambda x, param: (x['A'] > param) & (x['B'].str.len() < 3)"

    prosto = Prosto("My Prosto")

    parent = prosto.populate(
        table_name="Base table",
        attributes=["A", "B"],
        func=populate_func,
        tables=[],
    )

    prosto.compute(
        name="filter_column",
        table=parent.id,
        func=predicate_func,
        columns=["A", "B"],
        model={"param": 1.5},
    )

    child = prosto.filter(
        table_name="Filtered table",
        attributes=["super"],
        func=None,
        tables=["Base table"],
        columns=["filter_column"],
    )

    # The source column "A" is not defined on the filtered table itself:
    # it exists only in the base table
    prosto.calculate(
        name="My column",
        table=child.id,
        func="lambda x: x + 1.0",
        columns=["A"],
        model=None,
    )

    prosto.run()

    calculated = child.get_series('My column')
    assert np.isclose(len(calculated), 1)
    assert np.isclose(calculated[0], 3.0)

    # Column "A" was never defined on the filtered table explicitly, so it is
    # expected to be added by the augmentation procedure (inherited from the
    # base table and materialized via merge), holding the original base value
    inherited = child.get_series('A')
    assert np.isclose(inherited[0], 2)
Esempio n. 3
0
def test_compute():
    """Compute a shifted column and verify it after manual and scheduled evaluation.

    Column ``A`` holds [1, 2, 3] and is shifted with ``periods=-1``, so the
    computed column must be [2.0, 3.0, NaN]. The same result is checked twice:
    after evaluating the operations explicitly and after ``run()`` executes
    the translated topology.
    """
    prosto = Prosto("My Prosto")

    source = prosto.populate(
        table_name="My table",
        attributes=["A"],
        func="lambda **m: pd.DataFrame({'A': [1, 2, 3]})",
        tables=[],
    )

    shift_clm = prosto.compute(
        name="My column",
        table=source.id,
        func="lambda x, **model: x.shift(**model)",
        columns=["A"],
        model={"periods": -1},
    )

    def check_shifted_column():
        # Re-read the series on every call so that the current data is verified
        shifted = source.get_series('My column')
        assert np.isclose(shifted[0], 2.0)
        assert np.isclose(shifted[1], 3.0)
        assert pd.isna(shifted[2])

    source.evaluate()
    shift_clm.evaluate()
    check_shifted_column()

    #
    # Test topology: the table layer precedes the column layer
    #
    topology = Topology(prosto)
    topology.translate()  # All data will be reset
    layers = topology.elem_layers

    assert len(layers) == 2

    assert {elem.id for elem in layers[0]} == {"My table"}
    assert {elem.id for elem in layers[1]} == {"My column"}

    prosto.run()
    check_shifted_column()