def test_filter_table():
    """Filter a base table through a boolean column; check data and topology.

    The predicate is ``A > 1.5 and len(B) < 3``. Of the three base rows only
    (2.0, 'yy') satisfies it, so the filtered table must contain exactly one
    record with a single attribute, 'super', whose value is 1.
    """
    ctx = Prosto("My Prosto")

    base_table = ctx.populate(
        table_name="Base table",
        attributes=["A", "B"],
        func="lambda **m: pd.DataFrame({'A': [1.0, 2.0, 3.0], 'B': ['x', 'yy', 'zzz']})",
        tables=[],
    )

    # Boolean column that drives the filter (the lambda yields a boolean Series)
    predicate_column = ctx.compute(
        name="filter_column",
        table=base_table.id,
        func="lambda x, param: (x['A'] > param) & (x['B'].str.len() < 3)",
        columns=["A", "B"],
        model={"param": 1.5},
    )

    # Evaluate by hand: first the source table, then the column derived from it
    base_table.evaluate()
    predicate_column.evaluate()

    filtered_table = ctx.filter(
        table_name="Filtered table",
        attributes=["super"],
        func=None,
        tables=["Base table"],
        columns=["filter_column"],
    )
    filtered_table.evaluate()

    # The filtered table carries nothing but the single link attribute
    assert len(filtered_table.get_df().columns) == 1
    assert len(filtered_table.get_df()) == 1
    assert filtered_table.get_df()['super'][0] == 1

    #
    # Topology: one element per layer, in dependency order
    #
    topology = Topology(ctx)
    topology.translate()
    layers = topology.elem_layers

    expected_ids_per_layer = [
        {"Base table"},
        {"filter_column"},
        {"Filtered table"},
    ]
    assert len(layers) == 3
    for position, expected_ids in enumerate(expected_ids_per_layer):
        assert {element.id for element in layers[position]} == expected_ids
def test_filter_inheritance():
    """Test topology augmentation for columns inherited from a parent table.

    A calculate column is defined on the filtered table but reads column 'A',
    which is declared only on the base table. After ``ctx.run()`` the test
    expects 'A' to be readable from the filtered table and the calculated
    column to hold ``A + 1.0`` for the single row that passes the filter.
    """
    ctx = Prosto("My Prosto")

    parent_table = ctx.populate(
        table_name="Base table",
        attributes=["A", "B"],
        func="lambda **m: pd.DataFrame({'A': [1.0, 2.0, 3.0], 'B': ['x', 'yy', 'zzz']})",
        tables=[],
    )

    # Boolean column that drives the filter (the lambda yields a boolean Series)
    ctx.compute(
        name="filter_column",
        table=parent_table.id,
        func="lambda x, param: (x['A'] > param) & (x['B'].str.len() < 3)",
        columns=["A", "B"],
        model={"param": 1.5},
    )

    child_table = ctx.filter(
        table_name="Filtered table",
        attributes=["super"],
        func=None,
        tables=["Base table"],
        columns=["filter_column"],
    )

    # This column belongs to the filtered table, yet its input 'A' is defined
    # only in the base table
    ctx.calculate(
        name="My column",
        table=child_table.id,
        func="lambda x: x + 1.0",
        columns=["A"],
        model=None,
    )

    ctx.run()

    calculated = child_table.get_series('My column')
    assert np.isclose(len(calculated), 1)
    assert np.isclose(calculated[0], 3.0)

    # 'A' was never declared on the filtered table: it is expected to be added
    # automatically by the augmentation procedure (inherited from the base
    # table and materialized via a merge operation), keeping the base values
    inherited = child_table.get_series('A')
    assert np.isclose(inherited[0], 2)
def test_compute():
    """Check a compute column both when evaluated by hand and via ``ctx.run()``.

    The column applies ``shift(periods=-1)`` to 'A' = [1, 2, 3], so the
    expected values are 2.0, 3.0 and a missing value in the last position.
    """
    ctx = Prosto("My Prosto")

    source_table = ctx.populate(
        table_name="My table",
        attributes=["A"],
        func="lambda **m: pd.DataFrame({'A': [1, 2, 3]})",
        tables=[],
    )

    shifted_column = ctx.compute(
        name="My column",
        table=source_table.id,
        func="lambda x, **model: x.shift(**model)",
        columns=["A"],
        model={"periods": -1},
    )

    def assert_shifted_values():
        # Same three checks are needed after manual evaluation and after run()
        values = source_table.get_series('My column')
        assert np.isclose(values[0], 2.0)
        assert np.isclose(values[1], 3.0)
        assert pd.isna(values[2])

    # Evaluate by hand: first the table, then the column derived from it
    source_table.evaluate()
    shifted_column.evaluate()
    assert_shifted_values()

    #
    # Topology: the table comes first, the computed column second
    #
    topology = Topology(ctx)
    topology.translate()  # All data will be reset
    layers = topology.elem_layers

    assert len(layers) == 2
    assert {element.id for element in layers[0]} == {"My table"}
    assert {element.id for element in layers[1]} == {"My column"}

    # Executing the whole workflow must reproduce the same column values
    ctx.run()
    assert_shifted_values()