def test_calc_csql():
    """Check the Column-SQL CALCULATE statement with both ways of passing functions.

    Two scenarios are run against a fresh ``Prosto`` context each:

    1. The populate and calculate functions are embedded in the query text
       after the ``FUNC`` keyword.
    2. The functions are passed by reference as extra arguments of
       ``column_sql``.

    In both cases the calculated column must contain the float values of
    column ``A``.
    """
    expected = [1.0, 2.0, 3.0]

    #
    # Test 1: function in-query
    #
    ctx = Prosto("My Prosto")

    ctx.column_sql(
        "TABLE My_table (A) FUNC lambda **m: pd.DataFrame({'A': [1, 2, 3]})"
    )
    ctx.column_sql(
        "CALCULATE My_table (A) -> new_column FUNC lambda x: float(x)"
    )

    assert ctx.get_table("My_table")
    assert ctx.get_column("My_table", "new_column")

    ctx.run()

    assert list(ctx.get_table("My_table").get_series('new_column')) == expected

    #
    # Test 2: function by-reference
    #
    ctx = Prosto("My Prosto")

    # The in-query form FUNC "lambda **m: df" cannot be used here because the
    # local name df cannot be resolved during population. The populate
    # function is therefore passed by reference, as a callable returning df
    # (the same form the other Column-SQL tests use), not as the raw frame.
    df = pd.DataFrame({'A': [1, 2, 3]})
    ctx.column_sql("TABLE My_table (A)", lambda **m: df)
    ctx.column_sql("CALCULATE My_table (A) -> new_column", lambda x: float(x))

    assert ctx.get_table("My_table")
    assert ctx.get_column("My_table", "new_column")

    ctx.run()

    assert list(ctx.get_table("My_table").get_series('new_column')) == expected
def test_aggregate_csql():
    """Check the Column-SQL AGGREGATE statement over a LINK column.

    A ``Facts`` table (columns A, M, N) and a ``Groups`` table (column A) are
    registered, ``Facts`` is linked to ``Groups`` via ``new_column``, and
    ``Facts.M`` is aggregated into ``Groups.Aggregate`` with a sum plus a
    ``bias`` taken from the model. The test asserts the resulting
    ``Aggregate`` values are ``[3.0, 7.0, 0.0]``.
    """
    fact_rows = pd.DataFrame({
        'A': ['a', 'a', 'b', 'b'],
        'M': [1.0, 2.0, 3.0, 4.0],
        'N': [4.0, 3.0, 2.0, 1.0],
    })
    group_rows = pd.DataFrame({'A': ['a', 'b', 'c']})

    workflow = Prosto("My Prosto")

    # Source tables, populated by reference.
    workflow.column_sql("TABLE Facts (A, M, N)", lambda **m: fact_rows)
    workflow.column_sql("TABLE Groups (A)", lambda **m: group_rows)

    # Link column used by the aggregation.
    workflow.column_sql("LINK Facts (A) -> new_column -> Groups (A)")

    # Aggregate function and its model are passed as the extra arguments.
    workflow.column_sql(
        "AGGREGATE Facts (M) -> new_column -> Groups (Aggregate)",
        lambda x, bias, **model: x.sum() + bias,
        {"bias": 0.0},
    )

    # Definitions must be registered before the workflow is executed.
    assert workflow.get_table("Facts")
    assert workflow.get_table("Groups")
    assert workflow.get_column("Facts", "new_column")

    workflow.run()

    aggregated = list(workflow.get_table("Groups").get_series('Aggregate'))
    assert aggregated == [3.0, 7.0, 0.0]
def test_roll_csql():
    """Check the Column-SQL ROLL statement with a window of 2.

    A single-column table is registered and a rolling sum over column ``A``
    is defined as ``new_column``. The test asserts the resulting values are
    ``[None, 3.0, 5.0]``.
    """
    source = pd.DataFrame({'A': [1.0, 2.0, 3.0]})

    workflow = Prosto("My Prosto")
    workflow.column_sql("TABLE My_table (A)", lambda **m: source)
    workflow.column_sql(
        "ROLL My_table (A) -> new_column WINDOW 2",
        lambda x: x.sum(),
    )

    # Definitions must be registered before the workflow is executed.
    assert workflow.get_table("My_table")
    assert workflow.get_column("My_table", "new_column")

    workflow.run()

    rolled = list(workflow.get_table("My_table").get_series('new_column'))
    # NOTE(review): the first expected value is None; if the engine produces
    # NaN for the incomplete window this equality would not hold - confirm.
    assert rolled == [None, 3.0, 5.0]
def test_link_csql():
    """Check the Column-SQL LINK statement between two existing tables.

    ``Facts`` and ``Groups`` (each with a column ``A``) are registered and a
    link column ``new_column`` is defined from ``Facts`` to ``Groups``. The
    test asserts the link column values are ``[0, 0, 1, 1]``.
    """
    fact_rows = pd.DataFrame({'A': ['a', 'a', 'b', 'b']})
    group_rows = pd.DataFrame({'A': ['a', 'b', 'c']})

    workflow = Prosto("My Prosto")
    workflow.column_sql("TABLE Facts (A)", lambda **m: fact_rows)
    workflow.column_sql("TABLE Groups (A)", lambda **m: group_rows)
    workflow.column_sql("LINK Facts (A) -> new_column -> Groups (A)")

    # Definitions must be registered before the workflow is executed.
    assert workflow.get_table("Facts")
    assert workflow.get_table("Groups")
    assert workflow.get_column("Facts", "new_column")

    workflow.run()

    link_values = list(workflow.get_table("Facts").get_series('new_column'))
    assert link_values == [0, 0, 1, 1]
def test_csql_project():
    """Check the Column-SQL PROJECT statement.

    Only ``Facts`` is registered explicitly; the PROJECT statement names
    ``Groups`` as its target. The test asserts that ``Groups`` is available,
    that after running it has 2 rows and 1 column, and that the
    ``new_column`` values in ``Facts`` are ``[0, 0, 1, 1]``.
    """
    fact_rows = pd.DataFrame({'A': ['a', 'a', 'b', 'b']})

    workflow = Prosto("My Prosto")
    workflow.column_sql("TABLE Facts (A)", lambda **m: fact_rows)
    workflow.column_sql("PROJECT Facts (A) -> new_column -> Groups (A)")

    # Both tables and the column must be registered before execution.
    assert workflow.get_table("Facts")
    assert workflow.get_table("Groups")
    assert workflow.get_column("Facts", "new_column")

    workflow.run()

    # Shape of the projected table.
    assert len(workflow.get_table("Groups").get_df()) == 2
    assert len(workflow.get_table("Groups").get_df().columns) == 1

    link_values = list(workflow.get_table("Facts").get_series('new_column'))
    assert link_values == [0, 0, 1, 1]