Example #1
0
def test_calc_csql():
    """Exercise the CALCULATE statement of ``column_sql`` in two variants.

    The first variant embeds the table and column functions in the query
    text via ``FUNC``; the second passes the data frame and the lambda as
    extra arguments. Both expect ``new_column`` to hold the values of ``A``
    converted to float.
    """

    def check_definitions_then_run(workflow):
        # The definitions must be retrievable before anything is evaluated.
        assert workflow.get_table("My_table")
        assert workflow.get_column("My_table", "new_column")

        workflow.run()

        computed = workflow.get_table("My_table").get_series('new_column')
        assert list(computed) == [1.0, 2.0, 3.0]

    #
    # Test 1: function in-query
    #
    in_query = Prosto("My Prosto")
    in_query.column_sql(
        "TABLE  My_table (A) FUNC lambda **m: pd.DataFrame({'A': [1, 2, 3]})"
    )
    in_query.column_sql(
        "CALCULATE  My_table (A) -> new_column FUNC lambda x: float(x)"
    )
    check_definitions_then_run(in_query)

    #
    # Test 2: function by-reference
    #
    by_reference = Prosto("My Prosto")

    # The frame is passed as an object: writing FUNC "lambda **m: df" in the
    # query would not work because df cannot be resolved during population.
    source_frame = pd.DataFrame({'A': [1, 2, 3]})

    by_reference.column_sql("TABLE  My_table (A)", source_frame)
    by_reference.column_sql(
        "CALCULATE My_table (A) -> new_column", lambda x: float(x)
    )
    check_definitions_then_run(by_reference)
Example #2
0
def test_aggregate_csql():
    """Check an AGGREGATE column defined through ``column_sql``.

    Facts are linked to Groups on ``A``; the ``M`` values reaching each group
    are summed and a ``bias`` (0.0, supplied in the model dict) is added. The
    result is read from the ``Aggregate`` column of Groups, where group 'c'
    has no facts and is expected to be 0.0.
    """
    workflow = Prosto("My Prosto")

    fact_rows = pd.DataFrame(
        {
            'A': ['a', 'a', 'b', 'b'],
            'M': [1.0, 2.0, 3.0, 4.0],
            'N': [4.0, 3.0, 2.0, 1.0],
        }
    )
    group_rows = pd.DataFrame({'A': ['a', 'b', 'c']})

    # Source tables are populated by functions that return the frames above.
    workflow.column_sql("TABLE  Facts (A, M, N)", lambda **m: fact_rows)
    workflow.column_sql("TABLE  Groups (A)", lambda **m: group_rows)

    # The link column is defined first; the aggregate statement refers to it.
    workflow.column_sql("LINK  Facts (A) -> new_column -> Groups (A)")

    sum_plus_bias = lambda x, bias, **model: x.sum() + bias  # noqa: E731
    model = {"bias": 0.0}
    workflow.column_sql(
        "AGGREGATE  Facts (M) -> new_column -> Groups (Aggregate)",
        sum_plus_bias,
        model,
    )

    assert workflow.get_table("Facts")
    assert workflow.get_table("Groups")
    assert workflow.get_column("Facts", "new_column")

    workflow.run()

    aggregated = workflow.get_table("Groups").get_series('Aggregate')
    assert list(aggregated) == [3.0, 7.0, 0.0]
Example #3
0
def test_roll_csql():
    """Check a ROLL column with a window of 2 defined through ``column_sql``.

    The rolling function sums each window over ``A``; the first row has no
    complete window and is expected to compare equal to ``None``.
    """
    workflow = Prosto("My Prosto")

    source_frame = pd.DataFrame({'A': [1.0, 2.0, 3.0]})

    workflow.column_sql("TABLE  My_table (A)", lambda **m: source_frame)
    workflow.column_sql(
        "ROLL  My_table (A) -> new_column WINDOW 2",
        lambda x: x.sum(),
    )

    # Definitions must be retrievable before evaluation.
    assert workflow.get_table("My_table")
    assert workflow.get_column("My_table", "new_column")

    workflow.run()

    expected = [None, 3.0, 5.0]
    rolled = workflow.get_table("My_table").get_series('new_column')
    assert list(rolled) == expected
Example #4
0
def test_link_csql():
    """Check a LINK column defined through ``column_sql``.

    Each Facts row is expected to receive the row position of the Groups
    record with the same ``A`` value: 'a' -> 0 and 'b' -> 1.
    """
    workflow = Prosto("My Prosto")

    fact_rows = pd.DataFrame({'A': ['a', 'a', 'b', 'b']})
    group_rows = pd.DataFrame({'A': ['a', 'b', 'c']})

    workflow.column_sql("TABLE  Facts (A)", lambda **m: fact_rows)
    workflow.column_sql("TABLE  Groups (A)", lambda **m: group_rows)
    workflow.column_sql("LINK  Facts (A) -> new_column -> Groups (A)")

    # Definitions must be retrievable before evaluation.
    assert workflow.get_table("Facts")
    assert workflow.get_table("Groups")
    assert workflow.get_column("Facts", "new_column")

    workflow.run()

    expected = [0, 0, 1, 1]
    links = workflow.get_table("Facts").get_series('new_column')
    assert list(links) == expected
Example #5
0
def test_csql_project():
    """Check a PROJECT column defined through ``column_sql``.

    Only Facts is declared with a TABLE statement; the test asserts that
    Groups is nonetheless available once the PROJECT statement is issued.
    After running, Groups is expected to have 2 rows and 1 column, and each
    Facts row is expected to reference its group by position.
    """
    workflow = Prosto("My Prosto")

    fact_rows = pd.DataFrame({'A': ['a', 'a', 'b', 'b']})

    workflow.column_sql("TABLE  Facts (A)", lambda **m: fact_rows)
    workflow.column_sql("PROJECT  Facts (A) -> new_column -> Groups (A)")

    # Definitions must be retrievable before evaluation.
    assert workflow.get_table("Facts")
    assert workflow.get_table("Groups")
    assert workflow.get_column("Facts", "new_column")

    workflow.run()

    group_row_count = len(workflow.get_table("Groups").get_df())
    assert group_row_count == 2

    group_column_count = len(workflow.get_table("Groups").get_df().columns)
    assert group_column_count == 1

    links = workflow.get_table("Facts").get_series('new_column')
    assert list(links) == [0, 0, 1, 1]