Example #1
0
def test_aggregate_csql():
    """Aggregate a fact measure into a group table via Column-SQL.

    Facts is linked to Groups on column A, then column M of Facts is
    aggregated along that link into the Groups column "Aggregate" using
    ``x.sum() + bias`` with a bias of 0.0 supplied through the model.
    Group 'c' has no matching fact rows and is expected to end up with 0.0.
    """
    prosto = Prosto("My Prosto")

    fact_rows = pd.DataFrame({
        'A': ['a', 'a', 'b', 'b'],
        'M': [1.0, 2.0, 3.0, 4.0],
        'N': [4.0, 3.0, 2.0, 1.0],
    })
    group_rows = pd.DataFrame({'A': ['a', 'b', 'c']})

    # Declare both source tables; each population function simply returns
    # the frame prepared above.
    prosto.column_sql("TABLE  Facts (A, M, N)", lambda **m: fact_rows)
    prosto.column_sql("TABLE  Groups (A)", lambda **m: group_rows)

    # Link Facts to Groups first, then aggregate M through that link column.
    prosto.column_sql("LINK  Facts (A) -> new_column -> Groups (A)")
    aggregate_model = {"bias": 0.0}
    prosto.column_sql(
        "AGGREGATE  Facts (M) -> new_column -> Groups (Aggregate)",
        lambda x, bias, **model: x.sum() + bias,
        aggregate_model,
    )

    # The definitions must be retrievable before the workflow is executed.
    for table_name in ("Facts", "Groups"):
        assert prosto.get_table(table_name)
    assert prosto.get_column("Facts", "new_column")

    prosto.run()

    aggregated = prosto.get_table("Groups").get_series('Aggregate')
    assert list(aggregated) == [3.0, 7.0, 0.0]
Example #2
0
def test_calc_csql():
    """Calculate a column via Column-SQL, supplying functions in two ways.

    Scenario 1 embeds both the population and the calculation function in
    the statement text after the FUNC keyword. Scenario 2 passes the data
    frame and the calculation lambda as Python objects. In both cases the
    calculated column is expected to hold column A converted to float.
    """
    #
    # Test 1: function in-query
    #
    inline_ctx = Prosto("My Prosto")

    inline_ctx.column_sql(
        "TABLE  My_table (A) FUNC lambda **m: pd.DataFrame({'A': [1, 2, 3]})"
    )
    inline_ctx.column_sql(
        "CALCULATE  My_table (A) -> new_column FUNC lambda x: float(x)"
    )

    # Definitions must exist before the workflow is run.
    assert inline_ctx.get_table("My_table")
    assert inline_ctx.get_column("My_table", "new_column")

    inline_ctx.run()

    inline_values = inline_ctx.get_table("My_table").get_series('new_column')
    assert list(inline_values) == [1.0, 2.0, 3.0]

    #
    # Test 2: function by-reference
    #
    ref_ctx = Prosto("My Prosto")

    # NOTE(review): the original remark here reads 'Use FUNC "lambda **m: df"
    # (df cannot be resolved during population)'. Presumably a FUNC string
    # could not see this local variable, which is why the frame itself is
    # passed by reference -- confirm against the Prosto documentation.
    source_frame = pd.DataFrame({'A': [1, 2, 3]})

    ref_ctx.column_sql("TABLE  My_table (A)", source_frame)
    ref_ctx.column_sql(
        "CALCULATE My_table (A) -> new_column",
        lambda x: float(x),
    )

    assert ref_ctx.get_table("My_table")
    assert ref_ctx.get_column("My_table", "new_column")

    ref_ctx.run()

    ref_values = ref_ctx.get_table("My_table").get_series('new_column')
    assert list(ref_values) == [1.0, 2.0, 3.0]
Example #3
0
def test_roll_csql():
    """Compute a rolling sum over column A with a window of 2 via Column-SQL.

    The first row is expected to carry no value (compared against ``None``
    here), the following rows the sums 1.0 + 2.0 and 2.0 + 3.0.
    """
    prosto = Prosto("My Prosto")

    source_frame = pd.DataFrame({'A': [1.0, 2.0, 3.0]})

    # Declare the table, then the rolling column defined on top of it.
    prosto.column_sql("TABLE  My_table (A)", lambda **m: source_frame)
    prosto.column_sql(
        "ROLL  My_table (A) -> new_column WINDOW 2",
        lambda x: x.sum(),
    )

    # Definitions must exist before the workflow is run.
    assert prosto.get_table("My_table")
    assert prosto.get_column("My_table", "new_column")

    prosto.run()

    rolled = prosto.get_table("My_table").get_series('new_column')
    expected = [None, 3.0, 5.0]
    assert list(rolled) == expected
Example #4
0
def test_link_csql():
    """Link a fact table to a group table on column A via Column-SQL.

    After the run the link column on Facts is expected to be [0, 0, 1, 1],
    which matches the row positions of 'a' and 'b' in the Groups frame.
    """
    prosto = Prosto("My Prosto")

    fact_rows = pd.DataFrame({'A': ['a', 'a', 'b', 'b']})
    group_rows = pd.DataFrame({'A': ['a', 'b', 'c']})

    # Declare both tables, then the link column from Facts to Groups.
    prosto.column_sql("TABLE  Facts (A)", lambda **m: fact_rows)
    prosto.column_sql("TABLE  Groups (A)", lambda **m: group_rows)
    prosto.column_sql("LINK  Facts (A) -> new_column -> Groups (A)")

    # Definitions must exist before the workflow is run.
    for table_name in ("Facts", "Groups"):
        assert prosto.get_table(table_name)
    assert prosto.get_column("Facts", "new_column")

    prosto.run()

    link_values = prosto.get_table("Facts").get_series('new_column')
    assert list(link_values) == [0, 0, 1, 1]
Example #5
0
def test_csql_project():
    """Project column A of Facts into a Groups table via Column-SQL.

    No Groups table is declared explicitly; the test expects it to be
    available once the PROJECT statement has been issued. After the run,
    Groups is expected to have 2 rows and 1 column, and the link column on
    Facts is expected to be [0, 0, 1, 1].
    """
    prosto = Prosto("My Prosto")

    fact_rows = pd.DataFrame({'A': ['a', 'a', 'b', 'b']})

    prosto.column_sql("TABLE  Facts (A)", lambda **m: fact_rows)
    prosto.column_sql("PROJECT  Facts (A) -> new_column -> Groups (A)")

    # Both tables and the link column must be defined before the run.
    for table_name in ("Facts", "Groups"):
        assert prosto.get_table(table_name)
    assert prosto.get_column("Facts", "new_column")

    prosto.run()

    # Shape of the projected table: one row per distinct value, one column.
    group_row_count = len(prosto.get_table("Groups").get_df())
    assert group_row_count == 2
    group_column_count = len(prosto.get_table("Groups").get_df().columns)
    assert group_column_count == 1

    link_values = prosto.get_table("Facts").get_series('new_column')
    assert list(link_values) == [0, 0, 1, 1]
Example #6
0
def test_product_csql():
    """Build the product of two three-row tables via Column-SQL.

    The resulting Product table is expected to have 9 rows (3 x 3) and
    exactly the two columns "t1" and "t2", in that order.
    """
    prosto = Prosto("My Prosto")

    first_frame = pd.DataFrame({'A': [1.0, 2.0, 3.0]})
    second_frame = pd.DataFrame({'B': ['x', 'y', 'z']})

    # Declare the two source tables and the product table built from them.
    prosto.column_sql("TABLE  Table_1 (A)", lambda **m: first_frame)
    prosto.column_sql("TABLE  Table_2 (B)", lambda **m: second_frame)
    prosto.column_sql("PRODUCT  Table_1; Table_2 -> t1; t2 -> Product")

    # The product table must be defined before the run.
    assert prosto.get_table("Product")

    prosto.run()

    # NOTE(review): the frame is fetched once here; this assumes get_df()
    # is a plain accessor without side effects -- confirm.
    product_frame = prosto.get_table("Product").get_df()

    assert len(product_frame.columns) == 2
    assert len(product_frame) == 9

    assert product_frame.columns.to_list() == ["t1", "t2"]
Example #7
0
def test_filter_csql():
    """Filter a base table via Column-SQL in two equivalent ways.

    Scenario 1 first calculates a boolean column and filters on it.
    Scenario 2 hands the same predicate directly to the FILTER statement.
    With param = 1.5 only the row at index 1 (A = 2.0, B = 'yy') satisfies
    ``A > param`` and ``len(B) < 3``, so the 'super' column of the filtered
    table is expected to be [1] in both scenarios.
    """
    #
    # Filter on an explicitly calculated boolean column
    #
    calc_ctx = Prosto("My Prosto")

    calc_base = pd.DataFrame({
        'A': [1.0, 2.0, 3.0],
        'B': ['x', 'yy', 'zzz'],
    })

    calc_ctx.column_sql("TABLE  Base (A, B)", lambda **m: calc_base)
    calc_ctx.column_sql(
        "CALCULATE  Base (A, B) -> filter_column",
        lambda x, param: (x['A'] > param) & (len(x['B']) < 3),
        {"param": 1.5},
    )
    calc_ctx.column_sql("FILTER Base (filter_column) -> super -> Filtered")

    # Both tables must be defined before the run.
    for table_name in ("Base", "Filtered"):
        assert calc_ctx.get_table(table_name)

    calc_ctx.run()

    calc_super = calc_ctx.get_table("Filtered").get_series('super')
    assert list(calc_super) == [1]

    #
    # Filter with a predicate function and no explicit calculate column
    #
    pred_ctx = Prosto("My Prosto")

    pred_base = pd.DataFrame({
        'A': [1.0, 2.0, 3.0],
        'B': ['x', 'yy', 'zzz'],
    })

    # Here the frame itself is passed instead of a population function.
    pred_ctx.column_sql("TABLE  Base (A, B)", pred_base)
    pred_ctx.column_sql(
        "FILTER Base (A, B) -> super -> Filtered",
        lambda x, param: (x['A'] > param) & (len(x['B']) < 3),
        {"param": 1.5},
    )

    for table_name in ("Base", "Filtered"):
        assert pred_ctx.get_table(table_name)

    pred_ctx.run()

    pred_super = pred_ctx.get_table("Filtered").get_series('super')
    assert list(pred_super) == [1]
Example #8
0
def test_filter_calculate():
    """
    Check that inherited attributes are resolved for a filtered table.

    A column that is not present in the filtered table has to be merged in
    automatically from the base table.
    Scenario: populate the base table, filter it, then calculate a column in
    the filtered table from a column of the base table (which therefore has
    to be inherited).
    """
    prosto = Prosto("My Prosto")

    base_rows = pd.DataFrame({
        'A': [1.0, 2.0, 3.0],
        'B': ['x', 'yy', 'zzz'],
    })

    prosto.column_sql("TABLE  Base (A, B)", lambda **m: base_rows)

    # Keep only the rows with A < 3.0, i.e. the first two base rows.
    prosto.column_sql("FILTER Base (A) -> super -> Filtered", lambda x: x < 3.0)

    # The calculation is defined on Filtered but reads column B, which was
    # declared only on the base table.
    prosto.column_sql(
        "CALCULATE  Filtered (B) -> filter_column",
        lambda x: len(x),
    )

    prosto.run()

    # Lengths of 'x' and 'yy', the B values of the two surviving rows.
    lengths = prosto.get_table("Filtered").get_series('filter_column')
    assert lengths.to_list() == [1, 2]
Example #9
0
def test_filter_project():
    """
    Check that inherited attributes are resolved across two filter levels.

    A column that is not present in the filtered table has to be merged in
    automatically from the base table.
    Scenario: populate the base table, filter it twice, then project the
    second filtered table using a column of the base table (which therefore
    has to be inherited).
    """
    prosto = Prosto("My Prosto")

    base_rows = pd.DataFrame({
        'A': [1.0, 2.0, 3.0, 4.0],
        'B': ['x', 'x', 'y', 'zzz'],
    })

    prosto.column_sql("TABLE  Base(A, B)", lambda **m: base_rows)

    # First filter: rows with A < 4.0. Second filter on top of it: A < 3.0.
    prosto.column_sql(
        "FILTER Base (A, B) -> super -> Filtered",
        lambda x: x['A'] < 4.0,
    )
    prosto.column_sql(
        "FILTER Filtered (A) -> super -> Filtered_2",
        lambda x: x < 3.0,
    )

    # The projection is defined on Filtered_2 but reads column B, which was
    # declared only on the base table.
    prosto.column_sql("PROJECT Filtered_2 (B) -> new_column -> Groups(C)")

    prosto.run()

    # Both surviving rows have B == 'x', so a single group is expected.
    group_values = prosto.get_table("Groups").get_series('C')
    assert group_values.to_list() == ['x']