Example #1
0
def test_calculate_value():
    ctx = Prosto("My Prosto")

    tbl = ctx.populate(
        table_name="My table", attributes=["A"],
        func="lambda **m: pd.DataFrame({'A': [1, 2, 3]})", tables=[]
    )

    clm = ctx.calculate(
        name="My column", table=tbl.id,
        func="lambda x: float(x)", columns=["A"], model=None
    )

    tbl.evaluate()
    clm.evaluate()

    clm_data = tbl.get_series('My column')
    v0 = clm_data[0]
    v1 = clm_data[1]
    v2 = clm_data[2]

    assert np.isclose(v0, 1.0)
    assert np.isclose(v1, 2.0)
    assert np.isclose(v2, 3.0)

    assert isinstance(v0, float)
    assert isinstance(v1, float)
    assert isinstance(v2, float)
Example #2
0
def test_filter_inheritance():
    """Test topology augmentation. Use columns from the parent table by automatically adding the merge operation to topology."""
    ctx = Prosto("My Prosto")

    base_tbl = ctx.populate(
        table_name="Base table",
        attributes=["A", "B"],
        func=
        "lambda **m: pd.DataFrame({'A': [1.0, 2.0, 3.0], 'B': ['x', 'yy', 'zzz']})",
        tables=[])

    # This (boolean) column will be used for filtering
    clm = ctx.compute(
        name="filter_column",
        table=base_tbl.id,
        func=
        "lambda x, param: (x['A'] > param) & (x['B'].str.len() < 3)",  # Return a boolean Series
        columns=["A", "B"],
        model={"param": 1.5})

    f_tbl = ctx.filter(table_name="Filtered table",
                       attributes=["super"],
                       func=None,
                       tables=["Base table"],
                       columns=["filter_column"])

    # In this calculate column, we use a column of the filtered table which actually exists only in the base table
    clm = ctx.calculate(name="My column",
                        table=f_tbl.id,
                        func="lambda x: x + 1.0",
                        columns=["A"],
                        model=None)

    ctx.run()

    clm_data = f_tbl.get_series('My column')

    assert np.isclose(len(clm_data), 1)
    assert np.isclose(clm_data[0], 3.0)

    # This column had to be added automatically by the augmentation procedure
    # It is inherited from the base table and materialized via merge operation
    # It stores original values of the inherited base column
    clm_data = f_tbl.get_series('A')
    assert np.isclose(clm_data[0], 2)
Example #3
0
def test_product_inheritance():
    """
    We add an addition calculate column to the product table which uses a column of a base table.
    The system has to automatically insert a new operation by resolving this missing column.
    """
    ctx = Prosto("My Prosto")

    t1 = ctx.populate(
        table_name="Table 1", attributes=["A"],
        func="lambda **m: pd.DataFrame({'A': [1.0, 2.0, 3.0]})", tables=[]
    )

    t2 = ctx.populate(
        table_name="Table 2", attributes=["B"],
        func="lambda **m: pd.DataFrame({'B': ['x', 'y', 'z']})", tables=[]
    )

    product = ctx.product(
        table_name="Product", attributes=["t1", "t2"],
        tables=["Table 1", "Table 2"]
    )

    # In this calculate column, we use a column of the product table which actually exists only in a base table
    clm = ctx.calculate(
        name="My column", table=product.id,
        func="lambda x: x + 1.0", columns=["A"], model=None
    )

    ctx.run()

    # We get two columns in addition to two attributes: one merge (augmented) and one calculate column
    assert len(product.get_df().columns) == 4

    clm_data = product.get_series('My column')

    assert clm_data.to_list() == [2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0]
Example #4
0
def test_calculate_with_path():
    """Test topology augmentation. Calculation with column paths which have to be automatically produce merge operation."""
    ctx = Prosto("My Prosto")

    # Facts
    f_tbl = ctx.populate(
        table_name="Facts", attributes=["A", "M"],
        func="lambda **m: pd.DataFrame({'A': ['a', 'a', 'b', 'b'], 'M': [1.0, 2.0, 3.0, 4.0]})", tables=[]
    )

    # Groups
    df = pd.DataFrame({'A': ['a', 'b', 'c'], 'B': [3.0, 2.0, 1.0]})
    g_tbl = ctx.populate(
        table_name="Groups", attributes=["A", "B"],
        func="lambda **m: pd.DataFrame({'A': ['a', 'b', 'c'], 'B': [3.0, 2.0, 1.0]})", tables=[]
    )

    # Link
    l_clm = ctx.link(
        name="Link", table=f_tbl.id, type=g_tbl.id,
        columns=["A"], linked_columns=["A"]
    )

    # Calculate
    clm = ctx.calculate(
        name="My column", table=f_tbl.id,
        func="lambda x: x['M'] + x['Link::B']", columns=["M", "Link::B"], model=None
    )

    ctx.run()

    clm_data = f_tbl.get_series('My column')
    assert clm_data[0] == 4.0
    assert clm_data[1] == 5.0
    assert clm_data[2] == 5.0
    assert clm_data[3] == 6.0
Example #5
0
def test_calculate_value():
    ctx = Prosto("My Prosto")
    ctx.incremental = True

    tbl = ctx.create_table(
        table_name="My table",
        attributes=["A"],
    )

    clm = ctx.calculate(name="My column",
                        table=tbl.id,
                        func="lambda x: float(x)",
                        columns=["A"],
                        model=None)

    ctx.run()  # Inference on empty data

    tbl.data.add({"A": 1})  # New record is added and marked as added

    # Assert new change status
    assert tbl.data.added_length() == 1

    ctx.run()

    # Assert clean change status and results of inference
    assert tbl.data.added_length() == 0

    tbl.data.add({"A": 2})
    tbl.data.add({"A": 3})

    # Assert new change status
    assert tbl.data.added_length() == 2

    # For debug purpose, modify an old row (which has not been recently added but was evaluated before)
    tbl_df = tbl.data.get_df()
    tbl_df['A'][0] = 10  # Old value is 1. Prosto does not see this change

    ctx.run()

    # The manual modification is invisible for Prosto and hence it should not be re-computed and the derived column will have to have the old value
    assert tbl_df['My column'][0] == 1

    # Assert clean change status and results of inference
    assert tbl.data.added_length() == 0

    tbl.data.remove(1)  # Remove one oldest record by marking it as removed

    # Assert new change status
    assert tbl.data.removed_length() == 1

    ctx.run()

    # Assert clean change status and results of inference
    assert tbl.data.removed_length() == 0

    tbl.data.remove_all()  # Remove all records by marking them as removed

    # Assert new change status

    ctx.run()

    # Assert clean change status and results of inference
    assert tbl.data.added_range.start == 3
    assert tbl.data.added_range.end == 3
    assert tbl.data.removed_range.start == 3
    assert tbl.data.removed_range.end == 3