Beispiel #1
0
def test_invalid_init_args(diamond_es):
    """Check that AggregationFeature asserts on inconsistent relationship paths.

    Three cases are exercised, each expected to raise ``AssertionError``:

    * a path starting at "stores" while the parent dataframe is "customers";
    * a path ending at "stores" while the base feature is on "transactions";
    * a path that prepends forward-flagged relationships to a backward path.

    All setup (base feature and path construction) happens outside the
    ``pytest.raises`` blocks so that only the ``AggregationFeature``
    constructor can satisfy the expected assertion; an ``AssertionError``
    raised while preparing inputs now fails the test instead of being
    mistaken for the error under test.
    """
    amount = ft.IdentityFeature(diamond_es["transactions"].ww["amount"])

    # Case 1: first relationship in the path does not start at the parent.
    path = backward_path(diamond_es, ["stores", "transactions"])
    error_text = "parent_dataframe must match first relationship in path"
    with pytest.raises(AssertionError, match=error_text):
        ft.AggregationFeature(
            amount,
            "customers",
            ft.primitives.Mean,
            relationship_path=path,
        )

    # Case 2: path stops at "stores", but the base feature is on "transactions".
    path = backward_path(diamond_es, ["regions", "stores"])
    error_text = (
        "Base feature must be defined on the dataframe at the end of relationship_path"
    )
    with pytest.raises(AssertionError, match=error_text):
        ft.AggregationFeature(
            amount,
            "regions",
            ft.primitives.Mean,
            relationship_path=path,
        )

    # Case 3: the same relationships flagged True (forward) are prepended to
    # the backward path, so the path is no longer purely backward.
    backward = backward_path(diamond_es, ["customers", "transactions"])
    forward = RelationshipPath([(True, r) for _, r in backward])
    path = RelationshipPath(list(forward) + list(backward))
    error_text = "All relationships in path must be backward"
    with pytest.raises(AssertionError, match=error_text):
        ft.AggregationFeature(
            amount,
            "transactions",
            ft.primitives.Mean,
            relationship_path=path,
        )
Beispiel #2
0
def test_invalid_init_args(diamond_es):
    """Check that AggregationFeature asserts on inconsistent relationship paths.

    Three cases are exercised, each expected to raise ``AssertionError``:

    * a path starting at 'stores' while the parent entity is 'customers';
    * a path ending at 'stores' while the base variable is on 'transactions';
    * a path that prepends forward-flagged relationships to a backward path.

    All setup (entity/variable lookups and path construction) happens outside
    the ``pytest.raises`` blocks so that only the ``AggregationFeature``
    constructor can satisfy the expected assertion; an ``AssertionError``
    raised while preparing inputs now fails the test instead of being
    mistaken for the error under test.
    """
    amount = diamond_es['transactions']['amount']

    # Case 1: first relationship in the path does not start at the parent.
    path = backward_path(diamond_es, ['stores', 'transactions'])
    parent = diamond_es['customers']
    error_text = 'parent_entity must match first relationship in path'
    with pytest.raises(AssertionError, match=error_text):
        ft.AggregationFeature(amount,
                              parent,
                              ft.primitives.Mean,
                              relationship_path=path)

    # Case 2: path stops at 'stores', but the base variable is on 'transactions'.
    path = backward_path(diamond_es, ['regions', 'stores'])
    parent = diamond_es['regions']
    error_text = 'Base feature must be defined on the entity at the end of relationship_path'
    with pytest.raises(AssertionError, match=error_text):
        ft.AggregationFeature(amount,
                              parent,
                              ft.primitives.Mean,
                              relationship_path=path)

    # Case 3: the same relationships flagged True (forward) are prepended to
    # the backward path, so the path is no longer purely backward.
    backward = backward_path(diamond_es, ['customers', 'transactions'])
    forward = RelationshipPath([(True, r) for _, r in backward])
    path = RelationshipPath(list(forward) + list(backward))
    parent = diamond_es['transactions']
    error_text = 'All relationships in path must be backward'
    with pytest.raises(AssertionError, match=error_text):
        ft.AggregationFeature(amount,
                              parent,
                              ft.primitives.Mean,
                              relationship_path=path)
Beispiel #3
0
def test_feature_trie_with_needs_full_entity(diamond_es):
    """Inspect the feature trie for region features that involve CumSum.

    Builds a mean of ``amount`` through customers (plus a CumSum of that
    mean) and a mean of a CumSum of ``amount`` through stores, then checks
    the value stored at the trie root and at every node along both paths.
    """
    regions = diamond_es['regions']
    amount_feat = ft.IdentityFeature(diamond_es['transactions']['amount'])

    def mean_into_regions(base, path):
        # Mean aggregation onto the regions entity along an explicit path.
        return ft.AggregationFeature(
            base,
            regions,
            primitive=ft.primitives.Mean,
            relationship_path=path,
        )

    customers_path = backward_path(
        diamond_es, ['regions', 'customers', 'transactions'],
    )
    mean_amount = mean_into_regions(amount_feat, customers_path)
    cumsum_of_mean = ft.TransformFeature(mean_amount, ft.primitives.CumSum)

    stores_path = backward_path(
        diamond_es, ['regions', 'stores', 'transactions'],
    )
    cumsum_amount = ft.TransformFeature(amount_feat, ft.primitives.CumSum)
    mean_of_cumsum = mean_into_regions(cumsum_amount, stores_path)

    all_features = [mean_amount, cumsum_of_mean, mean_of_cumsum]
    trie = FeatureSet(all_features).feature_trie

    # Root node.
    assert trie.value == (
        True,
        {mean_amount.unique_name(), cumsum_of_mean.unique_name()},
        {mean_of_cumsum.unique_name()},
    )

    # Branch through customers: full path, then its first hop only.
    assert trie.get_node(customers_path).value == (
        True,
        {amount_feat.unique_name()},
        set(),
    )
    assert trie.get_node(customers_path[:1]).value == (True, set(), set())

    # Branch through stores: full path, then its first hop only.
    assert trie.get_node(stores_path).value == (
        True,
        {amount_feat.unique_name(), cumsum_amount.unique_name()},
        set(),
    )
    assert trie.get_node(stores_path[:1]).value == (False, set(), set())
Beispiel #4
0
def test_feature_trie_without_needs_full_entity(diamond_es):
    """Inspect the feature trie for region features built from Mean/Negate.

    Covers a direct feature from countries, means of ``amount`` through both
    customers and stores, and a mean of a negated per-customer mean. Every
    checked node is expected to carry ``False`` and an empty first set.
    """
    regions = diamond_es['regions']
    country_name_feat = ft.IdentityFeature(diamond_es['countries']['name'])
    region_country_name = ft.DirectFeature(country_name_feat, regions)
    amount_feat = ft.IdentityFeature(diamond_es['transactions']['amount'])

    def mean_of(base, parent, path):
        # Mean aggregation of `base` onto `parent` along an explicit path.
        return ft.AggregationFeature(
            base,
            parent,
            primitive=ft.primitives.Mean,
            relationship_path=path,
        )

    via_customers_path = backward_path(
        diamond_es, ['regions', 'customers', 'transactions'],
    )
    mean_via_customers = mean_of(amount_feat, regions, via_customers_path)

    via_stores_path = backward_path(
        diamond_es, ['regions', 'stores', 'transactions'],
    )
    mean_via_stores = mean_of(amount_feat, regions, via_stores_path)

    customer_path = backward_path(diamond_es, ['customers', 'transactions'])
    customer_mean = mean_of(amount_feat, diamond_es['customers'], customer_path)

    negated_mean = ft.TransformFeature(customer_mean, ft.primitives.Negate)
    region_customer_path = backward_path(diamond_es, ['regions', 'customers'])
    mean_of_negated = mean_of(negated_mean, regions, region_customer_path)

    target_features = [
        region_country_name,
        mean_via_customers,
        mean_via_stores,
        mean_of_negated,
    ]
    trie = FeatureSet(target_features).feature_trie

    # Root holds every target feature.
    assert trie.value == (
        False,
        set(),
        {feat.unique_name() for feat in target_features},
    )

    # Node reached via the direct feature's own path.
    assert trie.get_node(region_country_name.relationship_path).value == (
        False,
        set(),
        {country_name_feat.unique_name()},
    )

    # regions -> customers holds the negation and the mean it wraps.
    assert trie.get_node(region_customer_path).value == (
        False,
        set(),
        {negated_mean.unique_name(), customer_mean.unique_name()},
    )

    # regions -> stores is only an intermediate hop and holds nothing.
    region_store_path = backward_path(diamond_es, ['regions', 'stores'])
    assert trie.get_node(region_store_path).value == (False, set(), set())

    # Both full-length paths end at the shared amount feature.
    assert trie.get_node(via_customers_path).value == (
        False,
        set(),
        {amount_feat.unique_name()},
    )
    assert trie.get_node(via_stores_path).value == (
        False,
        set(),
        {amount_feat.unique_name()},
    )
Beispiel #5
0
def test_init_with_single_possible_path(diamond_es):
    """The relationship path is filled in when none is passed explicitly.

    This uses diamond_es to test that there being a cycle somewhere in the
    graph doesn't cause an error.
    """
    mean_amount = ft.AggregationFeature(
        diamond_es['transactions']['amount'],
        diamond_es['customers'],
        ft.primitives.Mean,
    )
    customers_to_transactions = backward_path(
        diamond_es, ['customers', 'transactions'],
    )
    assert mean_amount.relationship_path == customers_to_transactions
Beispiel #6
0
def test_feature_trie_with_needs_full_dataframe(diamond_es):
    """Inspect the feature trie for region features that involve CumSum.

    Builds a mean of ``amount`` through customers (plus a CumSum of that
    mean) and a mean of a CumSum of ``amount`` through stores, then checks
    the value stored at the trie root and at every node along both paths.
    """
    amount_feat = ft.IdentityFeature(diamond_es["transactions"].ww["amount"])

    def mean_into_regions(base, path):
        # Mean aggregation onto the "regions" dataframe along an explicit path.
        return ft.AggregationFeature(
            base,
            "regions",
            primitive=ft.primitives.Mean,
            relationship_path=path,
        )

    customers_path = backward_path(
        diamond_es, ["regions", "customers", "transactions"]
    )
    mean_amount = mean_into_regions(amount_feat, customers_path)
    cumsum_of_mean = ft.TransformFeature(mean_amount, ft.primitives.CumSum)

    stores_path = backward_path(diamond_es, ["regions", "stores", "transactions"])
    cumsum_amount = ft.TransformFeature(amount_feat, ft.primitives.CumSum)
    mean_of_cumsum = mean_into_regions(cumsum_amount, stores_path)

    all_features = [mean_amount, cumsum_of_mean, mean_of_cumsum]
    trie = FeatureSet(all_features).feature_trie

    # Root node.
    assert trie.value == (
        True,
        {mean_amount.unique_name(), cumsum_of_mean.unique_name()},
        {mean_of_cumsum.unique_name()},
    )

    # Branch through customers: full path, then its first hop only.
    customers_leaf = trie.get_node(customers_path)
    assert customers_leaf.value == (True, {amount_feat.unique_name()}, set())
    customers_hop = trie.get_node(customers_path[:1])
    assert customers_hop.value == (True, set(), set())

    # Branch through stores: full path, then its first hop only.
    stores_leaf = trie.get_node(stores_path)
    assert stores_leaf.value == (
        True,
        {amount_feat.unique_name(), cumsum_amount.unique_name()},
        set(),
    )
    stores_hop = trie.get_node(stores_path[:1])
    assert stores_hop.value == (False, set(), set())
Beispiel #7
0
def test_name_with_multiple_possible_paths(diamond_es):
    """Feature and path names spell out the explicitly chosen customers route."""
    customers_route = backward_path(
        diamond_es, ['regions', 'customers', 'transactions'],
    )
    mean_amount = ft.AggregationFeature(
        diamond_es['transactions']['amount'],
        diamond_es['regions'],
        ft.primitives.Mean,
        relationship_path=customers_route,
    )

    feature_name = mean_amount.get_name()
    assert feature_name == "MEAN(customers.transactions.amount)"

    path_name = mean_amount.relationship_path_name()
    assert path_name == 'customers.transactions'
Beispiel #8
0
def test_init_with_single_possible_path(diamond_es):
    """The relationship path is filled in when none is passed explicitly.

    This uses diamond_es to test that there being a cycle somewhere in the
    graph doesn't cause an error.
    """
    amount = ft.IdentityFeature(diamond_es["transactions"].ww["amount"])
    mean_amount = ft.AggregationFeature(amount, "customers", ft.primitives.Mean)

    customers_to_transactions = backward_path(
        diamond_es, ["customers", "transactions"]
    )
    assert mean_amount.relationship_path == customers_to_transactions
def test_diamond_entityset(diamond_es):
    """Sum ``amount`` into regions along both diamond branches and check totals.

    Features are calculated for region ids 0, 1 and 2 with a ``time_last`` of
    2011-04-08; each branch is expected to produce its own column of sums.
    """
    regions = diamond_es['regions']
    amount = ft.IdentityFeature(diamond_es['transactions']['amount'])

    customers_route = backward_path(
        diamond_es, ['regions', 'customers', 'transactions'],
    )
    sum_via_customers = ft.AggregationFeature(
        amount,
        regions,
        primitive=ft.primitives.Sum,
        relationship_path=customers_route,
    )

    stores_route = backward_path(
        diamond_es, ['regions', 'stores', 'transactions'],
    )
    sum_via_stores = ft.AggregationFeature(
        amount,
        regions,
        primitive=ft.primitives.Sum,
        relationship_path=stores_route,
    )

    features = FeatureSet([sum_via_customers, sum_via_stores])
    calculator = FeatureSetCalculator(
        diamond_es,
        time_last=datetime(2011, 4, 8),
        feature_set=features,
    )
    result = calculator.run(np.array([0, 1, 2]))

    # Checked in the same order as the original assertions: stores first.
    expected_sums = (
        ('SUM(stores.transactions.amount)', [94, 261, 128]),
        ('SUM(customers.transactions.amount)', [72, 411, 0]),
    )
    for column, expected in expected_sums:
        assert (result[column] == expected).all()
Beispiel #10
0
def test_name_with_multiple_possible_paths(diamond_es):
    """Feature and path names spell out the explicitly chosen customers route."""
    customers_route = backward_path(
        diamond_es, ["regions", "customers", "transactions"]
    )
    amount = ft.IdentityFeature(diamond_es["transactions"].ww["amount"])
    mean_amount = ft.AggregationFeature(
        amount, "regions", ft.primitives.Mean, relationship_path=customers_route
    )

    feature_name = mean_amount.get_name()
    assert feature_name == "MEAN(customers.transactions.amount)"

    path_name = mean_amount.relationship_path_name()
    assert path_name == "customers.transactions"
Beispiel #11
0
def test_init_with_multiple_possible_paths(diamond_es):
    """An ambiguous route to the base entity must be resolved by the caller.

    Without ``relationship_path`` the constructor is expected to raise
    ``RuntimeError``; with an explicit path the same feature is built
    without error.
    """
    ambiguity_message = (
        "There are multiple possible paths to the base entity. "
        "You must specify a relationship path."
    )
    with pytest.raises(RuntimeError, match=ambiguity_message):
        ft.AggregationFeature(
            diamond_es['transactions']['amount'],
            diamond_es['regions'],
            ft.primitives.Mean,
        )

    # Does not raise if path specified.
    customers_route = backward_path(
        diamond_es, ['regions', 'customers', 'transactions'],
    )
    ft.AggregationFeature(
        diamond_es['transactions']['amount'],
        diamond_es['regions'],
        ft.primitives.Mean,
        relationship_path=customers_route,
    )
Beispiel #12
0
def test_diamond_entityset(diamond_es):
    """Sum ``amount`` into regions along both diamond branches and check totals.

    Features are calculated for region ids 0, 1 and 2 with a ``time_last`` of
    2011-04-08; the result is passed through ``to_pandas`` (indexed on "id",
    sorted) before each branch's column of sums is compared.
    """
    amount = ft.IdentityFeature(diamond_es["transactions"].ww["amount"])

    customers_route = backward_path(
        diamond_es, ["regions", "customers", "transactions"]
    )
    sum_via_customers = ft.AggregationFeature(
        amount,
        "regions",
        primitive=ft.primitives.Sum,
        relationship_path=customers_route,
    )

    stores_route = backward_path(diamond_es, ["regions", "stores", "transactions"])
    sum_via_stores = ft.AggregationFeature(
        amount,
        "regions",
        primitive=ft.primitives.Sum,
        relationship_path=stores_route,
    )

    features = FeatureSet([sum_via_customers, sum_via_stores])
    calculator = FeatureSetCalculator(
        diamond_es,
        time_last=datetime(2011, 4, 8),
        feature_set=features,
    )
    raw_result = calculator.run(np.array([0, 1, 2]))
    result = to_pandas(raw_result, index="id", sort_index=True)

    # Checked in the same order as the original assertions: stores first.
    expected_sums = (
        ("SUM(stores.transactions.amount)", [94, 261, 128]),
        ("SUM(customers.transactions.amount)", [72, 411, 0]),
    )
    for column, expected in expected_sums:
        assert (result[column] == expected).all()
Beispiel #13
0
def test_init_with_multiple_possible_paths(diamond_es):
    """An ambiguous route to the base dataframe must be resolved by the caller.

    Without ``relationship_path`` the constructor is expected to raise
    ``RuntimeError``; with an explicit path the same feature is built
    without error.
    """
    ambiguity_message = (
        "There are multiple possible paths to the base dataframe. "
        "You must specify a relationship path."
    )
    with pytest.raises(RuntimeError, match=ambiguity_message):
        ft.AggregationFeature(
            ft.IdentityFeature(diamond_es["transactions"].ww["amount"]),
            "regions",
            ft.primitives.Mean,
        )

    # Does not raise if path specified.
    customers_route = backward_path(
        diamond_es, ["regions", "customers", "transactions"]
    )
    amount = ft.IdentityFeature(diamond_es["transactions"].ww["amount"])
    ft.AggregationFeature(
        amount, "regions", ft.primitives.Mean, relationship_path=customers_route
    )
def test_get_backward_entities_deep(es):
    """Deep backward lookup from customers yields sessions, then log, with paths."""
    found = es.get_backward_entities('customers', deep=True)

    log_path = backward_path(es, ['customers', 'sessions', 'log'])
    sessions_path = backward_path(es, ['customers', 'sessions'])
    expected = [
        ('sessions', sessions_path),
        ('log', log_path),
    ]

    assert list(found) == expected
Beispiel #15
0
def test_get_backward_dataframes_deep(es):
    """Deep backward lookup from customers yields sessions, then log, with paths."""
    found = es.get_backward_dataframes("customers", deep=True)

    log_path = backward_path(es, ["customers", "sessions", "log"])
    sessions_path = backward_path(es, ["customers", "sessions"])
    expected = [
        ("sessions", sessions_path),
        ("log", log_path),
    ]

    assert list(found) == expected
Beispiel #16
0
def test_get_backward_dataframes(es):
    """Default backward lookup from customers yields only sessions with its path."""
    found = es.get_backward_dataframes("customers")
    sessions_path = backward_path(es, ["customers", "sessions"])
    expected = [("sessions", sessions_path)]
    assert list(found) == expected
def test_get_backward_entities(es):
    """Default backward lookup from customers yields only sessions with its path."""
    found = es.get_backward_entities('customers')
    sessions_path = backward_path(es, ['customers', 'sessions'])
    expected = [('sessions', sessions_path)]
    assert list(found) == expected
Beispiel #18
0
def test_feature_trie_without_needs_full_dataframe(diamond_es):
    """Inspect the feature trie for region features built from Mean/Negate.

    Covers a direct feature from countries, means of ``amount`` through both
    customers and stores, and a mean of a negated per-customer mean. Every
    checked node is expected to carry ``False`` and an empty first set.
    """
    country_name_feat = ft.IdentityFeature(diamond_es["countries"].ww["name"])
    region_country_name = ft.DirectFeature(country_name_feat, "regions")
    amount_feat = ft.IdentityFeature(diamond_es["transactions"].ww["amount"])

    def mean_of(base, parent_name, path):
        # Mean aggregation of `base` onto `parent_name` along an explicit path.
        return ft.AggregationFeature(
            base,
            parent_name,
            primitive=ft.primitives.Mean,
            relationship_path=path,
        )

    via_customers_path = backward_path(
        diamond_es, ["regions", "customers", "transactions"]
    )
    mean_via_customers = mean_of(amount_feat, "regions", via_customers_path)

    via_stores_path = backward_path(
        diamond_es, ["regions", "stores", "transactions"]
    )
    mean_via_stores = mean_of(amount_feat, "regions", via_stores_path)

    customer_path = backward_path(diamond_es, ["customers", "transactions"])
    customer_mean = mean_of(amount_feat, "customers", customer_path)

    negated_mean = ft.TransformFeature(customer_mean, ft.primitives.Negate)
    region_customer_path = backward_path(diamond_es, ["regions", "customers"])
    mean_of_negated = mean_of(negated_mean, "regions", region_customer_path)

    target_features = [
        region_country_name,
        mean_via_customers,
        mean_via_stores,
        mean_of_negated,
    ]
    trie = FeatureSet(target_features).feature_trie

    # Root holds every target feature.
    assert trie.value == (
        False,
        set(),
        {feat.unique_name() for feat in target_features},
    )

    # Node reached via the direct feature's own path.
    direct_node = trie.get_node(region_country_name.relationship_path)
    assert direct_node.value == (False, set(), {country_name_feat.unique_name()})

    # regions -> customers holds the negation and the mean it wraps.
    customers_node = trie.get_node(region_customer_path)
    assert customers_node.value == (
        False,
        set(),
        {negated_mean.unique_name(), customer_mean.unique_name()},
    )

    # regions -> stores is only an intermediate hop and holds nothing.
    region_store_path = backward_path(diamond_es, ["regions", "stores"])
    assert trie.get_node(region_store_path).value == (False, set(), set())

    # Both full-length paths end at the shared amount feature.
    customers_leaf = trie.get_node(via_customers_path)
    assert customers_leaf.value == (False, set(), {amount_feat.unique_name()})
    stores_leaf = trie.get_node(via_stores_path)
    assert stores_leaf.value == (False, set(), {amount_feat.unique_name()})