def test_invalid_init_args(diamond_es):
    """Check that AggregationFeature asserts on inconsistent constructor arguments.

    Three invalid combinations are exercised; each is expected to raise an
    ``AssertionError`` carrying a specific message:

    * the parent dataframe does not match the first relationship in the path;
    * the base feature's dataframe is not the one at the end of the path;
    * the path mixes in relationships that are not backward.

    The base feature and the relationship paths are built *outside* the
    ``pytest.raises`` blocks so that only the ``AggregationFeature`` call itself
    can satisfy the expected error.
    """
    amount = ft.IdentityFeature(diamond_es["transactions"].ww["amount"])

    # Path is built from ["stores", "transactions"], parent is "customers".
    path = backward_path(diamond_es, ["stores", "transactions"])
    error_text = "parent_dataframe must match first relationship in path"
    with pytest.raises(AssertionError, match=error_text):
        ft.AggregationFeature(
            amount,
            "customers",
            ft.primitives.Mean,
            relationship_path=path,
        )

    # Path is built from ["regions", "stores"], base feature is on "transactions".
    path = backward_path(diamond_es, ["regions", "stores"])
    error_text = (
        "Base feature must be defined on the dataframe at the end of relationship_path"
    )
    with pytest.raises(AssertionError, match=error_text):
        ft.AggregationFeature(
            amount,
            "regions",
            ft.primitives.Mean,
            relationship_path=path,
        )

    # Re-tag every relationship of a backward path with True (the "forward"
    # copy) and prepend it, producing a path that is not purely backward.
    backward = backward_path(diamond_es, ["customers", "transactions"])
    forward = RelationshipPath([(True, r) for _, r in backward])
    path = RelationshipPath(list(forward) + list(backward))
    error_text = "All relationships in path must be backward"
    with pytest.raises(AssertionError, match=error_text):
        ft.AggregationFeature(
            amount,
            "transactions",
            ft.primitives.Mean,
            relationship_path=path,
        )
def test_invalid_init_args(diamond_es):
    """Check that AggregationFeature asserts on inconsistent constructor arguments.

    Three invalid combinations are exercised; each is expected to raise an
    ``AssertionError`` carrying a specific message:

    * the parent entity does not match the first relationship in the path;
    * the base feature's entity is not the one at the end of the path;
    * the path mixes in relationships that are not backward.

    The base variable and the relationship paths are built *outside* the
    ``pytest.raises`` blocks so that only the ``AggregationFeature`` call itself
    can satisfy the expected error.
    """
    amount = diamond_es['transactions']['amount']

    # Path is built from ['stores', 'transactions'], parent is 'customers'.
    path = backward_path(diamond_es, ['stores', 'transactions'])
    error_text = 'parent_entity must match first relationship in path'
    with pytest.raises(AssertionError, match=error_text):
        ft.AggregationFeature(amount,
                              diamond_es['customers'],
                              ft.primitives.Mean,
                              relationship_path=path)

    # Path is built from ['regions', 'stores'], base feature is on 'transactions'.
    path = backward_path(diamond_es, ['regions', 'stores'])
    error_text = 'Base feature must be defined on the entity at the end of relationship_path'
    with pytest.raises(AssertionError, match=error_text):
        ft.AggregationFeature(amount,
                              diamond_es['regions'],
                              ft.primitives.Mean,
                              relationship_path=path)

    # Re-tag every relationship of a backward path with True (the "forward"
    # copy) and prepend it, producing a path that is not purely backward.
    backward = backward_path(diamond_es, ['customers', 'transactions'])
    forward = RelationshipPath([(True, r) for _, r in backward])
    path = RelationshipPath(list(forward) + list(backward))
    error_text = 'All relationships in path must be backward'
    with pytest.raises(AssertionError, match=error_text):
        ft.AggregationFeature(amount,
                              diamond_es['transactions'],
                              ft.primitives.Mean,
                              relationship_path=path)
def test_feature_trie_with_needs_full_entity(diamond_es):
    """Verify the feature trie contents when CumSum transforms are involved.

    Builds a Mean aggregation through customers, a CumSum of that aggregation,
    and a Mean of a CumSum through stores, then checks the value stored at the
    trie root and at each node along both paths.

    NOTE(review): each node value is a 3-tuple of (bool, set of feature names,
    set of feature names); judging by the test name the bool is presumably the
    "needs full entity" flag -- confirm against FeatureSet.
    """
    entityset = diamond_es
    amount = ft.IdentityFeature(entityset['transactions']['amount'])

    # regions -> customers -> transactions
    customers_path = backward_path(entityset,
                                   ['regions', 'customers', 'transactions'])
    agg = ft.AggregationFeature(amount,
                                entityset['regions'],
                                primitive=ft.primitives.Mean,
                                relationship_path=customers_path)
    cum_sum_of_agg = ft.TransformFeature(agg, ft.primitives.CumSum)

    # regions -> stores -> transactions
    stores_path = backward_path(entityset,
                                ['regions', 'stores', 'transactions'])
    cum_sum = ft.TransformFeature(amount, ft.primitives.CumSum)
    agg_of_cum_sum = ft.AggregationFeature(cum_sum,
                                           entityset['regions'],
                                           primitive=ft.primitives.Mean,
                                           relationship_path=stores_path)

    trie = FeatureSet([agg, cum_sum_of_agg, agg_of_cum_sum]).feature_trie

    def names(*feats):
        # Set of unique names for the given features.
        return {feat.unique_name() for feat in feats}

    def node_value(path):
        # Value stored in the trie at the given relationship path.
        return trie.get_node(path).value

    assert trie.value == (True, names(agg, cum_sum_of_agg), names(agg_of_cum_sum))
    assert node_value(customers_path) == (True, names(amount), set())
    assert node_value(customers_path[:1]) == (True, set(), set())
    assert node_value(stores_path) == (True, names(amount, cum_sum), set())
    assert node_value(stores_path[:1]) == (False, set(), set())
def test_feature_trie_without_needs_full_entity(diamond_es):
    """Verify the feature trie contents for direct, aggregation and Negate features.

    Builds a direct feature on regions, Mean aggregations through both
    customers and stores, and a Mean of a negated per-customer Mean, then
    checks the value stored at the trie root and at each relevant node.

    NOTE(review): each node value is a 3-tuple of (bool, set of feature names,
    set of feature names); judging by the test name the bool is presumably the
    "needs full entity" flag -- confirm against FeatureSet.
    """
    entityset = diamond_es
    mean = ft.primitives.Mean

    country_name = ft.IdentityFeature(entityset['countries']['name'])
    direct_name = ft.DirectFeature(country_name, entityset['regions'])
    amount = ft.IdentityFeature(entityset['transactions']['amount'])

    # regions -> customers -> transactions
    customers_path = backward_path(entityset,
                                   ['regions', 'customers', 'transactions'])
    via_customers = ft.AggregationFeature(amount,
                                          entityset['regions'],
                                          primitive=mean,
                                          relationship_path=customers_path)

    # regions -> stores -> transactions
    stores_path = backward_path(entityset,
                                ['regions', 'stores', 'transactions'])
    via_stores = ft.AggregationFeature(amount,
                                       entityset['regions'],
                                       primitive=mean,
                                       relationship_path=stores_path)

    # Two-step aggregation: per-customer mean, negated, then averaged per region.
    customers_to_transactions = backward_path(entityset,
                                              ['customers', 'transactions'])
    customers_mean = ft.AggregationFeature(amount,
                                           entityset['customers'],
                                           primitive=mean,
                                           relationship_path=customers_to_transactions)
    negation = ft.TransformFeature(customers_mean, ft.primitives.Negate)
    regions_to_customers = backward_path(entityset, ['regions', 'customers'])
    mean_of_mean = ft.AggregationFeature(negation,
                                         entityset['regions'],
                                         primitive=mean,
                                         relationship_path=regions_to_customers)

    features = [direct_name, via_customers, via_stores, mean_of_mean]
    trie = FeatureSet(features).feature_trie

    def names(*feats):
        # Set of unique names for the given features.
        return {feat.unique_name() for feat in feats}

    def node_value(path):
        # Value stored in the trie at the given relationship path.
        return trie.get_node(path).value

    assert trie.value == (False, set(), names(*features))
    assert node_value(direct_name.relationship_path) == \
        (False, set(), names(country_name))
    assert node_value(regions_to_customers) == \
        (False, set(), names(negation, customers_mean))

    regions_to_stores = backward_path(entityset, ['regions', 'stores'])
    assert node_value(regions_to_stores) == (False, set(), set())
    assert node_value(customers_path) == (False, set(), names(amount))
    assert node_value(stores_path) == (False, set(), names(amount))
def test_init_with_single_possible_path(diamond_es):
    """An AggregationFeature built without a path gets the customers->transactions path."""
    # diamond_es is used on purpose: a cycle elsewhere in the relationship
    # graph must not cause an error when only one path is possible here.
    base_variable = diamond_es['transactions']['amount']
    parent_entity = diamond_es['customers']
    feature = ft.AggregationFeature(base_variable, parent_entity, ft.primitives.Mean)

    assert feature.relationship_path == backward_path(diamond_es,
                                                      ['customers', 'transactions'])
def test_feature_trie_with_needs_full_dataframe(diamond_es):
    """Verify the feature trie contents when CumSum transforms are involved.

    Builds a Mean aggregation through customers, a CumSum of that aggregation,
    and a Mean of a CumSum through stores, then checks the value stored at the
    trie root and at each node along both paths.

    NOTE(review): each node value is a 3-tuple of (bool, set of feature names,
    set of feature names); judging by the test name the bool is presumably the
    "needs full dataframe" flag -- confirm against FeatureSet.
    """
    entityset = diamond_es
    amount = ft.IdentityFeature(entityset["transactions"].ww["amount"])

    # regions -> customers -> transactions
    customers_path = backward_path(
        entityset, ["regions", "customers", "transactions"]
    )
    agg = ft.AggregationFeature(
        amount,
        "regions",
        primitive=ft.primitives.Mean,
        relationship_path=customers_path,
    )
    cum_sum_of_agg = ft.TransformFeature(agg, ft.primitives.CumSum)

    # regions -> stores -> transactions
    stores_path = backward_path(entityset, ["regions", "stores", "transactions"])
    cum_sum = ft.TransformFeature(amount, ft.primitives.CumSum)
    agg_of_cum_sum = ft.AggregationFeature(
        cum_sum,
        "regions",
        primitive=ft.primitives.Mean,
        relationship_path=stores_path,
    )

    trie = FeatureSet([agg, cum_sum_of_agg, agg_of_cum_sum]).feature_trie

    def names(*feats):
        # Set of unique names for the given features.
        return {feat.unique_name() for feat in feats}

    def node_value(path):
        # Value stored in the trie at the given relationship path.
        return trie.get_node(path).value

    assert trie.value == (True, names(agg, cum_sum_of_agg), names(agg_of_cum_sum))
    assert node_value(customers_path) == (True, names(amount), set())
    assert node_value(customers_path[:1]) == (True, set(), set())
    assert node_value(stores_path) == (True, names(amount, cum_sum), set())
    assert node_value(stores_path[:1]) == (False, set(), set())
def test_name_with_multiple_possible_paths(diamond_es):
    """The feature name and path name include the explicit customers route."""
    via_customers = backward_path(diamond_es,
                                  ['regions', 'customers', 'transactions'])
    base_variable = diamond_es['transactions']['amount']
    parent_entity = diamond_es['regions']

    feature = ft.AggregationFeature(base_variable,
                                    parent_entity,
                                    ft.primitives.Mean,
                                    relationship_path=via_customers)

    assert feature.get_name() == "MEAN(customers.transactions.amount)"
    assert feature.relationship_path_name() == 'customers.transactions'
def test_init_with_single_possible_path(diamond_es):
    """An AggregationFeature built without a path gets the customers->transactions path."""
    # diamond_es is used on purpose: a cycle elsewhere in the relationship
    # graph must not cause an error when only one path is possible here.
    amount = ft.IdentityFeature(diamond_es["transactions"].ww["amount"])
    feature = ft.AggregationFeature(amount, "customers", ft.primitives.Mean)

    assert feature.relationship_path == backward_path(
        diamond_es, ["customers", "transactions"]
    )
def test_diamond_entityset(diamond_es):
    """Sum transaction amounts per region along both routes and check the totals.

    The same base feature is aggregated once through customers and once
    through stores; the calculator output must contain the expected values
    in a separate column for each route.
    """
    entityset = diamond_es
    amount = ft.IdentityFeature(entityset['transactions']['amount'])

    customers_path = backward_path(entityset,
                                   ['regions', 'customers', 'transactions'])
    sum_via_customers = ft.AggregationFeature(amount,
                                              entityset['regions'],
                                              primitive=ft.primitives.Sum,
                                              relationship_path=customers_path)

    stores_path = backward_path(entityset,
                                ['regions', 'stores', 'transactions'])
    sum_via_stores = ft.AggregationFeature(amount,
                                           entityset['regions'],
                                           primitive=ft.primitives.Sum,
                                           relationship_path=stores_path)

    calculator = FeatureSetCalculator(
        entityset,
        time_last=datetime(2011, 4, 8),
        feature_set=FeatureSet([sum_via_customers, sum_via_stores]),
    )
    result = calculator.run(np.array([0, 1, 2]))

    # Expected per-region totals for instance ids 0, 1, 2, keyed by column.
    expected_sums = {
        'SUM(stores.transactions.amount)': [94, 261, 128],
        'SUM(customers.transactions.amount)': [72, 411, 0],
    }
    for column, totals in expected_sums.items():
        assert (result[column] == totals).all()
def test_name_with_multiple_possible_paths(diamond_es):
    """The feature name and path name include the explicit customers route."""
    via_customers = backward_path(
        diamond_es, ["regions", "customers", "transactions"]
    )
    amount = ft.IdentityFeature(diamond_es["transactions"].ww["amount"])

    feature = ft.AggregationFeature(
        amount,
        "regions",
        ft.primitives.Mean,
        relationship_path=via_customers,
    )

    assert feature.get_name() == "MEAN(customers.transactions.amount)"
    assert feature.relationship_path_name() == "customers.transactions"
def test_init_with_multiple_possible_paths(diamond_es):
    """AggregationFeature requires an explicit path when several are possible.

    Constructing a regions-level aggregation of a transactions variable
    without ``relationship_path`` must raise ``RuntimeError``; supplying a path
    must succeed.
    """
    import re

    error_text = "There are multiple possible paths to the base entity. " \
                 "You must specify a relationship path."
    # ``match`` is applied as a regular-expression search, so escape the text:
    # otherwise each "." would match any character instead of a literal period.
    with pytest.raises(RuntimeError, match=re.escape(error_text)):
        ft.AggregationFeature(diamond_es['transactions']['amount'],
                              diamond_es['regions'],
                              ft.primitives.Mean)

    # Does not raise if path specified.
    path = backward_path(diamond_es, ['regions', 'customers', 'transactions'])
    ft.AggregationFeature(diamond_es['transactions']['amount'],
                          diamond_es['regions'],
                          ft.primitives.Mean,
                          relationship_path=path)
def test_diamond_entityset(diamond_es):
    """Sum transaction amounts per region along both routes and check the totals.

    The same base feature is aggregated once through customers and once
    through stores; the calculator output must contain the expected values
    in a separate column for each route.
    """
    entityset = diamond_es
    amount = ft.IdentityFeature(entityset["transactions"].ww["amount"])

    customers_path = backward_path(
        entityset, ["regions", "customers", "transactions"]
    )
    sum_via_customers = ft.AggregationFeature(
        amount,
        "regions",
        primitive=ft.primitives.Sum,
        relationship_path=customers_path,
    )

    stores_path = backward_path(entityset, ["regions", "stores", "transactions"])
    sum_via_stores = ft.AggregationFeature(
        amount,
        "regions",
        primitive=ft.primitives.Sum,
        relationship_path=stores_path,
    )

    calculator = FeatureSetCalculator(
        entityset,
        time_last=datetime(2011, 4, 8),
        feature_set=FeatureSet([sum_via_customers, sum_via_stores]),
    )
    raw_result = calculator.run(np.array([0, 1, 2]))
    # Convert via the to_pandas helper, indexed and sorted by "id", before comparing.
    result = to_pandas(raw_result, index="id", sort_index=True)

    # Expected per-region totals for instance ids 0, 1, 2, keyed by column.
    expected_sums = {
        "SUM(stores.transactions.amount)": [94, 261, 128],
        "SUM(customers.transactions.amount)": [72, 411, 0],
    }
    for column, totals in expected_sums.items():
        assert (result[column] == totals).all()
def test_init_with_multiple_possible_paths(diamond_es):
    """AggregationFeature requires an explicit path when several are possible.

    Constructing a regions-level aggregation of a transactions feature without
    ``relationship_path`` must raise ``RuntimeError``; supplying a path must
    succeed.
    """
    import re

    error_text = (
        "There are multiple possible paths to the base dataframe. "
        "You must specify a relationship path."
    )
    # ``match`` is applied as a regular-expression search, so escape the text:
    # otherwise each "." would match any character instead of a literal period.
    with pytest.raises(RuntimeError, match=re.escape(error_text)):
        ft.AggregationFeature(
            ft.IdentityFeature(diamond_es["transactions"].ww["amount"]),
            "regions",
            ft.primitives.Mean,
        )

    # Does not raise if path specified.
    path = backward_path(diamond_es, ["regions", "customers", "transactions"])
    ft.AggregationFeature(
        ft.IdentityFeature(diamond_es["transactions"].ww["amount"]),
        "regions",
        ft.primitives.Mean,
        relationship_path=path,
    )
def test_get_backward_entities_deep(es):
    """A deep backward lookup from customers yields sessions and then log."""
    found = es.get_backward_entities('customers', deep=True)

    # Expected (entity id, path) pairs, in the order they must be yielded.
    expected = [
        ('sessions', backward_path(es, ['customers', 'sessions'])),
        ('log', backward_path(es, ['customers', 'sessions', 'log'])),
    ]
    assert list(found) == expected
def test_get_backward_dataframes_deep(es):
    """A deep backward lookup from customers yields sessions and then log."""
    found = es.get_backward_dataframes("customers", deep=True)

    # Expected (dataframe name, path) pairs, in the order they must be yielded.
    expected = [
        ("sessions", backward_path(es, ["customers", "sessions"])),
        ("log", backward_path(es, ["customers", "sessions", "log"])),
    ]
    assert list(found) == expected
def test_get_backward_dataframes(es):
    """A non-deep backward lookup from customers yields only sessions."""
    found = es.get_backward_dataframes("customers")

    sessions_path = backward_path(es, ["customers", "sessions"])
    expected = [("sessions", sessions_path)]
    assert list(found) == expected
def test_get_backward_entities(es):
    """A non-deep backward lookup from customers yields only sessions."""
    found = es.get_backward_entities('customers')

    sessions_path = backward_path(es, ['customers', 'sessions'])
    expected = [('sessions', sessions_path)]
    assert list(found) == expected
def test_feature_trie_without_needs_full_dataframe(diamond_es):
    """Verify the feature trie contents for direct, aggregation and Negate features.

    Builds a direct feature on regions, Mean aggregations through both
    customers and stores, and a Mean of a negated per-customer Mean, then
    checks the value stored at the trie root and at each relevant node.

    NOTE(review): each node value is a 3-tuple of (bool, set of feature names,
    set of feature names); judging by the test name the bool is presumably the
    "needs full dataframe" flag -- confirm against FeatureSet.
    """
    entityset = diamond_es
    mean = ft.primitives.Mean

    country_name = ft.IdentityFeature(entityset["countries"].ww["name"])
    direct_name = ft.DirectFeature(country_name, "regions")
    amount = ft.IdentityFeature(entityset["transactions"].ww["amount"])

    # regions -> customers -> transactions
    customers_path = backward_path(
        entityset, ["regions", "customers", "transactions"]
    )
    via_customers = ft.AggregationFeature(
        amount,
        "regions",
        primitive=mean,
        relationship_path=customers_path,
    )

    # regions -> stores -> transactions
    stores_path = backward_path(entityset, ["regions", "stores", "transactions"])
    via_stores = ft.AggregationFeature(
        amount,
        "regions",
        primitive=mean,
        relationship_path=stores_path,
    )

    # Two-step aggregation: per-customer mean, negated, then averaged per region.
    customers_to_transactions = backward_path(
        entityset, ["customers", "transactions"]
    )
    customers_mean = ft.AggregationFeature(
        amount,
        "customers",
        primitive=mean,
        relationship_path=customers_to_transactions,
    )
    negation = ft.TransformFeature(customers_mean, ft.primitives.Negate)
    regions_to_customers = backward_path(entityset, ["regions", "customers"])
    mean_of_mean = ft.AggregationFeature(
        negation,
        "regions",
        primitive=mean,
        relationship_path=regions_to_customers,
    )

    features = [direct_name, via_customers, via_stores, mean_of_mean]
    trie = FeatureSet(features).feature_trie

    def names(*feats):
        # Set of unique names for the given features.
        return {feat.unique_name() for feat in feats}

    def node_value(path):
        # Value stored in the trie at the given relationship path.
        return trie.get_node(path).value

    assert trie.value == (False, set(), names(*features))
    assert node_value(direct_name.relationship_path) == (
        False,
        set(),
        names(country_name),
    )
    assert node_value(regions_to_customers) == (
        False,
        set(),
        names(negation, customers_mean),
    )

    regions_to_stores = backward_path(entityset, ["regions", "stores"])
    assert node_value(regions_to_stores) == (False, set(), set())
    assert node_value(customers_path) == (False, set(), names(amount))
    assert node_value(stores_path) == (False, set(), names(amount))