def test_make_deep_agg_feat_of_dfeat_of_agg_feat(entityset, backend):
    r"""
    Check an aggregation over a direct feature that wraps another aggregation.

    The graph looks like this (higher implies parent):

          C       C = Customers, the entity we're trying to predict on
          |       S = Sessions, a child of Customers
      P   S       L = Log, a child of both Sessions and Products
       \ /        P = Products, a parent of Log which is not a
        L             descendant of Customers

    We're trying to calculate a DFeat from L to P on an agg_feat of P on L,
    and then aggregate it with another agg_feat of C on L.

    The docstring is a raw string so the backslash in the diagram is not
    parsed as an (invalid) escape sequence.
    """
    # agg_feat of P on L: number of log rows per product.
    log_count_feat = Count(entityset['log']['id'],
                           parent_entity=entityset['products'])

    # DFeat from L to P: expose the product-level count on each log row.
    product_purchases_feat = DirectFeature(log_count_feat,
                                           child_entity=entityset['log'])

    # agg_feat of C on L: average of that per-log value for each customer.
    purchase_popularity = Mean(product_purchases_feat,
                               parent_entity=entityset['customers'])

    pandas_backend = backend([purchase_popularity])
    df = pandas_backend.calculate_all_features(instance_ids=[0],
                                               time_last=None)
    v = df[purchase_popularity.get_name()][0]
    assert (v == 38.0 / 10.0)
def test_mean_nan(es):
    """
    Exercise Mean's ``skipna`` option on raw series and in feature names.

    The assertions show that the default function matches ``skipna=True``
    (NaNs are dropped before averaging), that ``skipna=False`` yields NaN as
    soon as one NaN is present, and that all-NaN input yields NaN in every
    mode. The generated feature name only mentions ``skipna`` when it is
    ``False``.
    """
    # NOTE(review): the visible source defines ``test_mean_nan`` more than
    # once; if both definitions live in one module only the later one is
    # collected -- confirm.
    constant = pd.Series([5, 5, 5, 5, 5])
    default_mean = Mean().get_function()
    strict_mean = Mean(skipna=False).get_function()
    skipping_mean = Mean(skipna=True).get_function()

    # Without NaNs every variant agrees.
    for mean_func in (default_mean, strict_mean, skipping_mean):
        assert mean_func(constant) == 5

    with_gaps = pd.Series([5, np.nan, np.nan, np.nan, np.nan, 10])
    assert default_mean(with_gaps) == 7.5
    assert isnan(strict_mean(with_gaps))
    assert skipping_mean(with_gaps) == 7.5

    # Nothing left to average once the NaNs are dropped.
    only_nans = pd.Series([np.nan, np.nan, np.nan, np.nan])
    for mean_func in (default_mean, strict_mean, skipping_mean):
        assert isnan(mean_func(only_nans))

    # test naming
    log_value = es["log"]["value"]
    customers = es["customers"]

    unconfigured = ft.Feature(log_value,
                              parent_entity=customers,
                              primitive=Mean)
    assert unconfigured.get_name() == "MEAN(log.value)"

    skipping = ft.Feature(log_value,
                          parent_entity=customers,
                          primitive=Mean(skipna=True))
    assert skipping.get_name() == "MEAN(log.value)"

    strict = ft.Feature(log_value,
                        parent_entity=customers,
                        primitive=Mean(skipna=False))
    assert strict.get_name() == "MEAN(log.value, skipna=False)"
def test_check_input_types(es):
    """
    Verify ``Mean._check_input_types`` accepts a Count base feature.

    The check is made twice: once for a plain Mean over the per-customer
    session count, and once for the same Mean restricted by a boolean
    ``where`` feature derived from that count.
    """
    sessions_per_customer = Count(es["sessions"]["id"], es["customers"])

    plain_mean = Mean(sessions_per_customer, es[u"régions"])
    assert plain_mean._check_input_types()

    # Comparing a feature with a scalar yields the feature used as filter.
    more_than_three = sessions_per_customer > 3
    filtered_mean = Mean(sessions_per_customer,
                         es[u"régions"],
                         where=more_than_three)
    assert filtered_mean._check_input_types()
def test_mean_nan():
    """
    Exercise Mean's ``ignore_nans`` option on raw numpy arrays.

    The assertions show that the default function matches
    ``ignore_nans=False`` (any NaN makes the result NaN), that
    ``ignore_nans=True`` averages only the non-NaN values, and that all-NaN
    input yields NaN in every mode.
    """
    # NOTE(review): the visible source defines ``test_mean_nan`` more than
    # once; if both definitions live in one module only the later one is
    # collected -- confirm.
    array = np.array([5, 5, 5, 5, 5])
    mean_func_nans_default = Mean().get_function()
    mean_func_nans_false = Mean(ignore_nans=False).get_function()
    mean_func_nans_true = Mean(ignore_nans=True).get_function()

    # Without NaNs every variant agrees.
    assert mean_func_nans_default(array) == 5
    assert mean_func_nans_false(array) == 5
    assert mean_func_nans_true(array) == 5

    array = np.array([5, np.nan, np.nan, np.nan, np.nan, 10])
    assert isnan(mean_func_nans_default(array))
    assert isnan(mean_func_nans_false(array))
    assert mean_func_nans_true(array) == 7.5

    array_nans = np.array([np.nan, np.nan, np.nan, np.nan])
    # The default function must be checked against the all-NaN input too;
    # previously this assertion re-tested the mixed array by mistake.
    assert isnan(mean_func_nans_default(array_nans))
    assert isnan(mean_func_nans_false(array_nans))
    assert isnan(mean_func_nans_true(array_nans))
def test_deep_agg_feat_chain(entityset, backend):
    """
    Agg feat of agg feat:
        region.Mean(customer.Count(Log))

    Counts log rows per customer, averages those counts per region, and
    checks the value computed for the 'United States' instance.
    """
    # NOTE(review): the visible source defines ``test_deep_agg_feat_chain``
    # more than once; if both definitions live in one module only the later
    # one is collected -- confirm.
    logs_per_customer = Count(entityset['log']['id'],
                              parent_entity=entityset['customers'])
    mean_logs_per_region = Mean(logs_per_customer,
                                parent_entity=entityset[u'régions'])

    calculator = backend([mean_logs_per_region])
    feature_matrix = calculator.calculate_all_features(
        instance_ids=['United States'],
        time_last=None,
    )

    column = mean_logs_per_region.get_name()
    observed = feature_matrix[column][0]
    assert (observed == 17 / 3.)
def test_make_compare_feat(entityset, backend):
    """
    Compare each session's log count with its customer-level mean.

    Feature we're creating is a boolean per session:
    ``COUNT(log) per session  >  MEAN(COUNT(log) per session) per customer``,
    where the customer-level mean is exposed on sessions through a
    DirectFeature. The first two of the three requested sessions are
    expected to be truthy and the third falsy.
    """
    # ``max_stack_depth`` is set on the Count primitive itself, so the change
    # would otherwise stay visible to every test that runs afterwards.
    # Remember whether Count defined the attribute itself so it can be
    # restored (or removed again) once this test is done.
    not_set = object()
    previous_depth = vars(Count).get('max_stack_depth', not_set)
    Count.max_stack_depth = 2
    try:
        log_count_feat = Count(entityset['log']['id'],
                               parent_entity=entityset['sessions'])
        mean_agg_feat = Mean(log_count_feat,
                             parent_entity=entityset['customers'])
        mean_feat = DirectFeature(mean_agg_feat,
                                  child_entity=entityset['sessions'])

        feat = log_count_feat > mean_feat

        pandas_backend = backend([feat])
        df = pandas_backend.calculate_all_features(instance_ids=[0, 1, 2],
                                                   time_last=None)
        name = feat.get_name()
        instances = df[name]
        v0, v1, v2 = instances[0:3]
        assert v0
        assert v1
        assert not v2
    finally:
        if previous_depth is not_set:
            del Count.max_stack_depth
        else:
            Count.max_stack_depth = previous_depth
def test_deep_agg_feat_chain(entityset, backend):
    """
    Agg feat of agg feat:
        region.Mean(customer.Count(Log))

    Counts log rows per customer, averages those counts per region, and
    checks the value computed for the 'United States' instance.
    """
    # NOTE(review): the visible source defines ``test_deep_agg_feat_chain``
    # more than once, and the other definition looks the region entity up as
    # u'régions' rather than 'regions' -- confirm which id the fixture
    # exposes and which definition should be kept.
    logs_per_customer = Count(entityset['log']['id'],
                              parent_entity=entityset['customers'])
    mean_logs_per_region = Mean(logs_per_customer,
                                parent_entity=entityset['regions'])

    calculator = backend([mean_logs_per_region])
    feature_matrix = calculator.calculate_all_features(
        instance_ids=['United States'],
        time_last=None,
    )

    column = mean_logs_per_region.get_name()
    observed = feature_matrix[column][0]
    assert (observed == 17 / 3.)