def test_trend(es):
    """Check the Trend of log value over log datetime, aggregated per customer."""
    base_columns = [es['log']['value'], es['log']['datetime']]
    trend = ft.Feature(base_columns,
                       parent_entity=es['customers'],
                       primitive=Trend)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([trend]))
    feature_matrix = calculator.run([0, 1, 2])

    # The last requested customer is expected to produce NaN.
    expected = [-0.812730, 4.870378, np.nan]
    observed = feature_matrix[trend.get_name()].values.tolist()
    np.testing.assert_almost_equal(observed, expected, decimal=5)
def test_make_agg_feat_of_agg_feat(es):
    """Stack a customer-level Sum on top of a session-level Count of logs."""
    logs_per_session = ft.Feature(es['log']['id'],
                                  parent_entity=es['sessions'],
                                  primitive=Count)
    logs_per_customer = ft.Feature(logs_per_session,
                                   parent_entity=es['customers'],
                                   primitive=Sum)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([logs_per_customer]))
    feature_matrix = calculator.run([0])

    column = feature_matrix[logs_per_customer.get_name()]
    assert column[0] == 10
def test_make_agg_feat_where_count(es):
    """Count logs per session, restricted to logs whose product is 'coke zero'."""
    is_coke_zero = IdentityFeature(es['log']['product_id']) == 'coke zero'
    agg_feat = ft.Feature(es['log']['id'],
                          parent_entity=es['sessions'],
                          where=is_coke_zero,
                          primitive=Count)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([agg_feat]))
    result = calculator.run(np.array([0]))
    # Dask results are lazy; materialize them before indexing.
    if isinstance(result, dd.DataFrame):
        result = result.compute()

    assert result[agg_feat.get_name()][0] == 3
def test_make_3_stacked_agg_feats(df):
    """
    Tests stacking 3 agg features.

    The test specifically uses non numeric indices to test how ancestor
    variables are handled as dataframes are merged together.
    """
    if isinstance(df, dd.DataFrame):
        pytest.xfail('normalize_entity fails with dask DataFrame')

    column_types = {
        'id': variable_types.Index,
        'e1': variable_types.Categorical,
        'e2': variable_types.Categorical,
        'e3': variable_types.Categorical,
        'val': variable_types.Numeric,
    }
    es = ft.EntitySet()
    es.entity_from_dataframe(dataframe=df,
                             index="id",
                             entity_id="e0",
                             variable_types=column_types)

    # Build the chain e0 -> e1 -> e2 -> e3; the first two steps carry the
    # remaining ancestor columns along with them.
    for base_id, new_id, carried in (("e0", "e1", ["e2", "e3"]),
                                     ("e1", "e2", ["e3"])):
        es.normalize_entity(base_entity_id=base_id,
                            new_entity_id=new_id,
                            index=new_id,
                            additional_variables=carried)
    es.normalize_entity(base_entity_id="e2", new_entity_id="e3", index="e3")

    # Sum "val" one level up at a time until the top entity is reached.
    stacked = es["e0"]["val"]
    for parent_id in ("e1", "e2", "e3"):
        stacked = ft.Feature(stacked, parent_entity=es[parent_id], primitive=Sum)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([stacked]))
    feature_matrix = calculator.run(np.array(["z"]))

    assert feature_matrix[stacked.get_name()][0] == 5
def test_precalculated_features(pd_es):
    """Precalculated values must be used instead of invoking the primitive.

    The aggregation primitive used here raises whenever its function is
    called, so the first run only succeeds if the calculator takes the
    values from the supplied precalculated trie. The second run, without
    precalculated features, is expected to hit the error.
    """
    error_msg = 'This primitive should never be used because the features are precalculated'

    class ErrorPrim(AggregationPrimitive):
        """A primitive whose function raises an error."""
        name = "error_prim"
        input_types = [Numeric]
        return_type = Numeric

        def get_function(self):
            def error(s):
                raise RuntimeError(error_msg)
            return error

    base_value = ft.Feature(pd_es['log']['value'])
    session_agg = ft.Feature(base_value,
                             parent_entity=pd_es['sessions'],
                             primitive=ErrorPrim)
    customer_agg = ft.Feature(session_agg,
                              parent_entity=pd_es['customers'],
                              primitive=ErrorPrim)
    direct = ft.Feature(customer_agg, entity=pd_es['sessions'])
    path = direct.relationship_path

    # Set up a FeatureSet which knows which features are precalculated.
    known_features = Trie(default=set, path_constructor=RelationshipPath)
    known_features.get_node(path).value.add(customer_agg.unique_name())
    feature_set = FeatureSet([direct], approximate_feature_trie=known_features)

    # Fake precalculated data.
    values = [0, 1, 2]
    known_matrices = Trie(path_constructor=RelationshipPath)
    known_matrices.get_node(path).value = pd.DataFrame({customer_agg.get_name(): values})

    calculator = FeatureSetCalculator(pd_es,
                                      feature_set=feature_set,
                                      precalculated_features=known_matrices)
    instance_ids = [0, 2, 3, 5]
    fm = calculator.run(np.array(instance_ids))
    expected = [values[position] for position in (0, 0, 1, 2)]
    assert list(fm[direct.get_name()]) == expected

    # Calculating without precalculated features should error.
    with pytest.raises(RuntimeError, match=error_msg):
        FeatureSetCalculator(pd_es, feature_set=FeatureSet([direct])).run(instance_ids)
def test_make_3_stacked_agg_feats():
    """
    Tests stacking 3 agg features.

    The test specifically uses non numeric indices to test how ancestor
    variables are handled as dataframes are merged together.
    """
    frame = pd.DataFrame({
        "id": ["a", "b", "c", "d", "e"],
        "e1": ["h", "h", "i", "i", "j"],
        "e2": ["x", "x", "y", "y", "x"],
        "e3": ["z", "z", "z", "z", "z"],
        "val": [1, 1, 1, 1, 1],
    })
    es = ft.EntitySet()
    es.entity_from_dataframe(dataframe=frame, index="id", entity_id="e0")

    # Build the chain e0 -> e1 -> e2 -> e3; the first two steps carry the
    # remaining ancestor columns along with them.
    for base_id, new_id, carried in (("e0", "e1", ["e2", "e3"]),
                                     ("e1", "e2", ["e3"])):
        es.normalize_entity(base_entity_id=base_id,
                            new_entity_id=new_id,
                            index=new_id,
                            additional_variables=carried)
    es.normalize_entity(base_entity_id="e2", new_entity_id="e3", index="e3")

    # Sum "val" one level up at a time until the top entity is reached.
    stacked = es["e0"]["val"]
    for parent_id in ("e1", "e2", "e3"):
        stacked = ft.Feature(stacked, parent_entity=es[parent_id], primitive=Sum)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([stacked]))
    feature_matrix = calculator.run(np.array(["z"]))

    assert feature_matrix[stacked.get_name()][0] == 5
def test_direct_from_column(es):
    """Pull sessions.device_type down onto log rows through a DirectFeature.

    Should be the same behavior as test_direct_from_identity.
    """
    device_type = Feature(es["sessions"].ww["device_type"])
    direct = DirectFeature(base_feature=device_type, child_dataframe_name="log")

    calculator = FeatureSetCalculator(es,
                                      feature_set=FeatureSet([direct]),
                                      time_last=None)
    raw_matrix = calculator.run(np.array([0, 5]))
    feature_matrix = to_pandas(raw_matrix, index="id", sort_index=True)
    observed = feature_matrix[direct.get_name()].tolist()

    # On Spark the values are expected as strings rather than integers.
    on_spark = es.dataframe_type == Library.SPARK.value
    expected = ["0", "1"] if on_spark else [0, 1]
    assert observed == expected
def test_make_agg_feat_of_agg_feat(es):
    """Stack a customer-level Sum on top of a session-level Count of logs."""
    logs_per_session = ft.Feature(es['log'].ww['id'],
                                  parent_dataframe_name='sessions',
                                  primitive=Count)
    logs_per_customer = ft.Feature(logs_per_session,
                                   parent_dataframe_name='customers',
                                   primitive=Sum)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([logs_per_customer]))
    raw_matrix = calculator.run(np.array([0]))
    feature_matrix = to_pandas(raw_matrix, index='id')

    column_values = feature_matrix[logs_per_customer.get_name()].values
    assert column_values[0] == 10
def test_direct_from_column(es):
    """Pull sessions.device_type down onto log rows through a DirectFeature.

    Should be the same behavior as test_direct_from_identity.
    """
    device_type = Feature(es['sessions'].ww['device_type'])
    direct = DirectFeature(base_feature=device_type, child_dataframe_name='log')

    calculator = FeatureSetCalculator(es,
                                      feature_set=FeatureSet([direct]),
                                      time_last=None)
    raw_matrix = calculator.run(np.array([0, 5]))
    feature_matrix = to_pandas(raw_matrix, index='id', sort_index=True)
    observed = feature_matrix[direct.get_name()].tolist()

    # On Koalas the values are expected as strings rather than integers.
    on_koalas = es.dataframe_type == Library.KOALAS.value
    expected = ['0', '1'] if on_koalas else [0, 1]
    assert observed == expected
def test_calls_progress_callback(es):
    """Call with all feature types and make sure progress updates sum to 1."""
    identity = ft.Feature(es['customers'].ww['age'])
    direct = ft.Feature(es['cohorts'].ww['cohort_name'], 'customers')
    agg = ft.Feature(es['sessions'].ww['id'],
                     parent_dataframe_name='customers',
                     primitive=Count)
    # this feature is handled differently than simple features
    agg_apply = ft.Feature(es['log'].ww['datetime'],
                           parent_dataframe_name='customers',
                           primitive=TimeSinceLast)
    trans = ft.Feature(agg, primitive=Negate)
    trans_full = ft.Feature(agg, primitive=CumSum)
    cohort = ft.Feature(es['customers'].ww['cohort'])
    groupby_trans = ft.Feature(agg, primitive=CumSum, groupby=cohort)

    # Only the pandas backend gets the full list of features.
    if es.dataframe_type == Library.PANDAS.value:
        all_features = [
            identity, direct, agg, agg_apply, trans, trans_full, groupby_trans
        ]
    else:
        all_features = [identity, direct, agg, trans]

    class MockProgressCallback:
        """Accumulates every progress update it is called with."""

        def __init__(self):
            self.total = 0

        def __call__(self, update):
            self.total += update

    instance_ids = [0, 1, 2]
    # First pass has no cutoff; the second uses a time_last with no data.
    for cutoff in (None, pd.Timestamp("1950")):
        calculator = FeatureSetCalculator(es,
                                          time_last=cutoff,
                                          feature_set=FeatureSet(all_features))
        tracker = MockProgressCallback()
        calculator.run(np.array(instance_ids), tracker)
        assert np.isclose(tracker.total, 1)
def calc_results(time_last, ids, precalculated_features=None, training_window=None, include_cutoff_time=True):
    """Run a FeatureSetCalculator for ``ids`` at ``time_last`` and return its result.

    Relies on ``entityset``, ``feature_set``, ``progress_bar``,
    ``progress_callback`` and ``group`` being available from the
    surrounding scope. When a progress bar exists, the calculator is given
    a callback that advances the bar and, if an external
    ``progress_callback`` is set, forwards the update to it as well.
    """
    report_progress = None

    if progress_bar is not None:
        def report_progress(done):
            # Remember the bar position before advancing it; the helper
            # below receives both the bar and this earlier position.
            before = progress_bar.n
            progress_bar.update(done * group.shape[0])
            if progress_callback is None:
                return
            delta, percent, elapsed = update_progress_callback_parameters(progress_bar, before)
            progress_callback(delta, percent, elapsed)

    calculator = FeatureSetCalculator(
        entityset,
        feature_set,
        time_last,
        training_window=training_window,
        precalculated_features=precalculated_features,
    )
    return calculator.run(ids,
                          progress_callback=report_progress,
                          include_cutoff_time=include_cutoff_time)
def test_deep_agg_feat_chain(es):
    """
    Agg feat of agg feat:
        region.Mean(customer.Count(Log))
    """
    logs_per_customer = ft.Feature(es['log']['id'],
                                   parent_entity=es['customers'],
                                   primitive=Count)
    region_mean = ft.Feature(logs_per_customer,
                             parent_entity=es[u'régions'],
                             primitive=Mean)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([region_mean]))
    result = calculator.run(np.array(['United States']))
    # Dask results are lazy; materialize them and index by 'id' first.
    if isinstance(result, dd.DataFrame):
        result = result.compute().set_index('id')

    assert result[region_mean.get_name()][0] == 17 / 3.
def test_calls_progress_callback(es):
    """Call with all feature types and make sure progress updates sum to 1."""
    identity = ft.Feature(es['customers']['age'])
    direct = ft.Feature(es['cohorts']['cohort_name'], es['customers'])
    agg = ft.Feature(es["sessions"]["id"],
                     parent_entity=es['customers'],
                     primitive=Count)
    # this feature is handled differently than simple features
    agg_apply = ft.Feature(es["log"]["datetime"],
                           parent_entity=es['customers'],
                           primitive=TimeSinceLast)
    trans = ft.Feature(agg, primitive=Negate)
    trans_full = ft.Feature(agg, primitive=CumSum)
    groupby_trans = ft.Feature(agg,
                               primitive=CumSum,
                               groupby=es["customers"]["cohort"])

    # The full feature list is only used when every entity is pandas-backed.
    pandas_only = all(isinstance(entity.df, pd.DataFrame) for entity in es.entities)
    if pandas_only:
        all_features = [
            identity, direct, agg, agg_apply, trans, trans_full, groupby_trans
        ]
    else:
        all_features = [identity, direct, agg, trans]

    class MockProgressCallback:
        """Accumulates every progress update it is called with."""

        def __init__(self):
            self.total = 0

        def __call__(self, update):
            self.total += update

    instance_ids = [0, 1, 2]
    # First pass has no cutoff; the second uses a time_last with no data.
    for cutoff in (None, pd.Timestamp("1950")):
        calculator = FeatureSetCalculator(es,
                                          time_last=cutoff,
                                          feature_set=FeatureSet(all_features))
        tracker = MockProgressCallback()
        calculator.run(np.array(instance_ids), tracker)
        assert np.isclose(tracker.total, 1)
def test_two_kinds_of_dependents(es):
    """Compute transforms of a filtered aggregation next to a stacked aggregation.

    The same ``where`` clause is used by a customer-level Sum (consumed by
    Percentile and Absolute) and by a session-level Sum that is summed
    again per customer.
    """
    value = ft.Feature(es['log']['value'])
    product = ft.Feature(es['log']['product_id'])

    customer_sum = ft.Feature(value,
                              parent_entity=es['customers'],
                              where=product == 'coke zero',
                              primitive=Sum)
    percentile = ft.Feature(customer_sum, primitive=Percentile)
    absolute = ft.Feature(customer_sum, primitive=Absolute)

    session_sum = ft.Feature(value,
                             parent_entity=es['sessions'],
                             where=product == 'coke zero',
                             primitive=Sum)
    stacked_sum = ft.Feature(session_sum,
                             parent_entity=es['customers'],
                             primitive=Sum)

    targets = FeatureSet([percentile, absolute, stacked_sum])
    feature_matrix = FeatureSetCalculator(es, targets).run(np.array([0, 1]))

    assert feature_matrix[percentile.get_name()].tolist() == [2. / 3, 1.0]
    assert feature_matrix[absolute.get_name()].tolist() == [15, 26]
def test_trend(pd_es):
    """Check the Trend of log value over log datetime, aggregated per customer."""
    value = ft.Feature(pd_es['log'].ww['value'])
    timestamp = ft.Feature(pd_es['log'].ww['datetime'])
    trend = ft.Feature([value, timestamp],
                       parent_dataframe_name='customers',
                       primitive=Trend)

    calculator = FeatureSetCalculator(pd_es,
                                      time_last=None,
                                      feature_set=FeatureSet([trend]))
    feature_matrix = calculator.run(np.array([0, 1, 2]))

    # The last requested customer is expected to produce NaN.
    expected = [-0.812730, 4.870378, np.nan]
    observed = feature_matrix[trend.get_name()].tolist()
    np.testing.assert_almost_equal(observed, expected, decimal=5)
def test_make_agg_feat_of_agg_feat(es):
    """Stack a customer-level Sum on top of a session-level Count of logs."""
    logs_per_session = ft.Feature(es['log']['id'],
                                  parent_entity=es['sessions'],
                                  primitive=Count)
    logs_per_customer = ft.Feature(logs_per_session,
                                   parent_entity=es['customers'],
                                   primitive=Sum)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([logs_per_customer]))
    result = calculator.run(np.array([0]))
    if isinstance(result, dd.DataFrame):
        # Materialize the dask result, index it by 'id' and rebuild the
        # index as an Int64Index before the label lookup below.
        result = result.compute().set_index('id')
        result.index = pd.Int64Index(result.index)

    assert result[logs_per_customer.get_name()][0] == 10
def test_diamond_entityset(diamond_es):
    """Sum transaction amounts per region along two different relationship paths."""
    es = diamond_es
    amount = ft.IdentityFeature(es['transactions']['amount'])

    def sum_through(middle):
        # Aggregate amount onto regions via the given intermediate entity.
        route = backward_path(es, ['regions', middle, 'transactions'])
        return ft.AggregationFeature(amount,
                                     es['regions'],
                                     primitive=ft.primitives.Sum,
                                     relationship_path=route)

    through_customers = sum_through('customers')
    through_stores = sum_through('stores')

    calculator = FeatureSetCalculator(
        es,
        time_last=datetime(2011, 4, 8),
        feature_set=FeatureSet([through_customers, through_stores]),
    )
    feature_matrix = calculator.run(np.array([0, 1, 2]))

    stores_match = feature_matrix['SUM(stores.transactions.amount)'] == [94, 261, 128]
    assert stores_match.all()
    customers_match = feature_matrix['SUM(customers.transactions.amount)'] == [72, 411, 0]
    assert customers_match.all()
def test_arithmetic_of_transform(es):
    """Apply the four numeric arithmetic primitives to two Diff features."""
    diff1 = ft.Feature([es['log']['value']], primitive=Diff)
    diff2 = ft.Feature([es['log']['value_2']], primitive=Diff)

    # (primitive class, expected values for the four requested log rows)
    to_test = [
        (AddNumeric, [np.nan, 7., -7., 10.]),
        (SubtractNumeric, [np.nan, 3., -3., 4.]),
        (MultiplyNumeric, [np.nan, 10., 10., 21.]),
        (DivideNumeric, [np.nan, 2.5, 2.5, 2.3333333333333335]),
    ]
    features = [ft.Feature([diff1, diff2], primitive=primitive())
                for primitive, _ in to_test]

    calculator = FeatureSetCalculator(es, feature_set=FeatureSet(features))
    feature_matrix = calculator.run(np.array([0, 2, 12, 13]))

    for feature, (_, expected) in zip(features, to_test):
        observed = feature_matrix[feature.get_name()].values.tolist()
        # NaN never compares equal, so the leading entries are checked
        # separately from the rest of the list.
        assert np.isnan(observed[0])
        assert np.isnan(expected[0])
        assert observed[1:] == expected[1:]
def calc_results(time_last, ids, precalculated_features=None, training_window=None):
    """Run a FeatureSetCalculator for ``ids`` at ``time_last`` and return its result.

    Relies on ``entityset``, ``feature_set``, ``progress_bar`` and ``group``
    being available from the surrounding scope. When a progress bar exists,
    the calculator is handed a callback that advances the bar by the
    reported amount multiplied by the number of rows in ``group``.
    """
    on_progress = None

    if progress_bar is not None:
        def on_progress(done):
            progress_bar.update(done * group.shape[0])

    calculator = FeatureSetCalculator(entityset,
                                      feature_set,
                                      time_last,
                                      training_window=training_window,
                                      precalculated_features=precalculated_features)
    return calculator.run(ids, progress_callback=on_progress)
def test_make_agg_feat_using_prev_time(es):
    """Count logs per session within a 10 second use_previous window."""
    agg_feat = ft.Feature(es['log']['id'],
                          parent_entity=es['sessions'],
                          use_previous=Timedelta(10, 's'),
                          primitive=Count)
    feature_set = FeatureSet([agg_feat])

    # (cutoff time, expected count for session 0)
    scenarios = (
        (datetime(2011, 4, 9, 10, 30, 10), 2),
        (datetime(2011, 4, 9, 10, 30, 30), 1),
    )
    for cutoff, expected in scenarios:
        calculator = FeatureSetCalculator(es,
                                          time_last=cutoff,
                                          feature_set=feature_set)
        result = calculator.run(np.array([0]))
        # Dask results are lazy; materialize them before indexing.
        if isinstance(result, dd.DataFrame):
            result = result.compute()
        assert result[agg_feat.get_name()][0] == expected
def test_make_agg_feat_using_prev_time(es):
    """Count logs per session within a 10 second use_previous window."""
    agg_feat = ft.Feature(
        es["log"].ww["id"],
        parent_dataframe_name="sessions",
        use_previous=Timedelta(10, "s"),
        primitive=Count,
    )
    feature_set = FeatureSet([agg_feat])

    # (cutoff time, expected count for session 0)
    scenarios = (
        (datetime(2011, 4, 9, 10, 30, 10), 2),
        (datetime(2011, 4, 9, 10, 30, 30), 1),
    )
    for cutoff, expected in scenarios:
        calculator = FeatureSetCalculator(es,
                                          time_last=cutoff,
                                          feature_set=feature_set)
        feature_matrix = to_pandas(calculator.run(np.array([0])))
        assert feature_matrix[agg_feat.get_name()][0] == expected
def test_deep_agg_feat_chain(es):
    """
    Agg feat of agg feat:
        region.Mean(customer.Count(Log))
    """
    logs_per_customer = ft.Feature(es['log'].ww['id'],
                                   parent_dataframe_name='customers',
                                   primitive=Count)
    region_mean = ft.Feature(logs_per_customer,
                             parent_dataframe_name=u'régions',
                             primitive=Mean)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([region_mean]))
    raw_matrix = calculator.run(np.array(['United States']))
    feature_matrix = to_pandas(raw_matrix, index='id')

    assert feature_matrix[region_mean.get_name()][0] == 17 / 3.
def test_make_agg_feat_multiple_dtypes(es):
    """Compute a Count and a Mode feature side by side under the same where clause."""
    is_coke_zero = IdentityFeature(es['log']['product_id']) == 'coke zero'

    count_feat = ft.Feature(es['log']['id'],
                            parent_entity=es['sessions'],
                            where=is_coke_zero,
                            primitive=Count)
    mode_feat = ft.Feature(es['log']['product_id'],
                           parent_entity=es['sessions'],
                           where=is_coke_zero,
                           primitive=Mode)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([count_feat, mode_feat]))
    feature_matrix = calculator.run(np.array([0]))

    count_value = feature_matrix[count_feat.get_name()][0]
    mode_value = feature_matrix[mode_feat.get_name()][0]
    assert count_value == 3
    assert mode_value == 'coke zero'
def test_make_dfeat_of_agg_feat_on_self(es):
    """
    The graph looks like this:

        R       R = Regions, a parent of customers
        |
        C       C = Customers, the entity we're trying to predict on
        |
       etc.

    We're trying to calculate a DFeat from C to R on an agg_feat of R on C.
    """
    customers_per_region = ft.Feature(es['customers']['id'],
                                      parent_entity=es[u'régions'],
                                      primitive=Count)
    region_size_on_customer = DirectFeature(customers_per_region,
                                            child_entity=es['customers'])

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([region_size_on_customer]))
    feature_matrix = calculator.run(np.array([0]))

    assert feature_matrix[region_size_on_customer.get_name()][0] == 3
def test_make_agg_feat_where_count_feat(es):
    """
    Feature we're creating is:
    Number of sessions for each customer where the
    number of logs in the session is greater than 1
    """
    logs_per_session = ft.Feature(es['log']['id'],
                                  parent_entity=es['sessions'],
                                  primitive=Count)
    busy_sessions = ft.Feature(es['sessions']['id'],
                               parent_entity=es['customers'],
                               where=logs_per_session > 1,
                               primitive=Count)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([busy_sessions]))
    feature_matrix = calculator.run(np.array([0, 1]))

    column = feature_matrix[busy_sessions.get_name()]
    first, second = column[0:2]
    assert first == 2
    assert second == 2
def test_topn(es):
    """Check the two most common products per customer via NMostCommon(n=2)."""
    topn = ft.Feature(es['log']['product_id'],
                      parent_entity=es['customers'],
                      primitive=NMostCommon(n=2))
    feature_set = FeatureSet([topn])

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=feature_set)
    df = calculator.run([0, 1, 2])
    true_results = pd.DataFrame(
        [['toothpaste', 'coke zero'],
         ['coke zero', 'Haribo sugar-free gummy bears'],
         ['taco clock', np.nan]])

    # Asserting on a bare list is always true for a non-empty list, so the
    # per-column checks are reduced with all() to make this assertion real.
    assert all(name in df.columns for name in topn.get_feature_names())

    for i in range(df.shape[0]):
        if i == 0:
            # coke zero and toothpaste have same number of occurrences,
            # so their order is not fixed and the row is compared as a set
            assert set(true_results.loc[i].values) == set(df.loc[i].values)
        else:
            for i1, i2 in zip(true_results.loc[i], df.iloc[i]):
                # NaN never equals itself, so treat two nulls as a match
                assert (pd.isnull(i1) and pd.isnull(i2)) or (i1 == i2)
def test_arithmetic_of_transform(es):
    """Apply the four numeric arithmetic primitives to two Diff features."""
    pandas_only = all(isinstance(e.df, pd.DataFrame) for e in es.entities)
    if not pandas_only:
        pytest.xfail("Test uses Diff which is not supported in Dask or Koalas")

    diff1 = ft.Feature([es['log']['value']], primitive=Diff)
    diff2 = ft.Feature([es['log']['value_2']], primitive=Diff)

    # (primitive class, expected values for the four requested log rows)
    to_test = [
        (AddNumeric, [np.nan, 7., -7., 10.]),
        (SubtractNumeric, [np.nan, 3., -3., 4.]),
        (MultiplyNumeric, [np.nan, 10., 10., 21.]),
        (DivideNumeric, [np.nan, 2.5, 2.5, 2.3333333333333335]),
    ]
    features = [ft.Feature([diff1, diff2], primitive=primitive())
                for primitive, _ in to_test]

    calculator = FeatureSetCalculator(es, feature_set=FeatureSet(features))
    feature_matrix = calculator.run(np.array([0, 2, 12, 13]))

    for feature, (_, expected) in zip(features, to_test):
        observed = feature_matrix[feature.get_name()].values.tolist()
        # NaN never compares equal, so the leading entries are checked
        # separately from the rest of the list.
        assert np.isnan(observed[0])
        assert np.isnan(expected[0])
        assert observed[1:] == expected[1:]
def test_diamond_entityset(diamond_es):
    """Sum transaction amounts per region along two different relationship paths."""
    es = diamond_es
    amount = ft.IdentityFeature(es["transactions"].ww["amount"])

    def sum_through(middle):
        # Aggregate amount onto regions via the given intermediate dataframe.
        route = backward_path(es, ["regions", middle, "transactions"])
        return ft.AggregationFeature(amount,
                                     "regions",
                                     primitive=ft.primitives.Sum,
                                     relationship_path=route)

    through_customers = sum_through("customers")
    through_stores = sum_through("stores")

    calculator = FeatureSetCalculator(
        es,
        time_last=datetime(2011, 4, 8),
        feature_set=FeatureSet([through_customers, through_stores]),
    )
    raw_matrix = calculator.run(np.array([0, 1, 2]))
    feature_matrix = to_pandas(raw_matrix, index="id", sort_index=True)

    stores_match = feature_matrix["SUM(stores.transactions.amount)"] == [94, 261, 128]
    assert stores_match.all()
    customers_match = feature_matrix["SUM(customers.transactions.amount)"] == [72, 411, 0]
    assert customers_match.all()
def test_make_agg_feat_where_count_or_device_type_feat(es):
    """
    Feature we're creating is:
    Number of sessions for each customer where the
    number of logs in the session is greater than 1
    OR the session's device_type equals 1
    """
    logs_per_session = ft.Feature(es['log']['id'],
                                  parent_entity=es['sessions'],
                                  primitive=Count)
    many_logs = logs_per_session > 1
    device_is_one = IdentityFeature(es['sessions']['device_type']) == 1
    either_condition = many_logs.OR(device_is_one)

    matching_sessions = ft.Feature(es['sessions']['id'],
                                   parent_entity=es['customers'],
                                   where=either_condition,
                                   primitive=Count)

    calculator = FeatureSetCalculator(es,
                                      time_last=None,
                                      feature_set=FeatureSet([matching_sessions]))
    feature_matrix = calculator.run(np.array([0]))

    column = feature_matrix[matching_sessions.get_name()]
    assert column[0] == 3
def test_make_agg_feat_using_prev_n_events(es):
    """Take Min of log value per session over the previous 1 and 3 observations."""
    if es.dataframe_type != Library.PANDAS.value:
        pytest.xfail("Distrubuted entitysets do not support use_previous")

    def min_over_last(count):
        # Min of log.value per session, limited to the last `count` observations.
        return ft.Feature(
            es["log"].ww["value"],
            parent_dataframe_name="sessions",
            use_previous=Timedelta(count, "observations"),
            primitive=Min,
        )

    agg_feat_1 = min_over_last(1)
    agg_feat_2 = min_over_last(3)

    names_differ = agg_feat_1.get_name() != agg_feat_2.get_name()
    assert names_differ, "Features should have different names based on use_previous"

    feature_set = FeatureSet([agg_feat_1, agg_feat_2])

    # (cutoff time, expected value for 1 observation, expected for 3);
    # time_last is included by default
    scenarios = (
        (datetime(2011, 4, 9, 10, 30, 6), 5, 0),
        (datetime(2011, 4, 9, 10, 30, 30), 20, 10),
    )
    for cutoff, expected_1, expected_2 in scenarios:
        calculator = FeatureSetCalculator(es,
                                          time_last=cutoff,
                                          feature_set=feature_set)
        feature_matrix = calculator.run(np.array([0]))

        observed_1 = feature_matrix[agg_feat_1.get_name()][0]
        observed_2 = feature_matrix[agg_feat_2.get_name()][0]
        assert observed_1 == expected_1
        assert observed_2 == expected_2