def test_make_agg_feat_using_prev_n_events(entityset, backend):
    """Min over the last N ``log`` observations must respect ``use_previous``.

    Builds two ``Min`` features on ``log.value`` aggregated to ``sessions``:
    one restricted to the previous 1 observation and one to the previous 3.
    The features must get distinct names, and the values computed for
    instance 0 are checked at two different ``time_last`` cutoffs.
    """
    # NOTE(review): another definition with this exact name follows in this
    # file; if both sit at module level, the later one replaces this one.

    def min_over_previous(n_observations):
        # Same construction for both features; only the window size differs.
        return Min(entityset['log']['value'],
                   parent_entity=entityset['sessions'],
                   use_previous=Timedelta(n_observations, 'observations',
                                          entity=entityset['log']))

    last_one = min_over_previous(1)
    last_three = min_over_previous(3)

    name_one = last_one.get_name()
    name_three = last_three.get_name()
    assert name_one != name_three, \
        'Features should have different names based on use_previous'

    calculator = backend([last_one, last_three])

    # time_last is included by default
    early = calculator.calculate_all_features(
        instance_ids=[0],
        time_last=datetime(2011, 4, 9, 10, 30, 6))
    early_one = early[name_one][0]
    early_three = early[name_three][0]
    assert early_one == 5
    assert early_three == 0

    late = calculator.calculate_all_features(
        instance_ids=[0],
        time_last=datetime(2011, 4, 9, 10, 30, 30))
    late_one = late[name_one][0]
    late_three = late[name_three][0]
    assert late_one == 20
    assert late_three == 10
def test_make_agg_feat_using_prev_n_events(entityset, backend):
    """Verify ``use_previous`` observation windows on a ``Min`` aggregation.

    Two ``Min`` features over ``log.value`` (parent entity ``sessions``) are
    created with windows of 1 and 3 observations. They must be named
    differently, and for instance 0 each must produce the expected minimum
    at each of the two ``time_last`` cutoffs listed in ``cases``.
    """
    # NOTE(review): an earlier definition with this exact name precedes this
    # one in the file; if both sit at module level, this one is the one kept.
    log = entityset['log']
    sessions = entityset['sessions']

    window_1 = Min(log['value'],
                   parent_entity=sessions,
                   use_previous=Timedelta(1, 'observations', entity=log))
    window_3 = Min(log['value'],
                   parent_entity=sessions,
                   use_previous=Timedelta(3, 'observations', entity=log))

    assert window_1.get_name() != window_3.get_name(), \
        'Features should have different names based on use_previous'

    pandas_backend = backend([window_1, window_3])

    # Each case: (time_last, expected 1-observation min, expected 3-observation min)
    cases = [
        # time_last is included by default
        (datetime(2011, 4, 9, 10, 30, 6), 5, 0),
        (datetime(2011, 4, 9, 10, 30, 30), 20, 10),
    ]
    for time_last, expected_1, expected_3 in cases:
        result = pandas_backend.calculate_all_features(instance_ids=[0],
                                                       time_last=time_last)
        # Read both values before asserting, as the checks are paired.
        got_1 = result[window_1.get_name()][0]
        got_3 = result[window_3.get_name()][0]
        assert got_1 == expected_1
        assert got_3 == expected_3
def test_uses_full_entity_feat_of_approximate(entityset):
    """Check feature matrices with and without ``approximate`` agree.

    A direct feature of a customer-level ``Min`` is first computed alone with
    a 10 second approximation window and must equal ``[1, 0]``. The same
    feature computed alongside a ``Percentile``, another direct feature and a
    session-level ``Sum`` must give identical values. Finally the percentile,
    the other direct feature and the session sum must match pairwise across
    the 10 s approximation, a 10 ms approximation and no approximation.
    """
    es = entityset
    session_sum = Sum(es['log']['value'], es['sessions'])
    customer_sum = Sum(session_sum, es['customers'])
    customer_min = Min(session_sum, es['customers'])
    direct_sum = DirectFeature(customer_sum, es['sessions'])
    direct_min = DirectFeature(customer_min, es['sessions'])
    pct = Percentile(direct_sum)

    def compute(features, **extra):
        # Every call shares these instances and cutoffs; the lists are
        # rebuilt per call so no call sees another call's argument objects.
        return calculate_feature_matrix(
            features,
            instance_ids=[0, 2],
            cutoff_time_in_index=True,
            cutoff_time=[
                datetime(2011, 4, 9, 10, 31, 19),
                datetime(2011, 4, 9, 11, 0, 0),
            ],
            **extra)

    def everything():
        return [pct, direct_sum, direct_min, session_sum]

    # only dfeat2 should be approximated
    # because Percentile needs all values
    min_only = compute([direct_min], approximate=Timedelta(10, 's'))
    assert min_only[direct_min.get_name()].tolist() == [1, 0]

    approx = compute(everything(), approximate=Timedelta(10, 's'))
    assert (min_only[direct_min.get_name()].tolist() ==
            approx[direct_min.get_name()].tolist())

    small_approx = compute(everything(), approximate=Timedelta(10, 'ms'))
    no_approx = compute(everything())

    for feature in [pct, direct_sum, session_sum]:
        matrices = [approx, small_approx, no_approx]
        for left, right in combinations(matrices, 2):
            column = feature.get_name()
            assert left[column].tolist() == right[feature.get_name()].tolist()
def test_approx_base_feature_is_also_first_class_feature(entityset):
    """A feature used as the base of an approximated one is still computed.

    The session-level ``Min`` of product ratings is requested directly and is
    also the input to a customer-level ``Sum`` that is pulled back down to
    sessions. With a 10 second ``approximate`` window, both requested columns
    must hold the expected values for instances 0 and 2.
    """
    es = entityset
    product_rating = DirectFeature(es['products']['rating'], es['log'])
    # This should still be computed properly
    session_min = Min(product_rating, es['sessions'])
    customer_total = Sum(session_min, es['customers'])
    # This is to be approximated
    session_from_customer = DirectFeature(customer_total, es['sessions'])

    # One cutoff time per requested instance.
    cutoffs = pd.DataFrame({
        'time': [
            datetime(2011, 4, 9, 10, 31, 19),
            datetime(2011, 4, 9, 11, 0, 0),
        ],
        'instance_id': [0, 2],
    })

    matrix = calculate_feature_matrix(
        [session_from_customer, session_min],
        entityset,
        approximate=Timedelta(10, 's'),
        cutoff_time=cutoffs)

    assert matrix[session_from_customer.get_name()].tolist() == [8.5, 7]
    assert matrix[session_min.get_name()].tolist() == [4, 1.5]