Example #1
0
def test_make_agg_feat_using_prev_n_events(entityset, backend):
    """Check Min over log 'value' per session when limited to the previous N observations.

    Two features are built that differ only in the ``use_previous`` window
    (1 observation vs. 3 observations). They must get distinct names, and
    their values for session 0 are checked at two different ``time_last``
    cutoffs.
    """
    def min_over_last(n_observations):
        # Same aggregation each time; only the observation window changes.
        window = Timedelta(n_observations,
                           'observations',
                           entity=entityset['log'])
        return Min(entityset['log']['value'],
                   parent_entity=entityset['sessions'],
                   use_previous=window)

    agg_feat_1 = min_over_last(1)
    agg_feat_2 = min_over_last(3)

    assert agg_feat_1.get_name() != agg_feat_2.get_name(), \
        'Features should have different names based on use_previous'

    pandas_backend = backend([agg_feat_1, agg_feat_2])

    # (time_last, expected value with 1 observation, expected value with 3)
    # time_last is included by default
    scenarios = (
        (datetime(2011, 4, 9, 10, 30, 6), 5, 0),
        (datetime(2011, 4, 9, 10, 30, 30), 20, 10),
    )
    for cutoff, expected_1, expected_2 in scenarios:
        df = pandas_backend.calculate_all_features(instance_ids=[0],
                                                   time_last=cutoff)
        v1 = df[agg_feat_1.get_name()][0]
        v2 = df[agg_feat_2.get_name()][0]
        assert v1 == expected_1
        assert v2 == expected_2
def test_make_agg_feat_using_prev_n_events(entityset, backend):
    """Min of log 'value' per session, restricted to the last 1 and last 3 observations.

    Asserts that the two features are named differently and that session 0
    yields the expected minimum for each window at two ``time_last`` cutoffs.
    """
    features = [
        Min(entityset['log']['value'],
            parent_entity=entityset['sessions'],
            use_previous=Timedelta(n_obs, 'observations',
                                   entity=entityset['log']))
        for n_obs in (1, 3)
    ]
    name_1, name_2 = (feature.get_name() for feature in features)

    assert name_1 != name_2, \
        'Features should have different names based on use_previous'

    pandas_backend = backend(features)

    # time_last is included by default
    early_cutoff = datetime(2011, 4, 9, 10, 30, 6)
    early = pandas_backend.calculate_all_features(instance_ids=[0],
                                                  time_last=early_cutoff)
    v1, v2 = early[name_1][0], early[name_2][0]
    assert v1 == 5
    assert v2 == 0

    late_cutoff = datetime(2011, 4, 9, 10, 30, 30)
    late = pandas_backend.calculate_all_features(instance_ids=[0],
                                                 time_last=late_cutoff)
    v1, v2 = late[name_1][0], late[name_2][0]
    assert v1 == 20
    assert v2 == 10
def test_approx_base_feature_is_also_first_class_feature(entityset):
    """Request an approximated feature together with the aggregation it is built on.

    The session-level Min feature is both an input to the customer-level Sum
    (which is pulled back onto sessions and computed with ``approximate``)
    and a requested output column in its own right. Both columns are checked
    against fixed expected values.
    """
    # This should still be computed properly
    rating_per_log = DirectFeature(entityset['products']['rating'],
                                   entityset['log'])
    session_min = Min(rating_per_log, entityset['sessions'])

    # This is to be approximated
    customer_sum = Sum(session_min, entityset['customers'])
    session_customer_sum = DirectFeature(customer_sum, entityset['sessions'])

    cutoff_time = pd.DataFrame({
        'time': [datetime(2011, 4, 9, 10, 31, 19),
                 datetime(2011, 4, 9, 11, 0, 0)],
        'instance_id': [0, 2],
    })
    feature_matrix = calculate_feature_matrix(
        [session_customer_sum, session_min],
        entityset,
        approximate=Timedelta(10, 's'),
        cutoff_time=cutoff_time)

    # Checked in order: approximated column first, then the base aggregation.
    checks = (
        (session_customer_sum, [8.5, 7]),
        (session_min, [4, 1.5]),
    )
    for feature, expected in checks:
        observed = feature_matrix[feature.get_name()].tolist()
        assert observed == expected
def test_approx_base_feature_is_also_first_class_feature(entityset):
    """A base aggregation stays correct when it is also requested alongside an approximation.

    Builds Min(product rating) per session, sums it per customer, and brings
    that sum back onto sessions. The feature matrix is calculated with a
    10-second ``approximate`` window for both the session-level direct
    feature and the underlying Min feature, and each column is compared to
    its expected list.
    """
    sessions = entityset['sessions']
    customers = entityset['customers']

    product_rating = DirectFeature(entityset['products']['rating'],
                                   entityset['log'])
    # This should still be computed properly
    min_rating = Min(product_rating, sessions)
    # This is to be approximated
    sum_by_customer = DirectFeature(Sum(min_rating, customers), sessions)

    first_cutoff = datetime(2011, 4, 9, 10, 31, 19)
    second_cutoff = datetime(2011, 4, 9, 11, 0, 0)
    cutoff_time = pd.DataFrame({'time': [first_cutoff, second_cutoff],
                                'instance_id': [0, 2]})

    approx_window = Timedelta(10, 's')
    feature_matrix = calculate_feature_matrix([sum_by_customer, min_rating],
                                              entityset,
                                              approximate=approx_window,
                                              cutoff_time=cutoff_time)

    approximated_values = feature_matrix[sum_by_customer.get_name()].tolist()
    assert approximated_values == [8.5, 7]

    base_values = feature_matrix[min_rating.get_name()].tolist()
    assert base_values == [4, 1.5]