def test_max_hlevel(es):
    kwargs = dict(
        target_entity_id='log',
        entityset=es,
        agg_primitives=[Count, Last],
        trans_primitives=[Hour],
        max_depth=-1,
    )

    dfs_h_n1 = DeepFeatureSynthesis(max_hlevel=-1, **kwargs)
    dfs_h_0 = DeepFeatureSynthesis(max_hlevel=0, **kwargs)
    dfs_h_1 = DeepFeatureSynthesis(max_hlevel=1, **kwargs)
    feats_n1 = dfs_h_n1.build_features()
    feats_n1 = [f.get_name() for f in feats_n1]
    feats_0 = dfs_h_0.build_features()
    feats_0 = [f.get_name() for f in feats_0]
    feats_1 = dfs_h_1.build_features()
    feats_1 = [f.get_name() for f in feats_1]

    customer_log = Last(es['log']['value'], es['customers'])
    session_log = Last(es['log']['value'], es['sessions'])
    log_customer_log = Feature(customer_log, es['log'])
    log_session_log = Feature(session_log, es['log'])
    assert log_customer_log.get_name() in feats_n1
    assert log_session_log.get_name() in feats_n1

    assert log_customer_log.get_name() not in feats_1
    assert log_session_log.get_name() in feats_1

    assert log_customer_log.get_name() not in feats_0
    assert log_session_log.get_name() not in feats_0
def test_allowed_paths(es):
    kwargs = dict(
        target_entity_id='customers',
        entityset=es,
        agg_primitives=[Last],
        trans_primitives=[],
        max_depth=2,
        seed_features=[]
    )
    dfs_unconstrained = DeepFeatureSynthesis(**kwargs)
    features_unconstrained = dfs_unconstrained.build_features()

    unconstrained_names = [f.get_name() for f in features_unconstrained]
    customers_session_feat = Last(es['sessions']['device_type'],
                                  es['customers'])
    customers_session_log_feat = Last(es['log']['value'], es['customers'])
    assert customers_session_feat.get_name() in unconstrained_names
    assert customers_session_log_feat.get_name() in unconstrained_names

    dfs_constrained = DeepFeatureSynthesis(allowed_paths=[['customers',
                                                           'sessions']],
                                           **kwargs)
    features = dfs_constrained.build_features()
    names = [f.get_name() for f in features]
    assert customers_session_feat.get_name() in names
    assert customers_session_log_feat.get_name() not in names
Example #3
0
def test_seed_features(es):
    seed_feature_sessions = Count(es['log']["id"], es['sessions']) > 2
    seed_feature_log = Hour(es['log']['datetime'])
    session_agg = Last(seed_feature_log, es['sessions'])
    dfs_obj = DeepFeatureSynthesis(
        target_entity_id='sessions',
        entityset=es,
        agg_primitives=[Last],
        trans_primitives=[],
        max_depth=2,
        seed_features=[seed_feature_sessions, seed_feature_log])
    features = dfs_obj.build_features()
    assert seed_feature_sessions.get_name() in [f.get_name() for f in features]
    assert session_agg.get_name() in [f.get_name() for f in features]
def test_seed_features(es):
    seed_feature_sessions = Count(es['log']["id"], es['sessions']) > 2
    seed_feature_log = Hour(es['log']['datetime'])
    session_agg = Last(seed_feature_log, es['sessions'])
    dfs_obj = DeepFeatureSynthesis(target_entity_id='sessions',
                                   entityset=es,
                                   agg_primitives=[Last],
                                   trans_primitives=[],
                                   max_depth=2,
                                   seed_features=[seed_feature_sessions,
                                                  seed_feature_log])
    features = dfs_obj.build_features()
    assert seed_feature_sessions.get_name() in [f.get_name()
                                                for f in features]
    assert session_agg.get_name() in [f.get_name() for f in features]
Example #5
0
def test_get_depth(es):
    es = make_ecommerce_entityset()
    f = Feature(es['log']['value'])
    g = Feature(es['log']['value'])
    agg1 = Last(f, es['sessions'])
    agg2 = Last(agg1, es['customers'])
    d1 = Feature(agg2, es['sessions'])
    d2 = Feature(d1, es['log'])
    assert d2.get_depth() == 4
    # Make sure this works if we pass in two of the same
    # feature. This came up when user supplied duplicates
    # in the seed_features of DFS.
    assert d2.get_depth(stop_at=[f, g]) == 4
    assert d2.get_depth(stop_at=[f, g, agg1]) == 3
    assert d2.get_depth(stop_at=[f, g, agg1]) == 3
    assert d2.get_depth(stop_at=[f, g, agg2]) == 2
    assert d2.get_depth(stop_at=[f, g, d1]) == 1
    assert d2.get_depth(stop_at=[f, g, d2]) == 0
Example #6
0
def test_dfs_builds_on_seed_features_more_than_max_depth(es):
    seed_feature_sessions = Count(es['log']["id"], es['sessions']) > 2
    seed_feature_log = Hour(es['log']['datetime'])
    session_agg = Last(seed_feature_log, es['sessions'])

    # Depth of this feat is 2 relative to session_agg, the seed feature,
    # which is greater than max_depth so it shouldn't be built
    session_agg_trans = DirectFeature(Count(session_agg, es['customers']),
                                      es['sessions'])
    dfs_obj = DeepFeatureSynthesis(
        target_entity_id='sessions',
        entityset=es,
        agg_primitives=[Last, Count],
        trans_primitives=[],
        max_depth=1,
        seed_features=[seed_feature_sessions, seed_feature_log])
    features = dfs_obj.build_features()
    assert seed_feature_sessions.get_name() in [f.get_name() for f in features]
    assert session_agg.get_name() in [f.get_name() for f in features]
    assert session_agg_trans.get_name() not in [f.get_name() for f in features]
def test_dfs_builds_on_seed_features_more_than_max_depth(es):
    seed_feature_sessions = Count(es['log']["id"], es['sessions']) > 2
    seed_feature_log = Hour(es['log']['datetime'])
    session_agg = Last(seed_feature_log, es['sessions'])

    # Depth of this feat is 2 relative to session_agg, the seed feature,
    # which is greater than max_depth so it shouldn't be built
    session_agg_trans = DirectFeature(Mode(session_agg, es['customers']),
                                      es['sessions'])
    dfs_obj = DeepFeatureSynthesis(target_entity_id='sessions',
                                   entityset=es,
                                   agg_primitives=[Last, Count],
                                   trans_primitives=[],
                                   max_depth=1,
                                   seed_features=[seed_feature_sessions,
                                                  seed_feature_log])
    features = dfs_obj.build_features()
    assert seed_feature_sessions.get_name() in [f.get_name()
                                                for f in features]
    assert session_agg.get_name() in [f.get_name() for f in features]
    assert session_agg_trans.get_name() not in [f.get_name()
                                                for f in features]
Example #8
0
def test_return_type_inference_numeric_time_index(es_numeric):
    last = Last(es_numeric["log"]["datetime"], es_numeric["customers"])
    assert last.variable_type == Numeric
Example #9
0
def test_return_type_inference_datetime_time_index(es):
    last = Last(es["log"]["datetime"], es["customers"])
    assert last.variable_type == Datetime