def test_allowed_paths(es):
    """Check that ``allowed_paths`` restricts the entities DFS aggregates over.

    Without a constraint, DFS targeting ``customers`` is expected to produce
    ``Last`` features over both ``sessions`` and ``log``. When
    ``allowed_paths`` only lists ``['customers', 'sessions']``, the
    ``sessions`` feature must still be built and the ``log`` one must not.
    """
    # NOTE(review): a second `test_allowed_paths` is defined later in this
    # file; the later definition rebinds the name, so only one of the two
    # is collected by pytest.
    dfs_args = {
        'target_entity_id': 'customers',
        'entityset': es,
        'agg_primitives': [Last],
        'trans_primitives': [],
        'max_depth': 2,
        'seed_features': [],
    }

    # Baseline: no path constraint.
    open_features = DeepFeatureSynthesis(**dfs_args).build_features()
    open_names = [feat.get_name() for feat in open_features]

    last_over_sessions = Last(es['sessions']['device_type'], es['customers'])
    last_over_log = Last(es['log']['value'], es['customers'])
    sessions_name = last_over_sessions.get_name()
    log_name = last_over_log.get_name()

    assert sessions_name in open_names
    assert log_name in open_names

    # Constrained: only the customers -> sessions path is allowed.
    limited_dfs = DeepFeatureSynthesis(
        allowed_paths=[['customers', 'sessions']],
        **dfs_args
    )
    limited_names = [feat.get_name() for feat in limited_dfs.build_features()]

    assert sessions_name in limited_names
    assert log_name not in limited_names
def test_seed_features(es):
    """Check that DFS returns seed features and aggregates built on them.

    Two seed features are supplied: a boolean ``Count(...) > 2`` on
    ``sessions`` and an ``Hour`` transform on ``log``. The output must
    contain the first seed itself and ``Last`` of the second seed
    aggregated to ``sessions``.
    """
    # NOTE(review): a second `test_seed_features` is defined later in this
    # file; the later definition rebinds the name, so only one of the two
    # is collected by pytest.
    many_logs_seed = Count(es['log']["id"], es['sessions']) > 2
    log_hour_seed = Hour(es['log']['datetime'])
    last_log_hour = Last(log_hour_seed, es['sessions'])

    synthesizer = DeepFeatureSynthesis(
        target_entity_id='sessions',
        entityset=es,
        agg_primitives=[Last],
        trans_primitives=[],
        max_depth=2,
        seed_features=[many_logs_seed, log_hour_seed],
    )

    # Build the name list once and reuse it for every membership check.
    built_names = [feat.get_name() for feat in synthesizer.build_features()]

    assert many_logs_seed.get_name() in built_names
    assert last_log_hour.get_name() in built_names
def test_allowed_paths(es):
    """Verify that ``allowed_paths`` prunes features from disallowed entities.

    Runs DFS on ``customers`` twice with the same settings: once
    unconstrained, once with ``allowed_paths=[['customers', 'sessions']]``.
    The ``Last`` feature over ``sessions`` must appear in both runs; the
    ``Last`` feature over ``log`` must appear only in the unconstrained run.
    """
    # NOTE(review): an earlier definition with this same name exists in this
    # file; this definition rebinds the name and replaces the earlier one.
    shared_kwargs = dict(
        target_entity_id='customers',
        entityset=es,
        agg_primitives=[Last],
        trans_primitives=[],
        max_depth=2,
        seed_features=[],
    )

    unconstrained = DeepFeatureSynthesis(**shared_kwargs)
    names_unconstrained = [
        feature.get_name() for feature in unconstrained.build_features()
    ]

    sessions_feat = Last(es['sessions']['device_type'], es['customers'])
    log_feat = Last(es['log']['value'], es['customers'])

    for expected in (sessions_feat, log_feat):
        assert expected.get_name() in names_unconstrained

    constrained = DeepFeatureSynthesis(
        allowed_paths=[['customers', 'sessions']], **shared_kwargs)
    names_constrained = [
        feature.get_name() for feature in constrained.build_features()
    ]

    assert sessions_feat.get_name() in names_constrained
    assert log_feat.get_name() not in names_constrained
def test_seed_features(es):
    """Verify seed features are kept and used as inputs to new features.

    DFS targets ``sessions`` with ``Last`` as the only aggregation
    primitive. The boolean seed (``Count(...) > 2``) must be present in the
    result, as must ``Last`` applied to the ``Hour`` seed from ``log``.
    """
    # NOTE(review): an earlier definition with this same name exists in this
    # file; this definition rebinds the name and replaces the earlier one.
    sessions_seed = Count(es['log']["id"], es['sessions']) > 2
    log_seed = Hour(es['log']['datetime'])
    agg_on_seed = Last(log_seed, es['sessions'])

    dfs = DeepFeatureSynthesis(
        target_entity_id='sessions',
        entityset=es,
        agg_primitives=[Last],
        trans_primitives=[],
        max_depth=2,
        seed_features=[sessions_seed, log_seed],
    )
    result_names = [f.get_name() for f in dfs.build_features()]

    for expected in (sessions_seed, agg_on_seed):
        assert expected.get_name() in result_names
def test_dfs_builds_on_seed_features_more_than_max_depth(es):
    """Check how ``max_depth`` applies to features stacked on seed features.

    With ``max_depth=1``, DFS must return the boolean seed and ``Last`` of
    the ``Hour`` seed (one level above a seed), but must not return a
    feature stacked two further levels on top of that aggregation.
    """
    # NOTE(review): a second definition with this same name appears later in
    # this file (it uses Mode where this one uses Count); the later one
    # rebinds the name, so only one of the two is collected by pytest.
    many_logs_seed = Count(es['log']["id"], es['sessions']) > 2
    log_hour_seed = Hour(es['log']['datetime'])
    last_log_hour = Last(log_hour_seed, es['sessions'])

    # Two more levels on top of last_log_hour: an aggregation up to
    # customers, then a direct feature back down to sessions. That exceeds
    # max_depth, so DFS should not build it.
    too_deep = DirectFeature(
        Count(last_log_hour, es['customers']),
        es['sessions'],
    )

    synthesizer = DeepFeatureSynthesis(
        target_entity_id='sessions',
        entityset=es,
        agg_primitives=[Last, Count],
        trans_primitives=[],
        max_depth=1,
        seed_features=[many_logs_seed, log_hour_seed],
    )

    # Build the name list once and reuse it for every membership check.
    built_names = [feat.get_name() for feat in synthesizer.build_features()]

    assert many_logs_seed.get_name() in built_names
    assert last_log_hour.get_name() in built_names
    assert too_deep.get_name() not in built_names
def test_dfs_builds_on_seed_features_more_than_max_depth(es):
    """Verify DFS stops stacking on seed features once ``max_depth`` is hit.

    DFS runs on ``sessions`` with ``max_depth=1`` and two seed features.
    The boolean seed and the ``Last`` aggregation of the ``Hour`` seed must
    be returned; a direct feature of a ``Mode`` aggregation built on top of
    that ``Last`` feature must not.
    """
    # NOTE(review): an earlier definition with this same name exists in this
    # file (it uses Count where this one uses Mode); this definition rebinds
    # the name and replaces the earlier one.
    sessions_seed = Count(es['log']["id"], es['sessions']) > 2
    log_seed = Hour(es['log']['datetime'])
    agg_on_seed = Last(log_seed, es['sessions'])

    # Stacks an aggregation to customers and a direct feature back to
    # sessions on top of agg_on_seed, i.e. two levels beyond it, which is
    # more than max_depth allows.
    # NOTE(review): Mode is not listed in agg_primitives below, so this
    # feature may be absent regardless of depth -- confirm the final
    # assertion exercises the depth limit as intended.
    beyond_depth = DirectFeature(
        Mode(agg_on_seed, es['customers']),
        es['sessions'],
    )

    dfs = DeepFeatureSynthesis(
        target_entity_id='sessions',
        entityset=es,
        agg_primitives=[Last, Count],
        trans_primitives=[],
        max_depth=1,
        seed_features=[sessions_seed, log_seed],
    )
    result_names = [f.get_name() for f in dfs.build_features()]

    for expected in (sessions_seed, agg_on_seed):
        assert expected.get_name() in result_names
    assert beyond_depth.get_name() not in result_names