def test_compare_all_nans(es):
    """Check comparing an all-NaN Mode feature against a string.

    Features are calculated for instances 0, 1 and 2 with a cutoff that is
    meant to precede all data (per the inline note). The Mode column is
    asserted to hold no non-null values, and the equality-comparison column
    is asserted to hold no truthy value.
    """
    mode_of_product = Mode(es['log']['product_id'], es['sessions'])
    equals_brown_bag = mode_of_product == 'brown bag'

    # before all data
    cutoff = pd.Timestamp('1/1/1993')

    backend = PandasBackend(es, [mode_of_product, equals_brown_bag])
    result = backend.calculate_all_features(
        instance_ids=[0, 1, 2],
        time_last=cutoff,
    )

    # Nothing should survive dropping the nulls from the Mode column.
    mode_column = result[mode_of_product.get_name()]
    assert len(mode_column.dropna()) == 0

    # The comparison must not evaluate truthy for any instance.
    comparison_column = result[equals_brown_bag.get_name()]
    assert not comparison_column.any()
def test_compare_all_nans(es):
    """Verify that `Mode == 'brown bag'` is never truthy when Mode is all NaN.

    The calculation uses a cutoff of 1/1/1993, intended to fall before all
    data (per the inline note), for instance ids 0, 1 and 2.
    """
    all_nan_mode = Mode(es['log']['product_id'], es['sessions'])
    is_brown_bag = all_nan_mode == 'brown bag'
    features = [all_nan_mode, is_brown_bag]

    # before all data
    early_cutoff = pd.Timestamp('1/1/1993')

    feature_matrix = PandasBackend(es, features).calculate_all_features(
        instance_ids=[0, 1, 2],
        time_last=early_cutoff,
    )

    # The Mode column must contain no non-null entries at all.
    non_null_modes = feature_matrix[all_nan_mode.get_name()].dropna()
    assert non_null_modes.shape[0] == 0

    # The comparison column must contain no truthy entry.
    any_match = feature_matrix[is_brown_bag.get_name()].any()
    assert not any_match
예제 #3
0
def test_make_agg_feat_multiple_dtypes(entityset, backend):
    """Check two `where`-filtered aggregations with different result types.

    A Count and a Mode over the log entity, both restricted by the same
    `product_id == 'coke zero'` condition, are calculated together for
    session 0. The Count is asserted to be 3 and the Mode 'coke zero'.
    """
    log = entityset['log']
    sessions = entityset['sessions']

    is_coke_zero = IdentityFeature(log['product_id']) == 'coke zero'

    filtered_count = Count(log['id'],
                           parent_entity=sessions,
                           where=is_coke_zero)
    filtered_mode = Mode(log['product_id'],
                         parent_entity=sessions,
                         where=is_coke_zero)

    calculator = backend([filtered_count, filtered_mode])
    feature_matrix = calculator.calculate_all_features(instance_ids=[0],
                                                       time_last=None)

    count_value = feature_matrix[filtered_count.get_name()][0]
    mode_value = feature_matrix[filtered_mode.get_name()][0]
    assert count_value == 3
    assert mode_value == 'coke zero'
def test_make_agg_feat_multiple_dtypes(entityset, backend):
    """Calculate a filtered Count and a filtered Mode in the same backend call.

    Both aggregations share the `product_id == 'coke zero'` condition and
    target the sessions entity. For instance id 0 the test asserts a count
    of 3 and a mode of 'coke zero'.
    """
    product_is_coke_zero = (
        IdentityFeature(entityset['log']['product_id']) == 'coke zero'
    )
    # Keyword arguments common to both aggregation features.
    shared_kwargs = {
        'parent_entity': entityset['sessions'],
        'where': product_is_coke_zero,
    }

    count_feat = Count(entityset['log']['id'], **shared_kwargs)
    mode_feat = Mode(entityset['log']['product_id'], **shared_kwargs)

    matrix = backend([count_feat, mode_feat]).calculate_all_features(
        instance_ids=[0],
        time_last=None,
    )

    observed_count = matrix[count_feat.get_name()][0]
    observed_mode = matrix[mode_feat.get_name()][0]
    assert observed_count == 3
    assert observed_mode == 'coke zero'
예제 #5
0
def test_return_type_inference_id(es):
    """Check `variable_type` inference for features built on Id variables."""
    log = es["log"]
    sessions = es["sessions"]
    customers = es["customers"]

    # direct features should keep Id variable type
    customer_id_on_log = Feature(sessions["customer_id"], log)
    assert customer_id_on_log.variable_type == Id

    # aggregations of Id variable types should get converted
    session_id_mode = Mode(log["session_id"], customers)
    assert session_id_mode.variable_type == Categorical

    # also test direct feature of aggregation
    session_id_mode_on_sessions = Feature(session_id_mode, sessions)
    assert session_id_mode_on_sessions.variable_type == Categorical
예제 #6
0
def test_limit_mode_uniques(es, session_id_feat, product_id_feat,
                            datetime_feat):
    """Check `LimitModeUniques` with its default and a 0.3 threshold.

    The same Mode feature is asserted valid under the default filter and
    invalid once the threshold is set to 0.3.
    """
    sessions = es['sessions']
    product_mode = Mode(product_id_feat, parent_entity=sessions)

    # Both checks pass the same arguments to is_valid.
    check_kwargs = {
        'feature': product_mode,
        'entity': sessions,
        'target_entity_id': 'customers',
    }

    default_filter = filt.LimitModeUniques()
    assert default_filter.is_valid(**check_kwargs)

    # percent_unique is 6/15
    strict_filter = filt.LimitModeUniques(threshold=0.3)
    assert not strict_filter.is_valid(**check_kwargs)
예제 #7
0
def test_dfs_builds_on_seed_features_more_than_max_depth(es):
    """Check that DFS with `max_depth=1` builds one level on seed features.

    The two seed features and a Last aggregation of the log seed are
    asserted to be among the built features. A direct feature of a Mode of
    that aggregation is asserted to be absent.
    """
    log = es['log']
    sessions = es['sessions']

    sessions_seed = Count(log["id"], sessions) > 2
    log_seed = Hour(log['datetime'])
    last_of_seed = Last(log_seed, sessions)

    # Depth of this feat is 2 relative to session_agg, the seed feature,
    # which is greater than max_depth so it shouldn't be built
    beyond_max_depth = DirectFeature(Mode(last_of_seed, es['customers']),
                                     sessions)

    synthesizer = DeepFeatureSynthesis(
        target_entity_id='sessions',
        entityset=es,
        agg_primitives=[Last, Count],
        trans_primitives=[],
        max_depth=1,
        seed_features=[sessions_seed, log_seed])

    # Collect the names once; all three membership checks reuse them.
    built_names = [feat.get_name() for feat in synthesizer.build_features()]

    assert sessions_seed.get_name() in built_names
    assert last_of_seed.get_name() in built_names
    assert beyond_max_depth.get_name() not in built_names
예제 #8
0
def test_return_type_inference_direct_feature(es):
    """Check the `variable_type` of a direct feature of a Mode aggregation.

    The type is asserted to equal the class of the aggregated variable.
    """
    priority_level = es["log"]["priority_level"]
    customer_mode = Mode(priority_level, es["customers"])
    mode_on_sessions = Feature(customer_mode, es["sessions"])

    expected_type = priority_level.__class__
    assert mode_on_sessions.variable_type == expected_type
예제 #9
0
def test_return_type_inference(es):
    """Check that a Mode aggregation's `variable_type` is its input's class."""
    priority_level = es["log"]["priority_level"]
    expected_type = priority_level.__class__

    customer_mode = Mode(priority_level, es["customers"])
    assert customer_mode.variable_type == expected_type