Beispiel #1
0
def test_encode_features_topn(es):
    """Verify that encoding keeps an ``NMostCommon(n=3)`` feature intact.

    Builds the expected feature on ``log.product_id`` aggregated to
    ``customers``, runs ``dfs`` for customers 0, 1 and 2 with the same
    primitive, and encodes the result with ``include_unknown=True``.
    The test then checks that the expected feature's unique name is among
    the encoded feature definitions and that each of its feature names
    occurs exactly once in the encoded matrix columns.

    NOTE(review): this file contains a later definition with the same
    name; if both live in one module, that one shadows this one.
    """
    expected_feature = Feature(
        es['log']['product_id'],
        parent_entity=es['customers'],
        primitive=NMostCommon(n=3),
    )

    feature_matrix, definitions = dfs(
        entityset=es,
        instance_ids=[0, 1, 2],
        target_entity="customers",
        agg_primitives=[NMostCommon(n=3)],
    )
    encoded_matrix, encoded_definitions = encode_features(
        feature_matrix,
        definitions,
        include_unknown=True,
    )

    # The feature definition itself must survive encoding.
    expected_name = expected_feature.unique_name()
    encoded_names = [definition.unique_name()
                     for definition in encoded_definitions]
    assert expected_name in encoded_names

    # Every output column of the feature must be present exactly once.
    column_list = encoded_matrix.columns.tolist()
    for column in expected_feature.get_feature_names():
        assert column in encoded_matrix.columns
        assert column_list.count(column) == 1
def test_encode_features_topn(pd_es):
    """Check that an ``NMostCommon(n=3)`` feature survives ``encode_features``.

    The expected feature wraps the ``product_id`` column of the ``log``
    dataframe and aggregates it to ``customers``. ``dfs`` is run for
    instance ids 0, 1 and 2 with the same primitive, and its output is
    encoded with ``include_unknown=True``. The assertions require that the
    expected feature's unique name appears among the encoded definitions
    and that each of its feature names is an encoded column exactly once.

    NOTE(review): this file contains an earlier definition with the same
    name; if both live in one module, this one shadows it.
    """
    product_id = Feature(pd_es["log"].ww["product_id"])
    topn_feature = Feature(
        product_id,
        parent_dataframe_name="customers",
        primitive=NMostCommon(n=3),
    )

    matrix, definitions = dfs(
        entityset=pd_es,
        instance_ids=[0, 1, 2],
        target_dataframe_name="customers",
        agg_primitives=[NMostCommon(n=3)],
    )
    matrix_enc, definitions_enc = encode_features(
        matrix,
        definitions,
        include_unknown=True,
    )

    # The aggregated feature must still be listed after encoding.
    target_name = topn_feature.unique_name()
    assert target_name in [d.unique_name() for d in definitions_enc]

    # Each generated column name must exist and must not be duplicated.
    encoded_columns = matrix_enc.columns
    for col_name in topn_feature.get_feature_names():
        assert col_name in encoded_columns
        assert encoded_columns.tolist().count(col_name) == 1