예제 #1
0
def test_encode_features_topn(entityset):
    """Check that encoding DFS output keeps an ``NMostCommon(n=3)`` feature.

    A feature applying ``NMostCommon(n=3)`` to the ``product_id`` variable of
    ``log``, with ``customers`` as parent entity, is built by hand. DFS is
    then run for instance ids 0, 1 and 2 of ``customers`` with the same
    primitive and the result is encoded with ``include_unknown=True``. The
    test passes when the hand-built feature's hash appears among the encoded
    feature definitions and each of its feature names is a column of the
    encoded feature matrix.
    """
    expected = Feature(
        entityset['log']['product_id'],
        parent_entity=entityset['customers'],
        primitive=NMostCommon(n=3),
    )

    matrix, definitions = dfs(
        entityset=entityset,
        instance_ids=[0, 1, 2],
        target_entity="customers",
        agg_primitives=[NMostCommon(n=3)],
    )
    encoded_matrix, encoded_defs = encode_features(
        matrix, definitions, include_unknown=True
    )

    # The hand-built feature must survive encoding as a definition.
    expected_hash = expected.hash()
    encoded_hashes = [feat.hash() for feat in encoded_defs]
    assert expected_hash in encoded_hashes

    # Every output name of the feature must be a column of the encoded matrix.
    missing = [
        column
        for column in expected.get_feature_names()
        if column not in encoded_matrix.columns
    ]
    assert not missing
def test_encode_features_topn(pd_es):
    """Check that encoding DFS output keeps an ``NMostCommon(n=3)`` feature.

    A feature applying ``NMostCommon(n=3)`` to the ``product_id`` column of
    the ``log`` dataframe, with ``customers`` as parent dataframe, is built by
    hand. DFS is then run for instance ids 0, 1 and 2 of ``customers`` with
    the same primitive and the result is encoded with
    ``include_unknown=True``. The test passes when the hand-built feature's
    unique name appears among the encoded feature definitions and each of its
    feature names occurs exactly once in the encoded matrix's columns.
    """
    base_feature = Feature(pd_es['log'].ww['product_id'])
    expected = Feature(
        base_feature,
        parent_dataframe_name='customers',
        primitive=NMostCommon(n=3),
    )

    matrix, definitions = dfs(
        entityset=pd_es,
        instance_ids=[0, 1, 2],
        target_dataframe_name="customers",
        agg_primitives=[NMostCommon(n=3)],
    )
    encoded_matrix, encoded_defs = encode_features(
        matrix, definitions, include_unknown=True
    )

    # The hand-built feature must survive encoding as a definition.
    expected_name = expected.unique_name()
    encoded_names = [feat.unique_name() for feat in encoded_defs]
    assert expected_name in encoded_names

    encoded_columns = encoded_matrix.columns
    column_list = encoded_columns.tolist()
    for column in expected.get_feature_names():
        # Present in the encoded matrix ...
        assert column in encoded_columns
        # ... and not duplicated by the encoding step.
        assert column_list.count(column) == 1