def _check_schema_version(version, es, warning_text, caplog, warning_type=None): dictionary = { "ft_version": ft.__version__, "schema_version": version, "entityset": es.to_dictionary(), "feature_list": [], "feature_definitions": {}, } if warning_type == "log" and warning_text: logger = logging.getLogger("featuretools") logger.propagate = True FeaturesDeserializer(dictionary) assert warning_text in caplog.text logger.propagate = False elif warning_type == "warn" and warning_text: with pytest.warns(UserWarning) as record: FeaturesDeserializer(dictionary) assert record[0].message.args[0] == warning_text else: FeaturesDeserializer(dictionary)
def test_feature_use_previous_pd_timedelta(es): value = ft.IdentityFeature(es["log"].ww["id"]) td = pd.Timedelta(12, "W") count_primitive = Count() count_feature = ft.AggregationFeature(value, "customers", count_primitive, use_previous=td) dictionary = { "ft_version": ft.__version__, "schema_version": SCHEMA_VERSION, "entityset": es.to_dictionary(), "feature_list": [count_feature.unique_name(), value.unique_name()], "feature_definitions": { count_feature.unique_name(): count_feature.to_dictionary(), value.unique_name(): value.to_dictionary(), }, } dictionary["primitive_definitions"] = { "0": serialize_primitive(count_primitive) } dictionary["feature_definitions"][ count_feature.unique_name()]["arguments"]["primitive"] = "0" deserializer = FeaturesDeserializer(dictionary) expected = [count_feature, value] assert expected == deserializer.to_list()
def test_base_features_not_in_list(es): max_primitive = Max() mult_primitive = MultiplyNumericScalar(value=2) value = ft.IdentityFeature(es["log"].ww["value"]) value_x2 = ft.TransformFeature(value, mult_primitive) max_feat = ft.AggregationFeature(value_x2, "sessions", max_primitive) dictionary = { "ft_version": ft.__version__, "schema_version": SCHEMA_VERSION, "entityset": es.to_dictionary(), "feature_list": [max_feat.unique_name()], "feature_definitions": { max_feat.unique_name(): max_feat.to_dictionary(), value_x2.unique_name(): value_x2.to_dictionary(), value.unique_name(): value.to_dictionary(), }, } dictionary["primitive_definitions"] = { "0": serialize_primitive(max_primitive), "1": serialize_primitive(mult_primitive), } dictionary["feature_definitions"][ max_feat.unique_name()]["arguments"]["primitive"] = "0" dictionary["feature_definitions"][ value_x2.unique_name()]["arguments"]["primitive"] = "1" deserializer = FeaturesDeserializer(dictionary) expected = [max_feat] assert expected == deserializer.to_list()
def serialize_name_unchanged(original): renamed = original.rename('MyFeature') assert renamed.get_name() == 'MyFeature' serializer = FeaturesSerializer([renamed]) serialized = serializer.to_dict() deserializer = FeaturesDeserializer(serialized) deserialized = deserializer.to_list()[0] assert deserialized.get_name() == 'MyFeature'
def serialize_name_unchanged(original): new_name = 'MyFeature' original_names = original.get_feature_names() renamed = original.rename(new_name) new_names = [new_name] if len(original_names) == 1 else [new_name + '[{}]'.format(i) for i in range(len(original_names))] check_names(renamed, new_name, new_names) serializer = FeaturesSerializer([renamed]) serialized = serializer.to_dict() deserializer = FeaturesDeserializer(serialized) deserialized = deserializer.to_list()[0] check_names(deserialized, new_name, new_names)
def test_single_feature(es): feature = ft.IdentityFeature(es['log']['value']) dictionary = { 'ft_version': ft.__version__, 'schema_version': SCHEMA_VERSION, 'entityset': es.to_dictionary(), 'feature_list': [feature.unique_name()], 'feature_definitions': { feature.unique_name(): feature.to_dictionary() } } deserializer = FeaturesDeserializer(dictionary) expected = [feature] assert expected == deserializer.to_list()
def test_single_feature(es): feature = ft.IdentityFeature(es["log"].ww["value"]) dictionary = { "ft_version": ft.__version__, "schema_version": SCHEMA_VERSION, "entityset": es.to_dictionary(), "feature_list": [feature.unique_name()], "feature_definitions": { feature.unique_name(): feature.to_dictionary() }, } deserializer = FeaturesDeserializer(dictionary) expected = [feature] assert expected == deserializer.to_list()
def test_unknown_primitive_module(es): value = ft.IdentityFeature(es["log"].ww["value"]) max_feat = ft.AggregationFeature(value, "sessions", Max) primitive_dict = serialize_primitive(Max()) primitive_dict["module"] = "fake.module" dictionary = { "ft_version": ft.__version__, "schema_version": SCHEMA_VERSION, "entityset": es.to_dictionary(), "feature_list": [max_feat.unique_name(), value.unique_name()], "feature_definitions": { max_feat.unique_name(): max_feat.to_dictionary(), value.unique_name(): value.to_dictionary(), }, "primitive_definitions": { "0": primitive_dict }, } with pytest.raises(RuntimeError) as excinfo: FeaturesDeserializer(dictionary) error_text = 'Primitive "Max" in module "fake.module" not found' assert error_text == str(excinfo.value)
def _check_schema_version(version, es, warning_text): dictionary = { 'ft_version': ft.__version__, 'schema_version': version, 'entityset': es.to_dictionary(), 'feature_list': [], 'feature_definitions': {} } if warning_text: with pytest.warns(UserWarning) as record: FeaturesDeserializer(dictionary) assert record[0].message.args[0] == warning_text else: FeaturesDeserializer(dictionary)
def test_multioutput_feature(es): value = ft.IdentityFeature(es['log']['product_id']) threecommon = ft.primitives.NMostCommon() tc = ft.Feature(es['log']['product_id'], parent_entity=es["sessions"], primitive=threecommon) features = [tc, value] for i in range(3): features.append( ft.Feature(tc[i], parent_entity=es['customers'], primitive=ft.primitives.NumUnique)) features.append(tc[i]) flist = [feat.unique_name() for feat in features] fd = [feat.to_dictionary() for feat in features] fdict = dict(zip(flist, fd)) dictionary = { 'ft_version': ft.__version__, 'schema_version': SCHEMA_VERSION, 'entityset': es.to_dictionary(), 'feature_list': flist, 'feature_definitions': fdict } deserializer = FeaturesDeserializer(dictionary).to_list() for i in range(len(features)): assert features[i].unique_name() == deserializer[i].unique_name()
def _check_schema_version(version, es, error_text): dictionary = { 'ft_version': ft.__version__, 'schema_version': version, 'entityset': es.to_dictionary(), 'feature_list': [], 'feature_definitions': {} } if error_text: with pytest.raises(RuntimeError) as excinfo: FeaturesDeserializer(dictionary) assert error_text == str(excinfo.value) else: FeaturesDeserializer(dictionary)
def test_multioutput_feature(es): value = ft.IdentityFeature(es["log"].ww["product_id"]) threecommon = ft.primitives.NMostCommon() tc = ft.Feature(value, parent_dataframe_name="sessions", primitive=threecommon) features = [tc, value] for i in range(3): features.append( ft.Feature( tc[i], parent_dataframe_name="customers", primitive=ft.primitives.NumUnique, )) features.append(tc[i]) flist = [feat.unique_name() for feat in features] fd = [feat.to_dictionary() for feat in features] fdict = dict(zip(flist, fd)) dictionary = { "ft_version": ft.__version__, "schema_version": SCHEMA_VERSION, "entityset": es.to_dictionary(), "feature_list": flist, "feature_definitions": fdict, } deserializer = FeaturesDeserializer(dictionary).to_list() for i in range(len(features)): assert features[i].unique_name() == deserializer[i].unique_name()
def test_base_features_in_list(es): value = ft.IdentityFeature(es['log']['value']) max_feat = ft.AggregationFeature(value, es['sessions'], ft.primitives.Max) dictionary = { 'ft_version': ft.__version__, 'schema_version': SCHEMA_VERSION, 'entityset': es.to_dictionary(), 'feature_list': [max_feat.unique_name(), value.unique_name()], 'feature_definitions': { max_feat.unique_name(): max_feat.to_dictionary(), value.unique_name(): value.to_dictionary(), } } deserializer = FeaturesDeserializer(dictionary) expected = [max_feat, value] assert expected == deserializer.to_list()
def test_base_features_in_list(es): value = ft.IdentityFeature(es["log"].ww["value"]) max_feat = ft.AggregationFeature(value, "sessions", ft.primitives.Max) dictionary = { "ft_version": ft.__version__, "schema_version": SCHEMA_VERSION, "entityset": es.to_dictionary(), "feature_list": [max_feat.unique_name(), value.unique_name()], "feature_definitions": { max_feat.unique_name(): max_feat.to_dictionary(), value.unique_name(): value.to_dictionary(), }, } deserializer = FeaturesDeserializer(dictionary) expected = [max_feat, value] assert expected == deserializer.to_list()
def test_feature_use_previous_pd_timedelta(es): value = ft.IdentityFeature(es['log']['id']) td = pd.Timedelta(3, "M") count_feature = ft.AggregationFeature(value, es['customers'], ft.primitives.Count, use_previous=td) dictionary = { 'ft_version': ft.__version__, 'schema_version': SCHEMA_VERSION, 'entityset': es.to_dictionary(), 'feature_list': [count_feature.unique_name(), value.unique_name()], 'feature_definitions': { count_feature.unique_name(): count_feature.to_dictionary(), value.unique_name(): value.to_dictionary(), } } deserializer = FeaturesDeserializer(dictionary) expected = [count_feature, value] assert expected == deserializer.to_list()
def test_unknown_feature_type(es): dictionary = { 'ft_version': ft.__version__, 'schema_version': SCHEMA_VERSION, 'entityset': es.to_dictionary(), 'feature_list': ['feature_1'], 'feature_definitions': { 'feature_1': { 'type': 'FakeFeature', 'dependencies': [], 'arguments': {} } } } deserializer = FeaturesDeserializer(dictionary) with pytest.raises(RuntimeError, match='Unrecognized feature type "FakeFeature"'): deserializer.to_list()
def test_unknown_feature_type(es): dictionary = { "ft_version": ft.__version__, "schema_version": SCHEMA_VERSION, "entityset": es.to_dictionary(), "feature_list": ["feature_1"], "feature_definitions": { "feature_1": { "type": "FakeFeature", "dependencies": [], "arguments": {} } }, } deserializer = FeaturesDeserializer(dictionary) with pytest.raises(RuntimeError, match='Unrecognized feature type "FakeFeature"'): deserializer.to_list()
def test_unknown_primitive_module(es): value = ft.IdentityFeature(es['log']['value']) max_feat = ft.AggregationFeature(value, es['sessions'], ft.primitives.Max) max_dict = max_feat.to_dictionary() max_dict['arguments']['primitive']['module'] = 'fake.module' dictionary = { 'ft_version': ft.__version__, 'schema_version': SCHEMA_VERSION, 'entityset': es.to_dictionary(), 'feature_list': [max_feat.unique_name(), value.unique_name()], 'feature_definitions': { max_feat.unique_name(): max_dict, value.unique_name(): value.to_dictionary(), } } deserializer = FeaturesDeserializer(dictionary) with pytest.raises(RuntimeError) as excinfo: deserializer.to_list() error_text = 'Primitive "Max" in module "fake.module" not found' assert error_text == str(excinfo.value)
def test_version(major, minor, patch, raises=True): version = '.'.join([str(v) for v in [major, minor, patch]]) dictionary = { 'ft_version': ft.__version__, 'schema_version': version, 'entityset': es.to_dictionary(), 'feature_list': [], 'feature_definitions': {} } error_text = ( 'Unable to load features. The schema version of the saved ' 'features (%s) is greater than the latest supported (%s). ' 'You may need to upgrade featuretools.' % (version, SCHEMA_VERSION)) if raises: with pytest.raises(RuntimeError) as excinfo: FeaturesDeserializer(dictionary) assert error_text == str(excinfo.value) else: FeaturesDeserializer(dictionary)
def test_unknown_primitive_type(es): value = ft.IdentityFeature(es["log"].ww["value"]) max_feat = ft.AggregationFeature(value, "sessions", ft.primitives.Max) max_dict = max_feat.to_dictionary() max_dict["arguments"]["primitive"]["type"] = "FakePrimitive" dictionary = { "ft_version": ft.__version__, "schema_version": SCHEMA_VERSION, "entityset": es.to_dictionary(), "feature_list": [max_feat.unique_name(), value.unique_name()], "feature_definitions": { max_feat.unique_name(): max_dict, value.unique_name(): value.to_dictionary(), }, } deserializer = FeaturesDeserializer(dictionary) with pytest.raises(RuntimeError) as excinfo: deserializer.to_list() error_text = ('Primitive "FakePrimitive" in module "%s" not found' % ft.primitives.Max.__module__) assert error_text == str(excinfo.value)
def test_feature_use_previous_pd_dateoffset(es): value = ft.IdentityFeature(es["log"].ww["id"]) do = pd.DateOffset(months=3) count_feature = ft.AggregationFeature(value, "customers", ft.primitives.Count, use_previous=do) dictionary = { "ft_version": ft.__version__, "schema_version": SCHEMA_VERSION, "entityset": es.to_dictionary(), "feature_list": [count_feature.unique_name(), value.unique_name()], "feature_definitions": { count_feature.unique_name(): count_feature.to_dictionary(), value.unique_name(): value.to_dictionary(), }, } deserializer = FeaturesDeserializer(dictionary) expected = [count_feature, value] assert expected == deserializer.to_list() value = ft.IdentityFeature(es["log"].ww["id"]) do = pd.DateOffset(months=3, days=2, minutes=30) count_feature = ft.AggregationFeature(value, "customers", ft.primitives.Count, use_previous=do) dictionary = { "ft_version": ft.__version__, "schema_version": SCHEMA_VERSION, "entityset": es.to_dictionary(), "feature_list": [count_feature.unique_name(), value.unique_name()], "feature_definitions": { count_feature.unique_name(): count_feature.to_dictionary(), value.unique_name(): value.to_dictionary(), }, } deserializer = FeaturesDeserializer(dictionary) expected = [count_feature, value] assert expected == deserializer.to_list()
def test_feature_use_previous_pd_dateoffset(es): value = ft.IdentityFeature(es['log']['id']) do = pd.DateOffset(months=3) count_feature = ft.AggregationFeature(value, es['customers'], ft.primitives.Count, use_previous=do) dictionary = { 'ft_version': ft.__version__, 'schema_version': SCHEMA_VERSION, 'entityset': es.to_dictionary(), 'feature_list': [count_feature.unique_name(), value.unique_name()], 'feature_definitions': { count_feature.unique_name(): count_feature.to_dictionary(), value.unique_name(): value.to_dictionary(), } } deserializer = FeaturesDeserializer(dictionary) expected = [count_feature, value] assert expected == deserializer.to_list() value = ft.IdentityFeature(es['log']['id']) do = pd.DateOffset(months=3, days=2, minutes=30) count_feature = ft.AggregationFeature(value, es['customers'], ft.primitives.Count, use_previous=do) dictionary = { 'ft_version': ft.__version__, 'schema_version': SCHEMA_VERSION, 'entityset': es.to_dictionary(), 'feature_list': [count_feature.unique_name(), value.unique_name()], 'feature_definitions': { count_feature.unique_name(): count_feature.to_dictionary(), value.unique_name(): value.to_dictionary(), } } deserializer = FeaturesDeserializer(dictionary) expected = [count_feature, value] assert expected == deserializer.to_list()