def test_base_features_not_in_list(es):
    """Deserialize a feature whose base features are defined but not listed.

    Only the aggregation feature appears in ``feature_list``; the transform and
    identity features it is built on are present in ``feature_definitions`` only.
    """
    max_primitive = Max()
    mult_primitive = MultiplyNumericScalar(value=2)
    value = ft.IdentityFeature(es["log"].ww["value"])
    value_x2 = ft.TransformFeature(value, mult_primitive)
    max_feat = ft.AggregationFeature(value_x2, "sessions", max_primitive)

    feature_definitions = {
        feat.unique_name(): feat.to_dictionary()
        for feat in (max_feat, value_x2, value)
    }
    # Features reference their primitive by key into "primitive_definitions".
    for feat, primitive_key in ((max_feat, "0"), (value_x2, "1")):
        feature_definitions[feat.unique_name()]["arguments"][
            "primitive"
        ] = primitive_key

    dictionary = {
        "ft_version": ft.__version__,
        "schema_version": SCHEMA_VERSION,
        "entityset": es.to_dictionary(),
        "feature_list": [max_feat.unique_name()],
        "feature_definitions": feature_definitions,
        "primitive_definitions": {
            "0": serialize_primitive(max_primitive),
            "1": serialize_primitive(mult_primitive),
        },
    }

    assert [max_feat] == FeaturesDeserializer(dictionary).to_list()
def test_feature_use_previous_pd_dateoffset(es):
    """Deserialize aggregation features whose ``use_previous`` is a DateOffset.

    The same check is run for an offset with a single unit and for an offset
    that combines months, days, and minutes.
    """
    count_primitive = Count()
    offsets = (
        pd.DateOffset(months=3),
        pd.DateOffset(months=3, days=2, minutes=30),
    )

    for offset in offsets:
        value = ft.IdentityFeature(es["log"].ww["id"])
        count_feature = ft.AggregationFeature(
            value, "customers", count_primitive, use_previous=offset
        )
        count_name = count_feature.unique_name()
        value_name = value.unique_name()

        feature_definitions = {
            count_name: count_feature.to_dictionary(),
            value_name: value.to_dictionary(),
        }
        # The feature references its primitive by key into "primitive_definitions".
        feature_definitions[count_name]["arguments"]["primitive"] = "0"

        dictionary = {
            "ft_version": ft.__version__,
            "schema_version": SCHEMA_VERSION,
            "entityset": es.to_dictionary(),
            "feature_list": [count_name, value_name],
            "feature_definitions": feature_definitions,
            "primitive_definitions": {"0": serialize_primitive(count_primitive)},
        }

        expected = [count_feature, value]
        assert expected == FeaturesDeserializer(dictionary).to_list()
Example #3
0
def test_where_feature_dependency(es):
    """Check that a feature used only as a ``where`` clause gets serialized.

    ``feature_list`` names just the aggregation, while the expected
    ``feature_definitions`` also contain its base feature and ``where`` feature.
    """
    max_primitive = Max()
    value = ft.IdentityFeature(es["log"].ww["value"])
    is_purchased = ft.IdentityFeature(es["log"].ww["purchased"])
    max_feature = ft.AggregationFeature(
        value, "sessions", max_primitive, where=is_purchased
    )
    serializer = FeaturesSerializer([max_feature])

    max_name = max_feature.unique_name()
    feature_definitions = {
        feat.unique_name(): feat.to_dictionary()
        for feat in (max_feature, value, is_purchased)
    }
    # The aggregation references its primitive by key into "primitive_definitions".
    feature_definitions[max_name]["arguments"]["primitive"] = "0"

    expected = {
        "ft_version": ft.__version__,
        "schema_version": SCHEMA_VERSION,
        "entityset": es.to_dictionary(),
        "feature_list": [max_name],
        "feature_definitions": feature_definitions,
        "primitive_definitions": {"0": serialize_primitive(max_primitive)},
    }

    _compare_feature_dicts(expected, serializer.to_dict())
def test_unknown_primitive_module(es):
    """Building a deserializer raises when a primitive's module is not found."""
    value = ft.IdentityFeature(es["log"].ww["value"])
    max_feat = ft.AggregationFeature(value, "sessions", Max)

    # Rewrite the serialized primitive so it names a module the test expects
    # the deserializer to be unable to resolve.
    broken_primitive = serialize_primitive(Max())
    broken_primitive["module"] = "fake.module"

    listed_features = (max_feat, value)
    dictionary = {
        "ft_version": ft.__version__,
        "schema_version": SCHEMA_VERSION,
        "entityset": es.to_dictionary(),
        "feature_list": [feat.unique_name() for feat in listed_features],
        "feature_definitions": {
            feat.unique_name(): feat.to_dictionary() for feat in listed_features
        },
        "primitive_definitions": {"0": broken_primitive},
    }

    with pytest.raises(RuntimeError) as excinfo:
        FeaturesDeserializer(dictionary)

    expected_message = 'Primitive "Max" in module "fake.module" not found'
    assert expected_message == str(excinfo.value)
    def _feature_definitions(self):
        """Return ``(features_dict, primitives_dict)``, building them on first use.

        Once ``self._features_dict`` is non-empty the stored dictionaries are
        returned as-is. Otherwise every feature in ``self.feature_list`` is
        passed to ``self._serialize_feature``, and each primitive object found
        under a feature's ``"arguments"`` is replaced by a string key into the
        primitives dictionary.
        """
        if self._features_dict:
            return self._features_dict, self._primitives_dict

        self._features_dict = {}
        self._primitives_dict = {}
        for feature in self.feature_list:
            self._serialize_feature(feature)

        # Maps id(primitive) -> key, so a primitive object shared by several
        # features is serialized once and referenced by the same key.
        key_by_primitive_id = {}
        for definition in self._features_dict.values():
            arguments = definition["arguments"]
            primitive = arguments.get("primitive")
            if not primitive:
                continue

            primitive_id = id(primitive)
            key = key_by_primitive_id.get(primitive_id)
            if key is None:
                # Keys are always strings: a JSON round trip turns integer
                # dict keys into strings but leaves integer values alone.
                key = str(len(key_by_primitive_id))
                key_by_primitive_id[primitive_id] = key
                self._primitives_dict[key] = serialize_primitive(primitive)
            arguments["primitive"] = key

        return self._features_dict, self._primitives_dict
Example #6
0
 def get_arguments(self):
     """Return this feature's name, base feature names, and serialized primitive."""
     arguments = {"name": self._name}
     arguments["base_features"] = [
         feat.unique_name() for feat in self.base_features
     ]
     arguments["primitive"] = serialize_primitive(self.primitive)
     return arguments
Example #7
0
def test_feature_use_previous_pd_timedelta(es):
    """Serialize an aggregation feature whose ``use_previous`` is a ``pd.Timedelta``."""
    value = ft.IdentityFeature(es["log"].ww["id"])
    td = pd.Timedelta(12, "W")
    count_primitive = Count()
    count_feature = ft.AggregationFeature(
        value, "customers", count_primitive, use_previous=td
    )
    serializer = FeaturesSerializer([count_feature, value])

    count_name = count_feature.unique_name()
    value_name = value.unique_name()
    feature_definitions = {
        count_name: count_feature.to_dictionary(),
        value_name: value.to_dictionary(),
    }
    # The feature references its primitive by key into "primitive_definitions".
    feature_definitions[count_name]["arguments"]["primitive"] = "0"

    expected = {
        "ft_version": ft.__version__,
        "schema_version": SCHEMA_VERSION,
        "entityset": es.to_dictionary(),
        "feature_list": [count_name, value_name],
        "feature_definitions": feature_definitions,
        "primitive_definitions": {"0": serialize_primitive(count_primitive)},
    }

    _compare_feature_dicts(expected, serializer.to_dict())
Example #8
0
 def get_arguments(self):
     """Return this feature's name, base feature names, and serialized primitive."""
     arguments = {'name': self._name}
     arguments['base_features'] = [
         feat.unique_name() for feat in self.base_features
     ]
     arguments['primitive'] = serialize_primitive(self.primitive)
     return arguments
Example #9
0
def test_serialization(es):
    """Check ``get_arguments`` / ``from_dictionary`` for aggregation features.

    Covers a plain aggregation and one with both ``where`` and ``use_previous``.
    """
    primitives_deserializer = PrimitivesDeserializer()
    value = ft.IdentityFeature(es['log']['value'])
    primitive = ft.primitives.Max()
    max1 = ft.AggregationFeature(value, es['customers'], primitive)

    path = next(es.find_backward_paths('customers', 'log'))

    def expected_arguments(where, use_previous):
        # Arguments shared by both scenarios; only the last two entries vary.
        return {
            'name': None,
            'base_features': [value.unique_name()],
            'relationship_path': [r.to_dictionary() for r in path],
            'primitive': serialize_primitive(primitive),
            'where': where,
            'use_previous': use_previous,
        }

    dictionary = expected_arguments(None, None)
    assert dictionary == max1.get_arguments()
    restored = ft.AggregationFeature.from_dictionary(
        dictionary, es, {value.unique_name(): value}, primitives_deserializer)
    assert max1 == restored

    is_purchased = ft.IdentityFeature(es['log']['purchased'])
    use_previous = ft.Timedelta(3, 'd')
    max2 = ft.AggregationFeature(
        value, es['customers'], primitive,
        where=is_purchased, use_previous=use_previous)

    dictionary = expected_arguments(
        is_purchased.unique_name(), use_previous.get_arguments())
    assert dictionary == max2.get_arguments()

    dependencies = {
        feat.unique_name(): feat for feat in (value, is_purchased)
    }
    restored = ft.AggregationFeature.from_dictionary(
        dictionary, es, dependencies, primitives_deserializer)
    assert max2 == restored
Example #10
0
def test_serialization(es):
    """Check ``get_arguments`` / ``from_dictionary`` for aggregation features.

    Covers a plain aggregation and one with both ``where`` and ``use_previous``.
    """
    primitives_deserializer = PrimitivesDeserializer()
    value = ft.IdentityFeature(es["log"].ww["value"])
    primitive = ft.primitives.Max()
    max1 = ft.AggregationFeature(value, "customers", primitive)

    path = next(es.find_backward_paths("customers", "log"))

    def expected_arguments(where, use_previous):
        # Arguments shared by both scenarios; only the last two entries vary.
        return {
            "name": None,
            "base_features": [value.unique_name()],
            "relationship_path": [r.to_dictionary() for r in path],
            "primitive": serialize_primitive(primitive),
            "where": where,
            "use_previous": use_previous,
        }

    dictionary = expected_arguments(None, None)
    assert dictionary == max1.get_arguments()
    restored = ft.AggregationFeature.from_dictionary(
        dictionary, es, {value.unique_name(): value}, primitives_deserializer
    )
    _assert_agg_feats_equal(max1, restored)

    is_purchased = ft.IdentityFeature(es["log"].ww["purchased"])
    use_previous = ft.Timedelta(3, "d")
    max2 = ft.AggregationFeature(
        value, "customers", primitive, where=is_purchased, use_previous=use_previous
    )

    dictionary = expected_arguments(
        is_purchased.unique_name(), use_previous.get_arguments()
    )
    assert dictionary == max2.get_arguments()

    dependencies = {feat.unique_name(): feat for feat in (value, is_purchased)}
    restored = ft.AggregationFeature.from_dictionary(
        dictionary, es, dependencies, primitives_deserializer
    )
    _assert_agg_feats_equal(max2, restored)
Example #11
0
 def get_arguments(self):
     """Return the arguments describing this feature as a dictionary.

     ``where`` and ``use_previous`` are passed through unchanged when falsy;
     otherwise they become the where-feature's unique name and the result of
     ``use_previous.get_arguments()`` respectively.
     """
     arguments = {'name': self._name}
     arguments['base_features'] = [
         feat.unique_name() for feat in self.base_features
     ]
     # Each path entry is a pair; only its second element is serialized.
     arguments['relationship_path'] = [
         relationship.to_dictionary()
         for _, relationship in self.relationship_path
     ]
     arguments['primitive'] = serialize_primitive(self.primitive)
     arguments['where'] = self.where and self.where.unique_name()
     arguments['use_previous'] = (
         self.use_previous and self.use_previous.get_arguments()
     )
     return arguments
Example #12
0
def test_serialization(es):
    """Check ``get_arguments`` / ``from_dictionary`` for aggregation features.

    Covers a plain aggregation and one with both ``where`` and ``use_previous``.
    """
    primitives_deserializer = PrimitivesDeserializer()
    value = ft.IdentityFeature(es['log']['value'])
    primitive = ft.primitives.Max()
    max1 = ft.AggregationFeature(value, es['sessions'], primitive)

    def expected_arguments(where, use_previous):
        # Arguments shared by both scenarios; only the last two entries vary.
        return {
            'base_features': [value.unique_name()],
            'parent_entity_id': 'sessions',
            'primitive': serialize_primitive(primitive),
            'where': where,
            'use_previous': use_previous,
        }

    dictionary = expected_arguments(None, None)
    assert dictionary == max1.get_arguments()
    restored = ft.AggregationFeature.from_dictionary(
        dictionary, es, {value.unique_name(): value}, primitives_deserializer)
    assert max1 == restored

    is_purchased = ft.IdentityFeature(es['log']['purchased'])
    use_previous = ft.Timedelta(3, 'd')
    max2 = ft.AggregationFeature(
        value, es['sessions'], primitive,
        where=is_purchased, use_previous=use_previous)

    dictionary = expected_arguments(
        is_purchased.unique_name(), use_previous.get_arguments())
    assert dictionary == max2.get_arguments()

    dependencies = {
        feat.unique_name(): feat for feat in (value, is_purchased)
    }
    restored = ft.AggregationFeature.from_dictionary(
        dictionary, es, dependencies, primitives_deserializer)
    assert max2 == restored
Example #13
0
 def get_arguments(self):
     """Return the arguments describing this groupby feature as a dictionary.

     The groupby feature is reported under its own ``'groupby'`` key, so any
     base feature whose unique name equals the groupby's unique name is left
     out of ``'base_features'``.
     """
     # The groupby name does not change while filtering: compute it once
     # instead of once per base feature, and reuse it for the 'groupby' entry.
     groupby_name = self.groupby.unique_name()
     base_names = (feat.unique_name() for feat in self.base_features)
     feature_names = [name for name in base_names if name != groupby_name]
     return {
         'name': self._name,
         'base_features': feature_names,
         'primitive': serialize_primitive(self.primitive),
         'groupby': groupby_name,
     }
def test_multioutput_feature(es):
    """Deserialize a multi-output feature, its slices, and features built on them.

    The deserialized list must yield the same unique names, position by
    position, as the original feature list.
    """
    value = ft.IdentityFeature(es["log"].ww["product_id"])
    threecommon = NMostCommon()
    num_unique = NumUnique()
    tc = ft.Feature(value, parent_dataframe_name="sessions", primitive=threecommon)

    features = [tc, value]
    for i in range(3):
        features.extend(
            [
                ft.Feature(
                    tc[i],
                    parent_dataframe_name="customers",
                    primitive=num_unique,
                ),
                tc[i],
            ]
        )

    flist = [feat.unique_name() for feat in features]
    fdict = {name: feat.to_dictionary() for name, feat in zip(flist, features)}

    # Reference primitives by key: index 0 is the NMostCommon feature and
    # indices 2, 4 and 6 are the NumUnique features appended in the loop above.
    fdict[flist[0]]["arguments"]["primitive"] = "0"
    for index in (2, 4, 6):
        fdict[flist[index]]["arguments"]["primitive"] = "1"

    dictionary = {
        "ft_version": ft.__version__,
        "schema_version": SCHEMA_VERSION,
        "entityset": es.to_dictionary(),
        "feature_list": flist,
        "feature_definitions": fdict,
        "primitive_definitions": {
            "0": serialize_primitive(threecommon),
            "1": serialize_primitive(num_unique),
        },
    }
    deserialized = FeaturesDeserializer(dictionary).to_list()

    for index, feature in enumerate(features):
        assert feature.unique_name() == deserialized[index].unique_name()
Example #15
0
def test_multi_output_features(es):
    """Serialize a multi-output feature, its slices, and features built on them."""
    product_id = ft.IdentityFeature(es["log"].ww["product_id"])
    threecommon = NMostCommon()
    num_unique = NumUnique()
    tc = ft.Feature(product_id, parent_dataframe_name="sessions", primitive=threecommon)

    features = [tc, product_id]
    for i in range(3):
        features.extend(
            [
                ft.Feature(
                    tc[i],
                    parent_dataframe_name="customers",
                    primitive=num_unique,
                ),
                tc[i],
            ]
        )

    serializer = FeaturesSerializer(features)

    flist = [feat.unique_name() for feat in features]
    fdict = {name: feat.to_dictionary() for name, feat in zip(flist, features)}

    # Reference primitives by key: index 0 is the NMostCommon feature and
    # indices 2, 4 and 6 are the NumUnique features appended in the loop above.
    fdict[flist[0]]["arguments"]["primitive"] = "0"
    for index in (2, 4, 6):
        fdict[flist[index]]["arguments"]["primitive"] = "1"

    expected = {
        "ft_version": ft.__version__,
        "schema_version": SCHEMA_VERSION,
        "entityset": es.to_dictionary(),
        "feature_list": flist,
        "feature_definitions": fdict,
        "primitive_definitions": {
            "0": serialize_primitive(threecommon),
            "1": serialize_primitive(num_unique),
        },
    }

    _compare_feature_dicts(expected, serializer.to_dict())
Example #16
0
 def get_arguments(self):
     """Return the arguments describing this groupby feature as a dictionary.

     The groupby feature is reported under its own ``"groupby"`` key, so any
     base feature whose unique name equals the groupby's unique name is left
     out of ``"base_features"``.
     """
     # The groupby name does not change while filtering: compute it once
     # instead of once per base feature, and reuse it for the "groupby" entry.
     groupby_name = self.groupby.unique_name()
     base_names = (feat.unique_name() for feat in self.base_features)
     feature_names = [name for name in base_names if name != groupby_name]
     return {
         "name": self._name,
         "base_features": feature_names,
         "primitive": serialize_primitive(self.primitive),
         "groupby": groupby_name,
     }
Example #17
0
 def get_arguments(self):
     """Return the arguments describing this feature as a dictionary.

     ``where`` and ``use_previous`` are passed through unchanged when falsy;
     otherwise they become the where-feature's unique name and the result of
     ``use_previous.get_arguments()`` respectively.
     """
     arguments = {
         'base_features': [feat.unique_name() for feat in self.base_features],
     }
     arguments['parent_entity_id'] = self.parent_entity.id
     arguments['primitive'] = serialize_primitive(self.primitive)
     arguments['where'] = self.where and self.where.unique_name()
     arguments['use_previous'] = (
         self.use_previous and self.use_previous.get_arguments()
     )
     return arguments
Example #18
0
 def get_arguments(self):
     """Return the arguments describing this feature as a dictionary.

     ``where`` and ``use_previous`` are passed through unchanged when falsy;
     otherwise they become the where-feature's unique name and the result of
     ``use_previous.get_arguments()`` respectively.
     """
     arguments = {"name": self._name}
     arguments["base_features"] = [
         feat.unique_name() for feat in self.base_features
     ]
     # Each path entry is a pair; only its second element is serialized.
     arguments["relationship_path"] = [
         relationship.to_dictionary()
         for _, relationship in self.relationship_path
     ]
     arguments["primitive"] = serialize_primitive(self.primitive)
     arguments["where"] = self.where and self.where.unique_name()
     arguments["use_previous"] = (
         self.use_previous and self.use_previous.get_arguments()
     )
     return arguments
Example #19
0
def test_serialization(es):
    """Check ``get_arguments`` / ``from_dictionary`` for a transform feature."""
    value = ft.IdentityFeature(es['log']['value'])
    primitive = ft.primitives.MultiplyNumericScalar(value=2)
    value_x2 = ft.TransformFeature(value, primitive)

    expected_arguments = {
        'base_features': [value.unique_name()],
        'primitive': serialize_primitive(primitive),
    }
    assert expected_arguments == value_x2.get_arguments()

    restored = ft.TransformFeature.from_dictionary(
        expected_arguments, es, {value.unique_name(): value},
        PrimitivesDeserializer())
    assert value_x2 == restored
def test_primitive_serialization(universal_sentence_encoder):
    """Serialize and deserialize the primitive, then check what it computes.

    The deserialized primitive is applied to a few sentences; the rounded
    per-column means of its output are compared against reference values.
    """
    sentences = pd.Series(
        [
            "",
            "I like to eat pizza",
            "The roller coaster was built in 1885.",
            "When will humans go to mars?",
            "Mitochondria is the powerhouse of the cell",
        ]
    )
    primitive_as_dict = serialize_primitive(universal_sentence_encoder)
    restored_primitive = PrimitivesDeserializer().deserialize_primitive(
        primitive_as_dict
    )

    frame = pd.DataFrame(restored_primitive(sentences))
    actual_means = frame.mean().round(7).to_numpy()
    expected_means = np.array(
        [-0.0007475, 0.0032088, 0.0018552, 0.0008256, 0.0028342]
    )
    np.testing.assert_array_almost_equal(actual_means, expected_means)
def test_serialization(pd_es):
    """Check ``get_arguments`` / ``from_dictionary`` for a groupby transform feature."""
    value = ft.IdentityFeature(pd_es["log"].ww["value"])
    zipcode = ft.IdentityFeature(pd_es["log"].ww["zipcode"])
    primitive = CumSum()
    groupby = ft.feature_base.GroupByTransformFeature(value, primitive, zipcode)

    # The groupby feature is expected under "groupby", not in "base_features".
    expected_arguments = {
        "name": None,
        "base_features": [value.unique_name()],
        "primitive": serialize_primitive(primitive),
        "groupby": zipcode.unique_name(),
    }
    assert expected_arguments == groupby.get_arguments()

    dependencies = {feat.unique_name(): feat for feat in (value, zipcode)}
    restored = ft.feature_base.GroupByTransformFeature.from_dictionary(
        expected_arguments, pd_es, dependencies, PrimitivesDeserializer()
    )
    assert groupby == restored
Example #22
0
def test_serialization(es):
    """Check ``get_arguments`` / ``from_dictionary`` for a groupby transform feature."""
    value = ft.IdentityFeature(es['log']['value'])
    zipcode = ft.IdentityFeature(es['log']['zipcode'])
    primitive = CumSum()
    groupby = ft.feature_base.GroupByTransformFeature(value, primitive, zipcode)

    # The groupby feature is expected under 'groupby', not in 'base_features'.
    expected_arguments = {
        'base_features': [value.unique_name()],
        'primitive': serialize_primitive(primitive),
        'groupby': zipcode.unique_name(),
    }
    assert expected_arguments == groupby.get_arguments()

    dependencies = {feat.unique_name(): feat for feat in (value, zipcode)}
    restored = ft.feature_base.GroupByTransformFeature.from_dictionary(
        expected_arguments, es, dependencies, PrimitivesDeserializer())
    assert groupby == restored
Example #23
0
def test_base_features_in_list(es):
    """Serialize a feature list that names both a feature and its base feature."""
    value = ft.IdentityFeature(es["log"].ww["value"])
    max_feature = ft.AggregationFeature(value, "sessions", Max)
    serializer = FeaturesSerializer([max_feature, value])

    max_name = max_feature.unique_name()
    value_name = value.unique_name()
    feature_definitions = {
        max_name: max_feature.to_dictionary(),
        value_name: value.to_dictionary(),
    }
    # The aggregation references its primitive by key into "primitive_definitions".
    feature_definitions[max_name]["arguments"]["primitive"] = "0"

    expected = {
        "ft_version": ft.__version__,
        "schema_version": SCHEMA_VERSION,
        "entityset": es.to_dictionary(),
        "feature_list": [max_name, value_name],
        "feature_definitions": feature_definitions,
        "primitive_definitions": {"0": serialize_primitive(Max())},
    }

    _compare_feature_dicts(expected, serializer.to_dict())