def test_isin_feat(es):
    """IsIn over ``log.product_id`` matches the expected boolean column.

    The feature is built directly from the ``IsIn`` primitive with the
    outputs ``"toothpaste"`` and ``"coke zero"``, computed for instance ids
    0-7 through ``PandasBackend`` and compared with the expected values.
    """
    accepted_products = ["toothpaste", "coke zero"]
    membership = IsIn(es['log']['product_id'],
                      list_of_outputs=accepted_products)

    backend = PandasBackend(es, [membership])
    feature_matrix = backend.calculate_all_features(range(8), None)

    expected = [True, True, True, False, False, True, True, True]
    observed = feature_matrix[membership.get_name()].values.tolist()
    assert expected == observed
def test_isin_feat_custom(es):
    """Exercise a hand-built ``is_in`` primitive and ``Feature.isin``.

    A transform primitive is created with ``make_trans_primitive`` (with a
    custom ``generate_name``) and evaluated with
    ``ft.calculate_feature_matrix`` for instance ids 0-7. The same check is
    then repeated with ``ft.Feature(...).isin`` on ``log.product_id`` and on
    ``log.value``.
    """

    def pd_is_in(array, list_of_outputs=None):
        # Fall back to an empty list when no outputs were supplied.
        wanted = [] if list_of_outputs is None else list_of_outputs
        return pd.Series(array).isin(wanted)

    def isin_generate_name(self, base_feature_names):
        base_name = base_feature_names[0]
        outputs_repr = str(self.kwargs['list_of_outputs'])
        return u"%s.isin(%s)" % (base_name, outputs_repr)

    IsIn = make_trans_primitive(
        pd_is_in,
        [Variable],
        Boolean,
        name="is_in",
        description="For each value of the base feature, checks whether it is "
                    "in a list that is provided.",
        cls_attributes={"generate_name": isin_generate_name})

    def column_for(feature):
        # Compute the single feature for ids 0-7 and return its column.
        matrix = ft.calculate_feature_matrix(entityset=es,
                                             features=[feature],
                                             instance_ids=range(8))
        return matrix[feature.get_name()].values.tolist()

    product_hits = [True, True, True, False, False, True, True, True]

    # Custom primitive applied to the product id column.
    custom = ft.Feature(
        es['log']['product_id'],
        primitive=IsIn(list_of_outputs=["toothpaste", "coke zero"]))
    assert product_hits == column_for(custom)

    # Same membership check through the Feature.isin shortcut.
    shortcut = ft.Feature(es['log']['product_id']).isin(
        ["toothpaste", "coke zero"])
    assert product_hits == column_for(shortcut)

    # Membership check against the value column.
    value_hits = [False, True, True, False, False, False, False, False]
    on_values = ft.Feature(es['log']['value']).isin([5, 10])
    assert value_hits == column_for(on_values)
def test_isin_feat_custom(es):
    """Exercise a hand-built ``is_in`` primitive and ``Feature.isin``.

    A transform primitive is created with ``make_trans_primitive``
    (overriding ``_get_name``) and evaluated through ``PandasBackend`` for
    instance ids 0-7. The same check is then repeated with
    ``Feature(...).isin`` on ``log.product_id`` and on ``log.value``.
    """

    def pd_is_in(array, list_of_outputs=None):
        # Fall back to an empty list when no outputs were supplied.
        wanted = [] if list_of_outputs is None else list_of_outputs
        return pd.Series(array).isin(wanted)

    def isin_get_name(self):
        base_name = self.base_features[0].get_name()
        outputs_repr = str(self.kwargs['list_of_outputs'])
        return u"%s.isin(%s)" % (base_name, outputs_repr)

    IsIn = make_trans_primitive(
        pd_is_in,
        [Variable],
        Boolean,
        name="is_in",
        description="For each value of the base feature, checks whether it is "
                    "in a list that is provided.",
        cls_attributes={"_get_name": isin_get_name})

    def column_for(feature):
        # Compute the single feature for ids 0-7 and return its column.
        backend = PandasBackend(es, [feature])
        matrix = backend.calculate_all_features(range(8), None)
        return matrix[feature.get_name()].values.tolist()

    product_hits = [True, True, True, False, False, True, True, True]

    # Custom primitive applied to the product id column.
    custom = IsIn(es['log']['product_id'],
                  list_of_outputs=["toothpaste", "coke zero"])
    assert product_hits == column_for(custom)

    # Same membership check through the Feature.isin shortcut.
    shortcut = Feature(es['log']['product_id']).isin(
        ["toothpaste", "coke zero"])
    assert product_hits == column_for(shortcut)

    # Membership check against the value column.
    value_hits = [False, True, True, False, False, False, False, False]
    on_values = Feature(es['log']['value']).isin([5, 10])
    assert value_hits == column_for(on_values)
def test_isin_feat_custom(es):
    """Exercise a hand-built ``is_in`` primitive and ``Feature.isin``.

    A transform primitive is created with ``make_trans_primitive``
    (overriding ``generate_name``) and evaluated through ``PandasBackend``
    for instance ids 0-7. The same check is then repeated with
    ``Feature(...).isin`` on ``log.product_id`` and on ``log.value``.
    """

    def pd_is_in(array, list_of_outputs=None):
        # Fall back to an empty list when no outputs were supplied.
        wanted = [] if list_of_outputs is None else list_of_outputs
        return pd.Series(array).isin(wanted)

    def isin_generate_name(self):
        base_name = self.base_features[0].get_name()
        outputs_repr = str(self.kwargs['list_of_outputs'])
        return u"%s.isin(%s)" % (base_name, outputs_repr)

    IsIn = make_trans_primitive(
        pd_is_in,
        [Variable],
        Boolean,
        name="is_in",
        description="For each value of the base feature, checks whether it is "
                    "in a list that is provided.",
        cls_attributes={"generate_name": isin_generate_name})

    def column_for(feature):
        # Compute the single feature for ids 0-7 and return its column.
        backend = PandasBackend(es, [feature])
        matrix = backend.calculate_all_features(range(8), None)
        return matrix[feature.get_name()].values.tolist()

    product_hits = [True, True, True, False, False, True, True, True]

    # Custom primitive applied to the product id column.
    custom = IsIn(es['log']['product_id'],
                  list_of_outputs=["toothpaste", "coke zero"])
    assert product_hits == column_for(custom)

    # Same membership check through the Feature.isin shortcut.
    shortcut = Feature(es['log']['product_id']).isin(
        ["toothpaste", "coke zero"])
    assert product_hits == column_for(shortcut)

    # Membership check against the value column.
    value_hits = [False, True, True, False, False, False, False, False]
    on_values = Feature(es['log']['value']).isin([5, 10])
    assert value_hits == column_for(on_values)
def test_args_string_default():
    """An ``IsIn`` built without arguments reports an empty args string."""
    default_primitive = IsIn()
    rendered = default_primitive.get_args_string()
    assert rendered == ''
def test_single_args_string():
    """``IsIn([1, 2, 3])`` renders its list as ``list_of_outputs`` in the args string."""
    primitive = IsIn([1, 2, 3])
    rendered = primitive.get_args_string()
    assert rendered == ', list_of_outputs=[1, 2, 3]'
def test_deserializer_uses_common_primitive_instances_with_args(es, tmp_path):
    """Features sharing a primitive instance still share one after a round trip.

    For each primitive under test the function checks that ``ft.dfs`` reuses
    the passed-in primitive instance for every feature built from it, then
    saves the features with ``ft.save_features``, reloads them with
    ``ft.load_features`` and checks that the reloaded features again share a
    single primitive instance whose arguments were preserved.

    Args:
        es: EntitySet fixture the features are generated from.
        tmp_path: Temporary directory in which the feature files are written.
    """

    def scalar_features(candidates, marker):
        # Select multiply_numeric_scalar features whose name contains ``marker``.
        return [
            f
            for f in candidates
            if f.primitive.name == "multiply_numeric_scalar"
            and marker in f.get_name()
        ]

    # Single argument
    scalar1 = MultiplyNumericScalar(value=1)
    scalar5 = MultiplyNumericScalar(value=5)
    features = ft.dfs(
        entityset=es,
        target_dataframe_name="products",
        features_only=True,
        agg_primitives=["sum"],
        trans_primitives=[scalar1, scalar5],
    )

    scalar1_features = scalar_features(features, " * 1")
    scalar5_features = scalar_features(features, " * 5")

    # Make sure we have multiple features of each type
    assert len(scalar1_features) > 1
    assert len(scalar5_features) > 1

    # DFS should use the passed in primitive instance for all features
    assert all(f.primitive is scalar1 for f in scalar1_features)
    assert all(f.primitive is scalar5 for f in scalar5_features)

    file = os.path.join(tmp_path, "features.json")
    ft.save_features(features, file)
    deserialized_features = ft.load_features(file)

    new_scalar1_features = scalar_features(deserialized_features, " * 1")
    new_scalar5_features = scalar_features(deserialized_features, " * 5")

    # After deserialization all features that share a primitive should use
    # the same primitive instance
    new_scalar1_primitive = new_scalar1_features[0].primitive
    new_scalar5_primitive = new_scalar5_features[0].primitive
    assert all(f.primitive is new_scalar1_primitive for f in new_scalar1_features)
    assert all(f.primitive is new_scalar5_primitive for f in new_scalar5_features)
    assert new_scalar1_primitive.value == 1
    assert new_scalar5_primitive.value == 5

    # Test primitive with multiple args - pandas only due to primitive compatibility
    if es.dataframe_type == Library.PANDAS.value:
        distance_to_holiday = DistanceToHoliday(
            holiday="Victoria Day", country="Canada"
        )
        features = ft.dfs(
            entityset=es,
            target_dataframe_name="customers",
            features_only=True,
            agg_primitives=[],
            trans_primitives=[distance_to_holiday],
        )

        distance_features = [
            f for f in features if f.primitive.name == "distance_to_holiday"
        ]

        assert len(distance_features) > 1

        # DFS should use the passed in primitive instance for all features
        assert all(f.primitive is distance_to_holiday for f in distance_features)

        file = os.path.join(tmp_path, "distance_features.json")
        ft.save_features(distance_features, file)
        new_distance_features = ft.load_features(file)

        # After deserialization all features that share a primitive should
        # use the same primitive instance
        new_distance_primitive = new_distance_features[0].primitive
        assert all(
            f.primitive is new_distance_primitive for f in new_distance_features
        )
        assert new_distance_primitive.holiday == "Victoria Day"
        assert new_distance_primitive.country == "Canada"

    # Test primitive with list arg
    is_in = IsIn(list_of_outputs=[5, True, "coke zero"])
    features = ft.dfs(
        entityset=es,
        target_dataframe_name="customers",
        features_only=True,
        agg_primitives=[],
        trans_primitives=[is_in],
    )

    is_in_features = [f for f in features if f.primitive.name == "isin"]
    assert len(is_in_features) > 1

    # DFS should use the passed in primitive instance for all features
    assert all(f.primitive is is_in for f in is_in_features)

    # Dedicated file name so this round trip does not reuse the path of the
    # holiday features written in the pandas-only section.
    file = os.path.join(tmp_path, "is_in_features.json")
    ft.save_features(is_in_features, file)
    new_is_in_features = ft.load_features(file)

    # After deserialization all features that share a primitive should use
    # the same primitive instance
    new_is_in_primitive = new_is_in_features[0].primitive
    assert all(f.primitive is new_is_in_primitive for f in new_is_in_features)
    assert new_is_in_primitive.list_of_outputs == [5, True, "coke zero"]
def isin(self, list_of_output):
    """Build an ``IsIn`` feature on top of this one.

    Args:
        list_of_output: Values forwarded to ``IsIn`` as ``list_of_outputs``.

    Returns:
        The ``IsIn`` instance created with ``self`` as its first argument.
    """
    # Imported inside the method, as in the original code.
    from featuretools.primitives import IsIn

    membership_feature = IsIn(self, list_of_outputs=list_of_output)
    return membership_feature