def test_isin_feat(es):
    isin = IsIn(es['log']['product_id'],
                list_of_outputs=["toothpaste", "coke zero"])
    features = [isin]
    pandas_backend = PandasBackend(es, features)
    df = pandas_backend.calculate_all_features(range(8), None)
    true = [True, True, True, False, False, True, True, True]
    v = df[isin.get_name()].values.tolist()
    assert true == v
Exemple #2
0
def test_isin_feat_custom(es):
    def pd_is_in(array, list_of_outputs=None):
        if list_of_outputs is None:
            list_of_outputs = []
        return pd.Series(array).isin(list_of_outputs)

    def isin_generate_name(self, base_feature_names):
        return u"%s.isin(%s)" % (base_feature_names[0],
                                 str(self.kwargs['list_of_outputs']))

    IsIn = make_trans_primitive(
        pd_is_in, [Variable],
        Boolean,
        name="is_in",
        description="For each value of the base feature, checks whether it is "
        "in a list that is provided.",
        cls_attributes={"generate_name": isin_generate_name})

    isin = ft.Feature(
        es['log']['product_id'],
        primitive=IsIn(list_of_outputs=["toothpaste", "coke zero"]))
    features = [isin]
    df = ft.calculate_feature_matrix(entityset=es,
                                     features=features,
                                     instance_ids=range(8))
    true = [True, True, True, False, False, True, True, True]
    v = df[isin.get_name()].values.tolist()
    assert true == v

    isin = ft.Feature(es['log']['product_id']).isin(
        ["toothpaste", "coke zero"])
    features = [isin]
    df = ft.calculate_feature_matrix(entityset=es,
                                     features=features,
                                     instance_ids=range(8))
    true = [True, True, True, False, False, True, True, True]
    v = df[isin.get_name()].values.tolist()
    assert true == v

    isin = ft.Feature(es['log']['value']).isin([5, 10])
    features = [isin]
    df = ft.calculate_feature_matrix(entityset=es,
                                     features=features,
                                     instance_ids=range(8))
    true = [False, True, True, False, False, False, False, False]
    v = df[isin.get_name()].values.tolist()
    assert true == v
def test_isin_feat_custom(es):
    def pd_is_in(array, list_of_outputs=None):
        if list_of_outputs is None:
            list_of_outputs = []
        return pd.Series(array).isin(list_of_outputs)

    def isin_get_name(self):
        return u"%s.isin(%s)" % (self.base_features[0].get_name(),
                                 str(self.kwargs['list_of_outputs']))

    IsIn = make_trans_primitive(
        pd_is_in, [Variable],
        Boolean,
        name="is_in",
        description="For each value of the base feature, checks whether it is "
        "in a list that is provided.",
        cls_attributes={"_get_name": isin_get_name})

    isin = IsIn(es['log']['product_id'],
                list_of_outputs=["toothpaste", "coke zero"])
    features = [isin]
    pandas_backend = PandasBackend(es, features)
    df = pandas_backend.calculate_all_features(range(8), None)
    true = [True, True, True, False, False, True, True, True]
    v = df[isin.get_name()].values.tolist()
    assert true == v

    isin = Feature(es['log']['product_id']).isin(["toothpaste", "coke zero"])
    features = [isin]
    pandas_backend = PandasBackend(es, features)
    df = pandas_backend.calculate_all_features(range(8), None)
    true = [True, True, True, False, False, True, True, True]
    v = df[isin.get_name()].values.tolist()
    assert true == v

    isin = Feature(es['log']['value']).isin([5, 10])
    features = [isin]
    pandas_backend = PandasBackend(es, features)
    df = pandas_backend.calculate_all_features(range(8), None)
    true = [False, True, True, False, False, False, False, False]
    v = df[isin.get_name()].values.tolist()
    assert true == v
def test_isin_feat_custom(es):
    def pd_is_in(array, list_of_outputs=None):
        if list_of_outputs is None:
            list_of_outputs = []
        return pd.Series(array).isin(list_of_outputs)

    def isin_generate_name(self):
        return u"%s.isin(%s)" % (self.base_features[0].get_name(),
                                 str(self.kwargs['list_of_outputs']))

    IsIn = make_trans_primitive(
        pd_is_in,
        [Variable],
        Boolean,
        name="is_in",
        description="For each value of the base feature, checks whether it is "
        "in a list that is provided.",
        cls_attributes={"generate_name": isin_generate_name})

    isin = IsIn(es['log']['product_id'],
                list_of_outputs=["toothpaste", "coke zero"])
    features = [isin]
    pandas_backend = PandasBackend(es, features)
    df = pandas_backend.calculate_all_features(range(8), None)
    true = [True, True, True, False, False, True, True, True]
    v = df[isin.get_name()].values.tolist()
    assert true == v

    isin = Feature(es['log']['product_id']).isin(["toothpaste", "coke zero"])
    features = [isin]
    pandas_backend = PandasBackend(es, features)
    df = pandas_backend.calculate_all_features(range(8), None)
    true = [True, True, True, False, False, True, True, True]
    v = df[isin.get_name()].values.tolist()
    assert true == v

    isin = Feature(es['log']['value']).isin([5, 10])
    features = [isin]
    pandas_backend = PandasBackend(es, features)
    df = pandas_backend.calculate_all_features(range(8), None)
    true = [False, True, True, False, False, False, False, False]
    v = df[isin.get_name()].values.tolist()
    assert true == v
def test_args_string_default():
    assert IsIn().get_args_string() == ''
def test_single_args_string():
    assert IsIn([1, 2, 3]).get_args_string() == ', list_of_outputs=[1, 2, 3]'
Exemple #7
0
def test_deserializer_uses_common_primitive_instances_with_args(es, tmp_path):
    # Single argument
    scalar1 = MultiplyNumericScalar(value=1)
    scalar5 = MultiplyNumericScalar(value=5)
    features = ft.dfs(
        entityset=es,
        target_dataframe_name="products",
        features_only=True,
        agg_primitives=["sum"],
        trans_primitives=[scalar1, scalar5],
    )

    scalar1_features = [
        f
        for f in features
        if f.primitive.name == "multiply_numeric_scalar" and " * 1" in f.get_name()
    ]
    scalar5_features = [
        f
        for f in features
        if f.primitive.name == "multiply_numeric_scalar" and " * 5" in f.get_name()
    ]

    # Make sure we have multiple features of each type
    assert len(scalar1_features) > 1
    assert len(scalar5_features) > 1

    # DFS should use the the passed in primitive instance for all features
    assert all([f.primitive is scalar1 for f in scalar1_features])
    assert all([f.primitive is scalar5 for f in scalar5_features])

    file = os.path.join(tmp_path, "features.json")
    ft.save_features(features, file)
    deserialized_features = ft.load_features(file)

    new_scalar1_features = [
        f
        for f in deserialized_features
        if f.primitive.name == "multiply_numeric_scalar" and " * 1" in f.get_name()
    ]
    new_scalar5_features = [
        f
        for f in deserialized_features
        if f.primitive.name == "multiply_numeric_scalar" and " * 5" in f.get_name()
    ]

    # After deserialization all features that share a primitive should use the same primitive instance
    new_scalar1_primitive = new_scalar1_features[0].primitive
    new_scalar5_primitive = new_scalar5_features[0].primitive
    assert all([f.primitive is new_scalar1_primitive for f in new_scalar1_features])
    assert all([f.primitive is new_scalar5_primitive for f in new_scalar5_features])
    assert new_scalar1_primitive.value == 1
    assert new_scalar5_primitive.value == 5

    # Test primitive with multiple args - pandas only due to primitive compatibility
    if es.dataframe_type == Library.PANDAS.value:
        distance_to_holiday = DistanceToHoliday(
            holiday="Victoria Day", country="Canada"
        )
        features = ft.dfs(
            entityset=es,
            target_dataframe_name="customers",
            features_only=True,
            agg_primitives=[],
            trans_primitives=[distance_to_holiday],
        )

        distance_features = [
            f for f in features if f.primitive.name == "distance_to_holiday"
        ]

        assert len(distance_features) > 1

        # DFS should use the the passed in primitive instance for all features
        assert all([f.primitive is distance_to_holiday for f in distance_features])

        file = os.path.join(tmp_path, "distance_features.json")
        ft.save_features(distance_features, file)
        new_distance_features = ft.load_features(file)

        # After deserialization all features that share a primitive should use the same primitive instance
        new_distance_primitive = new_distance_features[0].primitive
        assert all(
            [f.primitive is new_distance_primitive for f in new_distance_features]
        )
        assert new_distance_primitive.holiday == "Victoria Day"
        assert new_distance_primitive.country == "Canada"

    # Test primitive with list arg
    is_in = IsIn(list_of_outputs=[5, True, "coke zero"])
    features = ft.dfs(
        entityset=es,
        target_dataframe_name="customers",
        features_only=True,
        agg_primitives=[],
        trans_primitives=[is_in],
    )

    is_in_features = [f for f in features if f.primitive.name == "isin"]
    assert len(is_in_features) > 1

    # DFS should use the the passed in primitive instance for all features
    assert all([f.primitive is is_in for f in is_in_features])

    file = os.path.join(tmp_path, "distance_features.json")
    ft.save_features(is_in_features, file)
    new_is_in_features = ft.load_features(file)

    # After deserialization all features that share a primitive should use the same primitive instance
    new_is_in_primitive = new_is_in_features[0].primitive
    assert all([f.primitive is new_is_in_primitive for f in new_is_in_features])
    assert new_is_in_primitive.list_of_outputs == [5, True, "coke zero"]
 def isin(self, list_of_output):
     from featuretools.primitives import IsIn
     return IsIn(self, list_of_outputs=list_of_output)