def test_init_and_name(es):
    """Smoke-test every registered transform primitive against the `es` fixture.

    Verifies that (1) each TransformPrimitive subclass exposes a ``name``,
    and (2) each compatible primitive can be stacked onto matching input
    features, named, and calculated without raising.
    """
    log = es['log']
    # Direct feature of products.rating on the log dataframe gives the
    # scalar-comparison primitives a numeric input to match against.
    rating = ft.Feature(ft.IdentityFeature(es["products"].ww["rating"]), "log")
    log_features = [ft.Feature(es['log'].ww[col]) for col in log.columns] +\
        [ft.Feature(rating, primitive=GreaterThanScalar(2.5)),
         ft.Feature(rating, primitive=GreaterThanScalar(3.5))]
    # Add Timedelta feature
    # features.append(pd.Timestamp.now() - ft.Feature(log['datetime']))
    customers_features = [
        ft.Feature(es["customers"].ww[col]) for col in es["customers"].columns
    ]

    # check all transform primitives have a name
    for attribute_string in dir(ft.primitives):
        attr = getattr(ft.primitives, attribute_string)
        # Only concrete subclasses, not the TransformPrimitive base itself.
        if isclass(attr) and issubclass(attr, TransformPrimitive) \
                and attr is not TransformPrimitive:
            assert getattr(attr, "name") is not None

    trans_primitives = get_transform_primitives().values()
    # If Dask EntitySet use only Dask compatible primitives
    if es.dataframe_type == Library.DASK.value:
        trans_primitives = [
            prim for prim in trans_primitives
            if Library.DASK in prim.compatibility
        ]
    if es.dataframe_type == Library.KOALAS.value:
        trans_primitives = [
            prim for prim in trans_primitives
            if Library.KOALAS in prim.compatibility
        ]

    for transform_prim in trans_primitives:
        # skip automated testing if a few special cases
        features_to_use = log_features
        if transform_prim in [NotEqual, Equal]:
            continue
        if transform_prim in [Age]:
            # Age needs the datetime columns found on customers.
            features_to_use = customers_features

        # use the input_types matching function from DFS
        input_types = transform_prim.input_types
        # input_types may hold several alternative signatures; use the first.
        if isinstance(input_types[0], list):
            matching_inputs = match(input_types[0], features_to_use)
        else:
            matching_inputs = match(input_types, features_to_use)
        if not matching_inputs:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)
        for prim in matching_inputs:
            instance = ft.Feature(prim, primitive=transform_prim)

            # try to get name and calculate
            instance.get_name()
            ft.calculate_feature_matrix([instance], entityset=es)
def test_init_and_name(es):
    """Smoke-test every transform primitive: build a feature, name it, calculate it.

    Legacy (entity/variable) API version: iterates all registered transform
    primitives and raises if any primitive finds no matching input features,
    so new primitives cannot silently go untested.
    """
    log = es['log']
    # Direct feature of products.rating on the log entity supplies a numeric
    # input for the scalar-comparison primitive below.
    rating = ft.Feature(es["products"]["rating"], es["log"])
    features = [ft.Feature(v) for v in log.variables] +\
        [ft.Feature(rating, primitive=GreaterThanScalar(2.5))]
    # Add Timedelta feature
    # features.append(pd.Timestamp.now() - ft.Feature(log['datetime']))
    for transform_prim in get_transform_primitives().values():

        # skip automated testing if a few special cases
        if transform_prim in [NotEqual, Equal]:
            continue

        # use the input_types matching function from DFS
        input_types = transform_prim.input_types
        # input_types may hold several alternative signatures; use the first.
        if isinstance(input_types[0], list):
            matching_inputs = match(input_types[0], features)
        else:
            matching_inputs = match(input_types, features)
        if not matching_inputs:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)
        for s in matching_inputs:
            instance = ft.Feature(s, primitive=transform_prim)

            # try to get name and calculate
            instance.get_name()
            ft.calculate_feature_matrix([instance], entityset=es).head(5)
def test_does_not_warn_with_stacking_feature(pd_es):
    """Stacking percent_true on a scoped greater_than_scalar emits no warnings."""
    options = {"greater_than_scalar": {"include_dataframes": ["stores"]}}

    with pytest.warns(None) as captured:
        dfs(
            entityset=pd_es,
            target_dataframe_name="régions",
            agg_primitives=["percent_true"],
            trans_primitives=[GreaterThanScalar(5)],
            primitive_options=options,
            features_only=True,
        )

    # No warning records should have been captured.
    assert not captured
def test_does_not_warn_with_stacking_feature(pd_es):
    """Stacking percent_true on an entity-scoped greater_than_scalar emits no warnings."""
    options = {'greater_than_scalar': {'include_entities': ['stores']}}

    with pytest.warns(None) as captured:
        dfs(entityset=pd_es,
            target_entity='régions',
            agg_primitives=['percent_true'],
            trans_primitives=[GreaterThanScalar(5)],
            primitive_options=options,
            features_only=True)

    # No warning records should have been captured.
    assert not captured
# Beispiel #5 (Example 5, score: 0)
def test_override_boolean(es):
    """OR/AND and the ~ (NOT) operator on boolean features compute correctly."""
    count = ft.Feature(es['log']['id'], parent_entity=es['sessions'], primitive=Count)
    count_lo = ft.Feature(count, primitive=GreaterThanScalar(1))
    count_hi = ft.Feature(count, primitive=LessThanScalar(10))

    # Expected column values for the three composed features below, in order.
    expected = [[True, True, True],
                [True, True, False],
                [False, False, True]]

    features = [
        count_lo.OR(count_hi),
        count_lo.AND(count_hi),
        ~(count_lo.AND(count_hi)),
    ]

    df = ft.calculate_feature_matrix(entityset=es, features=features, instance_ids=[0, 1, 2])
    for feature, expected_col in zip(features, expected):
        assert df[feature.get_name()].values.tolist() == expected_col
# Beispiel #6 (Example 6, score: 0)
def test_init_and_name(es):
    """Smoke-test every transform primitive compatible with the fixture's backend.

    Filters the primitive registry down to Dask/Koalas-compatible primitives
    when the EntitySet is backed by those libraries, then builds, names, and
    calculates a feature for each matching primitive. Raises if a primitive
    finds no matching inputs, so new primitives cannot silently go untested.
    """
    log = es['log']
    # Direct feature of products.rating on the log entity supplies a numeric
    # input for the scalar-comparison primitive below.
    rating = ft.Feature(es["products"]["rating"], es["log"])
    log_features = [ft.Feature(v) for v in log.variables] +\
        [ft.Feature(rating, primitive=GreaterThanScalar(2.5))]
    # Add Timedelta feature
    # features.append(pd.Timestamp.now() - ft.Feature(log['datetime']))
    customers_features = [ft.Feature(v) for v in es["customers"].variables]
    trans_primitives = get_transform_primitives().values()
    # If Dask EntitySet use only Dask compatible primitives
    if isinstance(es['log'].df, dd.DataFrame):
        trans_primitives = [
            prim for prim in trans_primitives
            if Library.DASK in prim.compatibility
        ]
    # `ks` is None when koalas is not installed; guard before isinstance.
    if ks and isinstance(es['log'].df, ks.DataFrame):
        trans_primitives = [
            prim for prim in trans_primitives
            if Library.KOALAS in prim.compatibility
        ]
    for transform_prim in trans_primitives:
        # skip automated testing if a few special cases
        features_to_use = log_features
        if transform_prim in [NotEqual, Equal]:
            continue
        if transform_prim in [Age]:
            # Age needs the datetime columns found on customers.
            features_to_use = customers_features

        # use the input_types matching function from DFS
        input_types = transform_prim.input_types
        # input_types may hold several alternative signatures; use the first.
        if isinstance(input_types[0], list):
            matching_inputs = match(input_types[0], features_to_use)
        else:
            matching_inputs = match(input_types, features_to_use)
        if not matching_inputs:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)
        for prim in matching_inputs:
            instance = ft.Feature(prim, primitive=transform_prim)

            # try to get name and calculate
            instance.get_name()
            ft.calculate_feature_matrix([instance], entityset=es)
def test_override_boolean(es):
    """OR/AND and the ~ (NOT) operator on boolean features compute correctly."""
    count = ft.Feature(es["log"].ww["id"],
                       parent_dataframe_name="sessions",
                       primitive=Count)
    count_lo = ft.Feature(count, primitive=GreaterThanScalar(1))
    count_hi = ft.Feature(count, primitive=LessThanScalar(10))

    # Expected column values for the three composed features below, in order.
    expected_columns = [[True, True, True],
                        [True, True, False],
                        [False, False, True]]

    features = [
        count_lo.OR(count_hi),
        count_lo.AND(count_hi),
        ~(count_lo.AND(count_hi)),
    ]

    fm = ft.calculate_feature_matrix(entityset=es,
                                     features=features,
                                     instance_ids=[0, 1, 2])
    # Normalize to pandas so the assertions work for Dask/Koalas backends too.
    fm = to_pandas(fm, index="id", sort_index=True)
    for feature, expected in zip(features, expected_columns):
        assert fm[feature.get_name()].tolist() == expected