def test_init_and_name(es):
    """Build, name and calculate a feature for every transform primitive.

    Each primitive class returned by ``get_transform_primitives()`` is
    applied to every combination of candidate features that ``match`` finds
    for its input types. Nothing is asserted about the computed values; the
    test passes when naming and calculation run without raising.

    Args:
        es: EntitySet fixture; ``es['log']`` and ``es['products']`` are read.

    Raises:
        Exception: if no candidate features match a primitive's input types,
            so that the primitive would otherwise go untested.
    """
    log = es['log']
    rating = ft.Feature(es["products"]["rating"], log)
    features = [ft.Feature(v) for v in log.variables]
    features.append(ft.Feature(rating, primitive=GreaterThanScalar(2.5)))
    # NOTE: the Timedelta candidate is currently disabled:
    # features.append(pd.Timestamp.now() - ft.Feature(log['datetime']))

    for transform_prim in get_transform_primitives().values():
        # Equal / NotEqual are special cases excluded from automated testing
        if transform_prim in (NotEqual, Equal):
            continue

        # use the input_types matching function from DFS; when a primitive
        # declares several alternative signatures (a list of lists), only
        # the first one is exercised
        input_types = transform_prim.input_types
        if isinstance(input_types[0], list):
            input_types = input_types[0]
        matching_inputs = match(input_types, features)
        if not matching_inputs:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)

        for inputs in matching_inputs:
            instance = ft.Feature(inputs, primitive=transform_prim)

            # try to get name and calculate
            instance.get_name()
            ft.calculate_feature_matrix([instance], entityset=es).head(5)
def test_init_and_name(es):
    """Instantiate, name and preview a feature for each transform primitive.

    Iterates over ``get_transform_primitives()``, skipping subclasses of
    ``Compare``. For every other primitive, all of its declared input
    signatures are matched against the candidate features, and the primitive
    is called on each match. Nothing is asserted about the results; the test
    passes when ``get_name()`` and ``head()`` run without raising.

    Args:
        es: EntitySet fixture; ``es['log']`` and ``es['products']`` are read.

    Raises:
        Exception: if none of a primitive's input signatures matches any
            candidate features, so that the primitive would go untested.
    """
    log = es['log']
    features = [Feature(v) for v in log.variables]
    features.append(
        GreaterThan(Feature(es["products"]["rating"], log), 2.5))
    # Add Timedelta feature
    features.append(pd.Timestamp.now() - Feature(log['datetime']))

    for transform_prim in get_transform_primitives():
        if issubclass(transform_prim, Compare):
            continue

        # use the input_types matching function from DFS; a flat signature
        # is wrapped so both shapes go through the same matching code
        input_types = transform_prim.input_types
        if not isinstance(input_types[0], list):
            input_types = [input_types]
        matching_inputs = [
            inputs
            for signature in input_types
            for inputs in match(signature, features)
        ]
        if not matching_inputs:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)

        for inputs in matching_inputs:
            instance = transform_prim(*inputs)

            # try to get name and calculate
            instance.get_name()
            instance.head()
def test_init_and_name(es):
    """Check that every transform primitive can be built, named and previewed.

    Walks the primitive classes in ``get_transform_primitives().values()``
    and ignores subclasses of ``Compare``. Each remaining primitive is
    called on every input combination that ``match`` returns for any of its
    input signatures. No values are asserted; the test passes when
    ``get_name()`` and ``head()`` complete without raising.

    Args:
        es: EntitySet fixture; ``es['log']`` and ``es['products']`` are read.

    Raises:
        Exception: if no candidate features match any input signature of a
            primitive, so that the primitive would go untested.
    """
    log = es['log']
    features = [Feature(v) for v in log.variables]
    features.append(
        GreaterThan(Feature(es["products"]["rating"], log), 2.5))
    # Add Timedelta feature
    features.append(pd.Timestamp.now() - Feature(log['datetime']))

    for transform_prim in get_transform_primitives().values():
        if issubclass(transform_prim, Compare):
            continue

        # use the input_types matching function from DFS
        input_types = transform_prim.input_types
        if isinstance(input_types[0], list):
            # several alternative signatures: collect matches for all of them
            signatures = input_types
        else:
            signatures = [input_types]

        matching_inputs = []
        for signature in signatures:
            matching_inputs.extend(match(signature, features))
        if not matching_inputs:
            raise Exception(
                "Transform Primitive %s not tested" % transform_prim.name)

        for inputs in matching_inputs:
            instance = transform_prim(*inputs)

            # try to get name and calculate
            instance.get_name()
            instance.head()
def test_init_and_name(es):
    """Check every transform primitive has a name and can be calculated.

    First asserts that each ``TransformPrimitive`` subclass exposed on
    ``ft.primitives`` has a non-``None`` ``name``. Then, for each primitive
    from ``get_transform_primitives()`` that is compatible with the
    EntitySet's dataframe type, builds a feature for every matching input
    combination, asks for its name and calculates it. ``Equal`` and
    ``NotEqual`` are skipped; ``Age`` is matched against ``customers``
    features instead of ``log`` features.

    Args:
        es: EntitySet fixture; ``es['log']``, ``es['products']`` and
            ``es['customers']`` are read.

    Raises:
        Exception: if no candidate features match a primitive's input types,
            so that the primitive would otherwise go untested.
    """
    log = es['log']
    rating = ft.Feature(ft.IdentityFeature(es["products"].ww["rating"]), "log")
    log_features = [ft.Feature(log.ww[col]) for col in log.columns]
    log_features.extend([
        ft.Feature(rating, primitive=GreaterThanScalar(2.5)),
        ft.Feature(rating, primitive=GreaterThanScalar(3.5)),
    ])
    # NOTE: the Timedelta candidate is currently disabled:
    # features.append(pd.Timestamp.now() - ft.Feature(log['datetime']))
    customers = es["customers"]
    customers_features = [
        ft.Feature(customers.ww[col]) for col in customers.columns
    ]

    # check all transform primitives have a name
    for attribute_string in dir(ft.primitives):
        attr = getattr(ft.primitives, attribute_string)
        if (isclass(attr)
                and issubclass(attr, TransformPrimitive)
                and attr is not TransformPrimitive):
            assert attr.name is not None

    trans_primitives = get_transform_primitives().values()
    # On a Dask or Koalas EntitySet use only the primitives compatible with
    # that library
    for library in (Library.DASK, Library.KOALAS):
        if es.dataframe_type == library.value:
            trans_primitives = [
                prim for prim in trans_primitives
                if library in prim.compatibility
            ]

    for transform_prim in trans_primitives:
        # skip automated testing of a few special cases
        if transform_prim in (NotEqual, Equal):
            continue
        if transform_prim is Age:
            features_to_use = customers_features
        else:
            features_to_use = log_features

        # use the input_types matching function from DFS; when a primitive
        # declares several alternative signatures (a list of lists), only
        # the first one is exercised
        input_types = transform_prim.input_types
        if isinstance(input_types[0], list):
            input_types = input_types[0]
        matching_inputs = match(input_types, features_to_use)
        if not matching_inputs:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)

        for inputs in matching_inputs:
            instance = ft.Feature(inputs, primitive=transform_prim)

            # try to get name and calculate
            instance.get_name()
            ft.calculate_feature_matrix([instance], entityset=es)
def test_init_and_name(es):
    """Build, name and calculate a feature for every aggregation primitive.

    For each primitive class in ``get_aggregation_primitives().values()``,
    every declared input signature is matched against the ``log`` features,
    and an aggregation feature with ``es['sessions']`` as parent is created
    for each match. Nothing is asserted about the computed values; the test
    passes when naming and calculation run without raising.

    Args:
        es: EntitySet fixture; ``es['sessions']`` and ``es['log']`` are read.

    Raises:
        Exception: if any input signature of a primitive has no matching
            features, so that the signature would go untested.
    """
    session = es['sessions']
    log = es['log']

    features = [ft.Feature(v) for v in log.variables]
    for agg_prim in get_aggregation_primitives().values():
        # A flat signature is wrapped so that a single signature and a list
        # of alternatives are handled by the same loop
        input_types = agg_prim.input_types
        if not isinstance(input_types[0], list):
            input_types = [input_types]

        # test each allowed input_types for this primitive
        for signature in input_types:
            # use the input_types matching function from DFS
            matching_types = match(signature, features)
            if not matching_types:
                raise Exception("Agg Primitive %s not tested" % agg_prim.name)

            for inputs in matching_types:
                instance = ft.Feature(inputs,
                                      parent_entity=session,
                                      primitive=agg_prim)

                # try to get name and calculate
                instance.get_name()
                ft.calculate_feature_matrix([instance], entityset=es).head(5)
Exemple #6
0
def test_init_and_name(es):
    """Build, name and calculate a feature for every transform primitive.

    Primitives come from ``get_transform_primitives()``. When the ``log``
    dataframe is a Dask or Koalas dataframe, only primitives listing that
    library in ``compatibility`` are used. ``Equal`` and ``NotEqual`` are
    skipped; ``Age`` is matched against ``customers`` features instead of
    ``log`` features. Nothing is asserted about the computed values.

    Args:
        es: EntitySet fixture; ``es['log']``, ``es['products']`` and
            ``es['customers']`` are read.

    Raises:
        Exception: if no candidate features match a primitive's input types,
            so that the primitive would otherwise go untested.
    """
    log = es['log']
    rating = ft.Feature(es["products"]["rating"], log)
    log_features = [ft.Feature(v) for v in log.variables]
    log_features.append(ft.Feature(rating, primitive=GreaterThanScalar(2.5)))
    # NOTE: the Timedelta candidate is currently disabled:
    # features.append(pd.Timestamp.now() - ft.Feature(log['datetime']))
    customers_features = [ft.Feature(v) for v in es["customers"].variables]

    trans_primitives = get_transform_primitives().values()
    log_df = log.df
    # If Dask EntitySet use only Dask compatible primitives
    if isinstance(log_df, dd.DataFrame):
        trans_primitives = [
            prim for prim in trans_primitives
            if Library.DASK in prim.compatibility
        ]
    # `ks` is checked for truthiness before use, so a falsy `ks` skips the
    # Koalas filter altogether
    if ks and isinstance(log_df, ks.DataFrame):
        trans_primitives = [
            prim for prim in trans_primitives
            if Library.KOALAS in prim.compatibility
        ]

    for transform_prim in trans_primitives:
        # skip automated testing of a few special cases
        if transform_prim in (NotEqual, Equal):
            continue
        if transform_prim is Age:
            features_to_use = customers_features
        else:
            features_to_use = log_features

        # use the input_types matching function from DFS; when a primitive
        # declares several alternative signatures (a list of lists), only
        # the first one is exercised
        input_types = transform_prim.input_types
        if isinstance(input_types[0], list):
            input_types = input_types[0]
        matching_inputs = match(input_types, features_to_use)
        if not matching_inputs:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)

        for inputs in matching_inputs:
            instance = ft.Feature(inputs, primitive=transform_prim)

            # try to get name and calculate
            instance.get_name()
            ft.calculate_feature_matrix([instance], entityset=es)
Exemple #7
0
def test_init_and_name(es):
    """Check every aggregation primitive has a name and can be calculated.

    First asserts that each ``AggregationPrimitive`` subclass exposed on
    ``ft.primitives`` has a non-``None`` ``name``. Then, for each primitive
    from ``get_aggregation_primitives()`` that is compatible with the
    EntitySet's dataframe type, builds an aggregation feature (parent
    dataframe ``'sessions'``) for every matching input combination of every
    declared signature, asks for its name and calculates it.

    Side effect: a ``boolean_nullable`` column is added to ``es['log']``.

    Args:
        es: EntitySet fixture; ``es['log']`` is read and modified.

    Raises:
        Exception: if any input signature of a primitive has no matching
            features, so that the signature would go untested.
    """
    log = es['log']

    # Add a BooleanNullable column so primitives with that input type get tested
    boolean_nullable = log.ww['purchased']
    boolean_nullable = boolean_nullable.ww.set_logical_type('BooleanNullable')
    log.ww['boolean_nullable'] = boolean_nullable

    features = [ft.Feature(log.ww[col]) for col in log.columns]

    # check all primitives have name
    for attribute_string in dir(ft.primitives):
        attr = getattr(ft.primitives, attribute_string)
        if (isclass(attr)
                and issubclass(attr, AggregationPrimitive)
                and attr is not AggregationPrimitive):
            assert attr.name is not None

    agg_primitives = get_aggregation_primitives().values()
    # On a Dask or Koalas EntitySet use only the primitives compatible with
    # that library
    for library in (Library.DASK, Library.KOALAS):
        if es.dataframe_type == library.value:
            agg_primitives = [
                prim for prim in agg_primitives
                if library in prim.compatibility
            ]

    for agg_prim in agg_primitives:
        # A flat signature is wrapped so that a single signature and a list
        # of alternatives are handled by the same loop
        input_types = agg_prim.input_types
        if not isinstance(input_types[0], list):
            input_types = [input_types]

        # test each allowed input_types for this primitive
        for signature in input_types:
            # use the input_types matching function from DFS
            matching_types = match(signature, features)
            if not matching_types:
                raise Exception("Agg Primitive %s not tested" % agg_prim.name)

            for inputs in matching_types:
                instance = ft.Feature(inputs,
                                      parent_dataframe_name='sessions',
                                      primitive=agg_prim)

                # try to get name and calculate
                instance.get_name()
                ft.calculate_feature_matrix([instance], entityset=es)
Exemple #8
0
def test_init_and_name(es):
    """Instantiate, name and calculate a feature for each transform primitive.

    Each primitive class in ``get_transform_primitives().values()`` is
    called on every combination of candidate features that ``match`` finds
    for its input types. Nothing is asserted about the computed values; the
    test passes when naming and calculation run without raising.

    Args:
        es: EntitySet fixture; ``es['log']`` and ``es['products']`` are read.

    Raises:
        Exception: if no candidate features match a primitive's input types,
            so that the primitive would otherwise go untested.
    """
    from featuretools import calculate_feature_matrix

    log = es['log']
    features = [Feature(v) for v in log.variables]
    features.append(
        GreaterThan(Feature(es["products"]["rating"], log), 2.5))
    # Add Timedelta feature
    features.append(pd.Timestamp.now() - Feature(log['datetime']))

    for transform_prim in get_transform_primitives().values():
        # use the input_types matching function from DFS; when a primitive
        # declares several alternative signatures (a list of lists), only
        # the first one is exercised
        input_types = transform_prim.input_types
        if isinstance(input_types[0], list):
            input_types = input_types[0]
        matching_inputs = match(input_types, features)
        if not matching_inputs:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)

        for inputs in matching_inputs:
            instance = transform_prim(*inputs)

            # try to get name and calculate
            instance.get_name()
            calculate_feature_matrix([instance], entityset=es).head(5)
def test_init_and_name(es):
    """Verify each transform primitive can be instantiated, named and computed.

    Loops over the primitive classes from
    ``get_transform_primitives().values()``, finds the candidate feature
    combinations accepted by the primitive via ``match``, and for each one
    builds the feature, requests its name and calculates a feature matrix.
    No values are asserted.

    Args:
        es: EntitySet fixture; ``es['log']`` and ``es['products']`` are read.

    Raises:
        Exception: if no candidate features match a primitive's input types,
            so that the primitive would otherwise go untested.
    """
    from featuretools import calculate_feature_matrix

    log = es['log']
    features = [Feature(v) for v in log.variables]
    features.append(
        GreaterThan(Feature(es["products"]["rating"], log), 2.5))
    # Add Timedelta feature
    features.append(pd.Timestamp.now() - Feature(log['datetime']))

    for transform_prim in get_transform_primitives().values():
        # use the input_types matching function from DFS
        input_types = transform_prim.input_types
        # Only the first signature is used when several alternatives are
        # declared as a list of lists
        first_is_list = isinstance(input_types[0], list)
        signature = input_types[0] if first_is_list else input_types
        matching_inputs = match(signature, features)
        if not matching_inputs:
            raise Exception(
                "Transform Primitive %s not tested" % transform_prim.name)

        for inputs in matching_inputs:
            instance = transform_prim(*inputs)

            # try to get name and calculate
            instance.get_name()
            calculate_feature_matrix([instance], entityset=es).head(5)
def test_init_and_name(es):
    """Instantiate, name and calculate a feature per aggregation primitive.

    For each primitive class in ``get_aggregation_primitives().values()``,
    every declared input signature is matched against the ``log`` features,
    and the primitive is called on each match with ``es['sessions']`` as the
    parent entity. Nothing is asserted about the computed values.

    Args:
        es: EntitySet fixture; ``es['sessions']`` and ``es['log']`` are read.

    Raises:
        Exception: if any input signature of a primitive has no matching
            features, so that the signature would go untested.
    """
    session = es['sessions']
    log = es['log']

    features = [Feature(v) for v in log.variables]
    for agg_prim in get_aggregation_primitives().values():
        # A flat signature is wrapped so that a single signature and a list
        # of alternatives are handled by the same loop
        input_types = agg_prim.input_types
        if not isinstance(input_types[0], list):
            input_types = [input_types]

        # test each allowed input_types for this primitive
        for signature in input_types:
            # use the input_types matching function from DFS
            matching_types = match(signature, features)
            if not matching_types:
                raise Exception("Agg Primitive %s not tested" % agg_prim.name)

            for inputs in matching_types:
                instance = agg_prim(inputs, parent_entity=session)

                # try to get name and calculate
                instance.get_name()
                ft.calculate_feature_matrix([instance], entityset=es).head(5)
def test_init_and_name(es):
    """Build, name and calculate a feature for every aggregation primitive.

    Primitives come from ``get_aggregation_primitives()``. When the
    ``sessions`` dataframe is a Dask or Koalas dataframe, only primitives
    listing that library in ``compatibility`` are used. Every declared input
    signature is matched against the ``log`` features and an aggregation
    feature with ``es['sessions']`` as parent is created for each match.
    Nothing is asserted about the computed values.

    Args:
        es: EntitySet fixture; ``es['sessions']`` and ``es['log']`` are read.

    Raises:
        Exception: if any input signature of a primitive has no matching
            features, so that the signature would go untested.
    """
    session = es['sessions']
    log = es['log']

    features = [ft.Feature(v) for v in log.variables]
    agg_primitives = get_aggregation_primitives().values()
    sessions_df = session.df
    # If Dask EntitySet use only Dask compatible primitives
    if isinstance(sessions_df, dd.DataFrame):
        agg_primitives = [
            prim for prim in agg_primitives
            if Library.DASK in prim.compatibility
        ]
    # `ks` is checked for truthiness before use, so a falsy `ks` skips the
    # Koalas filter altogether
    if ks and isinstance(sessions_df, ks.DataFrame):
        agg_primitives = [
            prim for prim in agg_primitives
            if Library.KOALAS in prim.compatibility
        ]

    for agg_prim in agg_primitives:
        # A flat signature is wrapped so that a single signature and a list
        # of alternatives are handled by the same loop
        input_types = agg_prim.input_types
        if not isinstance(input_types[0], list):
            input_types = [input_types]

        # test each allowed input_types for this primitive
        for signature in input_types:
            # use the input_types matching function from DFS
            matching_types = match(signature, features)
            if not matching_types:
                raise Exception("Agg Primitive %s not tested" % agg_prim.name)

            for inputs in matching_types:
                instance = ft.Feature(inputs,
                                      parent_entity=session,
                                      primitive=agg_prim)

                # try to get name and calculate
                instance.get_name()
                ft.calculate_feature_matrix([instance], entityset=es)