def test_init_and_name(es):
    log = es['log']
    rating = ft.Feature(es["products"]["rating"], es["log"])
    features = [ft.Feature(v) for v in log.variables] +\
        [ft.Feature(rating, primitive=GreaterThanScalar(2.5))]
    # Add Timedelta feature
    # features.append(pd.Timestamp.now() - ft.Feature(log['datetime']))
    for transform_prim in get_transform_primitives().values():
        # skip automated testing of a few special cases
        if transform_prim in [NotEqual, Equal]:
            continue
        # use the input_types matching function from DFS
        input_types = transform_prim.input_types
        if type(input_types[0]) == list:
            matching_inputs = match(input_types[0], features)
        else:
            matching_inputs = match(input_types, features)
        if len(matching_inputs) == 0:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)
        for s in matching_inputs:
            instance = ft.Feature(s, primitive=transform_prim)
            # try to get name and calculate
            instance.get_name()
            ft.calculate_feature_matrix([instance], entityset=es).head(5)

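# The snippets above and below assume module-level imports along these lines.
# This is a hedged sketch: the exact import paths for the primitive registry
# and the DFS `match` helper vary across featuretools versions, so treat the
# paths below as assumptions rather than a definitive header.
import pandas as pd

import featuretools as ft
from featuretools.primitives import Equal, GreaterThanScalar, NotEqual
from featuretools.primitives.utils import (  # path is version-dependent
    get_aggregation_primitives,
    get_transform_primitives,
)
from featuretools.synthesis.deep_feature_synthesis import match  # assumed path
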
def test_init_and_name(es):
    log = es['log']
    features = [Feature(v) for v in log.variables] +\
        [GreaterThan(Feature(es["products"]["rating"], es["log"]), 2.5)]
    # Add Timedelta feature
    features.append(pd.Timestamp.now() - Feature(log['datetime']))
    for transform_prim in get_transform_primitives().values():
        if issubclass(transform_prim, Compare):
            continue
        # use the input_types matching function from DFS
        input_types = transform_prim.input_types
        if type(input_types[0]) == list:
            matching_inputs = [g for s in input_types
                               for g in match(s, features)]
        else:
            matching_inputs = match(input_types, features)
        if len(matching_inputs) == 0:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)
        for s in matching_inputs:
            instance = transform_prim(*s)
            # try to get name and calculate
            instance.get_name()
            instance.head()

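# Simplified, illustrative stand-in for the DFS `match` helper used in these
# tests. The real implementation lives in featuretools' deep_feature_synthesis
# module and also handles commutative primitives and deduplication; this
# sketch only shows the core idea of positional type matching against each
# feature's variable_type (old-API attribute).
from itertools import permutations


def match_sketch(input_types, features):
    # collect every ordered tuple of distinct features whose variable types
    # line up positionally with the requested input_types
    matches = []
    for combo in permutations(features, len(input_types)):
        if all(issubclass(f.variable_type, t)
               for f, t in zip(combo, input_types)):
            matches.append(combo)
    return matches
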
def test_init_and_name(es):
    log = es['log']
    rating = ft.Feature(ft.IdentityFeature(es["products"].ww["rating"]), "log")
    log_features = [ft.Feature(es['log'].ww[col]) for col in log.columns] +\
        [ft.Feature(rating, primitive=GreaterThanScalar(2.5)),
         ft.Feature(rating, primitive=GreaterThanScalar(3.5))]
    # Add Timedelta feature
    # features.append(pd.Timestamp.now() - ft.Feature(log['datetime']))
    customers_features = [
        ft.Feature(es["customers"].ww[col]) for col in es["customers"].columns
    ]

    # check that all transform primitives have a name
    for attribute_string in dir(ft.primitives):
        attr = getattr(ft.primitives, attribute_string)
        if isclass(attr):
            if issubclass(attr, TransformPrimitive) and attr != TransformPrimitive:
                assert getattr(attr, "name") is not None

    trans_primitives = get_transform_primitives().values()
    # If Dask EntitySet use only Dask compatible primitives
    if es.dataframe_type == Library.DASK.value:
        trans_primitives = [
            prim for prim in trans_primitives if Library.DASK in prim.compatibility
        ]
    if es.dataframe_type == Library.KOALAS.value:
        trans_primitives = [
            prim for prim in trans_primitives if Library.KOALAS in prim.compatibility
        ]

    for transform_prim in trans_primitives:
        # skip automated testing of a few special cases
        features_to_use = log_features
        if transform_prim in [NotEqual, Equal]:
            continue
        if transform_prim in [Age]:
            features_to_use = customers_features
        # use the input_types matching function from DFS
        input_types = transform_prim.input_types
        if type(input_types[0]) == list:
            matching_inputs = match(input_types[0], features_to_use)
        else:
            matching_inputs = match(input_types, features_to_use)
        if len(matching_inputs) == 0:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)
        for prim in matching_inputs:
            instance = ft.Feature(prim, primitive=transform_prim)
            # try to get name and calculate
            instance.get_name()
            ft.calculate_feature_matrix([instance], entityset=es)

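# The Woodwork-era snippets above and below additionally assume imports like
# these (again a sketch; the module path for Library is an assumption that
# may differ between featuretools releases):
from inspect import isclass

from featuretools.primitives import Age, AggregationPrimitive, TransformPrimitive
from featuretools.utils.gen_utils import Library  # assumed path
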
def test_init_and_name(es):
    session = es['sessions']
    log = es['log']
    features = [ft.Feature(v) for v in log.variables]
    for agg_prim in get_aggregation_primitives().values():
        input_types = agg_prim.input_types
        if type(input_types[0]) != list:
            input_types = [input_types]
        # test each allowed input_types for this primitive
        for it in input_types:
            # use the input_types matching function from DFS
            matching_types = match(it, features)
            if len(matching_types) == 0:
                raise Exception("Agg Primitive %s not tested" % agg_prim.name)
            for t in matching_types:
                instance = ft.Feature(t, parent_entity=session, primitive=agg_prim)
                # try to get name and calculate
                instance.get_name()
                ft.calculate_feature_matrix([instance], entityset=es).head(5)

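# For illustration, when the loop above pairs the Sum primitive with a numeric
# column such as log.value, it effectively builds the following old-API
# feature (column name is hypothetical for the test EntitySet):
#
#     instance = ft.Feature(ft.Feature(es['log']['value']),
#                           parent_entity=es['sessions'],
#                           primitive=ft.primitives.Sum)
#     instance.get_name()  # -> "SUM(log.value)"
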
def test_init_and_name(es):
    log = es['log']
    rating = ft.Feature(es["products"]["rating"], es["log"])
    log_features = [ft.Feature(v) for v in log.variables] +\
        [ft.Feature(rating, primitive=GreaterThanScalar(2.5))]
    # Add Timedelta feature
    # features.append(pd.Timestamp.now() - ft.Feature(log['datetime']))
    customers_features = [ft.Feature(v) for v in es["customers"].variables]

    trans_primitives = get_transform_primitives().values()
    # If Dask EntitySet use only Dask compatible primitives
    if isinstance(es['log'].df, dd.DataFrame):
        trans_primitives = [
            prim for prim in trans_primitives if Library.DASK in prim.compatibility
        ]
    if ks and isinstance(es['log'].df, ks.DataFrame):
        trans_primitives = [
            prim for prim in trans_primitives if Library.KOALAS in prim.compatibility
        ]

    for transform_prim in trans_primitives:
        # skip automated testing of a few special cases
        features_to_use = log_features
        if transform_prim in [NotEqual, Equal]:
            continue
        if transform_prim in [Age]:
            features_to_use = customers_features
        # use the input_types matching function from DFS
        input_types = transform_prim.input_types
        if type(input_types[0]) == list:
            matching_inputs = match(input_types[0], features_to_use)
        else:
            matching_inputs = match(input_types, features_to_use)
        if len(matching_inputs) == 0:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)
        for prim in matching_inputs:
            instance = ft.Feature(prim, primitive=transform_prim)
            # try to get name and calculate
            instance.get_name()
            ft.calculate_feature_matrix([instance], entityset=es)

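# The Dask/Koalas-aware snippets assume the optional backends are imported at
# module scope roughly like this (featuretools' own tests use an
# import_or_none helper; the try/except below is an equivalent sketch):
import dask.dataframe as dd

try:
    import databricks.koalas as ks
except ImportError:
    ks = None
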
def test_init_and_name(es):
    log = es['log']

    # Add a BooleanNullable column so primitives with that input type get tested
    boolean_nullable = log.ww['purchased']
    boolean_nullable = boolean_nullable.ww.set_logical_type('BooleanNullable')
    log.ww['boolean_nullable'] = boolean_nullable

    features = [ft.Feature(es['log'].ww[col]) for col in log.columns]

    # check that all aggregation primitives have a name
    for attribute_string in dir(ft.primitives):
        attr = getattr(ft.primitives, attribute_string)
        if isclass(attr):
            if issubclass(attr, AggregationPrimitive) and attr != AggregationPrimitive:
                assert getattr(attr, "name") is not None

    agg_primitives = get_aggregation_primitives().values()
    # If Dask EntitySet use only Dask compatible primitives
    if es.dataframe_type == Library.DASK.value:
        agg_primitives = [
            prim for prim in agg_primitives if Library.DASK in prim.compatibility
        ]
    if es.dataframe_type == Library.KOALAS.value:
        agg_primitives = [
            prim for prim in agg_primitives if Library.KOALAS in prim.compatibility
        ]

    for agg_prim in agg_primitives:
        input_types = agg_prim.input_types
        if type(input_types[0]) != list:
            input_types = [input_types]
        # test each allowed input_types for this primitive
        for it in input_types:
            # use the input_types matching function from DFS
            matching_types = match(it, features)
            if len(matching_types) == 0:
                raise Exception("Agg Primitive %s not tested" % agg_prim.name)
            for t in matching_types:
                instance = ft.Feature(t,
                                      parent_dataframe_name='sessions',
                                      primitive=agg_prim)
                # try to get name and calculate
                instance.get_name()
                ft.calculate_feature_matrix([instance], entityset=es)

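# Standalone sketch of the Woodwork cast performed above, runnable against a
# plain pandas DataFrame (assumes woodwork is installed; the column name is
# illustrative):
import pandas as pd
import woodwork as ww  # noqa: F401  (registers the .ww accessor)

df = pd.DataFrame({"purchased": pd.Series([True, False, None], dtype="object")})
df.ww.init()
df.ww.set_types(logical_types={"purchased": "BooleanNullable"})
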
def test_init_and_name(es):
    from featuretools import calculate_feature_matrix
    log = es['log']
    features = [Feature(v) for v in log.variables] +\
        [GreaterThan(Feature(es["products"]["rating"], es["log"]), 2.5)]
    # Add Timedelta feature
    features.append(pd.Timestamp.now() - Feature(log['datetime']))
    for transform_prim in get_transform_primitives().values():
        # use the input_types matching function from DFS
        input_types = transform_prim.input_types
        if type(input_types[0]) == list:
            matching_inputs = match(input_types[0], features)
        else:
            matching_inputs = match(input_types, features)
        if len(matching_inputs) == 0:
            raise Exception("Transform Primitive %s not tested" %
                            transform_prim.name)
        for s in matching_inputs:
            instance = transform_prim(*s)
            # try to get name and calculate
            instance.get_name()
            calculate_feature_matrix([instance], entityset=es).head(5)

def test_init_and_name(es):
    session = es['sessions']
    log = es['log']
    features = [Feature(v) for v in log.variables]
    for agg_prim in get_aggregation_primitives().values():
        input_types = agg_prim.input_types
        if type(input_types[0]) != list:
            input_types = [input_types]
        # test each allowed input_types for this primitive
        for it in input_types:
            # use the input_types matching function from DFS
            matching_types = match(it, features)
            if len(matching_types) == 0:
                raise Exception("Agg Primitive %s not tested" % agg_prim.name)
            for t in matching_types:
                instance = agg_prim(t, parent_entity=session)
                # try to get name and calculate
                instance.get_name()
                ft.calculate_feature_matrix([instance], entityset=es).head(5)

def test_init_and_name(es):
    session = es['sessions']
    log = es['log']
    features = [ft.Feature(v) for v in log.variables]

    agg_primitives = get_aggregation_primitives().values()
    # If Dask EntitySet use only Dask compatible primitives
    if isinstance(es['sessions'].df, dd.DataFrame):
        agg_primitives = [
            prim for prim in agg_primitives if Library.DASK in prim.compatibility
        ]
    if ks and isinstance(es['sessions'].df, ks.DataFrame):
        agg_primitives = [
            prim for prim in agg_primitives if Library.KOALAS in prim.compatibility
        ]

    for agg_prim in agg_primitives:
        input_types = agg_prim.input_types
        if type(input_types[0]) != list:
            input_types = [input_types]
        # test each allowed input_types for this primitive
        for it in input_types:
            # use the input_types matching function from DFS
            matching_types = match(it, features)
            if len(matching_types) == 0:
                raise Exception("Agg Primitive %s not tested" % agg_prim.name)
            for t in matching_types:
                instance = ft.Feature(t, parent_entity=session, primitive=agg_prim)
                # try to get name and calculate
                instance.get_name()
                ft.calculate_feature_matrix([instance], entityset=es)

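# All of the functions above are pytest tests: `es` is a fixture supplying the
# shared test EntitySet. A minimal sketch of such a fixture, using the demo
# entityset that ships with featuretools (the real fixture builds a richer
# EntitySet containing 'log' and 'products' dataframes):
import featuretools as ft
import pytest


@pytest.fixture
def es():
    return ft.demo.load_mock_customer(return_entityset=True)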