def pipeline(es):
    """Build the four-step feature-synthesis and classification pipeline.

    Steps, in order:
      * ``ft``: ``DFSTransformer`` bound to ``es`` with ``"customers"`` as the
        target entity and ``max_features=20``.
      * ``numeric``: ``FunctionTransformer`` wrapping ``select_numeric`` with
        input validation turned off.
      * ``imp``: ``SimpleImputer`` with default settings.
      * ``et``: ``ExtraTreesClassifier`` with 10 estimators.

    Args:
        es: Object passed to ``DFSTransformer`` as its ``entityset`` argument.

    Returns:
        The assembled (unfitted) ``Pipeline``.
    """
    feature_synthesis = DFSTransformer(
        entityset=es,
        target_entity="customers",
        max_features=20,
    )
    numeric_only = FunctionTransformer(select_numeric, validate=False)
    imputer = SimpleImputer()
    classifier = ExtraTreesClassifier(n_estimators=10)

    return Pipeline(
        steps=[
            ("ft", feature_synthesis),
            ("numeric", numeric_only),
            ("imp", imputer),
            ("et", classifier),
        ]
    )
def pipeline():
    """Build the four-step feature-synthesis and classification pipeline.

    Steps, in order:
      * ``ft``: ``DFSTransformer`` targeting the ``"customers"`` dataframe
        with ``max_features=20``.
      * ``numeric``: ``FunctionTransformer`` wrapping ``select_numeric`` with
        input validation turned off.
      * ``imp``: ``SimpleImputer`` with default settings.
      * ``et``: ``ExtraTreesClassifier`` with 10 estimators.

    Returns:
        The assembled (unfitted) ``Pipeline``.
    """
    feature_synthesis = DFSTransformer(
        target_dataframe_name="customers",
        max_features=20,
    )
    numeric_only = FunctionTransformer(select_numeric, validate=False)
    imputer = SimpleImputer()
    classifier = ExtraTreesClassifier(n_estimators=10)

    return Pipeline(
        steps=[
            ("ft", feature_synthesis),
            ("numeric", numeric_only),
            ("imp", imputer),
            ("et", classifier),
        ]
    )
def test_sklearn_transformer(es, df):
    """Check DFSTransformer composes with other transformers in a Pipeline.

    The pipeline (DFSTransformer on the ``"customers"`` entity, numeric
    selection via ``select_numeric``, then ``StandardScaler``) is fitted on
    ``df['customer_id']`` and then applied to the same column; the result
    must have 15 rows.
    """
    # Using with transformers
    transformer_steps = [
        ("ft", DFSTransformer(entityset=es, target_entity="customers")),
        ("numeric", FunctionTransformer(select_numeric, validate=False)),
        ("sc", StandardScaler()),
    ]
    transformer_pipeline = Pipeline(steps=transformer_steps)

    fitted = transformer_pipeline.fit(df["customer_id"])
    transformed = fitted.transform(df["customer_id"])

    row_count = transformed.shape[0]
    assert row_count == 15
def test_sklearn_transformer_with_entityset(es):
    """Check DFSTransformer composes with other transformers in a Pipeline.

    The pipeline (DFSTransformer targeting the ``"customers"`` dataframe,
    numeric selection via ``select_numeric``, then ``StandardScaler``) is
    fitted on ``es`` and then applied to ``es``; the result must have 15 rows.
    """
    # Using with transformers
    transformer_steps = [
        ("ft", DFSTransformer(target_dataframe_name="customers")),
        ("numeric", FunctionTransformer(select_numeric, validate=False)),
        ("sc", StandardScaler()),
    ]
    transformer_pipeline = Pipeline(steps=transformer_steps)

    fitted = transformer_pipeline.fit(es)
    transformed = fitted.transform(es)

    row_count = transformed.shape[0]
    assert row_count == 15
def test_cfm_uses_filtered_target_df_with_entityset(es):
    """Check the feature matrix index follows the ids passed to filter_transactions.

    A single-step pipeline (DFSTransformer targeting ``"transactions"``) is
    fit-transformed on the result of ``filter_transactions`` for the training
    ids, then used to transform the result for the test ids. For each output
    the ``sessions.COUNT(transactions)`` column and the set of index values
    are compared against the expected values.
    """
    count_column = "sessions.COUNT(transactions)"

    dfs_pipeline = Pipeline(
        steps=[
            ("ft", DFSTransformer(target_dataframe_name="transactions")),
        ]
    )

    train_ids = [1, 2, 3]
    test_ids = [10, 55, 853]
    train_es = filter_transactions(es, ids=train_ids)
    test_es = filter_transactions(es, ids=test_ids)

    # Fit and transform on the training entity set.
    train_matrix = dfs_pipeline.fit_transform(X=train_es)
    expected_train_counts = [1, 1, 1]
    assert all(train_matrix[count_column] == expected_train_counts)
    assert set(train_matrix.index.values) == set(train_ids)

    # Re-use the fitted pipeline on the test entity set.
    test_matrix = dfs_pipeline.transform(test_es)
    expected_test_counts = [1, 2, 2]
    assert all(test_matrix[count_column] == expected_test_counts)
    assert set(test_matrix.index.values) == set(test_ids)
def test_cfm_uses_filtered_target_df_with_dataframes_and_relationships(es):
    """Check the feature matrix index follows the ids passed to filter_transactions.

    Same flow as the entity-set variant, except the pipeline input is a
    ``(dataframes, relationships)`` tuple obtained from
    ``get_dataframes_and_relationships`` for each filtered entity set. For
    each output the ``sessions.COUNT(transactions)`` column and the set of
    index values are compared against the expected values.
    """
    count_column = "sessions.COUNT(transactions)"

    dfs_pipeline = Pipeline(
        steps=[
            ("ft", DFSTransformer(target_dataframe_name="transactions")),
        ]
    )

    train_ids = [3, 1, 2]
    test_ids = [853, 55, 10]
    train_es = filter_transactions(es, ids=train_ids)
    test_es = filter_transactions(es, ids=test_ids)

    train_frames, train_rels = get_dataframes_and_relationships(train_es)
    test_frames, test_rels = get_dataframes_and_relationships(test_es)

    # Fit and transform on the training dataframes/relationships.
    train_input = (train_frames, train_rels)
    train_matrix = dfs_pipeline.fit_transform(X=train_input)
    expected_train_counts = [1, 1, 1]
    assert all(train_matrix[count_column] == expected_train_counts)
    assert set(train_matrix.index.values) == set(train_ids)

    # Re-use the fitted pipeline on the test dataframes/relationships.
    test_input = (test_frames, test_rels)
    test_matrix = dfs_pipeline.transform(X=test_input)
    expected_test_counts = [2, 2, 1]
    assert all(test_matrix[count_column] == expected_test_counts)
    assert set(test_matrix.index.values) == set(test_ids)