def pipeline(es):
    """Assemble a four-step scikit-learn pipeline around ``es``.

    Steps, in order:
      * ``ft``: ``DFSTransformer`` bound to ``es``, targeting the
        "customers" entity and capped at 20 features.
      * ``numeric``: ``FunctionTransformer`` wrapping ``select_numeric``
        with input validation disabled.
      * ``imp``: ``SimpleImputer`` with its default settings.
      * ``et``: ``ExtraTreesClassifier`` with 10 estimators.

    Args:
        es: Entity set forwarded to ``DFSTransformer`` as ``entityset``.

    Returns:
        The unfitted ``Pipeline``.
    """
    feature_step = DFSTransformer(
        entityset=es, target_entity="customers", max_features=20)
    numeric_step = FunctionTransformer(select_numeric, validate=False)
    imputer_step = SimpleImputer()
    classifier_step = ExtraTreesClassifier(n_estimators=10)

    steps = [
        ('ft', feature_step),
        ("numeric", numeric_step),
        ('imp', imputer_step),
        ('et', classifier_step),
    ]
    return Pipeline(steps=steps)
Example #2
0
def pipeline():
    """Assemble a four-step scikit-learn pipeline for the "customers" dataframe.

    Steps, in order:
      * ``ft``: ``DFSTransformer`` targeting the "customers" dataframe,
        capped at 20 features.
      * ``numeric``: ``FunctionTransformer`` wrapping ``select_numeric``
        with input validation disabled.
      * ``imp``: ``SimpleImputer`` with its default settings.
      * ``et``: ``ExtraTreesClassifier`` with 10 estimators.

    Returns:
        The unfitted ``Pipeline``.
    """
    feature_step = DFSTransformer(
        target_dataframe_name="customers", max_features=20)
    numeric_step = FunctionTransformer(select_numeric, validate=False)
    imputer_step = SimpleImputer()
    classifier_step = ExtraTreesClassifier(n_estimators=10)

    steps = [
        ("ft", feature_step),
        ("numeric", numeric_step),
        ("imp", imputer_step),
        ("et", classifier_step),
    ]
    return Pipeline(steps=steps)
def test_sklearn_transformer(es, df):
    """Fit and apply a DFS -> numeric-selection -> scaling pipeline.

    The pipeline is fitted on ``df['customer_id']`` and then used to
    transform the same column; the result must have 15 rows.
    """
    # DFSTransformer used alongside other transformers (no final estimator).
    steps = [
        ('ft', DFSTransformer(entityset=es, target_entity="customers")),
        ("numeric", FunctionTransformer(select_numeric, validate=False)),
        ('sc', StandardScaler()),
    ]
    transformer_chain = Pipeline(steps=steps)

    customer_ids = df['customer_id']
    fitted_chain = transformer_chain.fit(customer_ids)
    X_train = fitted_chain.transform(customer_ids)

    n_rows = X_train.shape[0]
    assert n_rows == 15
Example #4
0
def test_sklearn_transformer_with_entityset(es):
    """Fit and apply a DFS -> numeric-selection -> scaling pipeline on ``es``.

    The entity set itself is passed as the pipeline input for both
    ``fit`` and ``transform``; the result must have 15 rows.
    """
    # DFSTransformer used alongside other transformers (no final estimator).
    steps = [
        ("ft", DFSTransformer(target_dataframe_name="customers")),
        ("numeric", FunctionTransformer(select_numeric, validate=False)),
        ("sc", StandardScaler()),
    ]
    transformer_chain = Pipeline(steps=steps)

    fitted_chain = transformer_chain.fit(es)
    X_train = fitted_chain.transform(es)

    n_rows = X_train.shape[0]
    assert n_rows == 15
Example #5
0
def test_cfm_uses_filtered_target_df_with_entityset(es):
    """Check the feature matrix follows the filtered "transactions" input.

    A single-step ``DFSTransformer`` pipeline is fit-transformed on the
    result of ``filter_transactions`` for the train ids and then used to
    transform the result for the test ids. For each output, the
    "sessions.COUNT(transactions)" column and the index values are
    compared against the expected counts and the requested ids.
    """
    train_ids = [1, 2, 3]
    test_ids = [10, 55, 853]
    count_col = "sessions.COUNT(transactions)"

    dfs_step = DFSTransformer(target_dataframe_name="transactions")
    pipeline = Pipeline(steps=[("ft", dfs_step)])

    train_es = filter_transactions(es, ids=train_ids)
    test_es = filter_transactions(es, ids=test_ids)

    # Train side: fit and transform in one call.
    fm_train = pipeline.fit_transform(X=train_es)
    train_matches = fm_train[count_col] == [1, 1, 1]
    assert all(train_matches)
    assert set(fm_train.index.values) == set(train_ids)

    # Test side: reuse the fitted pipeline on the other filtered input.
    fm_test = pipeline.transform(test_es)
    test_matches = fm_test[count_col] == [1, 2, 2]
    assert all(test_matches)
    assert set(fm_test.index.values) == set(test_ids)
Example #6
0
def test_cfm_uses_filtered_target_df_with_dataframes_and_relationships(es):
    """Check the feature matrix follows filtered (dataframes, relationships).

    Same scenario as the entity-set variant, except the pipeline input is
    the ``(dataframes, relationships)`` tuple returned by
    ``get_dataframes_and_relationships`` for each filtered entity set.
    For each output, the "sessions.COUNT(transactions)" column and the
    index values are compared against the expected counts and the
    requested ids.
    """
    train_ids = [3, 1, 2]
    test_ids = [853, 55, 10]
    count_col = "sessions.COUNT(transactions)"

    dfs_step = DFSTransformer(target_dataframe_name="transactions")
    pipeline = Pipeline(steps=[("ft", dfs_step)])

    train_es = filter_transactions(es, ids=train_ids)
    test_es = filter_transactions(es, ids=test_ids)
    train_input = get_dataframes_and_relationships(train_es)
    test_input = get_dataframes_and_relationships(test_es)

    # Unpack and re-pack so the input is a 2-tuple, as in a direct call.
    train_dataframes, train_relationships = train_input
    test_dataframes, test_relationships = test_input

    # Train side: fit and transform in one call.
    fm_train = pipeline.fit_transform(
        X=(train_dataframes, train_relationships))
    train_matches = fm_train[count_col] == [1, 1, 1]
    assert all(train_matches)
    assert set(fm_train.index.values) == set(train_ids)

    # Test side: reuse the fitted pipeline on the other filtered input.
    fm_test = pipeline.transform(X=(test_dataframes, test_relationships))
    test_matches = fm_test[count_col] == [2, 2, 1]
    assert all(test_matches)
    assert set(fm_test.index.values) == set(test_ids)