예제 #1
0
    for col in train.columns:
        if train[col].nunique() >= 5 and train[col].nunique(
        ) < 200 and col != "target":
            cat_embed_cols.append(col)
    num_cols = [
        c for c in train.columns if c not in cat_embed_cols + ["target"]
    ]

    wide_cols = []
    for col in train.columns:
        if train[col].nunique() < 40 and col != "target":
            wide_cols.append(col)

    prepare_wide = WidePreprocessor(wide_cols)
    X_wide_train = prepare_wide.fit_transform(train)
    X_wide_valid = prepare_wide.transform(valid)

    prepare_tab = TabPreprocessor(
        embed_cols=cat_embed_cols,
        continuous_cols=num_cols,
        for_tabtransformer=True,
        scale=False,
    )
    X_tab_train = prepare_tab.fit_transform(train)
    X_tab_valid = prepare_tab.transform(valid)

    y_train = train.target.values
    y_valid = valid.target.values

    wide = Wide(wide_dim=np.unique(X_wide_train).shape[0])
예제 #2
0
def test_notfittederror():
    """Calling ``transform`` on an unfitted ``WidePreprocessor`` must raise.

    The preprocessor is constructed from the module-level ``wide_cols`` and
    ``cross_cols`` but is never fitted, so ``transform`` is expected to fail
    with ``NotFittedError``.
    """
    unfitted = WidePreprocessor(wide_cols, cross_cols)
    # Only the transform call sits inside the expectation, so an error raised
    # by the constructor would surface as a real failure instead of passing.
    expect_not_fitted = pytest.raises(NotFittedError)
    with expect_not_fitted:
        unfitted.transform(df_letters)