def test_transform_drop_all_nan_columns():
    """Check that a column holding only NaN is absent from the transformed frame.

    Every column is configured with the ``most_frequent`` strategy. After the
    transform, the input frame is compared against a freshly built copy to
    confirm that ``fit``/``transform`` left it unmodified.
    """
    column_names = ("all_nan", "some_nan", "another_col")
    X = pd.DataFrame({
        "all_nan": [np.nan, np.nan, np.nan],
        "some_nan": [np.nan, 1, 0],
        "another_col": [0, 1, 2],
    })
    # One independent strategy dict per column, in the frame's column order.
    strategies = {
        name: {"impute_strategy": "most_frequent"} for name in column_names
    }

    transformer = PerColumnImputer(impute_strategies=strategies)
    transformer.fit(X)

    # "all_nan" is expected to be missing; the NaN in "some_nan" becomes 0.
    expected = pd.DataFrame({
        "some_nan": [0, 1, 0],
        "another_col": [0, 1, 2],
    })
    assert_frame_equal(expected, transformer.transform(X), check_dtype=False)

    # The original input must still contain its NaN values and all columns.
    pristine_input = pd.DataFrame({
        "all_nan": [np.nan, np.nan, np.nan],
        "some_nan": [np.nan, 1, 0],
        "another_col": [0, 1, 2],
    })
    assert_frame_equal(X, pristine_input)
# Example #2
# 0
def test_transform_drop_all_nan_columns_empty():
    """Check that imputing a frame made only of NaN values gives an empty result.

    The scenario runs twice: once through ``fit_transform`` and once through
    ``fit`` followed by ``transform``. After each run the input frame is
    compared with a freshly built copy to confirm it was not modified.
    """
    X = pd.DataFrame([[np.nan, np.nan, np.nan]])
    # NOTE(review): the frame above gets integer column labels, while the
    # strategy key below is the string '0' -- confirm this is intentional.

    # Path 1: fit and transform in a single call.
    imputer = PerColumnImputer(
        impute_strategies={'0': {"impute_strategy": "most_frequent"}}
    )
    one_step_result = imputer.fit_transform(X).to_dataframe()
    assert one_step_result.empty
    assert_frame_equal(X, pd.DataFrame([[np.nan, np.nan, np.nan]]))

    # Path 2: a fresh imputer, fitted first and then used to transform.
    imputer = PerColumnImputer(
        impute_strategies={'0': {"impute_strategy": "most_frequent"}}
    )
    imputer.fit(X)
    two_step_result = imputer.transform(X).to_dataframe()
    assert two_step_result.empty
    assert_frame_equal(X, pd.DataFrame([[np.nan, np.nan, np.nan]]))
# Example #3
# 0
def test_per_column_imputer_woodwork_custom_overrides_returned_by_components(X_df, has_nan):
    """Check that a logical-type override on column 0 survives the imputer.

    For each candidate logical type, ``X_df`` is wrapped in a ``ww.DataTable``
    with that type forced onto column 0. Types for which the constructor raises
    ``TypeError`` are skipped. The transformed output must be a
    ``ww.DataTable`` whose ``logical_types`` equals ``{0: logical_type}``.

    Args:
        X_df: Input frame supplied by the test setup; modified in place when
            ``has_nan`` is truthy.
        has_nan: When truthy, the last row of the first column is set to NaN
            before the data table is built.
    """
    if has_nan:
        last_row = len(X_df) - 1
        X_df.iloc[last_row, 0] = np.nan
    y = pd.Series([1, 2, 1])

    candidate_types = (Integer, Double, Categorical, NaturalLanguage, Boolean)
    for logical_type in candidate_types:
        try:
            X = ww.DataTable(X_df, logical_types={0: logical_type})
        except TypeError:
            # The data table could not be built with this override; skip it.
            continue

        component = PerColumnImputer()
        component.fit(X, y)
        X_t = component.transform(X, y)

        assert isinstance(X_t, ww.DataTable)
        assert X_t.logical_types == {0: logical_type}
def test_fit_transform():
    """Check that ``fit`` + ``transform`` and ``fit_transform`` agree and impute the median.

    Column ``A`` holds 2, 4, 6 and one missing value. With the ``median``
    strategy the missing entry is expected to become 4. Both code paths are
    compared with each other and with the expected frame.
    """
    X = pd.DataFrame([[2], [4], [6], [np.nan]])
    X_expected = pd.DataFrame([[2], [4], [6], [4]])

    X.columns = ['A']
    X_expected.columns = ['A']

    strategies = {'A': {"impute_strategy": "median"}}

    # Path 1: separate fit and transform calls.
    transformer = PerColumnImputer(impute_strategies=strategies)
    transformer.fit(X)
    X_t = transformer.transform(X)

    # Path 2: a fresh imputer using the combined fit_transform call.
    transformer = PerColumnImputer(impute_strategies=strategies)
    X_fit_transform = transformer.fit_transform(X)

    # The two paths must agree with each other ...
    assert_frame_equal(X_t, X_fit_transform, check_dtype=False)
    # ... and with the expected values. Without these checks the test would
    # pass even if both paths produced the same wrong imputation.
    assert_frame_equal(X_expected, X_t, check_dtype=False)
    assert_frame_equal(X_expected, X_fit_transform, check_dtype=False)