Example #1
0
def test_drop_null_transformer_transform_default_pct_null_threshold():
    """A default-constructed DropNullColumns returns a frame equal to its input.

    The input has one column that is null in four of five rows and one
    column with no nulls; the transformed output must equal the input.
    """
    frame = pd.DataFrame({
        'lots_of_null': [None, None, None, None, 5],
        'no_null': [1, 2, 3, 4, 5],
    })
    transformer = DropNullColumns()
    transformer.fit(frame)
    transformed = transformer.transform(frame)
    assert transformed.equals(frame)
def test_drop_null_transformer_transform_default_pct_null_threshold():
    """A default-constructed DropNullColumns keeps both input columns.

    The expected frame is the input cast to ``float64`` / ``Int64``; the
    transformer output is converted with ``to_dataframe()`` before comparing.
    """
    transformer = DropNullColumns()
    data = pd.DataFrame({
        'lots_of_null': [None, None, None, None, 5],
        'no_null': [1, 2, 3, 4, 5],
    })
    expected_dtypes = {'lots_of_null': 'float64', 'no_null': 'Int64'}
    expected = data.astype(expected_dtypes)

    transformer.fit(data)
    transformed = transformer.transform(data)
    actual = transformed.to_dataframe()
    assert_frame_equal(expected, actual)
def test_drop_null_transformer_woodwork_custom_overrides_returned_by_components(
        X_df, has_nan):
    """A logical-type override on column 0 must still be reported after transform.

    For each candidate logical type, a ``ww.DataTable`` is built from ``X_df``
    with column 0 overridden, run through ``DropNullColumns``, and the result
    is checked to be a ``ww.DataTable`` whose ``logical_types`` equals
    ``{0: <override>}``.

    Args:
        X_df: input frame fixture; gains an ``'all null'`` column when
            ``has_nan`` is truthy.
        has_nan: fixture flag controlling whether the all-NaN column is added.
    """
    target = pd.Series([1, 2, 1])
    if has_nan:
        X_df['all null'] = [np.nan] * 3

    for override in (Integer, Double, Categorical, NaturalLanguage, Boolean):
        try:
            table = ww.DataTable(X_df, logical_types={0: override})
        except TypeError:
            # DataTable construction rejected this override; skip it.
            continue

        component = DropNullColumns()
        component.fit(table)
        result = component.transform(table, target)
        assert isinstance(result, ww.DataTable)
        assert result.logical_types == {0: override}
Example #4
0
def test_drop_null_transformer_transform_custom_pct_null_threshold():
    """With ``pct_null_threshold=0.5`` the two null-heavy columns are dropped.

    Also verifies that the input frame still equals a freshly built copy
    after fitting and transforming.
    """
    def build_frame():
        # Fresh frame each call so the "untouched" check compares against
        # data the transformer never saw.
        return pd.DataFrame({
            'lots_of_null': [None, None, None, None, 5],
            'all_null': [None, None, None, None, None],
            'no_null': [1, 2, 3, 4, 5]
        })

    source = build_frame()

    transformer = DropNullColumns(pct_null_threshold=0.5)
    transformer.fit(source)
    transformed = transformer.transform(source)
    expected = source.drop(["lots_of_null", "all_null"], axis=1)
    assert transformed.equals(expected)

    # check that the input was not modified
    assert source.equals(build_frame())
def test_drop_null_transformer_transform_custom_pct_null_threshold():
    """With ``pct_null_threshold=0.5`` only ``no_null`` remains, typed ``Int64``.

    The transformer output is converted with ``to_dataframe()`` before the
    comparison. Also verifies that the input frame still equals a freshly
    built copy afterwards.
    """
    def build_frame():
        # Fresh frame each call so the "untouched" check compares against
        # data the transformer never saw.
        return pd.DataFrame({
            'lots_of_null': [None, None, None, None, 5],
            'all_null': [None, None, None, None, None],
            'no_null': [1, 2, 3, 4, 5]
        })

    source = build_frame()

    transformer = DropNullColumns(pct_null_threshold=0.5)
    expected = (
        source
        .drop(["lots_of_null", "all_null"], axis=1)
        .astype({"no_null": "Int64"})
    )
    transformer.fit(source)
    actual = transformer.transform(source).to_dataframe()
    assert_frame_equal(expected, actual)

    # check that the input was not modified
    assert source.equals(build_frame())
Example #6
0
def test_drop_null_transformer_transform_boundary_pct_null_threshold():
    """Exercise the two boundary values of ``pct_null_threshold``.

    * ``0.0``: the transformed output must be empty.
    * ``1.0``: the output must equal the input minus the ``all_null`` column.

    Finally verifies that the input frame still equals a freshly built copy.
    """
    def build_frame():
        # Fresh frame each call so the "untouched" check compares against
        # data the transformers never saw.
        return pd.DataFrame({
            'all_null': [None, None, None, None, None],
            'lots_of_null': [None, None, None, None, 5],
            'some_null': [None, 0, 3, 4, 5]
        })

    zero_threshold = DropNullColumns(pct_null_threshold=0.0)
    frame = build_frame()
    zero_threshold.fit(frame)
    zero_result = zero_threshold.transform(frame)
    assert zero_result.empty

    full_threshold = DropNullColumns(pct_null_threshold=1.0)
    full_threshold.fit(frame)
    full_result = full_threshold.transform(frame)
    without_all_null = frame.drop(["all_null"], axis=1)
    assert full_result.equals(without_all_null)

    # check that the input was not modified
    assert frame.equals(build_frame())