def test_drop_null_transformer_fit_transform():
    """Exercise ``DropNullColumns.fit_transform`` on DataFrame inputs.

    Four scenarios run in order: a default-constructed transformer, then
    ``pct_null_threshold`` values of 0.5, 0.0 and 1.0. Every transformed
    result is converted with ``to_dataframe()`` before it is compared.
    """
    # Scenario 1: default transformer -- both columns are expected to remain,
    # cast to the dtypes the transformed output is compared against.
    frame = pd.DataFrame({
        'lots_of_null': [None, None, None, None, 5],
        'no_null': [1, 2, 3, 4, 5],
    })
    transformer = DropNullColumns()
    expected = frame.astype({'lots_of_null': 'float64', 'no_null': 'Int64'})
    transformed = transformer.fit_transform(frame)
    assert_frame_equal(expected, transformed.to_dataframe())

    # Scenario 2: threshold 0.5 -- only 'no_null' is expected to remain.
    raw_columns = {
        'lots_of_null': [None, None, None, None, 5],
        'all_null': [None, None, None, None, None],
        'no_null': [1, 2, 3, 4, 5],
    }
    frame = pd.DataFrame(raw_columns)
    transformer = DropNullColumns(pct_null_threshold=0.5)
    expected = frame.drop(columns=["lots_of_null", "all_null"]).astype(
        {'no_null': 'Int64'})
    transformed = transformer.fit_transform(frame)
    assert_frame_equal(expected, transformed.to_dataframe())
    # The input must still match a frame freshly built from the same data.
    assert frame.equals(pd.DataFrame(raw_columns))

    # Scenario 3: threshold 0.0 -- every column here contains a null, and
    # the result is expected to be empty.
    transformer = DropNullColumns(pct_null_threshold=0.0)
    frame = pd.DataFrame({
        'lots_of_null': [None, None, None, None, 5],
        'some_null': [None, 0, 3, 4, 5],
    })
    transformed = transformer.fit_transform(frame)
    assert transformed.to_dataframe().empty

    # Scenario 4: threshold 1.0 -- only the entirely-null column is expected
    # to be removed.
    frame = pd.DataFrame({
        'all_null': [None, None, None, None, None],
        'lots_of_null': [None, None, None, None, 5],
        'some_null': [None, 0, 3, 4, 5],
    })
    transformer = DropNullColumns(pct_null_threshold=1.0)
    transformed = transformer.fit_transform(frame)
    expected = frame.drop(columns=["all_null"])
    assert_frame_equal(expected, transformed.to_dataframe())
# Example no. 2
# 0
def test_drop_null_transformer_np_array():
    """Check ``DropNullColumns.fit_transform`` on a NumPy array input.

    With ``pct_null_threshold=0.5`` the result is expected to equal a
    DataFrame holding columns 1 and 3 of the input (labelled 1 and 3), and
    the input array must compare equal to a fresh copy afterwards.
    """
    nan = np.nan
    rows = [[nan, 0, 2, 0], [nan, 1, nan, 0],
            [nan, 2, nan, 0], [nan, 1, 1, 0]]
    X = np.array(rows)
    transformer = DropNullColumns(pct_null_threshold=0.5)

    # Transform first, then derive the expectation from X, so the comparison
    # is made in the same order as the work is performed.
    result = transformer.fit_transform(X)
    expected = pd.DataFrame(np.delete(X, [0, 2], axis=1), columns=[1, 3])
    assert result.equals(expected)

    # The input must still match an array rebuilt from the same literals.
    np.testing.assert_allclose(X, np.array(rows))
# Example no. 3
# 0
def test_drop_null_transformer_fit_transform():
    """Exercise ``DropNullColumns.fit_transform`` on DataFrame inputs.

    Four scenarios run in order: a default-constructed transformer, then
    ``pct_null_threshold`` values of 0.5, 0.0 and 1.0. Each result is
    compared directly against the expected frame with ``DataFrame.equals``.
    """
    # Scenario 1: default transformer -- output is expected to equal input.
    frame = pd.DataFrame({
        'lots_of_null': [None, None, None, None, 5],
        'no_null': [1, 2, 3, 4, 5],
    })
    transformer = DropNullColumns()
    result = transformer.fit_transform(frame)
    assert result.equals(frame)

    # Scenario 2: threshold 0.5 -- only 'no_null' is expected to remain.
    raw_columns = {
        'lots_of_null': [None, None, None, None, 5],
        'all_null': [None, None, None, None, None],
        'no_null': [1, 2, 3, 4, 5],
    }
    frame = pd.DataFrame(raw_columns)
    transformer = DropNullColumns(pct_null_threshold=0.5)
    result = transformer.fit_transform(frame)
    expected = frame.drop(columns=["lots_of_null", "all_null"])
    assert result.equals(expected)
    # The input must still match a frame freshly built from the same data.
    assert frame.equals(pd.DataFrame(raw_columns))

    # Scenario 3: threshold 0.0 -- every column here contains a null, and
    # the result is expected to be empty.
    transformer = DropNullColumns(pct_null_threshold=0.0)
    frame = pd.DataFrame({
        'lots_of_null': [None, None, None, None, 5],
        'some_null': [None, 0, 3, 4, 5],
    })
    result = transformer.fit_transform(frame)
    assert result.empty

    # Scenario 4: threshold 1.0 -- only the entirely-null column is expected
    # to be removed.
    frame = pd.DataFrame({
        'all_null': [None, None, None, None, None],
        'lots_of_null': [None, None, None, None, 5],
        'some_null': [None, 0, 3, 4, 5],
    })
    transformer = DropNullColumns(pct_null_threshold=1.0)
    result = transformer.fit_transform(frame)
    expected = frame.drop(columns=["all_null"])
    assert result.equals(expected)