Python NumImputer Examples

Programming Language: Python

Namespace/Package Name: aikit.transformers.base

Class/Type: NumImputer

Examples at hotexamples.com: 3

Python NumImputer - 3 examples found. These are the top rated real world Python examples of aikit.transformers.base.NumImputer extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

NumImputer(3)

fit_transform(2)

transform(1)

Example #1

Show file

File: test_base.py Project: mabdelsayed/aikit

def test_NumImputer_is_picklable():
    """Check that a fitted NumImputer survives a pickle round trip.

    The restored imputer must have the same type as the original one and
    must produce an identical transformed frame on the same input.
    """
    frame = get_sample_df(100, seed=123)
    # Inject a few missing values into the float column before fitting.
    frame.loc[[2, 10, 50], "float_col"] = np.nan

    original = NumImputer()
    original.fit_transform(frame)

    # Serialize and immediately restore the fitted imputer.
    restored = pickle.loads(pickle.dumps(original))
    assert type(restored) == type(original)

    expected = original.transform(frame)
    actual = restored.transform(frame)

    assert expected.shape == actual.shape
    assert (expected == actual).all().all()

Example #2

Show file

def test_NumImputer_output_type():
    """Check that NumImputer preserves the dtype of ``float_col``.

    Four cases are covered: ``float64`` and ``float32`` input, each one
    first without and then with a missing value in ``float_col``.
    """
    df = get_sample_df(100, seed=123)

    for add_na in (False, True):
        for dtype in ("float64", "float32"):
            data = df.copy()
            # ``Series.astype`` returns a new Series; it must be assigned
            # back, otherwise the cast is silently discarded.
            data["float_col"] = data["float_col"].astype(dtype)
            if add_na:
                data.loc[0, "float_col"] = np.nan

            # Guard against the setup accidentally testing another dtype.
            assert data.dtypes["float_col"] == np.dtype(dtype)

            imp = NumImputer()
            Xenc = imp.fit_transform(data)

            assert Xenc.dtypes["float_col"] == data.dtypes["float_col"]

Example #3

Show file

def test__NumImputer():
    """Exercise ``_NumImputer`` and ``NumImputer`` on every input container.

    The test runs in two passes over the data returned by
    ``get_sample_data``:

    * ``add_na=True``: the output must contain no nulls, keep the input
      container type and row count, and (unless ``add_is_null=False``)
      gain one extra ``<col>_isnull`` indicator column flagging rows
      0 and 5 of column 1.
    * ``add_na=False``: the output must have the same shape as the input
      and no indicator column.

    Each pass covers DataFrame, numpy array and sparse matrix input, and
    checks ``get_feature_names`` with and without ``input_features``.
    """
    # Rows whose "is null" indicator is expected to be 0 (all but 0 and 5).
    non_null_rows = np.array([1, 2, 3, 4, 6, 7, 8, 9])
    # Non-trivial index (with a duplicate label) to check it is preserved.
    custom_index = np.array([0, 1, 2, 3, 4, 10, 11, 12, 12, 14])

    xx, xxd, xxs = get_sample_data(add_na=True)
    xxd.index = custom_index

    # DataFrame entry
    for inp in (_NumImputer(), NumImputer(), _NumImputer(add_is_null=False),
                NumImputer(add_is_null=False)):
        xx_out = inp.fit_transform(xxd)
        assert (xx_out.index == xxd.index).all()
        assert pd.isnull(xxd.loc[0, "col1"])  # Verify that it is still null
        assert xx_out.isnull().sum().sum() == 0
        # The missing value must be replaced by the mean of the non-null ones.
        assert xx_out["col1"][0] == xxd.loc[~xxd["col1"].isnull(),
                                            "col1"].mean()

        assert xx_out.shape[0] == xx.shape[0]
        assert get_type(xx_out) == get_type(xxd)

        if inp.add_is_null:
            assert inp.get_feature_names() == [
                "col0", "col1", "col2", "col3", "col4", "col5", "col6",
                "col1_isnull"
            ]
            assert xx_out.shape[1] == 1 + xxd.shape[1]
            assert xx_out["col1_isnull"].iloc[0] == 1
            assert xx_out["col1_isnull"].iloc[5] == 1
            assert (xx_out["col1_isnull"].iloc[non_null_rows] == 0).all()

        else:
            assert xx_out.shape[1] == xxd.shape[1]
            assert inp.get_feature_names() == [
                "col0", "col1", "col2", "col3", "col4", "col5", "col6"
            ]

    # With allow_unseen_null=False, a null in a column that had none at fit
    # time must make transform fail. This check does not depend on the
    # imputers iterated above, so it is run once, outside the loop.
    strict_inp = _NumImputer(add_is_null=False, allow_unseen_null=False)
    strict_inp.fit(xxd)
    xxd2 = xxd.copy()
    xxd2.iloc[0, 3] = np.nan
    try:
        strict_inp.transform(xxd2)
    except ValueError:
        pass
    else:
        raise AssertionError("Model should have fail its transformation")

    input_features = ["COL_%d" % i for i in range(xx.shape[1])]
    # Numpy array
    for inp in (_NumImputer(), NumImputer()):
        xx_out = inp.fit_transform(xx)
        assert pd.isnull(xx[0, 1])
        assert pd.isnull(xx_out).sum() == 0
        assert xx_out.shape[1] == 1 + xx.shape[1]
        assert xx_out.shape[0] == xx.shape[0]
        assert get_type(xx_out) == get_type(xx)
        assert inp.get_feature_names() == [
            "0", "1", "2", "3", "4", "5", "6", "1_isnull"
        ]
        assert inp.get_feature_names(
            input_features) == input_features + ["COL_1_isnull"]
        assert xx_out[0, 7] == 1
        assert xx_out[5, 7] == 1
        assert (xx_out[non_null_rows, 7] == 0).all()

    # Sparse Array
    for inp in (_NumImputer(), NumImputer()):
        for f in (sps.coo_matrix, sps.csc_matrix, sps.csr_matrix):
            xxsf = f(xxs.copy())
            xx_out = inp.fit_transform(xxsf)
            # Densify once instead of once per assertion.
            dense_out = xx_out.todense()
            assert pd.isnull(xxs[0, 1])
            assert pd.isnull(dense_out).sum() == 0
            assert get_type(xx_out) == get_type(xxs)
            assert xx_out.shape[1] == 1 + xxs.shape[1]
            assert xx_out.shape[0] == xx.shape[0]
            assert inp.get_feature_names() == [
                "0", "1", "2", "3", "4", "5", "6", "1_isnull"
            ]
            assert inp.get_feature_names(
                input_features) == input_features + ["COL_1_isnull"]
            # Rows 0 and 5 carry the indicator, mirroring the numpy case
            # (the original asserted row 0 twice and never checked row 5).
            assert dense_out[0, 7] == 1
            assert dense_out[5, 7] == 1
            assert (dense_out[non_null_rows, 7] == 0).all()

    xx, xxd, xxs = get_sample_data(add_na=False)
    xxd.index = custom_index

    # DataFrame entry
    for inp in (_NumImputer(), NumImputer()):
        xx_out = inp.fit_transform(xxd)
        assert (xx_out.index == xxd.index).all()
        assert xx_out.isnull().sum().sum() == 0
        assert xx_out.shape[1] == xxd.shape[1]
        assert xx_out.shape[0] == xx.shape[0]
        assert get_type(xx_out) == get_type(xxd)
        assert inp.get_feature_names() == [
            "col0", "col1", "col2", "col3", "col4", "col5", "col6"
        ]

    # Numpy array
    for inp in (_NumImputer(), NumImputer()):
        xx_out = inp.fit_transform(xx)
        assert pd.isnull(xx_out).sum() == 0
        assert xx_out.shape[1] == xx.shape[1]
        assert xx_out.shape[0] == xx.shape[0]
        assert get_type(xx_out) == get_type(xx)
        assert inp.get_feature_names() == ["0", "1", "2", "3", "4", "5", "6"]
        assert inp.get_feature_names(
            input_features=input_features) == input_features

    # Sparse Array
    for inp in (_NumImputer(), NumImputer()):
        for f in (sps.coo_matrix, sps.csc_matrix, sps.csr_matrix):
            xxs_f = f(xxs.copy())
            xx_out = inp.fit_transform(xxs_f)
            assert pd.isnull(xx_out.todense()).sum() == 0
            assert get_type(xx_out) == get_type(xxs)
            assert xx_out.shape[1] == xxs.shape[1]
            assert xx_out.shape[0] == xx.shape[0]
            assert inp.get_feature_names() == [
                "0", "1", "2", "3", "4", "5", "6"
            ]
            assert inp.get_feature_names(
                input_features=input_features) == input_features