Python MICEImputer Examples

Programming Language: Python

Namespace/Package Name: sklearn.preprocessing.imputation

Class/Type: MICEImputer

Examples at hotexamples.com: 9

Python MICEImputer - 9 examples found. These are the top-rated, real-world Python examples of sklearn.preprocessing.imputation.MICEImputer, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

MICEImputer(9)

fit_transform(6)

transform(2)

fit(1)

Example #1

Show file

def test_mice_imputation_order():
    # Check the order in which features are visited for each supported
    # ``imputation_order`` setting.
    n_samples = 100
    n_features = 10
    X = sparse_random_matrix(n_samples, n_features, density=0.10).toarray()
    # Column 0 contains no zeros (the missing-value marker used below), so
    # the assertions expect it to be absent from the imputation sequence.
    X[:, 0] = 1

    all_orders = ('random', 'roman', 'monotone', 'revmonotone', 'arabic')
    for order in all_orders:
        imputer = MICEImputer(
            missing_values=0,
            n_imputations=1,
            n_burn_in=1,
            n_nearest_features=5,
            min_value=0,
            max_value=1,
            verbose=False,
            imputation_order=order,
        )
        imputer.fit_transform(X)

        visited = [step.feat_idx for step in imputer.imputation_sequence_]
        # One full round touches every feature except column 0.
        first_round = visited[:n_features - 1]

        if order == 'roman':
            # Left to right: 1, 2, ..., n_features - 1.
            assert np.all(first_round == np.arange(1, n_features))
        elif order == 'arabic':
            # Right to left: n_features - 1, ..., 2, 1.
            assert np.all(first_round == np.arange(n_features - 1, 0, -1))
        elif order == 'random':
            # The remaining rounds should not repeat the first round's order.
            later_rounds = visited[n_features - 1:]
            assert first_round != later_rounds
        # 'monotone' and 'revmonotone' are only checked for running cleanly.

Example #2

Show file

def test_mice_transform_correctness():
    # Fit MICE on one half of a low-rank matrix with missing entries, then
    # check that ``transform`` recovers the held-out half to one decimal.
    def make_data(rank):
        # Build an (n, d) matrix of rank at most ``rank`` as the product of
        # two random factors, hide roughly half of its entries, and split it
        # row-wise into a training half and a test half.
        n = 100
        d = 100
        A = np.random.random((n, rank))
        B = np.random.random((rank, d))
        Xfilled = np.dot(A, B)
        # half is randomly missing
        nan_mask = np.random.random((n, d)) < 0.5
        X_missing = Xfilled.copy()
        X_missing[nan_mask] = np.nan

        # split up data
        half = n // 2
        Xtr = X_missing[:half]
        Xts_filled = Xfilled[half:]
        Xts = X_missing[half:]
        return Xtr, Xts_filled, Xts

    for rank in [5, 10]:
        Xtr, Xts_filled, Xts = make_data(rank)
        imputer = MICEImputer(n_imputations=10, n_burn_in=10,
                              verbose=True).fit(Xtr)
        # Use ``transform`` (not ``fit_transform``) so the model fitted on the
        # training half is the one being evaluated; refitting on the test
        # half would discard it and leave ``transform`` untested.
        Xts_est = imputer.transform(Xts)
        assert_array_almost_equal(Xts_filled, Xts_est, decimal=1)

Example #3

Show file

def test_mice_rank_one():
    # MICE should recover a rank-one matrix from which roughly half of the
    # entries have been removed, to two decimal places.
    size = 100
    column_factor = np.random.random((size, 1))
    row_factor = np.random.random((1, size))
    X = np.dot(column_factor, row_factor)

    # Replace the masked entries with NaN, leaving X itself untouched.
    hidden = np.random.random((size, size)) < 0.5
    X_missing = np.where(hidden, np.nan, X)

    imputer = MICEImputer(n_imputations=5, n_burn_in=5, verbose=True)
    X_filled = imputer.fit_transform(X_missing)
    assert_array_almost_equal(X_filled, X, decimal=2)

Example #4

Show file

def test_imputation_shape():
    # The imputed matrix must keep the input shape for every strategy.
    expected_shape = (10, 2)
    X = np.random.randn(10, 2)
    X[::2] = np.nan  # every other row is entirely missing

    for strategy in ['mean', 'median', 'most_frequent', 'mice']:
        if strategy != 'mice':
            imputer = Imputer(strategy=strategy)
            # Sparse input is only exercised for the plain Imputer here.
            sparse_result = imputer.fit_transform(sparse.csr_matrix(X))
            assert_equal(sparse_result.shape, expected_shape)
        else:
            imputer = MICEImputer()

        dense_result = imputer.fit_transform(X)
        assert_equal(dense_result.shape, expected_shape)

Example #5

Show file

def test_mice_predictors():
    # Smoke test: MICE must run end to end with different predictor
    # estimators plugged in. No output values are checked.
    from sklearn.dummy import DummyRegressor
    from sklearn.linear_model import BayesianRidge

    n_samples, n_features = 100, 10
    X = sparse_random_matrix(n_samples, n_features, density=0.10).toarray()

    for predictor_cls in (DummyRegressor, BayesianRidge):
        imputer = MICEImputer(
            missing_values=0,
            n_imputations=1,
            n_burn_in=1,
            predictor=predictor_cls(),
        )
        imputer.fit_transform(X)

Example #6

Show file

def test_mice_missing_at_transform():
    # A feature with no missing values at fit time but a missing value at
    # transform time must be filled exactly like the initial imputer does.
    n_samples, n_features = 100, 10
    X_train = np.random.randint(low=0, high=3, size=(n_samples, n_features))
    X_test = np.random.randint(low=0, high=3, size=(n_samples, n_features))

    X_train[:, 0] = 1  # column 0: nothing missing while fitting
    X_test[0, 0] = 0   # column 0: one missing entry while transforming

    for strategy in ("mean", "median", "most_frequent"):
        mice = MICEImputer(
            missing_values=0,
            n_imputations=1,
            n_burn_in=1,
            initial_strategy=strategy,
        ).fit(X_train)
        initial_imputer = Imputer(missing_values=0, strategy=strategy).fit(X_train)

        # if there were no missing values at time of fit, then mice will
        # only use the initial imputer for that feature at transform
        mice_first_col = mice.transform(X_test)[:, 0]
        initial_first_col = initial_imputer.transform(X_test)[:, 0]
        assert np.all(mice_first_col == initial_first_col)

Example #7

Show file

def test_mice_additive_matrix():
    # Fit MICE on one half of a matrix whose columns are sums of shared
    # random components, then check that ``transform`` recovers the held-out
    # half to one decimal.
    n = 100
    d = 10
    A = np.random.randn(n, d)
    B = np.random.randn(n, d)
    Xfilled = np.zeros(A.shape)
    # Each pair (i, j) contributes the average of A[:, i] and B[:, j] to
    # column (i + j) % d.
    for i in range(d):
        for j in range(d):
            Xfilled[:, (i + j) % d] += (A[:, i] + B[:, j]) / 2
    # a quarter is randomly missing
    nan_mask = np.random.random((n, d)) < 0.25
    X_missing = Xfilled.copy()
    X_missing[nan_mask] = np.nan

    # split up data
    half = n // 2
    Xtr = X_missing[:half]
    Xts_filled = Xfilled[half:]
    Xts = X_missing[half:]

    imputer = MICEImputer(n_imputations=10, n_burn_in=10,
                          verbose=True).fit(Xtr)
    # Use ``transform`` (not ``fit_transform``) so the model fitted on the
    # training half is the one being evaluated; refitting on the test half
    # would discard it.
    Xts_est = imputer.transform(Xts)
    assert_array_almost_equal(Xts_filled, Xts_est, decimal=1)

Example #8

Show file

def test_mice_pipeline_grid_search():
    # Smoke test: MICE must work as a pipeline step whose parameters are
    # tuned by a grid search.
    imputer_step = MICEImputer(missing_values=0,
                               n_imputations=1,
                               n_burn_in=1,
                               random_state=0)
    regressor_step = tree.DecisionTreeRegressor(random_state=0)
    pipeline = Pipeline([('imputer', imputer_step),
                         ('tree', regressor_step)])

    # Only the initial imputation strategy is searched over.
    param_grid = {
        'imputer__initial_strategy': ["mean", "median", "most_frequent"]
    }

    n_samples, n_features = 100, 10
    X = sparse_random_matrix(n_samples, n_features, density=0.50).toarray()
    Y = np.random.random((n_samples, n_features))

    search = GridSearchCV(pipeline, param_grid)
    search.fit(X, Y)

Example #9

Show file

def test_imputation_pickle():
    # A fitted imputer must give the same transform output after a pickle
    # round trip, for every strategy.
    import pickle

    size = 100
    X = sparse_random_matrix(size, size, density=0.10).todense()

    def build_imputer(strategy):
        # 'mice' selects MICEImputer; any other name is an Imputer strategy.
        if strategy == 'mice':
            return MICEImputer(missing_values=0,
                               n_imputations=1,
                               n_burn_in=1)
        return Imputer(missing_values=0, strategy=strategy)

    for strategy in ["mean", "median", "most_frequent", "mice"]:
        imputer = build_imputer(strategy)
        imputer.fit(X)

        restored = pickle.loads(pickle.dumps(imputer))

        # Each transform gets its own copy of X.
        expected = imputer.transform(X.copy())
        actual = restored.transform(X.copy())
        failure_message = ("Fail to transform the data after pickling "
                           "(strategy = %s)" % (strategy))
        assert_array_almost_equal(expected, actual, err_msg=failure_message)