def test_missing_indicator_sparse_param(arr_type, missing_values,
                                        param_sparse):
    # Verify which container type the indicator returns for each value of
    # the ``sparse`` parameter, for both fit_transform and transform.
    raw_fit = np.array([[missing_values, missing_values, 1],
                        [4, missing_values, 2]])
    raw_trans = np.array([[missing_values, missing_values, 1],
                          [4, 12, 10]])
    X_fit = arr_type(raw_fit).astype(np.float64)
    X_trans = arr_type(raw_trans).astype(np.float64)

    indicator = MissingIndicator(missing_values=missing_values,
                                 sparse=param_sparse)
    masks = (indicator.fit_transform(X_fit), indicator.transform(X_trans))

    # Decide the expected container first, then check both masks against it.
    if param_sparse is True:
        expect_csc = True
    elif ((param_sparse == 'auto' and missing_values == 0)
          or param_sparse is False):
        expect_csc = False
    else:
        # Remaining case: the output is expected to follow the input type.
        expect_csc = sparse.issparse(X_fit)

    for mask in masks:
        if expect_csc:
            assert mask.format == 'csc'
        else:
            assert isinstance(mask, np.ndarray)
def test_missing_indicator_sparse_no_explicit_zeros():
    # Regression check for #13491: with sparse input, entries that are not
    # missing must not end up as explicitly stored zeros in the mask.
    X = sparse.csr_matrix([[0, 1, 2],
                           [1, 2, 0],
                           [2, 0, 1]])
    indicator = MissingIndicator(features='all', missing_values=1)
    mask = indicator.fit_transform(X)

    # Every stored entry counts toward getnnz(); assuming a boolean mask,
    # only True entries contribute to sum(), so the two agree only when no
    # zero is stored explicitly.
    n_stored = mask.getnnz()
    n_flagged = mask.sum()
    assert n_stored == n_flagged
def test_missing_indicator_no_missing():
    # Regression check for #13491: with features='missing-only' and no
    # missing value present in X, the resulting mask must have zero columns.
    X = np.array([[1, 1],
                  [1, 1]])
    indicator = MissingIndicator(features='missing-only', missing_values=-1)
    mask = indicator.fit_transform(X)

    n_kept_features = mask.shape[1]
    assert n_kept_features == 0
def get_scores_for_imputer(imputer, X_missing, y_missing):
    # Cross-validate REGRESSOR on the union of the imputer's output and a
    # missing-value indicator (zeros are treated as missing), and return the
    # per-split negative mean squared error scores.
    features = make_union(imputer, MissingIndicator(missing_values=0))
    model = make_pipeline(features, REGRESSOR)
    return cross_val_score(model, X_missing, y_missing,
                           scoring='neg_mean_squared_error',
                           cv=N_SPLITS)
def test_missing_indicator_new(missing_values, arr_type, dtype, param_features,
                               n_features, features_indices):
    # End-to-end check of the indicator mask: number of columns, selected
    # feature indices, values, dtype and container type, first with dense
    # output (sparse=False) and then with sparse output (sparse=True).
    raw_fit = np.array([[missing_values, missing_values, 1],
                        [4, 2, missing_values]])
    raw_trans = np.array([[missing_values, missing_values, 1],
                          [4, 12, 10]])

    # Bring the inputs to the requested container and dtype; the expected
    # masks only need the dtype conversion.
    X_fit = arr_type(raw_fit).astype(dtype)
    X_trans = arr_type(raw_trans).astype(dtype)
    fit_expected = np.array([[1, 1, 0], [0, 0, 1]]).astype(dtype)
    trans_expected = np.array([[1, 1, 0], [0, 0, 0]]).astype(dtype)

    indicator = MissingIndicator(missing_values=missing_values,
                                 features=param_features, sparse=False)
    fit_mask = indicator.fit_transform(X_fit)
    trans_mask = indicator.transform(X_trans)
    dense_masks = (fit_mask, trans_mask)

    for mask in dense_masks:
        assert mask.shape[1] == n_features

    assert_array_equal(indicator.features_, features_indices)

    # Only the columns listed in features_indices are expected in the output.
    for mask, expected in zip(dense_masks, (fit_expected, trans_expected)):
        assert_allclose(mask, expected[:, features_indices])

    for mask in dense_masks:
        assert mask.dtype == bool

    for mask in dense_masks:
        assert isinstance(mask, np.ndarray)

    # Same estimator, now asking for sparse output: the content must match
    # the dense masks computed above.
    indicator.set_params(sparse=True)
    fit_mask_sparse = indicator.fit_transform(X_fit)
    trans_mask_sparse = indicator.transform(X_trans)
    sparse_masks = (fit_mask_sparse, trans_mask_sparse)

    for mask in sparse_masks:
        assert mask.dtype == bool

    for mask in sparse_masks:
        assert mask.format == 'csc'

    for sparse_mask, dense_mask in zip(sparse_masks, dense_masks):
        assert_allclose(sparse_mask.toarray(), dense_mask)
def test_missing_indicator_raise_on_sparse_with_missing_0(arr_type):
    # With missing_values == 0, input converted through arr_type (presumably
    # a sparse container supplied by parametrization, not visible here) must
    # be rejected with a ValueError in both fit_transform and transform.
    missing_values = 0
    dense_fit = np.array([[missing_values, missing_values, 1],
                          [4, missing_values, 2]])
    dense_trans = np.array([[missing_values, missing_values, 1],
                            [4, 12, 10]])

    # Convert the inputs to the container format under test.
    converted_fit = arr_type(dense_fit)
    converted_trans = arr_type(dense_trans)

    indicator = MissingIndicator(missing_values=missing_values)

    with pytest.raises(ValueError,
                       match="Sparse input with missing_values=0"):
        indicator.fit_transform(converted_fit)

    # Fit on the plain ndarray so that transform() below is called on a
    # fitted estimator.
    indicator.fit_transform(dense_fit)
    with pytest.raises(ValueError,
                       match="Sparse input with missing_values=0"):
        indicator.transform(converted_trans)
def test_missing_indicator_error(X_fit, X_trans, params, msg_err):
    # Fitting on X_fit and then transforming X_trans with the given params
    # must raise a ValueError whose message matches msg_err.
    indicator = MissingIndicator(missing_values=-1)
    indicator.set_params(**params)

    with pytest.raises(ValueError, match=msg_err):
        fitted = indicator.fit(X_fit)
        fitted.transform(X_trans)
def test_missing_indicator_with_imputer(X, missing_values, X_trans_exp):
    # A union of a most-frequent SimpleImputer and a MissingIndicator,
    # sharing the same missing_values marker, must produce X_trans_exp.
    imputer = SimpleImputer(missing_values=missing_values,
                            strategy='most_frequent')
    indicator = MissingIndicator(missing_values=missing_values)
    union = make_union(imputer, indicator)

    assert_array_equal(union.fit_transform(X), X_trans_exp)
def test_missing_indicator_string():
    # The indicator must handle object arrays of strings: with 'a' as the
    # missing marker and features='all', the mask flags every 'a' entry.
    X = np.array([['a', 'b', 'c'],
                  ['b', 'c', 'a']], dtype=object)
    expected_mask = np.array([[True, False, False],
                              [False, False, True]])

    indicator = MissingIndicator(missing_values='a', features='all')
    assert_array_equal(indicator.fit_transform(X), expected_mask)