def test_knn_n_neighbors():
    """Check KNNImputer output for ``n_neighbors`` set to 1 and to 6.

    The same 7x2 input (with three missing entries in column 0 and one in
    column 1) is imputed twice.  In both runs the fitted ``statistics_``
    attribute is compared against the NaN-ignoring column means of the
    input.  The 6-neighbor run is additionally compared against an imputer
    configured with one more neighbor (7), which is asserted to produce the
    same result.
    """
    nan = np.nan
    rows = [[0, 0], [nan, 2], [4, 3], [5, nan], [7, 7], [nan, 8], [14, 13]]

    X = np.array(rows)
    expected_stats = np.nanmean(X, axis=0)

    # --- one neighbor ---
    expected_1nn = np.array(
        [[0, 0], [4, 2], [4, 3], [5, 3], [7, 7], [7, 8], [14, 13]]
    )
    knn_single = KNNImputer(n_neighbors=1)
    # fit_transform must run before statistics_ is read.
    assert_array_equal(knn_single.fit_transform(X), expected_1nn)
    assert_array_equal(knn_single.statistics_, expected_stats)

    # --- six neighbors ---
    # Rebuild the input from the literal so this run starts from fresh data.
    X = np.array(rows)
    expected_6nn = np.array(
        [[0, 0], [6, 2], [4, 3], [5, 5.5], [7, 7], [6, 8], [14, 13]]
    )
    k = 6
    knn_k = KNNImputer(n_neighbors=k)
    knn_k_plus_one = KNNImputer(n_neighbors=k + 1)

    assert_array_equal(knn_k.fit_transform(X), expected_6nn)
    assert_array_equal(knn_k.statistics_, expected_stats)

    # Asking for 7 neighbors is expected to give the same output as 6.
    imputed_k = knn_k.fit_transform(X)
    imputed_k_plus_one = knn_k_plus_one.fit(X).transform(X)
    assert_array_equal(imputed_k, imputed_k_plus_one)
def test_knn_imputation_default():
    """Check KNNImputer with its default constructor arguments.

    Four scenarios are exercised, each with a freshly constructed
    ``KNNImputer()``:

    1. A matrix whose missing entries are all filled with the expected
       whole-number values.
    2. A matrix containing a row with three of four values missing; the
       expected output fills that row with the NaN-ignoring column means.
    3. A column that is missing in every row but the last two; the expected
       fill value (21) equals the mean of the two observed values.
    4. Fitting on one matrix and transforming a different one.

    In every scenario ``statistics_`` is compared with the NaN-ignoring
    column means of the matrix passed to ``fit``.
    """
    nan = np.nan

    # Scenario 1: imputable matrix.
    X = np.array(
        [
            [1, 0, 0, 1],
            [2, 1, 2, nan],
            [3, 2, 3, nan],
            [nan, 4, 5, 5],
            [6, nan, 6, 7],
            [8, 8, 8, 8],
            [16, 15, 18, 19],
        ]
    )
    col_means = np.nanmean(X, axis=0)
    expected = np.array(
        [
            [1, 0, 0, 1],
            [2, 1, 2, 8],
            [3, 2, 3, 8],
            [4, 4, 5, 5],
            [6, 3, 6, 7],
            [8, 8, 8, 8],
            [16, 15, 18, 19],
        ]
    )
    knn = KNNImputer()
    assert_array_equal(knn.fit_transform(X), expected)
    assert_array_equal(knn.statistics_, col_means)

    # Scenario 2: % missing in a row > row_max_missing.
    X = np.array(
        [
            [1, 0, 0, 1],
            [2, 1, 2, nan],
            [3, 2, 3, nan],
            [nan, 4, 5, 5],
            [6, nan, 6, 7],
            [8, 8, 8, 8],
            [19, 19, 19, 19],
            [nan, nan, nan, 19],
        ]
    )
    col_means = np.nanmean(X, axis=0)
    # The last row is expected to receive the column means for its three
    # missing entries while keeping its observed value of 19.
    mostly_missing_row = list(col_means[:3]) + [19]
    expected = np.array(
        [
            [1, 0, 0, 1],
            [2, 1, 2, 8],
            [3, 2, 3, 8],
            [4, 4, 5, 5],
            [6, 3, 6, 7],
            [8, 8, 8, 8],
            [19, 19, 19, 19],
            mostly_missing_row,
        ]
    )
    knn = KNNImputer()
    assert_array_almost_equal(knn.fit_transform(X), expected, decimal=6)
    assert_array_almost_equal(knn.statistics_, col_means, decimal=6)

    # Scenario 3: all neighboring donors also have the feature missing.
    X = np.array(
        [
            [1, 0, 0, nan],
            [2, 1, 2, nan],
            [3, 2, 3, nan],
            [4, 4, 5, nan],
            [6, 7, 6, nan],
            [8, 8, 8, nan],
            [20, 20, 20, 20],
            [22, 22, 22, 22],
        ]
    )
    col_means = np.nanmean(X, axis=0)
    expected = np.array(
        [
            [1, 0, 0, 21],
            [2, 1, 2, 21],
            [3, 2, 3, 21],
            [4, 4, 5, 21],
            [6, 7, 6, 21],
            [8, 8, 8, 21],
            [20, 20, 20, 20],
            [22, 22, 22, 22],
        ]
    )
    knn = KNNImputer()
    assert_array_equal(knn.fit_transform(X), expected)
    assert_array_equal(knn.statistics_, col_means)

    # Scenario 4: data passed to fit() and transform() differ.
    X = np.array([[0, 0], [nan, 2], [4, 3], [5, 6], [7, 7], [9, 8], [11, 16]])
    col_means = np.nanmean(X, axis=0)
    Y = np.array([[1, 0], [3, 2], [4, nan]])
    expected_Y = np.array([[1, 0], [3, 2], [4, 4.8]])
    knn = KNNImputer()
    fitted = knn.fit(X)
    assert_array_equal(fitted.transform(Y), expected_Y)
    assert_array_equal(knn.statistics_, col_means)