Example #1
from fancyimpute import SimpleFill

# XY is a complete low-rank matrix and XY_incomplete is the same matrix with
# the entries in missing_mask set to NaN; reconstruction_error reports error
# statistics over the missing entries (its second return value is the MAE
# used below). These fixtures are defined elsewhere in the test module.
def test_solver_fill_methods_with_low_rank_random_matrix():
    for fill_method in ("zero", "mean", "median", "min", "random"):
        imputer = SimpleFill(fill_method=fill_method)
        XY_completed = imputer.fit_transform(XY_incomplete)
        _, missing_mae = reconstruction_error(
            XY,
            XY_completed,
            missing_mask,
            name="Solver with fill_method=%s" % fill_method)
        assert missing_mae < 5, \
            "Error too high for Solver with %s fill method!" % fill_method
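The test above relies on fixtures that the snippet does not show. A minimal, self-contained sketch of equivalent data and error computation could look like the following; the array sizes, the 10% missing rate, and the reconstruction_error helper here are illustrative assumptions, not fancyimpute's own test code.

import numpy as np
from fancyimpute import SimpleFill

# Build a low-rank matrix and hide roughly 10% of its entries (illustrative sizes).
rng = np.random.RandomState(0)
XY = np.dot(rng.randn(500, 5), rng.randn(5, 20))
missing_mask = rng.rand(*XY.shape) < 0.1
XY_incomplete = XY.copy()
XY_incomplete[missing_mask] = np.nan

def reconstruction_error(X_true, X_filled, mask, name=""):
    # Error statistics restricted to the entries that were missing.
    diff = X_true[mask] - X_filled[mask]
    return (diff ** 2).mean(), np.abs(diff).mean()

XY_completed = SimpleFill(fill_method="mean").fit_transform(XY_incomplete)
_, missing_mae = reconstruction_error(XY, XY_completed, missing_mask, name="mean")
print("MAE on missing entries: %0.4f" % missing_mae)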
Example #2
# Method of an imputation wrapper class; SimpleFill, KNN, IterativeSVD and
# MatrixFactorization come from fancyimpute, and self.impute_method is the
# list of method names to run. Each completed matrix is stored by name, and
# their element-wise average is kept under the 'ave' key.
def run_impute(self, X, state='train'):
    if state == 'train':
        self.train_data['ave'] = np.zeros([X.shape[0], X.shape[1]])
        for imp_method in self.impute_method:
            if imp_method == 'mean':
                imp_ope = SimpleFill()
            elif imp_method == 'KNN':
                imp_ope = KNN()
            elif imp_method == 'IterativeSVD':
                imp_ope = IterativeSVD()
            elif imp_method == 'MatrixFactorization':
                imp_ope = MatrixFactorization()
            else:
                raise ValueError("Unknown imputation method: %s" % imp_method)
            X_filled = imp_ope.fit_transform(X)
            self.train_data[imp_method] = X_filled
            self.impute_operator[imp_method] = imp_ope
            self.train_data['ave'] += X_filled
        self.train_data['ave'] /= len(self.impute_method)
    return 0
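Outside the class, the same averaging idea can be sketched in a few standalone lines; the data, the dictionary layout, and the small ranks passed to IterativeSVD and MatrixFactorization below are assumptions for illustration, not part of the original code.

import numpy as np
from fancyimpute import SimpleFill, KNN, IterativeSVD, MatrixFactorization

# Illustrative incomplete matrix: low-rank data with ~10% of entries missing.
rng = np.random.RandomState(0)
X = np.dot(rng.randn(200, 6), rng.randn(6, 30))
X[rng.rand(*X.shape) < 0.1] = np.nan

imputers = {
    'mean': SimpleFill(),
    'KNN': KNN(),
    'IterativeSVD': IterativeSVD(rank=5),
    'MatrixFactorization': MatrixFactorization(rank=5),
}

# Run each imputer, keep its output, then average the completed matrices
# element-wise, mirroring the 'ave' entry built by run_impute above.
train_data = {name: imp.fit_transform(X) for name, imp in imputers.items()}
train_data['ave'] = sum(train_data.values()) / len(imputers)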
Example #3
import numpy as np
from fancyimpute import (BiScaler,
                         KNN,
                         NuclearNormMinimization,
                         SoftImpute,
                         SimpleFill)

n = 200
m = 20
inner_rank = 4
X = np.dot(np.random.randn(n, inner_rank), np.random.randn(inner_rank, m))
print("Mean squared element: %0.4f" % (X**2).mean())

# X is a data matrix which we're going to randomly drop entries from
missing_mask = np.random.rand(*X.shape) < 0.1
X_incomplete = X.copy()
# missing entries indicated with NaN
X_incomplete[missing_mask] = np.nan

meanFill = SimpleFill("mean")
X_filled_mean = meanFill.fit_transform(X_incomplete)

# Use 3 nearest rows which have a feature to fill in each row's missing features
knnImpute = KNN(k=3)
X_filled_knn = knnImpute.fit_transform(X_incomplete)

# matrix completion using convex optimization to find low-rank solution
# that still matches observed values. Slow!
X_filled_nnm = NuclearNormMinimization().fit_transform(X_incomplete)

# Instead of solving the nuclear norm objective directly,
# induce sparsity using singular value thresholding
softImpute = SoftImpute()

# simultaneously normalizes the rows and columns of your observed data,
# sometimes useful for low-rank imputation methods
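# The snippet is cut off here; a likely continuation (an assumption, following
# the same pattern as the calls above) is to normalize with BiScaler, run
# SoftImpute on the normalized matrix, and then undo the scaling:
biscaler = BiScaler()
X_incomplete_normalized = biscaler.fit_transform(X_incomplete)
X_filled_softimpute_normalized = softImpute.fit_transform(X_incomplete_normalized)
X_filled_softimpute = biscaler.inverse_transform(X_filled_softimpute_normalized)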