def test_soft_impute_with_low_rank_random_matrix():
    """SoftImpute should recover the held-out entries of the low-rank
    matrix with a mean absolute error below 0.1."""
    imputer = SoftImpute()
    completed = imputer.fit_transform(XY_incomplete)
    # reconstruction_error reports (observed, missing) errors; only the
    # error on the missing entries matters here.
    _, missing_mae = reconstruction_error(
        XY, completed, missing_mask, name="SoftImpute")
    assert missing_mae < 0.1, "Error too high!"
def fun(lambd_val):
    """Objective for tuning the shrinkage value: fit SoftImpute with the
    given lambda and return the MSE on the held-out (missing) entries.

    Relies on ``X``, ``X_incomplete``, ``missing_mask`` and the solver
    settings from the enclosing scope.
    """
    imputer = SoftImpute(
        shrinkage_value=lambd_val,
        init_fill_method='mean',
        max_rank=max_rank,
        verbose=verbose,
        max_iters=max_iters,
        convergence_threshold=convergence_threshold,
    )
    # Copy so repeated evaluations never mutate the shared input matrix.
    completed = imputer.fit_transform(X_incomplete.copy())
    residuals = completed[missing_mask] - X[missing_mask]
    return (residuals ** 2).mean()
def fit(self, trainset):
    """Fit by completing the user-item rating matrix with SoftImpute.

    Every observed rating from *trainset* (presumably a scikit-surprise
    trainset — confirm against caller) is placed into an otherwise-NaN
    matrix, which SoftImpute then completes. The filled matrix is stored
    on ``self.predictions``.

    :param trainset: training ratings exposing ``n_users``, ``n_items``
        and ``all_ratings()``.
    :return: self, for chaining.
    """
    AlgoBase.fit(self, trainset)
    # Start from an all-NaN matrix and fill in the observed ratings only.
    ratings = np.nan * np.zeros((trainset.n_users, trainset.n_items))
    for user, item, rating in trainset.all_ratings():
        ratings[user, item] = rating
    solver = SoftImpute(
        shrinkage_value=self.lmbda,
        max_iters=self.max_iter,
        max_rank=self.max_rank,
        min_value=self.min_value,
        max_value=self.max_value,
        verbose=self.verbose,
    )
    self.predictions = solver.fit_transform(ratings)
    return self
def impute(self, trained_model, input):
    """Loads the input table and gives the imputed table.

    :param trained_model: trained model returned by train function - not
        used in our case
    :param input: input table which needs to be imputed
    :return: imputed table as a numpy array
    """
    # BiScaler normalizes rows and columns of the observed data; the
    # imputation runs on the normalized matrix and the scaling is then
    # undone so the result is in the original units.
    scaler = BiScaler()
    normalized = scaler.fit_transform(input)
    completed_normalized = SoftImpute().fit_transform(normalized)
    return scaler.inverse_transform(completed_normalized)
class SoftImputer:
    """Matrix completion by iterative soft thresholding of SVD decompositions.

    The underlying ``SoftImpute`` solver is transductive, so it has no
    genuine 'fit'/'transform' split. Both methods are provided for
    interface compatibility, but ``transform`` simply refits the imputer
    on the given data, cf. KNNExt.

    For more information, please check inductive vs. transductive
    imputation: https://github.com/iskandr/fancyimpute
    """

    def __init__(self):
        # Wrapped solver, created once with default settings.
        self._imputer = SoftImpute()

    def fit_transform(self, df):
        """Run feature imputation on *df*.

        Params:
            df  input data to run the imputation on

        Returns a DataFrame with the imputed values and the original
        columns and index preserved.
        """
        filled = self._imputer.fit_transform(df.to_numpy())
        return pd.DataFrame(data=filled, columns=df.columns, index=df.index)

    def fit(self, df):
        """No-op: the solver cannot be fit independently of transforming."""
        return

    def transform(self, df):
        """Refit on *df* and return the imputed frame."""
        return self.fit_transform(df)
# Matrix completion using convex optimization to find a low-rank solution
# that still matches the observed values. Slow!
X_filled_nnm = NuclearNormMinimization().fit_transform(X_incomplete)

# Instead of solving the nuclear norm objective directly, SoftImpute
# induces sparsity using singular value thresholding.
softImpute = SoftImpute()

# BiScaler simultaneously normalizes the rows and columns of the observed
# data (zero mean, unit variance), sometimes useful for low-rank methods.
biscaler = BiScaler()
X_incomplete_normalized = biscaler.fit_transform(X_incomplete)
X_filled_softimpute_normalized = softImpute.fit_transform(X_incomplete_normalized)
# Undo the scaling so the completed matrix is in the original units.
X_filled_softimpute = biscaler.inverse_transform(X_filled_softimpute_normalized)

# For comparison: SoftImpute run directly on the raw (unscaled) data.
X_filled_softimpute_no_biscale = softImpute.fit_transform(X_incomplete)

# Mean squared error on the held-out (missing) entries for each method.
meanfill_mse = ((X_filled_mean[missing_mask] - X[missing_mask]) ** 2).mean()
print("meanFill MSE: %f" % meanfill_mse)

nnm_mse = ((X_filled_nnm[missing_mask] - X[missing_mask]) ** 2).mean()
print("Nuclear norm minimization MSE: %f" % nnm_mse)

softImpute_mse = ((X_filled_softimpute[missing_mask] - X[missing_mask]) ** 2).mean()
print("SoftImpute MSE: %f" % softImpute_mse)