def test_matrix_factorization_with_low_rank_random_matrix():
    """Exercise MatrixFactorization via fit_transform, then fit, then transform.

    After each stage the second value returned by ``reconstruction_error``
    (bound as the missing-entry MAE) must be below 0.1.
    """
    settings = dict(
        learning_rate=0.01,
        rank=3,
        l2_penalty=0,
        min_improvement=1e-6,
        verbose=False,
    )

    # Stage 1: one-shot fit_transform on the incomplete matrix.
    initialize_random_seed()  # for reproducibility
    factorizer = MatrixFactorization(**settings)
    completed = factorizer.fit_transform(XY_incomplete)
    _, mae = reconstruction_error(
        XY, completed, missing_mask, name="MatrixFactorization")
    assert mae < 0.1, "Error too high!"

    # Stage 2: explicit fit with the mask on a freshly seeded solver.
    # NOTE(review): the value returned by fit() is scored as the completed
    # matrix here -- confirm fit() returns imputed data, not the solver.
    initialize_random_seed()  # for reproducibility
    factorizer = MatrixFactorization(**settings)
    completed = factorizer.fit(XY_incomplete, missing_mask)
    _, mae = reconstruction_error(
        XY, completed, missing_mask, name="MatrixFactorization")
    assert mae < 0.1, "Error too high!"

    # Stage 3: transform with the solver fitted in stage 2.
    completed = factorizer.transform(XY_incomplete, missing_mask)
    _, mae = reconstruction_error(
        XY, completed, missing_mask, name="MatrixFactorization")
    assert mae < 0.1, "Error too high!"
def test_iterative_imputer_with_low_rank_random_matrix():
    """Impute XY_incomplete with IterativeImputer and require a MAE below 0.1."""
    completed = IterativeImputer(
        n_iter=50,
        random_state=0,
    ).fit_transform(XY_incomplete)
    _, mae = reconstruction_error(
        XY, completed, missing_mask, name="IterativeImputer")
    assert mae < 0.1, "Error too high with IterativeImputer method!"
def test_nuclear_norm_minimization_with_low_rank_random_matrix():
    """Run NuclearNormMinimization on the first 100 rows; require MAE < 0.1."""
    # Only the leading 100 rows of the data, truth and mask are used.
    head = slice(None, 100)
    minimizer = NuclearNormMinimization(max_iters=2000)
    completed = minimizer.fit_transform(XY_incomplete[head])
    _, mae = reconstruction_error(
        XY[head], completed, missing_mask[head], name="NuclearNorm")
    assert mae < 0.1, "Error too high!"
def test_iterative_svd_with_low_rank_random_matrix():
    """Complete XY_incomplete with a rank-3 IterativeSVD; require MAE < 0.1."""
    completed = IterativeSVD(rank=3).fit_transform(XY_incomplete)
    _, mae = reconstruction_error(
        XY, completed, missing_mask, name="IterativeSVD")
    assert mae < 0.1, "Error too high!"
def test_soft_impute_with_low_rank_random_matrix():
    """Complete XY_incomplete with a default SoftImpute; require MAE < 0.1."""
    completed = SoftImpute().fit_transform(XY_incomplete)
    _, mae = reconstruction_error(
        XY, completed, missing_mask, name="SoftImpute")
    assert mae < 0.1, "Error too high!"
def test_matrix_factorization_with_low_rank_random_matrix():
    """Complete XY_incomplete with an unpenalized rank-3 MatrixFactorization.

    The second value returned by ``reconstruction_error`` must be below 0.01.
    """
    factorizer = MatrixFactorization(
        rank=3,
        l1_penalty=0,
        l2_penalty=0,
    )
    completed = factorizer.complete(XY_incomplete)
    _, mae = reconstruction_error(
        XY, completed, missing_mask, name="MatrixFactorization")
    assert mae < 0.01, "Error too high!"
def test_missforest_all_continous():
    """Impute XY_incomplete with a 500-estimator MissForest; require MAE < 0.3."""
    forest = MissForest(n_estimators=500)
    # Original author's note: no normalization is needed before imputing.
    completed = forest.fit_transform(XY_incomplete)
    _, mae = reconstruction_error(
        XY, completed, missing_mask, name="MissForest")
    assert mae < 0.3, "Error too high!"
def test_mice_column_with_low_rank_random_matrix():
    """Complete XY_incomplete with MICE using impute_type='col'; MAE < 0.1."""
    imputer = MICE(
        n_imputations=100,
        impute_type='col',
    )
    completed = imputer.complete(XY_incomplete)
    _, mae = reconstruction_error(
        XY, completed, missing_mask, name="MICE (impute_type=col)")
    assert mae < 0.1, "Error too high with column method!"
def test_mice_row_with_low_rank_random_matrix_approximate():
    """Complete XY_incomplete with MICE (impute_type='pmm', 5 nearest columns).

    The second value returned by ``reconstruction_error`` must be below 0.1.
    """
    imputer = MICE(
        n_imputations=100,
        impute_type='pmm',
        n_nearest_columns=5,
    )
    completed = imputer.complete(XY_incomplete)
    # NOTE(review): the report label says "row" although impute_type is 'pmm'.
    _, mae = reconstruction_error(
        XY, completed, missing_mask, name="MICE (impute_type=row)")
    assert mae < 0.1, "Error too high with approximate PMM method!"
def test_solver_fill_methods_with_low_rank_random_matrix():
    """Run SimpleFill with every listed fill method; each must give MAE < 5."""
    fill_methods = ("zero", "mean", "median", "min", "random")
    for fill_method in fill_methods:
        completed = SimpleFill(fill_method=fill_method).fit_transform(
            XY_incomplete)
        _, mae = reconstruction_error(
            XY,
            completed,
            missing_mask,
            name="Solver with fill_method=%s" % fill_method)
        assert mae < 5, (
            "Error too high for Solver with %s fill method!" % fill_method)
def test_iterative_imputer_with_low_rank_random_matrix_approximate():
    """Impute with IterativeImputer limited to 5 nearest features; MAE < 0.1."""
    approximate_imputer = IterativeImputer(
        n_iter=50,
        n_nearest_features=5,
        random_state=0,
    )
    completed = approximate_imputer.fit_transform(XY_incomplete)
    _, mae = reconstruction_error(
        XY,
        completed,
        missing_mask,
        name="IterativeImputer with n_nearest_features=5")
    assert mae < 0.1, (
        "Error too high with IterativeImputer "
        "method using n_nearest_features=5!")
def test_auto_encoder_with_low_rank_random_matrix():
    """Complete XY_incomplete with an AutoEncoder solver; require MAE < 0.1."""
    encoder_options = dict(
        hidden_layer_sizes=None,
        hidden_activation="tanh",
        optimizer="adam",
        recurrent_weight=0.0,
    )
    completed = AutoEncoder(**encoder_options).complete(XY_incomplete)
    _, mae = reconstruction_error(XY, completed, missing_mask)
    assert mae < 0.1, "Error too high!"
def test_matrix_factorization_with_low_rank_random_matrix():
    """Complete XY_incomplete with a rank-3 MatrixFactorization; MAE < 0.1."""
    factorizer = MatrixFactorization(
        learning_rate=0.01,
        rank=3,
        l2_penalty=0,
        min_improvement=1e-6,
    )
    completed = factorizer.complete(XY_incomplete)
    _, mae = reconstruction_error(
        XY, completed, missing_mask, name="MatrixFactorization")
    assert mae < 0.1, "Error too high!"
def test_matrix_factorization_with_low_rank_random_matrix():
    """Run MatrixFactorization (rank 3, zero L1/L2 penalties); MAE < 0.01."""
    completed = MatrixFactorization(
        rank=3, l1_penalty=0, l2_penalty=0).complete(XY_incomplete)
    _, mae = reconstruction_error(
        XY, completed, missing_mask, name="MatrixFactorization")
    assert mae < 0.01, "Error too high!"
def test_iterative_imputer_train_test_with_low_rank_random_matrix():
    """Fit IterativeImputer on the first 250 rows and score it on the rest.

    The imputer is fitted on ``XY_incomplete[:250]`` and used to transform
    ``XY_incomplete[250:]``; the second value returned by
    ``reconstruction_error`` for the held-out rows must be below 0.1.
    """
    split = 250
    XY_incomplete_train = XY_incomplete[:split]
    XY_incomplete_test = XY_incomplete[split:]
    XY_test = XY[split:]
    # Fix: restrict the mask to the same held-out rows as XY_test and the
    # completed test matrix. The original passed the full, unsliced mask,
    # which does not line up row-for-row with the sliced matrices.
    missing_mask_test = missing_mask[split:]

    imputer = IterativeImputer(n_iter=50, random_state=0)
    imputer.fit(XY_incomplete_train)
    XY_completed_test = imputer.transform(XY_incomplete_test)

    _, missing_mae = reconstruction_error(
        XY_test,
        XY_completed_test,
        missing_mask_test,
        name="IterativeImputer Train/Test")
    assert missing_mae < 0.1, \
        "Error too high with IterativeImputer train/test method!"
def test_iterative_imputer_as_mice_with_low_rank_random_matrix_approximate():
    """Average five posterior-sampled IterativeImputer runs; require MAE < 0.1.

    Each run uses a different ``random_state`` (0 through 4); the element-wise
    mean of the completed matrices is what gets scored.
    """
    n_imputations = 5
    # One independent imputation per seed, in seed order.
    XY_completed = [
        IterativeImputer(
            n_iter=5,
            sample_posterior=True,
            random_state=seed,
        ).fit_transform(XY_incomplete)
        for seed in range(n_imputations)
    ]
    averaged = np.mean(XY_completed, axis=0)
    _, mae = reconstruction_error(
        XY, averaged, missing_mask, name="IterativeImputer as MICE")
    assert mae < 0.1, "Error too high with IterativeImputer as MICE!"
def test_auto_encoder_with_low_rank_random_matrix():
    """Run AutoEncoder.complete on XY_incomplete and require a MAE below 0.1."""
    encoder = AutoEncoder(
        hidden_layer_sizes=None,
        hidden_activation="tanh",
        optimizer="adam",
        recurrent_weight=0.0,
    )
    completed = encoder.complete(XY_incomplete)
    # No ``name`` label is passed to reconstruction_error in this test.
    _, mae = reconstruction_error(XY, completed, missing_mask)
    assert mae < 0.1, "Error too high!"
def test_nuclear_norm_minimization_with_low_rank_random_matrix():
    """Complete the first 100 rows with NuclearNormMinimization; MAE < 0.1."""
    n_rows = 100
    minimizer = NuclearNormMinimization(require_symmetric_solution=False)
    completed = minimizer.complete(XY_incomplete[:n_rows])
    # Truth and mask are cut to the same leading rows as the input.
    _, mae = reconstruction_error(
        XY[:n_rows], completed, missing_mask[:n_rows], name="NuclearNorm")
    assert mae < 0.1, "Error too high!"
def test_matrix_factorization_with_low_rank_random_matrix():
    """Fit a rank-5 MatrixFactorization (learning_rate=0.02); require MAE < 0.1."""
    completed = MatrixFactorization(
        learning_rate=0.02,
        rank=5,
    ).fit_transform(XY_incomplete)
    _, mae = reconstruction_error(
        XY, completed, missing_mask, name="MatrixFactorization")
    assert mae < 0.1, "Error too high!"