def _load_or_fit_pca(enable_PCA_cache, pca_filename='pca.pkl'):
    """Return a fitted PCA, reusing the on-disk cache when it is enabled.

    Args:
        enable_PCA_cache: When truthy, load the PCA from ``pca_filename`` if
            that file exists, otherwise fit a new PCA and dump it there.
            When falsy, always fit a new PCA and never touch the file.
        pca_filename: Path of the joblib cache file.

    Returns:
        The PCA object, either loaded from the cache or freshly fitted on the
        unlabeled images.
    """
    if enable_PCA_cache and os.path.isfile(pca_filename):
        # NOTE(review): joblib.load unpickles the file, so it can execute
        # arbitrary code -- only point this at cache files you trust.
        return joblib.load(pca_filename)

    # The unlabeled images are needed only to fit the PCA, so they are loaded
    # here rather than up front; a cache hit skips the load entirely.
    unlabeled_images = preprocessor.load_unlabeled(is_gabor=False)
    pca = models.PCA()
    pca.fit(unlabeled_images)
    if enable_PCA_cache:
        # Save the PCA so the next run can skip the fit.
        joblib.dump(pca, pca_filename)
    return pca


def PCA_please(enable_PCA_cache, submit_test_prediction):
    """Fit ``models.SVM`` on PCA-transformed training images and report results.

    The PCA is fitted on the unlabeled images (or loaded from 'pca.pkl' when
    caching is enabled and the file exists). Training and validation images
    are transformed with it, the SVM is fitted on the transformed training
    images, and predictions for both sets are passed to
    ``printClassificationRate``.

    Args:
        enable_PCA_cache: When truthy, reuse/store the fitted PCA in 'pca.pkl'.
        submit_test_prediction: When truthy, also load the test images,
            predict on them and hand the predictions to ``submission.output``.
    """
    # The identity arrays returned by the loader are not used here.
    _, tr_labels, tr_images, _, valid_labels, valid_images = (
        preprocessor.load_train_and_valid(is_gabor=False))

    pca = _load_or_fit_pca(enable_PCA_cache)

    pca_tr_images = pca.transform(tr_images)
    pca_valid_images = pca.transform(valid_images)

    model = models.SVM()
    model.fit(pca_tr_images, tr_labels)

    train_predictions = model.predict(pca_tr_images)
    valid_predictions = model.predict(pca_valid_images)
    printClassificationRate(model, valid_labels, valid_predictions,
                            tr_labels, train_predictions)

    if submit_test_prediction:
        test_images = preprocessor.load_test(is_gabor=False)
        pca_test_images = pca.transform(test_images)
        test_predictions = model.predict(pca_test_images)
        submission.output(test_predictions)
def main():
    """Run the optional dimensionality reduction, then KNN, and save predictions.

    Reads options from ``get_args()``, loads the 'train' split and the split
    named by ``args.test_split``, and, when ``args.dr_algorithm`` is set,
    reduces train and test data together with ``models.PCA`` or ``models.LLE``.
    A ``models.KNN`` classifier is then fitted on the training data and its
    test predictions are written to ``args.predictions_file`` as integers.

    Raises:
        Exception: If ``args.dr_algorithm`` is neither None, 'pca' nor 'lle'.
    """
    args = get_args()
    X, y = load(args.data, 'train')
    test_X, _ = load(args.data, args.test_split)

    algorithm = args.dr_algorithm
    if algorithm is not None:
        # Train and test rows are stacked so both go through one reduction;
        # the row count is kept to split them apart again afterwards.
        train_count = X.shape[0]
        stacked = np.concatenate((X, test_X))

        started = time.time()
        if algorithm == 'pca':
            reducer = models.PCA(stacked, args.target_dim)
        elif algorithm == 'lle':
            reducer = models.LLE(stacked, args.target_dim, args.lle_k)
        else:
            raise Exception('Invalid dimensionality reduction algorithm')
        # The value returned by fit() is what gets sliced into train/test.
        embedded = reducer.fit(stacked)
        elapsed = time.time() - started
        print(f"dimensionality reduction took {elapsed} seconds!")

        X, test_X = embedded[:train_count], embedded[train_count:]

    classifier = models.KNN(args.knn_k)
    classifier.fit(X, y)
    predictions = classifier.predict(test_X)
    np.savetxt(args.predictions_file, predictions, fmt='%d')
def PCA_Preprocess(pca_fit_data, train_features, valid_features, test_features):
    """Fit a PCA on ``pca_fit_data`` and transform all four inputs with it.

    Args:
        pca_fit_data: Data the ``models.PCA`` instance is fitted on.
        train_features: Training features to transform.
        valid_features: Validation features to transform.
        test_features: Test features to transform.

    Returns:
        A 4-tuple of the transformed ``pca_fit_data``, ``train_features``,
        ``valid_features`` and ``test_features``, in that order.
    """
    pca = models.PCA()
    pca.fit(pca_fit_data)

    # Transform in the same order as the returned tuple.
    transformed = tuple(
        pca.transform(features)
        for features in (pca_fit_data, train_features,
                         valid_features, test_features)
    )

    # Parenthesised call form: valid on Python 3 and prints the same text on
    # Python 2, where the original print statement was the only option.
    print("PCA n components: %d" % pca.get_n_components())
    return transformed