def evaluate(all_data, all_labels, fold_number, view_number):
    '''
    Runs baseline classification for a single fold and a single view.

    The classifiers enabled through the module-level ``try_knn`` and
    ``try_svm`` flags are tuned on the tuning blocks and scored on the
    testing blocks, first on the raw features and then on their PCA
    projection (PCA is fitted on the training split only). Results are
    printed; nothing is returned.

    Args:
        all_data (list<numpy.ndarray>): List of all data from each block.
        all_labels (list<numpy.ndarray>): List of all labels from each block.
        fold_number (int): The fold to test on.
        view_number (int): Index used to pick the view inside each block's
            entry of ``all_data`` / ``all_labels``.
    '''
    print('---- Fold #{}, View #{} ----'.format(fold_number, view_number))

    def report_time(t):
        # Print the time elapsed since timestamp ``t``.
        print('\tTook {}\n'.format(format_time(time.time() - t)))

    # Find the appropriate blocks for this fold and stack data
    trn_blocks, tun_blocks, tst_blocks = util.configure_blocks(fold_number)

    def data_for_blocks(blocks):
        # Block ids are offset by one relative to list positions; every block
        # is subsampled with the module-level ``subsampling_factor``.
        data = [
            all_data[block - 1][view_number][0::subsampling_factor, :]
            for block in blocks
        ]
        labels = [
            all_labels[block - 1][view_number][0::subsampling_factor]
            for block in blocks
        ]
        return np.vstack(data), np.hstack(labels)

    trn_data, trn_labels = data_for_blocks(trn_blocks)
    tun_data, tun_labels = data_for_blocks(tun_blocks)
    tst_data, tst_labels = data_for_blocks(tst_blocks)

    # Zero-mean normalisation was disabled in the original code and stays
    # disabled here:
    #     mean = np.mean(trn_data, axis=0)
    #     trn_data = trn_data - mean
    #     tun_data = tun_data - mean
    #     tst_data = tst_data - mean

    print((
        'Num. samples in training: {}\n'
        'Num. samples in tuning: {}\n'
        'Num. samples in testing: {}\n'
    ).format(trn_data.shape[0], tun_data.shape[0], tst_data.shape[0]))

    def run(trn, tun, tst):
        # Tune and test every enabled classifier on the given feature
        # matrices; labels come from the enclosing scope.
        if try_knn:
            print('kNN:')
            runtime = time.time()
            k_neighbors, tun_acc = tune_knn(trn, trn_labels, tun, tun_labels)
            tst_acc = knn_accuracy(
                trn, trn_labels, tst, tst_labels, k_neighbors
            )
            print((
                '\tk-Neighbors: {}\n'
                '\tTuning Accuracy: {:.2f}%\n'
                '\tTesting Accuracy: {:.2f}%\n'
            ).format(k_neighbors, 100 * tun_acc, 100 * tst_acc))
            report_time(runtime)
        if try_svm:
            print('SVM:')
            runtime = time.time()
            # Use the features passed to run(), not the enclosing raw
            # trn_data/tun_data/tst_data; otherwise the PCA pass would
            # silently repeat the raw-data experiment.
            gamma, error, tun_acc = tune_svm(trn, trn_labels, tun, tun_labels)
            tst_acc = svm_accuracy(
                trn, trn_labels, tst, tst_labels, gamma, error
            )
            print((
                '\tGamma: {}\n'
                '\tPenalty: {}\n'
                '\tTuning Accuracy: {:.2f}%\n'
                '\tTesting Accuracy: {:.2f}%\n'
            ).format(gamma, error, 100 * tun_acc, 100 * tst_acc))
            report_time(runtime)

    print('Raw data...\n')
    run(trn_data, tun_data, tst_data)

    print('PCA...\n')
    pca = PCA()
    pca.fit(trn_data)
    trn_data_pca = pca.transform(trn_data)
    tun_data_pca = pca.transform(tun_data)
    tst_data_pca = pca.transform(tst_data)
    run(trn_data_pca, tun_data_pca, tst_data_pca)
def runSingleFold(data_list, file_idx, fold_number):
    '''
    Runs one fold of the GCCA-based classification experiment.

    The training blocks are pre-processed, a GCCA model is solved on them,
    and every view is projected with its own matrix U_j. A classifier
    (RBF-kernel SVM or k-NN, chosen by the module-level
    ``classification_model``) is tuned on the tuning blocks and finally
    scored on the testing blocks. All results are printed; nothing is
    returned.

    Also reads the module-level names ``data_locations``,
    ``num_of_dimensions``, ``use_full_phones`` and ``vowel_labels``.

    Args:
        data_list: Forwarded unchanged to ``DataPreProcessor`` and
            ``getAccuracies``.
        file_idx: Forwarded unchanged to ``DataPreProcessor`` and
            ``getAccuracies``.
        fold_number: Fold identifier passed to ``util.configure_blocks`` to
            obtain the training / tuning / testing blocks.
    '''
    print('| ---- ---- Fold #{} ---- ----'.format(fold_number))

    number_of_views = len(data_locations)
    training_blocks, tuning_blocks, testing_blocks = util.configure_blocks(fold_number)

    # Pre-process training data to have equal number of observations (n)
    data_pre_processor = DataPreProcessor(data_list, file_idx, training_blocks, True)
    training_data_per_view, training_labels_per_view = data_pre_processor.process()

    # Perform GCCA on processed data
    gcca_model = GeneralizedCCA(training_data_per_view, num_of_dimensions)
    G = gcca_model.solve()

    # List for holding U_j for each view
    proj_matrix_per_view = list()

    # Samples and labels are gathered in chunks and stacked once after the
    # loop; growing the arrays one sample at a time copies them on every
    # append. The empty seeds keep shape and dtype defined when nothing is
    # selected. Builtin float/int replace the removed np.float/np.int aliases.
    data_chunks = [np.empty((0, np.shape(G)[1]), dtype=float)]
    label_chunks = [np.array([], dtype=int)]

    # cmap / colors are only consumed by the commented-out scatter plot below.
    if use_full_phones:
        cmap = util.getColorMap(38)
    else:
        cmap = util.getColorMap(len(vowel_labels))
    colors = []

    # Compute U_j (matrix for projecting data into lower dimensional subspace)
    for i in range(number_of_views):
        view_data = training_data_per_view[i].transpose()
        U = np.linalg.pinv(view_data) * np.asmatrix(G)
        projected_data = np.asmatrix(view_data) * np.asmatrix(U)
        proj_matrix_per_view.append(U)

        labels = training_labels_per_view[i]
        if use_full_phones:
            int_labels = [int(label) for label in labels]
            data_chunks.append(projected_data)
            label_chunks.append(np.array(int_labels, dtype=int))
            colors.extend(cmap(label) for label in int_labels)
        else:
            # Keep only the samples whose label is one of the vowel labels.
            kept = [j for j, label in enumerate(labels) if label in vowel_labels]
            if kept:
                kept_labels = [int(labels[j]) for j in kept]
                data_chunks.append(projected_data[kept, :])
                label_chunks.append(np.array(kept_labels, dtype=int))
                colors.extend(cmap(vowel_labels.index(label)) for label in kept_labels)

    training_data = np.vstack(data_chunks)
    training_labels = np.hstack(label_chunks)

    #plot = plt.scatter(training_data[:,2], training_data[:,1], color=colors)
    #plt.show()

    # Start tuning/testing
    # The last entry returned by getAccuracies is the whole-data accuracy and
    # drives model selection. The first candidate is always accepted so the
    # placeholder value 0 never reaches the final model.
    if classification_model == ClassificationModel.Kernel_SVM_RBF:
        max_accuracy = None
        optimal_gamma = 0
        for gamma in [500, 600, 700, 800, 900]:
            model = svm.SVC(decision_function_shape='ovo', kernel='rbf', gamma=gamma, C=1000)
            model.fit(training_data, training_labels)
            accuracies = getAccuracies(model, data_list, file_idx, tuning_blocks, proj_matrix_per_view)
            if max_accuracy is None or accuracies[-1] > max_accuracy:
                max_accuracy = accuracies[-1]
                optimal_gamma = gamma
        print('| Optimal gamma value: {}'.format(optimal_gamma))
        model = svm.SVC(decision_function_shape='ovo', kernel='rbf', gamma=optimal_gamma, C=1000)
    else:
        max_accuracy = None
        optimal_neighbors = 0
        for n_neighbors in [4, 8, 12, 16]:
            model = neighbors.KNeighborsClassifier(n_neighbors, weights='distance')
            model.fit(training_data, training_labels)
            accuracies = getAccuracies(model, data_list, file_idx, tuning_blocks, proj_matrix_per_view)
            if max_accuracy is None or accuracies[-1] > max_accuracy:
                max_accuracy = accuracies[-1]
                optimal_neighbors = n_neighbors
        print('| Optimal number of neighbors: {}'.format(optimal_neighbors))
        model = neighbors.KNeighborsClassifier(optimal_neighbors, weights='distance')

    # Refit with the selected hyper-parameter and score on the testing blocks.
    model.fit(training_data, training_labels)
    accuracies = getAccuracies(model, data_list, file_idx, testing_blocks, proj_matrix_per_view)

    last = len(accuracies) - 1
    for idx, accuracy in enumerate(accuracies):
        if idx < last:
            print('| Accuracy for view {}: {:.3f}'.format(idx + 1, accuracy))
        else:
            print('| Accuracy for whole data: {:.3f}'.format(accuracy))
    print('|')
def runSingleFold(data_list, file_idx, fold_number):
    '''
    Runs one fold of the per-view baseline classification experiment.

    For every view a classifier (RBF-kernel SVM or k-NN, chosen by the
    module-level ``classification_model``) is trained on the training
    blocks, tuned on the tuning blocks and scored on the testing blocks.
    One result line is printed per view; nothing is returned.

    Also reads the module-level names ``use_full_phones`` and
    ``vowel_labels``.

    Args:
        data_list: Per-view data; its length is the number of views. It is
            forwarded to ``DataPreProcessor``.
        file_idx: Forwarded unchanged to ``DataPreProcessor``.
        fold_number: Fold identifier passed to ``util.configure_blocks`` to
            obtain the training / tuning / testing blocks.
    '''
    print('| ---- ---- Fold #{} ---- ----'.format(fold_number))

    number_of_views = len(data_list)
    training_blocks, tuning_blocks, testing_blocks = util.configure_blocks(fold_number)

    data_pre_processor = DataPreProcessor(data_list, file_idx, training_blocks, False)
    training_data_per_view, training_labels_per_view = data_pre_processor.process()

    data_pre_processor = DataPreProcessor(data_list, file_idx, tuning_blocks, False)
    tuning_data_per_view, tuning_labels_per_view = data_pre_processor.process()

    data_pre_processor = DataPreProcessor(data_list, file_idx, testing_blocks, False)
    testing_data_per_view, testing_labels_per_view = data_pre_processor.process()

    for i in range(number_of_views):
        # The classifiers are fitted on the transpose of each view's data.
        view_data = training_data_per_view[i].transpose()
        view_labels = training_labels_per_view[i]

        if use_full_phones:
            training_data = view_data
            training_labels = view_labels
        else:
            # Builtin float/int replace the removed np.float/np.int aliases.
            training_data = np.empty((0, np.shape(training_data_per_view[i])[0]), dtype=float)
            training_labels = np.array([], dtype=int)
            # Select all vowel samples with one indexing operation instead of
            # re-stacking the arrays once per sample.
            kept = [j for j, label in enumerate(view_labels) if label in vowel_labels]
            if kept:
                training_data = np.vstack((training_data, view_data[kept, :]))
                training_labels = np.hstack(
                    (training_labels, [int(view_labels[j]) for j in kept]))

        param_msg = None

        # Start tuning/testing
        # The first candidate is always accepted so the placeholder value 0
        # never reaches the final model.
        if classification_model == ClassificationModel.Kernel_SVM_RBF:
            max_accuracy = None
            optimal_gamma = 0.0
            for gamma in [3e-08, 3.5e-08, 4e-08, 4.5e-08]:
                model = svm.SVC(decision_function_shape='ovo', kernel='rbf', gamma=gamma, C=2)
                model.fit(training_data, training_labels)
                accuracy = getAccuracy(model, tuning_data_per_view[i], tuning_labels_per_view[i])
                if max_accuracy is None or accuracy > max_accuracy:
                    max_accuracy = accuracy
                    optimal_gamma = gamma
            param_msg = 'Optimal gamma value: {}'.format(optimal_gamma)
            model = svm.SVC(decision_function_shape='ovo', kernel='rbf', gamma=optimal_gamma, C=2)
        else:
            max_accuracy = None
            optimal_neighbors = 0
            for n_neighbors in [28, 32, 36, 40]:
                model = neighbors.KNeighborsClassifier(n_neighbors, weights='distance')
                model.fit(training_data, training_labels)
                accuracy = getAccuracy(model, tuning_data_per_view[i], tuning_labels_per_view[i])
                if max_accuracy is None or accuracy > max_accuracy:
                    max_accuracy = accuracy
                    optimal_neighbors = n_neighbors
            param_msg = 'Optimal number of neighbors: {}'.format(optimal_neighbors)
            model = neighbors.KNeighborsClassifier(optimal_neighbors, weights='distance')

        # Refit with the selected hyper-parameter and score on the test data.
        model.fit(training_data, training_labels)
        accuracy = getAccuracy(model, testing_data_per_view[i], testing_labels_per_view[i])
        print('| Accuracy for view {}: {:.3f}'.format(i + 1, accuracy) + ', ' + param_msg)

    print('|')
def runSingleFold(data_list, file_idx, fold_number):
    """
    Runs one fold of the per-view baseline classification experiment.

    For every view a classifier (RBF-kernel SVM or k-NN, chosen by the
    module-level ``classification_model``) is trained on the training
    blocks, tuned on the tuning blocks and scored on the testing blocks.
    One result line is printed per view; nothing is returned.

    Also reads the module-level names ``use_full_phones`` and
    ``vowel_labels``.

    Args:
        data_list: Per-view data; its length is the number of views. It is
            forwarded to ``DataPreProcessor``.
        file_idx: Forwarded unchanged to ``DataPreProcessor``.
        fold_number: Fold identifier passed to ``util.configure_blocks`` to
            obtain the training / tuning / testing blocks.
    """
    print("| ---- ---- Fold #{} ---- ----".format(fold_number))

    number_of_views = len(data_list)
    training_blocks, tuning_blocks, testing_blocks = util.configure_blocks(fold_number)

    data_pre_processor = DataPreProcessor(data_list, file_idx, training_blocks, False)
    training_data_per_view, training_labels_per_view = data_pre_processor.process()

    data_pre_processor = DataPreProcessor(data_list, file_idx, tuning_blocks, False)
    tuning_data_per_view, tuning_labels_per_view = data_pre_processor.process()

    data_pre_processor = DataPreProcessor(data_list, file_idx, testing_blocks, False)
    testing_data_per_view, testing_labels_per_view = data_pre_processor.process()

    for i in range(number_of_views):
        # The classifiers are fitted on the transpose of each view's data.
        view_data = training_data_per_view[i].transpose()
        view_labels = training_labels_per_view[i]

        if use_full_phones:
            training_data = view_data
            training_labels = view_labels
        else:
            # Builtin float/int replace the removed np.float/np.int aliases.
            training_data = np.empty((0, np.shape(training_data_per_view[i])[0]), dtype=float)
            training_labels = np.array([], dtype=int)
            # Select all vowel samples with one indexing operation instead of
            # re-stacking the arrays once per sample.
            kept = [j for j, label in enumerate(view_labels) if label in vowel_labels]
            if kept:
                training_data = np.vstack((training_data, view_data[kept, :]))
                training_labels = np.hstack((training_labels, [int(view_labels[j]) for j in kept]))

        param_msg = None

        # Start tuning/testing
        # The first candidate is always accepted so the placeholder value 0
        # never reaches the final model.
        if classification_model == ClassificationModel.Kernel_SVM_RBF:
            max_accuracy = None
            optimal_gamma = 0.0
            for gamma in [3e-08, 3.5e-08, 4e-08, 4.5e-08]:
                model = svm.SVC(decision_function_shape="ovo", kernel="rbf", gamma=gamma, C=2)
                model.fit(training_data, training_labels)
                accuracy = getAccuracy(model, tuning_data_per_view[i], tuning_labels_per_view[i])
                if max_accuracy is None or accuracy > max_accuracy:
                    max_accuracy = accuracy
                    optimal_gamma = gamma
            param_msg = "Optimal gamma value: {}".format(optimal_gamma)
            model = svm.SVC(decision_function_shape="ovo", kernel="rbf", gamma=optimal_gamma, C=2)
        else:
            max_accuracy = None
            optimal_neighbors = 0
            for n_neighbors in [28, 32, 36, 40]:
                model = neighbors.KNeighborsClassifier(n_neighbors, weights="distance")
                model.fit(training_data, training_labels)
                accuracy = getAccuracy(model, tuning_data_per_view[i], tuning_labels_per_view[i])
                if max_accuracy is None or accuracy > max_accuracy:
                    max_accuracy = accuracy
                    optimal_neighbors = n_neighbors
            param_msg = "Optimal number of neighbors: {}".format(optimal_neighbors)
            model = neighbors.KNeighborsClassifier(optimal_neighbors, weights="distance")

        # Refit with the selected hyper-parameter and score on the test data.
        model.fit(training_data, training_labels)
        accuracy = getAccuracy(model, testing_data_per_view[i], testing_labels_per_view[i])
        print("| Accuracy for view {}: {:.3f}".format(i + 1, accuracy) + ", " + param_msg)

    print("|")
def runSingleFold(data_list, file_idx, fold_number):
    """
    Runs one fold of the GCCA-based classification experiment.

    The training blocks are pre-processed, a GCCA model is solved on them,
    and every view is projected with its own matrix U_j. A classifier
    (RBF-kernel SVM or k-NN, chosen by the module-level
    ``classification_model``) is tuned on the tuning blocks and finally
    scored on the testing blocks. All results are printed; nothing is
    returned.

    Also reads the module-level names ``data_locations``,
    ``num_of_dimensions``, ``use_full_phones`` and ``vowel_labels``.

    Args:
        data_list: Forwarded unchanged to ``DataPreProcessor`` and
            ``getAccuracies``.
        file_idx: Forwarded unchanged to ``DataPreProcessor`` and
            ``getAccuracies``.
        fold_number: Fold identifier passed to ``util.configure_blocks`` to
            obtain the training / tuning / testing blocks.
    """
    print("| ---- ---- Fold #{} ---- ----".format(fold_number))

    number_of_views = len(data_locations)
    training_blocks, tuning_blocks, testing_blocks = util.configure_blocks(fold_number)

    # Pre-process training data to have equal number of observations (n)
    data_pre_processor = DataPreProcessor(data_list, file_idx, training_blocks, True)
    training_data_per_view, training_labels_per_view = data_pre_processor.process()

    # Perform GCCA on processed data
    gcca_model = GeneralizedCCA(training_data_per_view, num_of_dimensions)
    G = gcca_model.solve()

    # List for holding U_j for each view
    proj_matrix_per_view = list()

    # Samples and labels are gathered in chunks and stacked once after the
    # loop; growing the arrays one sample at a time copies them on every
    # append. The empty seeds keep shape and dtype defined when nothing is
    # selected. Builtin float/int replace the removed np.float/np.int aliases.
    data_chunks = [np.empty((0, np.shape(G)[1]), dtype=float)]
    label_chunks = [np.array([], dtype=int)]

    # cmap / colors are only consumed by the commented-out scatter plot below.
    if use_full_phones:
        cmap = util.getColorMap(38)
    else:
        cmap = util.getColorMap(len(vowel_labels))
    colors = []

    # Compute U_j (matrix for projecting data into lower dimensional subspace)
    for i in range(number_of_views):
        view_data = training_data_per_view[i].transpose()
        U = np.linalg.pinv(view_data) * np.asmatrix(G)
        projected_data = np.asmatrix(view_data) * np.asmatrix(U)
        proj_matrix_per_view.append(U)

        labels = training_labels_per_view[i]
        if use_full_phones:
            int_labels = [int(label) for label in labels]
            data_chunks.append(projected_data)
            label_chunks.append(np.array(int_labels, dtype=int))
            colors.extend(cmap(label) for label in int_labels)
        else:
            # Keep only the samples whose label is one of the vowel labels.
            kept = [j for j, label in enumerate(labels) if label in vowel_labels]
            if kept:
                kept_labels = [int(labels[j]) for j in kept]
                data_chunks.append(projected_data[kept, :])
                label_chunks.append(np.array(kept_labels, dtype=int))
                colors.extend(cmap(vowel_labels.index(label)) for label in kept_labels)

    training_data = np.vstack(data_chunks)
    training_labels = np.hstack(label_chunks)

    # plot = plt.scatter(training_data[:,2], training_data[:,1], color=colors)
    # plt.show()

    # Start tuning/testing
    # The last entry returned by getAccuracies is the whole-data accuracy and
    # drives model selection. The first candidate is always accepted so the
    # placeholder value 0 never reaches the final model.
    if classification_model == ClassificationModel.Kernel_SVM_RBF:
        max_accuracy = None
        optimal_gamma = 0
        for gamma in [500, 600, 700, 800, 900]:
            model = svm.SVC(decision_function_shape="ovo", kernel="rbf", gamma=gamma, C=1000)
            model.fit(training_data, training_labels)
            accuracies = getAccuracies(model, data_list, file_idx, tuning_blocks, proj_matrix_per_view)
            if max_accuracy is None or accuracies[-1] > max_accuracy:
                max_accuracy = accuracies[-1]
                optimal_gamma = gamma
        print("| Optimal gamma value: {}".format(optimal_gamma))
        model = svm.SVC(decision_function_shape="ovo", kernel="rbf", gamma=optimal_gamma, C=1000)
    else:
        max_accuracy = None
        optimal_neighbors = 0
        for n_neighbors in [4, 8, 12, 16]:
            model = neighbors.KNeighborsClassifier(n_neighbors, weights="distance")
            model.fit(training_data, training_labels)
            accuracies = getAccuracies(model, data_list, file_idx, tuning_blocks, proj_matrix_per_view)
            if max_accuracy is None or accuracies[-1] > max_accuracy:
                max_accuracy = accuracies[-1]
                optimal_neighbors = n_neighbors
        print("| Optimal number of neighbors: {}".format(optimal_neighbors))
        model = neighbors.KNeighborsClassifier(optimal_neighbors, weights="distance")

    # Refit with the selected hyper-parameter and score on the testing blocks.
    model.fit(training_data, training_labels)
    accuracies = getAccuracies(model, data_list, file_idx, testing_blocks, proj_matrix_per_view)

    last = len(accuracies) - 1
    for idx, accuracy in enumerate(accuracies):
        if idx < last:
            print("| Accuracy for view {}: {:.3f}".format(idx + 1, accuracy))
        else:
            print("| Accuracy for whole data: {:.3f}".format(accuracy))
    print("|")