def do_predictions(self, train_x, train_y, test_x, alpha, kernel):
    """Predict labels for ``test_x`` from already-fitted weights ``alpha``.

    Args:
        train_x: Training samples, passed to ``kernels.kernel_matrix_test``
            together with ``test_x``.
        train_y: Unused here; kept so the signature stays stable.
        test_x: Samples to predict.
        alpha: Weight vector handed to ``self.predict_labels``.
        kernel: Kernel callable forwarded to ``kernels.kernel_matrix_test``.

    Returns:
        Whatever ``array_to_labels`` produces for the raw predictions with
        a second argument of 0 (presumably a decision threshold -- confirm
        against ``array_to_labels``).
    """
    # Train/test Gram matrix; it is transposed before being handed to
    # predict_labels, exactly as the other prediction paths in this file do.
    test_gram = kernels.kernel_matrix_test(train_x, test_x, kernel)
    raw_scores = self.predict_labels(alpha, np.matrix.transpose(test_gram))
    return array_to_labels(raw_scores, 0)
def run_KRR_spectrum(self, x_train, y_train, x_test, distrib):
    """Fit kernel ridge regression on spectrum histograms and predict ``x_test``.

    Spectrum histograms are built with ``kernels.spectrum_histogram`` using
    the fixed arguments ``7`` and ``distrib``, compared with a plain dot
    product, and the weights are obtained from ``self.solve_linear_system``
    with the fixed value ``0.1`` (presumably the ridge penalty -- confirm
    against ``solve_linear_system``).

    Both Gram matrices are persisted through ``save_object``: the training
    one under ``spectr_kernel_aug_k=7_train_distrib=<distrib>`` and the test
    one under ``spectr_kernel_aug_k=7_test_distrib=<distrib>``.

    Args:
        x_train: Training samples.
        y_train: Training targets passed to ``self.solve_linear_system``.
        x_test: Samples to predict.
        distrib: Value forwarded to ``kernels.spectrum_histogram`` and used
            in the names of the saved objects.

    Returns:
        The result of ``array_to_labels`` applied to the raw predictions
        with a second argument of 0.
    """
    # partial() with no bound arguments simply forwards to np.dot.
    kernel_func = partial(np.dot)
    n_train = len(x_train)

    # Training histograms and Gram matrix.
    histograms_X_train = kernels.spectrum_histogram(x_train, x_train, 7, distrib)
    gram_matrix = kernels.kernel_matrix_training(histograms_X_train, kernel_func)
    save_object(gram_matrix, 'spectr_kernel_aug_k=7_train_distrib={}'.format(distrib))

    # Solve the linear system in order to find the weight vector, then
    # reshape it into a column vector.
    alpha = self.solve_linear_system(gram_matrix, n_train, 0.1, y_train)
    alpha = alpha.reshape(n_train, 1)

    # Test histograms and the train/test Gram matrix.
    histograms_X_test = kernels.spectrum_histogram(x_train, x_test, 7, distrib)
    gram_mat_test = kernels.kernel_matrix_test(
        histograms_X_train, histograms_X_test, kernel_func)
    # Save the *test* Gram matrix here. Previously the training matrix was
    # written under the test name, so the test matrix was never persisted.
    save_object(gram_mat_test, 'spectr_kernel_aug_k=7_test_distrib={}'.format(distrib))

    # Compute predictions over the test data and convert them to labels.
    pred = self.predict_labels(alpha, np.matrix.transpose(gram_mat_test))
    return array_to_labels(pred, 0)
def run_svm_kernel():
    """Train one kernel SVM per dataset and write all predictions to 'Yte.csv'.

    The raw train/test inputs and the training targets are loaded through
    ``read_x_data`` / ``read_y_data`` and indexed by dataset number (0, 1, 2).
    For each dataset, spectrum histograms are built with
    ``kernels.spectrum_histogram`` (fixed arguments ``8`` and ``0``), turned
    into train and train/test Gram matrices with a dot-product kernel, and
    fed to a ``Kernel_svm_model``. Predictions of the three models are
    concatenated in dataset order, every ``-1`` is mapped to ``0`` and every
    other value to ``1``, and the result is written with
    ``write_predicted_labels_csv``.
    """
    train_inputs = np.array(read_x_data(train=True, raw=True))
    train_targets = np.array(read_y_data())
    test_inputs = np.array(read_x_data(train=False, raw=True))

    # partial() with no bound arguments simply forwards to np.dot.
    dot_kernel = partial(np.dot)

    collected = []
    for idx in range(3):
        # Gram matrix of the training histograms.
        train_hist = kernels.spectrum_histogram(
            train_inputs[idx], train_inputs[idx], 8, 0)
        train_gram = kernels.kernel_matrix_training(train_hist, dot_kernel)

        # Gram matrix between training and test histograms.
        test_hist = kernels.spectrum_histogram(
            train_inputs[idx], test_inputs[idx], 8, 0)
        test_gram = kernels.kernel_matrix_test(train_hist, test_hist, dot_kernel)

        svm = Kernel_svm_model(train_gram)
        svm.fit(train_inputs[idx], train_targets[idx])
        collected.extend(svm.predict(test_gram))

    # Output convention: -1 becomes 0, anything else becomes 1.
    binary_labels = [0 if value == -1 else 1 for value in collected]
    write_predicted_labels_csv(binary_labels, 'Yte.csv')
def train_folds(self, data, labels, folds):
    """Grid-search lambda and sigma with contiguous k-fold cross-validation.

    For every lambda in ``[0.1, 0.3, 0.6, 0.9]`` and every sigma in
    ``[0.0001, 0.001, 0.01, 0.1, 0.5]`` the samples are split into ``folds``
    contiguous folds of ``n // folds`` samples each. Each fold is used once
    as the held-out set while the remaining samples (including any leftover
    samples past the last full fold) form the training set. A model is
    fitted through ``self.solve_linear_system`` on a Gaussian-kernel Gram
    matrix, and the fold accuracy is measured with ``accuracy_score`` after
    converting predictions with ``array_to_labels(pred, -1)``.

    Nothing is returned: for each lambda the list of mean accuracies (one
    per sigma) is printed.

    Args:
        data: Sequence of samples.
        labels: Sequence of targets aligned with ``data``.
        folds: Number of folds.
    """
    # Keep features and labels in separate arrays. Packing (sample, label)
    # pairs into one np.array builds a ragged array, which raises on
    # NumPy >= 1.24 and otherwise may coerce labels to the samples' dtype.
    samples = np.asarray(data)
    targets = np.asarray(labels)
    # Mirror zip(): ignore trailing entries if the two lengths differ.
    n_samples = min(len(samples), len(targets))
    samples = samples[:n_samples]
    targets = targets[:n_samples]
    len_fold = n_samples // folds

    lambda_values = [0.1, 0.3, 0.6, 0.9]
    sigma_values = [0.0001, 0.001, 0.01, 0.1, 0.5]

    for lam in lambda_values:
        accuracy_values = []
        for sigma in sigma_values:
            # Build a partial gaussian function with the current 'sigma' value
            kernel_func = partial(self.gaussian_kernel, sigma)
            print('Processing sigma value={}'.format(sigma))
            # TODO Compute the whole gram matrix here only once;
            # each fold would then extract the rows/columns it needs.
            fold_accuracy = 0
            for i in range(folds):
                start = i * len_fold
                stop = (i + 1) * len_fold

                # Training data: everything left and right of the current fold.
                x_train = np.concatenate((samples[:start], samples[stop:]))
                y_train = np.concatenate((targets[:start], targets[stop:]))
                # The current fold is used to test the model.
                x_test = samples[start:stop]
                y_test = targets[start:stop]

                # Build the Gram matrix
                gram_matrix = kernels.kernel_matrix_training(x_train, kernel_func)
                # Solve the linear system in order to find the weight vector
                alpha = self.solve_linear_system(gram_matrix, len(x_train), lam, y_train)
                alpha = alpha.reshape(len(x_train), 1)
                # Build the Gram matrix for the test data
                gram_mat_test = kernels.kernel_matrix_test(x_train, x_test, kernel_func)
                # Compute predictions over the test data
                pred = self.predict_labels(alpha, np.matrix.transpose(gram_mat_test))
                # Convert predictions to labels
                pred = array_to_labels(pred, -1)
                fold_accuracy += accuracy_score(pred, y_test)

            # Average accuracy over all folds for this (lambda, sigma) pair.
            average_accuracy = fold_accuracy / folds
            accuracy_values.append(average_accuracy)

        print('lambda={}'.format(lam))
        print('For the sigma values: {}'.format(sigma_values))
        print('Accuracies: {}\n'.format(accuracy_values))