Ejemplo n.º 1
0
    def do_predictions(self, train_x, train_y, test_x, alpha, kernel):
        """Predict labels for ``test_x`` from an already computed ``alpha``.

        The kernel matrix between ``train_x`` and ``test_x`` is built with
        ``kernels.kernel_matrix_test``; its transpose is passed together
        with ``alpha`` to ``self.predict_labels`` and the result is
        converted by ``array_to_labels(..., 0)``.

        Args:
            train_x: Training samples the weights were fitted on.
            train_y: Accepted for interface compatibility; not used here.
            test_x: Samples to predict.
            alpha: Weights handed to ``self.predict_labels``.
            kernel: Kernel callable forwarded to
                ``kernels.kernel_matrix_test``.

        Returns:
            Whatever ``array_to_labels`` produces for the raw predictions.
        """
        # Kernel evaluations between the training and the test samples.
        test_gram = kernels.kernel_matrix_test(train_x, test_x, kernel)

        raw_predictions = self.predict_labels(
            alpha, np.matrix.transpose(test_gram))

        return array_to_labels(raw_predictions, 0)
Ejemplo n.º 2
0
    def run_KRR_spectrum(self, x_train, y_train, x_test, distrib):
        """Fit on spectrum histograms with a dot-product kernel and predict.

        Steps performed:
          1. Build spectrum histograms (``kernels.spectrum_histogram`` with
             the value 7, matching the ``k=7`` in the saved file names) for
             the training data and the training Gram matrix.
          2. Persist the training Gram matrix with ``save_object``.
          3. Solve for the weights ``alpha`` using a fixed regularisation
             value of 0.1.
          4. Build the test histograms and the train/test Gram matrix, and
             persist the latter.
          5. Predict on the test data and convert the raw predictions with
             ``array_to_labels(..., 0)``.

        Args:
            x_train: Training samples.
            y_train: Training targets, one per training sample.
            x_test: Samples to predict.
            distrib: Identifier forwarded to ``kernels.spectrum_histogram``
                and embedded in the names of the saved objects.

        Returns:
            The labels produced by ``array_to_labels`` for the test data.
        """
        n_train = len(x_train)
        kernel_func = partial(np.dot)

        histograms_X_train = kernels.spectrum_histogram(x_train, x_train, 7, distrib)
        gram_matrix = kernels.kernel_matrix_training(histograms_X_train, kernel_func)

        save_object(gram_matrix, 'spectr_kernel_aug_k=7_train_distrib={}'.format(distrib))

        # Solve the linear system in order to find the vector of weights,
        # reshaped into a column vector for the prediction step.
        alpha = self.solve_linear_system(gram_matrix, n_train, 0.1, y_train)
        alpha = alpha.reshape(n_train, 1)

        # Build the Gram matrix between the training and the test data.
        histograms_X_test = kernels.spectrum_histogram(x_train, x_test, 7, distrib)
        gram_mat_test = kernels.kernel_matrix_test(histograms_X_train, histograms_X_test, kernel_func)

        # Persist the *test* Gram matrix under the test file name; saving the
        # training matrix here would silently store the wrong data.
        save_object(gram_mat_test, 'spectr_kernel_aug_k=7_test_distrib={}'.format(distrib))

        # Compute predictions over the test data and convert them to labels.
        pred = self.predict_labels(alpha, np.matrix.transpose(gram_mat_test))
        return array_to_labels(pred, 0)
Ejemplo n.º 3
0
def run_svm_kernel():
    """Train one kernel SVM per data set and write all predictions to CSV.

    The training inputs, training labels and test inputs are loaded with
    ``read_x_data`` / ``read_y_data``. For each of the three data sets
    (indices 0, 1, 2) spectrum histograms are computed with
    ``kernels.spectrum_histogram(..., 8, 0)``, Gram matrices are built with a
    dot-product kernel, a ``Kernel_svm_model`` is fitted and used to predict
    the test set. The predictions of all data sets are concatenated in
    order, every ``-1`` is mapped to ``0`` and every other value to ``1``,
    and the result is written to ``'Yte.csv'``.
    """
    x_train = np.array(read_x_data(train=True, raw=True))
    y_train = np.array(read_y_data())
    x_test = np.array(read_x_data(train=False, raw=True))

    # Plain dot product between histograms acts as the kernel function.
    dot_kernel = partial(np.dot)

    collected = []

    for idx in range(3):
        train_samples = x_train[idx]

        # Histograms and Gram matrix for the training data.
        train_hist = kernels.spectrum_histogram(
            train_samples, train_samples, 8, 0)
        train_gram = kernels.kernel_matrix_training(train_hist, dot_kernel)

        # Histograms and Gram matrix for the test data.
        test_hist = kernels.spectrum_histogram(
            train_samples, x_test[idx], 8, 0)
        test_gram = kernels.kernel_matrix_test(
            train_hist, test_hist, dot_kernel)

        svm = Kernel_svm_model(train_gram)
        svm.fit(train_samples, y_train[idx])

        collected.extend(svm.predict(test_gram))

    # Map the -1 class to 0; everything else becomes 1.
    binary_labels = [0 if value == -1 else 1 for value in collected]

    write_predicted_labels_csv(binary_labels, 'Yte.csv')
Ejemplo n.º 4
0
    def train_folds(self, data, labels, folds):
        """Grid-search ``lambda`` and ``sigma`` with k-fold cross-validation.

        The samples are split into ``folds`` contiguous folds of
        ``len(data) // folds`` samples each. For every (lambda, sigma) pair
        each fold is used once as the held-out set while all remaining
        samples (including any left over after the last full fold) are used
        for training. For every lambda the mean held-out accuracy obtained
        with each sigma is printed; nothing is returned.

        Args:
            data: Sequence of training samples, indexable by sample.
            labels: Sequence of labels aligned with ``data``.
            folds: Number of folds to split the data into.
        """
        # Keep samples and labels in separate arrays. Packing
        # (sample, label) pairs into a single array mixes vectors with
        # scalars, which NumPy rejects as a ragged array (or coerces the
        # labels to the feature dtype when the samples are scalars).
        # Surplus entries of the longer input are ignored, as zip() would do.
        n_samples = min(len(data), len(labels))
        samples = np.asarray(data)[:n_samples]
        targets = np.asarray(labels)[:n_samples]
        len_fold = n_samples // folds

        lambda_values = [0.1, 0.3, 0.6, 0.9]
        sigma_values = [0.0001, 0.001, 0.01, 0.1, 0.5]

        for lam in lambda_values:
            accuracy_values = []
            for sigma in sigma_values:
                # Build a partial gaussian function with the current 'sigma' value
                kernel_func = partial(self.gaussian_kernel, sigma)
                print('Processing sigma value={}'.format(sigma))

                # TODO: compute the whole Gram matrix here only once and let
                # each fold extract the rows/columns it needs.

                fold_accuracy = 0
                for i in range(folds):
                    start, stop = i * len_fold, (i + 1) * len_fold

                    # The current fold is used to test the model.
                    x_test = samples[start:stop]
                    y_test = targets[start:stop]

                    # Training data: everything to the left and to the right
                    # of the current fold.
                    x_train = np.concatenate((samples[:start], samples[stop:]))
                    y_train = np.concatenate((targets[:start], targets[stop:]))

                    # Build the Gram matrix
                    gram_matrix = kernels.kernel_matrix_training(x_train, kernel_func)

                    # Solve the linear system in order to find the vector weights
                    alpha = self.solve_linear_system(gram_matrix, len(x_train), lam, y_train)
                    alpha = alpha.reshape(len(x_train), 1)

                    # Build the Gram matrix for the test data
                    gram_mat_test = kernels.kernel_matrix_test(x_train, x_test, kernel_func)

                    # Compute predictions over the test data
                    pred = self.predict_labels(alpha, np.matrix.transpose(gram_mat_test))

                    # Convert predictions to labels
                    pred = array_to_labels(pred, -1)

                    fold_accuracy += accuracy_score(pred, y_test)

                # Compute average accuracy for all folds
                accuracy_values.append(fold_accuracy / folds)

            print('lambda={}'.format(lam))
            print('For the sigma values: {}'.format(sigma_values))
            print('Accuracies: {}\n'.format(accuracy_values))