class NaiveBayes(Classifier):
    """Gaussian naive Bayes classifier over standardized features."""

    def __init__(self, X, y):
        # Remove zero-covariance features and standardize.
        self.data_preprocessor = DataPreprocessor(X)
        X = self.data_preprocessor.process_data(X)
        y = np.copy(y)
        self.number_features = X.shape[1]
        self.all_classes = np.unique(y)
        self.number_classes = self.all_classes.size
        self.prior, self.mean_array, self.std_array = \
            self.calculate_Gauss_parameters(X, y)

    def calculate_Gauss_parameters(self, X, y):
        """Estimate class priors and per-class feature means/stds.

        Returns (prior, mean_array, std_array); the arrays have shape
        (number_classes, number_features).
        """
        prior = [np.sum(y == y_val) / y.size for y_val in self.all_classes]
        mean_array = np.zeros((self.number_classes, self.number_features))
        std_array = np.zeros((self.number_classes, self.number_features))
        for k in range(self.number_classes):
            X_sub = X[y == self.all_classes[k], :]
            mean_array[k, :] = np.mean(X_sub, axis=0)
            std_array[k, :] = np.std(X_sub, axis=0, ddof=1)
        # Floor the stds so near-constant features cannot produce
        # infinite (or undefined) Gaussian densities.
        std_array[std_array < 1e-03] = 1e-03
        return prior, mean_array, std_array

    def validate(self, X_test, y_test):
        """Return the misclassification rate on a labeled test set."""
        X_test = self.data_preprocessor.process_data(X_test)
        assert X_test.shape[1] == self.number_features
        predicted_score = self.predict_score(X_test)
        predicted_class = self.predict_class(predicted_score)
        return self.calculate_predict_error(predicted_class, y_test)

    def calculate_predict_error(self, predicted_class, y):
        """Fraction of predictions that disagree with the true labels."""
        return 1 - np.mean(predicted_class == y)

    def predict_class(self, predicted_score):
        """Map per-class scores to the label with the highest score."""
        return self.all_classes[np.argmax(predicted_score, axis=1)]

    def predict_score(self, X):
        """Per-class joint log-likelihood plus log prior for each row of X."""
        N = X.shape[0]
        log_score = np.zeros((N, self.number_classes))
        for k in range(self.number_classes):
            # Naive-Bayes independence assumption: sum the per-feature
            # Gaussian log-densities (vectorized over all features).
            log_score[:, k] = np.sum(
                norm.logpdf(X, loc=self.mean_array[k, :],
                            scale=self.std_array[k, :]),
                axis=1)
        log_score += [log(p) for p in self.prior]
        return log_score
class LDA2dGaussGM(Classifier):
    """Fisher LDA projection to 2-d followed by a per-class Gaussian
    generative model in the projected space."""

    def __init__(self, X, y):
        # Remove zero-covariance features and standardize.
        self.data_preprocessor = DataPreprocessor(X)
        X = self.data_preprocessor.process_data(X)
        y = np.copy(y)
        self.number_features = X.shape[1]
        self.all_classes = np.unique(y)
        self.number_classes = self.all_classes.size
        self.W = self.calculate_weight_vector(X, y)
        self.prior, self.mean, self.covariance = \
            self.calculate_GaussGM_parameters(self.LDA_projection(X), y)

    def calculate_weight_vector(self, X, y):
        """Return the (number_features, 2) LDA projection matrix.

        Solves the generalized eigenproblem S_W^{-1} S_B v = lambda v and
        keeps the two leading (largest-magnitude) eigenvectors.
        """
        k = 2  # hard coded for a 2-d projection
        X_kclass = {}
        for one_class in self.all_classes:
            X_kclass[one_class] = X[y == one_class]
        mean_all = np.mean(X, axis=0)
        # Total scatter; between-class scatter is recovered as S_T - S_W.
        S_T = np.matmul(np.transpose(X - mean_all), X - mean_all)
        S_W = np.zeros((self.number_features, self.number_features))
        for one_class in self.all_classes:
            mean_each = np.mean(X_kclass[one_class], axis=0)
            centered = X_kclass[one_class] - mean_each
            S_W += np.matmul(np.transpose(centered), centered)
        S_B = S_T - S_W
        # solve(S_W, S_B) == inv(S_W) @ S_B but avoids forming an explicit
        # inverse and the deprecated np.matrix wrapper the original used.
        _, eig_vecs = eigs(np.linalg.solve(S_W, S_B), k=k)
        # The matrix is generally non-symmetric, so eigs returns complex
        # vectors; the imaginary parts are numerical noise here.
        return eig_vecs.real

    def LDA_projection(self, X):
        """Project X onto the learned 2-d LDA subspace."""
        assert X.shape[1] == self.W.shape[0]
        return X.dot(self.W)

    def calculate_GaussGM_parameters(self, X, y):
        """Estimate priors, means and full covariances per class in the
        projected space.  Returns (priors, means, covariances)."""
        number_features = X.shape[1]
        priors = [np.sum(y == one_class) / y.size
                  for one_class in self.all_classes]
        means = np.zeros((self.number_classes, number_features))
        covariances = np.zeros(
            (self.number_classes, number_features, number_features))
        for k in range(self.number_classes):
            X_classk = X[y == self.all_classes[k], :]
            means[k, :] = np.mean(X_classk, axis=0)
            # bias=True: maximum-likelihood (1/N) covariance estimate.
            covariances[k, :, :] = np.cov(X_classk, rowvar=False, bias=True)
        return priors, means, covariances

    def validate(self, X_test, y_test):
        """Return the misclassification rate on a labeled test set."""
        X_test = self.data_preprocessor.process_data(X_test)
        assert X_test.shape[1] == self.number_features
        X_test = self.LDA_projection(X_test)
        predicted_scores = self.predict_score(X_test)
        predicted_class = self.predict_class(predicted_scores)
        return self.calculate_predict_error(predicted_class, y_test)

    def calculate_predict_error(self, predicted_class, y):
        """Fraction of predictions that disagree with the true labels."""
        return 1 - np.mean(predicted_class == y)

    def predict_class(self, score):
        """Map per-class scores to the label with the highest score."""
        return self.all_classes[np.argmax(score, axis=1)]

    def predict_score(self, X):
        """Per-class Gaussian log-density plus log prior for each row."""
        N = X.shape[0]
        log_score = np.zeros((N, self.number_classes))
        for k in range(self.number_classes):
            log_score[:, k] = multivariate_normal.logpdf(
                X, self.mean[k, :], self.covariance[k, :, :])
        log_score += [log(p) for p in self.prior]
        return log_score
class LogisticRegression(Classifier):
    """Multiclass logistic regression trained with damped Newton (IRLS)."""

    def __init__(self, X, y):
        # DataPreprocessor copies X internally.
        self.data_preprocessor = DataPreprocessor(X)
        X = self.data_preprocessor.process_data(X)
        y = np.copy(y)
        self.all_classes = np.unique(y)
        self.number_classes = self.all_classes.size
        self.number_observations, self.number_features = X.shape
        # Row-wise concatenated weight vector, small random init.
        W_init = np.random.normal(
            0, 0.001, self.number_classes * self.number_features)
        self.W = self.IRLS(W_init, X, y)

    def IRLS(self, W, X, y):
        """Run 30 damped Newton steps on the softmax cross-entropy.

        W is the flattened weight vector; returns the final weights
        reshaped to (number_classes, number_features).
        """
        # Map labels to column indices 0..K-1 (all_classes is sorted, so
        # searchsorted is an exact inverse of np.unique).  The original
        # indexed the one-hot matrix with the raw labels, which only works
        # when labels already happen to be 0..K-1.
        class_index = np.searchsorted(self.all_classes, y)
        # One-hot targets never change — build them once, outside the loop.
        T = np.zeros((self.number_observations, self.number_classes))
        T[np.arange(self.number_observations), class_index] = 1
        Y = np.zeros((self.number_observations, self.number_classes))
        number_iterations = 30
        loss_record = np.zeros(number_iterations)
        for iter in range(number_iterations):
            W_mat = self.W_vector2matrix(W)
            for i in range(self.number_observations):
                Y[i, :] = LogisticRegression.softmax(W_mat, X[i, :])
            loss_record[iter] = LogisticRegression.cross_entropy_loss(Y, T)
            grad_W = self.compute_gradient(X, Y, T)
            hess_W = self.compute_hessian(X, Y)
            # Damped Newton step; the 0.01 factor guards against overshoot.
            W += -0.01 * np.matmul(np.linalg.inv(hess_W), grad_W)
        return self.W_vector2matrix(W)

    def compute_gradient(self, X, Y, T):
        """Gradient of the cross-entropy wrt the flattened weight vector."""
        grad_mat = np.zeros((self.number_classes, self.number_features))
        for i in range(self.number_classes):
            grad_mat[i, :] = (Y[:, i] - T[:, i]).dot(X)
        return grad_mat.reshape(self.number_classes * self.number_features)

    @staticmethod
    def cross_entropy_loss(Y, T):
        """Cross-entropy of predicted distribution Y against one-hot T.

        Only the true-class probabilities enter the sum, so a zero
        probability on a non-target class cannot raise a domain error
        (the original evaluated log on every entry).
        """
        return -np.sum(np.log(Y[T == 1]))

    def compute_hessian(self, X, Y):
        """Full (K*D, K*D) Hessian of the cross-entropy, ridge-regularized."""
        D = self.number_features
        hess_mat = np.zeros((self.number_classes * D,
                             self.number_classes * D))
        for j in range(self.number_classes):
            for k in range(self.number_classes):
                i_kj = 1 if (k == j) else 0
                dot_vec = Y[:, k] * (i_kj - Y[:, j])
                block_kj = np.matmul(np.matmul(X.T, np.diag(dot_vec)), X)
                hess_mat[j * D:(j + 1) * D, k * D:(k + 1) * D] = block_kj
        # The Hessian may not be PSD due to numerical issues; add a ridge.
        hess_mat = hess_mat + 0.1 * np.identity(self.number_classes * D)
        return hess_mat

    def W_vector2matrix(self, W_vec):
        """Reshape flattened weights to (number_classes, number_features)."""
        assert (W_vec.size == self.number_classes * self.number_features)
        return W_vec.reshape((self.number_classes, self.number_features))

    @staticmethod
    def softmax(W, x):
        """Softmax class distribution for a single sample x."""
        # Shifting by the max logit is mathematically a no-op but prevents
        # np.exp overflow for large logits.
        logits = W.dot(x)
        e = np.exp(logits - np.max(logits))
        return e / np.sum(e)

    def validate(self, X_test, y_test):
        """Return the misclassification rate on a labeled test set."""
        X_test = self.data_preprocessor.process_data(X_test)
        assert X_test.shape[1] == self.number_features
        predicted_score = self.predict_score(X_test)
        predicted_class = self.predict_class(predicted_score)
        return self.calculate_predict_error(predicted_class, y_test)

    def calculate_predict_error(self, predicted_class, y):
        """Fraction of predictions that disagree with the true labels."""
        return 1 - np.mean(predicted_class == y)

    def predict_class(self, score):
        """Map per-class scores to the label with the highest score."""
        return self.all_classes[np.argmax(score, axis=1)]

    def predict_score(self, X):
        """Softmax class probabilities for each row of X."""
        N = X.shape[0]
        softmax_score = np.zeros((N, self.number_classes))
        for i in range(N):
            softmax_score[i, :] = LogisticRegression.softmax(self.W, X[i, :])
        return softmax_score
class SVMCVX(Classifier):
    """Binary soft-margin SVM trained by solving the dual QP with cvxopt."""

    def __init__(self, X, y, regulator):
        self.data_preprocessor = DataPreprocessor(X)
        X = self.data_preprocessor.process_data(X)
        y = np.copy(y).astype(int)
        self.all_classes = np.unique(y)
        assert self.all_classes.size == 2  # binary classifier only
        # Map the two labels onto the canonical +1/-1 targets.
        self.target_value = np.array([1, -1]).astype(int)
        y[y == self.all_classes[0]] = self.target_value[0]
        y[y == self.all_classes[1]] = self.target_value[1]
        self.number_features = X.shape[1]
        alpha = self.solve_dual_problem(X, y, regulator)
        zero_threshold = 1e-6
        self.number_support_vectors = np.sum(alpha > zero_threshold)
        self.svm_weight, self.svm_bias = SVMCVX.compute_svm_parameters(
            alpha, X, y, regulator)
        # Geometric margin of the separating hyperplane is 1 / ||w||.
        # (The original divided by ||alpha||, which has no such meaning.)
        self.margin = 1 / np.linalg.norm(self.svm_weight)

    def solve_dual_problem(self, X, y, c):
        """Solve the SVM dual as a QP; return the alpha multipliers.

        QP form: min 0.5 x^T P x + q^T x  s.t.  Gx <= h, Ax = b,
        encoding the box constraints 0 <= alpha_i <= c and the equality
        sum_i alpha_i y_i = 0.
        """
        number_observations = X.shape[0]
        yX = np.reshape(y, (number_observations, 1)) * X
        P = matrix(yX.dot(yX.T))
        q = matrix(-np.ones(number_observations))
        A = matrix(np.reshape(y.astype(float), (1, number_observations)))
        b = matrix([0.0])
        I = np.identity(number_observations)
        G = matrix(np.concatenate((I, -I), axis=0))
        vector_c = c * np.ones(number_observations)
        vector_0 = np.zeros(number_observations)
        h = matrix(np.concatenate((vector_c, vector_0)))
        solution = qp(P, q, G, h, A, b)
        alpha = np.array(solution['x'])
        return alpha.reshape((-1,))

    @staticmethod
    def compute_svm_parameters(alpha, X, y, c):
        """Recover (w, b) from the dual solution.

        b is averaged over the free support vectors (0 < alpha < c),
        which lie exactly on the margin.
        """
        w = (alpha * y).dot(X)
        b = 0
        count = 0
        for i in range(alpha.shape[0]):
            if 0 < alpha[i] < c:
                count += 1
                b += y[i] - w.dot(X[i, :])
        assert count > 0  # expect at least one free support vector
        b /= count
        return w, b

    def predict(self, X_new):
        """Predict original-label classes for new raw samples."""
        X = self.data_preprocessor.process_data(X_new)
        X = np.reshape(X, (-1, self.number_features))
        predicted_score = self.predict_score(X)
        return self.predict_class(predicted_score)

    def validate(self, X_test, y_test):
        """Return the misclassification rate on a labeled test set."""
        X_test = self.data_preprocessor.process_data(X_test)
        assert X_test.shape[1] == self.number_features
        predicted_score = self.predict_score(X_test)
        predicted_class = self.predict_class(predicted_score)
        return self.calculate_predict_error(predicted_class, y_test)

    def calculate_predict_error(self, predicted_class, y):
        """Fraction of predictions that disagree with the true labels."""
        return 1 - np.mean(predicted_class == y)

    def predict_class(self, score):
        """Map scores back to the original labels via argmax."""
        return self.all_classes[np.argmax(score, axis=1)]

    def predict_score(self, X):
        """Signed decision value and its negation as a 2-column score.

        Column 0 corresponds to all_classes[0] (mapped to +1).
        """
        N = X.shape[0]
        svm_score = np.zeros((N, 2))
        svm_score[:, 0] = X.dot(self.svm_weight) + self.svm_bias
        svm_score[:, 1] = -svm_score[:, 0]
        return svm_score
class SVMSoftplus(Classifier):
    """Binary SVM with the hinge loss smoothed by a softplus, trained by
    class-stratified mini-batch gradient descent."""

    def __init__(self, X, y, sgd_batch_size):
        # Softplus temperature: smaller values approximate the hinge tighter.
        self.softplus_a = 0.1
        self.data_preprocessor = DataPreprocessor(X)
        X = self.data_preprocessor.process_data(X)
        y = np.copy(y).astype(int)
        self.all_classes = np.unique(y)
        assert self.all_classes.size == 2  # binary classifier only
        # Map the two labels onto the canonical +1/-1 targets.
        self.target_value = np.array([1, -1]).astype(int)
        y[y == self.all_classes[0]] = self.target_value[0]
        y[y == self.all_classes[1]] = self.target_value[1]
        n, self.number_features = X.shape
        assert sgd_batch_size <= n
        # Prepare for optimization.
        self.loss_record = []
        penalty_lambda = 1
        w_init = np.random.normal(0, 0.001, self.number_features)
        self.svm_weight = self.optimization(X, y, penalty_lambda, w_init,
                                            sgd_batch_size)

    def optimization(self, X, y, lambda_, w0, k):
        """Mini-batch gradient descent on the softplus-smoothed SVM loss."""
        X_train, y_train, number_samples = self.group_train_data(X, y, k)
        # Optimization hyperparameters.
        max_iterations = 10000
        learning_rate = 0.01
        max_ktot = 100 * X.shape[0]  # sample budget (~100 epochs worth)
        ktot = 0
        for i in range(max_iterations):
            ktot += k
            # Record the full-data objective before each update.
            self.loss_record.append(self.compute_loss(X, y, lambda_, w0))
            X_batch, y_batch = self.select_batch(X_train, y_train,
                                                number_samples, w0)
            grad_w = self.compute_gradient(X_batch, y_batch, lambda_, w0)
            w1 = w0 - learning_rate * grad_w
            w0 = w1
            if ktot >= max_ktot:
                break
        return w0

    def group_train_data(self, X, y, k):
        """Split data per class and fix per-class batch counts that keep
        the batch's class ratio close to the full data's."""
        X_train = {}
        y_train = {}
        for one_class in self.target_value:
            index = y == one_class
            X_train[one_class] = X[index]
            y_train[one_class] = y[index]
        percent = k / len(y)
        number_class1 = floor(percent * len(y_train[self.target_value[0]]))
        number_class2 = k - number_class1
        assert number_class2 <= len(y_train[self.target_value[1]])
        number_samples = {
            self.target_value[0]: number_class1,
            self.target_value[1]: number_class2
        }
        return X_train, y_train, number_samples

    def select_batch(self, X_train, y_train, number_samples, w):
        """Draw a class-stratified random mini-batch.

        `w` is unused here; the parameter mirrors SVMPegasos.select_batch
        so both solvers share a call signature.
        """
        X_batch = np.array([]).reshape(0, self.number_features)
        y_batch = np.array([])
        for one_class in self.target_value:
            n = len(y_train[one_class])
            k = number_samples[one_class]
            random_number = np.random.permutation(n)
            random_idx = random_number[np.arange(k)]
            X_batch = np.r_[X_batch, X_train[one_class][random_idx, :]]
            y_batch = np.r_[y_batch, y_train[one_class][random_idx]]
        return X_batch, y_batch

    def compute_gradient(self, X, y, lambda_, w):
        """Gradient of the softplus-smoothed hinge plus L2 penalty.

        The sigmoid factor exp(u)/(1+exp(u)) is computed as
        exp(u - logaddexp(0, u)), which cannot overflow; the original
        exponentiated u directly and overflowed for u >~ 700.
        """
        n = X.shape[0]
        u = (1 - y * X.dot(w)) / self.softplus_a
        sigma = np.exp(u - np.logaddexp(0, u))  # = exp(u) / (1 + exp(u))
        gradient_w = (-sigma * y).dot(X)
        gradient_w = gradient_w / n + 2 * lambda_ * w
        return gradient_w

    def compute_loss(self, X, y, lambda_, w):
        """Mean of a*log(1+exp((1 - y w.x)/a)) plus lambda * ||w||^2."""
        n = X.shape[0]
        u = (1 - y * X.dot(w)) / self.softplus_a
        # logaddexp(0, u) == log(1 + exp(u)) without overflow.
        loss = np.sum(np.logaddexp(0, u))
        loss = loss * self.softplus_a / n + lambda_ * np.sum(w * w)
        return loss

    def predict(self, X_new):
        """Predict original-label classes for new raw samples."""
        X = self.data_preprocessor.process_data(X_new)
        X = np.reshape(X, (-1, self.number_features))
        predicted_score = self.predict_score(X)
        return self.predict_class(predicted_score)

    def validate(self, X_test, y_test):
        """Return the misclassification rate on a labeled test set."""
        X_test = self.data_preprocessor.process_data(X_test)
        assert X_test.shape[1] == self.number_features
        predicted_score = self.predict_score(X_test)
        predicted_class = self.predict_class(predicted_score)
        return self.calculate_predict_error(predicted_class, y_test)

    def calculate_predict_error(self, predicted_class, y):
        """Fraction of predictions that disagree with the true labels."""
        return 1 - np.mean(predicted_class == y)

    def predict_class(self, score):
        """Map scores back to the original labels via argmax."""
        return self.all_classes[np.argmax(score, axis=1)]

    def predict_score(self, X):
        """Signed decision value and its negation as a 2-column score."""
        N = X.shape[0]
        svm_score = np.zeros((N, 2))
        svm_score[:, 0] = X.dot(self.svm_weight)
        svm_score[:, 1] = -svm_score[:, 0]
        return svm_score
class SVMPegasos(Classifier):
    """Binary SVM trained with the Pegasos stochastic sub-gradient solver
    (mini-batch variant with projection onto the 1/sqrt(lambda) ball)."""

    def __init__(self, X, y, sgd_batch_size):
        # Remove zero-covariance features and standardize.
        self.data_preprocessor = DataPreprocessor(X)
        X = self.data_preprocessor.process_data(X)
        y = np.copy(y).astype(int)
        self.all_classes = np.unique(y)
        assert self.all_classes.size == 2  # binary classifier only
        # Map the two labels onto the canonical +1/-1 targets.
        self.target_value = np.array([1, -1]).astype(int)
        y[y == self.all_classes[0]] = self.target_value[0]
        y[y == self.all_classes[1]] = self.target_value[1]
        self.number_features = X.shape[1]
        # prepare for optimization
        self.loss_record = []
        penalty_lambda = 1
        # Initialize on the feasible ball: ||w_init|| = 1/sqrt(lambda).
        w_init = np.zeros(self.number_features)
        w_init.fill(np.sqrt(1 / (self.number_features * penalty_lambda)))
        self.svm_weight = self.pegas(X, y, penalty_lambda, w_init,
                                     sgd_batch_size)

    def pegas(self, X, y, lambda_, w0, k):
        """Run mini-batch Pegasos and return the final weight vector.

        k is the mini-batch size.  The iteration counter starts at 1
        because the step size in update_weight is 1 / (lambda_ * t).
        """
        X_train, y_train, number_samples = self.group_train_data(X, y, k)
        # optimization hyperparameters
        max_iterations = 10000
        max_ktot = 100 * X.shape[0]  # total-sample budget (~100 epochs)
        ktot = 0
        for i in range(1, max_iterations):
            ktot += k
            # Record the full-data objective before each update.
            self.loss_record.append(self.compute_loss(X, y, lambda_, w0))
            X_batch, y_batch = self.select_batch(X_train, y_train,
                                                number_samples, w0)
            w1 = self.update_weight(X_batch, y_batch, lambda_, w0, k, i)
            w0 = w1
            if ktot >= max_ktot:
                break
        return w0

    def group_train_data(self, X, y, k):
        """Split data per class and fix per-class batch counts so each
        mini-batch keeps roughly the full data's class ratio."""
        X_train = {}
        y_train = {}
        for one_class in self.target_value:
            index = y == one_class
            X_train[one_class] = X[index]
            y_train[one_class] = y[index]
        percent = k / len(y)
        number_class1 = floor(percent * len(y_train[self.target_value[0]]))
        number_class2 = k - number_class1
        assert number_class2 <= len(y_train[self.target_value[1]])
        number_samples = {
            self.target_value[0]: number_class1,
            self.target_value[1]: number_class2
        }
        return X_train, y_train, number_samples

    def select_batch(self, X_train, y_train, number_samples, w):
        """Draw a class-stratified random mini-batch and keep only the
        margin violators (y * w.x < 1), which carry the sub-gradient."""
        X_batch = np.array([]).reshape(0, self.number_features)
        y_batch = np.array([])
        for one_class in self.target_value:
            n = len(y_train[one_class])
            k = number_samples[one_class]
            random_number = np.random.permutation(n)
            random_idx = random_number[np.arange(k)]
            X_batch = np.r_[X_batch, X_train[one_class][random_idx, :]]
            y_batch = np.r_[y_batch, y_train[one_class][random_idx]]
        index = (np.dot(X_batch, w) * y_batch) < 1
        return X_batch[index, :], y_batch[index]

    def update_weight(self, X, y, lambda_, w0, k, iter_t):
        """One Pegasos step: gradient update with decaying step size,
        then projection onto the ball of radius 1/sqrt(lambda_)."""
        # decay learning rate
        eta = 1 / (lambda_ * iter_t)
        w_half = (1 - eta * lambda_) * w0 + eta / k * np.dot(y, X)
        # for numerical stability: keep w_half away from the zero vector
        # so the projection below cannot divide by ~0
        if np.sum(w_half * w_half) < 1e-07:
            w_half = np.maximum(w_half, 1e-04)
        # Scale back inside the feasible ball (no-op if already inside).
        w1 = np.minimum(
            1,
            1 / np.sqrt(lambda_) / np.sqrt(np.sum(w_half * w_half))) * w_half
        return w1

    def compute_loss(self, X, y, lambda_, w):
        """Average hinge loss plus (lambda/2) * ||w||^2 on the full data."""
        n = X.shape[0]
        tmp_loss = 1 - y * np.dot(X, w)
        # Only positive parts contribute (hinge).
        loss = np.sum(tmp_loss[tmp_loss > 0]) / n + lambda_ / 2 * np.sum(w * w)
        return loss

    def predict(self, X_new):
        """Predict original-label classes for new raw samples."""
        X = self.data_preprocessor.process_data(X_new)
        X = np.reshape(X, (-1, self.number_features))
        predicted_score = self.predict_score(X)
        predicted_class = self.predict_class(predicted_score)
        return predicted_class

    def validate(self, X_test, y_test):
        """Return the misclassification rate on a labeled test set."""
        X_test = self.data_preprocessor.process_data(X_test)
        assert X_test.shape[1] == self.number_features
        predicted_score = self.predict_score(X_test)
        predicted_class = self.predict_class(predicted_score)
        test_error = self.calculate_predict_error(predicted_class, y_test)
        return test_error

    def calculate_predict_error(self, predicted_class, y):
        """Fraction of predictions that disagree with the true labels."""
        predicted_indicator = np.array(
            [predicted_class[i] == y[i] for i in range(0, y.size)])
        return 1 - np.sum(predicted_indicator) / y.size

    def predict_class(self, score):
        """Map scores back to the original labels via argmax."""
        max_indicator = np.argmax(score, axis=1)
        return np.array([self.all_classes[i] for i in max_indicator])

    def predict_score(self, X):
        """Signed decision value and its negation as a 2-column score."""
        N = X.shape[0]
        svm_score = np.zeros((N, 2))
        svm_score[:, 0] = X.dot(self.svm_weight)
        svm_score[:, 1] = -svm_score[:, 0]
        return svm_score