class Least_Square_Discriminant(Classifier):
    """Multi-class classifier fitted by regularized least squares.

    Labels are one-hot encoded and a linear map from the preprocessed
    features to the indicator columns is fitted; prediction picks the class
    whose column has the highest score.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training Design Matrix and applied to every new Design Matrix
        y_vals (numpy.array): sorted unique class labels seen in training
        weight (numpy.matrix): fitted coefficients, one column per class
    """

    def __init__(self, X, y):
        """Initialize and train the classifier.

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
        """
        # Per the original author's note, Data_Preprocessor copies X.
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        self.y_vals = np.unique(y)
        y_recode = self.recode_y_to_bit_vector(y)
        self.weight = self.calc_weight(X, y_recode)

    def predict(self, X_new, output=0):
        """Predict the class label of every row of a new Design Matrix.

        Args:
            X_new (numpy.array): new Design Matrix
            output (int, optional): unused; kept for interface compatibility

        Returns:
            numpy.array: column vector (n, 1) of predicted class labels
        """
        X_new = self.data_preprocessor.predict(X_new)
        predicted_score = self.predict_score(X_new, self.weight)
        return self.predict_class(predicted_score, self.y_vals)

    def validate(self, X_new, y_new, output=0):
        """Compute the prediction error on a labelled Design Matrix.

        Args:
            X_new (numpy.array): new Design Matrix
            y_new (numpy.array): true class labels for X_new
            output (int, optional): unused; kept for interface compatibility

        Returns:
            float: fraction of misclassified observations
        """
        return self.calc_predict_error(self.predict(X_new), y_new)

    def recode_y_to_bit_vector(self, y):
        """One-hot encode the labels against ``self.y_vals``.

        Args:
            y (numpy.array): Response Vector

        Returns:
            numpy.array: (n, number of classes) float matrix with a single 1
                per row, in the column of that row's label
        """
        labels = np.asarray(y).ravel()
        # Broadcasting compares every label with every known class at once.
        return (labels[:, np.newaxis] == self.y_vals[np.newaxis, :]).astype(float)

    def calc_weight(self, X, y):
        """Solve the regularized normal equations (X'X + I) W = X'Y.

        Args:
            X (numpy.array): preprocessed Design Matrix
            y (numpy.array): one-hot encoded responses

        Returns:
            numpy.matrix: coefficient matrix, one column per class
        """
        X = np.asarray(X)
        # The identity matrix is added to X'X to fix its conditioning.
        gram = np.dot(X.T, X) + np.identity(X.shape[1])
        # Solving the system is cheaper and more accurate than forming the
        # explicit inverse and multiplying.
        return mat(np.linalg.solve(gram, np.dot(X.T, np.asarray(y))))

    def predict_score(self, X, weight):
        """Return the per-class scores ``X * weight`` as a matrix."""
        return mat(X) * mat(weight)

    def predict_class(self, score, y_vals):
        """Map each row of scores to the label of its highest-scoring class.

        Args:
            score (numpy.matrix): (n, number of classes) score matrix
            y_vals (numpy.array): class labels, in column order of ``score``

        Returns:
            numpy.array: column vector (n, 1) of predicted labels
        """
        max_indicator = np.asarray(np.argmax(score, axis=1)).reshape(-1, 1)
        return np.asarray(y_vals)[max_indicator]

    def calc_predict_error(self, predicted_class, y):
        """Return the fraction of predictions that differ from ``y``.

        Args:
            predicted_class (numpy.array): predicted class labels
            y (numpy.array): true class labels

        Returns:
            float: overall error rate
        """
        truth = np.asarray(y).ravel()
        hits = np.asarray(predicted_class).ravel() == truth
        # float() keeps this a true division on every Python version.
        return 1 - np.sum(hits) / float(truth.size)
class Least_Square_Discriminant(Classifier):
    """Multi-class classifier fitted by regularized least squares.

    Labels are one-hot encoded and a linear map from the preprocessed
    features to the indicator columns is fitted; prediction picks the class
    whose column has the highest score.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training Design Matrix and applied to every new Design Matrix
        y_vals (numpy.array): sorted unique class labels seen in training
        weight (numpy.matrix): fitted coefficients, one column per class
    """

    def __init__(self, X, y):
        """Initialize and train the classifier.

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
        """
        # Per the original author's note, Data_Preprocessor copies X.
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        self.y_vals = np.unique(y)
        y_recode = self.recode_y_to_bit_vector(y)
        self.weight = self.calc_weight(X, y_recode)

    def predict(self, X_new, output=0):
        """Predict the class label of every row of a new Design Matrix.

        Args:
            X_new (numpy.array): new Design Matrix
            output (int, optional): unused; kept for interface compatibility

        Returns:
            numpy.array: column vector (n, 1) of predicted class labels
        """
        X_new = self.data_preprocessor.predict(X_new)
        predicted_score = self.predict_score(X_new, self.weight)
        return self.predict_class(predicted_score, self.y_vals)

    def validate(self, X_new, y_new, output=0):
        """Compute the prediction error on a labelled Design Matrix.

        Args:
            X_new (numpy.array): new Design Matrix
            y_new (numpy.array): true class labels for X_new
            output (int, optional): unused; kept for interface compatibility

        Returns:
            float: fraction of misclassified observations
        """
        return self.calc_predict_error(self.predict(X_new), y_new)

    def recode_y_to_bit_vector(self, y):
        """One-hot encode the labels against ``self.y_vals``.

        Args:
            y (numpy.array): Response Vector

        Returns:
            numpy.array: (n, number of classes) float matrix with a single 1
                per row, in the column of that row's label
        """
        labels = np.asarray(y).ravel()
        # Broadcasting compares every label with every known class at once.
        return (labels[:, np.newaxis] == self.y_vals[np.newaxis, :]).astype(float)

    def calc_weight(self, X, y):
        """Solve the regularized normal equations (X'X + I) W = X'Y.

        Args:
            X (numpy.array): preprocessed Design Matrix
            y (numpy.array): one-hot encoded responses

        Returns:
            numpy.matrix: coefficient matrix, one column per class
        """
        X = np.asarray(X)
        # The identity matrix is added to X'X to fix its conditioning.
        gram = np.dot(X.T, X) + np.identity(X.shape[1])
        # Solving the system is cheaper and more accurate than forming the
        # explicit inverse and multiplying.
        return mat(np.linalg.solve(gram, np.dot(X.T, np.asarray(y))))

    def predict_score(self, X, weight):
        """Return the per-class scores ``X * weight`` as a matrix."""
        return mat(X) * mat(weight)

    def predict_class(self, score, y_vals):
        """Map each row of scores to the label of its highest-scoring class.

        Args:
            score (numpy.matrix): (n, number of classes) score matrix
            y_vals (numpy.array): class labels, in column order of ``score``

        Returns:
            numpy.array: column vector (n, 1) of predicted labels
        """
        max_indicator = np.asarray(np.argmax(score, axis=1)).reshape(-1, 1)
        return np.asarray(y_vals)[max_indicator]

    def calc_predict_error(self, predicted_class, y):
        """Return the fraction of predictions that differ from ``y``.

        Args:
            predicted_class (numpy.array): predicted class labels
            y (numpy.array): true class labels

        Returns:
            float: overall error rate
        """
        truth = np.asarray(y).ravel()
        hits = np.asarray(predicted_class).ravel() == truth
        # float() keeps this a true division on every Python version.
        return 1 - np.sum(hits) / float(truth.size)
class Fisher_Projection(Classifier):
    """Project observations onto Fisher discriminant directions.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training Design Matrix and applied to every new Design Matrix
        y_vals (numpy.array): sorted unique class labels seen in training
        weight (numpy.array): projection matrix with (number of classes - 1)
            columns
    """

    def __init__(self, X, y):
        """Fit the projection on Design Matrix X and Response Vector y."""
        self.data_preprocessor = Data_Preprocessor(X)
        features = self.data_preprocessor.predict(X)
        labels = np.copy(y)
        self.y_vals = np.unique(labels)
        self.weight = self.calc_fisher_weight_vector(features, labels)

    def predict(self, X):
        """Return the preprocessed rows of X projected onto ``self.weight``."""
        projected_input = self.data_preprocessor.predict(X)
        return np.dot(projected_input, self.weight)

    def validate(self):
        """Do nothing; no validation is implemented for the projection."""
        pass

    def calc_between_class_variance(self, X, y):
        """Return the between-class scatter matrix.

        Each class contributes its size times the outer product of the
        difference between the class mean and the overall mean.
        """
        _, num_features = X.shape
        grand_mean = np.mean(X, axis=0)
        scatter = np.zeros((num_features, num_features))
        for label in self.y_vals:
            members = X[y == label, :]
            offset = np.mean(members, axis=0) - grand_mean
            scatter += members.shape[0] * np.outer(offset, offset)
        return scatter

    def calc_within_class_variance(self, X, y):
        """Return the within-class scatter matrix plus an identity matrix.

        Each class contributes its size times its biased covariance matrix.
        """
        _, num_features = X.shape
        scatter = np.zeros((num_features, num_features))
        for label in self.y_vals:
            members = X[y == label, :]
            scatter += members.shape[0] * np.cov(members, rowvar=0, bias=1)
        # The identity matrix is added to fix the conditioning of the
        # variance matrix before it is inverted.
        scatter += np.identity(num_features)
        return scatter

    def calc_fisher_weight_vector(self, X, y):
        """Return the real parts of the leading eigenvectors of Sw^-1 * Sb.

        The number of eigenvectors requested is the number of classes
        minus one.
        """
        s_between = self.calc_between_class_variance(X, y)
        s_within = self.calc_within_class_variance(X, y)
        ratio = mat(np.linalg.inv(s_within)) * mat(s_between)
        _, eigenvectors = eigs(ratio, k=self.y_vals.size - 1)
        return eigenvectors.real
class Fisher_Projection(Classifier):
    """Project observations onto Fisher discriminant directions.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training Design Matrix and applied to every new Design Matrix
        y_vals (numpy.array): sorted unique class labels seen in training
        weight (numpy.array): projection matrix with (number of classes - 1)
            columns
    """

    def __init__(self, X, y):
        """Fit the projection on Design Matrix X and Response Vector y."""
        self.data_preprocessor = Data_Preprocessor(X)
        features = self.data_preprocessor.predict(X)
        labels = np.copy(y)
        self.y_vals = np.unique(labels)
        self.weight = self.calc_fisher_weight_vector(features, labels)

    def predict(self, X):
        """Return the preprocessed rows of X projected onto ``self.weight``."""
        projected_input = self.data_preprocessor.predict(X)
        return np.dot(projected_input, self.weight)

    def validate(self):
        """Do nothing; no validation is implemented for the projection."""
        pass

    def calc_between_class_variance(self, X, y):
        """Return the between-class scatter matrix.

        Each class contributes its size times the outer product of the
        difference between the class mean and the overall mean.
        """
        _, num_features = X.shape
        grand_mean = np.mean(X, axis=0)
        scatter = np.zeros((num_features, num_features))
        for label in self.y_vals:
            members = X[y == label, :]
            offset = np.mean(members, axis=0) - grand_mean
            scatter += members.shape[0] * np.outer(offset, offset)
        return scatter

    def calc_within_class_variance(self, X, y):
        """Return the within-class scatter matrix plus an identity matrix.

        Each class contributes its size times its biased covariance matrix.
        """
        _, num_features = X.shape
        scatter = np.zeros((num_features, num_features))
        for label in self.y_vals:
            members = X[y == label, :]
            scatter += members.shape[0] * np.cov(members, rowvar=0, bias=1)
        # The identity matrix is added to fix the conditioning of the
        # variance matrix before it is inverted.
        scatter += np.identity(num_features)
        return scatter

    def calc_fisher_weight_vector(self, X, y):
        """Return the real parts of the leading eigenvectors of Sw^-1 * Sb.

        The number of eigenvectors requested is the number of classes
        minus one.
        """
        s_between = self.calc_between_class_variance(X, y)
        s_within = self.calc_within_class_variance(X, y)
        ratio = mat(np.linalg.inv(s_within)) * mat(s_between)
        _, eigenvectors = eigs(ratio, k=self.y_vals.size - 1)
        return eigenvectors.real
class Logistic_Regression(Classifier):
    """Binary logistic regression with an L2 penalty on non-intercept weights.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training Design Matrix and applied to every new Design Matrix
        y_vals (numpy.array): the two sorted class labels; the first is coded
            as -1 and the second as +1 during training
        weight: result returned by the optimizer in ``lr_train``
    """

    def __init__(self, X, y, lambda_):
        """Initialize the classifier and train the model.

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector with exactly two distinct labels
            lambda_ (float): regularization parameter
        """
        # Per the original author's note, Data_Preprocessor copies X.
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        X = self.add_intercept(X)

        self.y_vals = np.unique(y)
        assert self.y_vals.size == 2, "Logistic_Regression needs exactly two classes"
        # Recode in one step from the ORIGINAL labels. Overwriting in place,
        # one label after the other, merges both classes whenever the second
        # label is itself -1 (e.g. labels {-2, -1}).
        y = np.where(np.asarray(y) == self.y_vals[0], -1, 1)

        self.weight = self.lr_train(X, y, lambda_)

    def predict(self, X, output=0):
        """Predict the class label of every row of a new Design Matrix.

        Args:
            X (numpy.array): new Design Matrix
            output (int, optional): unused; kept for interface compatibility

        Returns:
            list: predicted class labels
        """
        X = self.data_preprocessor.predict(X)
        X = self.add_intercept(X)
        return self.predict_class(self.predict_score(X))

    def validate(self, X, y, output=0):
        """Compute the prediction error on a labelled Design Matrix.

        Args:
            X (numpy.array): new Design Matrix
            y (numpy.array): true class labels for X
            output (int, optional): unused; kept for interface compatibility

        Returns:
            float: fraction of misclassified observations
        """
        return self.calc_predict_error(self.predict(X), y)

    def add_intercept(self, X):
        """Return X with a leading column of ones."""
        num_obs = X.shape[0]
        return np.concatenate((np.ones((num_obs, 1)), X), axis=1)

    def lr_loss(self, w, X, y, lambda_):
        """Return the penalized negative log-likelihood.

        Args:
            w (numpy.array): weight vector, intercept first
            X (numpy.array): Design Matrix whose first column is all ones
            y (numpy.array): responses coded in {-1, +1}
            lambda_ (float): regularization parameter

        Returns:
            float: sum of log(1 + exp(-y * Xw)) plus lambda_/2 * ||w[1:]||^2
        """
        margins = -y * np.dot(X, w)
        # logaddexp(0, h) is log(1 + exp(h)) evaluated without overflow.
        data_term = np.sum(np.logaddexp(0, margins))
        # The intercept w[0] is not regularized.
        penalty = lambda_ / 2.0 * np.dot(w[1:], w[1:])
        return data_term + penalty

    def lr_gradient(self, w, X, y, lambda_):
        """Return the gradient of ``lr_loss`` with respect to ``w``.

        Args:
            w (numpy.array): weight vector, intercept first
            X (numpy.array): Design Matrix whose first column is all ones
            y (numpy.array): responses coded in {-1, +1}
            lambda_ (float): regularization parameter

        Returns:
            numpy.array: gradient vector with one entry per column of X
        """
        residual = (1 - sigmoid(y * np.dot(X, w))) * y
        ascent = np.asarray(np.dot(residual, X)).ravel()
        # Do not regularize the intercept.
        ascent = ascent - lambda_ * np.concatenate(([0], w[1:]))
        return -ascent

    def grad_check(self, w, X, y, lambda_):
        """Compare ``lr_gradient`` with a central finite difference.

        Prints the analytic gradient and the relative error between the
        analytic and the numerical gradient.

        Returns:
            float: relative error between the two gradients
        """
        num_features = X.shape[1]
        grad0 = self.lr_gradient(w, X, y, lambda_)
        print(grad0)
        eps = 1e-05
        grad1 = np.zeros_like(grad0)
        for i in range(num_features):
            delta = np.zeros_like(w, dtype=float)
            delta[i] = eps
            upper = self.lr_loss(w + delta, X, y, lambda_)
            lower = self.lr_loss(w - delta, X, y, lambda_)
            grad1[i] = (upper - lower) / 2 / eps
        rel_error = np.linalg.norm(grad1 - grad0) / np.linalg.norm(grad0)
        print(rel_error)
        return rel_error

    def lr_train(self, X, y, lambda_):
        """Minimize ``lr_loss`` from a random start and return the result.

        The start point is drawn uniformly from [-1, 1) per coordinate.
        NOTE(review): ``fmin`` is imported elsewhere in the file; it is given
        a gradient callback, so presumably a gradient-based minimizer -
        confirm which one and what it returns.
        """
        num_features = X.shape[1]
        w = (np.random.rand(num_features) - 0.5) * 2
        return fmin(f=self.lr_loss, x0=w, fprime=self.lr_gradient,
                    args=(X, y, lambda_), maxiter=50, disp=False)

    def predict_score(self, X):
        """Return the linear scores ``X * weight'`` as a column matrix."""
        return mat(X) * mat(self.weight).T

    def predict_class(self, predicted_score):
        """Map negative scores to the first label, all others to the second.

        Returns:
            list: predicted class labels
        """
        return [self.y_vals[0] if s < 0 else self.y_vals[1]
                for s in np.asarray(predicted_score).ravel()]

    def calc_predict_error(self, predicted_class, y):
        """Return the fraction of predictions that differ from ``y``."""
        truth = np.asarray(y).ravel()
        hits = np.asarray(predicted_class).ravel() == truth
        # float() keeps this a true division on every Python version.
        return 1 - np.sum(hits) / float(truth.size)
class Logistic_Regression(Classifier):
    """Binary logistic regression with an L2 penalty on non-intercept weights.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training Design Matrix and applied to every new Design Matrix
        y_vals (numpy.array): the two sorted class labels; the first is coded
            as -1 and the second as +1 during training
        weight: result returned by the optimizer in ``lr_train``
    """

    def __init__(self, X, y, lambda_):
        """Initialize the classifier and train the model.

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector with exactly two distinct labels
            lambda_ (float): regularization parameter
        """
        # Per the original author's note, Data_Preprocessor copies X.
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        X = self.add_intercept(X)

        self.y_vals = np.unique(y)
        assert self.y_vals.size == 2, "Logistic_Regression needs exactly two classes"
        # Recode in one step from the ORIGINAL labels. Overwriting in place,
        # one label after the other, merges both classes whenever the second
        # label is itself -1 (e.g. labels {-2, -1}).
        y = np.where(np.asarray(y) == self.y_vals[0], -1, 1)

        self.weight = self.lr_train(X, y, lambda_)

    def predict(self, X, output=0):
        """Predict the class label of every row of a new Design Matrix.

        Args:
            X (numpy.array): new Design Matrix
            output (int, optional): unused; kept for interface compatibility

        Returns:
            list: predicted class labels
        """
        X = self.data_preprocessor.predict(X)
        X = self.add_intercept(X)
        return self.predict_class(self.predict_score(X))

    def validate(self, X, y, output=0):
        """Compute the prediction error on a labelled Design Matrix.

        Args:
            X (numpy.array): new Design Matrix
            y (numpy.array): true class labels for X
            output (int, optional): unused; kept for interface compatibility

        Returns:
            float: fraction of misclassified observations
        """
        return self.calc_predict_error(self.predict(X), y)

    def add_intercept(self, X):
        """Return X with a leading column of ones."""
        num_obs = X.shape[0]
        return np.concatenate((np.ones((num_obs, 1)), X), axis=1)

    def lr_loss(self, w, X, y, lambda_):
        """Return the penalized negative log-likelihood.

        Args:
            w (numpy.array): weight vector, intercept first
            X (numpy.array): Design Matrix whose first column is all ones
            y (numpy.array): responses coded in {-1, +1}
            lambda_ (float): regularization parameter

        Returns:
            float: sum of log(1 + exp(-y * Xw)) plus lambda_/2 * ||w[1:]||^2
        """
        margins = -y * np.dot(X, w)
        # logaddexp(0, h) is log(1 + exp(h)) evaluated without overflow.
        data_term = np.sum(np.logaddexp(0, margins))
        # The intercept w[0] is not regularized.
        penalty = lambda_ / 2.0 * np.dot(w[1:], w[1:])
        return data_term + penalty

    def lr_gradient(self, w, X, y, lambda_):
        """Return the gradient of ``lr_loss`` with respect to ``w``.

        Args:
            w (numpy.array): weight vector, intercept first
            X (numpy.array): Design Matrix whose first column is all ones
            y (numpy.array): responses coded in {-1, +1}
            lambda_ (float): regularization parameter

        Returns:
            numpy.array: gradient vector with one entry per column of X
        """
        residual = (1 - sigmoid(y * np.dot(X, w))) * y
        ascent = np.asarray(np.dot(residual, X)).ravel()
        # Do not regularize the intercept.
        ascent = ascent - lambda_ * np.concatenate(([0], w[1:]))
        return -ascent

    def grad_check(self, w, X, y, lambda_):
        """Compare ``lr_gradient`` with a central finite difference.

        Prints the analytic gradient and the relative error between the
        analytic and the numerical gradient.

        Returns:
            float: relative error between the two gradients
        """
        num_features = X.shape[1]
        grad0 = self.lr_gradient(w, X, y, lambda_)
        print(grad0)
        eps = 1e-05
        grad1 = np.zeros_like(grad0)
        for i in range(num_features):
            delta = np.zeros_like(w, dtype=float)
            delta[i] = eps
            upper = self.lr_loss(w + delta, X, y, lambda_)
            lower = self.lr_loss(w - delta, X, y, lambda_)
            grad1[i] = (upper - lower) / 2 / eps
        rel_error = np.linalg.norm(grad1 - grad0) / np.linalg.norm(grad0)
        print(rel_error)
        return rel_error

    def lr_train(self, X, y, lambda_):
        """Minimize ``lr_loss`` from a random start and return the result.

        The start point is drawn uniformly from [-1, 1) per coordinate.
        NOTE(review): ``fmin`` is imported elsewhere in the file; it is given
        a gradient callback, so presumably a gradient-based minimizer -
        confirm which one and what it returns.
        """
        num_features = X.shape[1]
        w = (np.random.rand(num_features) - 0.5) * 2
        return fmin(f=self.lr_loss, x0=w, fprime=self.lr_gradient,
                    args=(X, y, lambda_), maxiter=50, disp=False)

    def predict_score(self, X):
        """Return the linear scores ``X * weight'`` as a column matrix."""
        return mat(X) * mat(self.weight).T

    def predict_class(self, predicted_score):
        """Map negative scores to the first label, all others to the second.

        Returns:
            list: predicted class labels
        """
        return [self.y_vals[0] if s < 0 else self.y_vals[1]
                for s in np.asarray(predicted_score).ravel()]

    def calc_predict_error(self, predicted_class, y):
        """Return the fraction of predictions that differ from ``y``."""
        truth = np.asarray(y).ravel()
        hits = np.asarray(predicted_class).ravel() == truth
        # float() keeps this a true division on every Python version.
        return 1 - np.sum(hits) / float(truth.size)
class SVM_SMO(Classifier):
    """class SVM_SMO which implements the SMO algorithm for training
    (linear) SVM

    Attributes:
        data_preprocessor (Data_Preprocessor): a Data_Preprocessor instance
        loglist (list): store dual objective function value for each
            iteration
        X (numpy.array): Design Matrix
        y (numpy.array): Response Vector recoded to {-1, 1}
        y_vals (numpy.array): class labels
        alpha_array (numpy.array): trained dual variables
        b (list): one-element list holding the trained threshold
    """

    def __init__(self, X, y, C, iter_max):
        """initialize the classifier and train the model

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector; two distinct labels expected
            C (float): penalty parameter
            iter_max (int): maximum iteration for the algorithm
        """
        self.data_preprocessor = Data_Preprocessor(X)
        self.X = self.data_preprocessor.predict(X)
        self.y_vals = unique(y)
        first_label = self.y_vals[0]
        # Recode from the ORIGINAL labels into a new array. Overwriting in
        # place, one label after the other, merges both classes whenever the
        # second label is itself -1; it also leaves the caller's y untouched.
        self.y = array([-1 if label == first_label else 1 for label in y])
        self.loglist = []
        self.alpha_array, self.b = self.train_svm(self.X, self.y, C, iter_max)

    def predict(self, X, output=0):
        """make prediction for the new Design Matrix X

        Args:
            X (numpy.array): new Design Matrix
            output (int, optional): unused; kept for interface compatibility

        Returns:
            list: predicted class labels
        """
        X = self.data_preprocessor.predict(X)
        return self.predict_class(self.predict_score(X))

    def validate(self, X, y, output=0):
        """validate prediction result for the new Design Matrix X and new
        Response Vector y

        Args:
            X (numpy.array): new Design Matrix
            y (numpy.array): new Response Vector
            output (int, optional): unused; kept for interface compatibility

        Returns:
            float: prediction error on the new Inputs
        """
        return self.calc_predict_error(self.predict(X), y)

    def predict_score(self, X):
        """calculate prediction score for new Design Matrix X

        Args:
            X (numpy.array): new (already preprocessed) Design Matrix

        Returns:
            numpy.array: vector of prediction score
        """
        return self.get_output(X, self.alpha_array, self.X, self.y, self.b)

    def predict_class(self, predicted_score):
        """predict the class label for each observation

        Args:
            predicted_score (numpy.array): vector of prediction score

        Returns:
            list: first label for negative scores, second label otherwise
        """
        return [self.y_vals[0] if s < 0 else self.y_vals[1]
                for s in predicted_score]

    def calc_predict_error(self, predicted_class, y):
        """Calculate the prediction error

        Args:
            predicted_class (numpy.array): vector of predicted class label
            y (numpy.array): vector of true class label

        Returns:
            float: overall error rate
        """
        predicted_indicator = array(predicted_class).ravel() == y.ravel()
        # float() keeps this a true division on every Python version.
        return 1 - sum(predicted_indicator) / float(y.size)

    def get_output(self, x_new, alpha_array, X, y, b):
        """Calculate f(x_new) = sum_i alpha_i * y_i * <x_i, x_new> - b

        Args:
            x_new (numpy.array): new design matrix (or a single row)
            alpha_array (numpy.array): current alpha array
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            b (list): one-element list holding the threshold

        Returns:
            numpy.array: f(x_new); because b is a list the result is an
                array even for a single row
        """
        signed_y = y * alpha_array
        return dot(dot(signed_y, X), x_new.T) - b

    def calc_objective_fast(self, alpha_array, y, K):
        """Calculate the dual objective function

        Args:
            alpha_array (numpy.array): current alpha array
            y (numpy.array): Response Vector
            K (numpy.array): kernel Matrix dot(X, X.T)

        Returns:
            float: dual objective function value
        """
        signed_y = y * alpha_array
        return sum(alpha_array) - 0.5 * dot(dot(signed_y, K), signed_y)

    def calc_LH(self, alpha1, alpha2, y1, y2, C):
        """Calculate the lower and upper bound for new alpha2

        Args:
            alpha1 (float): current value of the first multiplier
            alpha2 (float): current value of the second multiplier
            y1 (int): label of the first example
            y2 (int): label of the second example
            C (float): penalty parameter

        Returns:
            (L, H): ends of the feasible segment for the new alpha2
        """
        if y1 != y2:
            L = maximum(0, alpha2 - alpha1)
            H = minimum(C, C + alpha2 - alpha1)
        else:
            L = maximum(0, alpha1 + alpha2 - C)
            H = minimum(C, alpha1 + alpha2)
        return (L, H)

    def choose_i1(self, i2, alpha_index_nonbound, alpha_array, X, y, b):
        """Heuristically choose alpha1 to optimize given alpha2

        Picks the non-bound example whose error differs most from E2.

        Args:
            i2 (int): index of alpha2
            alpha_index_nonbound (numpy.array): array of nonbound alpha's
                index
            alpha_array (numpy.array): current alpha array
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            b (list): one-element list holding the threshold

        Returns:
            int: index of alpha1
        """
        E2 = self.get_output(X[i2, :], alpha_array, X, y, b) - y[i2]
        E1_array = self.get_output(
            X[alpha_index_nonbound, :], alpha_array, X, y, b) \
            - y[alpha_index_nonbound]
        index = argmax(fabs(E1_array - E2[0]))
        return alpha_index_nonbound[index]

    def update_b(self, b_old, alpha1, alpha2, a1, a2, x1, x2, y1, y2,
                 E1, E2, C):
        """update the threshold parameter b

        Args:
            b_old (float): current threshold
            alpha1 (float): old value of the first multiplier
            alpha2 (float): old value of the second multiplier
            a1 (float): new value of the first multiplier
            a2 (float): new value of the second multiplier
            x1 (numpy.array): first example
            x2 (numpy.array): second example
            y1 (int): label of the first example
            y2 (int): label of the second example
            E1 (float): error on the first example
            E2 (float): error on the second example
            C (float): penalty parameter

        Returns:
            float: updated parameter b
        """
        b1 = E1 + y1 * (a1 - alpha1) * dot(x1, x1) + y2 * \
            (a2 - alpha2) * dot(x1, x2) + b_old
        b2 = E2 + y1 * (a1 - alpha1) * dot(x1, x2) + y2 * \
            (a2 - alpha2) * dot(x2, x2) + b_old
        a1_at_bound = (a1 == C or a1 == 0)
        a2_at_bound = (a2 == C or a2 == 0)
        if a1_at_bound and a2_at_bound:
            return (b1 + b2) / 2
        elif a1_at_bound:
            return b2
        else:
            return b1

    def take_step(self, i1, i2, alpha_array, X, y, C, b, K):
        """Optimize given alpha1 and alpha2

        On success alpha_array and b are updated in place.

        Args:
            i1 (int): index of alpha1
            i2 (int): index of alpha2
            alpha_array (numpy.array): current alpha array
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            C (float): penalty parameter
            b (list): one-element list holding the threshold
            K (numpy.array): kernel Matrix dot(X, X.T)

        Returns:
            boolean: True if we optimize the alpha pairs, otherwise False
        """
        if i1 == i2:
            return False
        eps = 1e-05
        alpha1, alpha2 = alpha_array[[i1, i2]]
        y1, y2 = y[[i1, i2]]
        X1, X2 = X[[i1, i2]]
        # get_output yields a one-element array for a single row; take the
        # scalar so that everything written back into alpha_array and b is
        # a scalar as well.
        E1 = self.get_output(X1, alpha_array, X, y, b)[0] - y1
        E2 = self.get_output(X2, alpha_array, X, y, b)[0] - y2
        s = y1 * y2
        L, H = self.calc_LH(alpha1, alpha2, y1, y2, C)
        if fabs(L - H) < eps:
            return False
        k11 = dot(X1, X1)
        k12 = dot(X1, X2)
        k22 = dot(X2, X2)
        eta = 2 * k12 - k11 - k22
        if eta < 0:
            # Unconstrained optimum along the constraint line, clipped to
            # the feasible segment [L, H].
            a2 = alpha2 - y2 * (E1 - E2) / eta
            if a2 < L:
                a2 = L
            elif a2 > H:
                a2 = H
        else:
            # Degenerate curvature: evaluate the dual objective at both
            # ends of the segment and keep the larger one.
            alpha_array[i2] = L
            alpha_array[i1] = alpha1 + s * (alpha2 - L)
            Lobj = self.calc_objective_fast(alpha_array, y, K)
            alpha_array[i2] = H
            alpha_array[i1] = alpha1 + s * (alpha2 - H)
            Hobj = self.calc_objective_fast(alpha_array, y, K)
            # Restore the probed entries before deciding.
            alpha_array[i1] = alpha1
            alpha_array[i2] = alpha2
            if Lobj > Hobj + eps:
                a2 = L
            elif Lobj < Hobj - eps:
                a2 = H
            else:
                a2 = alpha2
        # Snap values that are numerically at a bound onto the bound.
        if a2 < eps:
            a2 = 0
        elif a2 > C - eps:
            a2 = C
        if fabs(a2 - alpha2) < eps * (a2 + alpha2 + eps):
            return False
        a1 = alpha1 + s * (alpha2 - a2)
        b[0] = self.update_b(b[0], alpha1, alpha2, a1, a2,
                             X1, X2, y1, y2, E1, E2, C)
        alpha_array[i1] = a1
        alpha_array[i2] = a2
        return True

    def examine_example(self, i2, alpha_array, X, y, C, b, K):
        """Given alpha2, find alpha1 and optimize them

        Args:
            i2 (index): index of alpha2
            alpha_array (numpy.array): current array of alpha
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            C (float): penalty parameter
            b (list): one-element list holding the threshold
            K (numpy.array): kernel Matrix dot(X, X.T)

        Returns:
            boolean: True if we optimize alpha2, otherwise False
        """
        n = X.shape[0]
        y2 = y[i2]
        alpha2 = alpha_array[i2]
        E2 = self.get_output(X[i2], alpha_array, X, y, b)[0] - y2
        r2 = E2 * y2
        tol = 1e-03
        violates_kkt = (r2 < -tol and alpha2 < C) or (r2 > tol and alpha2 > 0)
        if not violates_kkt:
            return False

        # find those nonbound alphas
        alpha_index_nonbound = [i for i in range(n)
                                if alpha_array[i] != 0 and alpha_array[i] != C]
        num_nonbound = len(alpha_index_nonbound)
        if num_nonbound > 1:
            # heuristically choose i1
            i1 = self.choose_i1(
                i2, alpha_index_nonbound, alpha_array, X, y, b)
            if i1 >= 0 and self.take_step(i1, i2, alpha_array, X, y, C, b, K):
                return True

        # iterate over all nonbound alphas using random start index;
        # skipped when there are none, since choice() rejects an empty range
        if num_nonbound > 0:
            start_index = int(choice(num_nonbound, 1)[0])
            rotated = (alpha_index_nonbound[start_index:]
                       + alpha_index_nonbound[:start_index])
            for i1 in rotated:
                if self.take_step(i1, i2, alpha_array, X, y, C, b, K):
                    return True

        # iterate over all alphas using random start index, wrapping
        # around so that every index is visited exactly once
        start_index = int(choice(n, 1)[0])
        alpha_index_modified = concatenate(
            (arange(start_index, n), arange(0, start_index)))
        for i1 in alpha_index_modified:
            if self.take_step(i1, i2, alpha_array, X, y, C, b, K):
                return True
        return False

    def train_svm(self, X, y, C, iter_max):
        """Train SVM using SMO algorithm

        Alternates between a sweep over all examples and sweeps over the
        non-bound examples only.

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector {-1, 1}
            C (float): penalty parameter
            iter_max (int): maximum number of iterations

        Returns:
            (numpy.array, list): (alpha_array, b), trained parameters
        """
        n = X.shape[0]
        K = dot(X, X.T)
        alpha_array = zeros(n)
        b = [0]
        num_changed = 0
        examine_all = True
        while num_changed > 0 or examine_all:
            num_changed = 0
            if examine_all:
                alpha_index = permutation(range(n))
            else:
                alpha_index = permutation(
                    [i for i in range(n)
                     if alpha_array[i] != 0 and alpha_array[i] != C])
            for i2 in alpha_index:
                if self.examine_example(i2, alpha_array, X, y, C, b, K):
                    num_changed += 1
                # NOTE(review): the objective is logged and the budget is
                # decremented once per examined example, not once per
                # successful step - confirm this is the intended meaning
                # of an "iteration".
                self.loglist.append(
                    self.calc_objective_fast(alpha_array, y, K))
                iter_max -= 1
                if iter_max < 0:
                    break
            if iter_max < 0:
                break
            # stop if the number of changed alphas are less than n / 10
            if num_changed < n / 10:
                break
            if examine_all:
                examine_all = False
            elif num_changed == 0:
                examine_all = True
        return (alpha_array, b)
class Naive_Bayes(Classifier):
    """Gaussian naive Bayes classifier.

    Every feature is modelled per class by an independent normal
    distribution; prediction picks the class with the highest log-posterior
    score.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training Design Matrix and applied to every new Design Matrix
        y_vals (numpy.array): sorted unique class labels seen in training
        mean_array (numpy.array): (classes, features) per-class means
        std_array (numpy.array): (classes, features) per-class standard
            deviations, floored at 1e-03
        prior (list): per-class relative frequencies in the training labels
    """

    def __init__(self, X, y):
        """Initialize and train the classifier.

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
        """
        # Per the original author's note, Data_Preprocessor copies X.
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        y = np.copy(y)
        self.y_vals = np.unique(y)
        self.mean_array, self.std_array = self.estimate_mean_std(X, y)
        self.prior = self.calc_prior(y)

    def predict(self, X_new, output=0):
        """Predict the class label of every row of a new Design Matrix.

        Args:
            X_new (numpy.array): new Design Matrix
            output (int, optional): unused; kept for interface compatibility

        Returns:
            numpy.array: vector of predicted class labels
        """
        X_new = self.data_preprocessor.predict(X_new)
        return self.predict_class(self.predict_score(X_new))

    def validate(self, X_new, y_new, output=0):
        """Compute the prediction error on a labelled Design Matrix.

        Args:
            X_new (numpy.array): new Design Matrix
            y_new (numpy.array): true class labels for X_new
            output (int, optional): unused; kept for interface compatibility

        Returns:
            float: fraction of misclassified observations
        """
        return self.calc_predict_error(self.predict(X_new), y_new)

    def estimate_mean_std(self, X, y):
        """Estimate per-class feature means and standard deviations.

        Args:
            X (numpy.array): preprocessed Design Matrix
            y (numpy.array): Response Vector

        Returns:
            (numpy.array, numpy.array): (mean_array, std_array), both of
                shape (classes, features)
        """
        num_features = X.shape[1]
        num_classes = self.y_vals.size
        mean_array = np.zeros((num_classes, num_features))
        std_array = np.zeros((num_classes, num_features))
        for k, label in enumerate(self.y_vals):
            X_sub = X[y == label, :]
            mean_array[k, :] = np.mean(X_sub, axis=0)
            # With ddof=1 a single observation yields NaN, and NaN is not
            # caught by the "< 1e-03" floor below; leave such rows at zero
            # so they are floored like any other degenerate feature.
            if X_sub.shape[0] > 1:
                std_array[k, :] = np.std(X_sub, axis=0, ddof=1)
        # Floor the spread so the normal density stays well defined.
        std_array[std_array < 1e-03] = 1e-03
        return (mean_array, std_array)

    def calc_prior(self, y):
        """Return the relative frequency of each class label in ``y``."""
        total = float(y.size)
        return [np.sum(y == y_val) / total for y_val in self.y_vals]

    def predict_score(self, X_new):
        """Compute the log-posterior score (up to a constant) per class.

        Args:
            X_new (numpy.array): preprocessed Design Matrix

        Returns:
            numpy.array: (observations, classes) matrix holding the sum of
                per-feature normal log-densities plus the log prior
        """
        num_obs = X_new.shape[0]
        num_classes = self.mean_array.shape[0]
        ans = np.zeros((num_obs, num_classes))
        for k in range(num_classes):
            # loc/scale broadcast across the rows, so all features of all
            # observations are evaluated in one call.
            log_density = norm.logpdf(X_new, loc=self.mean_array[k],
                                      scale=self.std_array[k])
            ans[:, k] = np.sum(log_density, axis=1)
        return ans + np.log(self.prior)

    def predict_class(self, predicted_score):
        """Return, per row, the label of the highest-scoring class."""
        max_indicator = np.argmax(predicted_score, axis=1)
        return self.y_vals[max_indicator]

    def calc_predict_error(self, predicted_class, y):
        """Return the fraction of predictions that differ from ``y``."""
        truth = np.asarray(y).ravel()
        hits = np.asarray(predicted_class).ravel() == truth
        # float() keeps this a true division on every Python version.
        return 1 - np.sum(hits) / float(truth.size)
class Naive_Bayes(Classifier):
    """Gaussian naive Bayes classifier.

    Every feature is modelled per class by an independent normal
    distribution; prediction picks the class with the highest log-posterior
    score.

    Attributes:
        data_preprocessor (Data_Preprocessor): preprocessor built from the
            training Design Matrix and applied to every new Design Matrix
        y_vals (numpy.array): sorted unique class labels seen in training
        mean_array (numpy.array): (classes, features) per-class means
        std_array (numpy.array): (classes, features) per-class standard
            deviations, floored at 1e-03
        prior (list): per-class relative frequencies in the training labels
    """

    def __init__(self, X, y):
        """Initialize and train the classifier.

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
        """
        # Per the original author's note, Data_Preprocessor copies X.
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        y = np.copy(y)
        self.y_vals = np.unique(y)
        self.mean_array, self.std_array = self.estimate_mean_std(X, y)
        self.prior = self.calc_prior(y)

    def predict(self, X_new, output=0):
        """Predict the class label of every row of a new Design Matrix.

        Args:
            X_new (numpy.array): new Design Matrix
            output (int, optional): unused; kept for interface compatibility

        Returns:
            numpy.array: vector of predicted class labels
        """
        X_new = self.data_preprocessor.predict(X_new)
        return self.predict_class(self.predict_score(X_new))

    def validate(self, X_new, y_new, output=0):
        """Compute the prediction error on a labelled Design Matrix.

        Args:
            X_new (numpy.array): new Design Matrix
            y_new (numpy.array): true class labels for X_new
            output (int, optional): unused; kept for interface compatibility

        Returns:
            float: fraction of misclassified observations
        """
        return self.calc_predict_error(self.predict(X_new), y_new)

    def estimate_mean_std(self, X, y):
        """Estimate per-class feature means and standard deviations.

        Args:
            X (numpy.array): preprocessed Design Matrix
            y (numpy.array): Response Vector

        Returns:
            (numpy.array, numpy.array): (mean_array, std_array), both of
                shape (classes, features)
        """
        num_features = X.shape[1]
        num_classes = self.y_vals.size
        mean_array = np.zeros((num_classes, num_features))
        std_array = np.zeros((num_classes, num_features))
        for k, label in enumerate(self.y_vals):
            X_sub = X[y == label, :]
            mean_array[k, :] = np.mean(X_sub, axis=0)
            # With ddof=1 a single observation yields NaN, and NaN is not
            # caught by the "< 1e-03" floor below; leave such rows at zero
            # so they are floored like any other degenerate feature.
            if X_sub.shape[0] > 1:
                std_array[k, :] = np.std(X_sub, axis=0, ddof=1)
        # Floor the spread so the normal density stays well defined.
        std_array[std_array < 1e-03] = 1e-03
        return (mean_array, std_array)

    def calc_prior(self, y):
        """Return the relative frequency of each class label in ``y``."""
        total = float(y.size)
        return [np.sum(y == y_val) / total for y_val in self.y_vals]

    def predict_score(self, X_new):
        """Compute the log-posterior score (up to a constant) per class.

        Args:
            X_new (numpy.array): preprocessed Design Matrix

        Returns:
            numpy.array: (observations, classes) matrix holding the sum of
                per-feature normal log-densities plus the log prior
        """
        num_obs = X_new.shape[0]
        num_classes = self.mean_array.shape[0]
        ans = np.zeros((num_obs, num_classes))
        for k in range(num_classes):
            # loc/scale broadcast across the rows, so all features of all
            # observations are evaluated in one call.
            log_density = norm.logpdf(X_new, loc=self.mean_array[k],
                                      scale=self.std_array[k])
            ans[:, k] = np.sum(log_density, axis=1)
        return ans + np.log(self.prior)

    def predict_class(self, predicted_score):
        """Return, per row, the label of the highest-scoring class."""
        max_indicator = np.argmax(predicted_score, axis=1)
        return self.y_vals[max_indicator]

    def calc_predict_error(self, predicted_class, y):
        """Return the fraction of predictions that differ from ``y``."""
        truth = np.asarray(y).ravel()
        hits = np.asarray(predicted_class).ravel() == truth
        # float() keeps this a true division on every Python version.
        return 1 - np.sum(hits) / float(truth.size)
class SVM_SMO(Classifier):
    """class SVM_SMO which implements the SMO algorithm for training
    (linear) SVM

    Attributes:
        data_preprocessor (Data_Preprocessor): a Data_Preprocessor instance
        loglist (list): store dual objective function value for each
            iteration
        X (numpy.array): Design Matrix
        y (numpy.array): Response Vector recoded to {-1, 1}
        y_vals (numpy.array): class labels
        alpha_array (numpy.array): trained dual variables
        b (list): one-element list holding the trained threshold
    """

    def __init__(self, X, y, C, iter_max):
        """initialize the classifier and train the model

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector; two distinct labels expected
            C (float): penalty parameter
            iter_max (int): maximum iteration for the algorithm
        """
        self.data_preprocessor = Data_Preprocessor(X)
        self.X = self.data_preprocessor.predict(X)
        self.y_vals = unique(y)
        first_label = self.y_vals[0]
        # Recode from the ORIGINAL labels into a new array. Overwriting in
        # place, one label after the other, merges both classes whenever the
        # second label is itself -1; it also leaves the caller's y untouched.
        self.y = array([-1 if label == first_label else 1 for label in y])
        self.loglist = []
        self.alpha_array, self.b = self.train_svm(self.X, self.y, C, iter_max)

    def predict(self, X, output=0):
        """make prediction for the new Design Matrix X

        Args:
            X (numpy.array): new Design Matrix
            output (int, optional): unused; kept for interface compatibility

        Returns:
            list: predicted class labels
        """
        X = self.data_preprocessor.predict(X)
        return self.predict_class(self.predict_score(X))

    def validate(self, X, y, output=0):
        """validate prediction result for the new Design Matrix X and new
        Response Vector y

        Args:
            X (numpy.array): new Design Matrix
            y (numpy.array): new Response Vector
            output (int, optional): unused; kept for interface compatibility

        Returns:
            float: prediction error on the new Inputs
        """
        return self.calc_predict_error(self.predict(X), y)

    def predict_score(self, X):
        """calculate prediction score for new Design Matrix X

        Args:
            X (numpy.array): new (already preprocessed) Design Matrix

        Returns:
            numpy.array: vector of prediction score
        """
        return self.get_output(X, self.alpha_array, self.X, self.y, self.b)

    def predict_class(self, predicted_score):
        """predict the class label for each observation

        Args:
            predicted_score (numpy.array): vector of prediction score

        Returns:
            list: first label for negative scores, second label otherwise
        """
        return [self.y_vals[0] if s < 0 else self.y_vals[1]
                for s in predicted_score]

    def calc_predict_error(self, predicted_class, y):
        """Calculate the prediction error

        Args:
            predicted_class (numpy.array): vector of predicted class label
            y (numpy.array): vector of true class label

        Returns:
            float: overall error rate
        """
        predicted_indicator = array(predicted_class).ravel() == y.ravel()
        # float() keeps this a true division on every Python version.
        return 1 - sum(predicted_indicator) / float(y.size)

    def get_output(self, x_new, alpha_array, X, y, b):
        """Calculate f(x_new) = sum_i alpha_i * y_i * <x_i, x_new> - b

        Args:
            x_new (numpy.array): new design matrix (or a single row)
            alpha_array (numpy.array): current alpha array
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            b (list): one-element list holding the threshold

        Returns:
            numpy.array: f(x_new); because b is a list the result is an
                array even for a single row
        """
        signed_y = y * alpha_array
        return dot(dot(signed_y, X), x_new.T) - b

    def calc_objective_fast(self, alpha_array, y, K):
        """Calculate the dual objective function

        Args:
            alpha_array (numpy.array): current alpha array
            y (numpy.array): Response Vector
            K (numpy.array): kernel Matrix dot(X, X.T)

        Returns:
            float: dual objective function value
        """
        signed_y = y * alpha_array
        return sum(alpha_array) - 0.5 * dot(dot(signed_y, K), signed_y)

    def calc_LH(self, alpha1, alpha2, y1, y2, C):
        """Calculate the lower and upper bound for new alpha2

        Args:
            alpha1 (float): current value of the first multiplier
            alpha2 (float): current value of the second multiplier
            y1 (int): label of the first example
            y2 (int): label of the second example
            C (float): penalty parameter

        Returns:
            (L, H): ends of the feasible segment for the new alpha2
        """
        if y1 != y2:
            L = maximum(0, alpha2 - alpha1)
            H = minimum(C, C + alpha2 - alpha1)
        else:
            L = maximum(0, alpha1 + alpha2 - C)
            H = minimum(C, alpha1 + alpha2)
        return (L, H)

    def choose_i1(self, i2, alpha_index_nonbound, alpha_array, X, y, b):
        """Heuristically choose alpha1 to optimize given alpha2

        Picks the non-bound example whose error differs most from E2.

        Args:
            i2 (int): index of alpha2
            alpha_index_nonbound (numpy.array): array of nonbound alpha's
                index
            alpha_array (numpy.array): current alpha array
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            b (list): one-element list holding the threshold

        Returns:
            int: index of alpha1
        """
        E2 = self.get_output(X[i2, :], alpha_array, X, y, b) - y[i2]
        E1_array = self.get_output(
            X[alpha_index_nonbound, :], alpha_array, X, y, b) \
            - y[alpha_index_nonbound]
        index = argmax(fabs(E1_array - E2[0]))
        return alpha_index_nonbound[index]

    def update_b(self, b_old, alpha1, alpha2, a1, a2, x1, x2, y1, y2,
                 E1, E2, C):
        """update the threshold parameter b

        Args:
            b_old (float): current threshold
            alpha1 (float): old value of the first multiplier
            alpha2 (float): old value of the second multiplier
            a1 (float): new value of the first multiplier
            a2 (float): new value of the second multiplier
            x1 (numpy.array): first example
            x2 (numpy.array): second example
            y1 (int): label of the first example
            y2 (int): label of the second example
            E1 (float): error on the first example
            E2 (float): error on the second example
            C (float): penalty parameter

        Returns:
            float: updated parameter b
        """
        b1 = E1 + y1 * (a1 - alpha1) * dot(x1, x1) + y2 * \
            (a2 - alpha2) * dot(x1, x2) + b_old
        b2 = E2 + y1 * (a1 - alpha1) * dot(x1, x2) + y2 * \
            (a2 - alpha2) * dot(x2, x2) + b_old
        a1_at_bound = (a1 == C or a1 == 0)
        a2_at_bound = (a2 == C or a2 == 0)
        if a1_at_bound and a2_at_bound:
            return (b1 + b2) / 2
        elif a1_at_bound:
            return b2
        else:
            return b1

    def take_step(self, i1, i2, alpha_array, X, y, C, b, K):
        """Optimize given alpha1 and alpha2

        On success alpha_array and b are updated in place.

        Args:
            i1 (int): index of alpha1
            i2 (int): index of alpha2
            alpha_array (numpy.array): current alpha array
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            C (float): penalty parameter
            b (list): one-element list holding the threshold
            K (numpy.array): kernel Matrix dot(X, X.T)

        Returns:
            boolean: True if we optimize the alpha pairs, otherwise False
        """
        if i1 == i2:
            return False
        eps = 1e-05
        alpha1, alpha2 = alpha_array[[i1, i2]]
        y1, y2 = y[[i1, i2]]
        X1, X2 = X[[i1, i2]]
        # get_output yields a one-element array for a single row; take the
        # scalar so that everything written back into alpha_array and b is
        # a scalar as well.
        E1 = self.get_output(X1, alpha_array, X, y, b)[0] - y1
        E2 = self.get_output(X2, alpha_array, X, y, b)[0] - y2
        s = y1 * y2
        L, H = self.calc_LH(alpha1, alpha2, y1, y2, C)
        if fabs(L - H) < eps:
            return False
        k11 = dot(X1, X1)
        k12 = dot(X1, X2)
        k22 = dot(X2, X2)
        eta = 2 * k12 - k11 - k22
        if eta < 0:
            # Unconstrained optimum along the constraint line, clipped to
            # the feasible segment [L, H].
            a2 = alpha2 - y2 * (E1 - E2) / eta
            if a2 < L:
                a2 = L
            elif a2 > H:
                a2 = H
        else:
            # Degenerate curvature: evaluate the dual objective at both
            # ends of the segment and keep the larger one.
            alpha_array[i2] = L
            alpha_array[i1] = alpha1 + s * (alpha2 - L)
            Lobj = self.calc_objective_fast(alpha_array, y, K)
            alpha_array[i2] = H
            alpha_array[i1] = alpha1 + s * (alpha2 - H)
            Hobj = self.calc_objective_fast(alpha_array, y, K)
            # Restore the probed entries before deciding.
            alpha_array[i1] = alpha1
            alpha_array[i2] = alpha2
            if Lobj > Hobj + eps:
                a2 = L
            elif Lobj < Hobj - eps:
                a2 = H
            else:
                a2 = alpha2
        # Snap values that are numerically at a bound onto the bound.
        if a2 < eps:
            a2 = 0
        elif a2 > C - eps:
            a2 = C
        if fabs(a2 - alpha2) < eps * (a2 + alpha2 + eps):
            return False
        a1 = alpha1 + s * (alpha2 - a2)
        b[0] = self.update_b(b[0], alpha1, alpha2, a1, a2,
                             X1, X2, y1, y2, E1, E2, C)
        alpha_array[i1] = a1
        alpha_array[i2] = a2
        return True

    def examine_example(self, i2, alpha_array, X, y, C, b, K):
        """Given alpha2, find alpha1 and optimize them

        Args:
            i2 (index): index of alpha2
            alpha_array (numpy.array): current array of alpha
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            C (float): penalty parameter
            b (list): one-element list holding the threshold
            K (numpy.array): kernel Matrix dot(X, X.T)

        Returns:
            boolean: True if we optimize alpha2, otherwise False
        """
        n = X.shape[0]
        y2 = y[i2]
        alpha2 = alpha_array[i2]
        E2 = self.get_output(X[i2], alpha_array, X, y, b)[0] - y2
        r2 = E2 * y2
        tol = 1e-03
        violates_kkt = (r2 < -tol and alpha2 < C) or (r2 > tol and alpha2 > 0)
        if not violates_kkt:
            return False

        # find those nonbound alphas
        alpha_index_nonbound = [i for i in range(n)
                                if alpha_array[i] != 0 and alpha_array[i] != C]
        num_nonbound = len(alpha_index_nonbound)
        if num_nonbound > 1:
            # heuristically choose i1
            i1 = self.choose_i1(
                i2, alpha_index_nonbound, alpha_array, X, y, b)
            if i1 >= 0 and self.take_step(i1, i2, alpha_array, X, y, C, b, K):
                return True

        # iterate over all nonbound alphas using random start index;
        # skipped when there are none, since choice() rejects an empty range
        if num_nonbound > 0:
            start_index = int(choice(num_nonbound, 1)[0])
            rotated = (alpha_index_nonbound[start_index:]
                       + alpha_index_nonbound[:start_index])
            for i1 in rotated:
                if self.take_step(i1, i2, alpha_array, X, y, C, b, K):
                    return True

        # iterate over all alphas using random start index, wrapping
        # around so that every index is visited exactly once
        start_index = int(choice(n, 1)[0])
        alpha_index_modified = concatenate(
            (arange(start_index, n), arange(0, start_index)))
        for i1 in alpha_index_modified:
            if self.take_step(i1, i2, alpha_array, X, y, C, b, K):
                return True
        return False

    def train_svm(self, X, y, C, iter_max):
        """Train SVM using SMO algorithm

        Alternates between a sweep over all examples and sweeps over the
        non-bound examples only.

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector {-1, 1}
            C (float): penalty parameter
            iter_max (int): maximum number of iterations

        Returns:
            (numpy.array, list): (alpha_array, b), trained parameters
        """
        n = X.shape[0]
        K = dot(X, X.T)
        alpha_array = zeros(n)
        b = [0]
        num_changed = 0
        examine_all = True
        while num_changed > 0 or examine_all:
            num_changed = 0
            if examine_all:
                alpha_index = permutation(range(n))
            else:
                alpha_index = permutation(
                    [i for i in range(n)
                     if alpha_array[i] != 0 and alpha_array[i] != C])
            for i2 in alpha_index:
                if self.examine_example(i2, alpha_array, X, y, C, b, K):
                    num_changed += 1
                # NOTE(review): the objective is logged and the budget is
                # decremented once per examined example, not once per
                # successful step - confirm this is the intended meaning
                # of an "iteration".
                self.loglist.append(
                    self.calc_objective_fast(alpha_array, y, K))
                iter_max -= 1
                if iter_max < 0:
                    break
            if iter_max < 0:
                break
            # stop if the number of changed alphas are less than n / 10
            if num_changed < n / 10:
                break
            if examine_all:
                examine_all = False
            elif num_changed == 0:
                examine_all = True
        return (alpha_array, b)
class SVM_SGD(Classifier):
    """class SVM_SGD which implements the Pegasos algorithm for training
    linear SVM

    Attributes:
        data_preprocessor (Data_Preprocessor): a Data_Preprocessor instance
        loglist (list): store primal objective function value for each
            iteration
        weight (numpy.array): trained weight
        y_vals (numpy.array): class labels
    """

    def __init__(self, X, y, para_lambda, k):
        """initialize the classifier and train the model

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector holding exactly two distinct
                labels
            para_lambda (float): regularization parameter
            k (int): maximum training sample size for each iteration

        Raises:
            ValueError: if k is not positive or y does not contain exactly
                two distinct labels
        """
        if k <= 0:
            raise ValueError("k must be a positive integer")
        self.data_preprocessor = Data_Preprocessor(X)
        X = self.data_preprocessor.predict(X)
        labels = array(y)
        self.y_vals = unique(labels)
        if self.y_vals.size != 2:
            raise ValueError("y must contain exactly two distinct labels")
        # Recode through a mask into a fresh vector.  Overwriting y in place,
        # one label after the other, merges both classes when the second
        # label equals -1 and fails for label dtypes that cannot store -1.
        y_coded = zeros(labels.shape)
        y_coded.fill(1)
        y_coded[labels == self.y_vals[0]] = -1
        self.loglist = []
        self.weight = self.calc_weight(X, y_coded, para_lambda, k)

    def predict(self, X, output=0):
        """make prediction for the new Design Matrix X

        Args:
            X (numpy.array): new Design Matrix
            output (int, optional): unused

        Returns:
            numpy.array: vector of prediction class
        """
        X = self.data_preprocessor.predict(X)
        return self.predict_class(self.predict_score(X))

    def validate(self, X, y, output=0):
        """validate prediction result for the new Design Matrix X and new
        Response Vector y

        Args:
            X (numpy.array): new Design Matrix
            y (numpy.array): new Response Vector
            output (int, optional): unused

        Returns:
            float: prediction error on the new Inputs
        """
        return self.calc_predict_error(self.predict(X), y)

    def predict_score(self, X):
        """calculate prediction score for new Design Matrix X

        Args:
            X (numpy.array): new Design Matrix (already preprocessed)

        Returns:
            numpy.array: vector of prediction score
        """
        return dot(X, self.weight)

    def predict_class(self, predicted_score):
        """predict the class label for each observation in the new Design
        Matrix X

        Args:
            predicted_score (numpy.array): vector of prediction score

        Returns:
            numpy.array: vector of predicted class label; negative scores map
                to y_vals[0], everything else to y_vals[1]
        """
        scores = array(predicted_score)
        # "not negative" instead of ">= 0" so that NaN scores keep mapping to
        # the second label, as the element-wise test "s < 0" did.
        return self.y_vals[(~(scores < 0)).astype(int)]

    def calc_predict_error(self, predicted_class, y):
        """Calculate the prediction error

        Args:
            predicted_class (numpy.array): vector of predicted class label
            y (numpy.array): vector of true class label

        Returns:
            float: overall error rate

        Raises:
            ValueError: if y is empty
        """
        truth = array(y).ravel()
        if truth.size == 0:
            raise ValueError("cannot compute an error rate without labels")
        matches = sum(array(predicted_class).ravel() == truth)
        # float() keeps the ratio exact under integer-division semantics
        return 1 - matches / float(truth.size)

    def calc_weight(self, X, y, para_lambda, k, max_iter=10000, tol=0.01):
        """estimate the weight vector given X, y, para_lambda and k

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector coded as -1 / 1
            para_lambda (float): regularization parameter
            k (int): maximum training sample size for each iteration
            max_iter (int, optional): exclusive upper bound of the iteration
                counter, which starts at 1
            tol (float, optional): training stops once the squared distance
                between two consecutive weight vectors drops below this value

        Returns:
            numpy.array: the trained weight vector
        """
        n, p = X.shape
        weight = self.initialize_weight(p, para_lambda)
        for i in range(1, max_iter):
            X_work, y_work = self.select_workset(X, y, weight, k)
            self.loglist.append(
                self.calc_loss_function(X, y, weight, para_lambda))
            weight_new = self.update_weight(
                X_work, y_work, weight, para_lambda, k, i)
            if sum((weight_new - weight) ** 2) < tol:
                break
            weight = weight_new
        return weight

    def select_workset(self, X, y, weight, k):
        """Select training set for each iteration

        Draws k row indices with replacement and keeps the rows that violate
        the margin (y * <x, weight> < 1), redrawing until at least one row is
        kept.

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            weight (numpy.array): weight vector
            k (int): maximum training sample size for each iteration

        Returns:
            (numpy.array, numpy.array): (X_train, y_train); both are empty
                when no row of X violates the margin
        """
        n = X.shape[0]
        # Without a single margin violator the redraw loop below could never
        # terminate, so hand back an empty working set instead.
        if not ((dot(X, weight) * y) < 1).any():
            return (X[:0, :], y[:0])
        while True:
            index = choice(n, k)
            violating = (dot(X[index, :], weight) * y[index]) < 1
            index = index[violating]
            if index.size > 0:
                return (X[index, :], y[index])

    def initialize_weight(self, p, para_lambda):
        """initialize the weight vector

        Every entry has magnitude sqrt(1 / (p * para_lambda)); the entries at
        p // 2 randomly drawn positions (drawn with replacement) are negated.

        Args:
            p (int): number of features in the Design Matrix
            para_lambda (float): regularization parameter

        Returns:
            numpy.array: a satisfactory weight vector
        """
        weight = zeros(p)
        weight.fill(sqrt(1.0 / (p * para_lambda)))
        neg_index = choice(p, size=p // 2)
        weight[neg_index] = -weight[neg_index]
        return weight

    def update_weight(self, X, y, weight, para_lambda, k, iter_num):
        """update the weight vector

        Args:
            X (numpy.array): Design Matrix of the working set
            y (numpy.array): Response Vector of the working set
            weight (numpy.array): weight vector
            para_lambda (float): regularization parameter
            k (int): maximum training sample size for each iteration
            iter_num (int): current iteration number

        Returns:
            numpy.array: an updated weight vector
        """
        eta = 1.0 / (para_lambda * iter_num)  # step size
        weight_half = ((1 - eta * para_lambda) * weight +
                       eta / k * dot(y, X))
        if sum(weight_half ** 2) < 1e-07:
            # lift a (near) zero vector so the rescaling below cannot divide
            # by zero
            weight_half = maximum(weight_half, 1e-04)
        # shrink the vector whenever its norm exceeds 1 / sqrt(para_lambda)
        scale = minimum(
            1, 1 / sqrt(para_lambda) / sqrt(sum(weight_half ** 2)))
        return scale * weight_half

    def calc_loss_function(self, X, y, weight, para_lambda):
        """calculate the primal objective function for linear SVM

        Args:
            X (numpy.array): Design Matrix
            y (numpy.array): Response Vector
            weight (numpy.array): weight parameter
            para_lambda (float): regularization parameter

        Returns:
            float: current value of the primal objective function
        """
        n = X.shape[0]
        hinge = 1 - y * dot(X, weight)
        return (sum(hinge[hinge > 0]) / float(n) +
                para_lambda / 2.0 * dot(weight, weight))