def main():
    """Vectorize the income train/dev splits with binary features.

    The training pass derives the feature vocabulary; the dev pass reuses
    it so both matrices share the same column layout.
    """
    # First pass: no existing feature vector — build one from training data.
    train_path = "../machine_learning/income-data/income.train.txt.5k"
    X, Y, features = get_binary_features(train_path)

    # Second pass: reuse the learned feature vector so dev columns align.
    dev_path = "../machine_learning/income-data/income.dev.txt"
    X_dev, Y_dev, features = get_binary_features(dev_path, features)
def main():
    """Train and evaluate perceptron variants (online, averaged, MIRA)."""
    # Featurize training data, deriving the feature vocabulary.
    train_file = "../income-data/income.train.txt"
    X, Y, features = get_binary_features(train_file)
    print(X.shape)

    # Featurize the dev split against that same vocabulary.
    dev_file = "../income-data/income.dev.txt"
    X_dev, Y_dev, features = get_binary_features(dev_file, features)
    print(X_dev.shape)

    n_rows = X.shape[0]
    n_feats = X.shape[1]

    perceptron = Perceptron(feature_size=n_feats)
    perceptron.reset()

    # One epoch of plain online perceptron updates, row by row.
    for _ in range(1):
        for row in range(n_rows):
            perceptron.train_online(X[row, :], Y[row])
    print("After single training: ")
    print(perceptron.test(X, Y))

    # Averaged perceptron (weights averaged over updates).
    print("Average")
    perceptron.reset()
    perceptron.average_train(X, Y, maxIter=5)

    # Naive averaging variant with a higher iteration cap.
    print("Naive average (with maximum iterations)")
    perceptron.reset()
    perceptron.naive_average_train(X, Y, maxIter=10)
    print(perceptron.test(X, Y))

    # MIRA with zero aggressiveness, trained for ten epochs.
    print("MIRA")
    mira = Perceptron(feature_size=n_feats, mira_aggro=0.0)
    for _ in range(10):
        for row in range(n_rows):
            mira.train_mira(X[row, :], Y[row])
    print(mira.test(X, Y))

    # Averaged MIRA.
    print("MIRA Average")
    mira.reset()
    mira.train_mira_average(X, Y, maxIter=5)
    print(mira.test(X, Y))
def main():
    """Evaluate perceptron, averaged perceptrons, and MIRA on the dev set.

    Each model is trained on the income training split and scored against
    the dev split; progress and best-so-far statistics are printed.
    """
    divider = "---------------------------------------------------------------"

    # Featurize train (building the feature map), then dev against it.
    train_file = "../income-data/income.train.txt"
    X, Y, features = get_binary_features(train_file)
    print(X.shape)
    dev_file = "../income-data/income.dev.txt"
    X_dev, Y_dev, features = get_binary_features(dev_file, features)
    print(X_dev.shape)

    print(divider)
    print(divider)

    n_rows = X.shape[0]
    perceptron = Perceptron(feature_size=X.shape[1])
    perceptron.reset()

    epochs = 5
    updates = 0
    max_score = 0
    max_score_epoch = 0
    best_err_rate = 0
    err_rate_list = np.array([])

    print("START PERCEPTRON")
    for epoch in range(epochs):
        print("EPOCH ", epoch + 1)
        for row in range(n_rows):
            perceptron.train_online(X[row, :], Y[row])
            updates += 1
            # Every 1000 online updates, score on dev and track the best
            # model seen so far (and the fractional epoch it occurred at).
            if updates % 1000 == 0:
                score = perceptron.test(X_dev, Y_dev)
                err_rate = (1.0 - score) * 100
                err_rate_list = np.append(err_rate_list, err_rate)
                epoch_v = (1.0 * epoch) + ((1.0 * row) / n_rows)
                print("Epoch: ", epoch_v, "Score: ", score, "Error Rate: ", err_rate)
                if max_score < score:
                    max_score = score
                    best_err_rate = (1.0 - max_score) * 100
                    max_score_epoch = (1.0 * epoch) + ((1.0 * row) / n_rows)

    avg_err_rate = np.average(err_rate_list)
    print("Average Error Rate: ", avg_err_rate)
    print("Max Score: ", max_score)
    print("Best Error Rate: ", best_err_rate)
    print("At Epoch: ", max_score_epoch)
    print("END PERCEPTRON")
    print(divider)

    print("START NAIVE AVERAGE PERCEPTRON")
    perceptron.reset()
    perceptron.naive_average_train(X, Y, maxIter=5)
    na_score = perceptron.test(X_dev, Y_dev) * 100
    na_err_rate = 100 - na_score
    print("Score: ", na_score, "Error Rate: ", na_err_rate)
    print("END NAIVE AVERAGE PERCEPTRON")
    print(divider)

    print("START SMART AVERAGE PERCEPTRON")
    perceptron.reset()
    perceptron.average_train(X, Y, maxIter=5)
    sa_score = perceptron.test(X_dev, Y_dev) * 100
    sa_err_rate = 100 - sa_score
    print("Score: ", sa_score, "Error Rate: ", sa_err_rate)
    print("END SMART AVERAGE PERCEPTRON")
    print(divider)

    print("START MIRA")
    perceptron.reset()
    mira = Perceptron(feature_size=X.shape[1], mira_aggro=0.0)
    for _ in range(5):
        for row in range(n_rows):
            mira.train_mira(X[row, :], Y[row])
    mira_score = mira.test(X_dev, Y_dev) * 100
    mira_err_rate = 100 - mira_score
    print("Score: ", mira_score, "Error Rate: ", mira_err_rate)
    print("END MIRA")

    print(divider)
    print(divider)
from data_reader import get_binary_features

if __name__ == "__main__":
    # If you do not have an existing feature vector:
    # build one from the 5k training split.
    data_set = "../income-data/income.train.txt.5k"
    X, Y, features = get_binary_features(data_set)

    # If you have an existing feature vector you want to compute against:
    # pass it in so the dev matrix shares the training column layout.
    data_set = "../income-data/income.dev.txt"
    X_dev, Y_dev, features = get_binary_features(data_set, features)
def main(C=1):
    """Fit a linear SVM on the income data and inspect support-vector slacks.

    Args:
        C: SVM regularization strength, forwarded to ``svm.SVC``.

    Prints the slack values xi_i = 1 - y_i * (w . x_i) for every support
    vector, the argsort of their magnitudes, and a few hand-picked entries.
    """
    import sys  # needed for the full-array print threshold below

    # If you do not have an existing feature vector
    data_set = "../machine_learning/income-data/income.train.txt.5k"
    X, Y, features = get_binary_features(data_set)
    # If you have an existing feature vector you want to compute against
    data_set = "../machine_learning/income-data/income.dev.txt"
    X_dev, Y_dev, features = get_binary_features(data_set, features)

    # svm.SVC expects 1-D label vectors.
    Y = np.ravel(Y)
    Y_dev = np.ravel(Y_dev)

    # -----------------------------------------------------------------------------
    start = time.time()
    clf = svm.SVC(kernel='linear', C=C)
    clf.fit(X, Y)
    end = time.time()
    # -----------------------------------------------------------------------------
    # Fit diagnostics (kept for interactive inspection / later use).
    supp_vec = clf.support_vectors_
    num_supp_vec = clf.n_support_
    train_score = clf.score(X, Y)
    train_error = 1.0 - train_score
    dev_score = clf.score(X_dev, Y_dev)
    dev_error = 1.0 - dev_score
    t = end - start
    # -----------------------------------------------------------------------------
    w = clf.coef_
    a = clf.dual_coef_
    sum_xi = 0.0
    # Slack for each support vector: xi_i = 1 - y_i * (w . x_i).
    # NOTE(review): this omits clf.intercept_; the textbook slack uses the
    # full decision value y_i * (w . x_i + b) — confirm the omission is
    # intentional before relying on these numbers.
    xi_array = np.zeros((len(supp_vec[:, 0]), 1), dtype=np.float64)
    for i in range(len(supp_vec[:, 0])):
        xi = 1 - (1.0 * Y[clf.support_[i]] * np.inner(w, X[clf.support_[i], :]))
        xi_array[i, 0] = xi
    print("------------------Xi-------------------")
    print(xi_array.shape)
    print(xi_array)
    # BUG FIX: modern NumPy rejects threshold=np.nan ("threshold must be
    # a non-NAN number"); sys.maxsize gives the intended "never summarize"
    # behavior.
    np.set_printoptions(threshold=sys.maxsize)
    abs_xi_array = np.transpose(abs(xi_array))
    print(abs_xi_array.argsort())
    # print(abs_xi_array)
    # Spot-check a few hand-picked support-vector slacks. Guard the indices:
    # a different C (or dataset) can yield fewer support vectors, and the
    # original unguarded prints raised IndexError in that case.
    for idx in (723, 232, 873, 107, 699):
        if idx < xi_array.shape[0]:
            print(xi_array[idx, 0])