def main(args):
    """Train a classifier and plot train/test F1 against number of epochs.

    Uses a neural network when ``args.hu > 0``, otherwise logistic
    regression.  For each epoch count ``e`` in ``1..args.epoch`` the model
    is retrained from scratch (the visible training API takes the epoch
    count as a parameter, so no incremental training is possible here),
    and the F1 score on both the train and test sets is recorded.  The
    resulting curves are saved under ``./figs/``.

    Expected ``args`` attributes: ``lr`` (learning rate), ``hu`` (hidden
    units), ``epoch`` (max epochs), ``train``/``test`` (JSON file paths).
    """
    lr = args.lr
    n_hu = args.hu
    epoch = args.epoch
    train = args.train
    test = args.test
    test = test.replace('\r', '')  # strip Windows carriage return from path
    # Derive a short dataset name from the test-file path for the caption.
    data_origin = test.replace('_test.json', '')
    data_origin = data_origin.replace('data/', '')

    # Load the data in and convert to numpy; preprocess test with the
    # statistics derived from the train set.
    with open(train, "r") as read_file:
        train = json.load(read_file)
    train = json2numpy(train)
    train = train.train_pp()
    with open(test, "r") as read_file:
        test = json.load(read_file)
    test = json2numpy(test)
    test = test.test_pp(train)

    xlist = []
    trainlist = []
    testlist = []
    if n_hu > 0:
        title = 'Neural Network: F1 vs. Epochs'
        filename = 'nnet_f1_curve.png'
        caption = ('Dataset: ' + data_origin + ', Learning rate: ' + str(lr)
                   + ' , Number of hidden units: ' + str(n_hu)
                   + ', Maximum number of epochs: ' + str(epoch))
        for e in range(1, epoch + 1):
            xlist.append(e)
            # Retrain for e epochs; predict returns (labels, F1) — index 1
            # is the F1 score.  Assumption from usage; TODO confirm.
            w = nn_train(lr, n_hu, e, train, screen_print=0)
            po = nn_predict(test, w, screen_print=0)
            testlist.append(po[1])
            po = nn_predict(train, w, screen_print=0)
            trainlist.append(po[1])
    else:
        title = 'Logistic Regression: F1 vs. Epochs'
        filename = 'logistic_f1_curve.png'
        caption = ('Dataset: ' + data_origin + ', Learning rate: ' + str(lr)
                   + ', Maximum number of epochs: ' + str(epoch))
        for e in range(1, epoch + 1):
            xlist.append(e)
            w = lr_train(lr, e, train, screen_print=0)
            po = lr_predict(test, w, screen_print=0)
            testlist.append(po[1])
            po = lr_predict(train, w, screen_print=0)
            trainlist.append(po[1])

    # Plot the train/test F1 curves and save the figure.
    plt.plot(xlist, trainlist)
    plt.plot(xlist, testlist)
    plt.legend(['Train', 'Test'], loc="lower right")
    plt.xlabel('Number of epochs')
    plt.ylabel('F1')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, epoch + 5])
    plt.title(caption, fontsize=9, wrap=True)
    plt.suptitle(title)
    plt.savefig('./figs/' + filename)
def main(args):
    """Run a tree ensemble on a test set and print its confusion matrix.

    Expected ``args`` attributes: ``method`` ('bag' or 'boost'),
    ``trees`` (ensemble size), ``depth`` (max tree depth), and
    ``train``/``test`` (JSON file paths).

    Prints an error and returns without evaluating when ``method`` is
    not recognized.
    """
    method = args.method
    trees = args.trees
    depth = args.depth
    train = args.train
    test = args.test
    test = test.replace('\r', '')  # strip Windows carriage return from path

    # Load the data in and convert to numpy.
    with open(train, "r") as read_file:
        train = json.load(read_file)
    train = json2numpy(train)
    with open(test, "r") as read_file:
        test = json.load(read_file)
    test = json2numpy(test)

    # Generate the predictions from the chosen ensemble method.
    if method == 'bag':
        predictions = bootstrap(trees, depth, train, test, display=False)
    elif method == 'boost':
        predictions = adaboost(trees, depth, train, test, display=False)
    else:
        print('Invalid ensemble method.')
        # Bug fix: return early — the original fell through and called
        # build_confusion_matrix with `predictions` undefined (NameError).
        return

    build_confusion_matrix(test, predictions, verbose=True)
def main(args):
    """Train and evaluate a bagged decision-tree ensemble.

    Expected ``args`` attributes: ``trees`` (ensemble size), ``depth``
    (max tree depth), and ``train``/``test`` (JSON file paths).
    """
    train_path = args.train
    test_path = args.test.replace('\r', '')  # strip Windows carriage return

    # Load both datasets and convert them to numpy-backed objects.
    with open(train_path, "r") as read_file:
        train = json2numpy(json.load(read_file))
    with open(test_path, "r") as read_file:
        test = json2numpy(json.load(read_file))

    bootstrap(args.trees, args.depth, train, test, display=True)
def main(args):
    """Train a logistic-regression model and predict on the test set.

    Expected ``args`` attributes: ``lr`` (learning rate), ``epoch``
    (number of epochs), and ``train``/``test`` (JSON file paths).
    """
    learning_rate = args.lr
    n_epochs = args.epoch
    test_path = args.test.replace('\r', '')  # strip Windows carriage return

    # Load the training data, convert to numpy, and preprocess.
    with open(args.train, "r") as read_file:
        train = json2numpy(json.load(read_file)).train_pp()
    # The test set is preprocessed using statistics from the train set.
    with open(test_path, "r") as read_file:
        test = json2numpy(json.load(read_file)).test_pp(train)

    # Fit the weights, then score the held-out set.
    w = lr_train(learning_rate, n_epochs, train)
    predictions = lr_predict(test, w)
def main(args):
    """Plot ensemble accuracy against ensemble size for three tree depths.

    For the chosen ``args.method`` ('bag' or 'boost'), trains ensembles of
    size ``1..args.trees`` at three maximum depths centered on
    ``args.depth`` (+/-2 for bagging, +/-1 for boosting), computes test
    accuracy for each, and saves one curve per depth to a PDF.

    Expected ``args`` attributes: ``method``, ``trees``, ``depth``, and
    ``train``/``test`` (JSON file paths).

    Prints an error and returns without plotting when ``method`` is not
    recognized.
    """
    method = args.method
    trees = args.trees
    depth = args.depth
    train = args.train
    test = args.test
    test = test.replace('\r', '')  # strip Windows carriage return from path
    # Derive a short dataset name from the test-file path for the caption.
    data_origin = test.replace('_test.json', '')
    data_origin = data_origin.replace('data/', '')

    # Load the data in and convert to numpy.
    with open(train, "r") as read_file:
        train = json.load(read_file)
    train = json2numpy(train)
    with open(test, "r") as read_file:
        test = json.load(read_file)
    test = json2numpy(test)

    # Pull the true labels for accuracy computation.
    truth = test.labels

    # Select the ensemble builder and the depth spread for the chosen method.
    if method == 'bag':
        title = 'Decision Tree Bagging: Accuracy vs. Number of trees'
        filename = 'bagged_tree_plot.pdf'
        ensemble = bootstrap
        offset = 2
    elif method == 'boost':
        title = 'Decision Tree AdaBoost: Accuracy vs. Number of trees'
        filename = 'boosted_tree_plot.pdf'
        ensemble = adaboost
        offset = 1
    else:
        print('Invalid ensemble method.')
        # Bug fix: return early — the original fell through to
        # plt.legend([depth1, depth2, depth3], ...) with those names
        # undefined (NameError).
        return

    caption = ('Dataset: ' + data_origin)
    depths = [depth - offset, depth, depth + offset]

    # One accuracy curve per maximum tree depth.
    for j in depths:
        xlist = []
        ylist = []
        for i in range(1, trees + 1):
            predictions = ensemble(i, j, train, test, display=False)
            accuracy = sum((truth == predictions).astype(float)) / test.length
            xlist.append(i)
            ylist.append(accuracy)
        plt.plot(xlist, ylist)

    # Plot the data.
    plt.legend(depths, loc="lower right", title="Maximum tree depth",
               fancybox=True)
    plt.xlabel('Ensemble size')
    plt.ylabel('Accuracy')
    plt.ylim([0.0, 1.05])
    plt.xlim([1, trees])
    plt.title(caption, fontsize=9, wrap=True)
    plt.suptitle(title)
    plt.savefig(filename)