def main():
    """Command-line entry point: train an SgdLR model and print its statistics.

    Expects four CSV filenames plus learning rate, batch size, and epoch
    count on the command line; an optional --seed fixes the RNG.
    """
    # Declare the expected command-line interface.
    parser = argparse.ArgumentParser()
    parser.add_argument("xTrain",
                        help="filename for features of the training data")
    parser.add_argument("yTrain",
                        help="filename for labels associated with training data")
    parser.add_argument("xTest",
                        help="filename for features of the test data")
    parser.add_argument("yTest",
                        help="filename for labels associated with the test data")
    parser.add_argument("lr", type=float, help="learning rate")
    parser.add_argument("bs", type=int, help="batch size")
    parser.add_argument("epoch", type=int, help="max number of epochs")
    parser.add_argument("--seed", default=334, type=int,
                        help="default seed number")
    args = parser.parse_args()

    # Read each CSV into a numpy array.
    xTrain, yTrain, xTest, yTest = (
        file_to_numpy(name)
        for name in (args.xTrain, args.yTrain, args.xTest, args.yTest)
    )

    # Fix the RNG so repeated runs are reproducible.
    np.random.seed(args.seed)

    # Fit the SGD linear regression and show the collected statistics.
    sgd_model = SgdLR(args.lr, args.bs, args.epoch)
    print(sgd_model.train_predict(xTrain, yTrain, xTest, yTest))
def main():
    """Command-line entry point: fit a closed-form StandardLR model and
    print its train/test statistics.

    Expects four CSV filenames (train/test features and labels) on the
    command line.
    """
    parser = argparse.ArgumentParser()
    # The four positional arguments share the same shape, so register
    # them from a table of (name, help) pairs.
    for arg_name, arg_help in (
        ("xTrain", "filename for features of the training data"),
        ("yTrain", "filename for labels associated with training data"),
        ("xTest", "filename for features of the test data"),
        ("yTest", "filename for labels associated with the test data"),
    ):
        parser.add_argument(arg_name, help=arg_help)
    args = parser.parse_args()

    # Load the four CSV files into numpy arrays.
    xTrain = file_to_numpy(args.xTrain)
    yTrain = file_to_numpy(args.yTrain)
    xTest = file_to_numpy(args.xTest)
    yTest = file_to_numpy(args.yTest)

    # Train the closed-form linear regression and report its statistics.
    print(StandardLR().train_predict(xTrain, yTrain, xTest, yTest))
def _random_color():
    """Return a random '#rrggbb' hex color string for plotting."""
    return '#' + "%06x" % random.randint(0, 0xFFFFFF)


def main():
    """
    Main file to run from the command line.

    Trains one SgdLR model per (batch size, learning rate) pair, then a
    closed-form StandardLR model, and plots train MSE (top subplot) and
    test MSE (bottom subplot) against cumulative training time on a
    log-scaled time axis.
    """
    # set up the program to take in arguments from the command line
    parser = argparse.ArgumentParser()
    parser.add_argument("xTrain",
                        help="filename for features of the training data")
    parser.add_argument(
        "yTrain", help="filename for labels associated with training data")
    parser.add_argument("xTest",
                        help="filename for features of the test data")
    parser.add_argument(
        "yTest", help="filename for labels associated with the test data")
    parser.add_argument("lr", type=float, help="learning rate")
    parser.add_argument("bs", type=int, help="batch size")
    parser.add_argument("epoch", type=int, help="max number of epochs")
    parser.add_argument("--seed", default=334, type=int,
                        help="default seed number")
    args = parser.parse_args()

    # load the train and test data
    xTrain = file_to_numpy(args.xTrain)
    yTrain = file_to_numpy(args.yTrain)
    xTest = file_to_numpy(args.xTest)
    yTest = file_to_numpy(args.yTest)

    # setting the seed for deterministic behavior
    np.random.seed(args.seed)

    # Hand-picked batch sizes and a matching learning rate for each.
    # NOTE(review): 100 appears twice in lr — confirm these are the
    # intended tuned values.
    batchSize = [1, 10, 30, 130, 215, 640, 16770]
    lr = [.01, 1, 10, 100, 100, 1000, 100000]

    # Goes through a list of batch sizes and ideal learning rates and
    # graphs the required information.
    for i in range(len(batchSize)):
        model = SgdLR(lr[i], batchSize[i], args.epoch)
        trainStats = model.train_predict(xTrain, yTrain, xTest, yTest)

        # Collect per-key time / train-mse / test-mse series from the
        # stats dict returned by train_predict.
        time = []
        trainMse = []
        testMse = []
        for key in trainStats:
            temp = trainStats[key]
            time.append(temp['time'])
            trainMse.append(temp['train-mse'])
            testMse.append(temp['test-mse'])

        # Top subplot: training MSE; bottom subplot: test MSE.
        plt.subplot(2, 1, 1)
        plt.scatter(time, trainMse, c=_random_color(),
                    label='BS: %d ' % (batchSize[i]))
        plt.subplot(2, 1, 2)
        plt.scatter(time, testMse, c=_random_color(),
                    label='BS: %d ' % (batchSize[i]))

    # Closed-form baseline: a single (time, mse) point on each subplot.
    model = StandardLR()
    trainStats = model.train_predict(xTrain, yTrain, xTest, yTest)
    plt.subplot(2, 1, 1)
    # BUG FIX: matplotlib's keyword is 'markersize'; the original
    # 'markerSIze' is not a Line2D property and raises at runtime.
    plt.plot((trainStats[0])['time'], (trainStats[0])['train-mse'],
             c=_random_color(), label='Closed Form', marker='o',
             markersize=12)
    plt.subplot(2, 1, 2)
    plt.plot((trainStats[0])['time'], (trainStats[0])['test-mse'],
             c=_random_color(), label='Closed Form', marker='o',
             markersize=12)

    # Final axis cosmetics for both subplots.
    plt.subplot(2, 1, 1)
    plt.xscale('log')
    plt.xlabel('Total Time')
    plt.ylabel('MSE')
    plt.title('Training-MSE')
    plt.legend()
    plt.subplot(2, 1, 2)
    plt.xscale('log')
    plt.xlabel('Total Time')
    plt.ylabel('MSE')
    plt.title('Test-MSE')
    plt.legend()
    plt.show()
# Script: for each batch size, plot training MSE vs. epoch for several
# learning rates, using the preprocessed CSV files in the working directory.
import sgdLR
import lr
import matplotlib.pyplot as plt

xTrain = lr.file_to_numpy('new_xTrain.csv')
yTrain = lr.file_to_numpy('eng_yTrain.csv')
xTest = lr.file_to_numpy('new_xTest.csv')
yTest = lr.file_to_numpy('eng_yTest.csv')

batch_size = [1, 86, 215, 5590, 16770]
learning_rate = [1, 0.1, 0.001, 0.0005]

# NOTE(review): j starts at 1, so batch_size[0] (= 1) is skipped —
# presumably intentional (batch size 1 handled elsewhere); confirm.
for j in range(1, len(batch_size)):
    result_mse = []
    epoch_range = range(1, 30)
    for i in range(len(learning_rate)):
        result_mse.append([])
        model = sgdLR.SgdLR(learning_rate[i], batch_size[j], 30)
        trainStats = model.train_predict(xTrain, yTrain, xTest, yTest)
        for epoch in epoch_range:
            # Index of the last batch-update of this epoch.
            # BUG FIX: use integer division — in Python 3, '/' yields a
            # float, and a float key raises KeyError on the int-keyed
            # trainStats dict (the divisions here are exact, so the
            # numeric value is unchanged).
            result_mse[i].append(
                trainStats[len(xTrain) // batch_size[j] * epoch - 1]['train-mse'])
    fig = plt.figure()
    plt.plot(range(1, 30), result_mse[0], 'b-', label='lr=1')
    plt.plot(range(1, 30), result_mse[1], 'r-', label='lr=0.1')
    plt.plot(range(1, 30), result_mse[2], 'g-', label='lr=0.001')
    plt.plot(range(1, 30), result_mse[3], 'm-', label='lr=0.0005')
def main(): """ Main file to run from the command line. """ # set up the program to take in arguments from the command line parser = argparse.ArgumentParser() parser.add_argument("xTrain", help="filename for features of the training data") parser.add_argument("yTrain", help="filename for labels associated with training data") parser.add_argument("xTest", help="filename for features of the test data") parser.add_argument("yTest", help="filename for labels associated with the test data") parser.add_argument("lr", type=float, help="learning rate") parser.add_argument("bs", type=int, help="batch size") parser.add_argument("epoch", type=int, help="max number of epochs") parser.add_argument("--seed", default=334, type=int, help="default seed number") args = parser.parse_args() # load the train and test data xTrain = file_to_numpy(args.xTrain) yTrain = file_to_numpy(args.yTrain) xTest = file_to_numpy(args.xTest) yTest = file_to_numpy(args.yTest) # setting the seed for deterministic behavior np.random.seed(args.seed) # # ORIGINAL CODE # # model = SgdLR(args.lr, args.bs, args.epoch) # trainStats = model.train_predict(xTrain, yTrain, xTest, yTest) # print(trainStats) # END ORIGINAL CODE # # # Code for Question 3B ################################ # for lr in [0.1, 0.01, 0.001, 0.0001, .00001]: # For each learning rate, train a model and plot the train mse # model = SgdLR(lr, 1, args.epoch) # trainStats = model.train_predict(xTrain, yTrain, xTest, yTest, fraction_train_data=.4) # results = [trainStats[i]['train-mse'] for i in trainStats.keys()] # plt.plot(results, label=lr) # # plt.title("Training MSE for Various Learning Rates with Batch Size 1 and 100 Epochs") # plt.ylim(top=1.25, bottom=.3) # The plot y axis was limited so that large values wouldn't # plt.xlabel("Epoch") # squeeze the small values. Reduced distortion but # plt.ylabel("MSE") # higher, irrelevant initial MSE's were cut off. 
# plt.legend() # plt.show() # # Code for Question 3C ################################ # model = SgdLR(.001, 1, args.epoch) # the optimal MSE was chose from the above plot manually # trainStats = model.train_predict(xTrain, yTrain, xTest, yTest) # train model using lr=.001 # results = [trainStats[i]['train-mse'] for i in trainStats.keys()] # plt.plot(results, label='train-mse-lr=.001') # results = [trainStats[i]['test-mse'] for i in trainStats.keys()] # plt.plot(results, label='test-mse-lr=.001') # # # plots the train and test mse for the optimal lr over the number of epochs # # plt.title("Training and Test MSE for .001 Learning Rate with Batch Size 1 and 100 Epochs") # plt.xlabel("Epoch") # plt.ylabel("MSE") # plt.legend() # plt.show() # # Code for Question 4A ################################ # The following code plots different learning rates for each batch size to find the optimal one # makes 12 plots, one for each batch size, not included in write up because way too many plots list_of_batches = [1, 5, 10, 15, 26, 39, 65, 86, 129, 195, 258, len(xTrain)]