Example #1
0
def main():
    """
    Main file to run from the command line.

    Parses the dataset filenames and SGD hyperparameters from argv,
    loads the data, trains an SgdLR model, and prints its statistics.
    """
    # configure the command-line interface
    cli = argparse.ArgumentParser()
    cli.add_argument("xTrain",
                     help="filename for features of the training data")
    cli.add_argument("yTrain",
                     help="filename for labels associated with training data")
    cli.add_argument("xTest", help="filename for features of the test data")
    cli.add_argument("yTest",
                     help="filename for labels associated with the test data")
    cli.add_argument("lr", type=float, help="learning rate")
    cli.add_argument("bs", type=int, help="batch size")
    cli.add_argument("epoch", type=int, help="max number of epochs")
    cli.add_argument("--seed", type=int, default=334,
                     help="default seed number")

    parsed = cli.parse_args()
    # read each CSV into a numpy array
    x_tr = file_to_numpy(parsed.xTrain)
    y_tr = file_to_numpy(parsed.yTrain)
    x_te = file_to_numpy(parsed.xTest)
    y_te = file_to_numpy(parsed.yTest)

    # seed numpy's RNG so repeated runs are deterministic
    np.random.seed(parsed.seed)
    sgd_model = SgdLR(parsed.lr, parsed.bs, parsed.epoch)
    print(sgd_model.train_predict(x_tr, y_tr, x_te, y_te))
Example #2
0
def main():
    """
    Main file to run from the command line.

    Parses the four dataset filenames from argv, loads the data,
    trains a closed-form StandardLR model, and prints its statistics.
    """
    # wire up the expected positional arguments
    cli = argparse.ArgumentParser()
    for arg_name, arg_help in (
            ("xTrain", "filename for features of the training data"),
            ("yTrain", "filename for labels associated with training data"),
            ("xTest", "filename for features of the test data"),
            ("yTest", "filename for labels associated with the test data")):
        cli.add_argument(arg_name, help=arg_help)

    parsed = cli.parse_args()
    # read each CSV into a numpy array
    x_tr = file_to_numpy(parsed.xTrain)
    y_tr = file_to_numpy(parsed.yTrain)
    x_te = file_to_numpy(parsed.xTest)
    y_te = file_to_numpy(parsed.yTest)

    # closed-form linear regression: fit on train, evaluate on test
    lin_reg = StandardLR()
    print(lin_reg.train_predict(x_tr, y_tr, x_te, y_te))
Example #3
0
def main():
    """
    Main file to run from the command line.

    Trains an SgdLR model for each (batch size, learning rate) pair,
    plus a closed-form StandardLR baseline, and plots train/test MSE
    against wall-clock time on two log-x subplots.
    """
    # set up the program to take in arguments from the command line
    parser = argparse.ArgumentParser()
    parser.add_argument("xTrain",
                        help="filename for features of the training data")
    parser.add_argument(
        "yTrain", help="filename for labels associated with training data")
    parser.add_argument("xTest", help="filename for features of the test data")
    parser.add_argument(
        "yTest", help="filename for labels associated with the test data")
    parser.add_argument("lr", type=float, help="learning rate")
    parser.add_argument("bs", type=int, help="batch size")
    parser.add_argument("epoch", type=int, help="max number of epochs")
    parser.add_argument("--seed",
                        default=334,
                        type=int,
                        help="default seed number")

    args = parser.parse_args()
    # load the train and test data
    xTrain = file_to_numpy(args.xTrain)
    yTrain = file_to_numpy(args.yTrain)
    xTest = file_to_numpy(args.xTest)
    yTest = file_to_numpy(args.yTest)

    # setting the seed for deterministic behavior
    np.random.seed(args.seed)

    def rand_color():
        # random hex color so each plotted series is visually distinct
        return '#' + "%06x" % random.randint(0, 0xFFFFFF)

    batchSize = [1, 10, 30, 130, 215, 640, 16770]
    # NOTE(review): 100 appears twice and values up to 100000 are unusually
    # large for learning rates — confirm these are the intended ideal lr's.
    lr = [.01, 1, 10, 100, 100, 1000, 100000]

    # Goes through a list of batch sizes and ideal learning rates and graphs the required information
    for i in range(len(batchSize)):
        model = SgdLR(lr[i], batchSize[i], args.epoch)
        trainStats = model.train_predict(xTrain, yTrain, xTest, yTest)

        # flatten the per-key stats dict into parallel lists for plotting
        time = []
        trainMse = []
        testMse = []
        for key in trainStats:
            temp = trainStats[key]
            time.append(temp['time'])
            trainMse.append(temp['train-mse'])
            testMse.append(temp['test-mse'])

        # top subplot: training MSE vs. time
        plt.subplot(2, 1, 1)
        plt.scatter(time, trainMse, c=rand_color(),
                    label='BS: %d ' % (batchSize[i]))

        # bottom subplot: test MSE vs. time
        plt.subplot(2, 1, 2)
        plt.scatter(time, testMse, c=rand_color(),
                    label='BS: %d ' % (batchSize[i]))

    # closed-form baseline: a single (time, mse) point per subplot
    model = StandardLR()
    trainStats = model.train_predict(xTrain, yTrain, xTest, yTest)

    plt.subplot(2, 1, 1)
    # BUG FIX: 'markerSIze' is not a valid Line2D property (plt.plot raises
    # AttributeError on unknown kwargs); the correct keyword is 'markersize'.
    plt.plot((trainStats[0])['time'], (trainStats[0])['train-mse'],
             c=rand_color(),
             label='Closed Form',
             marker='o',
             markersize=12)

    plt.subplot(2, 1, 2)
    plt.plot((trainStats[0])['time'], (trainStats[0])['test-mse'],
             c=rand_color(),
             label='Closed Form',
             marker='o',
             markersize=12)

    # label both subplots and show the figure
    plt.subplot(2, 1, 1)
    plt.xscale('log')
    plt.xlabel('Total Time')
    plt.ylabel('MSE')
    plt.title('Training-MSE')
    plt.legend()

    plt.subplot(2, 1, 2)
    plt.xscale('log')
    plt.xlabel('Total Time')
    plt.ylabel('MSE')
    plt.title('Test-MSE')
    plt.legend()

    plt.show()
Example #4
0
import sgdLR
import lr
import matplotlib.pyplot as plt


# Load the engineered train/test splits from CSV.
xTrain = lr.file_to_numpy('new_xTrain.csv')
yTrain = lr.file_to_numpy('eng_yTrain.csv')
xTest = lr.file_to_numpy('new_xTest.csv')
yTest = lr.file_to_numpy('eng_yTest.csv')

# Batch sizes to sweep (index 0 is intentionally skipped below) and the
# candidate learning rates, one curve per rate.
batch_size = [1, 86, 215, 5590, 16770]
learning_rate = [1, 0.1, 0.001, 0.0005]

for j in range(1, len(batch_size)):
    result_mse = []
    epoch_range = range(1, 30)
    for i in range(len(learning_rate)):
        result_mse.append([])
        model = sgdLR.SgdLR(learning_rate[i], batch_size[j], 30)
        trainStats = model.train_predict(xTrain, yTrain, xTest, yTest)
        # trainStats is keyed by update index; the last update of epoch e
        # sits at iters_per_epoch * e - 1.
        # BUG FIX: the original used true division ('/'), which yields a
        # float key and raises KeyError whenever len(xTrain) is not an
        # exact multiple of the batch size; '//' keeps the key an int.
        iters_per_epoch = len(xTrain) // batch_size[j]
        for epoch in epoch_range:
            result_mse[i].append(
                trainStats[iters_per_epoch * epoch - 1]['train-mse'])

    # One figure per batch size, one colored line per learning rate.
    plt.figure()
    plt.plot(epoch_range, result_mse[0], 'b-', label='lr=1')
    plt.plot(epoch_range, result_mse[1], 'r-', label='lr=0.1')
    plt.plot(epoch_range, result_mse[2], 'g-', label='lr=0.001')
    plt.plot(epoch_range, result_mse[3], 'm-', label='lr=0.0005')
Example #5
0
def main():
    """
    Main file to run from the command line.
    """
    # set up the program to take in arguments from the command line
    parser = argparse.ArgumentParser()
    parser.add_argument("xTrain",
                        help="filename for features of the training data")
    parser.add_argument("yTrain",
                        help="filename for labels associated with training data")
    parser.add_argument("xTest",
                        help="filename for features of the test data")
    parser.add_argument("yTest",
                        help="filename for labels associated with the test data")
    parser.add_argument("lr", type=float, help="learning rate")
    parser.add_argument("bs", type=int, help="batch size")
    parser.add_argument("epoch", type=int, help="max number of epochs")
    parser.add_argument("--seed", default=334, 
                        type=int, help="default seed number")

    args = parser.parse_args()
    # load the train and test data
    xTrain = file_to_numpy(args.xTrain)
    yTrain = file_to_numpy(args.yTrain)
    xTest = file_to_numpy(args.xTest)
    yTest = file_to_numpy(args.yTest)

    # setting the seed for deterministic behavior
    np.random.seed(args.seed)
    #
    # ORIGINAL CODE
    #

    # model = SgdLR(args.lr, args.bs, args.epoch)
    # trainStats = model.train_predict(xTrain, yTrain, xTest, yTest)
    # print(trainStats)

    # END ORIGINAL CODE
    #

    # # Code for Question 3B ################################
    # for lr in [0.1, 0.01, 0.001, 0.0001, .00001]:     # For each learning rate, train a model and plot the train mse
    #     model = SgdLR(lr, 1, args.epoch)
    #     trainStats = model.train_predict(xTrain, yTrain, xTest, yTest, fraction_train_data=.4)
    #     results = [trainStats[i]['train-mse'] for i in trainStats.keys()]
    #     plt.plot(results, label=lr)
    #
    # plt.title("Training MSE for Various Learning Rates with Batch Size 1 and 100 Epochs")
    # plt.ylim(top=1.25, bottom=.3)                     # The plot y axis was limited so that large values wouldn't
    # plt.xlabel("Epoch")                               # squeeze the small values. Reduced distortion but
    # plt.ylabel("MSE")                                 # higher, irrelevant initial MSE's were cut off.
    # plt.legend()
    # plt.show()

    # # Code for Question 3C ################################
    # model = SgdLR(.001, 1, args.epoch)                # the optimal MSE was chose from the above plot manually
    # trainStats = model.train_predict(xTrain, yTrain, xTest, yTest)        # train model using lr=.001
    # results = [trainStats[i]['train-mse'] for i in trainStats.keys()]
    # plt.plot(results, label='train-mse-lr=.001')
    # results = [trainStats[i]['test-mse'] for i in trainStats.keys()]
    # plt.plot(results, label='test-mse-lr=.001')
    #
    # # plots the train and test mse for the optimal lr over the number of epochs
    #
    # plt.title("Training and Test MSE for .001 Learning Rate with Batch Size 1 and 100 Epochs")
    # plt.xlabel("Epoch")
    # plt.ylabel("MSE")
    # plt.legend()
    # plt.show()

    # # Code for Question 4A ################################
    # The following code plots different learning rates for each batch size to find the optimal one
    # makes 12 plots, one for each batch size, not included in write up because way too many plots
    list_of_batches = [1, 5, 10, 15, 26, 39, 65, 86, 129, 195, 258, len(xTrain)]