Example #1
0
def main(args):
    """Plot train/test F1 against the epoch budget and save the figure.

    Reads ``lr``, ``hu``, ``epoch``, ``train`` and ``test`` from ``args``.
    When ``args.hu`` is positive the ``nn_train``/``nn_predict`` pair is used,
    otherwise the ``lr_train``/``lr_predict`` pair. The training routine is
    called once for every epoch budget ``1..args.epoch``; after each call the
    test set and then the training set are scored, and element 1 of each
    prediction result is recorded (it is plotted on the axis labelled 'F1').

    The figure is written to ``./figs/<filename>``; the directory is created
    if it does not exist yet.

    Args:
        args: Namespace-like object exposing the attributes listed above.
            ``train`` and ``test`` are paths to JSON files.

    Returns:
        None. The only outputs are the saved figure and whatever the called
        training/prediction helpers emit.
    """
    import os  # local import: only needed to create the output directory

    lr = args.lr
    n_hu = args.hu
    epoch = args.epoch
    train_path = args.train
    # Removes the carriage return that Windows line endings leave on the path.
    test_path = args.test.replace('\r', '')
    data_origin = test_path.replace('_test.json', '').replace('data/', '')

    # Load the data in and convert to numpy; the test set is pre-processed
    # against the already pre-processed training set.
    with open(train_path, "r") as read_file:
        train = json2numpy(json.load(read_file)).train_pp()
    with open(test_path, "r") as read_file:
        test = json2numpy(json.load(read_file)).test_pp(train)

    # Pick the model-specific pieces once so a single loop serves both models.
    if n_hu > 0:
        title = 'Neural Network: F1 vs. Epochs'
        filename = 'nnet_f1_curve.png'
        caption = ('Dataset: ' + data_origin + ', Learning rate: ' + str(lr) + ' , Number of hidden units: ' + str(n_hu) + ', Maximum number of epochs: ' + str(epoch))

        def fit(n_epochs):
            return nn_train(lr, n_hu, n_epochs, train, screen_print=0)

        predict = nn_predict
    else:
        title = 'Logistic Regression: F1 vs. Epochs'
        filename = 'logistic_f1_curve.png'
        caption = ('Dataset: ' + data_origin + ', Learning rate: ' + str(lr) + ', Maximum number of epochs: ' + str(epoch))

        def fit(n_epochs):
            return lr_train(lr, n_epochs, train, screen_print=0)

        predict = lr_predict

    xlist = list(range(1, epoch + 1))
    trainlist = []
    testlist = []
    for e in xlist:
        w = fit(e)
        # Score the test set first, then the training set (original call order).
        testlist.append(predict(test, w, screen_print=0)[1])
        trainlist.append(predict(train, w, screen_print=0)[1])

    # Plot the data
    plt.plot(xlist, trainlist)
    plt.plot(xlist, testlist)
    plt.legend(['Train', 'Test'], loc="lower right")
    plt.xlabel('Number of epochs')
    plt.ylabel('F1')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, epoch + 5])
    plt.title(caption, fontsize=9, wrap=True)
    plt.suptitle(title)

    # savefig does not create missing directories, so make sure it exists.
    os.makedirs('./figs', exist_ok=True)
    plt.savefig('./figs/' + filename)
Example #2
0
def main(args):
    """Run a tree ensemble on the test set and report its confusion matrix.

    Reads ``method``, ``trees``, ``depth``, ``train`` and ``test`` from
    ``args``. ``method`` selects the ensemble routine: ``'bag'`` calls
    ``bootstrap`` and ``'boost'`` calls ``adaboost``. The resulting
    predictions are passed to ``build_confusion_matrix`` with ``verbose=True``.

    Any other ``method`` prints 'Invalid ensemble method.' and returns
    without reading the data files (previously execution continued and
    failed with a NameError on the undefined predictions).

    Args:
        args: Namespace-like object exposing the attributes listed above.
            ``train`` and ``test`` are paths to JSON files.

    Returns:
        None.
    """
    method = args.method
    trees = args.trees
    depth = args.depth
    train = args.train
    test = args.test

    # Validate before doing any I/O so a bad method fails fast and cleanly.
    if method not in ('bag', 'boost'):
        print('Invalid ensemble method.')
        return

    test = test.replace('\r', '')  # Removes the carriage return cuz I use windows

    # Load the data in and convert to numpy
    with open(train, "r") as read_file:
        train = json2numpy(json.load(read_file))
    with open(test, "r") as read_file:
        test = json2numpy(json.load(read_file))

    # Generate the predictions from the ensemble
    if method == 'bag':
        predictions = bootstrap(trees, depth, train, test, display=False)
    else:
        predictions = adaboost(trees, depth, train, test, display=False)

    build_confusion_matrix(test, predictions, verbose=True)
Example #3
0
def main(args):
    """Load the train/test JSON files and run ``bootstrap`` with display on.

    Reads ``trees``, ``depth``, ``train`` and ``test`` from ``args``; the two
    paths are parsed as JSON, converted with ``json2numpy`` and handed to
    ``bootstrap(trees, depth, train, test, display=True)``.

    Returns:
        None.
    """
    n_trees = args.trees
    max_depth = args.depth
    train_path = args.train
    # Removes the carriage return that Windows line endings leave on the path.
    test_path = args.test.replace('\r', '')

    def _load(path):
        # Parse one JSON file and convert it to the numpy-backed dataset.
        with open(path, "r") as handle:
            return json2numpy(json.load(handle))

    train_set = _load(train_path)
    test_set = _load(test_path)

    bootstrap(n_trees, max_depth, train_set, test_set, display=True)
Example #4
0
def main(args):
    """Train logistic regression on the training file and predict the test file.

    Reads ``lr``, ``epoch``, ``train`` and ``test`` from ``args``. The
    training data is pre-processed with ``train_pp()``; the test data is
    pre-processed with ``test_pp`` using the pre-processed training set.
    Weights come from ``lr_train`` and are passed to ``lr_predict``.

    Returns:
        None. The prediction result is not returned to the caller.
    """
    learning_rate = args.lr
    n_epochs = args.epoch
    train_path = args.train
    # Removes the carriage return that Windows line endings leave on the path.
    test_path = args.test.replace('\r', '')

    def _load(path):
        # Parse one JSON file and convert it to the numpy-backed dataset.
        with open(path, "r") as handle:
            return json2numpy(json.load(handle))

    train_set = _load(train_path).train_pp()
    test_set = _load(test_path).test_pp(train_set)

    # lr_train trains the weights for the logistic regression
    weights = lr_train(learning_rate, n_epochs, train_set)

    # lr_predict predicts classes for test set
    lr_predict(test_set, weights)
Example #5
0
def main(args):
    """Plot ensemble accuracy against ensemble size for three tree depths.

    Reads ``method``, ``trees``, ``depth``, ``train`` and ``test`` from
    ``args``. ``method`` selects the ensemble routine and the depth spread:

    * ``'bag'``   -> ``bootstrap``, depths ``depth - 2, depth, depth + 2``,
      figure saved as ``bagged_tree_plot.pdf``
    * ``'boost'`` -> ``adaboost``, depths ``depth - 1, depth, depth + 1``,
      figure saved as ``boosted_tree_plot.pdf``

    For each depth and each ensemble size ``1..trees`` the routine is run and
    the accuracy is computed as the number of predictions equal to
    ``test.labels`` divided by ``test.length``. One curve is drawn per depth.

    Any other ``method`` prints 'Invalid ensemble method.' and returns
    without reading the data files or plotting (previously execution
    continued and failed with a NameError while building the legend).

    Args:
        args: Namespace-like object exposing the attributes listed above.
            ``train`` and ``test`` are paths to JSON files.

    Returns:
        None. The figure is written to the current working directory.
    """
    method = args.method
    trees = args.trees
    depth = args.depth
    train = args.train
    test = args.test

    # Validate before doing any I/O so a bad method fails fast and cleanly.
    if method not in ('bag', 'boost'):
        print('Invalid ensemble method.')
        return

    test = test.replace('\r', '')  # Removes the carriage return cuz I use windows
    data_origin = test.replace('_test.json', '')
    data_origin = data_origin.replace('data/', '')

    # Load the data in and convert to numpy
    with open(train, "r") as read_file:
        train = json2numpy(json.load(read_file))
    with open(test, "r") as read_file:
        test = json2numpy(json.load(read_file))

    # Pull the true labels
    truth = test.labels

    # Method-specific settings; the sweep below is shared by both methods.
    if method == 'bag':
        title = 'Decision Tree Bagging: Accuracy vs. Number of trees'
        filename = 'bagged_tree_plot.pdf'
        depth_step = 2
        run_ensemble = bootstrap
    else:
        title = 'Decision Tree AdaBoost: Accuracy vs. Number of trees'
        filename = 'boosted_tree_plot.pdf'
        depth_step = 1
        run_ensemble = adaboost
    caption = ('Dataset: ' + data_origin)

    depths = [depth - depth_step, depth, depth + depth_step]
    sizes = list(range(1, trees + 1))

    # Generate the predictions from the ensemble, one curve per depth
    for max_depth in depths:
        accuracies = []
        for n_trees in sizes:
            predictions = run_ensemble(n_trees, max_depth, train, test, display=False)
            accuracies.append(sum((truth == predictions).astype(float)) / test.length)
        plt.plot(sizes, accuracies)

    # Plot the data
    plt.legend(depths, loc="lower right", title="Maximum tree depth", fancybox=True)
    plt.xlabel('Ensemble size')
    plt.ylabel('Accuracy')
    plt.ylim([0.0, 1.05])
    plt.xlim([1, trees])
    plt.title(caption, fontsize=9, wrap=True)
    plt.suptitle(title)
    plt.savefig(filename)