Ejemplo n.º 1
0
Archivo: batch.py Proyecto: jtsui/hw4
def main():
    """Train with several hyper-parameter pairs, log the errors, plot each run.

    Loads 'spamData.mat', applies preprocess.log_transform to the 'Xtrain'
    and 'Xtest' matrices, and then, for every (regularization weight,
    learning rate) pair:

    * calls batch() on the training data to obtain beta,
    * evaluates test_error() on the training and the test split,
    * appends one tab-separated result row to 'res.txt',
    * shows a plot of the two lists that were handed to batch()
      (presumably filled in place with iteration numbers and loss
      values -- confirm against batch()).
    """
    mat = scipy.io.loadmat('spamData.mat')
    xtrain = preprocess.log_transform(mat['Xtrain'])
    ytrain = mat['ytrain']
    xtest = preprocess.log_transform(mat['Xtest'])
    ytest = mat['ytest']

    threshold = 0.1
    reg_learn_pairs = [(0.1, 0.0001), (0.001, 0.001), (0.001, 0.0001), (0.0001, 0.001), (0.0001, 0.0001)]

    for pair in reg_learn_pairs:
        regularization_weight, learning_rate = pair
        print('Regularization_weight %s learning_rate %s' % pair)

        # Fresh lists per run; they are passed to batch() and plotted below.
        iterations, losses = [], []
        beta = batch(xtrain, ytrain, threshold, regularization_weight,
                     learning_rate, iterations, losses)

        train_err = test_error(xtrain, ytrain, beta)
        test_err = test_error(xtest, ytest, beta)

        # Append mode: rows from every pair (and earlier runs) accumulate.
        with open('res.txt', 'a') as out:
            row = '%s\t%s\t%s\t%s\n' % (regularization_weight, learning_rate, train_err, test_err)
            out.write(row)
            out.flush()

        pyplot.plot(iterations, losses)
        title = 'Training Loss vs Number of Iterations.\nregularization_weight %s learning_rate %s' % pair
        pyplot.title(title)
        pyplot.xlabel("Number of Iterations")
        pyplot.ylabel("Negative Log Likelihood")
        pyplot.show()
Ejemplo n.º 2
0
def main():
    """Cross-validate one regularization weight and record the mean errors.

    Expects exactly one command-line argument, the regularization weight
    (parsed with float()). With any other argument count a usage message is
    printed and the function returns without doing any work.

    Loads 'spamData.mat', log-transforms 'Xtrain', shuffles the training
    data once, and then for each fold index calls partition() to obtain a
    held-out split and a training split, fits beta with batch(), and
    accumulates test_error() on both splits. The per-fold averages are
    printed and appended as one tab-separated row to 'res<weight>.txt'.

    Raises:
        ValueError: if the argument cannot be converted by float().
    """
    if len(sys.argv) != 2:
        print('Missing args. Usage: python crossval.py [regularization weight]')
        return
    regularization_weight = float(sys.argv[1])

    data = scipy.io.loadmat('spamData.mat')
    xtrain = preprocess.log_transform(data['Xtrain'])
    ytrain = data['ytrain']
    shuffled_xtrain, shuffled_ytrain = shuffle(xtrain, ytrain)

    threshold = 0.0001
    learning_rate = 0.0001
    # NOTE(review): must agree with the number of folds partition() produces
    # -- confirm against partition().
    num_folds = 5
    print('Regularization_weight %s learning_rate %s' % (regularization_weight, learning_rate))

    train = 0
    test = 0
    for fold in range(num_folds):
        xtest, ytest, fold_xtrain, fold_ytrain = partition(shuffled_xtrain, shuffled_ytrain, fold)
        # batch() returns three values here; only beta is needed.
        beta, _xp, _yp = batch(fold_xtrain, fold_ytrain, threshold, regularization_weight, learning_rate)
        train += test_error(fold_xtrain, fold_ytrain, beta)
        test += test_error(xtest, ytest, beta)

    # Divide by a float so the mean is never floored by Python 2 integer
    # division, should test_error() return integer values.
    train = train / float(num_folds)
    test = test / float(num_folds)

    print('%s\t%s\t%s\t%s' % (regularization_weight, learning_rate, train, test))
    # Closing the file at the end of the with-block flushes it.
    with open('res%s.txt' % regularization_weight, 'a') as f:
        f.write('%s\t%s\t%s\t%s\n' % (regularization_weight, learning_rate, train, test))