コード例 #1
    def closed_form_ridge_regression(self, training_data, training_results):
        """Fit closed-form linear regression with a cross-validated lambda.

        A fresh ``crossvalidate.crossvalidate()`` object is asked for a
        lambda via ``determine_lambda(training_data, training_results)``;
        that value is then forwarded as the third argument of
        ``self.closed_form_lin_regression``.

        Args:
            training_data: Training inputs, passed through unchanged.
            training_results: Training targets, passed through unchanged.

        Returns:
            Whatever ``self.closed_form_lin_regression`` returns (named
            ``weights`` by the original code).
        """
        ridge_lambda = crossvalidate.crossvalidate().determine_lambda(
            training_data, training_results)
        return self.closed_form_lin_regression(
            training_data, training_results, ridge_lambda)
コード例 #2
    def gradient_descent_ridge_regression(self, training_data,
                                          training_results):
        """Fit gradient-descent linear regression with a cross-validated lambda.

        A fresh ``crossvalidate.crossvalidate()`` object is asked for a
        lambda via ``determine_lambda(training_data, training_results)``;
        that value is then forwarded as the third argument of
        ``self.gradient_descent_lin_regression``.

        Args:
            training_data: Training inputs, passed through unchanged.
            training_results: Training targets, passed through unchanged.

        Returns:
            Whatever ``self.gradient_descent_lin_regression`` returns.
        """
        # NOTE(review): the signature was cut off after `training_data,`.
        # `training_results` is restored because the body reads it; confirm
        # that no further parameters were lost.
        ridge_lambda = crossvalidate.crossvalidate().determine_lambda(
            training_data, training_results)
        return self.gradient_descent_lin_regression(
            training_data, training_results, ridge_lambda)
コード例 #3
# Train an initial RBF classifier. P is handed to trainsvm as its fifth
# argument (presumably the kernel parameter -- confirm against trainsvm).
P = 1
svmclassify = trainsvm(xTr, yTr, C, 'rbf', P)

# Get training error of initial classifier: the fraction of training points
# whose prediction differs from its label.
train_preds = svmclassify(xTr)
train_error = np.mean(train_preds != yTr)
print("Train error:", train_error)

# Visualize svmclassify on the training set
visdecision(xTr, yTr, svmclassify)

# Do crossvalidation (you will want to test different parameters)
Cs = 2.0 ** np.linspace(-1, 6, 200)  # 200 values, exponents evenly spaced in [-1, 6]
Ps = np.linspace(0.01, 5, 200)  # 200 values evenly spaced in [0.01, 5]
bestC, bestP, lowest_error, errors = crossvalidate(xTr, yTr, 'rbf', Cs, Ps)

print("Best C:", bestC)
print("Best P:", bestP)

# Save the best parameters to be run on test data by the autograder.
# The with-block closes the file so the pickle is fully flushed to disk.
best_parameters = {'C': bestC, 'P': bestP}
with open('best_parameters.pickle', 'wb') as parameter_file:
    pickle.dump(best_parameters, parameter_file)
# Don't forget to commit best_parameters.pickle!

# Plot the performance of different parameters (this looks better with more parameter choices)
# x,y = np.array(np.meshgrid(Cs, Ps))
# plt.contourf(x, y, errors)
# plt.colorbar()
# plt.xlabel('C')
コード例 #4
def _runExperiment(train, test, cross_epochs, epochs, batch_size, classifierName):
    Runs the experiment returning the classifier generated
    print 'Running the experiment'
    print '  training data:      ', train
    print '  testing data:       ', test
    print '  epochs:             ', epochs
    print '  batch size:         ', batch_size
    print '  classifier:         ', classifierName

    print 'Loading data ...      ',
    start = time.clock()
    with open(train, 'r') as trainfile:
        xdata, ydata = pickle.load(trainfile)
    xdata = preprocessData(xdata, reshape=(classifierName != 'ConvNet'))
    # Split into verify set and data set
    xlen = len(xdata)
    xverify = xdata[:xlen/5]
    yverify = ydata[:xlen/5]
    xdata = xdata[xlen/5:]
    ydata = ydata[xlen/5:]
    print 'done'
    kb_used = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    print '  elapsed time:       ', time.clock() - start
    print '  memory used:        ', kb_used, 'KB,', float(kb_used) / 2**10, 'MB'
    print '  xshape:             ', xdata.shape
    print '  yshape:             ', ydata.shape

    print 'Perform cross-validation'
    k = 5
    rValues = [0.0001, 0.001, 0.01, 0.1, 0.5]
    CValues = [0.0001, 0.001, 0.01, 0.1]
    hiddenDims = [1, 20, 40, 80]
    out_im1_amounts = [10, 20]
    out_im2_amounts = [20, 50]

    rValues = [0.05]
    CValues = [0.01]
    hiddenDims = [20]
    out_im1_amounts = [10]
    out_im2_amounts = [20]

    mlpLearner = lambda dim_in, dim_hidden, r, C: \
        Mlp(dim_in, dim_hidden, 2, r, C)
    im_shape = (40, 68)
    convnetLearner = lambda dim_in, out_im1, out_im2, dim_hidden, r, C: \
            (out_im1, out_im2),
            ((5, 5), (5, 5)),
            ((2, 2), (2, 2)),
    classifierMap = {
        'MLP': (
            list(itertools.product(hiddenDims, rValues, CValues)),
            ['hidden-dims', 'r', 'C']
        'SVM': (SVM, list(itertools.product(rValues, CValues)), ['r', 'C']),
        'Perceptron': (Perceptron, [(x,) for x in rValues], ['r']),
        'AveragedPerceptron': (
            [(x,) for x in rValues],
        'LogisticRegression': (
            list(itertools.product([2], rValues, CValues)),
            ['dim_out', 'r', 'C']
        'ConvNet': (
            list(itertools.product(out_im1_amounts, out_im2_amounts, hiddenDims, rValues, CValues)),
            ['kernel_1', 'kernel_2', 'hidden-dims', 'r', 'C']
    algorithm, hyperparams, hypernames = classifierMap[classifierName]
    if classifierName in ('LogisticRegression', 'MLP'):
        ydata -= ydata.min()
        ydata /= ydata.max()
        yverify -= yverify.min()
        yverify /= yverify.max()
    start = time.clock()
    print 'Doing {k}-cross validation sequentially'.format(k=k)
    print '  params:             ', hypernames
    for i in xrange(len(hypernames)):
        print '  {0} values:    '.format(hypernames[i]), sorted(set([x[i] for x in hyperparams]))
    params = crossvalidate(algorithm, xdata, ydata, k, cross_epochs, batch_size,
                           hyperparams, hypernames)
    print '  elapsed time:       ', time.clock() - start
    print '  best params:        ', params
    # I tried to get threading to work, but it just didn't do very well.
    # It took a lot longer than single threaded, maybe because theano can't be
    # parallelized like that...
    #print 'Doing {k}-cross validation multithreaded'.format(k=k)
    #params = crossvalidate_threaded(AveragedPerceptron, xdata, ydata, k,
    #                            epochs, batch_size,
    #                            hyperparams, hypernames)
    kb_used = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    print '  memory used before gc:', kb_used, 'KB,', float(kb_used) / 2**10, 'MB'
    kb_used = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    print '  memory used after gc: ', kb_used, 'KB,', float(kb_used) / 2**10, 'MB'

    print 'Training Perceptron'
    print '  epochs:             ', epochs
    print '  batch size:         ', batch_size
    print '  training  ',
    start = time.clock()
    classifier = algorithm(xdata.shape[1], *params)
    classifier.train(xdata, ydata, epochs, batch_size, xverify=xverify, yverify=yverify)
    print ' done'
    predictions = classifier.predict(xdata)
    print '  elapsed time:       ', time.clock() - start
    print '  training accuracy:  ', np.sum(ydata == predictions) / float(len(ydata))
    kb_used = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    print '  memory used before gc:', kb_used, 'KB,', float(kb_used) / 2**10, 'MB'
    del xdata
    del ydata
    kb_used = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    print '  memory used after gc: ', kb_used, 'KB,', float(kb_used) / 2**10, 'MB'

    print 'Loading test data ... ',
    start = time.clock()
    with open(test, 'r') as testfile:
        testx, testy = pickle.load(testfile)
    testx = preprocessData(testx, reshape=(classifierName != 'ConvNet'))
    if classifierName in ('LogisticRegression', 'MLP'):
        testy -= testy.min()
        testy /= testy.max()
    print ' done'
    kb_used = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    print '  elapsed time:       ', time.clock() - start
    print '  memory used:        ', kb_used, 'KB,', float(kb_used) / 2**10, 'MB'
    print '  xshape:             ', testx.shape
    print '  yshape:             ', testy.shape

    print 'Running classifier on test set'
    start = time.clock()
    predictions = classifier.predict(testx)
    print '  elapsed time:       ', time.clock() - start
    print '  test accuracy:      ', np.sum(testy == predictions) / float(len(testy))

    return classifier
コード例 #5
def testLearner(name, trainExamples, testExamples, crossepochs=10,
                epochs=10, batchSize=1, learnerName='SVM'):
    Trains an averaged Perceptron classifier from the training examples and
    then calculates the accuracy of the generated classifier on the test

    Prints out the results to the console
    featuresList = np.asarray([x.features for x in trainExamples], dtype=theano.config.floatX)
    labels = np.asarray([x.label for x in trainExamples], dtype=theano.config.floatX)
    testFeatures = np.asarray([x.features for x in testExamples], dtype=theano.config.floatX)
    testLabels = np.asarray([x.label for x in testExamples], dtype=theano.config.floatX)

    rvalues = [0.01, 0.05, 0.1, 0.5]
    Cvalues = [0.001, 0.005, 0.01, 0.05]
    dimvalues = [10, 20]

    logreglearner = lambda dim_in, r, C: LogisticRegression(dim_in, 2, r, C)
    mlplearner = lambda dim_in, dim_hidden, r, C: Mlp(dim_in, dim_hidden, 2, r, C)
    learnerMap = {
        'Perceptron': (Perceptron, [(x,) for x in rvalues], ['r']),
        'AveragedPerceptron': (AveragedPerceptron, [(x,) for x in rvalues], ['r']),
        'SVM': (SVM, list(itertools.product(rvalues, Cvalues)), ['r', 'C']),
        'LogisticRegression': (
            list(itertools.product(rvalues, Cvalues)),
            ['r', 'C']
        'MLP': (
            list(itertools.product(dimvalues, rvalues, Cvalues)),
            ['hidden-dimension', 'r', 'C']
    learner, hypers, names = learnerMap[learnerName]
    logisticLearners = ('LogisticRegression', 'MLP')
    if learnerName in logisticLearners:
        labels -= labels.min()
        labels /= labels.max()
        testLabels -= testLabels.min()
        testLabels /= testLabels.max()

    k = 5
    print 'Performing cross-validation'
    print '  dataset:        ', name
    print '  learner:        ', learnerName
    print '  k:              ', k
    print '  parameters:     ', names
    bestHyper = crossvalidate(learner, featuresList, labels, k, crossepochs,
                              batchSize, hypers, names)
    print '  best params:    ', names, '=', bestHyper

    p = learner(len(featuresList[0]), *bestHyper)
    print 'training ' + name + ' ',
    p.train(featuresList, labels, epochs, batchSize)
    print ' done'

    # Test accuracy on the training set
    predictions = p.predict(featuresList)
    accuracy = np.sum(labels == predictions) / float(len(labels))
    print name, '  train accuracy:  ', accuracy

    # Test accuracy on the testing set
    predictions = p.predict(testFeatures)
    accuracy = np.sum(testLabels == predictions) / float(len(testLabels))
    print name, '  test accuracy:   ', accuracy