Example #1
def question7():
    """
        Use N=10 training data points. Get weights with Linear Regression, use those
        weights to initialize a PLA and see how long it takes to converge, over 1000 runs.
    """

    N = 10          # number of training points
    trial = 5000    # number of independent runs
    convergence_steps = np.ones((trial, 1)) * np.nan
    for i in xrange(trial):
        target_fn = generate_targetFn()
        data = generate_dataPoints_fromFn(N, target_fn)
        # one-shot linear regression on the two features against the labels
        w = LG(data[:, 0:2], data[:, 2])
        # warm-start the PLA with the regression weights and record steps to convergence
        classifier = PLA(data, weights=w)
        convergence_steps[i] = classifier.train()
    print "Problem 7: Average steps took for classifier convergence: %0.3f" % (convergence_steps.mean())
Example #2
def runComparison(N):
    """
    For training-sample size N, run experiment 1000 times, where in each iteration:
        1. Create target function and training data.
        2. Train PLA and SVM
        3. Create ntest testing points, and compare the misclassification performance
           as percentage of disagreements.
    """
    ntest = 1000     # number of out-of-sample data points
    ntrial = 1000    # number of experiment repetitions
    PLA_miss = np.ones(ntrial) * np.nan
    SVM_miss = np.ones(ntrial) * np.nan
    SVM_nsupp = np.ones(ntrial) * np.nan
    for i in xrange(ntrial):
        f = generate_targetFn()
        trainingSet = generate_dataPoints_fromFn(N, f)
        # redraw the training set while every label has the same sign (degenerate set)
        while all_same_sign(trainingSet):
            trainingSet = generate_dataPoints_fromFn(N, f)
        testingSet = generate_dataPoints_fromFn(ntest, f)
        # redraw the testing set while every label has the same sign
        while all_same_sign(testingSet):
            testingSet = generate_dataPoints_fromFn(ntest, f)

        # train PLA
        step_lim = 10000    # safety cap on the number of PLA updates
        classifier = PLA(trainingSet)
        classifier.train(lim=step_lim)
        # test PLA
        (results, misclassified) = classifier.classify(testingSet)
        PLA_miss[i] = misclassified/(ntest*1.0)

        # train SVM
        (w, b, SVM_nsupp[i]) = SVM(trainingSet)
        # test SVM
        (results, misclassified) = apply_SVM(w, b, testingSet)
        SVM_miss[i] = misclassified/(ntest*1.0)

    # find how often SVM is better than PLA in out-of-sample performance
    print "For N=%d, SVM is better than PLA %0.3f of the times" % (N, sum(np.less(SVM_miss, PLA_miss))/1000.0)
    print "For N=%d, average number of support vectors is %0.3f" % (N, np.mean(SVM_nsupp))