def runComparison(N, n_runs=1000):
    """Compare out-of-sample error of PLA and SVM for training size N.

    Each of the ``n_runs`` iterations:

    1. Creates a target function, a training set of ``N`` points and a
       testing set of ``ntest`` points labelled by that target.
    2. Trains a PLA and an SVM on the training set.
    3. Records, for each classifier, the fraction of testing points it
       misclassifies, plus the support-vector count reported by ``SVM``.

    Two summary lines are printed: how often SVM beat PLA on the testing
    set, and the average number of support vectors.

    Parameters
    ----------
    N : number of training points, passed to generate_dataPoints_fromFn.
    n_runs : number of independent experiments (default 1000, the value
        that was previously hard-coded).

    Raises
    ------
    ValueError : if ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1, got %r" % (n_runs,))

    ntest = 1000       # number of out-of-sample dataset points
    step_lim = 10000   # cap handed to PLA.train(lim=...); loop-invariant

    # np.nan (not the np.NAN alias, removed in NumPy 2.0) marks unfilled slots.
    PLA_miss = np.ones(n_runs) * np.nan
    SVM_miss = np.ones(n_runs) * np.nan
    SVM_nsupp = np.ones(n_runs) * np.nan

    # range() behaves the same as xrange() here and also exists on Python 3.
    for i in range(n_runs):
        f = generate_targetFn()
        trainingSet = _draw_mixed_sign_points(N, f)
        testingSet = _draw_mixed_sign_points(ntest, f)

        # train PLA
        classifier = PLA(trainingSet)
        classifier.train(lim=step_lim)

        # test PLA: the second returned value is used as a misclassification
        # count and turned into a fraction of the testing set.
        _, misclassified = classifier.classify(testingSet)
        PLA_miss[i] = misclassified / float(ntest)

        # train SVM: third returned value is stored as the support-vector count
        w, b, n_support = SVM(trainingSet)
        SVM_nsupp[i] = n_support

        # test SVM
        _, misclassified = apply_SVM(w, b, testingSet)
        SVM_miss[i] = misclassified / float(ntest)

    # find how often SVM is better than PLA in out-of-sample performance
    svm_wins = np.count_nonzero(np.less(SVM_miss, PLA_miss))
    # Single parenthesised argument prints identically on Python 2 and 3.
    print("For N=%d, SVM is better than PLA %0.3f of the times"
          % (N, svm_wins / float(n_runs)))
    print("For N=%d, average number of support vectors is %0.3f"
          % (N, np.mean(SVM_nsupp)))


def _draw_mixed_sign_points(n_points, target_fn):
    """Draw n_points labelled by target_fn, redrawing while all_same_sign() holds.

    Presumably all_same_sign() is true when every point carries the same
    label; such a draw is discarded and replaced by a fresh one.
    """
    points = generate_dataPoints_fromFn(n_points, target_fn)
    while all_same_sign(points):
        points = generate_dataPoints_fromFn(n_points, target_fn)
    return points
def question7():
    """Measure PLA convergence when initialised from Linear Regression weights.

    Uses N=10 training data points. For each of ``trial`` (5000) runs, a
    target function and data set are generated, weights are obtained from
    ``LG`` on the first two columns of the data against the third, and a PLA
    initialised with those weights is trained. The value returned by
    ``classifier.train()`` is recorded as the number of steps for that run,
    and the average over all runs is printed.
    """
    # Integer sample count, matching how runComparison calls the same helper
    # (assumes generate_dataPoints_fromFn wants a count, not a float).
    N = 10
    trial = 5000

    # np.nan (not the np.NAN alias, removed in NumPy 2.0) marks unfilled slots.
    convergence_steps = np.ones((trial, 1)) * np.nan

    # range() behaves the same as xrange() here and also exists on Python 3.
    for i in range(trial):
        target_fn = generate_targetFn()
        data = generate_dataPoints_fromFn(N, target_fn)

        # Columns 0-1 are passed as the regression inputs and column 2 as the
        # regression target.
        w = LG(data[:, 0:2], data[:, 2])

        classifier = PLA(data, weights=w)
        convergence_steps[i] = classifier.train()

    # Single parenthesised argument prints identically on Python 2 and 3.
    print("Problem 7: Average steps took for classifier convergence: %0.3f"
          % (convergence_steps.mean()))