import dumbClassifiers as du
import datasets as data
import runClassifier as run
import numpy
import knn

# 9: learning curve for 5-NN on the digits data.
curve = run.learningCurveSet(knn.KNN({'isKNN': True, 'K': 5}), data.DigitData)
# Fixed title typo: "DIgitsData" -> "DigitsData".
run.plotCurve('K-Nearest Neighbor on 5-NN; DigitsData', curve)

# 11: hyperparameter curve sweeping K = 1..10 on the digits data.
curve = run.hyperparamCurveSet(knn.KNN({'isKNN': True}), 'K',
                               [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], data.DigitData)
run.plotCurve('Hyperparameter Curve on DigitsData', curve)

# 12: hyperparameter curve sweeping eps = 1.0, 1.5, ..., 19.5.
# numpy.arange replaces the original manual while-loop accumulator; the
# values produced are identical (start 1, step 0.5, exclusive stop 20).
arr = list(numpy.arange(1, 20, 0.5))
curve = run.hyperparamCurveSet(knn.KNN({'isKNN': False}), 'eps', arr, data.DigitData)
run.plotCurve('Hyperparameter Curve on DigitsData', curve)
from numpy import *
from pylab import *
import util
import binary
import datasets
import knn
import runClassifier

# Learning curves for K = 1, 2, 10, 20 (true KNN mode).
for k in [1, 2, 10, 20]:
    sizes, train_acc, test_acc = runClassifier.learningCurveSet(
        knn.KNN({'isKNN': True, 'K': k}), datasets.DigitData)
    runClassifier.plotCurve("Learning Curve for knn, K=%d" % (k),
                            [sizes, train_acc, test_acc])
    ylim([.2, 1.1])
    savefig("LC_k%d.png" % (k))
    close()

# Learning curves for epsilon = 5.0, 10.0, 15.0, 20.0 (epsilon-ball mode).
for eps in [5.0, 10.0, 15.0, 20.0]:
    sizes, train_acc, test_acc = runClassifier.learningCurveSet(
        knn.KNN({'isKNN': False, 'eps': eps}), datasets.DigitData)
    runClassifier.plotCurve("Learning Curve for knn, eps=%f" % (eps),
                            [sizes, train_acc, test_acc])
    ylim([.45, 1.1])
    savefig("LC_eps%f.png" % (eps))
    close()

# Learning curve for K = 5, saved without an axis-limit override.
sizes, train_acc, test_acc = runClassifier.learningCurveSet(
    knn.KNN({'isKNN': True, 'K': 5}), datasets.DigitData)
runClassifier.plotCurve("Learning Curve for knn, K=5",
                        [sizes, train_acc, test_acc])
savefig("LC_k5.png")
import datasets as data
import runClassifier as run
import numpy
import perceptron

# Learning curve for a perceptron trained for 10 epochs.
# FIX: the dataset used here is TwoDDiagonal, but the old plot title said
# "Sentiment Data" — the title now names the dataset actually plotted.
curve = run.learningCurveSet(perceptron.Perceptron({'numEpoch': 10}),
                             data.TwoDDiagonal)
run.plotCurve("Perceptron Learning Curve on TwoDDiagonal Data", curve)
# Tennis data: fit and print trees at two depths.
# (dt / datasets / runClassifier are imported earlier in this file.)
for depth in [2, 5]:
    h = dt.DT({'maxDepth': depth})
    h.train(datasets.TennisData.X, datasets.TennisData.Y)
    print(h)

# Sentiment data: fit a shallow tree, then look up the vocabulary words
# for the feature indices it split on.
h = dt.DT({'maxDepth': 2})
h.train(datasets.SentimentData.X, datasets.SentimentData.Y)
print(h)
for word_idx in [2428, 3842, 3892]:
    print(datasets.SentimentData.words[word_idx])

# Train/test accuracy at several depths.
for depth in [1, 3, 5]:
    runClassifier.trainTestSet(dt.DT({'maxDepth': depth}), datasets.SentimentData)

# Learning curve at a fixed depth.
curve = runClassifier.learningCurveSet(dt.DT({'maxDepth': 9}),
                                       datasets.SentimentData)
runClassifier.plotCurve('DT on Sentiment Data', curve)

# W2: sweep maxDepth as a hyperparameter.
curve = runClassifier.hyperparamCurveSet(dt.DT({}), 'maxDepth',
                                         [1, 2, 4, 6, 8, 12, 16],
                                         datasets.SentimentData)
runClassifier.plotCurve('DT on Sentiment Data (hyperparameter)', curve)
import perceptron
import datasets
import runClassifier

# Learning curve with the epoch count fixed at 5.
curve = runClassifier.learningCurveSet(
    perceptron.Perceptron({'numEpoch': 5}), datasets.SentimentData)
runClassifier.plotCurve('Perceptron on Sentiment Data', curve)

# Sweep numEpoch over 1..10 as a hyperparameter.
curve = runClassifier.hyperparamCurveSet(
    perceptron.Perceptron({}), 'numEpoch',
    list(range(1, 11)), datasets.SentimentData)
runClassifier.plotCurve('Perceptron on Sentiment Data (hyperparameter)', curve)
import perceptron
import datasets as data
import runClassifier as run
import pylab
from numpy import *

# 13: perceptron learning curve on the sentiment data.
curve = run.learningCurveSet(perceptron.Perceptron({'numEpoch': 10}),
                             data.SentimentData)
run.plotCurve("Perceptron Learning Curve", curve)

# 14: train and visualize a perceptron on TwoDDiagonal.
# BUG FIX: the labels previously came from data.TwoDAxisAligned.Y while the
# features came from data.TwoDDiagonal.X — two different datasets mixed
# (same mismatch in the plotData call). Both now use TwoDDiagonal.
h = perceptron.Perceptron({'numEpoch': 10})
h.train(data.TwoDDiagonal.X, data.TwoDDiagonal.Y)
run.plotData(data.TwoDDiagonal.X, data.TwoDDiagonal.Y)
run.plotClassifier(h.weights, h.bias)
pylab.show()

# 15: a tiny hand-built dataset (test set reuses the training points).
class DummyData:
    X = array([[-1, 2], [1, 2], [-1, -2], [1, -2],
               [-2, 1], [-2, -1], [2, 1], [2, -1]])
    Y = array([1, 1, 1, 1, -1, -1, -1, -1])
    Xte = X
    Yte = Y

h = perceptron.Perceptron({'numEpoch': 10})
h.train(DummyData.X, DummyData.Y)
import knn
import datasets
import runClassifier

# Sweep K over 1..10 in true-KNN mode.
curve = runClassifier.hyperparamCurveSet(
    knn.KNN({'isKNN': True}), 'K', list(range(1, 11)), datasets.DigitData)
runClassifier.plotCurve('KNN on Digit Data (hyperparameter K)', curve)

# Sweep epsilon over 1.0..10.0 in epsilon-ball mode.
curve = runClassifier.hyperparamCurveSet(
    knn.KNN({'isKNN': False}), 'eps',
    [float(v) for v in range(1, 11)], datasets.DigitData)
runClassifier.plotCurve('KNN on Digit Data (hyperparameter epsilon)', curve)

# Learning curve with K fixed at 5.
curve = runClassifier.learningCurveSet(
    knn.KNN({'isKNN': True, 'K': 5}), datasets.DigitData)
runClassifier.plotCurve('KNN on Digit Data', curve)
from numpy import *
from pylab import *
import util, datasets, runClassifier, binary
import dt
import dumbClassifiers

# Convenience aliases for the tennis data (unused below; presumably left
# over from interactive exploration — verify before deleting).
X = datasets.TennisData.X
Y = datasets.TennisData.Y
data = datasets.TennisData

# ----- for dt -----
maxD = 1;
# NOTE(review): bare reload() is the Python 2 builtin; under Python 3 this
# needs importlib.reload — confirm which interpreter this script targets.
reload(dt)
h = dt.DT({'maxDepth': maxD})
# Learning curve and maxDepth hyperparameter sweep on the CFTookAI data.
curve = runClassifier.learningCurveSet(dt.DT({'maxDepth': 5}), datasets.CFTookAI)
curveh = runClassifier.hyperparamCurveSet(dt.DT({'maxDepth': 5}), 'maxDepth', [1,2,3,4,5,6,7,8,9,10], datasets.CFTookAI)
runClassifier.plotCurve('', curveh)
# Deep tree trained on the full data (comment below records the finding).
h = dt.DT({'maxDepth': 10})
h.train(datasets.CFTookAI.X, datasets.CFTookAI.Y);
# # the maxDepth that does the best on test set is 5, we reach 1 on the training data with maxDepth=10
# # with maxDepth=5, the features are:
with5 = dt.DT({'maxDepth':5})
with5.train(datasets.CFTookAI.X, datasets.CFTookAI.Y)
# # CFTookAI.X is (400, 55), the 55 features.
# # the top 5 features are (indices):
# NOTE(review): the list below actually holds SIX indices despite the
# "top 5" comment above — confirm whether index 1 or 48 is extraneous.
top5With5 = [1,44, 37, 54, 52, 48];
# #which are:
bestWith5_courses = datasets.CFTookAI.courseNames[top5With5]
# #result: array(['introduction to information technology',
# # 'database management systems', 'complexity theory',
import dumbClassifiers as du
import datasets as data
import runClassifier as run
import numpy
import dt

# Test the decision tree on the sentiment data.
# FIX: both plot titles said "Sediment Data" — a typo for the SentimentData
# dataset actually used (also fixed the "ZDecision" comment typo).
curve = run.learningCurveSet(dt.DT({'maxDepth': 6}), data.SentimentData)
run.plotCurve('Decision Tree Learning Curve on Sentiment Data', curve)

# Sweep maxDepth as a hyperparameter.
curve = run.hyperparamCurveSet(dt.DT({}), 'maxDepth',
                               [1, 2, 4, 6, 8, 12, 16], data.SentimentData)
run.plotCurve('Decision Tree Hyperparameter Curve on Sentiment Data', curve)
import datasets
import runClassifier
import perceptron

# Local plotting helper: draws train (blue) and test (red) accuracy against
# training-set size. Relies on pylab names (plot, legend, xlabel, ylabel,
# title) being in scope — presumably star-imported elsewhere; TODO confirm.
def plotCurve(titleString, res):
    # res is a triple: [dataSizes, trainAccuracies, testAccuracies]
    plot(res[0], res[1], 'b-', res[0], res[2], 'r-')
    legend( ('Train', 'Test') )
    xlabel('# of training points')
    ylabel('Accuracy')
    title(titleString)

# A - learning curve with 5 epochs
(dataSizes, trainAcc, testAcc) = runClassifier.learningCurveSet(perceptron.Perceptron({'numEpoch': 5}), datasets.SentimentData)
plotCurve("Learning Curve for Perceptron", [dataSizes, trainAcc, testAcc])
# savefig/close also come from pylab — see note above.
savefig("LC_perceptron.png")
close()

# B - number of epochs vs train/test accuracy
# Removed third return value
# NOTE(review): this definition is cut off at the edge of the visible chunk;
# the docstring below continues (and is closed) on later lines not shown here.
def trainTest(classifier, X, Y, Xtest, Ytest):
    """ Train a classifier on data (X,Y) and evaluate on data (Xtest,Ytest). Return a triple of: * Training data accuracy * Test data accuracy * Individual predictions on Xtest.