Example #1
0
import dumbClassifiers as du
import datasets as data
import runClassifier as run
import numpy
import knn

# 9: learning curve for 5-nearest-neighbor on the digits data
curve = run.learningCurveSet(knn.KNN({'isKNN': True, 'K': 5}), data.DigitData)
# Fixed title typo: "DIgitsData" -> "DigitsData".
run.plotCurve('K-Nearest Neighbor on 5-NN; DigitsData', curve)

# 11: hyperparameter sweep over the neighbor count K = 1..10
curve = run.hyperparamCurveSet(knn.KNN({'isKNN': True}), 'K',
                               list(range(1, 11)), data.DigitData)
run.plotCurve('Hyperparameter Curve on DigitsData', curve)

# 12: hyperparameter sweep over epsilon = 1, 1.5, ..., 19.5 (epsilon-ball KNN)
# A comprehension replaces the original manual while-loop accumulator.
eps_values = [1 + 0.5 * i for i in range(38)]
curve = run.hyperparamCurveSet(knn.KNN({'isKNN': False}), 'eps',
                               eps_values, data.DigitData)
run.plotCurve('Hyperparameter Curve on DigitsData', curve)
Example #2
0
from numpy import *
from pylab import *

import util
import binary
import datasets
import knn
import runClassifier

# Learning curves for K = 1, 2, 10, 20 (kNN mode)
for k in [1, 2, 10, 20]:
    (dataSizes, trainAcc, testAcc) = runClassifier.learningCurveSet(
        knn.KNN({'isKNN': True, 'K': k}), datasets.DigitData)
    runClassifier.plotCurve("Learning Curve for knn, K=%d" % (k),
                            [dataSizes, trainAcc, testAcc])
    ylim([.2, 1.1])
    savefig("LC_k%d.png" % (k))
    close()

# Learning curves for \epsilon = 5.0, 10.0, 15.0, 20.0 (epsilon-ball mode)
for eps in [5.0, 10.0, 15.0, 20.0]:
    (dataSizes, trainAcc, testAcc) = runClassifier.learningCurveSet(
        knn.KNN({'isKNN': False, 'eps': eps}), datasets.DigitData)
    runClassifier.plotCurve("Learning Curve for knn, eps=%f" % (eps),
                            [dataSizes, trainAcc, testAcc])
    ylim([.45, 1.1])
    savefig("LC_eps%f.png" % (eps))
    close()

# Learning curve for K = 5
(dataSizes, trainAcc, testAcc) = runClassifier.learningCurveSet(
    knn.KNN({'isKNN': True, 'K': 5}), datasets.DigitData)
runClassifier.plotCurve("Learning Curve for knn, K=5",
                        [dataSizes, trainAcc, testAcc])
savefig("LC_k5.png")
close()  # was missing: release the last figure like the loops above do
Example #3
0
import datasets as data
import runClassifier as run
import numpy
import perceptron

# Perceptron learning curve on the TwoDDiagonal dataset.
# Fixed mislabeled title: the data plotted here is TwoDDiagonal, not Sentiment.
curve = run.learningCurveSet(perceptron.Perceptron({'numEpoch': 10}),
                             data.TwoDDiagonal)
run.plotCurve("Perceptron Learning Curve on TwoDDiagonal Data", curve)
Example #4
0
# Tennis data: grow decision trees of increasing depth and print them.
for depth in (2, 5):
    h = dt.DT({'maxDepth': depth})
    h.train(datasets.TennisData.X, datasets.TennisData.Y)
    print(h)

# Sentiment data
h = dt.DT({'maxDepth': 2})
h.train(datasets.SentimentData.X, datasets.SentimentData.Y)
print(h)

# Look up the words behind the feature indices chosen by the tree.
for idx in (2428, 3842, 3892):
    print(datasets.SentimentData.words[idx])

# Train/test accuracy at a few depths.
for depth in (1, 3, 5):
    runClassifier.trainTestSet(dt.DT({'maxDepth': depth}),
                               datasets.SentimentData)

curve = runClassifier.learningCurveSet(dt.DT({'maxDepth': 9}),
                                       datasets.SentimentData)
runClassifier.plotCurve('DT on Sentiment Data', curve)

# W2: hyperparameter sweep over maxDepth.
curve = runClassifier.hyperparamCurveSet(dt.DT({}), 'maxDepth',
                                         [1, 2, 4, 6, 8, 12, 16],
                                         datasets.SentimentData)
runClassifier.plotCurve('DT on Sentiment Data (hyperparameter)', curve)
import perceptron
import datasets
import runClassifier

# Learning curve for a perceptron trained for 5 epochs.
curve = runClassifier.learningCurveSet(
    perceptron.Perceptron({'numEpoch': 5}), datasets.SentimentData)
runClassifier.plotCurve('Perceptron on Sentiment Data', curve)

# Sweep the number of training epochs from 1 through 10.
curve = runClassifier.hyperparamCurveSet(
    perceptron.Perceptron({}), 'numEpoch', list(range(1, 11)),
    datasets.SentimentData)
runClassifier.plotCurve('Perceptron on Sentiment Data (hyperparameter)', curve)
Example #6
0
import perceptron
import datasets as data
import runClassifier as run
import pylab
from numpy import *

# 13: perceptron learning curve on the sentiment data
curve = run.learningCurveSet(perceptron.Perceptron({'numEpoch': 10}),
                             data.SentimentData)
run.plotCurve("Perceptron Learning Curve", curve)

# 14: train a perceptron and visualize its decision boundary.
# BUG FIX: the original paired TwoDDiagonal.X with TwoDAxisAligned.Y,
# mixing features and labels from two different datasets; use the X and Y
# of the same dataset (TwoDDiagonal) for both training and plotting.
h = perceptron.Perceptron({'numEpoch': 10})
h.train(data.TwoDDiagonal.X, data.TwoDDiagonal.Y)
run.plotData(data.TwoDDiagonal.X, data.TwoDDiagonal.Y)
run.plotClassifier(h.weights, h.bias)
pylab.show()


#15
class DummyData:
    # Eight hand-built 2-D points: the first four (|y| > |x|) carry label +1,
    # the last four (|x| > |y|) carry label -1.
    X = array([[-1, 2], [1, 2], [-1, -2], [1, -2],
               [-2, 1], [-2, -1], [2, 1], [2, -1]])
    Y = array([1] * 4 + [-1] * 4)

    # Evaluate on the training data itself.
    Xte = X
    Yte = Y


# Fit a 10-epoch perceptron to the hand-built dataset above.
h = perceptron.Perceptron({'numEpoch': 10})
h.train(DummyData.X, DummyData.Y)
Example #7
0
import knn
import datasets
import runClassifier

# Hyperparameter sweep over the neighbor count K.
kValues = list(range(1, 11))
curve = runClassifier.hyperparamCurveSet(knn.KNN({'isKNN': True}), 'K',
                                         kValues, datasets.DigitData)
runClassifier.plotCurve('KNN on Digit Data (hyperparameter K)', curve)

# Hyperparameter sweep over the ball radius epsilon.
epsValues = [float(v) for v in range(1, 11)]
curve = runClassifier.hyperparamCurveSet(knn.KNN({'isKNN': False}), 'eps',
                                         epsValues, datasets.DigitData)
runClassifier.plotCurve('KNN on Digit Data (hyperparameter epsilon)', curve)

# Learning curve with K fixed at 5.
curve = runClassifier.learningCurveSet(knn.KNN({'isKNN': True, 'K': 5}),
                                       datasets.DigitData)
runClassifier.plotCurve('KNN on Digit Data', curve)
Example #8
0
from numpy import *
from pylab import *
import util, datasets, runClassifier, binary
import dt
import dumbClassifiers

X = datasets.TennisData.X
Y = datasets.TennisData.Y
data = datasets.TennisData

# ----- for dt -----
maxD = 1
# `reload` is not a builtin in Python 3; go through importlib instead.
import importlib
importlib.reload(dt)
h = dt.DT({'maxDepth': maxD})
curve = runClassifier.learningCurveSet(dt.DT({'maxDepth': 5}),
                                       datasets.CFTookAI)
curveh = runClassifier.hyperparamCurveSet(dt.DT({'maxDepth': 5}), 'maxDepth',
                                          [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                                          datasets.CFTookAI)
runClassifier.plotCurve('', curveh)

h = dt.DT({'maxDepth': 10})
h.train(datasets.CFTookAI.X, datasets.CFTookAI.Y)
# The maxDepth that does best on the test set is 5; we reach accuracy 1 on
# the training data with maxDepth=10.
# With maxDepth=5, the features are:
# (the following lines had a stray leading space -> IndentationError; fixed)
with5 = dt.DT({'maxDepth': 5})
with5.train(datasets.CFTookAI.X, datasets.CFTookAI.Y)
# CFTookAI.X is (400, 55), i.e. 55 features.
# The top feature indices (six indices despite the "top5" name) are:
top5With5 = [1, 44, 37, 54, 52, 48]
# which are:
bestWith5_courses = datasets.CFTookAI.courseNames[top5With5]
# result: array(['introduction to information technology',
#         'database management systems', 'complexity theory',
Example #9
0
import dumbClassifiers as du
import datasets as data
import runClassifier as run
import numpy
import dt

# Test the decision tree.
# Fixed typos: "ZDecision" -> "Decision", "Sediment" -> "Sentiment" in titles.
curve = run.learningCurveSet(dt.DT({'maxDepth': 6}), data.SentimentData)
run.plotCurve('Decision Tree Learning Curve on Sentiment Data', curve)

curve = run.hyperparamCurveSet(dt.DT({}), 'maxDepth', [1, 2, 4, 6, 8, 12, 16],
                               data.SentimentData)
run.plotCurve('Decision Tree Hyperparameter Curve on Sentiment Data', curve)
Example #10
0
import datasets
import runClassifier
import perceptron


def plotCurve(titleString, res):
    """Plot train (blue) and test (red) accuracy versus training-set size.

    res is a triple [sizes, trainAcc, testAcc]; draws both curves on the
    current figure, adds a legend, axis labels, and the given title.
    """
    sizes, trainAcc, testAcc = res[0], res[1], res[2]
    plot(sizes, trainAcc, 'b-', sizes, testAcc, 'r-')
    legend(('Train', 'Test'))
    title(titleString)
    xlabel('# of training points')
    ylabel('Accuracy')
    
# A - learning curve for a perceptron trained for 5 epochs
learner = perceptron.Perceptron({'numEpoch': 5})
dataSizes, trainAcc, testAcc = runClassifier.learningCurveSet(
    learner, datasets.SentimentData)
plotCurve("Learning Curve for Perceptron", [dataSizes, trainAcc, testAcc])
savefig("LC_perceptron.png")
close()


# B - number of epochs vs train/test accuracy

# Removed third return value
def trainTest(classifier, X, Y, Xtest, Ytest):
	"""
	Train a classifier on data (X,Y) and evaluate on
	data (Xtest,Ytest).  Return a triple of:
	  * Training data accuracy
	  * Test data accuracy
	  * Individual predictions on Xtest.