def runTest(d):
    """Train and evaluate the three baseline classifiers on dataset d.

    d: a dataset object accepted by run.trainTestSet (presumably exposing
       X/Y train and Xte/Yte test arrays -- TODO confirm against run module).
    """
    # Baseline 1: always predicts +1.
    # NOTE(review): converted Python 2 print statements to print() calls for
    # consistency with the Python 3 chunks elsewhere in this file.
    print("AlwaysPredictOne: ")
    h = du.AlwaysPredictOne({})
    run.trainTestSet(h, d)

    # Baseline 2: always predicts the most frequent training label.
    print("AlwaysPredictMostFrequent:")
    h = du.AlwaysPredictMostFrequent({})
    run.trainTestSet(h, d)

    # Baseline 3: predicts from the first feature only.
    print("FirstFeatureClassifier:")
    h = du.FirstFeatureClassifier({})
    run.trainTestSet(h, d)
def main():
    """Run the FirstFeatureClassifier baseline over three datasets.

    NOTE(review): a large body of commented-out experiments with
    AlwaysPredictOne / AlwaysPredictMostFrequent was removed as dead code;
    it exercised the same trainTestSet entry point on TennisData/CFTookAI.
    """
    runClassifier.trainTestSet(dumbClassifiers.FirstFeatureClassifier({}),
                               datasets.TennisData)
    runClassifier.trainTestSet(dumbClassifiers.FirstFeatureClassifier({}),
                               datasets.CFTookAI)
    runClassifier.trainTestSet(dumbClassifiers.FirstFeatureClassifier({}),
                               datasets.CFTookCG)
# Plotting of f left disabled:
# x = linspace(-5, 5, 500)
# plot(x, f(x), 'b-')
# plot(-0.4538, f(-0.4538), 'r*')   # plot the global min
# title('f(x) = sin(x*pi) + x^2/2')

# Gradient descent from two starting points: presumably x=0 reaches the
# global minimum and x=1 the local one -- TODO confirm against f.
x_global, t = gd.gd(f, derF, 0, 10, 0.2)
x_local, t = gd.gd(f, derF, 1, 10, 0.2)

##########
# Linear-classifier experiments
import linear

# Squared loss, no regularization, on axis-aligned 2-D data.
h = linear.LinearClassifier({'lossFunction': linear.SquaredLoss(),
                             'lambda': 0, 'numIter': 100, 'stepSize': 0.5})
runClassifier.trainTestSet(h, datasets.TwoDAxisAligned)

X = datasets.TwoDAxisAligned.X
Y = datasets.TwoDAxisAligned.Y
# mlGraphics.plotLinearClassifier(h, X, Y)

# Same loss with lambda=10 regularization.
h = linear.LinearClassifier({'lossFunction': linear.SquaredLoss(),
                             'lambda': 10, 'numIter': 100, 'stepSize': 0.5})
runClassifier.trainTestSet(h, datasets.TwoDAxisAligned)

# Squared loss, lambda=10, on the diagonal data.
h = linear.LinearClassifier({'lossFunction': linear.SquaredLoss(),
                             'lambda': 10, 'numIter': 100, 'stepSize': 0.5})
runClassifier.trainTestSet(h, datasets.TwoDDiagonal)

# Hinge loss, lambda=1, on the diagonal data.
h = linear.LinearClassifier({'lossFunction': linear.HingeLoss(),
                             'lambda': 1, 'numIter': 100, 'stepSize': 0.5})
runClassifier.trainTestSet(h, datasets.TwoDDiagonal)
import runClassifier, datasets, binary, util, dumbClassifiers, perceptron

# Sanity-check the perceptron against known accuracies.  Each trainTestSet
# call prints the measured accuracies; the print() after it states the
# values expected for a correct implementation.
# NOTE(review): converted Python 2 print statements to print() calls for
# consistency with the Python 3 chunks elsewhere in this file.
runClassifier.trainTestSet(perceptron.Perceptron({'numEpoch': 1}), datasets.TennisData)
print('Expected: Training accuracy 0.642857, test accuracy 0.666667')

runClassifier.trainTestSet(perceptron.Perceptron({'numEpoch': 2}), datasets.TennisData)
print('Expected: Training accuracy 0.857143, test accuracy 1')

runClassifier.trainTestSet(perceptron.Perceptron({'numEpoch': 1}), datasets.SentimentData)
print('Expected: Training accuracy 0.835833, test accuracy 0.755')

runClassifier.trainTestSet(perceptron.Perceptron({'numEpoch': 2}), datasets.SentimentData)
print('Expected: Training accuracy 0.955, test accuracy 0.7975')
# Generate the train/test accuracy curve for written question 6b: train the
# perceptron for 1..num_epochs epochs on the sentiment data and plot both
# accuracies against the epoch count.
# NOTE(review): removed dead commented-out KNN/perceptron/plot experiments
# and the trailing no-op `pass` statement.
num_epochs = 10
train_acc_list = []
test_acc_list = []
for i in range(num_epochs):
    # trainTestSet presumably returns (train_acc, test_acc, ...) -- TODO
    # confirm against runClassifier; the third element is unused here.
    train_acc, test_acc, _ = runClassifier.trainTestSet(
        perceptron.Perceptron({'numEpoch': i + 1}), datasets.SentimentData)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)

X = np.arange(num_epochs) + 1  # epoch numbers 1..num_epochs for the x-axis
plt.plot(X, train_acc_list, label="Training Accuracy")
plt.plot(X, test_acc_list, label="Testing Accuracy")
plt.title("Train/Test Accuracy VS Number of Epochs")
plt.xlabel("Number of Epochs")
plt.ylabel("Accuracy")
plt.legend()
# Requires the graph_output directory to already exist.
plt.savefig(os.path.join("graph_output", "wu6_b" + ".png"))
def main():
    """Decision-tree experiments: print a learned tree, sweep maxDepth over
    several datasets, and plot a hyperparameter curve.

    NOTE(review): converted Python 2 print statements (including bare
    `print` blank-line separators) to print() calls for consistency with
    the Python 3 chunks elsewhere in this file, and turned the dead
    triple-quoted WU4 block into comments.
    """
    # Show the tree learned at depth 5 on the tennis data.
    h = dt.DT({'maxDepth': 5})
    h.train(datasets.TennisData.X, datasets.TennisData.Y)
    print(h)
    print()

    # Accuracy vs. depth on TennisData.
    runClassifier.trainTestSet(dt.DT({'maxDepth': 1}), datasets.TennisData)
    runClassifier.trainTestSet(dt.DT({'maxDepth': 2}), datasets.TennisData)
    runClassifier.trainTestSet(dt.DT({'maxDepth': 3}), datasets.TennisData)
    runClassifier.trainTestSet(dt.DT({'maxDepth': 5}), datasets.TennisData)
    print()

    # Accuracy vs. depth on the CFTookCG data.
    runClassifier.trainTestSet(dt.DT({'maxDepth': 1}), datasets.CFTookCG)
    runClassifier.trainTestSet(dt.DT({'maxDepth': 3}), datasets.CFTookCG)
    runClassifier.trainTestSet(dt.DT({'maxDepth': 5}), datasets.CFTookCG)
    print()

    # curve = runClassifier.learningCurveSet(dt.DT({'maxDepth': 5}), datasets.CFTookAI)
    # runClassifier.plotCurve('DT on AI', curve)

    # Hyperparameter curve: accuracy as a function of maxDepth on CFTookAI.
    curve = runClassifier.hyperparamCurveSet(
        dt.DT({'maxDepth': 5}), 'maxDepth',
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], datasets.CFTookAI)
    runClassifier.plotCurve('DT on AI (hyperparameter)', curve)

    # WU4 (disabled): train a depth-3 tree on CFTookCG and look up the
    # course names/ids at the tree's split features:
    #   h = dt.DT({'maxDepth': 3})
    #   h.train(datasets.CFTookCG.X, datasets.CFTookCG.Y)
    #   print(h)
    #   print("0", datasets.CFDataRatings.courseNames[6], datasets.CFDataRatings.courseIds[6])
    #   print("1-left", datasets.CFDataRatings.courseNames[34], datasets.CFDataRatings.courseIds[34])
    #   print("1-left-2-left", datasets.CFDataRatings.courseNames[48], datasets.CFDataRatings.courseIds[48])
    #   print("1-left-2-right", datasets.CFDataRatings.courseNames[27], datasets.CFDataRatings.courseIds[27])
    #   print("1-right", datasets.CFDataRatings.courseNames[54], datasets.CFDataRatings.courseIds[54])
    #   print("1-right-2-left", datasets.CFDataRatings.courseNames[32], datasets.CFDataRatings.courseIds[32])
    #   print("1-right-2-left", datasets.CFDataRatings.courseNames[53], datasets.CFDataRatings.courseIds[53])
    return
from datasets import *
import datasets, gd, linear, runClassifier

# Train a hinge-loss linear classifier on the 2-D diagonal dataset, report
# its train/test accuracies, then display the learned weights.
hinge_params = {'lossFunction': linear.HingeLoss(),
                'lambda': 1,
                'numIter': 100000,
                'stepSize': 1}
f = linear.LinearClassifier(hinge_params)
runClassifier.trainTestSet(f, datasets.TwoDDiagonal)
print(f)
import binary
import util
import datasets
import runClassifier
import dt

# Train a depth-5 decision tree on the gender dataset and report its
# train/test accuracies.
classifier = dt.DT({'maxDepth': 5})
runClassifier.trainTestSet(classifier, datasets.GenderData)
#Yhat = util.raiseNotDefined() ### TODO: YOUR CODE HERE Yhat = dot( w, X.T ) #debug( w ) #gr = util.raiseNotDefined() ### TODO: YOUR CODE HERE gr = lossFn.lossGradient( X, Y, Yhat ) + lambd * w return gr # run gradient descent; our initial point will just be our # weight vector # pdb.set_trace() w, trajectory = gd(func, grad, self.weights, numIter, stepSize) # store the weights and trajectory self.weights = w self.trajectory = trajectory if __name__=='__main__': import runClassifier, datasets # for debugging import pdb from logging import * basicConfig( level=DEBUG ) h = LinearClassifier( { 'lossFunction': HingeLoss(), 'lambda': 1, 'numIter': 100, 'stepSize': 0.5 } ) runClassifier.trainTestSet( h, datasets.TwoDAxisAligned )
# Disabled earlier run (logistic loss, lambda=10, TwoDDiagonal):
# f = linear.LinearClassifier({'lossFunction': linear.LogisticLoss(), 'lambda': 10, 'numIter': 100, 'stepSize': 0.5})
# runClassifier.trainTestSet(f, datasets.TwoDDiagonal)
#   -> Training accuracy 0.99, test accuracy 0.86
# print(f)
#   -> w=array([ 0.29809083,  1.01287561])

# WU5: logistic-loss classifier on the binary wine data.
print("Logistic:")
logistic_params = {'lossFunction': linear.LogisticLoss(),
                   'lambda': 1,
                   'numIter': 100,
                   'stepSize': 0.5}
f = linear.LinearClassifier(logistic_params)
runClassifier.trainTestSet(f, datasets.WineDataBinary)

# Largest / smallest weight values recorded from a previous run.
large = [0.606423261902, 0.689199007903, 0.710890552154,
         0.770124769156, 0.883289753118]
small = [-1.1695212164, -0.765309390643, -0.683593167789,
         -0.629590728143, -0.532191672468]

# f.weights.sort()
# print(f.weights)
print("Printing WineDataBinary.words:")
# Inspect trees of increasing depth on the tennis data.
for depth in (2, 5):
    h = dt.DT({'maxDepth': depth})
    h.train(datasets.TennisData.X, datasets.TennisData.Y)
    print(h)

# Sentiment data: learn a shallow tree and look up the words it splits on.
h = dt.DT({'maxDepth': 2})
h.train(datasets.SentimentData.X, datasets.SentimentData.Y)
print(h)
for word_idx in (2428, 3842, 3892):
    print(datasets.SentimentData.words[word_idx])

# Accuracy as tree depth grows.
for depth in (1, 3, 5):
    runClassifier.trainTestSet(dt.DT({'maxDepth': depth}), datasets.SentimentData)

# Learning curve at depth 9.
curve = runClassifier.learningCurveSet(dt.DT({'maxDepth': 9}), datasets.SentimentData)
runClassifier.plotCurve('DT on Sentiment Data', curve)

# W2: hyperparameter curve over maxDepth.
curve = runClassifier.hyperparamCurveSet(dt.DT({}), 'maxDepth',
                                         [1, 2, 4, 6, 8, 12, 16],
                                         datasets.SentimentData)
runClassifier.plotCurve('DT on Sentiment Data (hyperparameter)', curve)
import runClassifier, datasets, binary, util, dumbClassifiers, knn, HighD, KNNDigits

# Sanity-check the KNN classifier (epsilon-ball and K-nearest variants)
# against known accuracies.  For each configuration we print the call being
# made and the expected result, then run it.
# NOTE(review): converted Python 2 print statements to print() calls for
# consistency with the Python 3 chunks elsewhere in this file.
print("runClassifier.trainTestSet(knn.KNN({'isKNN': False, 'eps': 0.5}), datasets.TennisData)")
print("Expected: Training accuracy 1, test accuracy 1")
runClassifier.trainTestSet(knn.KNN({'isKNN': False, 'eps': 0.5}), datasets.TennisData)

print("runClassifier.trainTestSet(knn.KNN({'isKNN': False, 'eps': 1.0}), datasets.TennisData)")
print("Expected: Training accuracy 0.857143, test accuracy 0.833333")
runClassifier.trainTestSet(knn.KNN({'isKNN': False, 'eps': 1.0}), datasets.TennisData)

print("runClassifier.trainTestSet(knn.KNN({'isKNN': False, 'eps': 2.0}), datasets.TennisData)")
print("Expected: Training accuracy 0.642857, test accuracy 0.5")
runClassifier.trainTestSet(knn.KNN({'isKNN': False, 'eps': 2.0}), datasets.TennisData)

print("runClassifier.trainTestSet(knn.KNN({'isKNN': True, 'K': 1}), datasets.TennisData)")
print("Expected: Training accuracy 1, test accuracy 1")
runClassifier.trainTestSet(knn.KNN({'isKNN': True, 'K': 1}), datasets.TennisData)

print("runClassifier.trainTestSet(knn.KNN({'isKNN': True, 'K': 3}), datasets.TennisData)")
print("Expected: Training accuracy 0.785714, test accuracy 0.833333")
runClassifier.trainTestSet(knn.KNN({'isKNN': True, 'K': 3}), datasets.TennisData)

print("runClassifier.trainTestSet(knn.KNN({'isKNN': True, 'K': 5}), datasets.TennisData)")
print("Training accuracy 0.857143, test accuracy 0.833333")
runClassifier.trainTestSet(knn.KNN({'isKNN': True, 'K': 5}), datasets.TennisData)

print("runClassifier.trainTestSet(knn.KNN({'isKNN': False, 'eps': 6.0}), datasets.DigitData)")
print("Expected: Training accuracy 0.96, test accuracy 0.64")
runClassifier.trainTestSet(knn.KNN({'isKNN': False, 'eps': 6.0}), datasets.DigitData)

print("runClassifier.trainTestSet(knn.KNN({'isKNN': False, 'eps': 8.0}), datasets.DigitData)")
print("Expected: Training accuracy 0.88, test accuracy 0.81")
# result: array(['geographical information systems and spatial databases',
#                'algorithms', 'neural modeling',
#                'empirical research methods for computer science',
#                'fundamentals of software testing'],
#               dtype='|S62')

# for wu 4
# CGDT = dt.DT({'maxDepth': 3})
# CGDT.train(datasets.CFTookCG.X, datasets.CFTookCG.Y)

# ----- for KNN.py -----
import knn

# Epsilon-ball variant.
eps = 0.5
runClassifier.trainTestSet(knn.KNN({'isKNN': False, 'eps': eps}), datasets.TennisData)
runClassifier.trainTestSet(knn.KNN({'isKNN': False, 'eps': eps}), datasets.CFTookAI)

# K-nearest-neighbors variant.
K = 1
runClassifier.trainTestSet(knn.KNN({'isKNN': True, 'K': K}), datasets.TennisData)
runClassifier.trainTestSet(knn.KNN({'isKNN': True, 'K': K}), datasets.CFTookAI)

# Hyperparameter curves for K and for eps.
curveKNN = runClassifier.hyperparamCurveSet(knn.KNN({'isKNN': True, 'K': 1}),
                                            'K', range(0, 10),
                                            datasets.CFDataRatings)
runClassifier.plotCurve('KNN on AI: K=[0:10]', curveKNN)
curveEps = runClassifier.hyperparamCurveSet(knn.KNN({'isKNN': False, 'eps': 0.5}),
                                            'eps', arange(1, 10, 0.5),
                                            datasets.CFTookAI)
import runClassifier, datasets, binary, util, dumbClassifiers

# Baseline: always predict the most frequent training label, evaluated on
# the tennis data.
h = dumbClassifiers.AlwaysPredictMostFrequent({})
runClassifier.trainTestSet(h, datasets.TennisData)
# NOTE(review): a bare `h` expression statement followed here -- a REPL
# leftover with no effect when run as a script; removed.