예제 #1
0
def runTest(d):
    """Run every baseline classifier from ``du`` through ``run.trainTestSet``.

    For each baseline a heading is printed, the classifier is constructed
    with an empty option dict, and ``run.trainTestSet`` is called with the
    classifier and ``d``.

    Args:
        d: Dataset object, forwarded unchanged to ``run.trainTestSet``.
    """
    # (heading, classifier class).  Headings are kept verbatim, including the
    # trailing space on the first one.
    baselines = (
        ("AlwaysPredictOne: ", du.AlwaysPredictOne),
        ("AlwaysPredictMostFrequent:", du.AlwaysPredictMostFrequent),
        ("FirstFeatureClassifier:", du.FirstFeatureClassifier),
    )
    for heading, classifier_cls in baselines:
        # Single-argument print(...) emits the same text on Python 2 and 3,
        # whereas the ``print "..."`` statement is a SyntaxError on Python 3.
        print(heading)
        h = classifier_cls({})
        run.trainTestSet(h, d)
예제 #2
0
파일: q1.py 프로젝트: kotarohara/ml2011fall
def main():
    """Evaluate FirstFeatureClassifier on three datasets.

    A fresh classifier (empty option dict) is built for each of TennisData,
    CFTookAI and CFTookCG, in that order, and handed to
    ``runClassifier.trainTestSet``.
    """
    # Earlier, now-disabled experiments in this function ran AlwaysPredictOne
    # and AlwaysPredictMostFrequent on TennisData and CFTookAI.
    for data in (datasets.TennisData, datasets.CFTookAI, datasets.CFTookCG):
        classifier = dumbClassifiers.FirstFeatureClassifier({})
        runClassifier.trainTestSet(classifier, data)
예제 #3
0
# Disabled plotting of f and its global minimum (kept for reference).
# x = linspace(-5, 5, 500)
# plot(x, f(x), 'b-')
# plot(-0.4538, f(-0.4538), 'r*') #plot the global min
# title('f(x) = sin(x*pi) + x^2/2')

# Two gd.gd runs on f / derF that differ only in the third argument (0 vs 1).
# Presumably the arguments are (function, derivative, start point, number of
# iterations, step size) -- confirm against gd.gd.
# Note that the second call overwrites `t` from the first.
x_global, t = gd.gd(f, derF, 0, 10, 0.2)
x_local, t = gd.gd(f, derF, 1, 10, 0.2)

##########

# Linear classifier experiments.
import linear
# Squared loss, lambda 0 (presumably "no regularisation" -- confirm in linear).
h = linear.LinearClassifier({'lossFunction': linear.SquaredLoss(), 'lambda': 0, 'numIter': 100, 'stepSize': 0.5})


runClassifier.trainTestSet(h, datasets.TwoDAxisAligned)

# Kept at module level; only the disabled plotting call below uses them here.
X = datasets.TwoDAxisAligned.X
Y = datasets.TwoDAxisAligned.Y

#mlGraphics.plotLinearClassifier(h, X, Y)

# Same dataset and loss, lambda raised to 10.
h = linear.LinearClassifier({'lossFunction': linear.SquaredLoss(), 'lambda': 10, 'numIter': 100, 'stepSize': 0.5})
runClassifier.trainTestSet(h, datasets.TwoDAxisAligned)


# Same configuration as the previous run, now on TwoDDiagonal.
h = linear.LinearClassifier({'lossFunction': linear.SquaredLoss(), 'lambda': 10, 'numIter': 100, 'stepSize': 0.5})
runClassifier.trainTestSet(h, datasets.TwoDDiagonal)

# Hinge loss with lambda 1 on TwoDDiagonal.
h = linear.LinearClassifier({'lossFunction': linear.HingeLoss(), 'lambda': 1, 'numIter': 100, 'stepSize': 0.5})
runClassifier.trainTestSet(h, datasets.TwoDDiagonal)
예제 #4
0
import runClassifier, datasets, binary, util, dumbClassifiers, perceptron

# Perceptron checks: every run is followed by a line stating the accuracies
# that run is expected to report, so the two can be compared by eye.
# Each entry is (numEpoch, dataset, expectation line printed after the run).
PERCEPTRON_CHECKS = (
    (1, datasets.TennisData,
     'Expected: Training accuracy 0.642857, test accuracy 0.666667'),
    (2, datasets.TennisData,
     'Expected: Training accuracy 0.857143, test accuracy 1'),
    (1, datasets.SentimentData,
     'Expected: Training accuracy 0.835833, test accuracy 0.755'),
    (2, datasets.SentimentData,
     'Expected: Training accuracy 0.955, test accuracy 0.7975'),
)

for num_epoch, dataset, expected in PERCEPTRON_CHECKS:
    runClassifier.trainTestSet(perceptron.Perceptron({'numEpoch': num_epoch}),
                               dataset)
    # Single-argument print(...) produces identical output on Python 2 and 3;
    # the ``print '...'`` statement form is a SyntaxError on Python 3.
    print(expected)
예제 #5
0

    # runClassifier.trainTestSet(knn.KNN({'isKNN': True, 'K': 1}), datasets.DigitData)
    # runClassifier.trainTestSet(knn.KNN({'isKNN': True, 'K': 3}), datasets.DigitData)
    # runClassifier.trainTestSet(knn.KNN({'isKNN': True, 'K': 5}), datasets.DigitData)
    # runClassifier.trainTestSet(perceptron.Perceptron({'numEpoch': 2}), datasets.TennisData)

    # runClassifier.plotData(datasets.TwoDDiagonal.X, datasets.TwoDDiagonal.Y)
    # h = perceptron.Perceptron({'numEpoch': 200})
    # h.train(datasets.TwoDDiagonal.X, datasets.TwoDDiagonal.Y)
    # runClassifier.plotClassifier(np.array([7.3, 18.9]), 0.0)
    # runClassifier.trainTestSet(perceptron.Perceptron({'numEpoch': 2}), datasets.SentimentData)

    # generate curve for wu6 b
    num_epochs = 10
    train_acc_list = []
    test_acc_list = []
    for i in range(num_epochs):
        train_acc, test_acc, _= runClassifier.trainTestSet(perceptron.Perceptron({'numEpoch': i+1}), datasets.SentimentData)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
    X = np.arange(num_epochs) + 1
    # runClassifier.plotCurve('Perceptron Learning Curve for 5 epochs', percetron_curve)
    plt.plot(X,train_acc_list, label="Training Accuracy")
    plt.plot(X,test_acc_list, label="Testing Accuracy")
    plt.title("Train/Test Accuracy VS Number of Epochs")
    plt.xlabel("Number of Epochs")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.savefig(os.path.join("graph_output","wu6_b" +".png"))
    pass
예제 #6
0
def main():
	"""Run the decision-tree experiments.

	Steps, in order:
	  1. Train a depth-5 tree on TennisData and print it.
	  2. Call ``runClassifier.trainTestSet`` on TennisData for maxDepth
	     1, 2, 3 and 5, then on CFTookCG for maxDepth 1, 3 and 5.
	  3. Sweep ``maxDepth`` over 1..10 on CFTookAI with
	     ``runClassifier.hyperparamCurveSet`` and plot the resulting curve.
	"""
	h = dt.DT({'maxDepth': 5})
	h.train(datasets.TennisData.X, datasets.TennisData.Y)
	# print(h) and print("") write the same text on Python 2 and Python 3,
	# unlike the ``print h`` / bare ``print`` statements, which only parse
	# on Python 2.
	print(h)
	print("")

	for depth in (1, 2, 3, 5):
		runClassifier.trainTestSet(dt.DT({'maxDepth': depth}), datasets.TennisData)
	print("")

	for depth in (1, 3, 5):
		runClassifier.trainTestSet(dt.DT({'maxDepth': depth}), datasets.CFTookCG)
	print("")

	#curve = runClassifier.learningCurveSet(dt.DT({'maxDepth': 5}), datasets.CFTookAI)
	#runClassifier.plotCurve('DT on AI', curve)

	depths = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
	curve = runClassifier.hyperparamCurveSet(dt.DT({'maxDepth': 5}), 'maxDepth', depths, datasets.CFTookAI)
	runClassifier.plotCurve('DT on AI (hyperparameter)', curve)

	# Disabled WU4 exploration, kept verbatim for reference (Python 2 syntax).
	# NOTE(review): the last two labels are both "1-right-2-left"; the second
	# presumably should read "1-right-2-right" -- confirm before re-enabling.
	#
	# print "WU4:"
	# h = dt.DT({'maxDepth': 3})
	# h.train( datasets.CFTookCG.X, datasets.CFTookCG.Y )
	# print h
	#
	# print "0", datasets.CFDataRatings.courseNames[6], datasets.CFDataRatings.courseIds[6]
	# print "1-left", datasets.CFDataRatings.courseNames[34], datasets.CFDataRatings.courseIds[34]
	# print "1-left-2-left", datasets.CFDataRatings.courseNames[48], datasets.CFDataRatings.courseIds[48]
	# print "1-left-2-right", datasets.CFDataRatings.courseNames[27], datasets.CFDataRatings.courseIds[27]
	# print "1-right", datasets.CFDataRatings.courseNames[54], datasets.CFDataRatings.courseIds[54]
	# print "1-right-2-left", datasets.CFDataRatings.courseNames[32], datasets.CFDataRatings.courseIds[32]
	# print "1-right-2-left", datasets.CFDataRatings.courseNames[53], datasets.CFDataRatings.courseIds[53]
예제 #7
0
from datasets import *
import datasets,gd,linear,runClassifier
# Hinge-loss linear classifier (lambda 1, 100000 iterations, step size 1),
# run through trainTestSet on TwoDDiagonal and printed afterwards.
f = linear.LinearClassifier({
    'lossFunction': linear.HingeLoss(),
    'lambda': 1,
    'numIter': 100000,
    'stepSize': 1,
})
runClassifier.trainTestSet(f, datasets.TwoDDiagonal)
print(f)


예제 #8
0
import binary
import util
import datasets
import runClassifier
import dt

# Decision tree limited to depth 5, run through trainTestSet on GenderData.
runClassifier.trainTestSet(
    dt.DT({'maxDepth': 5}),
    datasets.GenderData,
)
예제 #9
0
			#Yhat = util.raiseNotDefined()	### TODO: YOUR CODE HERE
			Yhat = dot( w, X.T ) 

			#debug( w )
			#gr   = util.raiseNotDefined()	### TODO: YOUR CODE HERE
			gr = lossFn.lossGradient( X, Y, Yhat ) + lambd * w
			
			return gr

		# run gradient descent; our initial point will just be our
		# weight vector
		# pdb.set_trace()
		w, trajectory = gd(func, grad, self.weights, numIter, stepSize)

		# store the weights and trajectory
		self.weights = w
		self.trajectory = trajectory

# Script entry point: runs a hinge-loss LinearClassifier on TwoDAxisAligned
# when this module is executed directly.
if __name__=='__main__':
	# Imported here rather than at the top of the file, so they are only
	# loaded when the module is run as a script.
	import runClassifier, datasets
	
	# Debugging aids: pdb is only referenced from commented-out code in the
	# visible part of this file.
	# NOTE(review): the wildcard import puts logging's public names (debug,
	# info, basicConfig, DEBUG, ...) into module scope; code elsewhere in this
	# file may rely on that (e.g. the disabled ``debug( w )`` call), so check
	# for such uses before replacing it with ``import logging``.
	import pdb
	from logging import *
	basicConfig( level=DEBUG )
	
	# Hinge loss, lambda 1, 100 iterations, step size 0.5.
	h = LinearClassifier( { 'lossFunction': HingeLoss(), 'lambda': 1, 'numIter': 100, 'stepSize': 0.5 } )
	runClassifier.trainTestSet( h, datasets.TwoDAxisAligned )
	
	
	
예제 #10
0
#
# Disabled earlier experiment: logistic loss with lambda 10 on TwoDDiagonal,
# together with the accuracies and weights recorded for it.
# f = linear.LinearClassifier({'lossFunction': linear.LogisticLoss(), 'lambda': 10, 'numIter': 100, 'stepSize': 0.5})
# runClassifier.trainTestSet(f, datasets.TwoDDiagonal)
# # Training accuracy 0.99, test accuracy 0.86
# print(f)
# # w=array([ 0.29809083,  1.01287561])

# WU5
# Logistic-loss linear classifier (lambda 1, 100 iterations, step size 0.5)
# run through trainTestSet on WineDataBinary.
print("Logistic:")
f = linear.LinearClassifier({
    'lossFunction': linear.LogisticLoss(),
    'lambda': 1,
    'numIter': 100,
    'stepSize': 0.5
})
runClassifier.trainTestSet(f, datasets.WineDataBinary)

# Hard-coded values in ascending order.
# NOTE(review): presumably the five largest learned weights, copied from an
# earlier run of the classifier above -- confirm; nothing here recomputes them.
large = [
    0.606423261902, 0.689199007903, 0.710890552154, 0.770124769156,
    0.883289753118
]

# NOTE(review): presumably the five smallest (most negative) learned weights
# from the same run -- confirm.
small = [
    -1.1695212164, -0.765309390643, -0.683593167789, -0.629590728143,
    -0.532191672468
]

# f.weights.sort()
# print(f.weights)

# Heading for output produced by code that follows this excerpt.
print("Printing WineDataBinary.words:")
예제 #11
0
# Tennis data: train and print trees with depth limits 2 and 5.
for max_depth in (2, 5):
    h = dt.DT({'maxDepth': max_depth})
    h.train(datasets.TennisData.X, datasets.TennisData.Y)
    print(h)

# Sentiment data: train and print a depth-2 tree.
h = dt.DT({'maxDepth': 2})
h.train(datasets.SentimentData.X, datasets.SentimentData.Y)
print(h)

# Look up three vocabulary entries by index (presumably the features the
# depth-2 tree above splits on -- confirm against its printed form).
for word_index in (2428, 3842, 3892):
    print(datasets.SentimentData.words[word_index])

# Train/test runs on the sentiment data for depth limits 1, 3 and 5.
for max_depth in (1, 3, 5):
    runClassifier.trainTestSet(dt.DT({'maxDepth': max_depth}),
                               datasets.SentimentData)

# Learning curve for a depth-9 tree on the sentiment data.
depth9_tree = dt.DT({'maxDepth': 9})
curve = runClassifier.learningCurveSet(depth9_tree, datasets.SentimentData)
runClassifier.plotCurve('DT on Sentiment Data', curve)

# W2: sweep the maxDepth hyperparameter and plot the resulting curve.
depth_grid = [1, 2, 4, 6, 8, 12, 16]
curve = runClassifier.hyperparamCurveSet(dt.DT({}), 'maxDepth', depth_grid,
                                         datasets.SentimentData)
runClassifier.plotCurve('DT on Sentiment Data (hyperparameter)', curve)
예제 #12
0
import runClassifier, datasets, binary, util, dumbClassifiers, knn, HighD, KNNDigits

# KNN / epsilon-ball checks.  For every configuration the script prints the
# call it is about to make, then the accuracies that call is expected to
# report, and finally performs the call.

# Text of the announced call; %r renders True/False, the quoted parameter
# name and the numeric value exactly as they appear in source code.
COMMAND_TEMPLATE = (
    "runClassifier.trainTestSet(knn.KNN({'isKNN': %r, %r: %r}), datasets.%s)"
)

# (isKNN, parameter name, parameter value, dataset name, expected accuracies)
KNN_CHECKS = (
    (False, 'eps', 0.5, 'TennisData',
     "Training accuracy 1, test accuracy 1"),
    (False, 'eps', 1.0, 'TennisData',
     "Training accuracy 0.857143, test accuracy 0.833333"),
    (False, 'eps', 2.0, 'TennisData',
     "Training accuracy 0.642857, test accuracy 0.5"),
    (True, 'K', 1, 'TennisData',
     "Training accuracy 1, test accuracy 1"),
    (True, 'K', 3, 'TennisData',
     "Training accuracy 0.785714, test accuracy 0.833333"),
    # This entry's expectation line was the only one printed without the
    # "Expected: " prefix; it now goes through the same path as the rest.
    (True, 'K', 5, 'TennisData',
     "Training accuracy 0.857143, test accuracy 0.833333"),
    (False, 'eps', 6.0, 'DigitData',
     "Training accuracy 0.96, test accuracy 0.64"),
)

for is_knn, param, value, dataset_name, expected in KNN_CHECKS:
    # Single-argument print(...) emits the same text on Python 2 and 3; the
    # ``print "..."`` statement form is a SyntaxError on Python 3.
    print(COMMAND_TEMPLATE % (is_knn, param, value, dataset_name))
    print("Expected: " + expected)
    runClassifier.trainTestSet(knn.KNN({'isKNN': is_knn, param: value}),
                               getattr(datasets, dataset_name))

# eps = 8.0 on DigitData: only the announcement and the expectation are
# printed here.
# NOTE(review): the matching trainTestSet call is not part of this section --
# presumably it follows immediately after; confirm.
print(COMMAND_TEMPLATE % (False, 'eps', 8.0, 'DigitData'))
print("Expected: Training accuracy 0.88, test accuracy 0.81")
예제 #13
0
#result:array(['geographical information systems and spatial databases',
      #  'algorithms', 'neural modeling',
      #  'empirical research methods for computer science',
      #  'fundamentals of software testing'], 
      # dtype='|S62')
# for wu 4
# CGDT = dt.DT({'maxDepth': 3})
# CGDT.train(datasets.CFTookCG.X, datasets.CFTookCG.Y)



# ----- for KNN.py -----
import knn

# Epsilon-ball classifier (isKNN False) with eps = 0.5 on two datasets.
eps = 0.5
runClassifier.trainTestSet(knn.KNN({'isKNN': False, 'eps': eps}), datasets.TennisData)

runClassifier.trainTestSet(knn.KNN({'isKNN': False, 'eps': eps}), datasets.CFTookAI)


# K-nearest-neighbour classifier (isKNN True) with K = 1 on the same datasets.
# These statements must start in column 0: a top-level statement with leading
# whitespace is an "IndentationError: unexpected indent" and stops the whole
# script from loading.
K = 1
runClassifier.trainTestSet(knn.KNN({'isKNN': True, 'K': K}), datasets.TennisData)

runClassifier.trainTestSet(knn.KNN({'isKNN': True, 'K': K}), datasets.CFTookAI)

# curves
# Sweep K over range(0, 10) and plot the resulting curve.
# NOTE(review): the sweep starts at K = 0 and uses datasets.CFDataRatings while
# the plot title says "AI"; K >= 1 and datasets.CFTookAI may have been
# intended -- confirm before changing, both are left as written.
curveKNN = runClassifier.hyperparamCurveSet(knn.KNN({'isKNN': True, 'K': 1}), 'K', range(0, 10), datasets.CFDataRatings)

runClassifier.plotCurve('KNN on AI: K=[0:10]', curveKNN)

# Sweep eps over 1.0, 1.5, ..., 9.5 on CFTookAI.  `arange` is used unqualified,
# so it has to be in scope already (presumably via a numpy star import earlier
# in the file -- confirm).
curveEps = runClassifier.hyperparamCurveSet(knn.KNN({'isKNN': False, 'eps': 0.5}), 'eps', arange(1, 10, 0.5), datasets.CFTookAI)
예제 #14
0
import runClassifier, datasets, binary, util, dumbClassifiers

# AlwaysPredictMostFrequent baseline (empty option dict) run through
# trainTestSet on TennisData.
h = dumbClassifiers.AlwaysPredictMostFrequent({})
runClassifier.trainTestSet(h, datasets.TennisData)
# Bare expression: echoes `h` in an interactive session, but does nothing when
# this runs as a script.
# NOTE(review): if the classifier should be shown from a script, an explicit
# print is needed -- confirm intent.
h