def test_numpy_load(self):
    '''
    Load the fixture files with np=True and check that both results are
    numpy arrays whose contents match expectedX / expectedY element by element.
    '''
    features, labels = load_data.load(self.xFile, self.yFile, np=True)

    # Both return values must be numpy arrays when np=True is passed.
    self.assertIsInstance(features, numpy.ndarray)
    self.assertIsInstance(labels, numpy.ndarray)

    # Compare the feature matrix row by row, then cell by cell; the length
    # assertions run first so a size mismatch is reported as such.
    self.assertEqual(len(features), len(self.expectedX))
    for row_idx, expected_row in enumerate(self.expectedX):
        self.assertEqual(len(features[row_idx]), len(expected_row))
        for col_idx, expected_value in enumerate(expected_row):
            self.assertEqual(features[row_idx, col_idx], expected_value)

    # Compare the label vector entry by entry.
    self.assertEqual(len(labels), len(self.expectedY))
    for idx, expected_label in enumerate(self.expectedY):
        self.assertEqual(labels[idx], expected_label)
#!/usr/bin/python
'''
Script to run a K - nearest neighbors algorithm and print out the 5-fold
cross validation error for various k
'''
from sklearn import cross_validation, neighbors
from plos_classification.load_data import load
from datetime import datetime

# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20 in
# favour of sklearn.model_selection; this script needs an older scikit-learn
# until it is ported.

# A single parenthesised argument prints the same text under both the
# Python 2 print statement and the Python 3 print function.
print('%s loading data' % datetime.now())
# np=True presumably yields numpy arrays, which the index-array slicing
# below relies on -- confirm against plos_classification.load_data.
X, y = load('data/xtrain.csv', 'data/ytrain.csv', np=True)
total = len(X)

print('%s iterating over k' % datetime.now())
for k in range(2, 11):
    errors = []
    # 5-fold split over the sample indices 0..total-1.
    for train_index, test_index in cross_validation.KFold(total, 5):
        model = neighbors.KNeighborsClassifier(k)
        model.fit(X[train_index], y[train_index])
        # score() returns the mean accuracy on the held-out fold, so the
        # error rate is its complement. Appending the raw score here would
        # report accuracy under an "error" label.
        errors.append(1.0 - model.score(X[test_index], y[test_index]))
    print('Average error for k = %s: %s' % (k, sum(errors) / len(errors)))
def test_standard_load(self):
    '''
    Load the fixture files with the default options and check that the
    results compare equal to expectedX and expectedY.
    '''
    loaded_x, loaded_y = load_data.load(self.xFile, self.yFile)

    self.assertEqual(loaded_x, self.expectedX)
    self.assertEqual(loaded_y, self.expectedY)
#!/usr/bin/python
'''
Script to run a K - nearest neighbors algorithm and print out the 5-fold
cross validation error for various k
'''
from sklearn import cross_validation, neighbors
from plos_classification.load_data import load
from datetime import datetime

# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20 in
# favour of sklearn.model_selection; this script needs an older scikit-learn
# until it is ported.

# A single parenthesised argument prints the same text under both the
# Python 2 print statement and the Python 3 print function.
print('%s loading data' % datetime.now())
# np=True presumably yields numpy arrays, which the index-array slicing
# below relies on -- confirm against plos_classification.load_data.
X, y = load('data/xtrain.csv', 'data/ytrain.csv', np=True)
total = len(X)

print('%s iterating over k' % datetime.now())
for k in range(2, 11):
    errors = []
    # 5-fold split over the sample indices 0..total-1.
    for train_index, test_index in cross_validation.KFold(total, 5):
        model = neighbors.KNeighborsClassifier(k)
        model.fit(X[train_index], y[train_index])
        # score() returns the mean accuracy on the held-out fold, so the
        # error rate is its complement. Appending the raw score here would
        # report accuracy under an "error" label.
        errors.append(1.0 - model.score(X[test_index], y[test_index]))
    print('Average error for k = %s: %s' % (k, sum(errors) / len(errors)))