def test_numpy_load(self):
        """Check that ``load(..., np=True)`` returns numpy arrays matching the fixtures.

        Calls ``load_data.load`` with ``self.xFile`` and ``self.yFile`` and
        verifies that both results are ``numpy.ndarray`` instances whose
        contents equal ``self.expectedX`` and ``self.expectedY`` element by
        element.
        """
        features, labels = load_data.load(self.xFile, self.yFile, np=True)
        self.assertIsInstance(features, numpy.ndarray)
        self.assertIsInstance(labels, numpy.ndarray)

        # Feature matrix: same number of rows, same row lengths, same cells.
        self.assertEqual(len(features), len(self.expectedX))
        for row_idx, actual_row in enumerate(features):
            self.assertEqual(len(actual_row), len(self.expectedX[row_idx]))
            for col_idx in range(len(actual_row)):
                self.assertEqual(
                    features[row_idx, col_idx],
                    self.expectedX[row_idx][col_idx],
                )

        # Label vector: same length, same entries.
        self.assertEqual(len(labels), len(self.expectedY))
        for idx, actual_label in enumerate(labels):
            self.assertEqual(actual_label, self.expectedY[idx])
Beispiel #2
0
#!/usr/bin/python
'''
Script to run a k-nearest-neighbors algorithm and print out the 5-fold cross-validation
error for various k
'''
from sklearn import cross_validation, neighbors
from plos_classification.load_data import load
from datetime import datetime

# Progress messages are timestamped. A single pre-formatted argument to
# print(...) produces the same output as the Python 2 print statement and
# also parses under Python 3.
print('%s %s' % (datetime.now(), 'loading data'))
X, y = load('data/xtrain.csv', 'data/ytrain.csv', np=True)
total = len(X)

print('%s %s' % (datetime.now(), 'iterating over k'))
# Try k = 2..10 neighbours and report the mean 5-fold cross-validation error.
# NOTE(review): this relies on the legacy sklearn.cross_validation.KFold(n, n_folds)
# signature; newer scikit-learn releases moved KFold to sklearn.model_selection.
for k in range(2, 11):
    errors = []
    for train_index, test_index in cross_validation.KFold(total, 5):
        model = neighbors.KNeighborsClassifier(n_neighbors=k)
        model.fit(X[train_index], y[train_index])
        # score() returns the mean accuracy on the held-out fold, so the
        # error rate that this script reports is its complement.
        errors.append(1.0 - model.score(X[test_index], y[test_index]))
    print('Average error for k = %s: %s' % (k, sum(errors) / len(errors)))
 def test_standard_load(self):
     """Check that a default ``load`` call returns the expected X and y.

     Calls ``load_data.load`` with ``self.xFile`` and ``self.yFile`` (no
     ``np`` flag) and compares both results directly against
     ``self.expectedX`` and ``self.expectedY``.
     """
     loaded_x, loaded_y = load_data.load(self.xFile, self.yFile)
     self.assertEqual(loaded_x, self.expectedX)
     self.assertEqual(loaded_y, self.expectedY)
#!/usr/bin/python
'''
Script to run a k-nearest-neighbors algorithm and print out the 5-fold cross-validation
error for various k
'''
from sklearn import cross_validation, neighbors
from plos_classification.load_data import load
from datetime import datetime

# Timestamped progress output. print(...) with one pre-formatted string
# behaves identically to the Python 2 print statement and is also valid
# Python 3 syntax.
print('%s %s' % (datetime.now(), 'loading data'))
X, y = load('data/xtrain.csv', 'data/ytrain.csv', np=True)
total = len(X)

print('%s %s' % (datetime.now(), 'iterating over k'))
# For each neighbourhood size k in 2..10, average the held-out error over
# five folds.
# NOTE(review): cross_validation.KFold(total, 5) is the legacy scikit-learn
# API; confirm the installed version still ships sklearn.cross_validation.
for k in range(2, 11):
    errors = []
    for train_index, test_index in cross_validation.KFold(total, 5):
        model = neighbors.KNeighborsClassifier(n_neighbors=k)
        model.fit(X[train_index], y[train_index])
        # model.score() is mean accuracy; the script reports error, i.e.
        # the fraction of misclassified samples, so take the complement.
        accuracy = model.score(X[test_index], y[test_index])
        errors.append(1.0 - accuracy)
    print('Average error for k = %s: %s' % (k, sum(errors) / len(errors)))