def k_nearest_neighbour(k):
    """Evaluate the k-nearest-neighbour classifier on the test data.

    Every test case is classified with ``algorithms.k_nearest_neighbour``.
    The overall recognition rate and a per-digit breakdown are printed, and
    each misclassification is written as one line to ``./errors.txt`` (the
    file is truncated at the start of every run).

    Args:
        k: Forwarded unchanged as the first argument of
            ``algorithms.k_nearest_neighbour``.

    Returns:
        None.
    """
    _, trainingset = readdata.read_training_data()
    _, testcases = readdata.read_test_data()

    correct = 0
    # One slot per digit 0-9; the label in test[0] is converted with int()
    # and used as the index.
    total_per_digit = [0] * 10
    correct_per_digit = [0] * 10

    # The context manager guarantees the error log is flushed and closed even
    # if classification raises part-way through the test set.
    with open('./errors.txt', 'w') as errors:
        for test in testcases:
            label = test[0]
            digit = int(label)
            total_per_digit[digit] += 1
            classification = algorithms.k_nearest_neighbour(
                k, test, trainingset)
            if classification == label:
                correct_per_digit[digit] += 1
                correct += 1
            else:
                errors.write(
                    'Misclassified ' + label + ' as ' + classification + '\n')

    # Single-argument print(...) emits the same text under Python 2 and 3.
    total = len(testcases)
    # A rate over zero samples is undefined; report NaN instead of crashing.
    overall = correct / float(total) if total else float('nan')
    print('The overall recognition rate is: ' + str(overall))
    print('Broken down by digit:')
    for i, num in enumerate(correct_per_digit):
        seen = total_per_digit[i]
        rate = num / float(seen) if seen else float('nan')
        print('For digit ' + str(i) + ': ' + str(rate))
def prepare_training_set():
    """Train an SVC on one example per class and persist it to ``model.pkl``.

    The raw training data comes from ``read_training_data()``; each entry is
    indexed as ``entry[0]`` (image) and ``entry[1]`` (solution). The image is
    split with ``extract_characters`` and the character at position ``i`` is
    paired with ``solution[i]``. Only the first character seen for each label
    is kept, which keeps the fitted model small.

    The classifier is ``SVC(C=100, gamma=0.0001)`` and is written with
    ``joblib.dump(..., 'model.pkl', compress=9)``.

    Returns:
        None.
    """
    samples = []
    labels = []

    for entry in read_training_data():
        image = entry[0]
        solution = entry[1]

        for position, character in enumerate(extract_characters(image)):
            label = solution[position]
            # Keep only the first example of every class (a little hacky,
            # but it minimizes model size).
            if label in labels:
                continue
            samples.append(character.ravel())
            labels.append(label)

    classifier = SVC(C=100, gamma=0.0001)
    classifier.fit(samples, labels)
    joblib.dump(classifier, 'model.pkl', compress=9)
def k_nearest_neighbour(k):
    """Evaluate the k-nearest-neighbour classifier on the test data.

    Every test case is classified with ``algorithms.k_nearest_neighbour``.
    The overall recognition rate and a per-digit breakdown are printed, and
    each misclassification is written as one line to ``./errors.txt`` (the
    file is truncated at the start of every run).

    Args:
        k: Forwarded unchanged as the first argument of
            ``algorithms.k_nearest_neighbour``.

    Returns:
        None.
    """
    _, trainingset = readdata.read_training_data()
    _, testcases = readdata.read_test_data()

    correct = 0
    # One slot per digit 0-9; the label in test[0] is converted with int()
    # and used as the index.
    total_per_digit = [0] * 10
    correct_per_digit = [0] * 10

    # The context manager guarantees the error log is flushed and closed even
    # if classification raises part-way through the test set.
    with open('./errors.txt', 'w') as errors:
        for test in testcases:
            label = test[0]
            digit = int(label)
            total_per_digit[digit] += 1
            classification = algorithms.k_nearest_neighbour(
                k, test, trainingset)
            if classification == label:
                correct_per_digit[digit] += 1
                correct += 1
            else:
                errors.write(
                    'Misclassified ' + label + ' as ' + classification + '\n')

    # Single-argument print(...) emits the same text under Python 2 and 3.
    total = len(testcases)
    # A rate over zero samples is undefined; report NaN instead of crashing.
    overall = correct / float(total) if total else float('nan')
    print('The overall recognition rate is: ' + str(overall))
    print('Broken down by digit:')
    for i, num in enumerate(correct_per_digit):
        seen = total_per_digit[i]
        rate = num / float(seen) if seen else float('nan')
        print('For digit ' + str(i) + ': ' + str(rate))
def linear_regression():
    """Run ``algorithms.linear`` on the loaded test and training data.

    Both ``readdata`` loaders return a pair; the first element is discarded
    and the second is used. The test cases are passed first, the training set
    second. Nothing is returned.
    """
    _unused_train_head, training_data = readdata.read_training_data()
    _unused_test_head, test_data = readdata.read_test_data()
    algorithms.linear(test_data, training_data)
def rnd_for():
    """Run ``algorithms.random_forest`` on the loaded test and training data.

    Both ``readdata`` loaders return a pair; the first element is discarded
    and the second is used. The test cases are passed first, the training set
    second. Nothing is returned.
    """
    _unused_train_head, training_data = readdata.read_training_data()
    _unused_test_head, test_data = readdata.read_test_data()
    algorithms.random_forest(test_data, training_data)
def naive_bayes():
    """Run ``algorithms.naive_bayes`` on the loaded test and training data.

    Both ``readdata`` loaders return a pair; the first element is discarded
    and the second is used. The test cases are passed first, the training set
    second. Nothing is returned.
    """
    _unused_train_head, training_data = readdata.read_training_data()
    _unused_test_head, test_data = readdata.read_test_data()
    algorithms.naive_bayes(test_data, training_data)
def support_vector_machine():
    """Run ``algorithms.support_vector_machine`` on the loaded data.

    Both ``readdata`` loaders return a pair; the first element is discarded
    and the second is used. The test cases are passed first, the training set
    second. Nothing is returned.
    """
    _unused_train_head, training_data = readdata.read_training_data()
    _unused_test_head, test_data = readdata.read_test_data()
    algorithms.support_vector_machine(test_data, training_data)