def k_nearest_neighbour(k):

    _, trainingset = readdata.read_training_data()
    _, testcases = readdata.read_test_data()

    correct = 0
    total_per_digit = [0] * 10
    correct_per_digit = [0] * 10

    errors = open('./errors.txt', 'w')

    for test in testcases:
        total_per_digit[int(test[0])] += 1
        classification = algorithms.k_nearest_neighbour(k, test, trainingset)
        if classification == test[0]:
            correct_per_digit[int(test[0])] += 1
            correct += 1
        else:
            errors.write('Misclassified ' + test[0] + ' as ' + classification +
                         '\n')

    print 'The overall recognition rate is: ' + str(
        correct / float(len(testcases)))
    print 'Broken down by digit:'
    for i, num in enumerate(correct_per_digit):
        print 'For digit ' + str(i) + ': ' + str(
            num / float(total_per_digit[i]))
# Esempio n. 2 (example-number marker left over from snippet extraction)
# 0
def prepare_training_set():
    """Loads the training set, prepares the data, fits an SVM, and saves it.

    Trains an SVC on at most one example per character class (keeps the
    pickled model small) and dumps the fitted model to 'model.pkl'.
    """
    raw_training_data = read_training_data()
    training_set = []
    training_solution_set = []
    # Set gives O(1) membership checks; the original scanned the label
    # list on every character (O(n) per check).
    seen_labels = set()

    # iterate through all training examples
    for example in raw_training_data:
        image = example[0]
        solution = example[1]
        characters = extract_characters(image)

        # Pair each extracted character with its label; only keep the first
        # example of each class. A little hacky, but minimizes model size.
        for character, label in zip(characters, solution):
            if label not in seen_labels:
                seen_labels.add(label)
                training_set.append(character.ravel())
                training_solution_set.append(label)

    clf = SVC(C=100, gamma=0.0001)
    clf.fit(training_set, training_solution_set)
    joblib.dump(clf, 'model.pkl', compress=9)
def k_nearest_neighbour(k):

    _, trainingset = readdata.read_training_data()
    _, testcases = readdata.read_test_data()

    correct = 0
    total_per_digit = [0] * 10
    correct_per_digit = [0] * 10

    errors = open('./errors.txt', 'w')
    
    for test in testcases:
        total_per_digit[int(test[0])] += 1
        classification = algorithms.k_nearest_neighbour(k, test, trainingset)
        if classification == test[0]:
            correct_per_digit[int(test[0])] += 1
            correct += 1
        else:
            errors.write('Misclassified ' + test[0] + ' as ' + classification + '\n')

    print 'The overall recognition rate is: ' + str(correct / float(len(testcases)))
    print 'Broken down by digit:'
    for i, num in enumerate(correct_per_digit):
        print 'For digit ' + str(i) + ': ' + str(num / float(total_per_digit[i]))
def linear_regression():
    """Run the linear classifier from the algorithms module on the test set."""
    training = readdata.read_training_data()[1]
    tests = readdata.read_test_data()[1]
    algorithms.linear(tests, training)
def rnd_for():
    """Run the random-forest classifier on the loaded test set."""
    training = readdata.read_training_data()[1]
    tests = readdata.read_test_data()[1]
    algorithms.random_forest(tests, training)
def naive_bayes():
    """Run the naive-Bayes classifier on the loaded test set."""
    training = readdata.read_training_data()[1]
    tests = readdata.read_test_data()[1]
    algorithms.naive_bayes(tests, training)
def support_vector_machine():
    """Run the support-vector-machine classifier on the loaded test set."""
    training = readdata.read_training_data()[1]
    tests = readdata.read_test_data()[1]
    algorithms.support_vector_machine(tests, training)
def linear_regression():
    """Evaluate the linear classifier against the test cases."""
    ignored, training_examples = readdata.read_training_data()
    ignored, test_examples = readdata.read_test_data()
    algorithms.linear(test_examples, training_examples)
def rnd_for():
    """Evaluate the random-forest classifier against the test cases."""
    ignored, training_examples = readdata.read_training_data()
    ignored, test_examples = readdata.read_test_data()
    algorithms.random_forest(test_examples, training_examples)
# Esempio n. 10 (example-number marker left over from snippet extraction)
# 0
def naive_bayes():
    """Evaluate the naive-Bayes classifier against the test cases."""
    ignored, training_examples = readdata.read_training_data()
    ignored, test_examples = readdata.read_test_data()
    algorithms.naive_bayes(test_examples, training_examples)
# Esempio n. 11 (example-number marker left over from snippet extraction)
# 0
def support_vector_machine():
    """Evaluate the SVM classifier against the test cases."""
    ignored, training_examples = readdata.read_training_data()
    ignored, test_examples = readdata.read_test_data()
    algorithms.support_vector_machine(test_examples, training_examples)