Esempio n. 1
0
def run_knn(cases=300):
    """Evaluate the k-NN classifier on a random contiguous slice of ``test_data``.

    Takes up to ``cases`` consecutive entries of the module-level ``test_data``
    mapping, classifies each one with ``clsfy.knn(features, train_data, 2, 3)``
    and prints a line per sample followed by summary statistics.

    Each row is read as ``row[0]`` = true label and ``row[1:]`` = features.
    The classifier result is read by index: 0 = predicted label,
    1 = certainty, 2 and 3 = the values reported below as minimum and
    average distance.

    Args:
        cases: Maximum number of test rows to evaluate (default 300).

    Returns:
        None. All output goes to stdout.
    """

    def _mean(values):
        # One bucket is empty whenever every prediction is right (or every
        # prediction is wrong); report NaN instead of raising
        # ZeroDivisionError.
        return sum(values) / len(values) if values else float('nan')

    rows = list(test_data.items())
    # randint is inclusive on both ends, so the last valid start index is
    # len(rows) - cases; clamp to 0 when fewer rows than requested exist.
    start = random.randint(0, max(0, len(rows) - cases))
    examples = dict(rows[start:start + cases])

    # Use the number of rows actually evaluated as the denominator; it can be
    # smaller than `cases` when the data set is short.
    total = len(examples)
    if total == 0:
        print("No test cases to evaluate.")
        return

    right = 0
    wrong = 0
    positives = 0
    negatives = 0
    correct_certain = []
    incorrect_certain = []
    correct_avg_dist = []
    incorrect_avg_dist = []
    correct_min_dist = []
    incorrect_min_dist = []

    for row in examples.values():
        actual = row[0]
        classification = clsfy.knn(row[1:], train_data, 2, 3)
        # Label 0 is counted as a negative prediction, anything else as positive.
        if classification[0] == 0:
            negatives += 1
        else:
            positives += 1
        if classification[0] == actual:
            right += 1
            print("Correct classification. Certainty: ", classification[1])
            correct_certain.append(classification[1])
            correct_min_dist.append(classification[2])
            correct_avg_dist.append(classification[3])
        else:
            wrong += 1
            print("Incorrect classification. Certainty: ", classification[1], ' Actual: ', actual)
            incorrect_certain.append(classification[1])
            incorrect_min_dist.append(classification[2])
            incorrect_avg_dist.append(classification[3])

    print("\n\nRight: ", right, '/', total, ' = ', right / total)
    print("Wrong: ", wrong, '/', total, ' = ', wrong / total)

    print("\nCorrect Certainty: ", _mean(correct_certain))
    print("Incorrect Certainty: ", _mean(incorrect_certain))

    print("\nPositives: ", positives, '/', total)
    print("Negatives: ", negatives, '/', total)

    print("\nCorrect Min. Distance Average: ", _mean(correct_min_dist))
    print("Incorrect Min. Distance Average: ", _mean(incorrect_min_dist))

    print("\nCorrect Avg Distance Average: ", _mean(correct_avg_dist))
    print("Incorrect Avg Distance Average: ", _mean(incorrect_avg_dist))
Esempio n. 2
0
def main_loop(neighbors, training_data):
    """Run the interactive menu until the user chooses to exit.

    Each pass shows the options and reads one command (case-insensitive):

    * ``e`` leaves the loop.
    * ``n`` reads a sample via ``get_iris_data()``, classifies it with
      ``clsfy.knn(sample, training_data, 3, neighbors)`` and prints the
      name of the matching iris species, if any.
    * anything else prints ``Invalid option.``.

    Args:
        neighbors: Passed through as the last argument of ``clsfy.knn``.
        training_data: Passed through as the training set of ``clsfy.knn``.
    """
    while True:
        print_options()
        command = input('Select an option: ').lower()

        if command == 'e':
            return
        if command != 'n':
            print('Invalid option.')
            continue

        sample = get_iris_data()
        label = clsfy.knn(sample, training_data, 3, neighbors)[0]
        # Each species constant is indexed as (class id, display name); stop at
        # the first id that matches and print nothing when none does.
        for species in (IRIS_SETOSA, IRIS_VERSICOLOR, IRIS_VIRGINICA):
            if label == species[0]:
                print('Classification: ', species[1])
                break
Esempio n. 3
0
        train[k] = v

    # Hold out the last (up to) 100 entries of `data` as the test set.
    test = dict()
    for k, v in list(data.items())[-100:]:
        test[k] = v

    # Tallies of correct and incorrect predictions.
    right = 0
    wrong = 0
    # Values taken from indices 1 and 2 of each knn result, bucketed by whether
    # the prediction was correct.
    # NOTE(review): the names suggest index 1 is a minimum distance and index 2
    # an average distance -- confirm against clsfy.knn. These lists are not
    # read again in the visible part of this function.
    correct_avg_dists = []
    correct_min_dists = []
    incorrect_min_dists = []
    incorrect_avg_dists = []
    for key in test:
        row = test[key]
        # row[0] is the true class; the remaining items are the features.
        actual_class = row[0]
        class_data = clsfy.knn(row[1:], train, 3, neighbors)
        predicted_class = class_data[0]
        if actual_class != predicted_class:
            wrong += 1
            incorrect_min_dists.append(class_data[1])
            incorrect_avg_dists.append(class_data[2])
        else:
            right += 1
            correct_min_dists.append(class_data[1])
            correct_avg_dists.append(class_data[2])

    print('\n\nNeighbors: ', neighbors)

    # NOTE(review): both ratios divide by len(test) and raise
    # ZeroDivisionError when `data` is empty.
    print("Correct: ", right, '/', len(test), ' = ', right / len(test))
    print("Wrong: ", wrong, '/', len(test), ' = ', wrong / len(test))
Esempio n. 4
0
                # Keyboard handling for the current frame.
                # NOTE(review): every check below issues its own
                # cv2.waitKey(1) poll, so a key press appears to be seen only
                # by whichever poll happens to receive it -- consider reading
                # the key once and comparing the stored value. TODO confirm.

                # w: write params to json
                if cv2.waitKey(1) & 0xFF == ord('w'):
                    data = {
                        'digit_spacing': spacing,
                        'digit_start': digit_start,
                        'digit_width': width,
                        'y_offset_bottom': y_offset_bottom,
                        'y_offset_top': y_offset_top
                    }
                    print(data)
                    # 'w+' truncates any existing segvision.json before writing.
                    with open('segvision.json', 'w+') as f:
                        json.dump(data, f, sort_keys=True, indent=4)

                # i: identify image on command
                if cv2.waitKey(1) & 0xFF == ord('i'):
                    identified = knn(cropped_digits, 7)
                    print(identified)

                # s: save images
                if cv2.waitKey(1) & 0xFF == ord('s'):
                    # NOTE(review): the loop variable `digit` is unused and
                    # `roi` is written to every file; presumably `digit` was
                    # intended -- confirm.
                    for i, digit in enumerate(cropped_digits):
                        cv2.imwrite("digit" + str(i) + ".jpg", roi)

                # esc: quit (27 is the key code compared against here)
                if cv2.waitKey(1) & 0xFF == 27:
                    break

                # Clear the crop flag once this frame's keys have been handled.
                crop = False

    except TypeError:
        pass
Esempio n. 5
0
def extract_digits(img):
    """Slide a fixed-width window across *img* and classify each accepted crop.

    Starting at the column returned by ``find_digit_start(img)``, the image is
    scanned left to right in steps of ``WINDOW_MIN`` pixels. Each window is
    cropped with ``crop_digits``; windows for which it returns status ``0``
    are wrapped in a ``Digit`` and later classified with ``knn(..., k=5)``.

    Args:
        img: Image array; only ``img.shape[:2]`` (height, width) is read here.

    Returns:
        A list with one ``knn`` result per accepted window, in left-to-right
        order.
    """
    y_offset_top = 2
    y_offset_bottom = 3
    window_w = WINDOW_MIN
    h, w = img.shape[:2]

    # The vertical band is identical for every window, so compute it once.
    y0 = y_offset_top
    y1 = h - y_offset_bottom

    # Column at which the first digit begins.
    digit_start = find_digit_start(img)

    # Collect every window that crop_digits accepts (status 0).
    digits = []
    for x0 in range(digit_start, w, window_w):
        digit = Digit(img)
        x1 = x0 + window_w
        ret, digit_crop, _percent_white = crop_digits(img, y0, y1, x0, x1)
        if ret == 0:
            digit.add_crop(digit_crop)
            digit.add_coords((y0, y1, x0, x1))
            digits.append(digit)

    # Classify each accepted crop.
    results = []
    for digit in digits:
        # NOTE(review): the value returned by digit_variance was only ever
        # stored in a list that nothing read. The call is kept in case it has
        # side effects -- confirm and drop it if it is pure.
        digit_variance(digit, 2)
        results.append(knn(digit.crop, k=5))

    return results
Esempio n. 6
0
            else:
                # Build a positional index over the training texts and derive
                # TF-IDF document vectors from it.
                english_preprocessor = EnglishProcessor()
                positional_index = Positional(
                    preprocessor=english_preprocessor)
                positional_index.add_docs(train['Text'])
                tf_idf = TF_IDF(positional_index, english_preprocessor)
                # Transposed so the matrix orientation matches what knn expects.
                # NOTE(review): presumably one row per document after the
                # transpose -- confirm against TF_IDF.tf_idf_matrix.
                docvecs = np.transpose(tf_idf.tf_idf_matrix)

                # Vectorize each test document against the same index; only
                # the second value returned by search() is kept, transposed.
                inferred_X_test = []
                for doc in test['Text']:
                    _, vec = tf_idf.search(doc, True)
                    inferred_X_test.append(vec.T)

            # Ask for the neighbor count; int() raises ValueError on
            # non-numeric input.
            k = int(input("Enter the k value"))
            knn_predict = knn(docvecs, inferred_X_test, y_train, k)
            # Score the predictions against y_test; precision, recall and F1
            # are all requested with average='micro'.
            knn_precision = precision_score(y_test,
                                            knn_predict,
                                            average='micro')
            knn_recall = recall_score(y_test, knn_predict, average='micro')
            knn_f1 = f1_score(y_test, knn_predict, average='micro')
            # Harmonic mean of precision and recall, computed by hand for
            # comparison with knn_f1.
            # NOTE(review): undefined when precision + recall == 0.
            knn_f1_manually = (2 * knn_precision *
                               knn_recall) / (knn_precision + knn_recall)
            knn_accuracy = accuracy_score(y_test, knn_predict)
            print('knn result: ', knn_predict)
            print('knn precision: ', knn_precision)
            print('knn recall: ', knn_recall)
            print('knn F1 score: ', knn_f1)
            print('knn F1 manually: ', knn_f1_manually)
            print('knn accuracy: ', knn_accuracy)
            print('metrics:')