def run_knn(cases=300):
    """Evaluate the k-NN classifier on a random contiguous slice of ``test_data``.

    Picks ``cases`` consecutive rows of the module-level ``test_data`` mapping
    (in its iteration order) starting at a random offset, classifies each row
    with ``clsfy.knn(row[1:], train_data, 2, 3)`` and prints per-row results
    followed by summary statistics.

    Each row is expected to carry its actual label at index 0 and its features
    from index 1 onwards. The classifier result is indexed as
    ``[0]`` predicted label, ``[1]`` certainty, ``[2]`` minimum distance and
    ``[3]`` average distance.

    Args:
        cases: Number of rows to evaluate. Clamped to the number of rows
            available so a small ``test_data`` no longer raises ``ValueError``.

    Returns:
        None. All output goes to stdout.
    """

    def mean(values):
        # A bucket can legitimately be empty (e.g. no misclassifications at
        # all); report NaN instead of crashing with ZeroDivisionError.
        return sum(values) / len(values) if values else float('nan')

    rows = list(test_data.values())
    cases = min(cases, len(rows))
    if cases <= 0:
        print("No test cases available.")
        return

    # randint is inclusive on both ends, so the last valid start offset is
    # len(rows) - cases; this also lets the final row be sampled.
    start = random.randint(0, len(rows) - cases)
    examples = rows[start:start + cases]

    positives = 0
    negatives = 0
    correct_certain = []
    correct_min_dist = []
    correct_avg_dist = []
    incorrect_certain = []
    incorrect_min_dist = []
    incorrect_avg_dist = []

    for row in examples:
        actual = row[0]
        classification = clsfy.knn(row[1:], train_data, 2, 3)
        predicted, certainty, min_dist, avg_dist = classification[:4]

        # Label 0 is counted as a negative prediction, anything else positive.
        if predicted == 0:
            negatives += 1
        else:
            positives += 1

        if predicted == actual:
            print("Correct classification. Certainty: ", certainty)
            correct_certain.append(certainty)
            correct_min_dist.append(min_dist)
            correct_avg_dist.append(avg_dist)
        else:
            print("Incorrect classification. Certainty: ", certainty, ' Actual: ', actual)
            incorrect_certain.append(certainty)
            incorrect_min_dist.append(min_dist)
            incorrect_avg_dist.append(avg_dist)

    right = len(correct_certain)
    wrong = len(incorrect_certain)

    print("\n\nRight: ", right, '/', cases, ' = ', right/cases)
    print("Wrong: ", wrong, '/', cases, ' = ', wrong/cases)
    print("\nCorrect Certainty: ", mean(correct_certain))
    print("Incorrect Certainty: ", mean(incorrect_certain))
    print("\nPositives: ", positives, '/', cases)
    print("Negatives: ", negatives, '/', cases)
    print("\nCorrect Min. Distance Average: ", mean(correct_min_dist))
    print("Incorrect Min. Distance Average: ", mean(incorrect_min_dist))
    print("\nCorrect Avg Distance Average: ", mean(correct_avg_dist))
    print("Incorrect Avg Distance Average: ", mean(incorrect_avg_dist))
def main_loop(neighbors, training_data):
    """Run the interactive menu until the user chooses to exit.

    On every pass the menu is printed and one option is read from stdin
    (case-insensitive):

    * ``e`` leaves the loop and returns.
    * ``n`` reads a new sample via ``get_iris_data()``, classifies it with
      ``clsfy.knn`` and prints the name of the matching iris species.
    * anything else prints ``Invalid option.`` and prompts again.

    Args:
        neighbors: Forwarded as the fourth argument of ``clsfy.knn``.
        training_data: Forwarded as the second argument of ``clsfy.knn``.

    Returns:
        None.
    """
    while True:
        print_options()
        selection = input('Select an option: ').lower()

        if selection == 'e':
            return

        if selection != 'n':
            print('Invalid option.')
            continue

        sample = get_iris_data()
        # The literal 3 is passed through unchanged; presumably the number of
        # classes -- TODO confirm against clsfy.knn.
        predicted = clsfy.knn(sample, training_data, 3, neighbors)[0]

        # Each species constant is indexed as [0] = label, [1] = display name.
        # A label matching none of them prints nothing.
        for species in (IRIS_SETOSA, IRIS_VERSICOLOR, IRIS_VIRGINICA):
            if predicted == species[0]:
                print('Classification: ', species[1])
                break
train[k] = v test = dict() for k, v in list(data.items())[-100:]: test[k] = v right = 0 wrong = 0 correct_avg_dists = [] correct_min_dists = [] incorrect_min_dists = [] incorrect_avg_dists = [] for key in test: row = test[key] actual_class = row[0] class_data = clsfy.knn(row[1:], train, 3, neighbors) predicted_class = class_data[0] if actual_class != predicted_class: wrong += 1 incorrect_min_dists.append(class_data[1]) incorrect_avg_dists.append(class_data[2]) else: right += 1 correct_min_dists.append(class_data[1]) correct_avg_dists.append(class_data[2]) print('\n\nNeighbors: ', neighbors) print("Correct: ", right, '/', len(test), ' = ', right / len(test)) print("Wrong: ", wrong, '/', len(test), ' = ', wrong / len(test))
# w: write params to json if cv2.waitKey(1) & 0xFF == ord('w'): data = { 'digit_spacing': spacing, 'digit_start': digit_start, 'digit_width': width, 'y_offset_bottom': y_offset_bottom, 'y_offset_top': y_offset_top } print(data) with open('segvision.json', 'w+') as f: json.dump(data, f, sort_keys=True, indent=4) # i: identify image on command if cv2.waitKey(1) & 0xFF == ord('i'): identified = knn(cropped_digits, 7) print(identified) # s: save images if cv2.waitKey(1) & 0xFF == ord('s'): for i, digit in enumerate(cropped_digits): cv2.imwrite("digit" + str(i) + ".jpg", roi) # esc: quit if cv2.waitKey(1) & 0xFF == 27: break crop = False except TypeError: pass
def extract_digits(img):
    """Slide a fixed-width window across ``img`` and classify each accepted crop.

    Starting at the column returned by ``find_digit_start(img)``, the image is
    stepped through in ``WINDOW_MIN``-wide windows. Each window is handed to
    ``crop_digits``; when it reports status ``0`` the crop and its coordinates
    are stored on a ``Digit``. Every stored digit is then classified with
    ``knn(digit.crop, k=5)``.

    Args:
        img: Image array exposing ``.shape`` whose first two entries are
            (height, width).

    Returns:
        A list with one ``knn`` result per accepted window, in scan order.
    """
    y_offset_top = 2
    y_offset_bottom = 3
    window_w = WINDOW_MIN
    h, w = img.shape[:2]

    # The vertical crop bounds do not depend on the window position.
    y0 = 0 + y_offset_top
    y1 = h - y_offset_bottom

    # find the coords of the first digit
    digit_start = find_digit_start(img)

    # loop through the entire screen, collecting digit candidates
    digits = []
    for x0 in range(digit_start, w, window_w):
        digit = Digit(img)
        x1 = x0 + window_w
        status, digit_crop, _percent_white = crop_digits(img, y0, y1, x0, x1)
        if status == 0:
            digit.add_crop(digit_crop)
            digit.add_coords((y0, y1, x0, x1))
            digits.append(digit)

    # classify each accepted candidate
    results = []
    for digit in digits:
        # NOTE(review): the value returned by digit_variance is not used here.
        # The call is kept in case it has side effects on `digit` -- confirm
        # and drop it if it is pure. An earlier, disabled variant ran knn on
        # every crop it returned and took statistics.mode of the results.
        digit_variance(digit, 2)
        results.append(knn(digit.crop, k=5))

    return results
else: english_preprocessor = EnglishProcessor() positional_index = Positional( preprocessor=english_preprocessor) positional_index.add_docs(train['Text']) tf_idf = TF_IDF(positional_index, english_preprocessor) docvecs = np.transpose(tf_idf.tf_idf_matrix) inferred_X_test = [] for doc in test['Text']: _, vec = tf_idf.search(doc, True) inferred_X_test.append(vec.T) k = int(input("Enter the k value")) knn_predict = knn(docvecs, inferred_X_test, y_train, k) knn_precision = precision_score(y_test, knn_predict, average='micro') knn_recall = recall_score(y_test, knn_predict, average='micro') knn_f1 = f1_score(y_test, knn_predict, average='micro') knn_f1_manually = (2 * knn_precision * knn_recall) / (knn_precision + knn_recall) knn_accuracy = accuracy_score(y_test, knn_predict) print('knn result: ', knn_predict) print('knn precision: ', knn_precision) print('knn recall: ', knn_recall) print('knn F1 score: ', knn_f1) print('knn F1 manually: ', knn_f1_manually) print('knn accuracy: ', knn_accuracy) print('metrics:')