def evaluate(output=True, ratio=0.1, k_value=3):
    """Measure the kNN classifier's error rate on a hold-out split.

    The data set is loaded from "dating.dataset" and normalized with
    ``knn.auto_normalize``.  The first ``int(ratio * row_count)`` rows are
    used as test samples; the remaining rows and their labels are handed to
    ``knn.gen_classify_fun`` as the training data.

    Args:
        output: When true, print every misclassified sample and the final
            error rate.
        ratio: Fraction of rows, taken from the top of the data set, that is
            held out for testing.
        k_value: The ``k`` forwarded to ``knn.gen_classify_fun``.

    Returns:
        The number of misclassified test rows divided by the number of test
        rows, as a float.

    Raises:
        ValueError: If the split leaves no test rows or no training rows.
    """
    data_set, labels = get_data_set_from_file("dating.dataset")
    # Only the normalized data is needed here; the two extra return values
    # of auto_normalize are not used by the evaluation.
    data_set = knn.auto_normalize(data_set)[0]

    row_count = data_set.shape[0]
    row_test_count = int(ratio * row_count)
    # Without this guard an empty test split ends in a bare ZeroDivisionError
    # when the error rate is computed below.
    if not 0 < row_test_count < row_count:
        raise ValueError(
            "ratio %r splits %d rows into %d test rows; "
            "need at least one test row and one training row"
            % (ratio, row_count, row_test_count))

    test_rows = data_set[0:row_test_count, :]
    classify = knn.gen_classify_fun(
        [data_set[row_test_count:row_count, :],
         labels[row_test_count:row_count]],
        k_value)

    error_count = 0
    # zip stops at the shorter input, so only the labels that belong to the
    # test rows are consumed.
    for label, row in zip(labels, test_rows):
        # NOTE(review): tolist()[0] assumes each row iterates as a 1 x N
        # numpy matrix -- confirm against get_data_set_from_file.
        predicted = classify(row.tolist()[0])
        if predicted != label:
            error_count += 1
            if output:
                # Single-argument print(...) emits the same text on
                # Python 2 and Python 3.
                print("the classifier came back with: %d, the real answer is: %d"
                      % (predicted, label))

    error_rate = error_count / float(row_test_count)
    if output:
        print("the total error rate is: %f (%d / %d)"
              % (error_rate, error_count, row_test_count))
    return error_rate
def classify_person(percent_game, fly_miles, ice_cream):
    """Classify one person with the kNN data loaded from "dating.dataset".

    The three arguments are packed into a point in the order
    (percent_game, fly_miles, ice_cream), normalized with the value ranges
    and minimum values returned by ``knn.auto_normalize`` for the data set,
    and classified by ``knn.classify`` with k = 3.

    Returns:
        One of 'not at all', 'in small doses' or 'in large doses', selected
        by indexing that list with the predicted label minus one.
    """
    neighbour_count = 3

    # NOTE(review): the feature order here must match the column order of
    # "dating.dataset" -- confirm against get_data_set_from_file.
    sample = array([percent_game, fly_miles, ice_cream])

    samples, sample_labels = get_data_set_from_file("dating.dataset")
    samples, ranges, minimums = knn.auto_normalize(samples)

    # Scale the query point with the same ranges/minimums as the data set.
    scaled_sample = knn.normalize(sample, ranges, minimums)
    predicted = knn.classify(
        scaled_sample, samples, sample_labels, neighbour_count)

    # presumably labels are the integers 1..3; verify against the data file,
    # since a label of 0 would silently select the last description.
    descriptions = ['not at all', 'in small doses', 'in large doses']
    return descriptions[predicted - 1]