예제 #1
0
def evaluate(output=True):
    """Measure the k-NN error rate on a hold-out slice of the dating data set.

    The data read from "dating.dataset" is normalised with
    ``knn.auto_normalize``. The first 10% of the rows serve as test samples;
    the remaining rows and their labels are passed to
    ``knn.gen_classify_fun`` to build a classifier with k = 3.

    Args:
        output: When true, print every misclassified sample and the final
            error rate.

    Returns:
        The fraction of test rows whose predicted label differs from the
        stored label, or 0.0 when the data set is too small to yield a
        single test row.
    """
    ratio = 0.1
    k_value = 3
    data_set, labels = get_data_set_from_file("dating.dataset")
    # Only the normalised data is used here; the two other values returned
    # by auto_normalize are not needed for the evaluation.
    data_set, _, _ = knn.auto_normalize(data_set)
    row_count = data_set.shape[0]
    row_test_count = int(ratio * row_count)
    test_data_set = data_set[0:row_test_count, :]
    classify_fun = knn.gen_classify_fun(
        [data_set[row_test_count:row_count, :],
         labels[row_test_count:row_count]], k_value)

    error_count = 0
    # zip stops at the shorter input, so only the labels belonging to the
    # leading test rows are consumed.
    for label, row in zip(labels, test_data_set):
        # NOTE(review): tolist()[0] presumably unwraps a 1xN matrix row into
        # a flat list -- confirm against the type returned by the loader.
        result = classify_fun(row.tolist()[0])
        if result != label:
            error_count += 1
            if output:
                # Single-argument print(...) behaves the same on Python 2
                # and Python 3.
                print("the classifier came back with: %d, the real answer is: %d"
                      % (result, label))

    # With fewer than 10 rows the test slice is empty; report 0.0 rather
    # than dividing by zero.
    if row_test_count:
        error_rate = error_count / float(row_test_count)
    else:
        error_rate = 0.0
    if output:
        print("the total error rate is: %f (%d / %d)"
              % (error_rate, error_count, row_test_count))
    return error_rate
예제 #2
0
def classify_person(percent_game, fly_miles, ice_cream):
    """Classify one person against the dating data set and describe the result.

    The three inputs are packed into a feature vector in the order
    (percent_game, fly_miles, ice_cream), scaled with the ranges and minimum
    values that ``knn.auto_normalize`` reports for "dating.dataset", and
    classified with ``knn.classify`` using k = 3.

    Returns:
        One of 'not at all', 'in small doses' or 'in large doses', selected
        by the predicted label minus one.
    """
    neighbour_count = 3
    verdicts = ('not at all', 'in small doses', 'in large doses')

    features = array([percent_game, fly_miles, ice_cream])
    training_set, training_labels = get_data_set_from_file("dating.dataset")
    training_set, value_ranges, min_values = knn.auto_normalize(training_set)

    # Scale the query point with the same ranges/minimums as the training data.
    scaled_features = knn.normalize(features, value_ranges, min_values)
    predicted = knn.classify(
        scaled_features, training_set, training_labels, neighbour_count)

    # NOTE(review): presumably labels are 1-based (1..3) -- confirm against
    # the data file; a label of 0 would silently select the last verdict.
    return verdicts[predicted - 1]