Esempio n. 1
0
def evaluate(output=True):
    """Estimate the classifier's error rate on "dating.dataset" with a hold-out split.

    The data set is loaded and normalized with ``knn.auto_normalize``. The
    first 10% of the rows (``ratio``) are held out as test samples; the
    remaining rows and their labels are handed to ``knn.gen_classify_fun``
    together with ``k_value`` (3) to build the classifier.

    Args:
        output: When true, print every misclassified sample and the final
            error rate.

    Returns:
        The fraction of held-out rows that were misclassified, as a float.
        ``0.0`` is returned when the data set is too small to hold out any
        row (fewer than 10 rows), instead of dividing by zero.
    """
    ratio = 0.1
    k_value = 3
    data_set, labels = get_data_set_from_file("dating.dataset")
    # auto_normalize returns two further values that are not needed here.
    data_set, _, _ = knn.auto_normalize(data_set)
    row_count = data_set.shape[0]
    row_test_count = int(ratio * row_count)
    test_data_set = data_set[0:row_test_count, :]
    classify_fun = knn.gen_classify_fun(
        [data_set[row_test_count:row_count, :],
         labels[row_test_count:row_count]], k_value)

    def is_right(row, label):
        """Return True when the classifier's answer for ``row`` equals ``label``."""
        predicted = classify_fun(row)
        if predicted == label:
            return True
        if output:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("the classifier came back with: %d, the real answer is: %d"
                  % (predicted, label))
        return False

    # zip stops at the shorter input, so only the first row_test_count labels
    # are paired with the held-out rows.
    # NOTE(review): row.tolist()[0] assumes each iterated row is 2-D (e.g. a
    # numpy matrix row) -- confirm against get_data_set_from_file.
    outcomes = [is_right(row.tolist()[0], label)
                for label, row in zip(labels, test_data_set)]
    error_count = outcomes.count(False)
    # Guard the division: with no held-out rows the rate is reported as 0.0.
    if row_test_count:
        error_rate = error_count / float(row_test_count)
    else:
        error_rate = 0.0
    if output:
        print("the total error rate is: %f (%d / %d)"
              % (error_rate, error_count, row_test_count))
    return error_rate
def run_evaluate(output=True):
    """Evaluate the classifier on the handwriting digit data.

    A classifier is built by ``knn.gen_classify_fun`` from the data returned
    by ``get_handwriting_dataset('training_digits')`` with ``k_value`` (3),
    then ``knn.evaluate`` is run over the data loaded from ``'test_digits'``.

    Args:
        output: When true, print the error info and pretty-print one
            ``(file name, "actual:...", "expected:...")`` tuple per entry
            of the evaluation result.

    Returns:
        The ``(error_list, result)`` pair exactly as returned by
        ``knn.evaluate``.
    """
    k_value = 3
    classify_fun = knn.gen_classify_fun(
        get_handwriting_dataset('training_digits'), k_value)
    test_pathname = 'test_digits'
    test_dataset, test_labels = get_handwriting_dataset(
        test_pathname)

    error_list, result = knn.evaluate(
        classify_fun, test_dataset, test_labels, k_value)
    if output:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("error info: %s" % (error_list,))
        # List the directory once instead of once per result entry; this also
        # guarantees every index is resolved against the same snapshot.
        # NOTE(review): assumes get_handwriting_dataset loads the samples in
        # os.listdir order so that ``index`` maps to the right file -- confirm.
        file_names = os.listdir(test_pathname)
        pp([(file_names[index], "actual:" + actual, "expected:" + expected)
            for index, actual, expected, content in result])
    return error_list, result