コード例 #1
0
    # # Test Gaussian PDF
    # print(calculate_probability(1.0, 1.0, 1.0))
    # print(calculate_probability(2.0, 1.0, 1.0))
    # print(calculate_probability(0.0, 1.0, 1.0))

    # # Test calculating class probabilities
    # summaries = summarize_by_class(dataset)
    # row = dataset[0]
    # probabilities = calculate_class_probabilities(summaries, row)
    # print(probabilities)
    # print(predict(summaries, row))

    # Test Naive Bayes on the Iris dataset.
    # Seed first; presumably this keeps the evaluation repeatable between runs.
    seed(1)

    # The data is looked up in a 'datasets' directory that sits beside the
    # current working directory (i.e. under the cwd's parent).
    dataset_base_path = os.path.join(os.path.dirname(os.getcwd()), 'datasets')
    filename = 'iris.csv'
    dataset = load_dataset(os.path.join(dataset_base_path, filename))

    # The last column is the class label; every column before it is a feature.
    class_column = len(dataset[0]) - 1

    # Convert the feature columns from strings to floats.
    for i in range(class_column):
        str_column_to_float(dataset, i)

    # Convert the class column to integers.
    str_column_to_int(dataset, class_column)

    # Evaluate the algorithm. n_folds is passed through instead of repeating
    # the literal, so changing it here actually changes the evaluation.
    n_folds = 5
    scores = evaluate_algorithm(dataset, naive_bayes, n_folds, accuracy_metric)
    print('Scores: {}'.format(scores))
    print('Mean Accuracy: {:.3f}'.format(sum(scores) / float(len(scores))))
コード例 #2
0
    # Make a copy of test set to remove predictions while passing to the algorithm
    test_set = deepcopy(test)
    for row in test_set:
        row[-1] = None
    predicted = algorithm(train, test_set, *args)
    actual = [row[-1] for row in test]
    accuracy = metric(actual, predicted)
    return accuracy


# zero rule algorithm for classification
def zero_rule_algorithm_classification(train, test):
    """Predict the most frequent training class for every test row.

    Args:
        train: Sequence of rows; the last element of each row is its class.
        test: Sequence of rows to predict for; only its length is used.

    Returns:
        A list with one entry per row in ``test``, each equal to the most
        common class value in ``train``. When several classes share the top
        count, the one that appears first in ``train`` is chosen.

    Raises:
        ValueError: If ``train`` is empty, since there is no class to predict.
    """
    # Imported locally so this function does not rely on the module's imports.
    from collections import Counter

    # One pass over the training rows instead of a list.count() scan per
    # distinct class value.
    class_counts = Counter(row[-1] for row in train)
    if not class_counts:
        raise ValueError(
            'cannot fit a zero rule model on an empty training set')

    # Iterating the counter follows insertion order, so max() returns the
    # first-seen class among ties rather than depending on set ordering.
    prediction = max(class_counts, key=class_counts.get)
    return [prediction] * len(test)


if __name__ == '__main__':
    # Demo run of the train/test harness using the zero rule baseline.
    # Seed before anything else runs.
    seed(1)

    # The CSV is expected in a 'datasets' directory beside the current
    # working directory (under the cwd's parent).
    filename = 'pima-indians-diabetes.csv'
    dataset_base_path = os.path.join(os.path.dirname(os.getcwd()), 'datasets')
    dataset_path = os.path.join(dataset_base_path, filename)
    dataset = load_dataset(dataset_path)

    n_rows = len(dataset)
    n_cols = len(dataset[0])
    summary = 'Loaded data file {0} with {1} rows and {2} columns'.format(
        filename, n_rows, n_cols)
    print(summary)

    # Every column, the last one included, is converted from string to float.
    col_index = 0
    while col_index < len(dataset[0]):
        str_column_to_float(dataset, col_index)
        col_index += 1

    # NOTE(review): 0.6 is presumably the train/test split ratio - confirm
    # against evaluate_algorithm's signature.
    accuracy = evaluate_algorithm(
        dataset,
        zero_rule_algorithm_classification,
        0.6,
        accuracy_metric,
    )
    print('Accuracy {:.3f}'.format(accuracy))