def run_research():
    """Greedily look for the features that are best ignored.

    Runs seven rounds with a 1-NN classifier factory over the two ECG folds.
    In every round each remaining feature is scored through
    ``evaluate_comp`` and the per-feature results are written to
    ``my_experiments<round>.csv`` as ``feature,accuracy,error`` lines.
    The feature with the highest accuracy in a round is removed from the
    training data of every fold before the next round starts.
    """
    knn_fac = classifier.knn_factory(1)

    folds = []
    for fold_id in (1, 2):
        folds.append(hw3_utils.load_data('ecg_fold_' + str(fold_id) + '.pickle'))

    best_feature = None
    for run_num in range(1, 8):
        # From the second round on, drop the column picked in the previous
        # round. Indices are therefore relative to the already-reduced data.
        if best_feature is not None:
            reduced = []
            for data, labels, test in folds:
                reduced.append((np.delete(data, best_feature, 1), labels, test))
            folds = reduced

        features_num = len(folds[0][0][0])

        results = []
        for feature in range(features_num):
            results.append(evaluate_comp(knn_fac, folds, feature))

        # Each result is unpacked below as (feature, accuracy, error);
        # pick the entry with the highest accuracy (first one wins on ties).
        best_feature = max(results, key=lambda item: item[1])[0]

        csv_name = 'my_experiments{}.csv'.format(run_num)
        with open(csv_name, 'w+') as result_file:
            for feature, accuracy, error in results:
                fields = (str(feature), str(accuracy), str(error))
                result_file.write(','.join(fields) + '\n')
def experiment_6():
    """Evaluate KNN for several k values and print one CSV line per k.

    Each printed line has the form ``k,accuracy,error`` where the two
    numbers are whatever ``evaluate`` returns for that k on ``FOLDS``.
    """
    for k in (1, 3, 5, 7, 13):
        res_accuracy, res_error = evaluate(knn_factory(k), FOLDS)
        print(",".join(str(value) for value in (k, res_accuracy, res_error)))
Exemple #3
0
def experiment6(k_values=(1, 3, 5, 7, 13), num_folds=2):
    """Evaluate KNN for every k in *k_values* and save the results as CSV.

    Args:
        k_values: iterable of neighbour counts to test. The default is an
            immutable tuple so it cannot be shared-and-mutated across calls.
        num_folds: number of folds handed to ``cross_validation.evaluate``.

    Writes one ``k,accuracy,error`` line per k (three decimals for the two
    scores) to ``experiments6.csv``.
    """
    filename = "experiments6.csv"
    # Use a context manager so the file is flushed and closed even when an
    # evaluation raises; the original never closed the handle.
    with open(filename, 'w') as output_file:
        for k in k_values:
            print("Testing classifier with k={}".format(k))
            knn = classifier.knn_factory(k)
            accuracy, error = cross_validation.evaluate(knn, num_folds)
            output_file.write("{},{:.3f},{:.3f}\n".format(k, accuracy, error))
Exemple #4
0
def run_knn():
    """Evaluate KNN for a fixed set of k values and store the results.

    For every k a ``k,accuracy,error`` line is written to
    ``experiments6.csv``; progress is reported on stdout.
    """
    with open('experiments6.csv', 'w+') as result_file:
        for k in (1, 3, 5, 7, 13):
            accuracy, error = utils.evaluate(classifier.knn_factory(k), 2)
            fields = (str(k), str(accuracy), str(error))
            result_file.write(','.join(fields) + '\n')
            print("finished: ", k)

    print("FINISHED")
Exemple #5
0
def KNN_test():
    """Evaluate KNN for several k values and dump the results to CSV.

    All evaluations are finished first; only then is ``experiments6.csv``
    written, one ``k,accuracy,error`` row per tested k.
    """
    rows = []
    for k in (1, 3, 5, 7, 13):
        avg_accuracy, avg_error = evaluate(knn_factory(k), 2)
        rows.append([k, avg_accuracy, avg_error])

    with open("experiments6.csv", "w") as csv_file:
        csv.writer(csv_file, delimiter=',', lineterminator='\n').writerows(rows)
def run_my_classify():
    """Classify the test data with a 1-NN model after dropping some features.

    The listed columns are deleted one after another from both the training
    and the test matrices, so every index refers to the matrix as it is
    after the preceding deletions. The predictions are handed to
    ``hw3_utils.write_prediction``.
    """
    data, labels, tests = hw3_utils.load_data()

    # Columns to drop, in order; presumably the outcome of the earlier
    # feature research (indices are relative to the already-reduced data).
    for column in (90, 23, 90, 103, 36):
        data = np.delete(data, column, 1)
        tests = np.delete(tests, column, 1)

    clf = classifier.knn_factory(1).train(data, labels)

    results = []
    for sample in tests:
        results.append(clf.classify(sample))

    hw3_utils.write_prediction(results)
Exemple #7
0
from sklearn.feature_selection import SelectKBest, f_classif

# question 3.2
# Load the data set and split the labelled part into two groups.

patients, labels, test = utils.load_data()
split_crosscheck_groups([patients, labels], 2)

# question 5.1
# Evaluate KNN for each k and store "k,accuracy,error" lines in a CSV file.

k_list = [1, 3, 5, 7, 13]
accuracy_list = []
file_name = 'experiments6.csv'

# The file is opened in binary mode, so every line is encoded before writing.
with open(file_name, 'wb') as out_file:
    for k in k_list:
        accuracy, error = evaluate(knn_factory(k), 2)
        accuracy_list.append(accuracy)
        record = ",".join([str(k), str(accuracy), str(error)]) + "\n"
        out_file.write(record.encode())

# question 5.2
# Plot the average accuracy as a function of k.

plt.plot(k_list, accuracy_list)
plt.xlabel('K value')
plt.ylabel('Average accuracy')
plt.title('Part B, question 5.2')
plt.show()

# questions 7.1, 7.2
Exemple #8
0
from sklearn.ensemble import RandomForestClassifier

# Load the data and keep both the raw and the feature-reduced versions.
examples, labels, test = load_data()
data = []
data.append(examples)
data.append(labels)

# Keep only the 100 best features according to the ANOVA F-score.
# `k` is passed by keyword: recent scikit-learn releases reject it as a
# positional argument.
data_new = []
data_new.append(SelectKBest(f_classif, k=100).fit_transform(examples, labels))
data_new.append(labels)
classifier.split_crosscheck_groups(data_new, 2)

print("using CUT data\n")

# NOTE: the classifier named `decision_tree` is actually a random forest.
decision_tree = classifier.sklearn_factory_wrapper(RandomForestClassifier())
perceptron = classifier.sklearn_factory_wrapper(Perceptron())
knn = classifier.knn_factory(7)

print("knn and perceptron: \n")
ensemble = classifier.ensemble_factory([knn, perceptron])
accuracy, error = classifier.evaluate(ensemble, 2)
print("%.3f, %.3f\n" % (accuracy, error))

print("knn and decision tree: \n")
# Fixed: this experiment previously re-ran [knn, perceptron] by mistake.
ensemble = classifier.ensemble_factory([knn, decision_tree])
accuracy, error = classifier.evaluate(ensemble, 2)
print("%.3f, %.3f\n" % (accuracy, error))

print("all three: \n")
ensemble = classifier.ensemble_factory([knn, perceptron, decision_tree])
accuracy, error = classifier.evaluate(ensemble, 2)
print("%.3f, %.3f\n" % (accuracy, error))
Exemple #9
0
def main():
    """Run the (optional) experiments and produce the competition prediction.

    Three debug switches decide whether the KNN, decision-tree and
    perceptron experiments run. ``experiments12.csv`` is always (re)created,
    even when both of its experiments are skipped. Finally a ``triple_model``
    is fitted on the training data and its prediction for the scaled test
    features is written out with ``write_prediction``.
    """
    # Debug switches: set one to False to run the matching experiment.
    skip_knn = True
    skip_tree = True
    skip_perc = True

    train_features, train_labels, test_features = load_data('data/Data.pickle')

    # Number of folds; the split itself is currently disabled:
    # split_crosscheck_groups(train_features, train_labels, folds)
    folds = 2

    # NOTE(review): every evaluate() result below is unpacked as (err, acc).
    # Confirm this matches the order evaluate() actually returns.
    if not skip_knn:
        # Evaluate KNN for several k values and log each result.
        k_list = [1, 3, 5, 7, 13]
        acc_list = []
        err_list = []
        with open('experiments6.csv', mode='w', newline='') as csv_file:
            exp_writer = csv.writer(csv_file)
            for k in k_list:
                err, acc = evaluate(knn_factory(k), folds)
                print("k=", k, " acc=", acc, " err=", err)
                exp_writer.writerow([k, acc, err])
                acc_list.append(acc)
                err_list.append(err)

        # Two stacked plots: accuracy on top, error below.
        panels = (
            (1, acc_list, 'g', "Accuracy"),
            (2, err_list, 'r', "Error"),
        )
        for position, series, line_color, y_label in panels:
            plt.subplot(2, 1, position)
            plt.plot(k_list, series, '--', color=line_color)
            plt.plot(k_list, series, 'bo')
            plt.ylabel(y_label)
            plt.xlabel("k")
            plt.xticks(k_list)
        plt.tight_layout()
        plt.show()

    # Decision-tree and perceptron experiments share one result file.
    with open('experiments12.csv', mode='w', newline='') as csv_file:
        exp_writer = csv.writer(csv_file)

        if not skip_tree:
            # Row id 1: decision tree using the entropy criterion.
            myTree = tree.DecisionTreeClassifier(criterion="entropy")
            err, acc = evaluate(myTree, folds)
            print("tree acc=", acc, " tree err=", err)
            exp_writer.writerow([1, acc, err])

        if not skip_perc:
            # Row id 2: perceptron.
            myPerc = Perceptron(tol=1e-3, random_state=0)
            err, acc = evaluate(myPerc, folds)
            print("perceptron acc=", acc, " perceptron err=", err)
            exp_writer.writerow([2, acc, err])

    # Competition: fit the combined model and classify the test features.
    print("Triple model")
    my_model = triple_model()
    my_model.fit(train_features, train_labels)
    scaled_test = preprocessing.scale(test_features)
    res = my_model.final_predict(scaled_test)
    write_prediction(res)