def main(k, data_set_name=None, data_dir=None):
    """Compare the project's KnnClassifier with KNeighborsClassifier for one k.

    The dataset is passed through randomize_dataset, its ``data`` attribute is
    replaced by ``normalize(data)``, and it is split with
    ``split_percentage = 0.7`` (presumably the training share -- confirm
    against split_dataset). Both classifiers are trained on the 'train' part
    and scored on the 'test' part with get_accuracy. The two scores are
    printed and handed to write_to_results_file.

    Args:
        k: Neighbor count. Assigned to ``KnnClassifier.k``, passed as
            ``n_neighbors`` to KNeighborsClassifier, used in the results
            file name and forwarded to write_to_results_file.
        data_set_name: Name of the data file handed to
            read_file_into_dataset. When None, ``datasets.load_iris()`` is
            used and the run is labelled "iris".
        data_dir: Directory that contains ``data_set_name``. When None, the
            original hard-coded location is used so existing callers keep
            their behavior.

    Returns:
        None. Results are printed and written via write_to_results_file.
    """
    split_percentage = 0.7

    # Load dataset
    if data_set_name is not None:
        if data_dir is None:
            # Legacy default, kept byte-for-byte for backward compatibility.
            # It only exists on the original author's machine; pass
            # data_dir to run anywhere else.
            data_path = (
                "C:\\Users\\Grant\\Documents\\School\\"
                "Winter 2016\\CS 450\\Prove01\\" + data_set_name
            )
        else:
            data_path = os.path.join(data_dir, data_set_name)
        data_set = randomize_dataset(read_file_into_dataset(data_path))
    else:
        data_set_name = "iris"
        data_set = randomize_dataset(datasets.load_iris())

    # NOTE(review): the original indentation was lost; normalization is
    # assumed to apply to both branches above -- confirm.
    data_set.data = normalize(data_set.data)
    data_sets = split_dataset(data_set, split_percentage)
    training_set = data_sets['train']
    testing_set = data_sets['test']

    # My Classifier
    knn_classifier = KnnClassifier()
    knn_classifier.k = k
    knn_classifier.train(
        training_set.data, training_set.target, training_set.target_names)
    predictions = knn_classifier.predict(testing_set.data)
    my_accuracy = get_accuracy(predictions, testing_set.target)

    # Better Classifier
    better_classifier = KNeighborsClassifier(n_neighbors=k)
    better_classifier.fit(training_set.data, training_set.target)
    predictions = better_classifier.predict(testing_set.data)
    better_accuracy = get_accuracy(predictions, testing_set.target)

    # Build each report line once so the console output and the results
    # file cannot drift apart.
    my_line = "My results: " + str(my_accuracy) + "%"
    better_line = "Better results: " + str(better_accuracy) + "%"
    print(my_line)
    print(better_line)

    results = "k = " + str(k) + "\n" + my_line + "\n" + better_line + "\n"

    # The results file goes in the parent of the current working directory.
    results_path = os.path.join(
        os.getcwd(), os.pardir, str(k) + "-" + data_set_name + "_results.txt")
    write_to_results_file(results_path, results, k)
def main(k, data_set_name=None, data_dir=None):
    """Compare the project's KnnClassifier with KNeighborsClassifier for one k.

    The dataset is passed through randomize_dataset, its ``data`` attribute is
    replaced by ``normalize(data)``, and it is split with
    ``split_percentage = 0.7`` (presumably the training share -- confirm
    against split_dataset). Both classifiers are trained on the 'train' part
    and scored on the 'test' part with get_accuracy. The two scores are
    printed and handed to write_to_results_file.

    Args:
        k: Neighbor count. Assigned to ``KnnClassifier.k``, passed as
            ``n_neighbors`` to KNeighborsClassifier, used in the results
            file name and forwarded to write_to_results_file.
        data_set_name: Name of the data file handed to
            read_file_into_dataset. When None, ``datasets.load_iris()`` is
            used and the run is labelled "iris".
        data_dir: Directory that contains ``data_set_name``. When None, the
            original hard-coded location is used so existing callers keep
            their behavior.

    Returns:
        None. Results are printed and written via write_to_results_file.
    """
    split_percentage = 0.7

    # Load dataset
    if data_set_name is not None:
        if data_dir is None:
            # Legacy default, kept byte-for-byte for backward compatibility.
            # It only exists on the original author's machine; pass
            # data_dir to run anywhere else.
            data_path = (
                "C:\\Users\\Grant\\Documents\\School\\"
                "Winter 2016\\CS 450\\Prove01\\" + data_set_name
            )
        else:
            data_path = os.path.join(data_dir, data_set_name)
        data_set = randomize_dataset(read_file_into_dataset(data_path))
    else:
        data_set_name = "iris"
        data_set = randomize_dataset(datasets.load_iris())

    # NOTE(review): the original indentation was lost; normalization is
    # assumed to apply to both branches above -- confirm.
    data_set.data = normalize(data_set.data)
    data_sets = split_dataset(data_set, split_percentage)
    training_set = data_sets['train']
    testing_set = data_sets['test']

    # My Classifier
    knn_classifier = KnnClassifier()
    knn_classifier.k = k
    knn_classifier.train(
        training_set.data, training_set.target, training_set.target_names)
    predictions = knn_classifier.predict(testing_set.data)
    my_accuracy = get_accuracy(predictions, testing_set.target)

    # Better Classifier
    better_classifier = KNeighborsClassifier(n_neighbors=k)
    better_classifier.fit(training_set.data, training_set.target)
    predictions = better_classifier.predict(testing_set.data)
    better_accuracy = get_accuracy(predictions, testing_set.target)

    # Build each report line once so the console output and the results
    # file cannot drift apart.
    my_line = "My results: " + str(my_accuracy) + "%"
    better_line = "Better results: " + str(better_accuracy) + "%"
    print(my_line)
    print(better_line)

    results = "k = " + str(k) + "\n" + my_line + "\n" + better_line + "\n"

    # The results file goes in the parent of the current working directory.
    results_path = os.path.join(
        os.getcwd(), os.pardir, str(k) + "-" + data_set_name + "_results.txt")
    write_to_results_file(results_path, results, k)
df.columns[i]].quantile(0.25) != 1: index.append(i) features = df[df.columns[index]] labels = df[df.columns[len(df.columns) - 1]] # Replacing alphabets to numbers for ease of use labels = labels.replace('A', 1).replace('B', 2).replace('C', 3).replace( 'D', 4).replace('E', 5) X = features.values y = labels.values X_train, X_test, Y_train, Y_test = train_test_split(features.values, labels.values, test_size=0.2) # Running knn classifier on the dataset start = timeit.default_timer() knnClf = KnnClassifier(X_train, Y_train) predictions = knnClf.predict(X_test=X_test, k=41) stop = timeit.default_timer() print("Run Time: ", stop - start) # transform the list into an array predictions = np.asarray(predictions) # evaluating accuracy accuracy = accuracy_score(Y_test[0:100], predictions) print('\nThe accuracy of our classifier is %d%%' % accuracy)