def Validate(self, subset):
    """Estimate classifier accuracy on a feature subset by leave-one-out validation.

    Every data point is held out in turn: a fresh ``Classifier`` is trained
    on the remaining points, restricted to the selected feature columns, and
    is then asked to classify the held-out point.

    Args:
        subset: List of feature numbers (1-based) to keep. ``[0]`` means
            "keep every feature". Column 0 of each row is compared against
            the classifier's answer, so it is never removed.

    Returns:
        The fraction of held-out points that were classified correctly.
        Returns ``0`` (after printing an error) when ``subset`` names a
        feature outside ``0..num_features``, and ``0.0`` when the dataset
        has no points.
    """
    num_features = self.classifier.num_features
    num_points = self.classifier.num_points

    # Ensure that the given subset is valid.
    if any(feat < 0 or feat > num_features for feat in subset):
        print("ERROR: Subset does not match features.")
        return 0

    # Work out which feature columns to drop. Column 0 is skipped on purpose:
    # it is the value the prediction is checked against further down.
    to_delete = []
    if subset != [0]:
        wanted = set(subset)
        to_delete = [
            col for col in range(1, num_features + 1) if col not in wanted
        ]

    print("\n--------------------------------------")
    print("\nPerforming k-fold cross validation.\n")

    # Nothing to validate on an empty dataset; also avoids dividing by zero.
    if num_points <= 0:
        return 0.0

    # The column selection is identical for every fold, so trim once here
    # instead of copying and trimming the full dataset inside the loop.
    trimmed = np.delete(self.classifier.normalized, to_delete, axis=1)

    num_correct = 0
    # One fold per data point (i.e. k equals the number of points).
    for leave_out in range(num_points):
        # Copy so the classifier can never write back into the shared array.
        held_out = trimmed[leave_out].copy()
        training = np.delete(trimmed, leave_out, axis=0)

        # Create a fresh classifier for this fold's training data.
        temp_c = Classifier()
        temp_c.Train(training)
        result = temp_c.Test(held_out)

        # Count the fold as a success if the prediction matches column 0.
        if result == held_out[0]:
            num_correct += 1

    return num_correct / num_points
def intro():
    """Print the project banner and run the nearest-neighbor validation demo.

    The algorithm menu is currently disabled: ``choice`` is fixed to 3, so
    the function always loads the dataset, asks the user for a feature
    subset, trains a ``Classifier`` on the full dataset and prints the
    accuracy reported by ``Validator.Validate`` for that subset.

    The subset is read from standard input as whitespace-separated integers.
    Blank (or whitespace-only) input selects all features, which is passed
    on as ``[0]``. Input that is not a list of integers prints an error and
    returns without running the validator.
    """
    print("\n--------------------------------------")
    print("CS170 Project 02 / Part 02\n")
    print("Otniel Thehumury 862029595")
    print("Matthew Walsh 862088280")
    print("--------------------------------------")

    # Project 2, Part 2: the interactive menu (1 = forward selection,
    # 2 = backward elimination, 3 = NN classifier) is switched off and the
    # classifier option is always taken.
    choice = 3
    print("")

    # Only the (disabled) search options would use this object; it is still
    # constructed so any work done by Problem() keeps happening as before.
    p = Problem()

    if choice == 1:
        print("Chosen Forward Selection")
        # TODO: p.forward_selection(num_features)
    elif choice == 2:
        print("Chosen Backward Elimination")
        # TODO: p.backward_elimination(num_features)
    elif choice == 3:
        print("Nearest Neighbor Classifier:\n")
        training_data = read_dataset()

        print("Please select a feature subset.\n")
        print("For example, type: 1 3 5 7 9")
        print("Enter zero or blank for all features.\n")
        sub_input = input("Enter your choice here: ")

        # Separate input into an int list.
        try:
            subset = [int(feat) for feat in sub_input.split()]
        except ValueError:
            print("ERROR: Feature subset must be whitespace-separated integers.")
            return

        # Blank input (including whitespace only) means all features, which
        # the validator expects to be spelled as [0].
        if not subset:
            subset = [0]

        # Train the classifier.
        c = Classifier()
        c.Train(training_data)

        # Run the validator on the chosen subset and report its accuracy.
        v = Validator(c)
        print(v.Validate(subset))
    else:
        print("Incorrect choice - closing program.")
Q.queryRanker() if arguments.run == 'cluster' or arguments.run == 'clusterclassify' or arguments.run == 'all': print "-- Clustering --" bestRankersFile = 'QueryData/' + dataset + str(iterations) + '.data' KM = KMeans(arguments.fromrangek, arguments.torangek, bestRankersFile, dataset, iterations) (queryToCluster, clusterToRanker) = KM.runScript() #print 'queryToCluster', queryToCluster if arguments.run == 'classify' or arguments.run == 'clusterclassify' or arguments.run == 'all': print "-- Classification --" clusterPath = "ClusterData/" + dataset + str(iterations) + ".data" rankerPath = "QueryData/generalRanker.data" C = Classifier(clusterPath, path_train, rankerPath, iterations) C.Train() if arguments.run == 'compare': print "-- Comparison --" classifierPath = "Classifier/" + dataset + str(iterations) + ".data" basic_ranker_path = "QueryData/generalRanker.data" clusterPath = "ClusterData/" + dataset + str(iterations) + ".data" compare.compareSystems(path_validate, classifierPath, basic_ranker_path, clusterPath, click) if arguments.run == "fake": print "--Fake clustering and learning--" rankerPath = "QueryData/generalRanker.data" bestRankersFile = 'QueryData/' + dataset + str(iterations) + '.data' clusterPath = "ClusterData/" + dataset + str(iterations) + ".data" rankerPath = "QueryData/generalRanker.data"