def test_knn(k, train_data, train_labels, test_data,
             output_path='predictions_digit_recognizer.csv'):
    """
    test_knn function

    Trains a KNN classifier on the given training set, classifies every
    sample of the testing data and writes the predictions to a CSV file
    with the header ``ImageId,Label``. Image ids are 1-based and follow
    the order of ``test_data``.

    Args
    ----
    k : integer
        number of neighbors to use for KNN
    train_data : np.array
        training dataset
    train_labels : np.array
        training dataset labels
    test_data : np.array
        testing dataset
    output_path : str, optional
        destination of the CSV file; defaults to the file name that was
        previously hard-coded

    Returns
    -------
    None
        The predictions are only written to ``output_path``.
    """
    print("Final k:" + str(k))
    knn = KNN(k, train_data, train_labels)

    # newline='' lets the csv module control line endings itself
    with open(output_path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['ImageId', 'Label'])
        for image_id, sample in enumerate(test_data, start=1):
            guess = knn.classify(sample)
            writer.writerow([str(image_id), str(int(guess))])
from sklearn import datasets
from knn import KNN

# Minimal usage example of the project-local KNN class on the Iris data set.
data = datasets.load_iris()
trainingData = data['data']  # feature matrix
trainingLabels = data['target']  # class label of each row

classifier = KNN(k=5)
# NOTE(review): the return value of classify() is discarded, and the training
# data is passed as both arguments' source -- presumably a smoke test; confirm
# the intended signature against knn.KNN.
classifier.classify(trainingData, trainingLabels)
# confusion_matrix[A][B] = quantas vezes um documento da classe A foi atribuído à classe B topics = ['baseball', 'christian', 'guns'] confusion_matrix = {topic:{t:0 for t in topics} for topic in topics} print_log = False i = 0 ytrue = [] ypred = [] for topic in topics: for doc in reader.test[topic]: ytrue.append(topic) # classifica os documentos de teste words = parser.process_sent(doc) query = tf_idf_calculator.generate_tf_vector(words) result = knn.classify(query) confusion_matrix[topic][result] += 1 ypred.append(result) i += 1 if print_log: print('') print(i) print(doc) print(words) print(query) print(result) # e imprime os resultados print('#'*40) s = '#'*10 + (' K=%d || dist=%s ' % (k, metric)) + '#'*10 print(s)
class ModelEngineering:
    """
    Wraps a TensorFlow face-embedding model together with a KNN classifier.

    The TensorFlow graph is loaded lazily on the first call to ``encode``.
    The KNN classifier is fitted and evaluated on the faces provided by a
    "warehouse" object exposing ``get_faces()``; each face must provide
    ``embedding`` and ``uid`` attributes.
    """

    def __init__(self, pkg_dir):
        """
        :param pkg_dir: directory that contains the
            'InceptionResNetV1-VGGFace2' model folder
        """
        self.pkg_dir = pkg_dir
        self.frozen_graph_path = os.path.join(pkg_dir, 'InceptionResNetV1-VGGFace2',
                                              '20180402-114759.pb')
        self.graph = tf.Graph()
        self.session = tf.Session(graph=self.graph)
        # Tensors resolved by initialize(); None until the model is loaded
        self.imgs_ph = None
        self.phase_train_ph = None
        self.embs_ph = None
        self.emb_size_ph = None
        self.initialized = False
        # we create an instance of Neighbours Classifier and fit the data.
        self.n_neighbors = 2
        # weight function used in prediction. Possible values: 'uniform', 'distance', [callable]
        self.weights = 'distance'
        # self.clf = neighbors.KNeighborsClassifier(self.n_neighbors, algorithm='ball_tree', weights=self.weights)
        self.knn = KNN()
        #self.gender_model= os.path.join(self.pkg_dir,'pre_trained_gn','gender_detection1.model')
        #self.gender = Gender(model=self.gender_model)

    def initialize(self):
        """
        Call load_model method and get input/output tensors

        :return: True, if everything goes well
        """
        self.imgs_ph, self.phase_train_ph, self.embs_ph, self.emb_size_ph = self.load_model(
            self.frozen_graph_path)
        return True

    def load_model(self, model, input_map=None):
        """
        Load a (frozen) Tensorflow model into memory.

        :param model: Could be either a directory containing the meta_file and ckpt_file
            or a model protobuf (.pb) file
        :param input_map: The input map
        :return: The place holders for input dataset, phase train, embeddings,
            and the embedding size
        """
        with self.graph.as_default():
            # Check if the model is a model directory (containing a metagraph and a checkpoint file)
            # or if it is a protobuf file with a frozen graph
            model_exp = os.path.expanduser(model)
            if os.path.isfile(model_exp):
                print('Model filename: %s' % model_exp)
                with gfile.FastGFile(model_exp, 'rb') as f:
                    graph_def = tf.GraphDef()
                    graph_def.ParseFromString(f.read())
                    tf.import_graph_def(graph_def, input_map=input_map, name='')
            else:
                print('Model directory: %s' % model_exp)
                meta_file, ckpt_file = self.get_model_filenames(model_exp)
                print('Metagraph file: %s' % meta_file)
                print('Checkpoint file: %s' % ckpt_file)
                saver = tf.train.import_meta_graph(os.path.join(
                    model_exp, meta_file), input_map=input_map)
                saver.restore(self.session, os.path.join(model_exp, ckpt_file))

        # Get input and output tensors
        imgs_ph = self.graph.get_tensor_by_name("input:0")
        embs_ph = self.graph.get_tensor_by_name("embeddings:0")
        phase_train_ph = self.graph.get_tensor_by_name("phase_train:0")
        emb_size = embs_ph.get_shape()[1]
        return imgs_ph, phase_train_ph, embs_ph, emb_size

    @staticmethod
    def get_model_filenames(model_dir):
        """
        Get the model file names.

        :param model_dir: The directory in which the saved checkpoints of the model exists.
        :return: The meta file name and the checkpoint file name
        :raises ValueError: if the directory holds no meta file or more than one
        """
        files = os.listdir(model_dir)
        meta_files = [s for s in files if s.endswith('.meta')]
        if len(meta_files) == 0:
            raise ValueError('No meta file found in the model directory (%s)' % model_dir)
        elif len(meta_files) > 1:
            raise ValueError(
                'There should not be more than one meta file in the model directory ({})'
                .format(model_dir))
        meta_file = meta_files[0]

        # Prefer the checkpoint recorded in the checkpoint state, if any
        ckpt = tf.train.get_checkpoint_state(model_dir)
        ckpt_file = ''
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_file = os.path.basename(ckpt.model_checkpoint_path)
            return meta_file, ckpt_file

        # Otherwise pick the 'model-*.ckpt-<step>' file with the highest step;
        # ckpt_file stays '' when no file matches.
        max_step = -1
        for f in files:
            step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f)
            if step_str is not None and len(step_str.groups()) >= 2:
                step = int(step_str.groups()[1])
                if step > max_step:
                    max_step = step
                    ckpt_file = step_str.groups()[0]
        return meta_file, ckpt_file

    def encode(self, images):
        """
        Run the forward pass to calculate embeddings.

        Loads the model on first use.

        :param images: The input (4D) tensor
        :return: The embeddings computed by the "embeddings:0" tensor
            (512-vectors according to the original documentation)
        """
        if not self.initialized:
            self.initialized = self.initialize()
        feed_dict = {self.imgs_ph: images, self.phase_train_ph: False}
        emb_array = self.session.run(self.embs_ph, feed_dict=feed_dict)
        return emb_array

    @staticmethod
    def _collect_faces(warehouse):
        """
        Stack the embeddings and UIDs of every face in the warehouse.

        The previous inline loops used ``emb_array.ndim == 1`` to detect the
        first face; with 1-D embeddings that test never becomes false, so
        each face overwrote the previous one. Collecting first and stacking
        once yields one row per face for both 1-D and row-shaped embeddings.

        :param warehouse: object exposing get_faces()
        :return: (emb_array, uid_array); both are empty arrays when the
            warehouse holds no faces
        """
        embeddings = []
        uids = []
        for face in warehouse.get_faces():
            embeddings.append(face.embedding)
            uids.append(face.uid)
        emb_array = np.vstack(embeddings) if embeddings else np.array([])
        # np.append onto an empty array keeps the dtype promotion of the
        # original per-face np.append calls
        uid_array = np.append(np.array([]), uids)
        return emb_array, uid_array

    def knn_fit(self, warehouse):
        """
        Fit the KNN classifier using the training data set

        :param warehouse: source of the training faces
        :return: None
        """
        emb_array, uid_array = self._collect_faces(warehouse)
        self.knn.fit(emb_array, uid_array)

    def knn_classify(self, query):
        """
        Supervised KNN

        :param query: the subject embedding
        :return: the UID of the subject
        """
        # The classifier is handed a one-element batch
        uid = self.knn.classify([query])
        return uid

    def knn_eval(self, warehouse):
        """
        Evaluate the KNN classifier on a test data set

        :param warehouse: source of the test faces
        :return: the accuracy
        """
        emb_array, uid_array = self._collect_faces(warehouse)
        accuracy = self.knn.evaluate(emb_array, uid_array)
        return accuracy
def k_fold_cross_validation(training_data, training_labels, n_folds=3,
                            k_values=None):
    """
    k_fold_cross_validation function

    Performs n-fold cross validation (3-fold by default) on the training
    data to determine the best k-value for k-NN. By default the values
    tested are [1,5]. Accuracy for a given k is pooled over all folds, and
    the first k reaching the highest accuracy wins.

    Args
    ----
    training_data : np.array
        training data
    training_labels : np.array
        Associated training labels (digits 0-9, used as confusion-matrix
        indices)
    n_folds : integer, optional
        number of folds, at least 2 (default 3)
    k_values : iterable of integers, optional
        candidate k values (default 1..5)

    Returns
    -------
    Tuple (integer, list of lists)
        The best k and its 10x10 confusion matrix, indexed as
        ``matrix[true_label][predicted_label]``.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 2.
    """
    if n_folds < 2:
        raise ValueError("n_folds must be at least 2")
    if k_values is None:
        k_values = range(1, 6)

    data = np.array_split(training_data, n_folds)
    labels = np.array_split(np.array(training_labels), n_folds)

    best_accuracy = -1.0
    best_k = -1
    best_confusion_matrix = None

    for k in k_values:
        right = 0
        total = 0
        confusion_matrix = [[0] * 10 for _ in range(10)]

        for n in range(n_folds):
            # fold n is held out, every other fold is used for training
            test_data = data[n]
            test_label = labels[n]
            train_data = np.concatenate(
                [data[j] for j in range(n_folds) if j != n])
            train_labels = np.concatenate(
                [labels[j] for j in range(n_folds) if j != n])

            # train classifier
            knn = KNN(k, train_data, train_labels)

            # test classifier
            for sample, true_label in zip(test_data, test_label):
                guess = knn.classify(sample)
                confusion_matrix[int(true_label)][int(guess)] += 1
                if guess == true_label:
                    right += 1
                total += 1

        # determine accuracy (float division on Python 2 as well)
        accuracy = right / float(total)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_k = k
            best_confusion_matrix = confusion_matrix
        print("Accuracy for k=" + str(k) + ": " + str(accuracy))

    return best_k, best_confusion_matrix
""" Classification related to part 1. KNN classification with K=1 and euclidean distance. Votes are not distance weighted. @Author: Massimiliano Natale """ from knn import KNN from resultHelper import ResultHelper """ Trigger the classification. Create the output file and the chart to visualize the result. """ if __name__ == "__main__": knn = KNN("data/classification/trainingData.csv", "data/classification/testData.csv") classificationData = knn.buildClassificationData( lambda x: knn.classify(x[:-1], knn._trainingData[:, :-1], 1)) # Save partial result to a file and draw the charts resultHelper = ResultHelper("part1.output.txt") resultHelper.write(classificationData) resultHelper.draw("KNN classification [not-weighted-distance] with K=1")
def main():
    """
    Split the spambase data into train/tune/test sets, then train, tune,
    store, reload and test a KNN classifier on them.

    Reads '../input_data/spambase.data' and '../input_data/spambase.names',
    writes the three splits and the tuned classifier below '../proc_data/'
    and prints the test error rate. Takes no arguments and returns None.

    NOTE(review): `DataFrame.ix` and `pd.save` only exist in old pandas
    releases; they are kept because the rest of the project targets that
    environment.
    """
    #############################################
    # Set up the data as per the first Practicum
    #############################################

    spam_values = np.genfromtxt('../input_data/spambase.data', delimiter=',')

    # The context manager closes the file even if reading fails
    with open('../input_data/spambase.names', 'r') as fl:
        lines = [line.strip() for line in fl]  # J : strip from beginning and ending whitespace

    # Keep the text before ':' of every line that is not empty and does
    # not start with '|' or '1'
    colnames = [line.partition(':')[0] for line in lines
                if not (len(line) == 0 or line[0] == '|' or line[0] == '1')]
    colnames.append('spam')

    spam_df = pd.DataFrame(spam_values, columns=colnames)
    # Map the 0/1 label to -1/+1
    spam_df['spam'] = 2 * spam_df['spam'] - 1

    # J: DataFrame.shape is a (rows, columns) tuple; the first entry is the number of samples
    nsamples = spam_df.shape[0]
    # np.floor returns a float; slice bounds must be integers
    ntest = int(np.floor(.2 * nsamples))
    ntune = int(np.floor(.1 * nsamples))

    # we want to make this reproducible so we seed the random number generator
    np.random.seed(1)
    all_indices = np.arange(nsamples)

    # J: important to shuffle so that you don't know which portion is training, which is testing and which is tuning data
    np.random.shuffle(all_indices)
    test_indices = all_indices[:ntest]  # J: Get shuffled test indices first
    tune_indices = all_indices[ntest:(ntest + ntune)]  # J: tune indices second
    train_indices = all_indices[(ntest + ntune):]  # J: train indices (the majority) last

    # J : now that the "*indices" arrays have been shuffled, you can actually draw the relevant data through
    # DataFrame.ix. The second argument includes all columns, labels included.
    spam_train = spam_df.ix[train_indices, :]
    spam_tune = spam_df.ix[tune_indices, :]
    spam_test = spam_df.ix[test_indices, :]

    pd.save(spam_train, '../proc_data/training_data/spam_train.pdat')
    pd.save(spam_tune, '../proc_data/training_data/spam_tune.pdat')
    pd.save(spam_test, '../proc_data/testing_data/spam_test.pdat')

    #######################################################################
    # See how features are sorted according to their Information Gain score
    #######################################################################

    # atestTree = DecisionTree(spam_train, 5, True)
    # print atestTree.__sortFeatures__(spam_train, spam_train.columns)

    ###############################################
    # Training classifiers and saving them on disk
    ###############################################

    # Already trained those two, it took about 4 hours total.

    # majVoteTree = DecTree.DecisionTree(spam_train, 5, False)
    # print "Tuning a majority vote classifier on all depths between 1 and 15 inclusive."
    # majVoteTree.tune(spam_tune,1, 15)
    # print "Saving this classifier to disk."
    # majVoteTree.dump("../proc_data/dtreeWithMajVote_1_to_15.pyobj")
    #
    # IGTree = DecTree.DecisionTree(spam_train, 5, True)
    # print "Tuning an information gain classifier on all depths between 1 and 15 inclusive."
    # IGTree.tune(spam_tune,1, 15)
    # print "Saving this classifier to disk."
    # IGTree.dump("../proc_data/dtreeWithIG_1_to_15.pyobj")

    HectorsKNN = KNN(spam_train, spam_train['spam'], 5)
    print("Tuning Hector's KNN classifier for all values of K between 1 and 41 inclusive:")
    # Only odd K values are tried
    HectorsKNN.tune(spam_tune, spam_tune['spam'], k=range(1, 42, 2))
    print("Saving this classifier to disk.")
    HectorsKNN.dump("../proc_data/HectorsKNN_1_to_41.pyobj")

    ###########################################
    # Playing with stored classifiers
    ###########################################

    # Part 1: A decision tree classifier trained with Majority Vote, depths 1 to 10

    # print "Loading a decision tree trained with Majority Vote for depths 1 to 10..."
    # majVoteTree = load("../proc_data/dtreeWithMajVote_1_to_15.pyobj")
    # print "According to the tuning set, the optimal depth for this tree is: " + str(majVoteTree.depth)
    # classifications = majVoteTree.classify(spam_test)
    # testErrorRate = np.mean ( (spam_test['spam'].values * classifications) < 0)
    # print 'For this depth, the error on the test set was %0.3f' % testErrorRate
    # print "We will now test all different hyper-parameters found during tuning on the test data:"
    # majVoteTree.classifyWithAllDepths(spam_test)
    # print "\n===========================================================\n"
    #
    # # Part 2: A decision tree classifier trained with Information Gain, depths 1 to 10
    #
    # print "Loading a decision tree trained with Information Gain for depths 1 to 10..."
    # IGTree = load("../proc_data/dtreeWithIG_1_to_15.pyobj")
    # print "According to the tuning set, the optimal depth for this tree is: " + str(IGTree.depth)
    # classifications = IGTree.classify(spam_test)
    # testErrorRate = np.mean ( (spam_test['spam'].values * classifications) < 0)
    # print 'For this depth, the error on the test set was %0.3f' % testErrorRate
    # print "We will now test all different hyper-parameters found during tuning on the test data:"
    # IGTree.classifyWithAllDepths(spam_test)

    # Part 3: Hector's KNN-classifier

    print("Reloading Hector's classifier from disk:")
    HectorsKNN = load("../proc_data/HectorsKNN_1_to_41.pyobj")
    print("According to the tuning set, the optimal K for this classifier is: " + str(HectorsKNN.k) + ".")
    classifications = HectorsKNN.classify(spam_test)
    # Labels are -1/+1, so a negative product marks a misclassified sample
    testErrorRate = np.mean((spam_test['spam'].values * classifications) < 0)
    print('For this value of K, the error on the test set was %0.3f' % testErrorRate)
    print("We will now test all different hyper-parameters found during tuning on the test data:")
    HectorsKNN.classifyWithAllK(spam_test)

    # Part 4: Weighted Features KNN

    print("Exiting...")
def _run_bayes(train_df, test_df, sigma_depends_on_class, test_filename,
               train_filename):
    """Train a Gaussian naive Bayes classifier and write its predictions and accuracy for the test and training sets."""
    gb = GaussianBayesClassifier(sigma_depends_on_class=sigma_depends_on_class,
                                 verbose=DEBUG)
    print("Training classifier...")
    # Column 0 (the index) is left out of training; the class column stays in
    gb.train(train_df.iloc[:, 1:])
    print("Training complete")
    # print("Params:")
    for title, filename, data in (("test", test_filename, test_df),
                                  ("train", train_filename, train_df)):
        with open_output_file(filename) as f:
            print("Running on %sing data" % title)
            f.write("#index,predicted_class,actual_class\n")
            # Classification sees neither the index nor the class column
            categories = gb.classify(data.iloc[:, 1:-1])
            total = 0
            correct = 0
            for idx, predicted_category in enumerate(categories):
                actual = data.iloc[idx, -1]
                if predicted_category == actual:
                    correct += 1
                total += 1
                f.write("%d,%d,%d\n" % (data.iloc[idx, 0], predicted_category, actual))
            accstr = "Accuracy on %s data: %f" % (title, float(correct) / total)
            f.write(accstr + "\n")
            print(accstr)


def main():
    """
    Run KNN and Gaussian naive Bayes classification on the glass data set.

    Reads the training and testing CSV files (defaults 'data/train.txt' and
    'data/test.txt', or both paths from the command line), prints data
    statistics, then for every classifier configuration writes one output
    file per data set with the predictions and the accuracy. Finally shows
    the class histogram. Takes no arguments and returns None.

    Exits with a usage message when exactly one command-line argument is
    given, because both paths are required together.
    """
    # TODO: test with user input, confirm input with TAs
    logging.basicConfig(stream=sys.stdout,
                        level=logging.DEBUG if DEBUG else logging.INFO)
    trainingData = "data/train.txt"
    testingData = "data/test.txt"
    if len(sys.argv) == 2:
        # Previously this case crashed with IndexError on sys.argv[2]
        sys.exit("usage: %s [TRAINING_DATA TESTING_DATA]" % sys.argv[0])
    if len(sys.argv) > 2:
        trainingData = sys.argv[1]
        testingData = sys.argv[2]
        # Both values must be passed as one tuple to the % operator
        print("Taking %s as training data, and %s as testing data" %
              (trainingData, testingData))

    col_names = [
        "index", "ri", "na", "mg", "al", "si", "k", "ca", "ba", "fe", "type"
    ]
    train_df = pd.read_csv(trainingData, names=col_names)
    test_df = pd.read_csv(testingData, names=col_names)

    # Data stats
    print("Data characteristics:")
    print("No. of attributes: ", len(train_df.iloc[0]))
    # The index and the class column are not features
    print("No. of features usable for classifcation: ",
          len(train_df.iloc[0]) - 2)
    print("Size of training data", len(train_df))
    print("Size of testing data", len(test_df))
    print("No. of unique classes: ", 7)
    print("Unique classes represented in training data: ",
          train_df['type'].unique())
    print("\t(Histogram of classes in figure 1)")

    # plot class histogram
    train_df.hist('type', alpha=.5, bins=7)
    plt.title("Figure 1: Class (glass type) Histogram")
    # /Data stats

    print(
        "\n******************************** Running KNN classifer ********************************"
    )
    # Run KNN for k = 1, 3, 5, 7 and L1 & L2 norms on training (leave one out) and test sets
    for k in (1, 3, 5, 7):
        for order in (1, 2):  # order of the norm
            print("Running KNN of order %d with L-%d norm" % (k, order))
            # The lambda reads `order` when called; it is only used within
            # this iteration, so it always sees the current value.
            knn = KNN(train_df.iloc[:, -1],
                      train_df.iloc[:, 1:-1],
                      k,
                      distance=lambda a, b: np.linalg.norm(a - b, ord=order),
                      normalize_data=True)
            for title, filename, data, leave_one_out in (
                    ("TEST", "knn_%d_l%d_test" % (k, order), test_df, False),
                    ("TRAIN", "knn_%d_l%d_train" % (k, order), train_df, True)):
                with open_output_file(filename) as f:
                    f.write("#index,predicted_class,actual_class\n")
                    total = 0
                    correct = 0
                    for row in data.values:
                        predicted = knn.classify(row[1:-1], leave_one_out)
                        actual = row[-1]
                        f.write("%d,%d,%d\n" % (row[0], predicted, actual))
                        total += 1
                        if actual == predicted:
                            correct += 1
                    accstr = "Accuracy on %s data: %f" % (
                        title, float(correct) / total)
                    f.write(accstr + "\n")
                    print(accstr)

    print(
        "\n******************************** Running gaussian naive baye's classifer ********************************"
    )
    _run_bayes(train_df, test_df, True, "bayes_test.txt", "bayes_train")

    print(
        "\n******************************** Running gaussian naive baye's classifer (with sigma independent of class) ********************************"
    )
    _run_bayes(train_df, test_df, False, "bayes_test_sigmaindependent.txt",
               "bayes_train_sigmaindependent")

    print(
        "\n******************************** Showing class histogram ********************************"
    )
    plt.show()
def main():
    """
    Split the spambase data into train/tune/test sets, then train, tune,
    store, reload and test a KNN classifier on them.

    Reads '../input_data/spambase.data' and '../input_data/spambase.names',
    writes the three splits and the tuned classifier below '../proc_data/'
    and prints the test error rate. Takes no arguments and returns None.

    NOTE(review): `DataFrame.ix` and `pd.save` only exist in old pandas
    releases; they are kept because the rest of the project targets that
    environment.
    """
    #############################################
    # Set up the data as per the first Practicum
    #############################################

    spam_values = np.genfromtxt('../input_data/spambase.data', delimiter=',')

    # The context manager closes the file even if reading fails
    with open('../input_data/spambase.names', 'r') as fl:
        lines = [line.strip() for line in fl]  # J : strip from beginning and ending whitespace

    # Keep the text before ':' of every line that is not empty and does
    # not start with '|' or '1'
    colnames = [
        line.partition(':')[0] for line in lines
        if not (len(line) == 0 or line[0] == '|' or line[0] == '1')
    ]
    colnames.append('spam')

    spam_df = pd.DataFrame(spam_values, columns=colnames)
    # Map the 0/1 label to -1/+1
    spam_df['spam'] = 2 * spam_df['spam'] - 1

    # J: DataFrame.shape is a (rows, columns) tuple; the first entry is the number of samples
    nsamples = spam_df.shape[0]
    # np.floor returns a float; slice bounds must be integers
    ntest = int(np.floor(.2 * nsamples))
    ntune = int(np.floor(.1 * nsamples))

    # we want to make this reproducible so we seed the random number generator
    np.random.seed(1)
    all_indices = np.arange(nsamples)

    # J: important to shuffle so that you don't know which portion is training, which is testing and which is tuning data
    np.random.shuffle(all_indices)
    test_indices = all_indices[:ntest]  # J: Get shuffled test indices first
    tune_indices = all_indices[ntest:(ntest + ntune)]  # J: tune indices second
    train_indices = all_indices[(ntest + ntune):]  # J: train indices (the majority) last

    # J : now that the "*indices" arrays have been shuffled, you can actually draw the relevant data through
    # DataFrame.ix. The second argument includes all columns, labels included.
    spam_train = spam_df.ix[train_indices, :]
    spam_tune = spam_df.ix[tune_indices, :]
    spam_test = spam_df.ix[test_indices, :]

    pd.save(spam_train, '../proc_data/training_data/spam_train.pdat')
    pd.save(spam_tune, '../proc_data/training_data/spam_tune.pdat')
    pd.save(spam_test, '../proc_data/testing_data/spam_test.pdat')

    #######################################################################
    # See how features are sorted according to their Information Gain score
    #######################################################################

    # atestTree = DecisionTree(spam_train, 5, True)
    # print atestTree.__sortFeatures__(spam_train, spam_train.columns)

    ###############################################
    # Training classifiers and saving them on disk
    ###############################################

    # Already trained those two, it took about 4 hours total.

    # majVoteTree = DecTree.DecisionTree(spam_train, 5, False)
    # print "Tuning a majority vote classifier on all depths between 1 and 15 inclusive."
    # majVoteTree.tune(spam_tune,1, 15)
    # print "Saving this classifier to disk."
    # majVoteTree.dump("../proc_data/dtreeWithMajVote_1_to_15.pyobj")
    #
    # IGTree = DecTree.DecisionTree(spam_train, 5, True)
    # print "Tuning an information gain classifier on all depths between 1 and 15 inclusive."
    # IGTree.tune(spam_tune,1, 15)
    # print "Saving this classifier to disk."
    # IGTree.dump("../proc_data/dtreeWithIG_1_to_15.pyobj")

    HectorsKNN = KNN(spam_train, spam_train['spam'], 5)
    print("Tuning Hector's KNN classifier for all values of K between 1 and 41 inclusive:")
    # Only odd K values are tried
    HectorsKNN.tune(spam_tune, spam_tune['spam'], k=range(1, 42, 2))
    print("Saving this classifier to disk.")
    HectorsKNN.dump("../proc_data/HectorsKNN_1_to_41.pyobj")

    ###########################################
    # Playing with stored classifiers
    ###########################################

    # Part 1: A decision tree classifier trained with Majority Vote, depths 1 to 10

    # print "Loading a decision tree trained with Majority Vote for depths 1 to 10..."
    # majVoteTree = load("../proc_data/dtreeWithMajVote_1_to_15.pyobj")
    # print "According to the tuning set, the optimal depth for this tree is: " + str(majVoteTree.depth)
    # classifications = majVoteTree.classify(spam_test)
    # testErrorRate = np.mean ( (spam_test['spam'].values * classifications) < 0)
    # print 'For this depth, the error on the test set was %0.3f' % testErrorRate
    # print "We will now test all different hyper-parameters found during tuning on the test data:"
    # majVoteTree.classifyWithAllDepths(spam_test)
    # print "\n===========================================================\n"
    #
    # # Part 2: A decision tree classifier trained with Information Gain, depths 1 to 10
    #
    # print "Loading a decision tree trained with Information Gain for depths 1 to 10..."
    # IGTree = load("../proc_data/dtreeWithIG_1_to_15.pyobj")
    # print "According to the tuning set, the optimal depth for this tree is: " + str(IGTree.depth)
    # classifications = IGTree.classify(spam_test)
    # testErrorRate = np.mean ( (spam_test['spam'].values * classifications) < 0)
    # print 'For this depth, the error on the test set was %0.3f' % testErrorRate
    # print "We will now test all different hyper-parameters found during tuning on the test data:"
    # IGTree.classifyWithAllDepths(spam_test)

    # Part 3: Hector's KNN-classifier

    print("Reloading Hector's classifier from disk:")
    HectorsKNN = load("../proc_data/HectorsKNN_1_to_41.pyobj")
    print("According to the tuning set, the optimal K for this classifier is: " + str(
        HectorsKNN.k) + ".")
    classifications = HectorsKNN.classify(spam_test)
    # Labels are -1/+1, so a negative product marks a misclassified sample
    testErrorRate = np.mean((spam_test['spam'].values * classifications) < 0)
    print('For this value of K, the error on the test set was %0.3f' % testErrorRate)
    print("We will now test all different hyper-parameters found during tuning on the test data:")
    HectorsKNN.classifyWithAllK(spam_test)

    # Part 4: Weighted Features KNN

    print("Exiting...")
class KNNTestCase(unittest.TestCase):
    """
    Test cases for the KNN implementation
    """

    def setUp(self):
        """
        Build a fresh classifier and the fixture arrays before each test.

        The training set holds classes 0 and 1 only; the test set also
        holds two samples of class 2, which is absent from training.
        :return: None
        """
        self.knn = KNN(k=5)
        self.train_data = np.array([[5.1, 3.5, 1.4, 0.2],
                                    [4.9, 3.0, 1.4, 0.2],
                                    [4.7, 3.2, 1.3, 0.2],
                                    [4.6, 3.1, 1.5, 0.2],
                                    [7.0, 3.2, 4.7, 1.4],
                                    [6.4, 3.2, 4.5, 1.5],
                                    [6.9, 3.1, 4.9, 1.5],
                                    [5.5, 2.3, 4.0, 1.3]])
        self.train_label = np.array([0, 0, 0, 0, 1, 1, 1, 1])
        self.test_data = np.array([[5.0, 3.6, 1.4, 0.2],
                                   [5.4, 3.9, 1.7, 0.4],
                                   [6.5, 2.8, 4.6, 1.5],
                                   [5.7, 2.8, 4.5, 1.3],
                                   [6.3, 3.3, 6.0, 2.5],
                                   [5.8, 2.7, 5.1, 1.9]])
        self.test_label = np.array([0, 0, 1, 1, 2, 2])

    def test_fit(self):
        """
        The return value of the function should be equal to the number of
        the classes in the data set
        :return: None
        """
        # Fitted on the test set on purpose: it is the one with 3 classes
        num_classes = self.knn.fit(self.test_data, self.test_label)
        self.assertEqual(num_classes, 3)

    def test_compute_distance(self):
        """
        The distance between the two input vectors should be a floating
        point value inside the zero to one interval
        :return: None
        """
        sample0 = self.train_data[0]
        sample1 = self.train_data[1]
        distance = self.knn.compute_distance(sample0, sample1)
        self.assertGreaterEqual(distance, 0.0)
        self.assertLessEqual(distance, 1.0)

    def test_get_neighbours(self):
        """
        The returned neighbours should be a list of tuples, each of which
        contains the label and the distance
        :return: None
        """
        self.knn.fit(self.train_data, self.train_label)
        query = self.test_data[0]
        neighbours = self.knn.get_neighbors(query)
        self.assertEqual(len(neighbours), self.knn.k)
        self.assertEqual(len(neighbours[0]), 2)

    def test_classify(self):
        """
        The classified label of the normal sample should correspond to its
        ground truth label and for the anomaly sample that does not belong
        to any of the training classes it should be equal to -1
        :return: None
        """
        self.knn.fit(self.train_data, self.train_label)
        normal_data = self.test_data[0]
        normal_label = self.test_label[0]
        # The last test sample belongs to class 2, unseen during training
        anomaly_data = self.test_data[-1]
        normal_pred = self.knn.classify(normal_data)
        anomaly_pred = self.knn.classify(anomaly_data)
        self.assertEqual(normal_pred, normal_label)
        self.assertEqual(anomaly_pred, -1)

    def test_evaluate(self):
        """
        The returned accuracy should be equal to 1.0 in the case where the
        training and test set are the same and in the case where the
        training set and test set are different it should be strictly
        between zero and one
        :return: None
        """
        self.knn.fit(self.train_data, self.train_label)
        accuracy_perfect = self.knn.evaluate(self.train_data, self.train_label)
        self.assertEqual(accuracy_perfect, 1.0)
        accuracy_imperfect = self.knn.evaluate(self.test_data, self.test_label)
        self.assertGreater(accuracy_imperfect, 0.0)
        self.assertLess(accuracy_imperfect, 1.0)