def test(image_list, model_images):
    """Classify each test image by a vote among its K nearest model images.

    The winning orientation is written to ``pred_orientation`` on every test
    image object; nothing is returned.

    :param image_list: List of test images
    :param model_images: List of training images
    """

    def squared_distance(first, second):
        """Sum the squared differences between the features of two images.

        No square root is taken, so this is the squared Euclidean distance;
        ordering neighbors by it gives the same ranking as the true distance.

        :param first: First image object
        :param second: Second image object
        :return: The sum of squared feature differences, added to 0.0
        """
        total = sum(
            (a - b) ** 2 for a, b in zip(first.features, second.features)
        )
        # Adding 0.0 promotes an integer sum to a float.
        return 0.0 + total

    for test_image in image_list:
        # NOTE(review): SortedList and K are defined outside this function;
        # presumably SortedList(K) retains the K entries with the smallest
        # keys -- confirm against its implementation.
        nearest = SortedList(K)
        for model_image in model_images:
            nearest.insert(
                squared_distance(test_image, model_image), model_image
            )

        # Tally one vote per retained neighbor, keyed by its orientation.
        votes = {}
        for rank in range(K):
            orientation = nearest.get(rank).orientation
            votes[orientation] = votes.get(orientation, 0) + 1

        # On a tie, max() keeps the orientation that was first voted for.
        test_image.pred_orientation = max(votes, key=votes.get)
def perform_top_n_filtering(similarity_df, n):
    """Collect the nearest neighbors of every user/item, capped by n.

    For each row of the similarity matrix, every cell that is not NaN and does
    not lie on the diagonal (row label == column label) is inserted into a
    ``SortedList(n)`` keyed by its similarity value.

    :param similarity_df: Pandas dataframe representing pairwise similarities
        between m users/items
    :param n: The number of neighbors
    :return: A dictionary that maps each row label to the result of
        ``SortedList.get_all()`` for that row
    """
    neighbor_dict = {}
    for row_idx, row in similarity_df.iterrows():
        # NOTE(review): SortedList is defined elsewhere; presumably it keeps
        # at most n entries, retaining the most similar ones -- confirm.
        nearest = SortedList(n)
        # Series.items() replaces Series.iteritems(), which was removed in
        # pandas 2.0.
        for col_idx, cell in row.items():
            # Skip missing similarities and the item's similarity to itself.
            if math.isnan(cell) or row_idx == col_idx:
                continue
            nearest.insert(cell, col_idx)
        neighbor_dict[row_idx] = nearest.get_all()
    return neighbor_dict
def perform_threshold_filtering(similarity_df, threshold):
    """Collect, for every user/item, the neighbors with similarity >= threshold.

    For each row of the similarity matrix, every cell that is not NaN, does
    not lie on the diagonal (row label == column label) and is not below the
    threshold is inserted into a ``SortedList`` keyed by its similarity value.

    :param similarity_df: Pandas dataframe representing pairwise similarities
        between m users/items
    :param threshold: The similarity threshold
    :return: A dictionary that maps each row label to the result of
        ``SortedList.get_all()`` for that row
    """
    neighbor_dict = {}
    # The column count is passed as the SortedList size argument.
    # NOTE(review): presumably this makes the capacity large enough that no
    # qualifying neighbor is dropped -- confirm against SortedList.
    cols = len(similarity_df.columns)
    for row_idx, row in similarity_df.iterrows():
        nearest = SortedList(cols)
        # Series.items() replaces Series.iteritems(), which was removed in
        # pandas 2.0.
        for col_idx, cell in row.items():
            # Skip missing values, self-similarity and anything below the
            # threshold.
            if math.isnan(cell) or row_idx == col_idx or cell < threshold:
                continue
            nearest.insert(cell, col_idx)
        neighbor_dict[row_idx] = nearest.get_all()
    return neighbor_dict