def perform_top_n_filtering(similarity_df, n):
    """Collect the nearest neighbors of every user/item, keyed by row label.

    Every row of ``similarity_df`` is scanned. Cells that are NaN and the
    self-similarity cell (row label equal to column label) are skipped; each
    remaining ``(similarity, column label)`` pair is inserted into a
    ``SortedList`` constructed with ``n``.

    :param similarity_df: Pandas dataframe representing pairwise similarities
        between m users/items
    :param n: The number of neighbors; passed as the constructor argument of
        ``SortedList``
    :return: A dictionary that maps each row label to whatever
        ``SortedList.get_all()`` returns for that row (presumably at most n
        nearest users/items -- confirm against ``SortedList``, which is
        defined outside this function)
    """
    neighbor_dict = {}
    for row_idx, row in similarity_df.iterrows():
        nearest = SortedList(n)
        # Series.items() yields the same (label, value) pairs as the old
        # Series.iteritems(), which was removed in pandas 2.0.
        for col_idx, cell in row.items():
            if math.isnan(cell) or row_idx == col_idx:
                continue
            nearest.insert(cell, col_idx)
        neighbor_dict[row_idx] = nearest.get_all()
    return neighbor_dict
def perform_threshold_filtering(similarity_df, threshold):
    """Collect the neighbors of every user/item whose similarity >= threshold.

    Every row of ``similarity_df`` is scanned. Cells that are NaN, the
    self-similarity cell (row label equal to column label) and cells strictly
    below ``threshold`` are skipped; each remaining
    ``(similarity, column label)`` pair is inserted into a ``SortedList``
    constructed with the number of columns, so the number of kept neighbors
    is limited by the column count rather than by a fixed n.

    :param similarity_df: Pandas dataframe representing pairwise similarities
        between m users/items
    :param threshold: The similarity threshold; a cell is kept only when it
        is not less than this value
    :return: A dictionary that maps each row label to whatever
        ``SortedList.get_all()`` returns for that row (presumably the
        qualifying users/items ordered by similarity -- confirm against
        ``SortedList``, which is defined outside this function)
    """
    neighbor_dict = {}
    cols = len(similarity_df.columns)
    for row_idx, row in similarity_df.iterrows():
        nearest = SortedList(cols)
        # Series.items() yields the same (label, value) pairs as the old
        # Series.iteritems(), which was removed in pandas 2.0.
        for col_idx, cell in row.items():
            if math.isnan(cell) or row_idx == col_idx or cell < threshold:
                continue
            nearest.insert(cell, col_idx)
        neighbor_dict[row_idx] = nearest.get_all()
    return neighbor_dict