Esempio n. 1
0
def perform_top_n_filtering(similarity_df, n):
    """Build a neighbor map from a pairwise similarity matrix.

    Every row of ``similarity_df`` is scanned. Cells that are NaN, or whose
    column label equals the row label (the self-similarity on the diagonal),
    are skipped; every other ``(similarity, column label)`` pair is inserted
    into a fresh ``SortedList(n)``.

    :param similarity_df: Pandas dataframe representing pairwise similarities
        between m users/items
    :param n: The number of neighbors; forwarded to ``SortedList``
        (presumably its capacity -- confirm against the SortedList helper)
    :return: A dictionary mapping each row label to the value returned by
        ``SortedList.get_all()`` for that row (presumably at most n nearest
        users/items -- confirm against the SortedList helper)
    """
    neighbor_dict = {}
    for row_idx, row in similarity_df.iterrows():
        nearest = SortedList(n)
        # Series.items() is used because Series.iteritems() was removed in
        # pandas 2.0; both yield (label, value) pairs.
        for col_idx, cell in row.items():
            if math.isnan(cell) or row_idx == col_idx:
                continue
            nearest.insert(cell, col_idx)
        neighbor_dict[row_idx] = nearest.get_all()
    return neighbor_dict
Esempio n. 2
0
def perform_threshold_filtering(similarity_df, threshold):
    """Build a neighbor map keeping only similarities >= threshold.

    Every row of ``similarity_df`` is scanned. Cells that are NaN, that lie
    on the diagonal (column label equal to the row label), or whose value is
    below ``threshold`` are skipped; every other ``(similarity, column
    label)`` pair is inserted into a ``SortedList`` constructed with the
    number of columns, so no qualifying neighbor should be dropped for lack
    of room (assuming that argument is a capacity -- confirm against the
    SortedList helper).

    :param similarity_df: Pandas dataframe representing pairwise similarities
        between m users/items
    :param threshold: The similarity threshold (inclusive lower bound)
    :return: A dictionary mapping each row label to the value returned by
        ``SortedList.get_all()`` for that row
    """
    neighbor_dict = {}
    cols = len(similarity_df.columns)
    for row_idx, row in similarity_df.iterrows():
        nearest = SortedList(cols)
        # Series.items() is used because Series.iteritems() was removed in
        # pandas 2.0; both yield (label, value) pairs.
        for col_idx, cell in row.items():
            if math.isnan(cell) or row_idx == col_idx or cell < threshold:
                continue
            nearest.insert(cell, col_idx)
        neighbor_dict[row_idx] = nearest.get_all()
    return neighbor_dict