Beispiel #1
0
    def count_items_in_common(self):
        """
        Builds a histogram of how many items pairs of users have in common.

        Every unordered pair of positions below ``self.num_users`` is visited
        once; the matching ids are read from ``self.user_ids`` and the number
        of items returned by ``extractor.get_common_items`` for that pair is
        tallied.

        :return: a dictionary that maps a number of common items to the number
        of user pairs that share exactly that many items. For example, the
        dictionary {0:4, 1:10, 2:6} means that 4 pairs of users have no items
        in common, 10 pairs have 1 item in common and 6 pairs have 2 items in
        common.
        """
        users = extractor.initialize_users(self.reviews, False)
        histogram = {}

        for first in range(self.num_users):
            for second in range(first + 1, self.num_users):
                shared_items = extractor.get_common_items(
                    users, self.user_ids[first], self.user_ids[second])
                overlap = len(shared_items)

                # A size that has not been seen yet starts counting from zero
                histogram[overlap] = histogram.get(overlap, 0) + 1

        return histogram
    def count_items_in_common(self):
        """
        Tallies, for each pair of users, the number of items they share.

        The pairs are formed from the ids stored in ``self.user_ids`` at the
        positions below ``self.num_users``; each unordered pair is considered
        exactly once.

        :return: a dictionary whose keys are numbers of common items and whose
        values are the number of user pairs with exactly that many items in
        common. For example, {0:4, 1:10, 2:6} means that 4 pairs share no
        items, 10 pairs share 1 item and 6 pairs share 2 items.
        """
        profiles = extractor.initialize_users(self.reviews, False)
        pair_counts = {}

        for left in range(self.num_users):
            for right in range(left + 1, self.num_users):
                left_id, right_id = self.user_ids[left], self.user_ids[right]
                size = len(
                    extractor.get_common_items(profiles, left_id, right_id))

                # Make sure the bucket exists before incrementing it
                pair_counts.setdefault(size, 0)
                pair_counts[size] += 1

        return pair_counts
Beispiel #3
0
    def calculate_users_similarity(self, user_dictionary, user1, user2):
        """
        Averages the per-item similarity between two users over the items
        they have in common.

        For every common item a rating vector is built for each user, with the
        overall rating in the first position followed by the multi-criteria
        ratings of that item. The two vectors are compared with
        ``similarity_calculator.calculate_similarity`` using
        ``self._similarity_metric``.

        :param user_dictionary: a mapping from user id to an object that
        exposes ``item_ratings`` and ``item_multi_ratings``, both indexed by
        item
        :param user1: the id of the first user
        :param user2: the id of the second user
        :return: the mean of the per-item similarities, or None if the users
        have no items in common or have fewer than ``self._min_common_items``
        items in common
        """
        common_items = extractor.get_common_items(user_dictionary, user1,
                                                  user2)

        if not common_items:
            return None

        if self._min_common_items is not None and len(
                common_items) < self._min_common_items:
            return None

        user1_overall_ratings = user_dictionary[user1].item_ratings
        user1_multi_ratings = user_dictionary[user1].item_multi_ratings

        # The overall ratings of user2 must come from user2's own entry;
        # reading them from user1 would compare user1's overall rating with
        # itself in the first position of every vector
        user2_overall_ratings = user_dictionary[user2].item_ratings
        user2_multi_ratings = user_dictionary[user2].item_multi_ratings

        similarity_sum = 0.

        for item in common_items:
            # Overall rating first, then the criteria ratings
            user1_item_ratings = list(user1_multi_ratings[item])
            user1_item_ratings.insert(0, user1_overall_ratings[item])
            user2_item_ratings = list(user2_multi_ratings[item])
            user2_item_ratings.insert(0, user2_overall_ratings[item])

            similarity_sum += similarity_calculator.calculate_similarity(
                user1_item_ratings, user2_item_ratings,
                self._similarity_metric)

        similarity = similarity_sum / len(common_items)

        return similarity
Beispiel #4
0
    def calculate_pearson_similarity(self, user1, user2):
        """
        Computes the Pearson correlation between the overall ratings of two
        users, restricted to the items they have in common.

        Each rating is centred on the ``average_overall_rating`` stored for
        the corresponding user in ``self.user_dictionary``.

        :param user1: the id of the first user
        :param user2: the id of the second user
        :return: the correlation value, None if the users have no items in
        common, or 0 if the denominator of the formula is zero
        """
        shared_items = extractor.get_common_items(
            self.user_dictionary, user1, user2)

        if not shared_items:
            return None

        profile1 = self.user_dictionary[user1]
        profile2 = self.user_dictionary[user2]
        mean1 = profile1.average_overall_rating
        mean2 = profile2.average_overall_rating

        cross_sum = 0
        squares1 = 0
        squares2 = 0

        for item in shared_items:
            deviation1 = profile1.item_ratings[item] - mean1
            deviation2 = profile2.item_ratings[item] - mean2

            cross_sum += deviation1 * deviation2
            squares1 += deviation1 ** 2
            squares2 += deviation2 ** 2

        norm = math.sqrt(squares1 * squares2)

        # A user whose common ratings all equal the average gives a zero norm
        if norm == 0:
            return 0

        return cross_sum / norm
Beispiel #5
0
    def calculate_cosine_similarity(self, user1, user2):
        """
        Computes the cosine similarity between the overall ratings of two
        users, restricted to the items they have in common.

        :param user1: the id of the first user
        :param user2: the id of the second user
        :return: the cosine similarity, None if the users have no items in
        common, or 0 if the denominator of the formula is zero (i.e. all the
        common ratings of at least one of the users are zero)
        """
        common_items =\
            extractor.get_common_items(self.user_dictionary, user1, user2)

        if not common_items:
            return None

        numerator = 0
        denominator1 = 0
        denominator2 = 0

        for item in common_items:
            user1_rating = self.user_dictionary[user1].item_ratings[item]
            user2_rating = self.user_dictionary[user2].item_ratings[item]

            numerator += user1_rating * user2_rating
            denominator1 += user1_rating ** 2
            denominator2 += user2_rating ** 2

        denominator = math.sqrt(denominator1) * math.sqrt(denominator2)

        # Guard against a ZeroDivisionError when one of the rating vectors is
        # all zeros; the cosine is undefined there, so we report no similarity
        if denominator == 0:
            return 0

        return numerator / denominator
    def calculate_users_similarity(self, user_dictionary, user1, user2):
        """
        Calculates the similarity between two users as the average of the
        similarities obtained for each of the items they have in common.

        For each common item, the ratings of a user are arranged in a list
        that starts with the overall rating and continues with the
        multi-criteria ratings of the item. The lists of both users are then
        passed to ``similarity_calculator.calculate_similarity`` together with
        ``self._similarity_metric``.

        :param user_dictionary: a mapping from user id to an object that
        exposes ``item_ratings`` and ``item_multi_ratings``, both indexed by
        item
        :param user1: the id of the first user
        :param user2: the id of the second user
        :return: the average per-item similarity, or None if the users have no
        items in common or have fewer than ``self._min_common_items`` items in
        common
        """
        common_items = extractor.get_common_items(user_dictionary, user1, user2)

        if not common_items:
            return None

        if self._min_common_items is not None and len(
                common_items) < self._min_common_items:
            return None

        user1_overall_ratings = user_dictionary[user1].item_ratings
        user1_multi_ratings = user_dictionary[user1].item_multi_ratings

        # Read user2's overall ratings from user2 (not user1), otherwise the
        # first component of both rating lists would always be identical
        user2_overall_ratings = user_dictionary[user2].item_ratings
        user2_multi_ratings = user_dictionary[user2].item_multi_ratings

        similarity_sum = 0.

        for item in common_items:
            # The overall rating goes in front of the criteria ratings
            user1_item_ratings = list(user1_multi_ratings[item])
            user1_item_ratings.insert(0, user1_overall_ratings[item])
            user2_item_ratings = list(user2_multi_ratings[item])
            user2_item_ratings.insert(0, user2_overall_ratings[item])

            similarity_sum += similarity_calculator.calculate_similarity(
                user1_item_ratings, user2_item_ratings, self._similarity_metric)

        similarity = similarity_sum / len(common_items)

        return similarity
    def calculate_user_similarity(self, user1, user2, threshold):
        """
        Computes a context-weighted correlation between the overall ratings
        of two users.

        Only the common items whose context similarity (as returned by
        ``context_utils.get_context_similarity``) is strictly greater than
        ``threshold`` take part in the calculation, and each of them is
        weighted by that context similarity.

        :param user1: the id of the first user
        :param user2: the id of the second user
        :param threshold: the context similarity an item has to exceed in
        order to be taken into account
        :return: the weighted correlation, or None if the users have no items
        in common or the denominator of the formula is zero
        """
        shared_items = extractor.get_common_items(
            self.user_dictionary, user1, user2)

        if not shared_items:
            return None

        profile1 = self.user_dictionary[user1]
        profile2 = self.user_dictionary[user2]

        # Keep the items whose contexts are similar enough, with their weight
        weights = {}
        for item in shared_items:
            weight = context_utils.get_context_similarity(
                profile1.item_contexts[item], profile2.item_contexts[item],
                self.context_rich_topics)
            if weight > threshold:
                weights[item] = weight

        mean1 = profile1.average_overall_rating
        mean2 = profile2.average_overall_rating

        weighted_sum = 0
        squares1 = 0
        squares2 = 0
        squared_weights = 0

        for item, weight in weights.items():
            deviation1 = profile1.item_ratings[item] - mean1
            deviation2 = profile2.item_ratings[item] - mean2

            weighted_sum += deviation1 * deviation2 * weight
            squares1 += deviation1 ** 2
            squares2 += deviation2 ** 2
            squared_weights += weight ** 2

        norm = math.sqrt(squares1 * squares2 * squared_weights)

        if norm == 0:
            return None

        return weighted_sum / norm
Beispiel #8
0
    def calculate_user_similarity(self, user1, user2, threshold):
        """
        Calculates the similarity between two users from their overall
        ratings, weighting every common item by how similar the contexts of
        both users are for that item.

        Items whose context similarity does not exceed ``threshold`` are
        discarded before the similarity is calculated.

        :param user1: the id of the first user
        :param user2: the id of the second user
        :param threshold: the minimum (exclusive) context similarity required
        for a common item to be used
        :return: the similarity value, or None if there are no common items or
        the denominator of the formula is zero
        """
        candidates = extractor.get_common_items(self.user_dictionary, user1,
                                                user2)

        if not candidates:
            return None

        first_user = self.user_dictionary[user1]
        second_user = self.user_dictionary[user2]

        # First pass: context similarity of every item that passes the filter
        context_weights = {}
        for item in candidates:
            first_context = first_user.item_contexts[item]
            second_context = second_user.item_contexts[item]
            similarity = context_utils.get_context_similarity(
                first_context, second_context, self.context_rich_topics)
            if similarity > threshold:
                context_weights[item] = similarity

        first_average = first_user.average_overall_rating
        second_average = second_user.average_overall_rating

        # Second pass: accumulate the terms of the weighted formula
        numerator = 0
        first_squares = 0
        second_squares = 0
        weight_squares = 0

        for item, similarity in context_weights.items():
            first_diff = first_user.item_ratings[item] - first_average
            second_diff = second_user.item_ratings[item] - second_average

            numerator += first_diff * second_diff * similarity
            first_squares += first_diff**2
            second_squares += second_diff**2
            weight_squares += similarity**2

        denominator = math.sqrt(
            first_squares * second_squares * weight_squares)

        return None if denominator == 0 else numerator / denominator
Beispiel #9
0
    def calculate_users_similarity(self, user_dictionary, user1, user2):
        """
        Calculates the similarity between two users from the ratings they
        gave to the items they have in common.

        The ratings of each user are obtained with
        ``extractor.get_user_ratings`` and compared with
        ``similarity_calculator.calculate_similarity`` using
        ``self._similarity_metric``.

        :param user_dictionary: the structure with the users' data that is
        handed over to the ``extractor`` helpers
        :param user1: the id of the first user
        :param user2: the id of the second user
        :return: the similarity value, or None if the users have no items in
        common or have fewer than ``self._min_common_items`` items in common
        """
        shared_items = extractor.get_common_items(user_dictionary, user1, user2)

        if not shared_items:
            return None

        too_few_items = (
            self._min_common_items is not None and
            len(shared_items) < self._min_common_items)
        if too_few_items:
            return None

        first_ratings = extractor.get_user_ratings(
            user_dictionary, user1, shared_items)
        second_ratings = extractor.get_user_ratings(
            user_dictionary, user2, shared_items)

        return similarity_calculator.calculate_similarity(
            first_ratings, second_ratings, self._similarity_metric)
    def calculate_users_similarity(self, user_dictionary, user1, user2):
        """
        Calculates the similarity between two users as the average of the
        similarities obtained for each rating criterion and for the overall
        rating, over the items the users have in common.

        The multi-criteria ratings are handled as a matrix from which one
        column per criterion is taken with ``extractor.get_matrix_column``.
        Every column, as well as the overall ratings, is compared with
        ``similarity_calculator.calculate_similarity`` using
        ``self._similarity_metric``.

        :param user_dictionary: the structure with the users' data that is
        handed over to the ``extractor`` helpers
        :param user1: the id of the first user
        :param user2: the id of the second user
        :return: the average of the similarities, or None if the users have no
        items in common or have fewer than ``self._min_common_items`` items in
        common
        """
        common_items = extractor.get_common_items(user_dictionary, user1,
                                                  user2)

        if not common_items:
            return None

        if self._min_common_items is not None and len(
                common_items) < self._min_common_items:
            return None

        user1_overall_ratings =\
            extractor.get_user_ratings(user_dictionary, user1, common_items)
        user1_multi_ratings =\
            extractor.get_user_multi_ratings(user_dictionary, user1, common_items)

        user2_overall_ratings =\
            extractor.get_user_ratings(user_dictionary, user2, common_items)
        user2_multi_ratings =\
            extractor.get_user_multi_ratings(user_dictionary, user2, common_items)

        # The number of criteria is taken from the first row of the matrix
        num_criteria = len(user1_multi_ratings[0])
        total_similarity = 0.

        # range works on both Python 2 and Python 3 (xrange is Python 2 only)
        for i in range(num_criteria):
            user1_criterion_item_ratings =\
                extractor.get_matrix_column(user1_multi_ratings, i)
            user2_criterion_item_ratings =\
                extractor.get_matrix_column(user2_multi_ratings, i)

            total_similarity += similarity_calculator.calculate_similarity(
                user1_criterion_item_ratings, user2_criterion_item_ratings,
                self._similarity_metric)

        # We also add the overall similarity
        total_similarity += similarity_calculator.calculate_similarity(
            user1_overall_ratings, user2_overall_ratings,
            self._similarity_metric)

        # One term per criterion plus the one for the overall rating
        average_similarity = total_similarity / (num_criteria + 1)

        return average_similarity
    def calculate_users_similarity(self, user_dictionary, user1, user2):
        """
        Averages the similarity between two users across every rating
        criterion plus the overall rating, using only the items both users
        have in common.

        One similarity is calculated per criterion (a column of the
        multi-criteria ratings, taken with ``extractor.get_matrix_column``)
        and one more for the overall ratings; all of them are obtained from
        ``similarity_calculator.calculate_similarity`` with
        ``self._similarity_metric``.

        :param user_dictionary: the structure with the users' data that is
        handed over to the ``extractor`` helpers
        :param user1: the id of the first user
        :param user2: the id of the second user
        :return: the average similarity, or None if the users have no items in
        common or have fewer than ``self._min_common_items`` items in common
        """
        common_items = extractor.get_common_items(user_dictionary, user1, user2)

        if not common_items:
            return None

        if self._min_common_items is not None and len(
                common_items) < self._min_common_items:
            return None

        user1_overall_ratings =\
            extractor.get_user_ratings(user_dictionary, user1, common_items)
        user1_multi_ratings =\
            extractor.get_user_multi_ratings(user_dictionary, user1, common_items)

        user2_overall_ratings =\
            extractor.get_user_ratings(user_dictionary, user2, common_items)
        user2_multi_ratings =\
            extractor.get_user_multi_ratings(user_dictionary, user2, common_items)

        # The first row of the matrix tells how many criteria there are
        num_criteria = len(user1_multi_ratings[0])
        total_similarity = 0.

        # xrange does not exist on Python 3; range behaves the same here on
        # both Python 2 and Python 3
        for i in range(num_criteria):
            user1_criterion_item_ratings =\
                extractor.get_matrix_column(user1_multi_ratings, i)
            user2_criterion_item_ratings =\
                extractor.get_matrix_column(user2_multi_ratings, i)

            total_similarity += similarity_calculator.calculate_similarity(
                user1_criterion_item_ratings, user2_criterion_item_ratings,
                self._similarity_metric)

        # We also add the overall similarity
        total_similarity += similarity_calculator.calculate_similarity(
            user1_overall_ratings, user2_overall_ratings, self._similarity_metric)

        # The divisor counts every criterion and the overall rating
        average_similarity = total_similarity / (num_criteria + 1)

        return average_similarity
    def calculate_user_similarity(self, user_id1, user_id2, threshold):
        """
        Calculates the cosine similarity between the overall ratings of two
        users, restricted to the items they have in common.

        :param user_id1: the id of the first user
        :param user_id2: the id of the second user
        :param threshold: not used by this implementation
        :return: the cosine similarity, or None if the users have no items in
        common or the denominator of the formula is zero (i.e. all the common
        ratings of at least one of the users are zero)
        """
        common_items =\
            extractor.get_common_items(self.user_dictionary, user_id1, user_id2)

        if not common_items:
            return None

        numerator = 0
        denominator1 = 0
        denominator2 = 0

        for item in common_items:
            user1_rating = self.user_dictionary[user_id1].item_ratings[item]
            user2_rating = self.user_dictionary[user_id2].item_ratings[item]

            numerator += user1_rating * user2_rating
            denominator1 += user1_rating**2
            denominator2 += user2_rating**2

        denominator = math.sqrt(denominator1) * math.sqrt(denominator2)

        # The cosine is undefined for an all-zero rating vector; report that
        # no similarity could be calculated instead of dividing by zero
        if denominator == 0:
            return None

        return numerator / denominator
    def calculate_user_similarity(self, user_id1, user_id2, threshold):
        """
        Computes the cosine of the angle between the overall rating vectors
        of two users, where the vectors contain only the items both users
        have in common.

        :param user_id1: the id of the first user
        :param user_id2: the id of the second user
        :param threshold: not used by this implementation
        :return: the cosine similarity, or None if the users have no items in
        common or if the denominator of the formula is zero (which happens
        when all the common ratings of one of the users are zero)
        """
        common_items =\
            extractor.get_common_items(self.user_dictionary, user_id1, user_id2)

        if not common_items:
            return None

        numerator = 0
        denominator1 = 0
        denominator2 = 0

        for item in common_items:
            user1_rating = self.user_dictionary[user_id1].item_ratings[item]
            user2_rating = self.user_dictionary[user_id2].item_ratings[item]

            numerator += user1_rating * user2_rating
            denominator1 += user1_rating ** 2
            denominator2 += user2_rating ** 2

        denominator = math.sqrt(denominator1) * math.sqrt(denominator2)

        # Avoid a ZeroDivisionError: with a zero-length rating vector the
        # similarity cannot be calculated, so None is returned
        if denominator == 0:
            return None

        return numerator / denominator