Ejemplo n.º 1
0
    def estimate(self, u, i):
        """Return the stored prediction for user ``u`` and item ``i``.

        The value is read from ``self.predictedRatings[u, i]``.

        Raises:
            PredictionImpossible: if the trainset does not know the user or
                the item, or if the stored value is below 0.001.
        """
        trainset = self.trainset
        if not trainset.knows_user(u) or not trainset.knows_item(i):
            raise PredictionImpossible('User and/or item is unkown.')

        predicted = self.predictedRatings[u, i]
        # NOTE(review): values under 0.001 are presumably "no prediction"
        # placeholders in the matrix -- confirm against the fitting code.
        if predicted < 0.001:
            raise PredictionImpossible('No valid prediction exists.')
        return predicted
Ejemplo n.º 2
0
    def estimate(self, user_index, item_index):
        """Return the dot product of a user's and an item's feature columns.

        The user vector is column ``user_index`` of ``self.user_features`` and
        the item vector is column ``item_index`` of ``self.item_features``.

        Raises:
            PredictionImpossible: if the trainset does not know the user or
                does not know the item.
        """
        trainset = self.trainset
        if not (trainset.knows_user(user_index)
                and trainset.knows_item(item_index)):
            raise PredictionImpossible("User and item are unknown")

        user_column = self.user_features[:, user_index]
        item_column = self.item_features[:, item_index]
        score = user_column.T.dot(item_column)
        return score
Ejemplo n.º 3
0
    def estimate(self, u, i):
        """Score item ``i`` from its publication date alone (``u`` is unused).

        Items published after ``self.cut_after`` (when set) get ``self.lower``;
        items published after ``self.threshold_date`` get ``self.upper``.
        Otherwise the date is scaled by ``self.date_scale`` relative to
        ``self.oldest_date``, blended with ``self.weight`` and mapped onto
        ``self.lower + self.range * value``.

        Raises:
            PredictionImpossible: if ``self.dates`` has no entry for the item.
        """
        # Items unknown to the trainset arrive as "UNK__<raw iid>"; drop the
        # five-character prefix to recover the raw iid.
        if self.trainset.knows_item(i):
            item_id = self.trainset.to_raw_iid(i)
        else:
            item_id = i[5:]

        try:
            published = self.dates[item_id]
        except KeyError:
            raise PredictionImpossible('No publication date registered')

        # Optional upper bound, used to avoid recommending "impossible"
        # articles.
        if self.cut_after and self.cut_after < published:
            return self.lower

        if self.threshold_date < published:
            return self.upper

        # The original author expects this ratio to lie in [0, 1].
        freshness = (published - self.oldest_date) / self.date_scale

        # Blend so that the oldest items are not necessarily excluded outright.
        blended = (1.0 - self.weight) + (freshness * self.weight)

        # Convert to the scale used by the rest of the algorithms.
        return self.lower + (self.range * blended)
Ejemplo n.º 4
0
    def estimate(self, u, i):
        """Estimate a rating from the z-scores of the ``self.k`` nearest neighbours.

        Neighbours are the entries of ``self.yr[y]`` ranked by ``self.sim``;
        only those with positive similarity contribute. The result starts at
        ``self.means[x]`` and adds the similarity-weighted average z-score
        scaled by ``self.sigmas[x]``.

        Returns:
            A tuple ``(est, details)`` where ``details`` is
            ``{'actual_k': <number of contributing neighbours>}``.

        Raises:
            PredictionImpossible: if the user or the item is unknown.
        """
        if not self.trainset.knows_user(u) or not self.trainset.knows_item(i):
            raise PredictionImpossible('User and/or item is unkown.')

        x, y = self.switch(u, i)

        candidates = [(other, self.sim[x, other], rating)
                      for (other, rating) in self.yr[y]]
        nearest = heapq.nlargest(self.k, candidates,
                                 key=lambda entry: entry[1])

        est = self.means[x]

        # Accumulate the similarity-weighted z-scores of the neighbours.
        sum_sim = sum_zscores = actual_k = 0
        for other, similarity, rating in nearest:
            if not similarity > 0:
                continue
            sum_sim += similarity
            sum_zscores += (similarity * (rating - self.means[other])
                            / self.sigmas[other])
            actual_k += 1

        # With too few neighbours the correction term is zeroed out.
        if actual_k < self.min_k:
            sum_zscores = 0

        try:
            est += sum_zscores / sum_sim * self.sigmas[x]
        except ZeroDivisionError:
            # No positive similarity at all: fall back to the plain mean.
            pass

        return est, {'actual_k': actual_k}
Ejemplo n.º 5
0
 def estimate(self, u, i):
     """Estimate a rating as the mean rating of the 3 most similar raters plus biases.

     The users who rated item ``i`` are ranked by ``self.sim[u, v]``; the
     ratings of the top three are averaged, then ``self.bu[u]`` and
     ``self.bi[i]`` are added. The three neighbours are also printed.

     Raises:
         PredictionImpossible: if the user or the item is unknown, or if
             nobody has rated the item (previously a ZeroDivisionError).
     """
     if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
         raise PredictionImpossible('User and/or item is unkown .')

     neighbors = sorted(
         ((v, self.sim[u, v], r) for (v, r) in self.trainset.ir[i]),
         key=lambda x: x[1],
         reverse=True,
     )
     if not neighbors:
         # Without any rater the average below would divide by zero.
         raise PredictionImpossible('Not enough neighbors.')

     nearest = neighbors[:3]
     print('The  3 nearest neighbors of user', str(u), 'are:')
     total = 0
     for v, sim_uv, r in nearest:
         print('user{0:} with sim {1:1.2f}'.format(v, sim_uv))
         total += r
     est = total / len(nearest)

     # Both the user and the item are known at this point (see the guard
     # above), so both bias terms always apply.
     est += self.bu[u]
     est += self.bi[i]
     return est
Ejemplo n.º 6
0
    def estimate(self, u, i):
        """Estimate a rating from latent factors, optionally with biases.

        When ``self.biased`` is true the estimate starts at the trainset's
        global mean and adds whichever of ``self.bu[u]``, ``self.bi[i]`` and
        the factor dot product are available. Otherwise it is the dot product
        alone.

        Raises:
            PredictionImpossible: in the unbiased case, if the user or the
                item is unknown.
        """
        # Should we cythonize this as well?
        known_user = self.trainset.knows_user(u)
        known_item = self.trainset.knows_item(i)
        both_known = known_user and known_item

        if not self.biased:
            if not both_known:
                raise PredictionImpossible('User and item are unkown.')
            return np.dot(self.qi[i], self.pu[u])

        est = self.trainset.global_mean
        if known_user:
            est += self.bu[u]
        if known_item:
            est += self.bi[i]
        if both_known:
            est += np.dot(self.qi[i], self.pu[u])
        return est
Ejemplo n.º 7
0
 def estimate(self, u, i):
     """Return the precomputed prediction ``self.predictions[u, i]``.

     Raises:
         PredictionImpossible: if the trainset does not know the user or
             does not know the item.
     """
     trainset = self.trainset
     user_ok = trainset.knows_user(u)
     item_ok = trainset.knows_item(i)
     if not (user_ok and item_ok):
         raise PredictionImpossible('User and item are unkown.')
     return self.predictions[u, i]
Ejemplo n.º 8
0
    def estimate(self, u, i):
        """Return the mean of the ``self.k`` largest similarities to item ``i``.

        The similarities are taken from ``self.sim_matrix`` between ``i`` and
        every item the user has an entry for in ``self.trainset.ur[u]``.

        Raises:
            PredictionImpossible: if the user or the item is unknown, or if
                the selected similarities sum to zero.
        """
        if not self.trainset.knows_user(u) or not self.trainset.knows_item(i):
            raise PredictionImpossible('User and/or item is unknown.')

        # Similarity between the test track and each track the user has
        # listened to.
        similarities = [self.sim_matrix[track_id, i]
                        for track_id, _ in self.trainset.ur[u]]
        similarities.sort(reverse=True)
        top_k = similarities[:self.k]

        total_sim = sum(top_k)
        if total_sim == 0:
            raise PredictionImpossible(
                'There are no neighbours for this track!')

        return total_sim / len(top_k)
Ejemplo n.º 9
0
 def estimate(self, user, item):
     """Estimate the rating a user will give to an item.

     The estimate is ``self.kernel_a + self.kernel_c * dot(qi[item], pu[user])``.

     Raises:
         PredictionImpossible: if the trainset does not know the user or
             does not know the item.
     """
     if not (self.trainset.knows_user(user)
             and self.trainset.knows_item(item)):
         raise PredictionImpossible("User and item are unknown.")

     # The global mean is not part of this estimate; the earlier placeholder
     # assignment to it was dead code and has been dropped.
     return self.kernel_a + self.kernel_c * np.dot(
         self.qi[item], self.pu[user])
Ejemplo n.º 10
0
    def estimate(self, u, i):
        """Estimate a rating from the "flock" neighbours of user ``u``.

        ``self.get_neighbors_flock`` is called with the raw user id and
        ``self.k``; its result is iterated as a mapping of raw neighbour id to
        similarity. Every rating a neighbour gave to item ``y`` adds
        ``rating * similarity`` to the estimate.

        Returns:
            A tuple ``(est, details)`` where ``details`` is
            ``{'actual_k': <number of matching ratings>}``.

        Raises:
            PredictionImpossible: if the user or the item is unknown, or if
                fewer than ``self.min_k`` neighbour ratings were found.
        """
        trainset = self.trainset
        if not trainset.knows_user(u) or not trainset.knows_item(i):
            raise PredictionImpossible('User and/or item is unkown.')

        _, y = self.switch(u, i)

        flock = self.get_neighbors_flock(trainset.to_raw_uid(u), self.k)

        sum_ratings = actual_k = 0
        if flock:
            for raw_neighbor, sim in flock.items():
                inner_neighbor = trainset.to_inner_uid(raw_neighbor)
                for rated_item, r in trainset.ur[inner_neighbor]:
                    if rated_item == y:
                        sum_ratings += r * sim
                        actual_k += 1

        if actual_k < self.min_k:
            raise PredictionImpossible('Not enough neighbors.')

        # NOTE(review): the result is the raw similarity-weighted sum; it is
        # not divided by the total similarity. Confirm this is intended.
        return sum_ratings, {'actual_k': actual_k}
Ejemplo n.º 11
0
    def estimate(self, u, i):
        """Estimate a rating from items in the same cluster as item ``i``.

        The cluster label of ``i`` is the last column of the ``self.oas`` row
        whose first column equals ``i``. For every rating of the user and
        every row of that cluster, the similarity between the row's first
        column and the rated item is looked up in ``self.similarities``. The
        ``self.k`` largest similarities are kept and the ratings are averaged,
        weighted by the positive ones.

        Raises:
            PredictionImpossible: if the user or the item is unknown, if no
                ``self.oas`` row matches ``i``, or if no neighbour has a
                positive similarity.
        """
        if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
            raise PredictionImpossible('User and/or item is unkown.')

        try:
            cluster_item_i = self.oas[self.oas[:, 0] == i][0][-1]
        except (LookupError, TypeError) as err:
            # No row for this item (or an unindexable table): no cluster.
            raise PredictionImpossible('No neighbors') from err

        # Rows belonging to the cluster of item i. This does not depend on
        # the user's ratings, so it is computed once rather than per rating.
        similar_oas = self.oas[self.oas[:, -1] == cluster_item_i]

        # Build up similarity scores between the cluster's items and
        # everything the user rated.
        neighbors = []
        for rating in self.trainset.ur[u]:
            for similar_idoa in similar_oas:
                try:
                    similitud_oas = self.similarities[int(similar_idoa[0]),
                                                      rating[0]]
                except (LookupError, TypeError, ValueError):
                    # Best effort: skip pairs with no similarity entry, but
                    # let interrupts and exits propagate.
                    continue
                neighbors.append((similitud_oas, rating[1]))

        # Extract the top-K most-similar ratings
        k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[0])

        # Compute average sim score of K neighbors weighted by user ratings
        simTotal = weightedSum = 0
        for (simScore, rating) in k_neighbors:
            if simScore > 0:
                simTotal += simScore
                weightedSum += simScore * rating

        if simTotal == 0:
            raise PredictionImpossible('No neighbors')

        return weightedSum / simTotal
Ejemplo n.º 12
0
    def estimate(self, u, i):
        """Predict a rating as a similarity-weighted average of the user's ratings.

        Parameters
        ----------
        u: int
            User id
        i: int
            Item id

        Returns
        -------
        rating: float
            Average of the ratings of the ``self.k`` items most similar to
            ``i`` among those the user rated, weighted by positive similarity.

        Raises
        ------
        PredictionImpossible
            If the user or the item is unknown, or if no selected neighbour
            has a positive similarity.
        """
        if not self.trainset.knows_user(u) or not self.trainset.knows_item(i):
            raise PredictionImpossible('User and/or item is unkown.')

        # Pair each of the user's ratings with that item's similarity to i.
        scored = [(self.similarities[i, entry[0]], entry[1])
                  for entry in self.trainset.ur[u]]

        # Keep the K most similar rated items.
        top = nlargest(self.k, scored, key=lambda pair: pair[0])

        # Weighted average over the positively-similar neighbours only.
        sim_total = weighted_sum = 0
        for sim_score, user_rating in top:
            if not sim_score > 0:
                continue
            sim_total += sim_score
            weighted_sum += sim_score * user_rating

        if sim_total == 0:
            raise PredictionImpossible('No neighbors')

        return weighted_sum / sim_total
Ejemplo n.º 13
0
    def estimate(self, u, i):
        """Return the global mean plus whichever user/item biases are available.

        Raises:
            PredictionImpossible: if ``self.biased`` is false.
        """
        known_user = self.trainset.knows_user(u)
        known_item = self.trainset.knows_item(i)

        # NOTE(review): the error below fires whenever the model is unbiased,
        # regardless of whether the user/item are known -- the message looks
        # misleading; confirm intent.
        if not self.biased:
            raise PredictionImpossible('User and item are unkown.')

        est = self.trainset.global_mean
        if known_user:
            est += self.bu[u]
        if known_item:
            est += self.bi[i]
        return est
    def estimate(self, u, i):
        """Predict a rating as a similarity-weighted average of the user's ratings.

        Each item the user rated is scored by ``self.similarities[i, item]``;
        the ``self.k`` best-scoring ratings are kept and those with a positive
        score are averaged, weighted by that score.

        Raises:
            PredictionImpossible: if the user or the item is unknown, or if
                no kept neighbour has a positive score.
        """
        trainset = self.trainset
        if not trainset.knows_user(u) or not trainset.knows_item(i):
            raise PredictionImpossible('User and/or item is unknown.')

        candidates = [(self.similarities[i, entry[0]], entry[1])
                      for entry in trainset.ur[u]]
        nearest = heapq.nlargest(self.k, candidates, key=lambda pair: pair[0])

        # Only positively-similar neighbours take part in the average.
        positive = [(score, stars) for (score, stars) in nearest if score > 0]

        simTotal = weightedSum = 0
        for score, stars in positive:
            simTotal += score
            weightedSum += score * stars

        if simTotal == 0:
            raise PredictionImpossible('No neighbours')

        return weightedSum / simTotal
    def estimate(self, u, i):
        """Estimate a rating from the cosine similarity of user and movie vectors.

        The movie embedding is ``self.movies[raw_iid]["embedding"]`` and the
        user vector is ``self.mean_user_vectors[u]``. The similarity
        ``1 - cosine distance`` is mapped linearly with ``2.25 * s + 2.75``.

        Raises:
            PredictionImpossible: if the user or the item is unknown.
        """
        if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
            raise PredictionImpossible('User and/or item is unkown.')

        # Convert the inner item id to the raw id used as key in self.movies.
        # The trainset lives on the instance; a bare ``trainset`` name is not
        # defined in this method.
        movie_raw_id = self.trainset.to_raw_iid(i)
        movie_vector = self.movies[movie_raw_id]["embedding"]

        similarity = 1 - spatial.distance.cosine(self.mean_user_vectors[u],
                                                 movie_vector)
        # Linear map from [-1, 1] to [0.5, 5].
        return 2.25 * similarity + 2.75
Ejemplo n.º 16
0
 def _estimate(self, trainset, uid, iid, top_k=10):   # generally not used
     """Print the ``top_k`` users most similar to ``uid`` among the raters of ``iid``.

     :param trainset: trainset providing ``knows_user``, ``knows_item`` and ``ir``
     :param uid: user id; the inner id is used throughout
     :param iid: item id; the inner id is used
     :param top_k: how many of the most similar users to print
     :return: None
     """
     if not trainset.knows_user(uid=uid) or not trainset.knows_item(iid=iid):
         raise PredictionImpossible('User and/or item is unkown.')

     # Similarity between u and every other user v who also rated item i.
     raters = [(vid, self.sim[uid, vid]) for (vid, _) in trainset.ir[iid]]
     # Rank by similarity, highest first.
     raters.sort(key=lambda pair: pair[1], reverse=True)

     for v, sim_uv in raters[:top_k]:
         print('user {0:} with sim {1:1.2f}'.format(v, sim_uv))
Ejemplo n.º 17
0
    def estimate(self, u, i):
        """Print the 3 users most similar to ``u`` that rated ``i``.

        Returns the similarity of the last neighbour that was printed.

        Raises:
            PredictionImpossible: if the user or the item is unknown.
        """
        if not self.trainset.knows_user(u) or not self.trainset.knows_item(i):
            raise PredictionImpossible('User and/or item is unkown.')

        # Similarity between u and every user v that also rated item i,
        # ranked from most to least similar.
        ranked = sorted(
            ((v, self.sim[u, v]) for (v, _) in self.trainset.ir[i]),
            key=lambda pair: pair[1],
            reverse=True,
        )

        print('The 3 nearest neighbors of user', str(u), 'are:')
        for v, sim_uv in ranked[:3]:
            print('user {0:} with sim {1:1.2f}'.format(v, sim_uv))

        # NOTE(review): this returns a similarity (the loop variable of the
        # last printed neighbour), not a rating, and is unbound when nobody
        # rated the item -- confirm this is intended.
        return sim_uv
Ejemplo n.º 18
0
    def estimate(self, u, i):
        """Blend two estimators with equal weight.

        The result is the 50/50 average of the estimates returned by
        ``self.compute_by_traditional_cf(i, u)`` and
        ``self.compute_by_professional(i, u)``.

        Returns:
            A tuple ``(est, details)`` where ``details`` is the details object
            returned by ``compute_by_traditional_cf``.

        Raises:
            PredictionImpossible: if the user or the item is unknown.
        """
        if not self.trainset.knows_user(u) or not self.trainset.knows_item(i):
            # The user or item is missing from the training set. Per the
            # original author, the parent class catches this exception and
            # substitutes the average rating.
            raise PredictionImpossible('User and/or item is unknown')

        cf_est, cf_details = self.compute_by_traditional_cf(i, u)
        professional_est, _ = self.compute_by_professional(i, u)

        blended = 0.5 * cf_est + 0.5 * professional_est
        return blended, cf_details
Ejemplo n.º 19
0
    def estimate(self, u, i):
        """Placeholder estimator: print the rating-scale steps and return 3.

        Prints every integer from ``rating_scale[0]`` up to, but excluding,
        ``rating_scale[1]`` and always returns the constant 3.

        Raises:
            PredictionImpossible: if the user or the item is unknown.
        """
        if not self.trainset.knows_user(u) or not self.trainset.knows_item(i):
            raise PredictionImpossible('User and/or item is unkown.')

        x, y = self.switch(u, i)

        # Similarity and rating for every entry of self.yr[y]. The list is
        # built but not used yet.
        neighbors = [(self.similarity_matrix[x, x2], r)
                     for (x2, r) in self.yr[y]]

        lowest = self.trainset.rating_scale[0]
        highest = self.trainset.rating_scale[1]
        for level in range(lowest, highest):
            print(level)

        return 3
Ejemplo n.º 20
0
 def compute_by_traditional_cf(self, i, u):
     """Similarity-weighted average rating of item ``i`` over the nearest raters.

     The users who rated ``i`` are ranked by ``self.sim[u, v]``; among the
     first ``self.k`` only those with positive similarity contribute.

     Returns:
         A tuple ``(est, details)`` where ``details`` is
         ``{'actual_k': <number of contributing neighbours>}``.

     Raises:
         PredictionImpossible: if fewer than ``self.min_k`` neighbours
             contribute.
     """
     print('compute_by_traditional_cf')

     # Every user who has rated item i, with their similarity to u.
     raters = [(v, self.sim[u, v], r) for (v, r) in self.trainset.ir[i]]
     # Most similar first.
     raters.sort(key=lambda entry: entry[1], reverse=True)

     # Accumulate the weighted average over the positive similarities.
     sum_sim = sum_ratings = actual_k = 0
     for _, sim, r in raters[:self.k]:
         if not sim > 0:
             continue
         sum_sim += sim
         sum_ratings += sim * r
         actual_k += 1

     if actual_k < self.min_k:
         raise PredictionImpossible('Not enough neighbors.')

     return sum_ratings / sum_sim, {'actual_k': actual_k}
Ejemplo n.º 21
0
    def estimate(self, u, i):
        """Predict a rating with the Resnick formula over the top ``self.n`` neighbours.

        The prediction is the mean rating of user ``u`` plus the
        similarity-weighted mean-centred ratings that the most similar raters
        gave to item ``i``, divided by the sum of absolute similarities plus
        ``self.epsilon``.

        Raises:
            PredictionImpossible: if the user or the item is unknown.
        """
        if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
            raise PredictionImpossible('User and/or item is unknown.')

        # ratings of item i by all users
        item_rating_by_user = self.trainset.ir[i]

        # First rating of item i recorded for each user. Keying on the user id
        # avoids matching a row merely because its *rating* happens to equal
        # the user id, and replaces a list scan per neighbour.
        rating_of_item_by = {}
        for v, r in item_rating_by_user:
            rating_of_item_by.setdefault(v, r)

        # average rating by user u
        ratings_of_u = self.trainset.ur[u]
        avg_rating_by_user_u = sum(r for (_, r) in ratings_of_u) / len(
            ratings_of_u)

        # Compute similarities between u and v, where v describes all other
        # users that have also rated item i.
        neighbors = [(v, self.sim[u, v]) for (v, _) in item_rating_by_user]

        # getting top k similar users for a user u
        top_k = sorted(neighbors, key=lambda x: x[1], reverse=True)[:self.n]

        # numerator of the Resnick prediction function
        numerator = 0
        for v, sim_uv in top_k:
            ratings_of_v = self.trainset.ur[v]
            avg_rating_by_user_v = sum(
                r for (_, r) in ratings_of_v) / len(ratings_of_v)
            numerator += sim_uv * (rating_of_item_by[v] - avg_rating_by_user_v)

        # denominator of the Resnick prediction function
        denominator = sum(abs(sim_uv) for (_, sim_uv) in top_k)

        # A small epsilon is added to the denominator to avoid division by 0.
        return avg_rating_by_user_u + (numerator /
                                       (denominator + self.epsilon))
Ejemplo n.º 22
0
    def run_child_algos_on_jobs(self, jobs):
        """
        Gather the prediction of every child algorithm for every job.

        Args:
            jobs: List of JobRequest. These are the user/item pairs we want to
                collect predictions for.

        Returns:
            Dict where key is (inner user ID, inner item ID) and value is a
            dictionary consisting of results, total_weights and
            rejected_results, as expected by combine(). Accessing a key that
            was never produced raises KeyError.
        """
        # TODO: Use a list instead with indices matching those of jobs,
        # since the same user ID and item ID pair may appear multiple times
        results = defaultdict(lambda: {
            'results': [],
            'total_weights': self.sum_weights,
            'rejected_results': []
        })

        # Process one algorithm at a time so its name is resolved only once.
        for algorithm, weight, _ in self.all_algorithms():
            algorithm_name = self._get_algorithm_name(algorithm)

            for job in jobs:
                u = job.iuid
                i = job.iiid
                entry = (u, i)

                try:
                    outcome = algorithm.estimate(u, i)
                    # An algorithm returns either a bare prediction or a
                    # (prediction, extras) tuple.
                    if isinstance(outcome, tuple):
                        prediction, extras = outcome
                    else:
                        prediction, extras = outcome, None

                    if prediction == self.trainset.global_mean:
                        # Though the algorithm did not admit it, it failed to
                        # produce a result different than the global mean (a
                        # symptom that a prediction was impossible)
                        raise PredictionImpossible(
                            'Algorithm prediction equals global mean'
                        )

                    # The algorithm produced a usable result.
                    results[entry]['results'].append(AlgorithmResult(
                        algorithm_name,
                        weight,
                        prediction,
                        extras
                    ))
                except PredictionImpossible as e:
                    # The algorithm failed: record the rejection together
                    # with the exception that explains it.
                    results[entry]['rejected_results'].append(AlgorithmResult(
                        algorithm_name,
                        weight,
                        None,
                        e
                    ))
                    # A failed algorithm's weight must not count towards the
                    # total; infinite weights are left untouched.
                    if weight != float('inf'):
                        results[entry]['total_weights'] -= weight

        # From here on, a missing key raises KeyError instead of creating an
        # empty entry.
        results.default_factory = None
        return results
Ejemplo n.º 23
0
    def estimate(self, User, index):
        """Estimate a score for ``index`` if it is a valid next step for ``User``.

        ``User.currentIndex`` is compared against the tree nodes ``a`` .. ``n``
        (names defined outside this method). Each node allows only specific
        successor indices; terminal nodes allow none. When the move is
        allowed, the users who rated ``index`` are ranked by
        ``self.sim[User, v]``, the top three are printed and the mean of
        their similarities is returned.

        Raises:
            PredictionImpossible: if the user or the requested index is
                unknown, or if ``index`` is not an allowed successor of the
                user's current node.
        """
        # Catch impossible predictions. The item check must use the requested
        # ``index``; the name ``i`` is one of the tree nodes, not the argument.
        if not (self.UserData.knows_user(User)
                and self.UserData.knows_item(index)):
            raise PredictionImpossible('User and or Item are unknown')

        # (current node, allowed successors), in the order the checks are
        # applied; an empty tuple marks a terminal node.
        transitions = (
            (a, (b,)),
            (b, (c,)),
            (c, (d,)),
            (d, (g,)),
            (e, (h, i)),
            (f, (j, k)),
            (g, ()),
            (h, (n,)),
            (i, (n, l)),
            (j, ()),
            (k, ()),
            (n, ()),
            (m, ()),
            (l, ()),
        )
        for node, successors in transitions:
            if User.currentIndex == node:
                if not successors:
                    raise PredictionImpossible(
                        'User current Index and Prediction invalid due to tree - End Index'
                    )
                if not any(index == allowed for allowed in successors):
                    raise PredictionImpossible(
                        'User current Index and Prediction invalid due to tree')
                # Only the first matching node applies.
                break

        neighbours = [(v, self.sim[User, v])
                      for (v, r) in self.UserData.ir[index]]
        neighbours = sorted(neighbours, key=lambda x: x[1], reverse=True)

        print('The 3 nearest neighbours of user', str(User.userID), 'are:')
        for v, sim_Userv in neighbours[:3]:
            print('user {0:} with sim {1:1.2f}'.format(v, sim_Userv))

        prediction = mean(sim_Userv for (v, sim_Userv) in neighbours[:3])
        return prediction