Exemple #1
0
def _append_user_ratings_row(algo, dataset, user_ratings):
    """Append ``user_ratings`` to ``algo.M`` as one extra row, in place.

    Args:
        algo: Object exposing ``M`` (its ``data``/``indices``/``indptr``
            arrays are extended here) and ``nb_works``.
        dataset: Object whose ``encode_works`` converts work ids into the
            column indices used by ``algo.M``.
        user_ratings: Mapping of work id to rating choice.
    """
    framed_rated_works = pd.DataFrame(
        list(user_ratings.items()), columns=['work_id', 'choice'])
    framed_rated_works['work_id'] = dataset.encode_works(
        framed_rated_works['work_id'])
    framed_rated_works['rating'] = framed_rated_works['choice'].map(
        rating_values)
    nb_rated_works = len(framed_rated_works['work_id'])
    # All ratings land on row 0 of a 1 x nb_works matrix. Row indices are
    # plain integers (they used to be floats, which only worked through an
    # implicit cast).
    ratings_from_user = coo_matrix(
        (framed_rated_works['rating'],
         ([0] * nb_rated_works, framed_rated_works['work_id'])),
        shape=(1, algo.nb_works)).tocsr()

    # Expands algo.M with the current user's ratings by concatenating the raw
    # compressed arrays (vstack is too slow).
    # NOTE(review): assumes algo.M is a CSR matrix and that `algo` is not
    # shared between requests, since M is mutated in place -- TODO confirm.
    algo.M.data = np.hstack((algo.M.data, ratings_from_user.data))
    algo.M.indices = np.hstack((algo.M.indices, ratings_from_user.indices))
    # nnz is still the pre-expansion count here; it offsets the new row's
    # pointers. The leading 0 of the new indptr is dropped.
    algo.M.indptr = np.hstack(
        (algo.M.indptr, (ratings_from_user.indptr + algo.M.nnz)[1:]))
    algo.M._shape = (algo.M.shape[0] + ratings_from_user.shape[0],
                     ratings_from_user.shape[1])


def get_reco_algo(request, algo_name='knn', category='all'):
    """Compute ranked work recommendations for the user behind ``request``.

    Args:
        request: Request whose ``user`` and current ratings drive the
            recommendation.
        algo_name: Name of the algorithm to use. Forced to ``'knn'`` for
            anonymous users.
        category: Category slug used to restrict candidate works, or
            ``'all'`` for no restriction.

    Returns:
        A dict with ``'work_ids'`` (recommended work ids, best first,
        restricted to works that still exist) and ``'works'`` (the result of
        ``Work.objects.in_bulk`` on the recommended ids).
    """
    chrono = Chrono(is_enabled=CHRONO_ENABLED)
    # Fetch the user's ratings once; they are reused for the KNN expansion.
    user_ratings = current_user_ratings(request)
    already_rated_works = list(user_ratings)
    if request.user.is_anonymous:
        assert request.user.id is None
        # We only support KNN for anonymous users, since the offline models did
        # not learn anything about them.
        # FIXME: We should also force KNN for new users for which we have no
        # offline trained model available.
        algo_name = 'knn'

    chrono.save('get rated works')

    try:
        algo = get_algo_backup(algo_name)
        dataset = get_dataset_backup(algo_name)
    except FileNotFoundError:
        # No backup on disk: fit the algorithm on every stored rating.
        triplets = list(
            Rating.objects.values_list('user_id', 'work_id', 'choice'))
        chrono.save('get all %d interesting ratings' % len(triplets))
        dataset, algo = fit_algo(algo_name, triplets)

    if algo_name == 'knn':
        _append_user_ratings_row(algo, dataset, user_ratings)
        chrono.save('loading knn and expanding with current user ratings')

    chrono.save('fit %s' % algo.get_shortname())

    if category != 'all':
        category_filter = set(
            Work.objects.filter(category__slug=category)
            .values_list('id', flat=True))
    else:
        category_filter = dataset.interesting_works

    filtered_works = list(
        (dataset.interesting_works & category_filter)
        - set(already_rated_works))
    chrono.save('remove already rated')

    # Positions index into filtered_works, not work ids.
    pos_of_best = get_pos_of_best_works_for_user_via_algo(
        algo, dataset, request.user.id, filtered_works, limit=NB_RECO)
    best_work_ids = [filtered_works[pos] for pos in pos_of_best]

    chrono.save('compute every prediction')

    works = Work.objects.in_bulk(best_work_ids)
    # Some of the works may have been deleted since the algo backup was created.
    ranked_work_ids = [work_id for work_id in best_work_ids if work_id in works]

    chrono.save('get bulk')

    return {'work_ids': ranked_work_ids, 'works': works}
Exemple #2
0
def build_profile_compare_function(algo_name: Optional[str],
                                   ratings: List[Rating], user: User):
    """Build a sort-key function for ordering rating items.

    Args:
        algo_name: Name of the algorithm backup used to rank items inside
            each rating choice, or ``None`` to sort by title instead.
        ratings: The ratings to be ordered; each needs ``id`` and
            ``work_id`` when ``algo_name`` is given.
        user: User whose ``id`` is handed to the algorithm.

    Returns:
        A one-argument function suitable as ``key=`` for sorting. Items are
        always ordered by rating choice first; ties are broken by the
        algorithm's ranking when it is available, otherwise by lowercased
        work title.
    """
    ordering = ['favorite', 'willsee', 'like', 'neutral', 'dislike', 'wontsee']

    # By default, sort by rating then name
    def default_compare_function(item):
        return ordering.index(item.choice), item.work.title.lower()

    if algo_name is None:
        return default_compare_function

    try:
        work_ids = [rating.work_id for rating in ratings]
        algo = get_algo_backup(algo_name)
        dataset = get_dataset_backup(algo_name)
        best_pos = get_pos_of_best_works_for_user_via_algo(
            algo, dataset, user.id, work_ids)
        # best_pos holds positions into `ratings`, best first.
        ranking = {ratings[pos].id: rank for rank, pos in enumerate(best_pos)}
    except Exception:
        # Best effort. Two expected reasons: no backup or user not in backup.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return default_compare_function

    # Items the algorithm did not rank sort after every ranked one.
    unranked = len(ratings)

    def special_compare_function(item):
        return ordering.index(item.choice), ranking.get(item.id, unranked)

    return special_compare_function
Exemple #3
0
 def load_from_algo(self, algo_name):
     """Load work vectors from the backup of ``algo_name``.

     Keeps only the ids of ``self.work_ids`` that are keys of the backup
     dataset's ``encode_work``, stores them as an array in ``self.work_ids``,
     sets ``self.vectors`` to the matching rows of the algorithm's ``VT.T``,
     then calls ``self.preprocess()``.
     """
     trained_algo = get_algo_backup(algo_name)
     backup_dataset = get_dataset_backup(algo_name)

     # Only works known to the backup can be looked up in its factors.
     own_ids = set(self.work_ids)
     known_ids = set(backup_dataset.encode_work.keys())
     shared_ids = list(own_ids & known_ids)

     self.work_ids = np.array(shared_ids)
     work_factors = trained_algo.VT.T
     encoded_ids = backup_dataset.encode_works(shared_ids)
     self.vectors = work_factors[encoded_ids]
     self.preprocess()
Exemple #4
0
def user_exists_in_backup(user, algo_name):
    """Tell whether ``user.id`` is in the dataset backup of ``algo_name``.

    Returns ``False`` when loading the dataset backup raises
    ``FileNotFoundError``; otherwise returns whether ``user.id`` is contained
    in the backup's ``encode_user``.
    """
    try:
        backup = get_dataset_backup(algo_name)
    except FileNotFoundError:
        # No backup file: nobody can be in it.
        return False
    return user.id in backup.encode_user