コード例 #1
0
ファイル: dpp.py プロジェクト: lnceballosz/mangaki
 def load_from_algo(self, algo_name):
     """Populate `work_ids` and `vectors` from the backup of `algo_name`.

     Only the ids of `self.work_ids` that are also keys of the backup
     dataset's `encode_work` are kept. `self.vectors` is then taken from
     the rows of `algo.VT.T` selected by the encoded ids, and
     `self.preprocess()` is run on the result.
     """
     backup = get_algo_backup(algo_name)
     # Ids requested on this instance that the backup is able to encode.
     shared_ids = set(self.work_ids).intersection(
         set(backup.dataset.encode_work.keys()))
     kept_ids = list(shared_ids)
     self.work_ids = np.array(kept_ids)
     work_factors = backup.VT.T
     self.vectors = work_factors[backup.dataset.encode_works(kept_ids)]
     self.preprocess()
コード例 #2
0
ファイル: profile.py プロジェクト: subhrajit95/mangaki
def build_profile_compare_function(algo_name: Optional[str],
                                   ratings: List[Rating], user: User):
    """Build the sort key used to order the ratings shown on a profile.

    Items are always grouped by their `choice`, in the order favorite,
    willsee, like, neutral, dislike, wontsee. Inside a group they are
    ordered either by lowercased work title (default) or, when `algo_name`
    is given and a personalized ranking can be computed, by that ranking.

    Args:
        algo_name: name of the algorithm backup to rank with, or None to
            use the default title ordering.
        ratings: the ratings to be sorted; each needs `id` and `work_id`.
        user: the user whose `id` is passed to the ranking helper.

    Returns:
        A one-argument function suitable as `key=` for sorting items that
        expose `choice`, `id` and `work.title`.
    """
    import logging

    ordering = ['favorite', 'willsee', 'like', 'neutral', 'dislike', 'wontsee']

    # By default, sort by rating then name
    def default_compare_function(item):
        return ordering.index(item.choice), item.work.title.lower()

    if algo_name is None:
        return default_compare_function

    try:
        work_ids = [rating.work_id for rating in ratings]
        algo = get_algo_backup(algo_name)
        best_pos = get_personalized_ranking(algo, user.id, work_ids)
        # Ratings missing from the ranking sort after all ranked ones.
        ranking = defaultdict(lambda: len(ratings))
        for rank, pos in enumerate(best_pos):
            ranking[ratings[pos].id] = rank
    except Exception:
        # Two expected reasons: no backup or user not in backup. This stays
        # best-effort (fall back to the default ordering), but the failure is
        # logged so that unexpected errors are not hidden.
        logging.getLogger(__name__).debug(
            'Personalized ranking unavailable for algo %r; '
            'falling back to default profile ordering',
            algo_name, exc_info=True)
        return default_compare_function

    def special_compare_function(item):
        return ordering.index(item.choice), ranking[item.id]

    return special_compare_function
コード例 #3
0
def get_algo_backup_or_fit_knn(algo_name):
    """Return the backup of `algo_name`, or a freshly fitted KNN.

    When `get_algo_backup` raises FileNotFoundError, every rating is read
    as a (user_id, work_id, choice) triplet and a 'knn' algorithm is
    fitted on them. The fallback is always 'knn', whatever `algo_name`
    was requested.

    Args:
        algo_name: name of the algorithm backup to load.

    Returns:
        The loaded backup, or the newly fitted KNN algorithm.
    """
    try:
        return get_algo_backup(algo_name)
    except FileNotFoundError:
        # In the future, we should warn the user it's gonna take a while
        triplets = list(
            Rating.objects.values_list('user_id', 'work_id', 'choice'))
        return fit_algo('knn', triplets)
コード例 #4
0
 def handle(self, *args, **options):
     """Unzip the backup named by the `algo_name` option and report it.

     If the backup's `M` is already set, only a warning is written.
     Otherwise the backup is unzipped, saved to
     `settings.ML_SNAPSHOT_ROOT` when it is serializable, and a success
     message with its size in MB is written.
     """
     name = options.get('algo_name')
     backup = get_algo_backup(name)

     # Nothing to do when the matrix is already there.
     if backup.M is not None:
         notice = self.style.WARNING(
             'Pickle of %s is already unzipped' % name)
         self.stdout.write(notice)
         return

     backup.unzip()
     if backup.is_serializable:
         backup.save(settings.ML_SNAPSHOT_ROOT)
     report = self.style.SUCCESS(
         'Successfully unzipped %s (%.1f MB)' % (name, backup.size / 1e6))
     self.stdout.write(report)
コード例 #5
0
def get_reco_algo(request, algo_name='knn', category='all'):
    """Compute work recommendations for the user behind `request`.

    The backup of `algo_name` is loaded; if no backup file is found, the
    algorithm is fitted on all ratings instead. Works the current user has
    already rated are removed from the candidates, then the algorithm is
    asked for the best remaining works (with `limit=NB_RECO`).

    Anonymous users are always served by 'knn'. When the algorithm is
    'knn', the current user's ratings are appended to `algo.M` as one
    extra row before the predictions are computed.

    Args:
        request: the incoming request; `request.user` and
            `current_user_ratings(request)` are read.
        algo_name: name of the algorithm to use (default 'knn').
        category: category slug restricting the candidates, or 'all' for
            no restriction.

    Returns:
        A dict with 'work_ids', the ranked ids of the recommended works
        that were found by `Work.objects.in_bulk`, and 'works', the result
        of that `in_bulk` call.
    """
    # Chrono presumably records timing checkpoints via save() — TODO confirm.
    chrono = Chrono(is_enabled=CHRONO_ENABLED)
    # Iterating the user's ratings; presumably yields work ids (it is used
    # below as a set of works to exclude) — verify against the helper.
    already_rated_works = list(current_user_ratings(request))
    if request.user.is_anonymous:
        assert request.user.id is None
        # We only support KNN for anonymous users, since the offline models did
        # not learn anything about them.
        # FIXME: We should also force KNN for new users for which we have no
        # offline trained model available.
        algo_name = 'knn'

    chrono.save('get rated works')

    try:
        algo = get_algo_backup(algo_name)
    except FileNotFoundError:
        # No backup on disk: fit the requested algorithm on every rating.
        triplets = list(
            Rating.objects.values_list('user_id', 'work_id', 'choice'))
        chrono.save('get all %d interesting ratings' % len(triplets))
        algo = fit_algo(algo_name, triplets)

    if algo_name == 'knn':
        # Keep only the user's ratings on works the dataset can encode.
        available_works = set(algo.dataset.encode_work.keys())
        framed_rated_works = (pd.DataFrame(list(current_user_ratings(request).items()), columns=['work_id', 'choice'])
                              .query('work_id in @available_works'))
        framed_rated_works['encoded_work_id'] = algo.dataset.encode_works(framed_rated_works['work_id'])
        framed_rated_works['rating'] = framed_rated_works['choice'].map(rating_values)
        nb_rated_works = len(framed_rated_works)
        # Build a 1 x nb_works sparse matrix: every rating goes to row 0, at
        # the column given by its encoded work id.
        ratings_from_user = coo_matrix((framed_rated_works['rating'],([0.] * nb_rated_works, framed_rated_works['encoded_work_id'])), shape=(1, algo.nb_works))
        ratings_from_user = ratings_from_user.tocsr()

        # Expands knn.M with current user ratings (vstack is too slow).
        # The user's row is appended by concatenating the data / indices /
        # indptr arrays of algo.M in place (assumes algo.M is a scipy CSR
        # matrix — TODO confirm).
        # NOTE(review): the indptr line reads algo.M.nnz and the _shape line
        # reads algo.M.shape[0]; both appear to rely on still seeing the
        # values from before the append, so the order of these four
        # statements matters — confirm against the scipy version in use.
        algo.M.data = np.hstack((algo.M.data, ratings_from_user.data))
        algo.M.indices = np.hstack((algo.M.indices, ratings_from_user.indices))
        algo.M.indptr = np.hstack((algo.M.indptr, (ratings_from_user.indptr + algo.M.nnz)[1:]))
        algo.M._shape = (algo.M.shape[0] + ratings_from_user.shape[0], ratings_from_user.shape[1])

        chrono.save('loading knn and expanding with current user ratings')

    chrono.save('fit %s' % algo.get_shortname())

    # Restrict candidates to the requested category; with 'all' the filter is
    # the set of interesting works itself, so the intersection below is a
    # no-op.
    if category != 'all':
        category_filter = set(Work.objects.filter(category__slug=category).values_list('id', flat=True))
    else:
        category_filter = algo.dataset.interesting_works

    filtered_works = list((algo.dataset.interesting_works & category_filter) - set(already_rated_works))
    chrono.save('remove already rated, left {:d}'.format(len(filtered_works)))

    # The helper returns positions into filtered_works, not work ids.
    pos_of_best = get_pos_of_best_works_for_user_via_algo(algo, request.user.id, filtered_works, limit=NB_RECO)
    best_work_ids = [filtered_works[pos] for pos in pos_of_best]

    chrono.save('compute every prediction')

    works = Work.objects.in_bulk(best_work_ids)
    # Some of the works may have been deleted since the algo backup was created.
    ranked_work_ids = [work_id for work_id in best_work_ids if work_id in works]

    chrono.save('get bulk')

    return {'work_ids': ranked_work_ids, 'works': works}
コード例 #6
0
def user_exists_in_backup(user, algo_name):
    """Tell whether `user.id` is among the users encoded in a backup.

    Returns False when loading the backup of `algo_name` raises
    FileNotFoundError; otherwise returns whether `user.id` is in the
    backup dataset's `encode_user`.
    """
    is_known = False
    try:
        backup = get_algo_backup(algo_name)
        is_known = user.id in backup.dataset.encode_user
    except FileNotFoundError:
        # No backup file: the user cannot be part of it.
        pass
    return is_known