def handle(self, *args, **options):
    """Print, as JSON, the 20 best-scored artists for one staff category.

    The category is read from ``sys.argv[2]`` and appended to the
    ``work__anime__`` lookup to pick the artist column on each rating.
    Every rating adds a per-choice weight to its artist's score; the
    number of ratings and of ``favorite`` ratings is tracked alongside.
    """
    chrono = Chrono(False)
    category = sys.argv[2]
    weights = {'favorite': 10, 'like': 2, 'neutral': 0.5, 'dislike': -1}
    scores = Counter()
    nb_ratings = Counter()
    nb_stars = Counter()

    rows = Rating.objects.values_list('choice', 'work__anime__' + category)
    for choice, contestant_id in rows:
        # Skip missing artists and ids <= 1 (original note: "not the
        # unknown artist" -- presumably id 1 is a placeholder; confirm).
        if not contestant_id or contestant_id <= 1:
            continue
        nb_ratings[contestant_id] += 1
        if choice == 'favorite':
            nb_stars[contestant_id] += 1
        scores[contestant_id] += weights.get(choice, 0)
    chrono.save('enter contestants')

    best = scores.most_common(20)
    artist_by_id = Artist.objects.in_bulk(
        [artist_id for artist_id, _ in best])
    top = [
        {
            'rank': rank,
            'name': str(artist_by_id[artist_id]),
            'id': artist_id,
            'score': score,
            'nb_ratings': nb_ratings[artist_id],
            'nb_stars': nb_stars[artist_id],
        }
        for rank, (artist_id, score) in enumerate(best, start=1)
    ]
    chrono.save('get results')
    print(json.dumps(top))
def handle(self, *args, **options):
    """Compute the top 20 artists of a category and store it as a Top.

    The category is the first value of the ``category`` option. Its plural
    form must be a key of ``TOP_CATEGORY_CHOICES``; this is checked before
    any rating is read so that an invalid category fails fast.

    Raises:
        CommandError: if the pluralised category is not a known top
            category.
    """
    chrono = Chrono(False)
    category = options.get('category')[0]

    # Validate up front: there is no point scanning every rating for a
    # category that cannot be stored.
    top_category = category + 's'
    if top_category not in dict(TOP_CATEGORY_CHOICES):
        raise CommandError(
            "Invalid top category '{}'".format(top_category))

    weights = {'favorite': 10, 'like': 2, 'neutral': 0.5, 'dislike': -1}
    scores = Counter()
    nb_ratings = Counter()
    nb_stars = Counter()
    for choice, contestant_id in Rating.objects.values_list(
            'choice', 'work__anime__' + category):
        # Ignore missing artists and ids <= 1 (original note: "not the
        # unknown artist" -- presumably id 1 is a placeholder; confirm).
        if contestant_id and contestant_id > 1:
            nb_ratings[contestant_id] += 1
            if choice == 'favorite':
                nb_stars[contestant_id] += 1
            scores[contestant_id] += weights.get(choice, 0)
    chrono.save('enter contestants')

    best = scores.most_common(20)
    artist_by_id = Artist.objects.in_bulk(
        [artist_id for artist_id, _ in best])

    top = Top.objects.create(category=top_category)
    Ranking.objects.bulk_create([
        Ranking(
            top=top,
            content_object=artist_by_id[artist_id],
            score=score,
            nb_ratings=nb_ratings[artist_id],
            nb_stars=nb_stars[artist_id],
        )
        for artist_id, score in best
    ])
    chrono.save('get results')
def get_reco_algo(request, algo_name='knn', category='all'):
    """Rank unrated works for the current user with a recommendation algo.

    Args:
        request: the HTTP request; its user's ratings are obtained through
            ``current_user_ratings``.
        algo_name: name of the algorithm to load (forced to ``'knn'`` for
            anonymous users).
        category: category slug to restrict recommendations to, or
            ``'all'``.

    Returns:
        dict with ``work_ids`` (ranked ids that still exist in the
        database) and ``works`` (id -> Work mapping from ``in_bulk``).
    """
    chrono = Chrono(is_enabled=CHRONO_ENABLED)
    # Fetch the ratings once and reuse them below.
    user_ratings = current_user_ratings(request)
    already_rated_works = list(user_ratings)
    if request.user.is_anonymous:
        assert request.user.id is None
        # We only support KNN for anonymous users, since the offline models
        # did not learn anything about them.
        # FIXME: We should also force KNN for new users for which we have no
        # offline trained model available.
        algo_name = 'knn'
    chrono.save('get rated works')

    try:
        algo = get_algo_backup(algo_name)
        dataset = get_dataset_backup(algo_name)
    except FileNotFoundError:
        # No backup on disk: fit from every rating in the database.
        triplets = list(
            Rating.objects.values_list('user_id', 'work_id', 'choice'))
        chrono.save('get all %d interesting ratings' % len(triplets))
        dataset, algo = fit_algo(algo_name, triplets)

    if algo_name == 'knn':
        framed_rated_works = pd.DataFrame(
            list(user_ratings.items()), columns=['work_id', 'choice'])
        framed_rated_works['work_id'] = dataset.encode_works(
            framed_rated_works['work_id'])
        framed_rated_works['rating'] = framed_rated_works['choice'].map(
            rating_values)
        nb_rated_works = len(framed_rated_works['work_id'])
        # Single-row sparse matrix holding the current user's ratings; row
        # indices are integer zeros (they were floats before).
        ratings_from_user = coo_matrix(
            (framed_rated_works['rating'],
             ([0] * nb_rated_works, framed_rated_works['work_id'])),
            shape=(1, algo.nb_works)).tocsr()

        # Expands knn.M with current user ratings (vstack is too slow)
        algo.M.data = np.hstack((algo.M.data, ratings_from_user.data))
        algo.M.indices = np.hstack(
            (algo.M.indices, ratings_from_user.indices))
        algo.M.indptr = np.hstack(
            (algo.M.indptr, (ratings_from_user.indptr + algo.M.nnz)[1:]))
        algo.M._shape = (algo.M.shape[0] + ratings_from_user.shape[0],
                         ratings_from_user.shape[1])
        chrono.save('loading knn and expanding with current user ratings')

    chrono.save('fit %s' % algo.get_shortname())

    if category != 'all':
        category_filter = set(
            Work.objects.filter(category__slug=category).values_list(
                'id', flat=True))
    else:
        category_filter = dataset.interesting_works

    filtered_works = list((dataset.interesting_works & category_filter) -
                          set(already_rated_works))
    chrono.save('remove already rated')

    pos_of_best = get_pos_of_best_works_for_user_via_algo(
        algo, dataset, request.user.id, filtered_works, limit=NB_RECO)
    best_work_ids = [filtered_works[pos] for pos in pos_of_best]
    chrono.save('compute every prediction')

    works = Work.objects.in_bulk(best_work_ids)
    # Some of the works may have been deleted since the algo backup was
    # created.
    ranked_work_ids = [
        work_id for work_id in best_work_ids if work_id in works
    ]
    chrono.save('get bulk')
    return {'work_ids': ranked_work_ids, 'works': works}
def get_card(request, category, sort_id=1):
    """Return up to 54 work cards of a category as a JSON response.

    Args:
        request: the HTTP request; works already rated by an authenticated
            user are excluded.
        category: category slug of the works to return.
        sort_id: 1-based index into popularity / controversy / top /
            random.

    Raises:
        Http404: if ``sort_id`` is not an integer between 1 and 4.
            Previously 0 silently meant "random" through negative indexing
            and other bad values caused an unhandled IndexError/ValueError.
    """
    from django.http import Http404

    chrono = Chrono(True)
    sort_modes = ['popularity', 'controversy', 'top', 'random']
    try:
        sort_index = int(sort_id) - 1
    except (TypeError, ValueError):
        sort_index = -1
    if not 0 <= sort_index < len(sort_modes):
        raise Http404('Unknown sort mode: {!r}'.format(sort_id))
    sort_mode = sort_modes[sort_index]

    queryset = Work.objects.filter(category__slug=category)
    if sort_mode == 'popularity':
        queryset = queryset.popular()
    elif sort_mode == 'controversy':
        queryset = queryset.controversial()
    elif sort_mode == 'top':
        queryset = queryset.top()
    else:
        queryset = queryset.random().order_by('?')

    if request.user.is_authenticated():
        rated_works = Rating.objects.filter(
            user=request.user).values('work_id')
        queryset = queryset.exclude(id__in=rated_works)
    queryset = queryset[:54]

    cards = []
    for work in queryset.values('id', 'title', 'poster', 'synopsis',
                                'nsfw'):
        update_poster_if_nsfw_dict(work, request.user)
        work['category'] = category
        cards.append(work)
    return HttpResponse(json.dumps(cards), content_type='application/json')
def get_reco_algo(request, algo_name='als', category='all'):
    """Rank unrated works for the current user with a recommendation algo.

    Args:
        request: the HTTP request; its user's ratings are obtained through
            ``current_user_ratings``.
        algo_name: name of the algorithm to load. ``'als'`` is replaced by
            ``'knn'`` when the user gave the same rating to every work
            known to the algo.
        category: category slug to restrict recommendations to, or
            ``'all'``.

    Returns:
        dict with ``work_ids`` (ranked ids that still exist in the
        database) and ``works`` (id -> Work mapping from ``in_bulk``).
    """
    chrono = Chrono(is_enabled=CHRONO_ENABLED)
    user_ratings = current_user_ratings(request)
    already_rated_works = list(user_ratings)
    chrono.save('get rated works')

    algo = get_algo_backup_or_fit_knn(algo_name)
    # NOTE: the name ``available_works`` is referenced from the pandas
    # query string below via ``@available_works``; do not rename it.
    available_works = set(algo.dataset.encode_work.keys())
    df_rated_works = (pd.DataFrame(
        list(user_ratings.items()),
        columns=['work_id', 'choice']).query('work_id in @available_works'))
    enc_rated_works = df_rated_works['work_id'].map(
        algo.dataset.encode_work)
    user_rating_values = df_rated_works['choice'].map(rating_values)

    # User gave the same rating to all works considered in the reco
    if algo_name == 'als' and len(set(user_rating_values)) == 1:
        algo = get_algo_backup_or_fit_knn('knn')
    chrono.save('retrieve or fit %s' % algo.get_shortname())

    category_filter = algo.dataset.interesting_works
    if category != 'all':
        # Build a new set: ``&=`` would intersect in place and permanently
        # shrink ``algo.dataset.interesting_works`` through the alias.
        category_filter = category_filter & set(
            Work.objects.filter(category__slug=category).values_list(
                'id', flat=True))

    filtered_works = list((algo.dataset.interesting_works &
                           category_filter) - set(already_rated_works))
    chrono.save(
        'remove already rated, left {:d}'.format(len(filtered_works)))

    pos_of_best = get_personalized_ranking(algo,
                                           request.user.id,
                                           filtered_works,
                                           enc_rated_works,
                                           user_rating_values,
                                           limit=NB_RECO)
    best_work_ids = [filtered_works[pos] for pos in pos_of_best]
    chrono.save('compute every prediction')

    works = Work.objects.in_bulk(best_work_ids)
    # Some of the works may have been deleted since the algo backup was
    # created
    ranked_work_ids = [
        work_id for work_id in best_work_ids if work_id in works
    ]
    chrono.save('get bulk')
    return {'work_ids': ranked_work_ids, 'works': works}
def handle(self, *args, **options):
    """Refresh the stored top 20 artists for the requested staff roles.

    Categories come from the ``category`` option; the ``all`` option
    replaces them with director, composer and author. For each category,
    ratings of works whose staff has that role are weighted by choice and
    summed per artist, then a ``Top`` and its ``Ranking`` rows are
    created.

    Raises:
        CommandError: if the pluralised category is not a key of
            ``TOP_CATEGORY_CHOICES``. The check now happens before the
            ratings of that category are read.
    """
    chrono = Chrono(False)
    categories = []
    if options.get('category'):
        categories = set(options.get('category'))
    if options.get('all'):
        categories = {'director', 'composer', 'author'}

    known_top_categories = dict(TOP_CATEGORY_CHOICES)
    weights = {'favorite': 10, 'like': 2, 'neutral': 0.5, 'dislike': -1}

    for category in categories:
        self.stdout.write('Refreshing top for {}s'.format(category))

        # Fail before running the query for a category we cannot store.
        top_category = category + 's'
        if top_category not in known_top_categories:
            raise CommandError(
                "Invalid top category '{}'".format(top_category))

        scores = Counter()
        nb_ratings = Counter()
        nb_stars = Counter()
        for choice, contestant_id in Rating.objects.filter(
                work__staff__role__slug=category).values_list(
                    'choice', 'work__staff__artist'):
            # Ignore missing artists and ids <= 1 (original note: "not
            # the unknown artist" -- presumably id 1 is a placeholder;
            # confirm).
            if contestant_id and contestant_id > 1:
                nb_ratings[contestant_id] += 1
                if choice == 'favorite':
                    nb_stars[contestant_id] += 1
                scores[contestant_id] += weights.get(choice, 0)
        chrono.save('enter contestants')

        best = scores.most_common(20)
        artist_by_id = Artist.objects.in_bulk(
            [artist_id for artist_id, _ in best])

        top = Top.objects.create(category=top_category)
        Ranking.objects.bulk_create([
            Ranking(
                top=top,
                content_object=artist_by_id[artist_id],
                score=score,
                nb_ratings=nb_ratings[artist_id],
                nb_stars=nb_stars[artist_id],
            )
            for artist_id, score in best
        ])
        chrono.save('get results')
        self.stdout.write(
            self.style.SUCCESS('Refreshed top for {}s'.format(category)))
def get_card(request, category, sort_id=1):
    """Return the cards of a stored deck as a JSON response.

    The deck for ``(category, sort_mode)`` is loaded from ``Deck``; when
    none exists a temporary one listing every Anime or Manga is created
    and saved. The deck is then filtered with ``filter_deck`` against the
    user's rated works and the ``dejavu`` query parameter.

    Args:
        request: the HTTP request.
        category: ``'anime'`` or ``'manga'`` when a deck must be built.
        sort_id: 1-based index into popularity / controversy / top /
            random.

    Raises:
        Http404: if ``sort_id`` is not an integer between 1 and 4, or if no
            deck exists and ``category`` is neither anime nor manga (this
            used to surface as an UnboundLocalError).
    """
    from django.http import Http404

    chrono = Chrono(True)
    deja_vu = request.GET.get('dejavu', '').split(',')

    sort_modes = ['popularity', 'controversy', 'top', 'random']
    try:
        sort_index = int(sort_id) - 1
    except (TypeError, ValueError):
        sort_index = -1
    if not 0 <= sort_index < len(sort_modes):
        raise Http404('Unknown sort mode: {!r}'.format(sort_id))
    sort_mode = sort_modes[sort_index]

    my_rated_works = get_rated_works(
        request.user) if request.user.is_authenticated() else {}
    chrono.save('got rated works')

    # One query instead of a truthiness check followed by ``get``.
    stored_deck = Deck.objects.filter(
        category=category, sort_mode=sort_mode).first()
    if stored_deck is not None:
        deck = stored_deck.content.split(',')
    else:
        # Temporary data
        if category == 'anime':
            bundle = Anime.objects.all()
        elif category == 'manga':
            bundle = Manga.objects.all()
        else:
            raise Http404('Unknown category: {!r}'.format(category))
        deck = [str(work.id) for work in bundle]
        Deck(category=category, sort_mode=sort_mode,
             content=','.join(deck)).save()

    filtered_deck = filter_deck(deck, my_rated_works, deja_vu)
    chrono.save('filter deck')

    data = {}
    for work_id, title, poster, synopsis, nsfw in Work.objects.filter(
            id__in=filtered_deck).values_list('id', 'title', 'poster',
                                              'synopsis', 'nsfw'):
        data[work_id] = {
            'title': title,
            'poster': poster,
            'synopsis': synopsis,
            'nsfw': nsfw
        }

    cards = []
    for work_id in filtered_deck:
        work = data.get(int(work_id))
        if work is None:
            # The deck is a stored snapshot; skip works deleted since.
            continue
        update_poster_if_nsfw_dict(work, request.user)
        cards.append({
            'id': work_id,
            'title': work['title'],
            'poster': work['poster'],
            'category': category,
            'synopsis': work['synopsis']
        })
    return HttpResponse(json.dumps(cards), content_type='application/json')
def get_recommendations(user, category, editor):
    """Return the top 4 recommended works for ``user``.

    Users who rated the same works as ``user`` are scored by the product
    of rating weights; the ``NB_NEIGHBORS`` best ones vote. Works rated by
    at least ``MIN_RATINGS`` neighbours are ranked by
    ``(mean rating weight, summed neighbour score)``.

    Args:
        user: the user to recommend for (must have a ``profile``).
        category: ``'anime'`` excludes mangas, ``'manga'`` keeps only
            mangas; any other value applies no category filter.
        editor: ``'otototaifu'``, ``'unspecified'`` or a substring of the
            manga editor name; it narrows the set of mangas considered.

    Returns:
        list of ``[work, is_manga, in_willsee]`` triples; ``work`` is the
        Work instance when it was found by the final query, otherwise its
        id.
    """
    # What if user is not authenticated? We will see soon.
    chrono = Chrono(CHRONO_ENABLED)
    chrono.save('[%dQ] begin' % len(connection.queries))

    rated_works = dict(
        Rating.objects.filter(user=user).values_list('work_id', 'choice'))

    willsee = set()
    if user.profile.reco_willsee_ok:
        # Works marked "willsee" stay eligible for recommendation.
        banned_works = set()
        for work_id, choice in rated_works.items():
            if choice != 'willsee':
                banned_works.add(work_id)
            else:
                willsee.add(work_id)
    else:
        banned_works = set(rated_works)

    mangas = Manga.objects.all()
    if editor == 'otototaifu':
        mangas = mangas.filter(editor__in=['Ototo Manga', 'Taifu comics'])
    elif editor != 'unspecified':
        mangas = mangas.filter(editor__icontains=editor)
    # Materialise once as a set: membership tests below are then O(1)
    # instead of a linear scan of the queryset.
    manga_ids = set(mangas.values_list('id', flat=True))

    kept_works = None
    if category == 'anime':
        banned_works |= manga_ids
    elif category == 'manga':
        kept_works = manga_ids

    chrono.save('[%dQ] retrieve her %d ratings' %
                (len(connection.queries), len(rated_works)))

    values = {
        'favorite': 4,
        'like': 2,
        'dislike': -2,
        'neutral': 0.1,
        'willsee': 0.5,
        'wontsee': -0.5
    }

    nb_common_ratings = 0
    neighbors = Counter()
    for user_id, work_id, choice in Rating.objects.filter(
            work__in=rated_works.keys()).values_list(
                'user_id', 'work_id', 'choice'):
        nb_common_ratings += 1
        neighbors[user_id] += values[rated_works[work_id]] * values[choice]
    chrono.save('[%dQ] fill neighbors with %d ratings' %
                (len(connection.queries), nb_common_ratings))

    score_of_neighbor = dict(neighbors.most_common(NB_NEIGHBORS))

    sum_ratings = Counter()
    nb_ratings = Counter()
    sum_scores = Counter()
    nb_sources = 0
    for work_id, user_id, choice in Rating.objects.filter(
            user__id__in=score_of_neighbor.keys()).exclude(
                choice__in=['willsee', 'wontsee']).values_list(
                    'work_id', 'user_id', 'choice'):
        nb_sources += 1
        if work_id in banned_works:
            continue
        # ``is not None``: an empty manga set must filter everything out
        # rather than disable the filter.
        if kept_works is not None and work_id not in kept_works:
            continue
        sum_ratings[work_id] += values[choice]
        nb_ratings[work_id] += 1
        sum_scores[work_id] += score_of_neighbor[user_id]
    chrono.save('[%dQ] compute and filter all ratings from %d sources' %
                (len(connection.queries), nb_sources))

    final_works = Counter()
    nb_kept = 0
    for work_id, count in nb_ratings.items():
        # Adding interesting works to the arena (rated at least MIN_RATINGS
        # by neighbors)
        if count >= MIN_RATINGS:
            nb_kept += 1
            key = (work_id, work_id in manga_ids, work_id in willsee)
            final_works[key] = (float(sum_ratings[work_id]) / count,
                                sum_scores[work_id])
    chrono.save('[%dQ] rank %d %d works' %
                (len(connection.queries), nb_kept, len(nb_ratings)))

    reco = []
    rank_of = {}
    # Retrieving top 4
    for rank, ((work_id, is_manga, in_willsee), _) in enumerate(
            final_works.most_common(4)):
        rank_of[work_id] = rank
        reco.append([work_id, is_manga, in_willsee])

    # Replace ids by Work instances, keeping the ranking order.
    for work in Work.objects.filter(id__in=rank_of.keys()):
        reco[rank_of[work.id]][0] = work

    chrono.save('[%dQ] retrieve top 4' % len(connection.queries))
    return reco
def get_reco_algo(user, algo_name='knn', category='all'):
    """Predict and rank unrated works for ``user`` with the given algo.

    For ``'knn'`` a first KNN is fitted on ratings of the works the user
    rated, to find neighbours; only their ratings (and the user's) are
    then used. For any other algo every rating is used. The algo is loaded
    from ``pickles/<algo_name>.pickle`` when that file exists, otherwise
    fitted through ``fit_algo``.

    Returns:
        dict with ``work_ids`` (the ``NB_RECO`` best work ids, best first)
        and ``works`` (id -> Work mapping from ``in_bulk``).
    """
    chrono = Chrono(is_enabled=CHRONO_ENABLED, connection=connection)
    already_rated_works = Rating.objects.filter(user=user).values_list(
        'work_id', flat=True)
    chrono.save('get rated works')

    if algo_name != 'knn':
        # Every rating is useful
        queryset = Rating.objects.all()
    else:
        queryset = Rating.objects.filter(work__in=already_rated_works)
        dataset = Dataset()
        anonymized = dataset.make_anonymous_data(queryset)
        chrono.save('make first anonymous data')

        algo = ALGOS['knn']()
        algo.set_parameters(anonymized.nb_users, anonymized.nb_works)
        algo.fit(anonymized.X, anonymized.y)
        chrono.save('prepare first fit')

        encoded_user = dataset.encode_user[user.id]
        encoded_neighbors = algo.get_neighbors([encoded_user])
        # We only want the neighbours of the first (and only) user
        neighbors = dataset.decode_users(encoded_neighbors[0])
        chrono.save('get neighbors')

        # Only keep useful ratings for recommendation
        useful_user_ids = neighbors + [user.id]
        queryset = Rating.objects.filter(user__id__in=useful_user_ids)
        queryset = queryset.exclude(choice__in=['willsee', 'wontsee'])

    chrono.save('get all %d interesting ratings' % queryset.count())

    dataset = Dataset()
    backup_filename = '%s.pickle' % algo_name
    backup_path = os.path.join('pickles', backup_filename)
    # When Algo class will be there: 'if algo.has_backup():'
    if not os.path.isfile(backup_path):
        dataset, algo = fit_algo(algo_name, queryset, backup_filename)
    else:
        algo = ALGOS[algo_name]()
        algo.load(backup_filename)
        dataset.load('ratings-' + backup_filename)
    chrono.save('fit %s' % algo.get_shortname())

    if category == 'all':
        category_filter = dataset.interesting_works
    else:
        category_ids = Work.objects.filter(
            category__slug=category).values_list('id', flat=True)
        category_filter = set(category_ids)

    candidates = dataset.interesting_works & category_filter
    filtered_works = candidates - set(already_rated_works)
    encoded_works = dataset.encode_works(filtered_works)
    nb_test = len(encoded_works)
    chrono.save('remove already rated')

    encoded_request_user_id = dataset.encode_user[user.id]
    pairs = [[encoded_request_user_id, encoded_work_id]
             for encoded_work_id in encoded_works]
    X_test = np.asarray(pairs)
    y_pred = algo.predict(X_test)

    # Get top NB_RECO work indices in decreasing value
    ascending = y_pred.argsort()
    pos = ascending[-NB_RECO:][::-1]
    chrono.save('compute every prediction')

    best_work_ids = []
    for _, encoded_work_id in X_test[pos]:
        best_work_ids.append(dataset.decode_work[encoded_work_id])
    works = Work.objects.in_bulk(best_work_ids)
    chrono.save('get bulk')

    return {'work_ids': best_work_ids, 'works': works}
def __init__(self, NB_COMPONENTS=10):
    """Store the number of components; ``VT`` starts unset.

    NB_COMPONENTS: value kept on the instance under the same name.
    """
    self.VT = None
    self.NB_COMPONENTS = NB_COMPONENTS
    # Chrono is constructed with True (presumably "enabled"; confirm).
    self.chrono = Chrono(True)
def __init__(self, NB_COMPONENTS=10, NB_ITERATIONS=10, LAMBDA=0.1):
    """Keep the three hyper-parameters on the instance and create a Chrono.

    NB_COMPONENTS, NB_ITERATIONS, LAMBDA: stored unchanged under the same
    attribute names.
    """
    (self.NB_COMPONENTS,
     self.NB_ITERATIONS,
     self.LAMBDA) = NB_COMPONENTS, NB_ITERATIONS, LAMBDA
    # Chrono is constructed with True (presumably "enabled"; confirm).
    self.chrono = Chrono(True)
def __init__(self, NB_COMPONENTS=10, NB_ITERATIONS=10):
    """Keep both hyper-parameters on the instance and create a Chrono.

    NB_COMPONENTS, NB_ITERATIONS: stored unchanged under the same
    attribute names.
    """
    self.NB_COMPONENTS, self.NB_ITERATIONS = NB_COMPONENTS, NB_ITERATIONS
    # Chrono is constructed with True (presumably "enabled"; confirm).
    self.chrono = Chrono(True)
def __init__(self, NB_COMPONENTS=10):
    """Store the component count and load the works listed in works.csv.

    ``self.works`` receives the second column of every row of
    ``<BASE_DIR>/../data/works.csv``; each row must have exactly two
    columns.
    """
    self.NB_COMPONENTS = NB_COMPONENTS
    # Chrono is constructed with True (presumably "enabled"; confirm).
    self.chrono = Chrono(True)

    works_path = os.path.join(settings.BASE_DIR, '../data/works.csv')
    second_column = []
    with open(works_path) as f:
        for _, value in csv.reader(f):
            second_column.append(value)
    self.works = second_column
def __init__(self):
    """Read the verbosity setting and reset the size bookkeeping.

    ``nb_users`` and ``nb_works`` start as None; ``size`` starts at 0.
    """
    verbose_level = settings.RECO_ALGORITHMS_VERBOSE_LEVEL
    self.verbose_level = verbose_level
    # The Chrono receives the same verbosity level.
    self.chrono = Chrono(verbose_level)
    self.nb_users = self.nb_works = None
    self.size = 0  # For backup files
def __init__(self, NB_COMPONENTS=20):
    """Set up a Weighted Alternate Least Squares model.

    NB_COMPONENTS: the number of components in the factorization.

    A TensorFlow ``InteractiveSession`` is also created here; the local
    reference is not kept on the instance.
    """
    self.chrono = Chrono(True)
    self.NB_COMPONENTS = NB_COMPONENTS
    sess = tf.InteractiveSession()
def __init__(self):
    """Create the instance's Chrono, constructed with False."""
    chrono = Chrono(False)
    self.chrono = chrono
def __init__(self):
    """Create the instance's Chrono, constructed with True."""
    chrono = Chrono(True)
    self.chrono = chrono
def __init__(self, NB_COMPONENTS=20):
    """Store the number of components and create a Chrono.

    NB_COMPONENTS: value kept on the instance under the same name.
    """
    # Chrono is constructed with True (presumably "enabled"; confirm).
    self.chrono = Chrono(True)
    self.NB_COMPONENTS = NB_COMPONENTS
def get_profile(request, username):
    """Render ``profile.html`` for the user called ``username``.

    The ``category`` query parameter (default ``'anime'``) selects which
    seen/unseen lists are shown; ``'recommendation'`` additionally loads
    the recommendations received and sent by the user that the target has
    not yet rated as seen. Lists are emptied in the context when the
    profile is not shared.

    Args:
        request: the HTTP request.
        username: username of the profile to display.

    Returns:
        The rendered ``profile.html`` response.

    Raises:
        User.DoesNotExist: if no user has that username.
    """
    chrono = Chrono(True)
    seen_choices = ['favorite', 'like', 'neutral', 'dislike']
    ordering = ['favorite', 'willsee', 'like', 'neutral', 'dislike',
                'wontsee']

    user = User.objects.get(username=username)
    try:
        is_shared = Profile.objects.get(user=user).is_shared
    except Profile.DoesNotExist:
        # Stopgap, meant to be removed eventually. The profile is created
        # for the viewed user (it used to be created for the requesting
        # user, which left ``user.profile`` below still missing).
        Profile(user=user).save()
        is_shared = True

    category = request.GET.get('category', 'anime')

    def category_of(work):
        """Return 'anime' if the work has an Anime row, else 'manga'."""
        try:
            work.anime
        except Anime.DoesNotExist:
            return 'manga'
        return 'anime'

    def pending(recommendations, target_of, shown_user_of):
        """List recommendations whose target has no "seen" rating yet."""
        entries = []
        for reco in recommendations:
            work_category = category_of(reco.work)
            already_seen = Rating.objects.filter(
                work=reco.work,
                user=target_of(reco),
                choice__in=seen_choices).exists()
            if not already_seen:
                entries.append({
                    'category': work_category,
                    'id': reco.work.id,
                    'title': reco.work.title,
                    'username': shown_user_of(reco).username
                })
        return entries

    # Sort by rating, then by title.
    rating_list = sorted(
        Rating.objects.filter(user__username=username).select_related(
            'work', 'work__anime', 'work__manga'),
        key=lambda x: (ordering.index(x.choice), x.work.title))

    received_recommendation_list = []
    sent_recommendation_list = []
    if category == 'recommendation':
        received_recommendation_list = pending(
            Recommendation.objects.filter(target_user__username=username),
            target_of=lambda reco: user,
            shown_user_of=lambda reco: reco.user)
        sent_recommendation_list = pending(
            Recommendation.objects.filter(user__username=username),
            target_of=lambda reco: reco.target_user,
            shown_user_of=lambda reco: reco.target_user)

    seen_anime_list = []
    unseen_anime_list = []
    seen_manga_list = []
    unseen_manga_list = []
    for rating in rating_list:
        is_seen = rating.choice in seen_choices
        if category_of(rating.work) == 'anime':
            (seen_anime_list if is_seen else unseen_anime_list).append(
                rating)
        else:
            (seen_manga_list if is_seen else unseen_manga_list).append(
                rating)

    # timezone.now() instead of stamping naive local time as UTC.
    member_time = timezone.now() - user.date_joined
    seen_list = seen_anime_list if category == 'anime' else seen_manga_list
    unseen_list = (unseen_anime_list
                   if category == 'anime' else unseen_manga_list)

    # Upcoming events the user attends. select_related only accepts
    # relations, hence 'event__anime' and not 'event__anime__title'.
    events = [{
        'id': attendee.event_id,
        'anime_id': attendee.event.anime_id,
        'attending': True,
        'type': attendee.event.get_event_type_display(),
        'channel': attendee.event.channel,
        'date': attendee.event.get_date(),
        'link': attendee.event.link,
        'location': attendee.event.location,
        'title': attendee.event.anime.title,
    } for attendee in user.attendee_set.filter(
        event__date__gte=timezone.now()).select_related(
            'event', 'event__anime')]

    data = {
        'username': username,
        'score': user.profile.score,
        'is_shared': is_shared,
        'category': category,
        'avatar_url': user.profile.get_avatar_url(),
        'member_days': member_time.days,
        'anime_count': len(seen_anime_list),
        'manga_count': len(seen_manga_list),
        'reco_count': len(received_recommendation_list),
        'seen_list': seen_list if is_shared else [],
        'unseen_list': unseen_list if is_shared else [],
        'received_recommendation_list':
            received_recommendation_list if is_shared else [],
        'sent_recommendation_list':
            sent_recommendation_list if is_shared else [],
    }
    # Debug output kept from the original: size of sized values, raw value
    # otherwise.
    for key in data:
        try:
            print(key, len(data[key]))
        except TypeError:
            print(key, '->', data[key])
    chrono.save('get request')

    # The template context is the same data plus the events.
    return render(request, 'profile.html', dict(data, events=events))
def __init__(self):
    """Read the default verbosity setting and reset the size bookkeeping.

    ``nb_users`` and ``nb_works`` start as None.
    """
    verbose = settings.RECO_ALGORITHMS_DEFAULT_VERBOSE
    self.verbose = verbose
    # The Chrono receives the same verbosity flag.
    self.chrono = Chrono(verbose)
    self.nb_users = self.nb_works = None