def get_card(request, category, sort_id=1):
    """Return a JSON list of work "cards" for ``category``.

    ``sort_id`` (1-4) selects the sort mode; works the user already rated
    and works listed in the ``dejavu`` GET parameter are filtered out of a
    cached per-category deck before the cards are built.
    """
    chrono = Chrono(True)
    # Works the client already saw, passed as a comma-separated GET param.
    deja_vu = request.GET.get('dejavu', '').split(',')
    sort_mode = ['popularity', 'controversy', 'top', 'random'][int(sort_id) - 1]
    # NOTE(review): pre-Django-1.10 style is_authenticated() call.
    my_rated_works = get_rated_works(request.user) if request.user.is_authenticated() else {}
    chrono.save('got rated works')
    if Deck.objects.filter(category=category, sort_mode=sort_mode):
        deck = Deck.objects.get(category=category, sort_mode=sort_mode).content.split(',')
    else:
        # Temporary data
        # NOTE(review): for any category other than 'anime'/'manga',
        # ``bundle`` is never bound and the next line raises NameError.
        if category == 'anime':
            bundle = Anime.objects.all()
        elif category == 'manga':
            bundle = Manga.objects.all()
        deck = [str(work.id) for work in bundle]
        # Cache the freshly built deck for subsequent requests.
        Deck(category=category, sort_mode=sort_mode, content=','.join(deck)).save()
    filtered_deck = filter_deck(deck, my_rated_works, deja_vu)
    chrono.save('filter deck')
    # Fetch the display fields of every remaining work in a single query.
    data = {}
    for work_id, title, poster, synopsis, nsfw in Work.objects.filter(id__in=filtered_deck).values_list('id', 'title', 'poster', 'synopsis', 'nsfw'):
        data[work_id] = {'title': title, 'poster': poster, 'synopsis': synopsis, 'nsfw': nsfw}
    # display_queries()
    cards = []
    for work_id in filtered_deck:
        work = data[int(work_id)]
        # Mask NSFW posters depending on the requesting user's settings.
        update_poster_if_nsfw_dict(work, request.user)
        card = {'id': work_id, 'title': work['title'], 'poster': work['poster'], 'category': category, 'synopsis': work['synopsis']}
        cards.append(card)
    # return render(request, 'about.html')
    return HttpResponse(json.dumps(cards), content_type='application/json')
def handle(self, *args, **options):
    """Compute the top-20 artists of one category and persist the ranking."""
    chrono = Chrono(False)
    category = options.get('category')[0]
    # Weight each rating choice contributes to an artist's score.
    values = {'favorite': 10, 'like': 2, 'neutral': 0.5, 'dislike': -1}
    c = Counter()
    nb_ratings = Counter()
    nb_stars = Counter()
    rating_pairs = Rating.objects.values_list('choice', 'work__anime__' + category)
    for choice, contestant_id in rating_pairs:
        # Skip missing contestants and the "unknown artist" sentinel (id 1).
        if not contestant_id or contestant_id <= 1:
            continue
        nb_ratings[contestant_id] += 1
        if choice == 'favorite':
            nb_stars[contestant_id] += 1
        c[contestant_id] += values.get(choice, 0)
    chrono.save('enter contestants')
    winners = c.most_common(20)
    artist_by_id = Artist.objects.in_bulk([artist_id for artist_id, _ in winners])
    choice = category + 's'
    if choice not in dict(TOP_CATEGORY_CHOICES):
        raise CommandError("Invalid top category '{}'".format(choice))
    top = Top.objects.create(category=choice)
    Ranking.objects.bulk_create([
        Ranking(top=top,
                content_object=artist_by_id[artist_id],
                score=score,
                nb_ratings=nb_ratings[artist_id],
                nb_stars=nb_stars[artist_id])
        for artist_id, score in winners
    ])
    chrono.save('get results')
class MangakiNMF(object):
    """Nonnegative matrix factorization recommender (scikit-learn ``NMF``)."""

    M = None
    W = None
    H = None

    def __init__(self, NB_COMPONENTS=10):
        self.NB_COMPONENTS = NB_COMPONENTS
        self.chrono = Chrono(True)
        works_path = os.path.join(settings.BASE_DIR, '../data/works.csv')
        with open(works_path) as works_file:
            # Rows are (id, title); keep only the titles.
            self.works = [title for _, title in csv.reader(works_file)]

    def set_parameters(self, nb_users, nb_works):
        self.nb_users = nb_users
        self.nb_works = nb_works

    def make_matrix(self, X, y):
        """Build a sparse user × work matrix from (user, work) pairs and ratings."""
        ratings = lil_matrix((self.nb_users, self.nb_works))
        for (user_idx, work_idx), value in zip(X, y):
            ratings[user_idx, work_idx] = value
        return ratings

    def fit(self, X, y):
        print("Computing M: (%i × %i)" % (self.nb_users, self.nb_works))
        ratings = self.make_matrix(X, y)
        model = NMF(n_components=self.NB_COMPONENTS, random_state=42)
        self.W = model.fit_transform(ratings)
        self.H = model.components_
        print('Shapes', self.W.shape, self.H.shape)
        self.M = self.W.dot(self.H)
        self.chrono.save('factor matrix')
        # self.display_components()

    def predict(self, X):
        user_idx = X[:, 0].astype(np.int64)
        work_idx = X[:, 1].astype(np.int64)
        return self.M[user_idx, work_idx]

    def display_components(self):
        """Print each component's relative weight for the row ``PIG_ID``."""
        for i in range(self.NB_COMPONENTS):
            if not self.W[PIG_ID][i]:
                continue
            share = self.W[PIG_ID][i] * 100 / self.W[PIG_ID].sum()
            percentage = round(share, 1)
            print('# Composante %d : %s (%.1f %%)' % (i, explanation.get(i), percentage))
        # for _, title in sorted((-self.H[i][j], self.works[j])
        #                        for j in range(self.nb_works))[:10]:
        #     print(title)
        # print()

    def __str__(self):
        return '[NMF]'

    def get_shortname(self):
        return 'nmf'
class MangakiEFA(RecommendationAlgorithm):
    """Exploratory Factor Analysis recommender.

    Wraps scikit-learn's FactorAnalysis
    (http://scikit-learn.org/stable/modules/decomposition.html#factor-analysis).
    For a better way to get interpretable components, see MangakiNMF.
    """

    M = None
    W = None
    H = None

    def __init__(self, NB_COMPONENTS=20):
        self.NB_COMPONENTS = NB_COMPONENTS
        self.chrono = Chrono(True)

    def set_parameters(self, nb_users, nb_works):
        self.nb_users = nb_users
        self.nb_works = nb_works

    def make_matrix(self, X, y):
        """Build a CSR user × work rating matrix from (pairs, ratings)."""
        users = X[:, 0].astype(np.int64)
        works = X[:, 1].astype(np.int64)
        ratings = y.astype(np.int64)
        shape = (self.nb_users, self.nb_works)
        return csr_matrix((ratings, (users, works)), shape=shape)

    def fit(self, X, y, truncated=None):
        print("Computing M: (%i × %i)" % (self.nb_users, self.nb_works))
        matrix = self.make_matrix(X, y).toarray()
        self.matrix = matrix
        if truncated is not None:
            # Optionally factor only the first ``truncated`` columns.
            matrix = matrix[:, :truncated]
        model = FactorAnalysis(n_components=self.NB_COMPONENTS)
        self.W = model.fit_transform(matrix)
        self.H = model.components_
        print('Shapes', self.W.shape, self.H.shape)
        # FactorAnalysis centers the data, so the mean is added back.
        self.M = self.W.dot(self.H) + model.mean_
        self.model = model
        self.chrono.save('factor matrix')

    def fit_user(self, user_id, sparse_matrix_dict):
        # Per-user incremental fitting is not supported for EFA.
        pass

    def predict(self, X):
        users = X[:, 0].astype(np.int64)
        works = X[:, 1].astype(np.int64)
        return self.M[users, works]

    def __str__(self):
        return '[EFA]'

    def get_shortname(self):
        return 'efa'
class MangakiNMF(object):
    """NMF recommender; prints a component breakdown after every fit."""

    M = None
    W = None
    H = None

    def __init__(self, NB_COMPONENTS=10):
        self.NB_COMPONENTS = NB_COMPONENTS
        self.chrono = Chrono(True)
        csv_path = os.path.join(settings.BASE_DIR, '../data/works.csv')
        with open(csv_path) as csv_file:
            # works.csv rows are (id, title); keep only the titles.
            self.works = [title for _, title in csv.reader(csv_file)]

    def set_parameters(self, nb_users, nb_works):
        self.nb_users = nb_users
        self.nb_works = nb_works

    def make_matrix(self, X, y):
        """Build a sparse user × work matrix from (user, work) pairs and ratings."""
        ratings = lil_matrix((self.nb_users, self.nb_works))
        for (user_idx, work_idx), value in zip(X, y):
            ratings[user_idx, work_idx] = value
        return ratings

    def fit(self, X, y):
        print("Computing M: (%i × %i)" % (self.nb_users, self.nb_works))
        ratings = self.make_matrix(X, y)
        model = NMF(n_components=self.NB_COMPONENTS, random_state=42)
        self.W = model.fit_transform(ratings)
        self.H = model.components_
        print('Shapes', self.W.shape, self.H.shape)
        self.M = self.W.dot(self.H)
        self.chrono.save('factor matrix')
        self.display_components()

    def predict(self, X):
        user_idx = X[:, 0].astype(np.int64)
        work_idx = X[:, 1].astype(np.int64)
        return self.M[user_idx, work_idx]

    def display_components(self):
        """Print each component's relative weight for the row ``PIG_ID``."""
        for i in range(self.NB_COMPONENTS):
            if not self.W[PIG_ID][i]:
                continue
            share = self.W[PIG_ID][i] * 100 / self.W[PIG_ID].sum()
            percentage = round(share, 1)
            print('# Composante %d : %s (%.1f %%)' % (i, explanation.get(i), percentage))
        # for _, title in sorted((-self.H[i][j], self.works[j])
        #                        for j in range(self.nb_works))[:10]:
        #     print(title)
        # print()

    def __str__(self):
        return '[NMF]'

    def get_shortname(self):
        return 'nmf'
def get_card(request, category, sort_id=1):
    """Serve up to 54 work cards of ``category`` as JSON, sorted per ``sort_id``."""
    chrono = Chrono(True)
    deja_vu = request.GET.get('dejavu', '').split(',')
    sort_modes = ['popularity', 'controversy', 'top', 'random']
    sort_mode = sort_modes[int(sort_id) - 1]
    queryset = Work.objects.filter(category__slug=category)
    if sort_mode == 'popularity':
        queryset = queryset.popular()
    elif sort_mode == 'controversy':
        queryset = queryset.controversial()
    elif sort_mode == 'top':
        queryset = queryset.top()
    else:
        # 'random': shuffle at the database level.
        queryset = queryset.random().order_by('?')
    if request.user.is_authenticated():
        # Never propose works the user has already rated.
        rated_works = Rating.objects.filter(user=request.user).values('work_id')
        queryset = queryset.exclude(id__in=rated_works)
    queryset = queryset[:54]
    fields = ('id', 'title', 'poster', 'synopsis', 'nsfw')
    cards = []
    for work in queryset.values(*fields):
        update_poster_if_nsfw_dict(work, request.user)
        work['category'] = category
        cards.append(work)
    return HttpResponse(json.dumps(cards), content_type='application/json')
def handle(self, *args, **options):
    """Print, as JSON, the top-20 artists of one category by weighted score.

    The category (e.g. 'director') is read from the command's parsed
    ``options``, so the command also works when invoked programmatically
    via ``call_command``.
    """
    chrono = Chrono(False)
    # BUGFIX: was ``sys.argv[2]``, which depends on the exact CLI argument
    # layout and breaks under ``call_command`` — read parsed options instead
    # (consistent with the other top-ranking commands in this project).
    category = options.get('category')[0]
    c = Counter()
    # Weight each rating choice contributes to an artist's score.
    values = {'favorite': 10, 'like': 2, 'neutral': 0.5, 'dislike': -1}
    nb_ratings = Counter()
    nb_stars = Counter()
    for choice, contestant_id in Rating.objects.values_list(
            'choice', 'work__anime__' + category):
        if contestant_id and contestant_id > 1:  # Not the unknown artist
            nb_ratings[contestant_id] += 1
            if choice == 'favorite':
                nb_stars[contestant_id] += 1
            c[contestant_id] += values.get(choice, 0)
    chrono.save('enter contestants')
    artist_ids = [artist_id for artist_id, _ in c.most_common(20)]
    artist_by_id = Artist.objects.in_bulk(artist_ids)
    top = []
    for i, (artist_id, score) in enumerate(c.most_common(20)):
        top.append(dict(rank=i + 1,
                        name=str(artist_by_id[artist_id]),
                        id=artist_id,
                        score=score,
                        nb_ratings=nb_ratings[artist_id],
                        nb_stars=nb_stars[artist_id]))
    chrono.save('get results')
    # print('%d queries' % len(connection.queries))
    print(json.dumps(top))
def handle(self, *args, **options):
    """Build and persist the top-20 artist ranking for one category."""
    chrono = Chrono(False)
    category = options.get('category')[0]
    # How much each rating choice contributes to an artist's score.
    weight_of = {'favorite': 10, 'like': 2, 'neutral': 0.5, 'dislike': -1}
    scores = Counter()
    nb_ratings = Counter()
    nb_stars = Counter()
    pairs = Rating.objects.values_list('choice', 'work__anime__' + category)
    for choice, contestant_id in pairs:
        if contestant_id and contestant_id > 1:  # Skip the unknown artist
            nb_ratings[contestant_id] += 1
            if choice == 'favorite':
                nb_stars[contestant_id] += 1
            scores[contestant_id] += weight_of.get(choice, 0)
    chrono.save('enter contestants')
    winners = scores.most_common(20)
    artist_by_id = Artist.objects.in_bulk([artist_id for artist_id, _ in winners])
    choice = category + 's'
    if choice not in dict(TOP_CATEGORY_CHOICES):
        raise CommandError("Invalid top category '{}'".format(choice))
    top = Top.objects.create(category=choice)
    Ranking.objects.bulk_create([
        Ranking(top=top,
                content_object=artist_by_id[artist_id],
                score=score,
                nb_ratings=nb_ratings[artist_id],
                nb_stars=nb_stars[artist_id])
        for artist_id, score in winners
    ])
    chrono.save('get results')
def handle(self, *args, **options):
    """Print, as JSON, the top-20 artists of one category by weighted score.

    The category must come through the command's parsed ``options`` so that
    programmatic invocation via ``call_command`` works too.
    """
    chrono = Chrono(False)
    # BUGFIX: was ``sys.argv[2]``, which depends on the CLI argument layout
    # and breaks under ``call_command``; read the parsed option instead.
    category = options.get('category')[0]
    c = Counter()
    # Score contribution of each rating choice.
    values = {'favorite': 10, 'like': 2, 'neutral': 0.5, 'dislike': -1}
    nb_ratings = Counter()
    nb_stars = Counter()
    for choice, contestant_id in Rating.objects.values_list('choice', 'work__anime__' + category):
        if contestant_id and contestant_id > 1:  # Not the unknown artist
            nb_ratings[contestant_id] += 1
            if choice == 'favorite':
                nb_stars[contestant_id] += 1
            c[contestant_id] += values.get(choice, 0)
    chrono.save('enter contestants')
    artist_ids = [artist_id for artist_id, _ in c.most_common(20)]
    artist_by_id = Artist.objects.in_bulk(artist_ids)
    top = []
    for i, (artist_id, score) in enumerate(c.most_common(20)):
        top.append(dict(rank=i + 1,
                        name=str(artist_by_id[artist_id]),
                        id=artist_id,
                        score=score,
                        nb_ratings=nb_ratings[artist_id],
                        nb_stars=nb_stars[artist_id]))
    chrono.save('get results')
    # print('%d queries' % len(connection.queries))
    print(json.dumps(top))
def handle(self, *args, **options):
    """Recompute and store the top-20 artist rankings.

    ``--category`` selects one or more staff roles to refresh; ``--all``
    refreshes the three standard roles (director, composer, author).
    """
    chrono = Chrono(False)
    categories = []
    if options.get('category'):
        categories = set(options.get('category'))
    if options.get('all'):
        # NOTE(review): --all overrides any explicit --category values.
        categories = {'director', 'composer', 'author'}
    for category in categories:
        self.stdout.write('Refreshing top for {}s'.format(category))
        c = Counter()
        # Score contribution of each rating choice.
        values = {'favorite': 10, 'like': 2, 'neutral': 0.5, 'dislike': -1}
        nb_ratings = Counter()
        nb_stars = Counter()
        for choice, contestant_id in Rating.objects.filter(
                work__staff__role__slug=category).values_list(
                    'choice', 'work__staff__artist'):
            if contestant_id and contestant_id > 1:  # Not the unknown artist
                nb_ratings[contestant_id] += 1
                if choice == 'favorite':
                    nb_stars[contestant_id] += 1
                c[contestant_id] += values.get(choice, 0)
        chrono.save('enter contestants')
        artist_ids = []
        for artist_id, _ in c.most_common(20):
            artist_ids.append(artist_id)
        artist_by_id = Artist.objects.in_bulk(artist_ids)
        choice = category + 's'
        if choice not in dict(TOP_CATEGORY_CHOICES):
            raise CommandError("Invalid top category '{}'".format(choice))
        top = Top.objects.create(category=choice)
        Ranking.objects.bulk_create([
            Ranking(
                top=top,
                content_object=artist_by_id[artist_id],
                score=score,
                nb_ratings=nb_ratings[artist_id],
                nb_stars=nb_stars[artist_id],
            ) for (artist_id, score) in c.most_common(20)
        ])
        chrono.save('get results')
        self.stdout.write(
            self.style.SUCCESS('Refreshed top for {}s'.format(category)))
def get_card(request, category, sort_id=1):
    """Return a JSON list of work "cards" for ``category``.

    ``sort_id`` (1-4) selects the sort mode; works the user already rated
    and works listed in the ``dejavu`` GET parameter are filtered out of a
    cached per-category deck before the cards are built.
    """
    chrono = Chrono(True)
    # Works the client already saw, passed as a comma-separated GET param.
    deja_vu = request.GET.get('dejavu', '').split(',')
    sort_mode = ['popularity', 'controversy', 'top', 'random'][int(sort_id) - 1]
    # NOTE(review): pre-Django-1.10 style is_authenticated() call.
    my_rated_works = get_rated_works(
        request.user) if request.user.is_authenticated() else {}
    chrono.save('got rated works')
    if Deck.objects.filter(category=category, sort_mode=sort_mode):
        deck = Deck.objects.get(category=category,
                                sort_mode=sort_mode).content.split(',')
    else:
        # Temporary data
        # NOTE(review): for any category other than 'anime'/'manga',
        # ``bundle`` is never bound and the next line raises NameError.
        if category == 'anime':
            bundle = Anime.objects.all()
        elif category == 'manga':
            bundle = Manga.objects.all()
        deck = [str(work.id) for work in bundle]
        # Cache the freshly built deck for subsequent requests.
        Deck(category=category, sort_mode=sort_mode,
             content=','.join(deck)).save()
    filtered_deck = filter_deck(deck, my_rated_works, deja_vu)
    chrono.save('filter deck')
    # Fetch the display fields of every remaining work in a single query.
    data = {}
    for work_id, title, poster, synopsis, nsfw in Work.objects.filter(
            id__in=filtered_deck).values_list('id', 'title', 'poster',
                                              'synopsis', 'nsfw'):
        data[work_id] = {
            'title': title,
            'poster': poster,
            'synopsis': synopsis,
            'nsfw': nsfw
        }
    # display_queries()
    cards = []
    for work_id in filtered_deck:
        work = data[int(work_id)]
        # Mask NSFW posters depending on the requesting user's settings.
        update_poster_if_nsfw_dict(work, request.user)
        card = {
            'id': work_id,
            'title': work['title'],
            'poster': work['poster'],
            'category': category,
            'synopsis': work['synopsis']
        }
        cards.append(card)
    # return render(request, 'about.html')
    return HttpResponse(json.dumps(cards), content_type='application/json')
class MangakiALS(object):
    """Alternating Least Squares matrix factorization.

    Ratings are centered per user; each least-squares solve is regularized
    by LAMBDA scaled by the row's/column's rating count.
    """

    M = None
    U = None
    VT = None

    def __init__(self, NB_COMPONENTS=10, NB_ITERATIONS=10, LAMBDA=0.1):
        """
        NB_COMPONENTS -- rank of the factorization
        NB_ITERATIONS -- number of alternating passes over users and works
        LAMBDA -- regularization strength
        """
        self.NB_COMPONENTS = NB_COMPONENTS
        self.NB_ITERATIONS = NB_ITERATIONS
        self.LAMBDA = LAMBDA
        self.chrono = Chrono(True)

    def save(self, filename):
        # Pickle the whole model into the 'pickles' directory.
        with open(os.path.join('pickles', filename), 'wb') as f:
            pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)

    def load(self, filename):
        # Restore the factors and means of a model pickled by save().
        with open(os.path.join('pickles', filename), 'rb') as f:
            backup = pickle.load(f)
        self.M = backup.M
        self.U = backup.U
        self.VT = backup.VT
        self.means = backup.means

    def set_parameters(self, nb_users, nb_works):
        self.nb_users = nb_users
        self.nb_works = nb_works

    def make_matrix(self, X, y):
        """Return (matrix, means): per-user dict of work -> centered rating,
        plus the vector of per-user rating means used for centering."""
        matrix = defaultdict(dict)
        means = np.zeros((self.nb_users,))
        for (user, work), rating in zip(X, y):
            matrix[user][work] = rating
            means[user] += rating
        for user in matrix:
            means[user] /= len(matrix[user])
        for (user, work) in X:
            matrix[user][work] -= means[user]
        return matrix, means

    def fit_user(self, user, matrix):
        # Regularized least squares for one user's factor row.
        Ru = np.array(list(matrix[user].values()), ndmin=2).T
        Vu = self.VT[:, list(matrix[user].keys())]
        Gu = self.LAMBDA * len(matrix[user]) * np.eye(self.NB_COMPONENTS)
        self.U[[user], :] = np.linalg.solve(Vu.dot(Vu.T) + Gu, Vu.dot(Ru)).T

    def fit_work(self, work, matrixT):
        # Regularized least squares for one work's factor column.
        Ri = np.array(list(matrixT[work].values()), ndmin=2).T
        Ui = self.U[list(matrixT[work].keys()), :].T
        Gi = self.LAMBDA * len(matrixT[work]) * np.eye(self.NB_COMPONENTS)
        self.VT[:, [work]] = np.linalg.solve(Ui.dot(Ui.T) + Gi, Ui.dot(Ri))

    def factorize(self, matrix, random_state):
        """Run ALS on the centered rating dict.

        BUGFIX: ``random_state`` used to be ignored (unseeded
        ``np.random.rand``), so factorizations were not reproducible;
        the initial factors are now drawn from a seeded RNG.
        """
        # Transposed view: work -> {user: rating}.
        matrixT = defaultdict(dict)
        for user in matrix:
            for work in matrix[user]:
                matrixT[work][user] = matrix[user][work]
        # Seeded random initialization.
        rng = np.random.RandomState(random_state)
        self.U = rng.rand(self.nb_users, self.NB_COMPONENTS)
        self.VT = rng.rand(self.NB_COMPONENTS, self.nb_works)
        # Alternate between refitting all users and all works.
        for i in range(self.NB_ITERATIONS):
            # print('Step {}'.format(i))
            for user in matrix:
                self.fit_user(user, matrix)
            for work in matrixT:
                self.fit_work(work, matrixT)

    def fit(self, X, y):
        print("Computing M: (%i × %i)" % (self.nb_users, self.nb_works))
        matrix, self.means = self.make_matrix(X, y)
        self.chrono.save('fill and center matrix')
        self.factorize(matrix, random_state=42)
        print('Shapes', self.U.shape, self.VT.shape)
        self.M = self.U.dot(self.VT)
        # self.save('backup.pickle')
        self.chrono.save('factor matrix')

    def predict(self, X):
        """Predicted rating = factor product plus the user's mean rating."""
        return (self.M[X[:, 0].astype(np.int64), X[:, 1].astype(np.int64)]
                + self.means[X[:, 0].astype(np.int64)])

    def __str__(self):
        return '[ALS]'

    def get_shortname(self):
        return 'als'
def __init__(self):
    """Set up timing and dataset-dimension placeholders."""
    verbose_level = settings.RECO_ALGORITHMS_VERBOSE_LEVEL
    self.verbose_level = verbose_level
    self.chrono = Chrono(verbose_level)
    # Dimensions are unknown until parameters are set.
    self.nb_users = None
    self.nb_works = None
    self.size = 0  # For backup files
def __init__(self):
    """Create the Chrono timing helper for this instance."""
    # True mirrors the verbose flag Chrono takes elsewhere in this project.
    verbose = True
    self.chrono = Chrono(verbose)
def __init__(self):
    """Create the Chrono timing helper for this instance (quiet mode)."""
    # False mirrors the verbose flag Chrono takes elsewhere in this project.
    verbose = False
    self.chrono = Chrono(verbose)
class MangakiSVD(object):
    """SVD-based recommender working directly against the Django ORM.

    fit() builds the user × work rating matrix from the database and
    factors it with ``randomized_svd``; get_reco() prints diagnostics and
    writes Recommendation rows for a target user.
    """
    M = None
    U = None
    sigma = None
    VT = None
    chrono = None
    inv_work = None    # work_id -> matrix column index
    inv_user = None    # user_id -> matrix row index
    work_titles = None # work_id -> title

    def __init__(self):
        self.chrono = Chrono(True)

    def save(self, filename):
        # Pickle the whole model (matrix, factors and index mappings).
        with open(filename, 'wb') as f:
            pickle.dump(self, f)

    def load(self, filename):
        # Restore the fields of a model pickled by save().
        with open(filename, 'rb') as f:
            backup = pickle.load(f)
        self.M = backup.M
        self.U = backup.U
        self.sigma = backup.sigma
        self.VT = backup.VT
        self.inv_work = backup.inv_work
        self.inv_user = backup.inv_user
        self.work_titles = backup.work_titles

    def fit(self, X, y):
        """Fill the rating matrix from ((user_id, work_id), rating) pairs
        and factor it (NB_COMPONENTS is a module-level constant here)."""
        self.work_titles = {}
        for work in Work.objects.values('id', 'title'):
            self.work_titles[work['id']] = work['title']
        # Map database ids to contiguous matrix indices.
        work_ids = list(Rating.objects.values_list('work_id', flat=True).distinct())
        nb_works = len(work_ids)
        self.inv_work = {work_ids[i]: i for i in range(nb_works)}
        user_ids = list(User.objects.values_list('id', flat=True))
        nb_users = len(user_ids)
        self.inv_user = {user_ids[i]: i for i in range(nb_users)}
        self.chrono.save('get_work_ids')
        # print("Computing M: (%i × %i)" % (nb_users, nb_works))
        self.M = lil_matrix((nb_users, nb_works))
        """ratings_of = {}
        for (user_id, work_id), rating in zip(X, y):
            ratings_of.setdefault(user_id, []).append(rating)"""
        for (user_id, work_id), rating in zip(X, y):
            self.M[self.inv_user[user_id], self.inv_work[work_id]] = rating #- np.mean(ratings_of[user_id])
        # np.save('backupM', self.M)
        self.chrono.save('fill matrix')
        # Ranking computation
        self.U, self.sigma, self.VT = randomized_svd(self.M, NB_COMPONENTS, n_iter=3, random_state=42)
        # print('Formes', self.U.shape, self.sigma.shape, self.VT.shape)
        self.save('backup.pickle')
        self.chrono.save('factor matrix')

    def predict(self, X):
        # Reconstruct each requested rating from the factored matrices;
        # X is an iterable of (user_id, work_id) database-id pairs.
        y = []
        for user_id, work_id in X:
            i = self.inv_user[user_id]
            j = self.inv_work[work_id]
            y.append(self.U[i].dot(np.diag(self.sigma)).dot(self.VT.transpose()[j]))
        return np.array(y)

    def get_reco(self, username, sending=False):
        """Print diagnostics and create Recommendation rows for ``username``.

        The account with username '******' is recorded as the author of the
        created recommendations.
        """
        target_user = User.objects.get(username=username)
        the_user_id = target_user.id
        svd_user = User.objects.get(username='******')
        work_ids = {self.inv_work[work_id]: work_id for work_id in self.inv_work}
        nb_works = len(work_ids)
        # Works already rated (other than 'willsee') are never recommended.
        seen_works = set(Rating.objects.filter(user__id=the_user_id).exclude(choice='willsee').values_list('work_id', flat=True))
        the_i = self.inv_user[the_user_id]
        self.chrono.save('get_seen_works')
        print('mon vecteur (taille %d)' % len(self.U[the_i]), self.U[the_i])
        print(self.sigma)
        # For each singular component, display the most/least aligned works.
        for i, line in enumerate(self.VT):
            print('=> Ligne %d' % (i + 1), '(ma note : %f)' % self.U[the_i][i])
            sorted_line = sorted((line[j], self.work_titles[work_ids[j]]) for j in range(nb_works))[::-1]
            top5 = sorted_line[:10]
            bottom5 = sorted_line[-10:]
            for anime in top5:
                print(anime)
            for anime in bottom5:
                print(anime)
            """if i == 0 or i == 1:  # First two vectors explaining variance
                with open('vector%d.json' % (i + 1), 'w') as f:
                    vi = X.dot(line).tolist()
                    x_norm = [np.dot(X.data[k], X.data[k]) / (nb_works + 1) for k in range(nb_users + 1)]
                    f.write(json.dumps({'v': [v / math.sqrt(x_norm[k]) if x_norm[k] != 0 else float('inf') for k, v in enumerate(vi)]}))"""
        # print(VT.dot(VT.transpose()))
        # return
        the_ratings = self.predict((the_user_id, work_ids[j]) for j in range(nb_works))
        ranking = sorted(zip(the_ratings, [(work_ids[j], self.work_titles[work_ids[j]]) for j in range(nb_works)]), reverse=True)

        # Summarize the results of the ranking for the_user_id:
        # “=> rank, title, score”
        c = 0
        for i, (rating, (work_id, title)) in enumerate(ranking, start=1):
            if work_id not in seen_works:
                print('=>', i, title, rating, self.predict([(the_user_id, work_id)]))
                # Only create the recommendation if it does not exist yet.
                if Recommendation.objects.filter(user=svd_user, target_user__id=the_user_id, work__id=work_id).count() == 0:
                    Recommendation.objects.create(user=svd_user, target_user_id=the_user_id, work_id=work_id)
                c += 1
            elif i < TOP:
                print(i, title, rating)
            if c >= TOP:
                break
        """print(len(connection.queries), 'queries')
        for line in connection.queries:
            print(line)"""
        self.chrono.save('complete')

    def __str__(self):
        return '[SVD]'

    def get_shortname(self):
        return 'svd'
def get_reco_algo(user, algo_name='knn', category='all'):
    """Compute personalized recommendations for ``user``.

    Returns {'work_ids': ranked ids, 'works': {id: Work}} holding the
    NB_RECO best-scoring works the user has not rated yet, optionally
    restricted to one work category.
    """
    chrono = Chrono(is_enabled=CHRONO_ENABLED, connection=connection)
    already_rated_works = Rating.objects.filter(user=user).values_list(
        'work_id', flat=True)
    chrono.save('get rated works')
    if algo_name == 'knn':
        # KNN: first find the user's neighbors from ratings on the works the
        # user rated, then keep only the neighbors' ratings for the final fit.
        queryset = Rating.objects.filter(work__in=already_rated_works)
        dataset = Dataset()
        anonymized = dataset.make_anonymous_data(queryset)
        chrono.save('make first anonymous data')
        algo = ALGOS['knn']()
        algo.set_parameters(anonymized.nb_users, anonymized.nb_works)
        algo.fit(anonymized.X, anonymized.y)
        chrono.save('prepare first fit')
        encoded_neighbors = algo.get_neighbors([dataset.encode_user[user.id]])
        neighbors = dataset.decode_users(
            encoded_neighbors[0])  # We only want for the first user
        chrono.save('get neighbors')
        # Only keep useful ratings for recommendation
        queryset = Rating.objects.filter(user__id__in=neighbors + [user.id]).exclude(
            choice__in=['willsee', 'wontsee'])
    else:
        # Every rating is useful
        queryset = Rating.objects.all()
    chrono.save('get all %d interesting ratings' % queryset.count())
    dataset = Dataset()
    backup_filename = '%s.pickle' % algo_name
    # Reuse a pickled model when one exists, otherwise fit from scratch.
    if os.path.isfile(
            os.path.join('pickles', backup_filename)
    ):  # When Algo class will be there: 'if algo.has_backup():'
        algo = ALGOS[algo_name]()
        algo.load(backup_filename)
        dataset.load('ratings-' + backup_filename)
    else:
        dataset, algo = fit_algo(algo_name, queryset, backup_filename)
    chrono.save('fit %s' % algo.get_shortname())
    if category != 'all':
        category_filter = set(
            Work.objects.filter(category__slug=category).values_list(
                'id', flat=True))
    else:
        category_filter = dataset.interesting_works
    # Candidate works: interesting, within category, and not rated yet.
    filtered_works = (dataset.interesting_works & category_filter) - set(already_rated_works)
    encoded_works = dataset.encode_works(filtered_works)
    nb_test = len(encoded_works)
    chrono.save('remove already rated')
    encoded_request_user_id = dataset.encode_user[user.id]
    # Score every candidate work for the requesting user in one batch.
    X_test = np.asarray([[encoded_request_user_id, encoded_work_id]
                         for encoded_work_id in encoded_works])
    y_pred = algo.predict(X_test)
    pos = y_pred.argsort(
    )[-NB_RECO:][::-1]  # Get top NB_RECO work indices in decreasing value
    chrono.save('compute every prediction')
    best_work_ids = [
        dataset.decode_work[encoded_work_id]
        for _, encoded_work_id in X_test[pos]
    ]
    works = Work.objects.in_bulk(best_work_ids)
    chrono.save('get bulk')
    return {'work_ids': best_work_ids, 'works': works}
def __init__(self, NB_COMPONENTS=10):
    """Load the work titles and prepare an NB_COMPONENTS-component model."""
    self.NB_COMPONENTS = NB_COMPONENTS
    self.chrono = Chrono(True)
    works_path = os.path.join(settings.BASE_DIR, '../data/works.csv')
    with open(works_path) as works_file:
        # works.csv rows are (id, title); keep only the titles.
        self.works = [title for _, title in csv.reader(works_file)]
def __init__(self, NB_COMPONENTS=10, NB_ITERATIONS=10):
    """Store the factorization hyperparameters and start a verbose timer."""
    self.NB_COMPONENTS, self.NB_ITERATIONS = NB_COMPONENTS, NB_ITERATIONS
    self.chrono = Chrono(True)
class MangakiSVD(object):
    """SVD recommender: dense per-user mean-centered matrix factored with
    ``randomized_svd``; includes a DB-coupled get_reco() diagnostic path."""
    M = None
    U = None
    sigma = None
    VT = None
    chrono = None
    inv_work = None    # work_id -> matrix column index
    inv_user = None    # user_id -> matrix row index
    work_titles = None # work_id -> title

    def __init__(self, NB_COMPONENTS=10, NB_ITERATIONS=10):
        self.NB_COMPONENTS = NB_COMPONENTS
        self.NB_ITERATIONS = NB_ITERATIONS
        self.chrono = Chrono(True)

    def save(self, filename):
        # Pickle the whole model object.
        with open(filename, 'wb') as f:
            pickle.dump(self, f)

    def load(self, filename):
        # Restore the fields of a model pickled by save().
        with open(filename, 'rb') as f:
            backup = pickle.load(f)
        self.M = backup.M
        self.U = backup.U
        self.sigma = backup.sigma
        self.VT = backup.VT
        self.inv_work = backup.inv_work
        self.inv_user = backup.inv_user
        self.work_titles = backup.work_titles

    def set_parameters(self, nb_users, nb_works):
        self.nb_users = nb_users
        self.nb_works = nb_works

    def make_matrix(self, X, y):
        """Build the dense user × work matrix, centered per user.

        Returns (matrix, means) where means[i] is user i's average over
        their nonzero ratings (0 for users with no rating at all).
        """
        matrix = np.zeros((self.nb_users, self.nb_works), dtype=np.float64)
        for (user, work), rating in zip(X, y):
            matrix[user][work] = rating
        means = np.zeros((self.nb_users,))
        for i in range(self.nb_users):
            means[i] = np.sum(matrix[i]) / np.sum(matrix[i] != 0)
            if np.isnan(means[i]):  # User with no rating: 0/0 above
                means[i] = 0
            # Center only the entries that were actually rated.
            matrix[i][matrix[i] != 0] -= means[i]
        return matrix, means

    def fit(self, X, y):
        print("Computing M: (%i × %i)" % (self.nb_users, self.nb_works))
        matrix, self.means = self.make_matrix(X, y)
        self.chrono.save('fill and center matrix')
        self.U, self.sigma, self.VT = randomized_svd(matrix, self.NB_COMPONENTS, n_iter=self.NB_ITERATIONS, random_state=42)
        print('Shapes', self.U.shape, self.sigma.shape, self.VT.shape)
        # Low-rank reconstruction of the centered matrix.
        self.M = self.U.dot(np.diag(self.sigma)).dot(self.VT)
        self.save('backup.pickle')
        self.chrono.save('factor matrix')

    def predict(self, X):
        # X is a 2-D array of (encoded user, encoded work) pairs; add the
        # user's mean back onto the centered reconstruction.
        return self.M[X[:, 0].astype(np.int64), X[:, 1].astype(np.int64)] + self.means[X[:, 0].astype(np.int64)]

    def get_reco(self, username, sending=False):
        """Print diagnostics and create Recommendation rows for ``username``.

        NOTE(review): this method passes generators/lists of raw database
        ids to predict(), which indexes with X[:, 0] — this looks inherited
        from an older predict() signature; verify before relying on it.
        """
        target_user = User.objects.get(username=username)
        the_user_id = target_user.id
        svd_user = User.objects.get(username='******')
        work_ids = {self.inv_work[work_id]: work_id for work_id in self.inv_work}
        nb_works = len(work_ids)
        # Works already rated (other than 'willsee') are never recommended.
        seen_works = set(Rating.objects.filter(user__id=the_user_id).exclude(choice='willsee').values_list('work_id', flat=True))
        the_i = self.inv_user[the_user_id]
        self.chrono.save('get_seen_works')
        print('mon vecteur (taille %d)' % len(self.U[the_i]), self.U[the_i])
        print(self.sigma)
        # For each singular component, display the most/least aligned works.
        for i, line in enumerate(self.VT):
            print('=> Ligne %d' % (i + 1), '(ma note : %f)' % self.U[the_i][i])
            sorted_line = sorted((line[j], self.work_titles[work_ids[j]]) for j in range(nb_works))[::-1]
            top5 = sorted_line[:10]
            bottom5 = sorted_line[-10:]
            for anime in top5:
                print(anime)
            for anime in bottom5:
                print(anime)
            """if i == 0 or i == 1:  # First two vectors explaining variance
                with open('vector%d.json' % (i + 1), 'w') as f:
                    vi = X.dot(line).tolist()
                    x_norm = [np.dot(X.data[k], X.data[k]) / (nb_works + 1) for k in range(nb_users + 1)]
                    f.write(json.dumps({'v': [v / math.sqrt(x_norm[k]) if x_norm[k] != 0 else float('inf') for k, v in enumerate(vi)]}))"""
        # print(VT.dot(VT.transpose()))
        # return
        the_ratings = self.predict((the_user_id, work_ids[j]) for j in range(nb_works))
        ranking = sorted(zip(the_ratings, [(work_ids[j], self.work_titles[work_ids[j]]) for j in range(nb_works)]), reverse=True)

        # Summarize the results of the ranking for the_user_id:
        # “=> rank, title, score”
        c = 0
        for i, (rating, (work_id, title)) in enumerate(ranking, start=1):
            if work_id not in seen_works:
                print('=>', i, title, rating, self.predict([(the_user_id, work_id)]))
                # Only create the recommendation if it does not exist yet.
                if Recommendation.objects.filter(user=svd_user, target_user__id=the_user_id, work__id=work_id).count() == 0:
                    Recommendation.objects.create(user=svd_user, target_user_id=the_user_id, work_id=work_id)
                c += 1
            elif i < TOP:
                print(i, title, rating)
            if c >= TOP:
                break
        """print(len(connection.queries), 'queries')
        for line in connection.queries:
            print(line)"""
        self.chrono.save('complete')

    def __str__(self):
        return '[SVD]'

    def get_shortname(self):
        return 'svd'
class MangakiSVD(object):
    """SVD recommender: dense per-user mean-centered matrix factored with
    ``randomized_svd``; includes a DB-coupled get_reco() diagnostic path."""
    M = None
    U = None
    sigma = None
    VT = None
    chrono = None
    inv_work = None    # work_id -> matrix column index
    inv_user = None    # user_id -> matrix row index
    work_titles = None # work_id -> title

    def __init__(self, NB_COMPONENTS=10, NB_ITERATIONS=10):
        self.NB_COMPONENTS = NB_COMPONENTS
        self.NB_ITERATIONS = NB_ITERATIONS
        self.chrono = Chrono(True)

    def save(self, filename):
        # Pickle the whole model object.
        with open(filename, 'wb') as f:
            pickle.dump(self, f)

    def load(self, filename):
        # Restore the fields of a model pickled by save().
        with open(filename, 'rb') as f:
            backup = pickle.load(f)
        self.M = backup.M
        self.U = backup.U
        self.sigma = backup.sigma
        self.VT = backup.VT
        self.inv_work = backup.inv_work
        self.inv_user = backup.inv_user
        self.work_titles = backup.work_titles

    def set_parameters(self, nb_users, nb_works):
        self.nb_users = nb_users
        self.nb_works = nb_works

    def make_matrix(self, X, y):
        """Build the dense user × work matrix, centered per user.

        Returns (matrix, means) where means[i] is user i's average over
        their nonzero ratings (0 for users with no rating at all).
        """
        matrix = np.zeros((self.nb_users, self.nb_works), dtype=np.float64)
        for (user, work), rating in zip(X, y):
            matrix[user][work] = rating
        means = np.zeros((self.nb_users, ))
        for i in range(self.nb_users):
            means[i] = np.sum(matrix[i]) / np.sum(matrix[i] != 0)
            if np.isnan(means[i]):  # User with no rating: 0/0 above
                means[i] = 0
            # Center only the entries that were actually rated.
            matrix[i][matrix[i] != 0] -= means[i]
        return matrix, means

    def fit(self, X, y):
        print("Computing M: (%i × %i)" % (self.nb_users, self.nb_works))
        matrix, self.means = self.make_matrix(X, y)
        self.chrono.save('fill and center matrix')
        self.U, self.sigma, self.VT = randomized_svd(matrix,
                                                     self.NB_COMPONENTS,
                                                     n_iter=self.NB_ITERATIONS,
                                                     random_state=42)
        print('Shapes', self.U.shape, self.sigma.shape, self.VT.shape)
        # Low-rank reconstruction of the centered matrix.
        self.M = self.U.dot(np.diag(self.sigma)).dot(self.VT)
        self.save('backup.pickle')
        self.chrono.save('factor matrix')

    def predict(self, X):
        # X is a 2-D array of (encoded user, encoded work) pairs; add the
        # user's mean back onto the centered reconstruction.
        return self.M[X[:, 0].astype(np.int64),
                      X[:, 1].astype(np.int64)] + self.means[X[:, 0].astype(
                          np.int64)]

    def get_reco(self, username, sending=False):
        """Print diagnostics and create Recommendation rows for ``username``.

        NOTE(review): this method passes generators/lists of raw database
        ids to predict(), which indexes with X[:, 0] — this looks inherited
        from an older predict() signature; verify before relying on it.
        """
        target_user = User.objects.get(username=username)
        the_user_id = target_user.id
        svd_user = User.objects.get(username='******')
        work_ids = {
            self.inv_work[work_id]: work_id
            for work_id in self.inv_work
        }
        nb_works = len(work_ids)
        # Works already rated (other than 'willsee') are never recommended.
        seen_works = set(
            Rating.objects.filter(user__id=the_user_id).exclude(
                choice='willsee').values_list('work_id', flat=True))
        the_i = self.inv_user[the_user_id]
        self.chrono.save('get_seen_works')
        print('mon vecteur (taille %d)' % len(self.U[the_i]), self.U[the_i])
        print(self.sigma)
        # For each singular component, display the most/least aligned works.
        for i, line in enumerate(self.VT):
            print('=> Ligne %d' % (i + 1), '(ma note : %f)' % self.U[the_i][i])
            sorted_line = sorted((line[j], self.work_titles[work_ids[j]])
                                 for j in range(nb_works))[::-1]
            top5 = sorted_line[:10]
            bottom5 = sorted_line[-10:]
            for anime in top5:
                print(anime)
            for anime in bottom5:
                print(anime)
            """if i == 0 or i == 1:  # First two vectors explaining variance
                with open('vector%d.json' % (i + 1), 'w') as f:
                    vi = X.dot(line).tolist()
                    x_norm = [np.dot(X.data[k], X.data[k]) / (nb_works + 1) for k in range(nb_users + 1)]
                    f.write(json.dumps({'v': [v / math.sqrt(x_norm[k]) if x_norm[k] != 0 else float('inf') for k, v in enumerate(vi)]}))"""
        # print(VT.dot(VT.transpose()))
        # return
        the_ratings = self.predict(
            (the_user_id, work_ids[j]) for j in range(nb_works))
        ranking = sorted(zip(the_ratings,
                             [(work_ids[j], self.work_titles[work_ids[j]])
                              for j in range(nb_works)]),
                         reverse=True)

        # Summarize the results of the ranking for the_user_id:
        # “=> rank, title, score”
        c = 0
        for i, (rating, (work_id, title)) in enumerate(ranking, start=1):
            if work_id not in seen_works:
                print('=>', i, title, rating,
                      self.predict([(the_user_id, work_id)]))
                # Only create the recommendation if it does not exist yet.
                if Recommendation.objects.filter(
                        user=svd_user,
                        target_user__id=the_user_id,
                        work__id=work_id).count() == 0:
                    Recommendation.objects.create(user=svd_user,
                                                  target_user_id=the_user_id,
                                                  work_id=work_id)
                c += 1
            elif i < TOP:
                print(i, title, rating)
            if c >= TOP:
                break
        """print(len(connection.queries), 'queries')
        for line in connection.queries:
            print(line)"""
        self.chrono.save('complete')

    def __str__(self):
        return '[SVD]'

    def get_shortname(self):
        return 'svd'
def get_reco_algo(request, algo_name='als', category='all'):
    """Compute personalized recommendations for the request's user.

    Returns {'work_ids': ranked ids, 'works': {id: Work}} holding the
    NB_RECO best-scoring works the user has not rated yet, optionally
    restricted to one work category.
    """
    chrono = Chrono(is_enabled=CHRONO_ENABLED)
    user_ratings = current_user_ratings(request)
    already_rated_works = list(user_ratings)
    chrono.save('get rated works')
    algo = get_algo_backup_or_fit_knn(algo_name)
    # Restrict the user's ratings to works known to the trained algorithm.
    available_works = set(algo.dataset.encode_work.keys())
    df_rated_works = (pd.DataFrame(
        list(user_ratings.items()),
        columns=['work_id', 'choice']).query('work_id in @available_works'))
    enc_rated_works = df_rated_works['work_id'].map(algo.dataset.encode_work)
    user_rating_values = df_rated_works['choice'].map(rating_values)
    # User gave the same rating to all works considered in the reco
    if algo_name == 'als' and len(set(user_rating_values)) == 1:
        algo = get_algo_backup_or_fit_knn('knn')
    chrono.save('retrieve or fit %s' % algo.get_shortname())
    category_filter = algo.dataset.interesting_works
    if category != 'all':
        category_filter &= set(
            Work.objects.filter(category__slug=category).values_list(
                'id', flat=True))
    # Candidate works: interesting, within category, and not rated yet.
    filtered_works = list((algo.dataset.interesting_works & category_filter) -
                          set(already_rated_works))
    chrono.save('remove already rated, left {:d}'.format(len(filtered_works)))
    pos_of_best = get_personalized_ranking(algo,
                                           request.user.id,
                                           filtered_works,
                                           enc_rated_works,
                                           user_rating_values,
                                           limit=NB_RECO)
    best_work_ids = [filtered_works[pos] for pos in pos_of_best]
    chrono.save('compute every prediction')
    works = Work.objects.in_bulk(best_work_ids)
    # Some of the works may have been deleted since the algo backup was created
    ranked_work_ids = [
        work_id for work_id in best_work_ids if work_id in works
    ]
    chrono.save('get bulk')
    return {'work_ids': ranked_work_ids, 'works': works}
class MangakiWALS(object):
    """Weighted Alternating Least Squares backed by TensorFlow.

    Ratings are centered per user, factorized with
    ``factorization_ops.WALSModel``, and the dense reconstruction is stored
    in ``self.M`` for predictions.
    """

    M = None
    U = None
    VT = None

    def __init__(self, NB_COMPONENTS=20):
        """An implementation of the Weighted Alternate Least Squares.

        NB_COMPONENTS: the number of components in the factorization.
        """
        self.NB_COMPONENTS = NB_COMPONENTS
        self.chrono = Chrono(True)
        # NOTE(review): bound to a local name only; presumably kept for
        # TensorFlow's implicit default-session mechanism — confirm.
        session = tf.InteractiveSession()

    def save(self, filename):
        """Pickle this whole object under pickles/<filename>."""
        with open(os.path.join('pickles', filename), 'wb') as handle:
            pickle.dump(self, handle)

    def load(self, filename):
        """Restore M, U, VT and the per-user means from pickles/<filename>."""
        with open(os.path.join('pickles', filename), 'rb') as handle:
            backup = pickle.load(handle)
        self.M = backup.M
        self.U = backup.U
        self.VT = backup.VT
        self.means = backup.means

    def set_parameters(self, nb_users, nb_works):
        """Fix the dimensions of the rating matrix."""
        self.nb_users = nb_users
        self.nb_works = nb_works

    def make_matrix(self, X, y):
        """Center ratings per user; return (indices, values, means)."""
        centered = defaultdict(dict)
        counts = Counter()
        means = np.zeros((self.nb_users,))
        seen_users = set()
        for (user, work), rating in zip(X, y):
            centered[(user, work)] = rating
            means[user] += rating
            counts[user] += 1
            seen_users.add(user)
        for user in seen_users:
            means[user] /= counts[user]
        indices = []
        values = []
        for user, work in X:
            value = centered[(user, work)] - means[user]
            centered[(user, work)] = value
            indices.append((user, work))
            values.append(value)
        return indices, values, means

    def factorize(self, indices, values):
        """Run TF WALS on the sparse centered matrix; return U·Vᵀ (dense)."""
        rows = self.nb_users
        cols = self.nb_works
        dims = self.NB_COMPONENTS
        # Drawn but currently unused (weights are disabled below); kept so the
        # global NumPy RNG state evolves exactly as before.
        row_wts = 0.1 + np.random.rand(rows)
        col_wts = 0.1 + np.random.rand(cols)
        inp = sparse_tensor.SparseTensor(indices, values, [rows, cols])
        use_factors_weights_cache = True
        model = factorization_ops.WALSModel(
            rows, cols, dims,
            unobserved_weight=1,
            regularization=0.001,
            row_weights=None,   # row_wts intentionally disabled
            col_weights=None,   # col_wts intentionally disabled
            use_factors_weights_cache=use_factors_weights_cache)
        simple_train(model, inp, 25)
        row_factor = model.row_factors[0].eval()
        print('Shape', row_factor.shape)
        col_factor = model.col_factors[0].eval()
        print('Shape', col_factor.shape)
        return np.dot(row_factor, np.transpose(col_factor))

    def fit(self, X, y):
        """Center the ratings, factorize, and store the reconstruction."""
        print("Computing M: (%i × %i)" % (self.nb_users, self.nb_works))
        indices, values, self.means = self.make_matrix(X, y)
        self.chrono.save('fill and center matrix')
        self.M = self.factorize(indices, values)
        self.chrono.save('factor matrix')

    def predict(self, X):
        """Reconstructed rating plus user mean for (user, work) rows of X."""
        users = X[:, 0].astype(np.int64)
        works = X[:, 1].astype(np.int64)
        return self.M[users, works] + self.means[users]

    def __str__(self):
        return '[WALS]'

    def get_shortname(self):
        return 'wals'
def __init__(self, NB_COMPONENTS=10, NB_ITERATIONS=10, LAMBDA=0.1):
    """Store the ALS hyper-parameters.

    NB_COMPONENTS: rank of the factorization.
    NB_ITERATIONS: number of alternating passes.
    LAMBDA: L2 regularization strength.
    """
    self.NB_COMPONENTS = NB_COMPONENTS
    self.NB_ITERATIONS = NB_ITERATIONS
    self.LAMBDA = LAMBDA
    # True: print timing checkpoints while the algorithm runs.
    self.chrono = Chrono(True)
class MangakiALS(object):
    """Alternating Least Squares matrix factorization.

    Ratings are centered per user; U (users × k) and VT (k × works) are then
    alternately refitted in closed form, with L2 regularization proportional
    to the number of ratings of each user/work.  ``predict`` returns the
    reconstruction plus the user mean.
    """

    M = None
    U = None
    VT = None

    def __init__(self, NB_COMPONENTS=10, NB_ITERATIONS=10, LAMBDA=0.1):
        """NB_COMPONENTS: rank; NB_ITERATIONS: ALS passes; LAMBDA: L2."""
        self.NB_COMPONENTS = NB_COMPONENTS
        self.NB_ITERATIONS = NB_ITERATIONS
        self.LAMBDA = LAMBDA
        self.chrono = Chrono(True)  # True: print timing checkpoints

    def save(self, filename):
        """Pickle the whole model object to `filename`."""
        with open(filename, 'wb') as f:
            pickle.dump(self, f)

    def load(self, filename):
        """Restore a model pickled by `save`."""
        with open(filename, 'rb') as f:
            backup = pickle.load(f)
        self.M = backup.M
        self.U = backup.U
        self.VT = backup.VT
        # Bug fix: `means` was not restored, so `predict` crashed (or used
        # stale data) on a freshly loaded model.  Old pickles may predate the
        # attribute, hence the getattr fallback.
        self.means = getattr(backup, 'means', None)

    def set_parameters(self, nb_users, nb_works):
        """Fix the dimensions of the rating matrix."""
        self.nb_users = nb_users
        self.nb_works = nb_works

    def make_matrix(self, X, y):
        """Build matrix[user][work] of per-user-centered ratings.

        Returns (matrix, means) where means[user] is the raw rating mean.
        """
        matrix = defaultdict(dict)
        means = np.zeros((self.nb_users,))
        for (user, work), rating in zip(X, y):
            matrix[user][work] = rating
            means[user] += rating
        for user in matrix:
            means[user] /= len(matrix[user])
        for (user, work) in X:
            matrix[user][work] -= means[user]
        return matrix, means

    def fit_user(self, user, matrix):
        """Closed-form refit of the user's row of U, VT held fixed."""
        Ru = np.array(list(matrix[user].values()), ndmin=2).T
        Vu = self.VT[:, list(matrix[user].keys())]
        Gu = self.LAMBDA * len(matrix[user]) * np.eye(self.NB_COMPONENTS)
        self.U[[user], :] = np.linalg.solve(Vu.dot(Vu.T) + Gu, Vu.dot(Ru)).T

    def fit_work(self, work, matrixT):
        """Closed-form refit of the work's column of VT, U held fixed."""
        Ri = np.array(list(matrixT[work].values()), ndmin=2).T
        Ui = self.U[list(matrixT[work].keys()), :].T
        Gi = self.LAMBDA * len(matrixT[work]) * np.eye(self.NB_COMPONENTS)
        self.VT[:, [work]] = np.linalg.solve(Ui.dot(Ui.T) + Gi, Ui.dot(Ri))

    def factorize(self, matrix, random_state):
        """Alternate closed-form updates of U and VT.

        Bug fix: `random_state` used to be accepted but ignored (unseeded
        np.random.rand), making every fit nondeterministic even though fit()
        passes random_state=42.  The initial factors are now drawn from a
        seeded RandomState.
        """
        # Transposed view: matrixT[work][user] = centered rating.
        matrixT = defaultdict(dict)
        for user in matrix:
            for work in matrix[user]:
                matrixT[work][user] = matrix[user][work]
        rng = np.random.RandomState(random_state)
        self.U = rng.rand(self.nb_users, self.NB_COMPONENTS)
        self.VT = rng.rand(self.NB_COMPONENTS, self.nb_works)
        for _ in range(self.NB_ITERATIONS):
            for user in matrix:
                self.fit_user(user, matrix)
            for work in matrixT:
                self.fit_work(work, matrixT)

    def fit(self, X, y):
        """Fit the factorization on (user, work) pairs X with ratings y."""
        print("Computing M: (%i × %i)" % (self.nb_users, self.nb_works))
        matrix, self.means = self.make_matrix(X, y)
        self.chrono.save('fill and center matrix')
        self.factorize(matrix, random_state=42)
        print('Shapes', self.U.shape, self.VT.shape)
        self.M = self.U.dot(self.VT)
        self.chrono.save('factor matrix')

    def predict(self, X):
        """Predicted rating for each (user, work) row of the array X."""
        users = X[:, 0].astype(np.int64)
        works = X[:, 1].astype(np.int64)
        return self.M[users, works] + self.means[users]

    def __str__(self):
        return '[ALS]'

    def get_shortname(self):
        return 'als'
def __init__(self):
    """Initialize the algorithm shell with project-wide defaults.

    Verbosity comes from the Django settings; the matrix dimensions stay
    unset until ``set_parameters`` is called.
    """
    self.verbose = settings.RECO_ALGORITHMS_DEFAULT_VERBOSE
    # Timing checkpoints, silenced unless verbose.
    self.chrono = Chrono(self.verbose)
    self.nb_users = None
    self.nb_works = None
def __init__(self, NB_COMPONENTS=10):
    """Keep the factorization size and start a verbose chronometer."""
    self.NB_COMPONENTS = NB_COMPONENTS  # number of latent components
    self.chrono = Chrono(True)  # True: print timing checkpoints
    self.VT = None  # item-factor matrix, filled in later by fit()
def __init__(self, NB_COMPONENTS=10):
    """Set up the model and preload work titles.

    NB_COMPONENTS: number of latent components.
    Reads data/works.csv (no header row); column 1 holds the titles.
    """
    self.NB_COMPONENTS = NB_COMPONENTS
    self.chrono = Chrono(True)  # True: print timing checkpoints
    # Bug fix: DataFrame.as_matrix() was deprecated in pandas 0.23 and
    # removed in 1.0; to_numpy() returns the same ndarray.
    self.works = pandas.read_csv('data/works.csv',
                                 header=None).to_numpy()[:, 1]
def get_recommendations(user, category, editor):
    """Neighborhood-based collaborative filtering for `user`.

    Scores every other user by how much their ratings agree with `user`'s,
    keeps the NB_NEIGHBORS best ones, ranks the works those neighbors rated
    (at least MIN_RATINGS times), and returns the top 4 as
    [work, is_manga, in_willsee] entries.

    category: 'anime' or 'manga' — restricts the candidate works.
    editor: manga editor filter; 'unspecified' disables it.
    """
    # What if user is not authenticated? We will see soon.
    chrono = Chrono(CHRONO_ENABLED)
    chrono.save('[%dQ] begin' % len(connection.queries))
    # All of the user's ratings: work_id -> choice.
    rated_works = {}
    for work_id, choice in Rating.objects.filter(user=user).values_list(
            'work_id', 'choice'):
        rated_works[work_id] = choice
    # Already-rated works are banned from the recommendations, except
    # 'willsee' ones when the user opted in (reco_willsee_ok).
    willsee = set()
    if user.profile.reco_willsee_ok:
        banned_works = set()
        for work_id in rated_works:
            if rated_works[work_id] != 'willsee':
                banned_works.add(work_id)
            else:
                willsee.add(work_id)
    else:
        banned_works = set(rated_works.keys())
    # Optional editor filter (only meaningful for mangas).
    mangas = Manga.objects.all()
    if editor == 'otototaifu':
        mangas = mangas.filter(editor__in=['Ototo Manga', 'Taifu comics'])
    elif editor != 'unspecified':
        mangas = mangas.filter(editor__icontains=editor)
    manga_ids = mangas.values_list('id', flat=True)
    kept_works = None
    if category == 'anime':
        banned_works |= set(manga_ids)
    elif category == 'manga':
        kept_works = set(manga_ids)
    chrono.save('[%dQ] retrieve her %d ratings' % (len(connection.queries),
                                                   len(rated_works)))
    # Weight of each choice when comparing two users' ratings.
    values = {'favorite': 4, 'like': 2, 'dislike': -2, 'neutral': 0.1,
              'willsee': 0.5, 'wontsee': -0.5}
    final_works = Counter()
    nb_ratings = {}  # NOTE(review): dead assignment — rebound to a Counter below
    c = 0
    # neighbors[user_id] = affinity with `user`: sum over common works of the
    # product of both users' rating weights.
    neighbors = Counter()
    for user_id, work_id, choice in Rating.objects.filter(
            work__in=rated_works.keys()).values_list(
                'user_id', 'work_id', 'choice'):
        c += 1
        neighbors[user_id] += values[rated_works[work_id]] * values[choice]
    chrono.save('[%dQ] fill neighbors with %d ratings' %
                (len(connection.queries), c))
    # Keep the NB_NEIGHBORS users with the highest affinity.
    score_of_neighbor = {}
    for user_id, score in neighbors.most_common(NB_NEIGHBORS):
        score_of_neighbor[user_id] = score
    # Aggregate the neighbors' actual ratings over the candidate works.
    sum_ratings = Counter()
    nb_ratings = Counter()
    sum_scores = Counter()
    i = 0
    for work_id, user_id, choice in Rating.objects.filter(
            user__id__in=score_of_neighbor.keys()).exclude(
                choice__in=['willsee', 'wontsee']).values_list(
                    'work_id', 'user_id', 'choice'):
        i += 1
        # NOTE(review): if kept_works is an *empty* set, the second condition
        # is skipped (empty set is falsy) and every work passes — confirm
        # this is intended.
        if work_id in banned_works or (kept_works and
                                       work_id not in kept_works):
            continue
        sum_ratings[work_id] += values[choice]
        nb_ratings[work_id] += 1
        sum_scores[work_id] += score_of_neighbor[user_id]
    chrono.save('[%dQ] compute and filter all ratings from %d sources' %
                (len(connection.queries), i))
    i = 0
    k = 0
    for work_id in nb_ratings:
        # Adding interesting works to the arena (rated at least MIN_RATINGS
        # by neighbors); ranked by (mean rating, total neighbor score).
        if nb_ratings[work_id] >= MIN_RATINGS:
            k += 1
            final_works[(work_id, work_id in manga_ids,
                         work_id in willsee)] = (
                float(sum_ratings[work_id]) / nb_ratings[work_id],
                sum_scores[work_id])
        i += 1
    chrono.save('[%dQ] rank %d %d works' % (len(connection.queries), k, i))
    # Retrieving top 4: replace ids by Work objects while keeping the order.
    reco = []
    rank = 0
    rank_of = {}
    for (work_id, is_manga, in_willsee), _ in final_works.most_common(4):
        rank_of[work_id] = rank
        reco.append([work_id, is_manga, in_willsee])
        rank += 1
    works = Work.objects.filter(id__in=rank_of.keys())
    for work in works:
        reco[rank_of[work.id]][0] = work
    """print(len(connection.queries), 'queries')
    for line in connection.queries:
        print(line)"""
    chrono.save('[%dQ] retrieve top 4' % len(connection.queries))
    return reco
def __init__(self, NB_COMPONENTS=20):
    """Record the number of latent components and start a verbose timer."""
    self.NB_COMPONENTS = NB_COMPONENTS  # rank of the factorization
    self.chrono = Chrono(True)  # True: print timing checkpoints
def get_profile(request, username):
    """Render `username`'s profile page: their ratings split by category
    (anime/manga) and seen/unseen, plus sent/received recommendations when
    the 'recommendation' tab is requested."""
    chrono = Chrono(True)
    try:
        is_shared = Profile.objects.get(user__username=username).is_shared
    except Profile.DoesNotExist:
        # NOTE(review): this creates a profile for the *requesting* user even
        # though the missing profile belongs to `username` — confirm intended.
        Profile(user=request.user).save()  # To be removed eventually
        is_shared = True
    user = User.objects.get(username=username)
    category = request.GET.get('category', 'anime')
    # Display order of the rating choices on the page.
    ordering = ['favorite', 'willsee', 'like', 'neutral', 'dislike', 'wontsee']

    def _is_anime(work):
        # A Work counts as an anime iff its related Anime row exists.
        try:
            work.anime
            return True
        except Anime.DoesNotExist:
            return False

    # Sort by choice first, then by title.
    rating_list = sorted(
        Rating.objects.filter(user__username=username).select_related(
            'work', 'work__anime', 'work__manga'),
        key=lambda x: (ordering.index(x.choice), x.work.title))

    received_recommendation_list = []
    sent_recommendation_list = []
    if category == 'recommendation':
        received_recommendations = Recommendation.objects.filter(
            target_user__username=username)
        sent_recommendations = Recommendation.objects.filter(
            user__username=username)
        # Only keep recommendations whose work the target has not seen yet.
        for reco in received_recommendations:
            if Rating.objects.filter(
                    work=reco.work, user__username=username,
                    choice__in=['favorite', 'like', 'neutral',
                                'dislike']).count() == 0:
                received_recommendation_list.append({
                    'category': 'anime' if _is_anime(reco.work) else 'manga',
                    'id': reco.work.id,
                    'title': reco.work.title,
                    'username': reco.user.username})
        for reco in sent_recommendations:
            if Rating.objects.filter(
                    work=reco.work, user=reco.target_user,
                    choice__in=['favorite', 'like', 'neutral',
                                'dislike']).count() == 0:
                sent_recommendation_list.append({
                    'category': 'anime' if _is_anime(reco.work) else 'manga',
                    'id': reco.work.id,
                    'title': reco.work.title,
                    'username': reco.target_user.username})

    # Split the ratings into four buckets: (anime|manga) × (seen|unseen).
    seen_anime_list = []
    unseen_anime_list = []
    seen_manga_list = []
    unseen_manga_list = []
    for r in rating_list:
        seen = r.choice in ['favorite', 'like', 'neutral', 'dislike']
        if _is_anime(r.work):
            (seen_anime_list if seen else unseen_anime_list).append(r)
        else:
            (seen_manga_list if seen else unseen_manga_list).append(r)

    member_time = datetime.datetime.now().replace(
        tzinfo=utc) - user.date_joined
    seen_list = seen_anime_list if category == 'anime' else seen_manga_list
    unseen_list = (unseen_anime_list if category == 'anime'
                   else unseen_manga_list)
    data = {
        'username': username,
        'score': user.profile.score,
        'is_shared': is_shared,
        'category': category,
        'avatar_url': user.profile.get_avatar_url(),
        'member_days': member_time.days,
        'anime_count': len(seen_anime_list),
        'manga_count': len(seen_manga_list),
        'reco_count': len(received_recommendation_list),
        'seen_list': seen_list if is_shared else [],
        'unseen_list': unseen_list if is_shared else [],
        'received_recommendation_list':
            received_recommendation_list if is_shared else [],
        'sent_recommendation_list':
            sent_recommendation_list if is_shared else [],
    }
    # Debug output; len() raises TypeError on non-sized values.
    for key in data:
        try:
            print(key, len(data[key]))
        except TypeError:
            print(key, '->', data[key])
    chrono.save('get request')
    # Fix: pass `data` instead of rebuilding an identical dict literal.
    return render(request, 'profile.html', data)
class MangakiSVD(object):
    """SVD-based recommender: factorizes the user × work rating matrix with
    randomized SVD and derives per-user rankings and Recommendation rows."""

    M = None            # sparse rating matrix (users × works)
    U = None            # left singular vectors (users × components)
    sigma = None        # singular values
    VT = None           # right singular vectors (components × works)
    chrono = None       # timing helper
    inv_work = None     # work_id -> column index
    inv_user = None     # user_id -> row index
    work_titles = None  # work_id -> title

    def __init__(self):
        self.chrono = Chrono(True)

    def save(self, filename):
        """Pickle this whole object to `filename`."""
        with open(filename, 'wb') as f:
            pickle.dump(self, f)

    def load(self, filename):
        """Restore all fitted attributes pickled by `save`."""
        with open(filename, 'rb') as f:
            backup = pickle.load(f)
        self.M = backup.M
        self.U = backup.U
        self.sigma = backup.sigma
        self.VT = backup.VT
        self.inv_work = backup.inv_work
        self.inv_user = backup.inv_user
        self.work_titles = backup.work_titles

    def fit(self, X, y):
        """Build the sparse rating matrix from (user_id, work_id)/rating
        pairs, factorize it with randomized SVD, and pickle the result."""
        self.work_titles = {}
        for work in Work.objects.values('id', 'title'):
            self.work_titles[work['id']] = work['title']
        # Index maps: only works that have at least one rating get a column.
        work_ids = list(
            Rating.objects.values_list('work_id', flat=True).distinct())
        nb_works = len(work_ids)
        self.inv_work = {work_ids[i]: i for i in range(nb_works)}
        user_ids = list(User.objects.values_list('id', flat=True))
        nb_users = len(user_ids)
        self.inv_user = {user_ids[i]: i for i in range(nb_users)}
        self.chrono.save('get_work_ids')
        # print("Computing M: (%i × %i)" % (nb_users, nb_works))
        self.M = lil_matrix((nb_users, nb_works))
        """ratings_of = {}
        for (user_id, work_id), rating in zip(X, y):
            ratings_of.setdefault(user_id, []).append(rating)"""
        for (user_id, work_id), rating in zip(X, y):
            self.M[self.inv_user[user_id],
                   self.inv_work[work_id]] = rating  #- np.mean(ratings_of[user_id])
        # np.save('backupM', self.M)
        self.chrono.save('fill matrix')
        # Ranking computation
        self.U, self.sigma, self.VT = randomized_svd(self.M, NB_COMPONENTS,
                                                     n_iter=3,
                                                     random_state=42)
        # print('Shapes', self.U.shape, self.sigma.shape, self.VT.shape)
        self.save('backup.pickle')
        self.chrono.save('factor matrix')

    def predict(self, X):
        """Predicted rating u_i · diag(sigma) · v_j for each (user, work)."""
        y = []
        for user_id, work_id in X:
            i = self.inv_user[user_id]
            j = self.inv_work[work_id]
            y.append(self.U[i].dot(np.diag(self.sigma)).dot(
                self.VT.transpose()[j]))
        return np.array(y)

    def get_reco(self, username, sending=False):
        """Print diagnostics and create Recommendation rows for `username`.

        NOTE(review): the `sending` parameter is currently unused — confirm.
        """
        target_user = User.objects.get(username=username)
        the_user_id = target_user.id
        # Presumably the bot account that owns generated recommendations.
        svd_user = User.objects.get(username='******')
        # Inverse of inv_work: column index -> work_id.
        work_ids = {
            self.inv_work[work_id]: work_id
            for work_id in self.inv_work
        }
        nb_works = len(work_ids)
        # Everything already rated (except 'willsee') is considered seen.
        seen_works = set(
            Rating.objects.filter(user__id=the_user_id).exclude(
                choice='willsee').values_list('work_id', flat=True))
        the_i = self.inv_user[the_user_id]
        self.chrono.save('get_seen_works')
        # Debug: the user's latent vector and the extreme works of each
        # latent dimension.
        print('mon vecteur (taille %d)' % len(self.U[the_i]), self.U[the_i])
        print(self.sigma)
        for i, line in enumerate(self.VT):
            print('=> Ligne %d' % (i + 1),
                  '(ma note : %f)' % self.U[the_i][i])
            sorted_line = sorted((line[j], self.work_titles[work_ids[j]])
                                 for j in range(nb_works))[::-1]
            top5 = sorted_line[:10]     # actually the top 10
            bottom5 = sorted_line[-10:]  # actually the bottom 10
            for anime in top5:
                print(anime)
            for anime in bottom5:
                print(anime)
            """if i == 0 or i == 1:  # First two vectors explaining variance
                with open('vector%d.json' % (i + 1), 'w') as f:
                    vi = X.dot(line).tolist()
                    x_norm = [np.dot(X.data[k], X.data[k]) / (nb_works + 1) for k in range(nb_users + 1)]
                    f.write(json.dumps({'v': [v / math.sqrt(x_norm[k]) if x_norm[k] != 0 else float('inf') for k, v in enumerate(vi)]}))"""
        # print(VT.dot(VT.transpose()))
        # return
        # Score every known work for this user, then rank them.
        the_ratings = self.predict(
            (the_user_id, work_ids[j]) for j in range(nb_works))
        ranking = sorted(zip(the_ratings,
                             [(work_ids[j], self.work_titles[work_ids[j]])
                              for j in range(nb_works)]),
                         reverse=True)
        # Summarize the results of the ranking for the_user_id:
        # “=> rank, title, score”
        c = 0
        for i, (rating, (work_id, title)) in enumerate(ranking, start=1):
            if work_id not in seen_works:
                print('=>', i, title, rating,
                      self.predict([(the_user_id, work_id)]))
                # Only create the Recommendation if it does not exist yet.
                if Recommendation.objects.filter(
                        user=svd_user,
                        target_user__id=the_user_id,
                        work__id=work_id).count() == 0:
                    Recommendation.objects.create(user=svd_user,
                                                  target_user_id=the_user_id,
                                                  work_id=work_id)
                c += 1
            elif i < TOP:
                print(i, title, rating)
            if c >= TOP:  # stop after TOP new recommendations
                break
        """print(len(connection.queries), 'queries')
        for line in connection.queries:
            print(line)"""
        self.chrono.save('complete')

    def __str__(self):
        return '[SVD]'

    def get_shortname(self):
        return 'svd'
def get_reco_algo(request, algo_name='knn', category='all'):
    """Compute the NB_RECO best works for the current user with `algo_name`.

    Loads the pickled (algo, dataset) backup when available, otherwise refits
    from all ratings.  For KNN, the current user's ratings are appended as an
    extra row of the model matrix so anonymous users can be served too.

    Returns {'work_ids': ordered ids, 'works': id -> Work mapping}.
    """
    chrono = Chrono(is_enabled=CHRONO_ENABLED)
    already_rated_works = list(current_user_ratings(request))
    if request.user.is_anonymous:
        assert request.user.id is None
        # We only support KNN for anonymous users, since the offline models
        # did not learn anything about them.
        # FIXME: We should also force KNN for new users for which we have no
        # offline trained model available.
        algo_name = 'knn'
    chrono.save('get rated works')
    try:
        algo = get_algo_backup(algo_name)
        dataset = get_dataset_backup(algo_name)
    except FileNotFoundError:
        # No pickled backup on disk: retrain from every stored rating.
        triplets = list(
            Rating.objects.values_list('user_id', 'work_id', 'choice'))
        chrono.save('get all %d interesting ratings' % len(triplets))
        dataset, algo = fit_algo(algo_name, triplets)
    if algo_name == 'knn':
        # Keep only the rated works the model knows how to encode.
        available_works = set(dataset.encode_work.keys())
        framed_rated_works = (pd.DataFrame(
            list(current_user_ratings(request).items()),
            columns=['work_id',
                     'choice']).query('work_id in @available_works'))
        framed_rated_works['encoded_work_id'] = dataset.encode_works(
            framed_rated_works['work_id'])
        framed_rated_works['rating'] = framed_rated_works['choice'].map(
            rating_values)
        nb_rated_works = len(framed_rated_works)
        # A single sparse row holding the current user's ratings.
        ratings_from_user = coo_matrix(
            (framed_rated_works['rating'],
             ([0.] * nb_rated_works, framed_rated_works['encoded_work_id'])),
            shape=(1, algo.nb_works))
        ratings_from_user = ratings_from_user.tocsr()
        # Expands knn.M with current user ratings (vstack is too slow) by
        # editing the CSR arrays in place.  Order matters: nnz must be read
        # before indptr is replaced.
        algo.M.data = np.hstack((algo.M.data, ratings_from_user.data))
        algo.M.indices = np.hstack((algo.M.indices,
                                    ratings_from_user.indices))
        algo.M.indptr = np.hstack(
            (algo.M.indptr, (ratings_from_user.indptr + algo.M.nnz)[1:]))
        algo.M._shape = (algo.M.shape[0] + ratings_from_user.shape[0],
                         ratings_from_user.shape[1])
        chrono.save('loading knn and expanding with current user ratings')
    chrono.save('fit %s' % algo.get_shortname())
    if category != 'all':
        category_filter = set(
            Work.objects.filter(category__slug=category).values_list(
                'id', flat=True))
    else:
        category_filter = dataset.interesting_works
    # Candidates: interesting works in the category, minus already rated.
    filtered_works = list((dataset.interesting_works & category_filter) -
                          set(already_rated_works))
    chrono.save('remove already rated')
    pos_of_best = get_pos_of_best_works_for_user_via_algo(algo,
                                                          dataset,
                                                          request.user.id,
                                                          filtered_works,
                                                          limit=NB_RECO)
    best_work_ids = [filtered_works[pos] for pos in pos_of_best]
    chrono.save('compute every prediction')
    works = Work.objects.in_bulk(best_work_ids)
    # Some of the works may have been deleted since the algo backup was
    # created.
    ranked_work_ids = [
        work_id for work_id in best_work_ids if work_id in works
    ]
    chrono.save('get bulk')
    return {'work_ids': ranked_work_ids, 'works': works}
def __init__(self, NB_COMPONENTS=20):
    """An implementation of the Weighted Alternate Least Squares.

    NB_COMPONENTS: the number of components in the factorization.
    """
    self.NB_COMPONENTS = NB_COMPONENTS
    self.chrono = Chrono(True)  # True: print timing checkpoints
    # NOTE(review): bound to a local name only; presumably relied upon as
    # TensorFlow's implicit default session — confirm.
    session = tf.InteractiveSession()
def get_profile(request, username):
    """Render `username`'s profile page: ratings split by category and
    seen/unseen, recommendations on the 'recommendation' tab, and upcoming
    events the user attends."""
    chrono = Chrono(True)
    try:
        is_shared = Profile.objects.get(user__username=username).is_shared
    except Profile.DoesNotExist:
        # NOTE(review): this creates a profile for the *requesting* user even
        # though the missing profile belongs to `username` — confirm intended.
        Profile(user=request.user).save()  # To be removed eventually
        is_shared = True
    user = User.objects.get(username=username)
    category = request.GET.get('category', 'anime')
    # Display order of the rating choices on the page.
    ordering = ['favorite', 'willsee', 'like', 'neutral', 'dislike', 'wontsee']

    def _is_anime(work):
        # A Work counts as an anime iff its related Anime row exists.
        try:
            work.anime
            return True
        except Anime.DoesNotExist:
            return False

    # Sort by choice first, then by title.
    rating_list = sorted(
        Rating.objects.filter(user__username=username).select_related(
            'work', 'work__anime', 'work__manga'),
        key=lambda x: (ordering.index(x.choice), x.work.title))

    received_recommendation_list = []
    sent_recommendation_list = []
    if category == 'recommendation':
        received_recommendations = Recommendation.objects.filter(
            target_user__username=username)
        sent_recommendations = Recommendation.objects.filter(
            user__username=username)
        # Only keep recommendations whose work the target has not seen yet.
        for reco in received_recommendations:
            if Rating.objects.filter(
                    work=reco.work, user__username=username,
                    choice__in=['favorite', 'like', 'neutral',
                                'dislike']).count() == 0:
                received_recommendation_list.append({
                    'category': 'anime' if _is_anime(reco.work) else 'manga',
                    'id': reco.work.id,
                    'title': reco.work.title,
                    'username': reco.user.username
                })
        for reco in sent_recommendations:
            if Rating.objects.filter(
                    work=reco.work, user=reco.target_user,
                    choice__in=['favorite', 'like', 'neutral',
                                'dislike']).count() == 0:
                sent_recommendation_list.append({
                    'category': 'anime' if _is_anime(reco.work) else 'manga',
                    'id': reco.work.id,
                    'title': reco.work.title,
                    'username': reco.target_user.username
                })

    # Split the ratings into four buckets: (anime|manga) × (seen|unseen).
    seen_anime_list = []
    unseen_anime_list = []
    seen_manga_list = []
    unseen_manga_list = []
    for r in rating_list:
        seen = r.choice in ['favorite', 'like', 'neutral', 'dislike']
        if _is_anime(r.work):
            (seen_anime_list if seen else unseen_anime_list).append(r)
        else:
            (seen_manga_list if seen else unseen_manga_list).append(r)

    member_time = datetime.datetime.now().replace(
        tzinfo=utc) - user.date_joined
    seen_list = seen_anime_list if category == 'anime' else seen_manga_list
    unseen_list = (unseen_anime_list if category == 'anime'
                   else unseen_manga_list)

    # Upcoming events the user attends.
    # NOTE(review): select_related('event__anime__title') names a *field*;
    # Django normally rejects non-relational paths here — 'event__anime' is
    # probably intended; confirm.
    events = [{
        'id': attendee.event_id,
        'anime_id': attendee.event.anime_id,
        'attending': True,
        'type': attendee.event.get_event_type_display(),
        'channel': attendee.event.channel,
        'date': attendee.event.get_date(),
        'link': attendee.event.link,
        'location': attendee.event.location,
        'title': attendee.event.anime.title,
    } for attendee in user.attendee_set.filter(
        event__date__gte=timezone.now()).select_related(
            'event', 'event__anime__title')]

    data = {
        'username': username,
        'score': user.profile.score,
        'is_shared': is_shared,
        'category': category,
        'avatar_url': user.profile.get_avatar_url(),
        'member_days': member_time.days,
        'anime_count': len(seen_anime_list),
        'manga_count': len(seen_manga_list),
        'reco_count': len(received_recommendation_list),
        'seen_list': seen_list if is_shared else [],
        'unseen_list': unseen_list if is_shared else [],
        'received_recommendation_list':
            received_recommendation_list if is_shared else [],
        'sent_recommendation_list':
            sent_recommendation_list if is_shared else [],
        'events': events,
    }
    # Debug output; len() raises TypeError on non-sized values.
    for key in data:
        try:
            print(key, len(data[key]))
        except TypeError:
            print(key, '->', data[key])
    chrono.save('get request')
    # Fix: pass `data` (now including 'events') instead of rebuilding an
    # identical dict literal a second time.
    return render(request, 'profile.html', data)
def get_profile(request, username):
    """Render `username`'s profile page, with ratings bucketed by work
    category slug (anime/manga/album), recommendations on the
    'recommendation' tab, and upcoming attended events."""
    chrono = Chrono(True)
    try:
        is_shared = Profile.objects.get(user__username=username).is_shared
    except Profile.DoesNotExist:
        # NOTE(review): this creates a profile for the *requesting* user even
        # though the missing profile belongs to `username` — confirm intended.
        Profile(user=request.user).save()  # To be removed eventually
        is_shared = True
    user = User.objects.get(username=username)
    category = request.GET.get('category', 'anime')
    # Display order of the rating choices on the page; natural sort on title.
    ordering = ['favorite', 'willsee', 'like', 'neutral', 'dislike', 'wontsee']
    rating_list = natsorted(
        Rating.objects.filter(user__username=username).select_related('work'),
        key=lambda x: (ordering.index(x.choice), x.work.title.lower()))

    received_recommendation_list = []
    sent_recommendation_list = []
    if category == 'recommendation':
        received_recommendations = Recommendation.objects.filter(
            target_user__username=username).select_related(
                'work', 'work__category')
        sent_recommendations = Recommendation.objects.filter(
            user__username=username).select_related('work', 'work__category')
        # Only keep recommendations whose work the target has not seen yet.
        for reco in received_recommendations:
            if Rating.objects.filter(
                    work=reco.work, user__username=username,
                    choice__in=['favorite', 'like', 'neutral',
                                'dislike']).count() == 0:
                received_recommendation_list.append({
                    'category': reco.work.category.slug,
                    'id': reco.work.id,
                    'title': reco.work.title,
                    'username': reco.user.username})
        for reco in sent_recommendations:
            if Rating.objects.filter(
                    work=reco.work, user=reco.target_user,
                    choice__in=['favorite', 'like', 'neutral',
                                'dislike']).count() == 0:
                sent_recommendation_list.append({
                    'category': reco.work.category.slug,
                    'id': reco.work.id,
                    'title': reco.work.title,
                    'username': reco.target_user.username})

    # Bug fix: the 'album' bucket was initialized to 0 (an int), so appending
    # an album rating crashed with AttributeError.
    seen_lists = {'anime': [], 'manga': [], 'album': []}
    unseen_lists = {'anime': [], 'manga': [], 'album': []}
    for r in rating_list:
        if r.choice in ['favorite', 'like', 'neutral', 'dislike']:
            seen_lists[r.work.category.slug].append(r)
        else:
            # Bug fix: was `unseen_list[...]`, a NameError.
            unseen_lists[r.work.category.slug].append(r)

    member_time = datetime.datetime.now().replace(
        tzinfo=utc) - user.date_joined

    # Upcoming events the user attends.
    # NOTE(review): select_related('event__work__title') names a *field*;
    # Django normally rejects non-relational paths here — 'event__work' is
    # probably intended; confirm.
    events = [{
        'id': attendee.event_id,
        'work_id': attendee.event.work_id,
        'attending': True,
        'type': attendee.event.get_event_type_display(),
        'channel': attendee.event.channel,
        'date': attendee.event.get_date(),
        'link': attendee.event.link,
        'location': attendee.event.location,
        'title': attendee.event.work.title,
    } for attendee in user.attendee_set.filter(
        event__date__gte=timezone.now(),
        attending=True).select_related('event', 'event__work__title')]

    data = {
        'username': username,
        'score': user.profile.score,
        'is_shared': is_shared,
        'category': category,
        'avatar_url': user.profile.get_avatar_url(),
        'member_days': member_time.days,
        'anime_count': len(seen_lists['anime']),
        'manga_count': len(seen_lists['manga']),
        'reco_count': len(received_recommendation_list),
        'seen_list': seen_lists.get(category, []) if is_shared else [],
        'unseen_list': unseen_lists.get(category, []) if is_shared else [],
        'received_recommendation_list':
            received_recommendation_list if is_shared else [],
        'sent_recommendation_list':
            sent_recommendation_list if is_shared else [],
        'events': events,
    }
    # Debug output; len() raises TypeError on non-sized values.
    for key in data:
        try:
            print(key, len(data[key]))
        except TypeError:
            print(key, '->', data[key])
    chrono.save('get request')
    return render(request, 'profile.html', data)