def get_similar_questions(self):
    """
    Get up to 10 questions most similar to this one.

    Candidates are questions that share at least one tag with this
    question, excluding this question itself and questions marked as
    deleted.

    Limitation: only the first 100 candidate rows are fetched and the
    10 most similar are picked from that sample, because ranking the
    entire table may be very expensive - this function will benefit
    from some sort of optimization.

    Returns ``LazyList(get_data)``; presumably the queries run only
    when the list is first used - confirm against ``LazyList``.
    """
    def get_data():
        candidates = list(
            self.__class__.objects.filter(
                tags__in=self.tags.all()
            ).exclude(
                id=self.id,
            ).exclude(
                deleted=True
            ).distinct()[:100]
        )

        for question in candidates:
            question.similarity = self.get_similarity(
                other_question=question
            )

        # Most similar first. A key function replaces the old
        # cmp-style comparator (unsupported on Python 3); the sort is
        # stable, so equally similar questions keep their query order.
        candidates.sort(
            key=lambda question: question.similarity,
            reverse=True
        )

        # Slicing copes with fewer than 10 candidates on its own.
        return candidates[:10]

    return LazyList(get_data)
def get_similar_threads(self):
    """
    Get data about up to 10 threads most similar to this one.

    Candidates are threads that share at least one tag name with this
    thread, excluding this thread itself. Each item of the result is a
    dict with the keys ``'url'`` and ``'title'``, taken from the
    thread's question post.

    Limitation: only the first 100 candidate rows are fetched and the
    10 most similar are picked from that sample, because ranking the
    entire table may be very expensive - this function will benefit
    from some sort of optimization.

    Returns ``LazyList(get_cached_data)``; presumably the work is
    deferred until the list is first used - confirm against
    ``LazyList``.
    """

    def get_data():
        tags_list = self.get_tag_names()
        # NOTE(review): across a multi-valued relation the two
        # conditions of one exclude() call are not guaranteed to be
        # matched against the same post - confirm this only drops
        # threads whose *question* post is deleted.
        similar_threads = Thread.objects.filter(
            tags__name__in=tags_list
        ).exclude(
            id=self.id
        ).exclude(
            posts__post_type='question',
            posts__deleted=True
        ).distinct()[:100]
        similar_threads = list(similar_threads)

        for thread in similar_threads:
            thread.similarity = self.get_similarity(other_thread=thread)

        # Most similar first, then keep the top 10.
        similar_threads.sort(
            key=operator.attrgetter('similarity'),
            reverse=True
        )
        similar_threads = similar_threads[:10]

        # Denormalize questions to speed up template rendering:
        # fetch the question posts of all selected threads in one
        # query and attach each to its thread.
        thread_map = dict(
            (thread.id, thread) for thread in similar_threads
        )
        questions = Post.objects.get_questions()
        questions = questions.select_related('thread').filter(
            thread__in=similar_threads
        )
        for q in questions:
            thread_map[q.thread_id].question_denorm = q

        # Postprocess data. A thread for which the query above
        # returned no question post never gets `question_denorm`;
        # skip it instead of failing with AttributeError.
        result = []
        for thread in similar_threads:
            question_post = getattr(thread, 'question_denorm', None)
            if question_post is None:
                continue
            result.append({
                'url': question_post.get_absolute_url(),
                'title': thread.get_title(question_post)
            })
        return result

    def get_cached_data():
        """similar thread data will expire
        with the default expiration delay
        (no explicit timeout is passed to the cache)
        """
        key = 'similar-threads-%s' % self.id
        data = cache.cache.get(key)
        if data is None:
            data = get_data()
            cache.cache.set(key, data)
        return data

    return LazyList(get_cached_data)
def get_similar_threads(self):
    """
    Return data about up to 10 threads that resemble this one.

    A candidate is any other thread sharing at least one tag name
    with this thread. The result items are dicts with the keys
    ``'url'`` and ``'title'``, both derived from the candidate's
    question post; candidates without a question post are left out.

    Known limitation: just the first 100 candidates are loaded and
    the 10 best are chosen among them, since scoring the whole table
    may be very expensive. Some sort of optimization would help here.

    The value returned is ``LazyList(get_cached_data)``; presumably
    nothing is computed until the list is actually used - confirm
    against ``LazyList``.
    """

    def build_similar_list():
        # todo: all of this would be simpler with the question post id
        # denormalized on the thread
        tag_names = self.get_tag_names()
        # NOTE(review): on a multi-valued relation, both conditions of
        # a single exclude() are not guaranteed to apply to the same
        # post - confirm the intended threads are excluded.
        candidates = list(
            Thread.objects.filter(
                tags__name__in=tag_names
            ).exclude(
                id=self.id
            ).exclude(
                posts__post_type='question',
                posts__deleted=True
            ).distinct()[:100]
        )

        for candidate in candidates:
            candidate.similarity = self.get_similarity(
                other_thread=candidate
            )

        # Best matches first; only the leading 10 survive.
        by_similarity = operator.attrgetter('similarity')
        top_threads = sorted(
            candidates, key=by_similarity, reverse=True
        )[:10]

        # Attach the question post to every selected thread using one
        # query, which speeds up template rendering.
        # todo: just denormalize question_post_id on the thread!
        threads_by_id = dict((item.id, item) for item in top_threads)
        question_posts = Post.objects.get_questions()
        question_posts = question_posts.select_related('thread')
        question_posts = question_posts.filter(thread__in=top_threads)
        for post in question_posts:
            threads_by_id[post.thread_id].question_denorm = post

        # Build the final output.
        entries = []
        for item in top_threads:
            question_post = getattr(item, 'question_denorm', None)
            # The guard is unfortunately required because of a
            # possible bug: a thread may end up without a question
            # post here. It shows that referencing threads by the
            # question post id in question page urls is wrong - a
            # "legacy" problem inherited from the old models.
            if not question_post:
                continue
            entries.append({
                'url': question_post.get_absolute_url(),
                'title': item.get_title(question_post),
            })
        return entries

    def get_cached_data():
        """cached similar thread data expires
        after the default expiration delay
        """
        key = 'similar-threads-%s' % self.id
        data = cache.cache.get(key)
        if data is not None:
            return data
        data = build_similar_list()
        cache.cache.set(key, data)
        return data

    return LazyList(get_cached_data)