Exemple #1
0
    def get_similar_questions(self):
        """
        Get up to 10 questions similar to this one, most similar first.

        Candidates are questions that share at least one tag with this
        question, excluding this question itself and questions flagged
        as deleted. Each candidate is scored with ``self.get_similarity()``
        and the score is stored on the candidate as ``similarity``.

        This function has a limitation that it will retrieve only
        100 records and then select the 10 most similar from that list,
        as querying the entire database may be very expensive - this
        function would benefit from some sort of optimization.

        Returns the result of ``LazyList(get_data)``; the query and the
        scoring happen inside ``get_data`` (presumably deferred by
        ``LazyList`` until the list is first used - confirm against its
        implementation).
        """

        def get_data():
            """Fetch, score and rank the candidate questions."""
            candidates = self.__class__.objects.filter(
                tags__in=self.tags.all()).exclude(
                    id=self.id, ).exclude(deleted=True).distinct()[:100]
            candidates = list(candidates)
            for question in candidates:
                question.similarity = self.get_similarity(
                    other_question=question)
            # Highest similarity first. A key function is used instead of
            # a cmp-style comparator so that this works on Python 3 as
            # well; the sort is stable, so ties keep their query order.
            candidates.sort(
                key=lambda question: question.similarity, reverse=True)
            # Slicing is safe for lists shorter than 10 elements
            return candidates[:10]

        return LazyList(get_data)
Exemple #2
0
    def get_similar_threads(self):
        """
        Get up to 10 threads similar to this one, most similar first.

        Candidates are threads that share at least one tag name with
        this thread, filtered by the ``exclude()`` calls below. Each
        candidate is scored with ``self.get_similarity()``.

        This function has a limitation that it will retrieve only
        100 records and then select the 10 most similar from that list,
        as querying the entire database may be very expensive - this
        function would benefit from some sort of optimization.

        Returns ``LazyList(get_cached_data)``. The underlying data is a
        list of dictionaries with the keys ``'url'`` and ``'title'``,
        one per similar thread for which a question post was found.
        """
        def get_data():
            """Query, rank and format the similar threads."""
            tags_list = self.get_tag_names()
            similar_threads = Thread.objects.filter(
                tags__name__in=tags_list).exclude(id=self.id).exclude(
                    posts__post_type='question',
                    posts__deleted=True).distinct()[:100]
            similar_threads = list(similar_threads)

            for thread in similar_threads:
                thread.similarity = self.get_similarity(other_thread=thread)

            similar_threads.sort(key=operator.attrgetter('similarity'),
                                 reverse=True)
            similar_threads = similar_threads[:10]

            # Denormalize questions to speed up template rendering
            thread_map = dict((thread.id, thread)
                              for thread in similar_threads)
            questions = Post.objects.get_questions()
            questions = questions.select_related('thread').filter(
                thread__in=similar_threads)
            for q in questions:
                thread_map[q.thread_id].question_denorm = q

            # Postprocess data
            result = []
            for thread in similar_threads:
                # A thread gets no ``question_denorm`` attribute when the
                # query above returned no question post for it; skip such
                # threads instead of failing with AttributeError.
                question_post = getattr(thread, 'question_denorm', None)
                if question_post is None:
                    continue
                result.append({
                    'url': question_post.get_absolute_url(),
                    'title': thread.get_title(question_post),
                })
            return result

        def get_cached_data():
            """similar thread data will expire
            with the default expiration delay
            """
            key = 'similar-threads-%s' % self.id
            data = cache.cache.get(key)
            # an empty list is a valid cached value, so only None
            # is treated as a cache miss
            if data is None:
                data = get_data()
                cache.cache.set(key, data)
            return data

        return LazyList(get_cached_data)
Exemple #3
0
    def get_similar_threads(self):
        """
        Collect up to 10 threads that resemble this one, best match first.

        Threads sharing a tag name with this thread are fetched (at most
        100 of them, since scanning the entire database may be very
        expensive), scored with ``self.get_similarity()`` and the 10
        highest scoring ones are kept. This would benefit from some
        sort of optimization.

        The value handed to ``LazyList`` is a callable producing a list
        of ``{'url': ..., 'title': ...}`` dictionaries, stored in the
        cache under a per-thread key.
        """

        def compute_similar():
            # todo: code in this function would be simpler if
            # we had question post id denormalized on the thread
            tag_names = self.get_tag_names()
            candidates = Thread.objects.filter(tags__name__in=tag_names)
            candidates = candidates.exclude(id=self.id)
            candidates = candidates.exclude(
                posts__post_type='question',
                posts__deleted=True
            )
            candidates = list(candidates.distinct()[:100])

            for candidate in candidates:
                candidate.similarity = self.get_similarity(
                    other_thread=candidate
                )

            # rank by similarity, best first, and keep the top 10
            top_threads = sorted(
                candidates,
                key=operator.attrgetter('similarity'),
                reverse=True
            )[:10]

            # Attach the question post to each thread to speed up
            # template rendering
            # todo: just denormalize question_post_id on the thread!
            threads_by_id = dict((item.id, item) for item in top_threads)
            question_posts = Post.objects.get_questions().select_related(
                'thread'
            ).filter(thread__in=top_threads)
            for post in question_posts:
                threads_by_id[post.thread_id].question_denorm = post

            # Build the final output
            entries = []
            for item in top_threads:
                question_post = getattr(item, 'question_denorm', None)
                # unfortunately this guard is necessary due to a possible
                # bug: a thread may end up without a question post here.
                # It shows that referencing threads by the question post
                # id in the question page urls is wrong - a "legacy"
                # problem inherited from the old models
                if not question_post:
                    continue
                entries.append({
                    'url': question_post.get_absolute_url(),
                    'title': item.get_title(question_post),
                })
            return entries

        def load_with_cache():
            """Return the cached similar thread data, computing and
            storing it on a miss; the entry expires with the default
            expiration delay
            """
            cache_key = 'similar-threads-%s' % self.id
            cached = cache.cache.get(cache_key)
            if cached is not None:
                return cached
            fresh = compute_similar()
            cache.cache.set(cache_key, fresh)
            return fresh

        return LazyList(load_with_cache)