Ejemplo n.º 1
0
    def _inference_one_chain(self, doc_tokens, rand):
        """Infer topics for one document with a single Markov chain.

        Runs collapsed Gibbs sampling for ``self.total_iterations``
        sweeps over the document's words, accumulating per-topic counts
        only after ``self.burn_in_iterations`` sweeps have completed.

        Args:
            doc_tokens: iterable of word tokens for one document.
            rand: random number generator passed through to parsing and
                topic sampling.

        Returns:
            The sparse topic distribution p(z|d): a dict mapping topic id
            to its L1-normalized weight. Empty dict when no token of
            ``doc_tokens`` is in the vocabulary.
        """
        document = Document(self.model.num_topics)
        document.parse_from_tokens(doc_tokens, rand, self.vocabulary,
                                   self.model)
        if document.num_words() == 0:
            return dict()

        accumulated_topic_hist = {}
        # range (not Python-2-only xrange) iterates identically and keeps
        # the code runnable on both Python 2 and 3.
        for i in range(self.total_iterations):
            # One Gibbs sweep: resample the topic of every word.
            for word in document.words:
                # Remove the word's current assignment first so the
                # sampling distribution reflects all *other* words.
                document.decrease_topic(word.topic, 1)

                new_topic = self._sample_word_topic(document, word.id, rand)
                # Identity comparison per PEP 8; sampler must always
                # return a topic.
                assert new_topic is not None
                word.topic = new_topic
                # Re-add the word under its freshly sampled topic.
                document.increase_topic(new_topic, 1)

            # Discard statistics from the burn-in phase of the chain.
            if i >= self.burn_in_iterations:
                for non_zero in document.doc_topic_hist.non_zeros:
                    accumulated_topic_hist[non_zero.topic] = (
                        accumulated_topic_hist.get(non_zero.topic, 0)
                        + non_zero.count)

        return self._l1normalize_distribution(accumulated_topic_hist)
Ejemplo n.º 2
0
    def _inference_one_chain(self, doc_tokens, rand):
        """Infer topics for one document with a single Markov chain.

        Runs collapsed Gibbs sampling for ``self.total_iterations``
        sweeps over the document's words, accumulating per-topic counts
        only after ``self.burn_in_iterations`` sweeps have completed.

        Args:
            doc_tokens: iterable of word tokens for one document.
            rand: random number generator passed through to parsing and
                topic sampling.

        Returns:
            The sparse topic distribution p(z|d): a dict mapping topic id
            to its L1-normalized weight. Empty dict when no token of
            ``doc_tokens`` is in the vocabulary.
        """
        document = Document(self.model.num_topics)
        document.parse_from_tokens(doc_tokens, rand,
                self.vocabulary, self.model)
        if document.num_words() == 0:
            return dict()

        accumulated_topic_hist = {}
        # range (not Python-2-only xrange) iterates identically and keeps
        # the code runnable on both Python 2 and 3.
        for i in range(self.total_iterations):
            # One Gibbs sweep: resample the topic of every word.
            for word in document.words:
                # Remove the word's current assignment first so the
                # sampling distribution reflects all *other* words.
                document.decrease_topic(word.topic, 1)

                new_topic = self._sample_word_topic(document, word.id, rand)
                # Identity comparison per PEP 8; sampler must always
                # return a topic.
                assert new_topic is not None
                word.topic = new_topic
                # Re-add the word under its freshly sampled topic.
                document.increase_topic(new_topic, 1)

            # Discard statistics from the burn-in phase of the chain.
            if i >= self.burn_in_iterations:
                for non_zero in document.doc_topic_hist.non_zeros:
                    accumulated_topic_hist[non_zero.topic] = (
                        accumulated_topic_hist.get(non_zero.topic, 0)
                        + non_zero.count)

        return self._l1normalize_distribution(accumulated_topic_hist)