def _inference_one_chain(self, doc_tokens, rand):
    """Inference topics with one markov chain.

    Runs collapsed Gibbs sampling: for each of ``self.total_iterations``
    sweeps, every word's topic is resampled conditioned on the rest of
    the document; after ``self.burn_in_iterations`` sweeps, the sparse
    per-topic counts are accumulated and finally L1-normalized.

    Args:
        doc_tokens: Iterable of raw tokens for one document.
        rand: Random-number source passed through to parsing/sampling.

    Returns:
        The sparse topic distribution p(z|d) as a dict mapping
        topic id -> probability; empty dict if no token is in the
        vocabulary.
    """
    document = Document(self.model.num_topics)
    document.parse_from_tokens(doc_tokens, rand, self.vocabulary, self.model)
    if document.num_words() == 0:
        return {}

    accumulated_topic_hist = {}
    for i in xrange(self.total_iterations):
        # One full Gibbs sweep: resample the topic of every word.
        for word in document.words:
            # Remove this word's current assignment before sampling
            # so the conditional excludes the word itself.
            document.decrease_topic(word.topic, 1)
            new_topic = self._sample_word_topic(document, word.id, rand)
            # Identity check: sampling must always yield a topic.
            assert new_topic is not None
            word.topic = new_topic
            document.increase_topic(new_topic, 1)
        if i >= self.burn_in_iterations:
            # Past burn-in: fold this sweep's sparse histogram into
            # the accumulator (single lookup via dict.get).
            for non_zero in document.doc_topic_hist.non_zeros:
                accumulated_topic_hist[non_zero.topic] = (
                    accumulated_topic_hist.get(non_zero.topic, 0)
                    + non_zero.count)

    return self._l1normalize_distribution(accumulated_topic_hist)
def _inference_one_chain(self, doc_tokens, rand):
    """Inference topics with one markov chain.

    Gibbs-samples a new topic for every word of the document on each
    sweep, collecting the document's sparse topic counts once the
    burn-in phase has passed, then returns the L1-normalized result.

    Returns the sparse topics p(z|d) as a dict (empty if no token of
    the document is in the vocabulary).
    """
    doc = Document(self.model.num_topics)
    doc.parse_from_tokens(doc_tokens, rand, self.vocabulary, self.model)
    if doc.num_words() == 0:
        return dict()

    topic_hist = {}
    for sweep in xrange(self.total_iterations):
        for word in doc.words:
            # Exclude this word's own count while sampling its new topic.
            doc.decrease_topic(word.topic, 1)
            sampled = self._sample_word_topic(doc, word.id, rand)
            assert sampled != None
            word.topic = sampled
            doc.increase_topic(sampled, 1)
        if sweep < self.burn_in_iterations:
            continue
        # Past burn-in: merge this sweep's histogram into the accumulator.
        for entry in doc.doc_topic_hist.non_zeros:
            if entry.topic not in topic_hist:
                topic_hist[entry.topic] = entry.count
            else:
                topic_hist[entry.topic] += entry.count

    return self._l1normalize_distribution(topic_hist)