def sampling_dishes(self, iteration):
    """Resample the topic attached to every table of every document.

    For each table the method withdraws the table (and the word counts
    seated at it) from its current topic, scores every topic that still
    owns at least one table plus one fresh topic id, draws a topic from
    the normalised scores and records the table under the drawn topic.

    Keyword arguments:
    iteration -- current iteration count (not read by this method)
    """
    # NOTE(review): the original author marked this function as
    # "need to check this function"; the scoring itself is unchanged here.
    for doc in self.doc_list:
        for table in doc.get_tables():
            table_words = doc.tableWords[table]
            old_topic = doc.tableTopic[table]

            # Withdraw the table from its current topic. A topic left with
            # no tables is deleted so that it is not scored below.
            self.table_assigned_topics[old_topic] -= 1
            if self.table_assigned_topics[old_topic] == 0:
                del self.table_assigned_topics[old_topic]
            # .items() instead of the Python-2-only .iteritems().
            for word_no, count in table_words.items():
                self.word_topic.decrease(word_no, old_topic, count)

            # Log-score of each surviving topic: log(number of tables on
            # the topic) + log conditional of the table's words.
            topic_prob = {}
            for topic_no, table_count in self.table_assigned_topics.items():
                topic_prob[topic_no] = (
                    np.log(table_count)
                    + self.word_topic.get_multiword_log_conditional(
                        table_words, topic_no, self.eta))

            # Log-score of a fresh topic, weighted by log(alpha).
            # NOTE(review): sampling_tables asks self.word_topic for the
            # fresh topic id, this method asks self -- confirm both agree.
            new_topic_no = self.get_new_topic()
            topic_prob[new_topic_no] = (
                np.log(self.alpha)
                + self.word_topic.get_multiword_log_conditional(
                    table_words, new_topic_no, self.eta))

            new_topic = sampling_from_dict(log_normalize(topic_prob))
            doc.tableTopic[table] = new_topic

            # Register the table under the drawn topic. .get() creates the
            # entry for a brand-new topic and never wipes an existing count.
            self.table_assigned_topics[new_topic] = (
                self.table_assigned_topics.get(new_topic, 0) + 1)
            for word_no, count in table_words.items():
                self.word_topic.increase(word_no, new_topic, count)
def sampling_dishes(self, iteration):
    """Resample the topic attached to every table of every document.

    For each table the method withdraws the table (and the word counts
    seated at it) from its current topic, scores every topic that still
    owns at least one table plus one fresh topic id, draws a topic from
    the normalised scores and records the table under the drawn topic.

    Keyword arguments:
    iteration -- current iteration count (not read by this method)
    """
    # NOTE(review): the original author marked this function as
    # "need to check this function"; the scoring itself is unchanged here.
    for doc in self.doc_list:
        for table in doc.get_tables():
            table_words = doc.tableWords[table]
            old_topic = doc.tableTopic[table]

            # Withdraw the table from its current topic. A topic left with
            # no tables is deleted so that it is not scored below.
            self.table_assigned_topics[old_topic] -= 1
            if self.table_assigned_topics[old_topic] == 0:
                del self.table_assigned_topics[old_topic]
            # .items() instead of the Python-2-only .iteritems().
            for word_no, count in table_words.items():
                self.word_topic.decrease(word_no, old_topic, count)

            # Log-score of each surviving topic: log(number of tables on
            # the topic) + log conditional of the table's words.
            topic_prob = {}
            for topic_no, table_count in self.table_assigned_topics.items():
                topic_prob[topic_no] = (
                    np.log(table_count)
                    + self.word_topic.get_multiword_log_conditional(
                        table_words, topic_no, self.eta))

            # Log-score of a fresh topic, weighted by log(alpha).
            # NOTE(review): sampling_tables asks self.word_topic for the
            # fresh topic id, this method asks self -- confirm both agree.
            new_topic_no = self.get_new_topic()
            topic_prob[new_topic_no] = (
                np.log(self.alpha)
                + self.word_topic.get_multiword_log_conditional(
                    table_words, new_topic_no, self.eta))

            new_topic = sampling_from_dict(log_normalize(topic_prob))
            doc.tableTopic[table] = new_topic

            # Register the table under the drawn topic. .get() creates the
            # entry for a brand-new topic and never wipes an existing count.
            self.table_assigned_topics[new_topic] = (
                self.table_assigned_topics.get(new_topic, 0) + 1)
            for word_no, count in table_words.items():
                self.word_topic.increase(word_no, new_topic, count)
def sampling_tables(self, iteration):
    """Iterate the corpus and resample the table of each word token.

    For every token the method (except on iteration 0) removes the token
    from its current table, scores every existing table of the document
    and one new table, draws a table, and -- when the new table is drawn --
    also draws the topic that the new table is attached to.

    Keyword arguments:
    iteration -- current iteration count; on iteration 0 tokens are
                 treated as not yet seated, so nothing is removed first
    """
    for doc in self.doc_list:
        doc_length = doc.get_doc_length() - 1
        # float() keeps the divisions below true divisions even when the
        # hyper-parameters are integers on Python 2.
        doc_denominator = float(doc_length + self.beta)

        for word in doc.word_list:
            word_no = word.wordNo

            # Remove the token from the table it currently sits at.
            if iteration != 0:
                old_table = word.tableNo
                old_topic = doc.tableTopic[old_table]
                doc.remove_word_from_table(word_no, old_table)
                if doc.get_table_sum(old_table) == 0:
                    # The table became empty: its topic loses one table,
                    # and a topic with no tables left is deleted.
                    self.table_assigned_topics[old_topic] -= 1
                    self.total_table -= 1
                    if self.table_assigned_topics[old_topic] == 0:
                        del self.table_assigned_topics[old_topic]
                self.word_topic.decrease(word_no, old_topic)

            # Conditional of this word under every known topic and under
            # one fresh topic.
            tables = doc.get_tables()
            topic_prob = {}
            for topic_no in self.word_topic.get_topics():
                topic_prob[topic_no] = self.word_topic.get_conditional(
                    word_no, topic_no, self.eta)
            new_topic_no = self.word_topic.get_new_topic()
            topic_prob[new_topic_no] = self.word_topic.get_conditional(
                word_no, new_topic_no, self.eta)

            # Existing tables: word conditional under the table's topic,
            # weighted by the table's current word count.
            table_prob = {}
            for table_no in tables:
                table_prob[table_no] = (
                    topic_prob[doc.tableTopic[table_no]]
                    * doc.tableSum[table_no] / doc_denominator)

            # New table: mix the word conditionals over topics. A topic
            # that owns tables is weighted by its table count, any other
            # topic by alpha.
            new_table_no = doc.get_new_table()
            topic_denominator = float(self.total_table + self.alpha)
            new_table_prob = 0
            new_table_dict = {}
            for topic_no, word_prob in topic_prob.items():
                # `in` instead of dict.has_key(), removed in Python 3.
                if topic_no in self.table_assigned_topics:
                    weight = (self.table_assigned_topics[topic_no]
                              / topic_denominator)
                else:
                    weight = self.alpha / topic_denominator
                prob = weight * word_prob
                new_table_prob += prob
                new_table_dict[topic_no] = prob
            table_prob[new_table_no] = (
                new_table_prob * self.beta / doc_denominator)

            new_table = sampling_from_dict(table_prob)

            if new_table == new_table_no:
                # A new table was opened: draw its topic and register it.
                new_topic_of_new_table = sampling_from_dict(new_table_dict)
                self.total_table += 1
                # .get() covers the fresh topic id and also a topic known
                # to word_topic that currently owns no table (the original
                # `+= 1` raised KeyError for the latter).
                self.table_assigned_topics[new_topic_of_new_table] = (
                    self.table_assigned_topics.get(
                        new_topic_of_new_table, 0) + 1)
                doc.tableTopic[new_table] = new_topic_of_new_table

            # Seat the token at the drawn table and credit the table's topic.
            word.tableNo = new_table
            doc.add_word_to_table(word_no, new_table)
            self.word_topic.increase(word_no, doc.tableTopic[new_table])
def sampling_tables(self, iteration):
    """Iterate the corpus and resample the table of each word token.

    For every token the method (except on iteration 0) removes the token
    from its current table, scores every existing table of the document
    and one new table, draws a table, and -- when the new table is drawn --
    also draws the topic that the new table is attached to.

    Keyword arguments:
    iteration -- current iteration count; on iteration 0 tokens are
                 treated as not yet seated, so nothing is removed first
    """
    for doc in self.doc_list:
        doc_length = doc.get_doc_length() - 1
        # float() keeps the divisions below true divisions even when the
        # hyper-parameters are integers on Python 2.
        doc_denominator = float(doc_length + self.beta)

        for word in doc.word_list:
            word_no = word.wordNo

            # Remove the token from the table it currently sits at.
            if iteration != 0:
                old_table = word.tableNo
                old_topic = doc.tableTopic[old_table]
                doc.remove_word_from_table(word_no, old_table)
                if doc.get_table_sum(old_table) == 0:
                    # The table became empty: its topic loses one table,
                    # and a topic with no tables left is deleted.
                    self.table_assigned_topics[old_topic] -= 1
                    self.total_table -= 1
                    if self.table_assigned_topics[old_topic] == 0:
                        del self.table_assigned_topics[old_topic]
                self.word_topic.decrease(word_no, old_topic)

            # Conditional of this word under every known topic and under
            # one fresh topic.
            tables = doc.get_tables()
            topic_prob = {}
            for topic_no in self.word_topic.get_topics():
                topic_prob[topic_no] = self.word_topic.get_conditional(
                    word_no, topic_no, self.eta)
            new_topic_no = self.word_topic.get_new_topic()
            topic_prob[new_topic_no] = self.word_topic.get_conditional(
                word_no, new_topic_no, self.eta)

            # Existing tables: word conditional under the table's topic,
            # weighted by the table's current word count.
            table_prob = {}
            for table_no in tables:
                table_prob[table_no] = (
                    topic_prob[doc.tableTopic[table_no]]
                    * doc.tableSum[table_no] / doc_denominator)

            # New table: mix the word conditionals over topics. A topic
            # that owns tables is weighted by its table count, any other
            # topic by alpha.
            new_table_no = doc.get_new_table()
            topic_denominator = float(self.total_table + self.alpha)
            new_table_prob = 0
            new_table_dict = {}
            for topic_no, word_prob in topic_prob.items():
                # `in` instead of dict.has_key(), removed in Python 3.
                if topic_no in self.table_assigned_topics:
                    weight = (self.table_assigned_topics[topic_no]
                              / topic_denominator)
                else:
                    weight = self.alpha / topic_denominator
                prob = weight * word_prob
                new_table_prob += prob
                new_table_dict[topic_no] = prob
            table_prob[new_table_no] = (
                new_table_prob * self.beta / doc_denominator)

            new_table = sampling_from_dict(table_prob)

            if new_table == new_table_no:
                # A new table was opened: draw its topic and register it.
                new_topic_of_new_table = sampling_from_dict(new_table_dict)
                self.total_table += 1
                # .get() covers the fresh topic id and also a topic known
                # to word_topic that currently owns no table (the original
                # `+= 1` raised KeyError for the latter).
                self.table_assigned_topics[new_topic_of_new_table] = (
                    self.table_assigned_topics.get(
                        new_topic_of_new_table, 0) + 1)
                doc.tableTopic[new_table] = new_topic_of_new_table

            # Seat the token at the drawn table and credit the table's topic.
            word.tableNo = new_table
            doc.add_word_to_table(word_no, new_table)
            self.word_topic.increase(word_no, doc.tableTopic[new_table])