Ejemplo n.º 1
0
    def sampling_dishes(self, iteration):
        """Resample the topic (dish) served at every table of every document.

        For each table the method
          1. removes the table and its word counts from the table's
             current topic,
          2. scores every topic that still owns at least one table with
             log(number of tables) + multi-word log conditional, and one
             new topic with log(alpha) + multi-word log conditional,
          3. normalises the scores with ``log_normalize`` and draws a topic
             with ``sampling_from_dict``,
          4. records the table and its word counts under the drawn topic.

        Keyword arguments:
        iteration -- current iteration count (not used by this method)
        """
        # TODO: the original author marked this function as needing review.

        for doc in self.doc_list:
            for table in doc.get_tables():
                # Assumes tableWords[table] is a dict-like mapping of
                # word number -> count.
                table_words = doc.tableWords[table]
                old_topic = doc.tableTopic[table]

                # Take the table out of its current topic; a topic left
                # without tables is dropped so it is not scored below.
                self.table_assigned_topics[old_topic] -= 1
                if self.table_assigned_topics[old_topic] == 0:
                    del self.table_assigned_topics[old_topic]
                # items() instead of the Python-2-only iteritems(), so the
                # loop runs on both Python 2 and Python 3.
                for word_no, count in table_words.items():
                    self.word_topic.decrease(word_no, old_topic, count)

                # Log-score of every topic that still has tables.
                topic_prob = dict()
                for topic_no, n_tables in self.table_assigned_topics.items():
                    topic_prob[topic_no] = (
                        np.log(n_tables)
                        + self.word_topic.get_multiword_log_conditional(
                            table_words, topic_no, self.eta))

                # Log-score of a new topic, weighted by alpha instead of a
                # table count.
                new_topic_no = self.get_new_topic()
                topic_prob[new_topic_no] = (
                    np.log(self.alpha)
                    + self.word_topic.get_multiword_log_conditional(
                        table_words, new_topic_no, self.eta))

                topic_prob = log_normalize(topic_prob)
                new_topic = sampling_from_dict(topic_prob)

                doc.tableTopic[table] = new_topic
                # A newly created topic has no table count yet; start at 0
                # so the increment below works.
                if new_topic == new_topic_no:
                    self.table_assigned_topics[new_topic] = 0
                self.table_assigned_topics[new_topic] += 1
                for word_no, count in table_words.items():
                    self.word_topic.increase(word_no, new_topic, count)
Ejemplo n.º 2
0
    def sampling_dishes(self, iteration):
        """Draw a new topic for each table in each document.

        The table is first detached from its current topic (table count and
        word counts). Every topic that still has tables is then scored with
        log(table count) + multi-word log conditional, one new topic is
        scored with log(alpha) + multi-word log conditional, the scores are
        passed through ``log_normalize`` and a topic is drawn with
        ``sampling_from_dict``. Finally the table is attached to that topic.

        Keyword arguments:
        iteration -- current iteration count (not used by this method)
        """
        # TODO: flagged by the original author as needing verification.

        for doc in self.doc_list:
            for table in doc.get_tables():
                # Assumed to be a dict-like mapping word number -> count.
                words_at_table = doc.tableWords[table]
                previous_topic = doc.tableTopic[table]

                # Detach the table; drop the topic if this was its last table.
                self.table_assigned_topics[previous_topic] -= 1
                if self.table_assigned_topics[previous_topic] == 0:
                    del self.table_assigned_topics[previous_topic]
                # dict.iteritems() does not exist on Python 3; items() is
                # available on both Python 2 and Python 3.
                for word_no, count in words_at_table.items():
                    self.word_topic.decrease(word_no, previous_topic, count)

                log_conditional = self.word_topic.get_multiword_log_conditional

                # Existing topics: weighted by how many tables serve them.
                topic_prob = {
                    topic_no: np.log(table_count)
                    + log_conditional(words_at_table, topic_no, self.eta)
                    for topic_no, table_count
                    in self.table_assigned_topics.items()
                }

                # Candidate new topic: weighted by alpha.
                new_topic_no = self.get_new_topic()
                topic_prob[new_topic_no] = np.log(self.alpha) + log_conditional(
                    words_at_table, new_topic_no, self.eta)

                topic_prob = log_normalize(topic_prob)
                new_topic = sampling_from_dict(topic_prob)

                doc.tableTopic[table] = new_topic
                # The new topic has no entry yet, so create it before counting.
                if new_topic == new_topic_no:
                    self.table_assigned_topics[new_topic] = 0
                self.table_assigned_topics[new_topic] += 1
                for word_no, count in words_at_table.items():
                    self.word_topic.increase(word_no, new_topic, count)
Ejemplo n.º 3
0
    def sampling_tables(self, iteration):
        """Resample the table assignment of every word token in the corpus.

        For each token: detach it from its current table (skipped when
        ``iteration`` is 0), weigh every existing table of the document and
        one new table, draw a table with ``sampling_from_dict`` and seat the
        token there. If the new table is drawn, a topic is drawn for it too.

        Keyword arguments:
        iteration -- current iteration count; on iteration 0 the removal
                     step is skipped (presumably because the tokens are not
                     seated at any table yet)
        """
        # NOTE(review): under Python 2 the divisions below truncate if the
        # counts, alpha and beta are all ints -- confirm the hyperparameters
        # are floats.
        for doc in self.doc_list:
            # Presumably the document length without the token being resampled.
            doc_length = doc.get_doc_length() - 1

            for word in doc.word_list:
                word_no = word.wordNo

                # Detach the token from its current table and topic.
                if iteration != 0:
                    old_table = word.tableNo
                    old_topic = doc.tableTopic[old_table]

                    doc.remove_word_from_table(word_no, old_table)
                    if doc.get_table_sum(old_table) == 0:
                        # The table is now empty: stop counting it, and drop
                        # its topic if no other table serves that topic.
                        self.table_assigned_topics[old_topic] -= 1
                        self.total_table -= 1
                        if self.table_assigned_topics[old_topic] == 0:
                            del self.table_assigned_topics[old_topic]
                    self.word_topic.decrease(word_no, old_topic)

                # Conditional of the word under every known topic and under
                # one new topic.
                tables = doc.get_tables()
                topic_prob = dict()
                for topic_no in self.word_topic.get_topics():
                    topic_prob[topic_no] = self.word_topic.get_conditional(
                        word_no, topic_no, self.eta)

                new_topic_no = self.word_topic.get_new_topic()
                topic_prob[new_topic_no] = self.word_topic.get_conditional(
                    word_no, new_topic_no, self.eta)

                # Weight of each existing table: conditional under the
                # table's topic times the table's word count.
                doc_norm = doc_length + self.beta
                table_prob = dict()
                for table_no in tables:
                    table_prob[table_no] = (
                        topic_prob[doc.tableTopic[table_no]]
                        * doc.tableSum[table_no] / doc_norm)

                # Weight of a new table: sum over all candidate topics, each
                # weighted by its table count (or by alpha when it has none).
                new_table_no = doc.get_new_table()
                table_norm = self.total_table + self.alpha
                new_table_prob = 0
                new_table_dict = dict()
                for topic_no, word_prob in topic_prob.items():
                    # `in` replaces dict.has_key(), which Python 3 removed.
                    if topic_no in self.table_assigned_topics:
                        weight = self.table_assigned_topics[topic_no]
                    else:
                        weight = self.alpha
                    prob = weight / table_norm * word_prob
                    new_table_prob += prob
                    new_table_dict[topic_no] = prob

                table_prob[new_table_no] = new_table_prob * self.beta / doc_norm

                new_table = sampling_from_dict(table_prob)

                # A new table needs a topic of its own.
                if new_table == new_table_no:
                    new_topic_of_new_table = sampling_from_dict(new_table_dict)
                    self.total_table += 1

                    # Start the count at 0 for the new topic. The same is
                    # done for any drawn topic without a count entry (it was
                    # given weight alpha above), which previously raised
                    # KeyError on the increment.
                    if (new_topic_of_new_table == new_topic_no
                            or new_topic_of_new_table
                            not in self.table_assigned_topics):
                        self.table_assigned_topics[new_topic_of_new_table] = 0
                    self.table_assigned_topics[new_topic_of_new_table] += 1
                    doc.tableTopic[new_table] = new_topic_of_new_table

                # Seat the token and credit the word to the table's topic.
                word.tableNo = new_table
                doc.add_word_to_table(word_no, new_table)
                self.word_topic.increase(word_no, doc.tableTopic[new_table])
Ejemplo n.º 4
0
    def sampling_tables(self, iteration):
        """Iterate the corpus and sample a table for each word token.

        Per token the method
          1. removes the token from its table and topic counts, unless
             ``iteration`` is 0,
          2. computes the word's conditional under every topic reported by
             ``word_topic.get_topics()`` and under one new topic,
          3. weighs every existing table of the document and one new table,
          4. draws a table with ``sampling_from_dict``; when the new table
             is drawn, a topic is drawn for it as well,
          5. adds the token to the drawn table and its topic counts.

        Keyword arguments:
        iteration -- current iteration count; step 1 is skipped when it is 0
                     (presumably no assignment exists yet on the first
                     sweep)
        """
        # NOTE(review): on Python 2 the `/` operations truncate when every
        # operand is an int -- verify that alpha and beta are floats.
        for doc in self.doc_list:
            # Presumably excludes the token currently being resampled.
            doc_length = doc.get_doc_length() - 1

            for word in doc.word_list:
                word_no = word.wordNo

                # Step 1: remove the token from its assigned table.
                if iteration != 0:
                    old_table = word.tableNo
                    old_topic = doc.tableTopic[old_table]

                    doc.remove_word_from_table(word_no, old_table)
                    if doc.get_table_sum(old_table) == 0:
                        # An emptied table no longer counts towards its
                        # topic; a topic with no tables left is removed.
                        self.table_assigned_topics[old_topic] -= 1
                        self.total_table -= 1
                        if self.table_assigned_topics[old_topic] == 0:
                            del self.table_assigned_topics[old_topic]
                    self.word_topic.decrease(word_no, old_topic)

                # Step 2: conditional of the word for each topic.
                tables = doc.get_tables()
                conditional = self.word_topic.get_conditional
                topic_prob = {
                    topic_no: conditional(word_no, topic_no, self.eta)
                    for topic_no in self.word_topic.get_topics()
                }

                new_topic_no = self.word_topic.get_new_topic()
                topic_prob[new_topic_no] = conditional(
                    word_no, new_topic_no, self.eta)

                # Step 3a: existing tables, weighted by their word count.
                doc_norm = doc_length + self.beta
                table_prob = {
                    table_no: topic_prob[doc.tableTopic[table_no]]
                    * doc.tableSum[table_no] / doc_norm
                    for table_no in tables
                }

                # Step 3b: the new table, summed over all candidate topics.
                new_table_no = doc.get_new_table()
                table_norm = self.total_table + self.alpha
                new_table_prob = 0
                new_table_dict = dict()
                for topic_no, word_prob in topic_prob.items():
                    # Topics without any table are weighted by alpha.
                    # dict.get() replaces the has_key() branch, because
                    # has_key() was removed in Python 3.
                    weight = self.table_assigned_topics.get(
                        topic_no, self.alpha)
                    prob = weight / table_norm * word_prob
                    new_table_prob += prob
                    new_table_dict[topic_no] = prob

                table_prob[new_table_no] = (
                    new_table_prob * self.beta / doc_norm)

                # Step 4: draw the table.
                new_table = sampling_from_dict(table_prob)

                if new_table == new_table_no:
                    topic_for_table = sampling_from_dict(new_table_dict)
                    self.total_table += 1

                    # Create the count entry for the new topic. Any other
                    # drawn topic lacking an entry (it was weighted by alpha
                    # above) is initialised too; the increment used to raise
                    # KeyError for it.
                    if (topic_for_table == new_topic_no
                            or topic_for_table
                            not in self.table_assigned_topics):
                        self.table_assigned_topics[topic_for_table] = 0
                    self.table_assigned_topics[topic_for_table] += 1
                    doc.tableTopic[new_table] = topic_for_table

                # Step 5: seat the token at the drawn table.
                word.tableNo = new_table
                doc.add_word_to_table(word_no, new_table)
                self.word_topic.increase(word_no, doc.tableTopic[new_table])