Esempio n. 1
0
    def resample(self):
        """Run one resampling sweep: table assignments first, then table dishes.

        Pass 1 walks `self.analyses` in order; each (w, p, s) entry is removed
        with `decrement` and replaced by whatever `increment(w)` returns.

        Pass 2 walks every entry of every table list in `self.tables`, retracts
        the old (p, s) from `p_counts` / `s_counts`, draws a new (p, s) from
        the candidates in `self.seg_mappings[w]` weighted by `self.base.prob`,
        and rebuilds `self.analyses`, `self.tables` and `self.ncustomers`.
        Note that the rebuilt `self.analyses` is grouped by table, not kept in
        the order it had before the sweep.
        """
        # Pass 1: re-seat every entry of the analyses list.
        for slot, (word, prefix, suffix) in enumerate(self.analyses):
            self.decrement(word, prefix, suffix)
            word, prefix, suffix = self.increment(word)
            self.analyses[slot] = (word, prefix, suffix)

        # Pass 2: relabel each table and rebuild the bookkeeping from scratch.
        regrouped_analyses = []
        regrouped_tables = {}
        regrouped_ncustomers = {}
        for (w, old_p, old_s), table_sizes in self.tables.iteritems():
            # Each `size` is one table; presumably its customer count -- it is
            # what gets summed into ncustomers below.
            for size in table_sizes:
                # The old label loses one count per table, not per customer.
                self.p_counts[old_p] -= 1
                self.s_counts[old_s] -= 1

                candidates = (((p, s), self.base.prob(p, s))
                              for p, s in self.seg_mappings[w])
                new_p, new_s = mult_sample(candidates)
                self.p_counts[new_p] += 1
                self.s_counts[new_s] += 1

                dish = (w, new_p, new_s)
                regrouped_tables.setdefault(dish, []).append(size)
                regrouped_ncustomers[dish] = (
                    regrouped_ncustomers.get(dish, 0) + size)
                regrouped_analyses += [dish] * size

        self.analyses = regrouped_analyses
        self.tables = regrouped_tables
        self.ncustomers = regrouped_ncustomers
Esempio n. 2
0
 def increment(self, word, initialize=False):
     """Choose a (prefix, suffix) analysis for `word` and record it.

     The candidates come from `self._analysis_probs(word)`, which yields
     (analysis, weight) pairs. When `initialize` is true the weights are
     ignored and one analysis is picked uniformly with `random.choice`;
     otherwise the pairs are handed to `mult_sample` (presumably a draw
     proportional to the weights -- confirm against its definition).

     The chosen prefix and suffix are added to `prefix_model` and
     `suffix_model`, and the pair is appended to `self.analyses[word]`.
     """
     weighted = self._analysis_probs(word)
     if not initialize:
         prefix, suffix = mult_sample(weighted)
     else:
         candidates = [analysis for analysis, _ in weighted]
         prefix, suffix = random.choice(candidates)
     # Update both component models before recording the assignment.
     self.prefix_model.increment(prefix)
     self.suffix_model.increment(suffix)
     self.analyses[word].append((prefix, suffix))
Esempio n. 3
0
 def increment(self, w, initialize=False):
     """Draw a segmentation and a seat for word #w, then record both.

     A (p, s, seat) triple is drawn with `mult_sample` from
     `self.seating_probs(w, initialize)`. The word is seated through
     `_seat_to`; when that call returns a true value the base distribution
     is incremented for (p, s) -- presumably because a new table was
     opened; confirm against `_seat_to`. The count of (p, s) in
     `self.analyses[w]` is always incremented.
     """
     prefix, suffix, seat = mult_sample(self.seating_probs(w, initialize))
     base_needs_update = self._seat_to((w, prefix, suffix), seat)
     if base_needs_update:
         self.base.increment(prefix, suffix)
     # Per-word dish count, updated whatever the seating outcome.
     self.analyses[w][prefix, suffix] += 1
Esempio n. 4
0
def sample_topics(doc, model, n_iter):
    """Resample a topic for every word of `doc` over `n_iter` sweeps.

    A fresh `DirichletMultinomial(model.n_topics, model.alpha)` holds the
    document's topic counts. In each sweep every word's previous topic is
    removed from those counts (skipped on the first sweep, when nothing has
    been assigned yet), a new topic is drawn with `mult_sample` using weight
    `doc_topic.prob(k) * model.topic_word[k].prob(word)` for each topic k,
    and the new topic is added back.

    Returns `topic_vector(doc_topic, model)`.
    """
    assignments = [None] * len(doc)
    doc_topic = DirichletMultinomial(model.n_topics, model.alpha)
    for sweep in xrange(n_iter):
        has_previous = sweep > 0
        for pos, word in enumerate(doc):
            if has_previous:
                doc_topic.decrement(assignments[pos])
            weighted_topics = (
                (k, doc_topic.prob(k) * model.topic_word[k].prob(word))
                for k in xrange(model.n_topics))
            chosen = mult_sample(weighted_topics)
            assignments[pos] = chosen
            doc_topic.increment(chosen)
    return topic_vector(doc_topic, model)
Esempio n. 5
0
 def increment(self, k):
     """Pick one analysis of item `k`, store its index, and update the models.

     If `self.analyses[k]` holds exactly one analysis, index 0 is used
     without sampling. Otherwise an index is drawn with `mult_sample`, each
     index weighted by `self.analysis_prob` of its analysis. The chosen
     index is appended to `self.assignments[k]`, and the analysis' `stem`
     and `pattern` are added to `stem_model` and `pattern_model`.
     """
     candidates = self.analyses[k]
     if len(candidates) == 1:
         # Nothing to choose between, so skip the sampler.
         chosen = 0
     else:
         chosen = mult_sample(
             (idx, self.analysis_prob(candidate))
             for idx, candidate in enumerate(candidates))
     self.assignments[k].append(chosen)

     picked = candidates[chosen]
     self.stem_model.increment(picked.stem)
     self.pattern_model.increment(picked.pattern)
Esempio n. 6
0
 def resample_labels(self):
     """Draw a new (p, s) label for every table and rebuild the bookkeeping.

     For each entry of each table list in `self.tables`, the old (p, s) is
     removed from `self.base`, a replacement is drawn with `mult_sample`
     from `self.segmentations(w)` weighted by `self.base.prob`, and the
     replacement is added back to `self.base`. `self.analyses` (one Counter
     per entry of `self.word_vocabulary`), `self.tables` and
     `self.ncustomers` are rebuilt from scratch and swapped in at the end.
     """
     vocabulary_size = len(self.word_vocabulary)
     relabeled_analyses = [Counter() for _ in xrange(vocabulary_size)]
     relabeled_tables = {}
     relabeled_ncustomers = {}
     for (w, old_p, old_s), table_sizes in self.tables.iteritems():
         # Each `size` is one table; presumably its customer count -- it is
         # what gets summed into ncustomers and the per-word counters.
         for size in table_sizes:
             # Retract the old label before computing the new weights.
             self.base.decrement(old_p, old_s)
             candidates = (((p, s), self.base.prob(p, s))
                           for p, s in self.segmentations(w))
             new_p, new_s = mult_sample(candidates)
             # File the table under its new label.
             dish = (w, new_p, new_s)
             relabeled_tables.setdefault(dish, []).append(size)
             relabeled_ncustomers[dish] = (
                 relabeled_ncustomers.get(dish, 0) + size)
             relabeled_analyses[w][new_p, new_s] += size
             self.base.increment(new_p, new_s)
     self.analyses = relabeled_analyses
     self.tables = relabeled_tables
     self.ncustomers = relabeled_ncustomers
Esempio n. 7
0
 def increment(self, w, initialize=False):
     """Draw a (w, p, s, seat) assignment for `w`, seat it, and return (w, p, s).

     The 4-tuple is drawn with `mult_sample` from
     `self.seating_probs(w, initialize)`; the word component of the draw
     replaces the `w` argument. When `_seat_to` returns a true value the
     counts for p and s are each incremented -- presumably because a new
     table was opened; confirm against `_seat_to`.
     """
     word, prefix, suffix, seat = mult_sample(
         self.seating_probs(w, initialize))
     dish = (word, prefix, suffix)
     if self._seat_to(dish, seat):
         self.p_counts[prefix] += 1
         self.s_counts[suffix] += 1
     return dish