Exemple #1
0
 def sampler(self, i, TOPICS, xcounts, ycounts, docId, different_word):
     """Run one Gibbs-sampling sweep over every word of the corpus.

     For each word the current topic assignment is taken out of the
     counts, a smoothed weight is computed for each candidate topic, a new
     topic is drawn with ``Sampling.sampleOne`` and the counts and
     ``self.ycorpus`` are updated with the draw.

     Args:
         i: Unused; kept so existing callers keep working. The sweep always
             visits every document in ``self.xcorpus``.
         TOPICS: Number of topics; candidates are ``range(TOPICS)``.
         xcounts: Mapping read under the keys ``k`` and ``(x, k)``.
         ycounts: Mapping read under the keys ``doc`` and ``(k, doc)``.
         docId: Unused; kept for interface compatibility. Per-document
             counts are read under the same document index that is passed
             to ``add_counter``, so reads and writes agree.
         different_word: Collection whose length is used as the vocabulary
             size in the smoothing denominator.

     Returns:
         The sum of ``log(weight)`` over all sampled topics (also printed).
     """
     ll = 0
     # NOTE(review): add_counter presumably updates both count tables in
     # place for (word, topic, document, delta) -- confirm against add_count.
     adder = add_count(xcounts, ycounts)
     vocabulary_size = len(different_word)
     for doc_index in range(len(self.xcorpus)):
         for j in range(len(self.xcorpus[doc_index])):
             x = self.xcorpus[doc_index][j]
             y = self.ycorpus[doc_index][j]
             # Remove the current assignment before scoring the topics.
             adder.add_counter(x, y, doc_index, -1)
             # Start from an empty distribution for every word so weights
             # of a previous word can never leak into this draw.
             probs = {}
             doc_total = ycounts.get(doc_index, 0) + self.beta * TOPICS
             for k in range(TOPICS):
                 word_total = xcounts.get(k, 0) + self.alpha * vocabulary_size
                 if word_total <= 0 or doc_total <= 0:
                     continue
                 # Smoothed ratios: (count + prior) / (total + prior * size).
                 p_x_y = (xcounts.get((x, k), 0) + self.alpha) / float(word_total)
                 p_y_Y = (ycounts.get((k, doc_index), 0) + self.beta) / float(doc_total)
                 weight = p_x_y * p_y_Y
                 if weight > 0:
                     probs[k] = weight
             if not probs:
                 # Nothing to draw from: put the old assignment back.
                 adder.add_counter(x, y, doc_index, 1)
                 continue
             new_y = Sampling.sampleOne(probs)
             print(new_y)
             ll = ll + log(probs[new_y])
             adder.add_counter(x, new_y, doc_index, 1)
             self.ycorpus[doc_index][j] = new_y
     print(ll)
     return ll
Exemple #2
0
 def sampler(self, i, TOPICS, xcounts, ycounts, docId, different_word):
     """Run one Gibbs-sampling sweep over every word of the corpus.

     For each word the current topic assignment is taken out of the
     counts, a smoothed weight is computed for each candidate topic, a new
     topic is drawn with ``Sampling.sampleOne`` and the counts and
     ``self.ycorpus`` are updated with the draw.

     Args:
         i: Unused; kept so existing callers keep working. The sweep always
             visits every document in ``self.xcorpus``.
         TOPICS: Number of topics; candidates are ``range(TOPICS)``.
         xcounts: Mapping read under the keys ``k`` and ``(x, k)``.
         ycounts: Mapping read under the keys ``doc`` and ``(k, doc)``.
         docId: Unused; kept for interface compatibility. Per-document
             counts are read under the same document index that is passed
             to ``add_counter``, so reads and writes agree.
         different_word: Collection whose length is used as the vocabulary
             size in the smoothing denominator.

     Returns:
         The sum of ``log(weight)`` over all sampled topics (also printed).
     """
     ll = 0
     # NOTE(review): add_counter presumably updates both count tables in
     # place for (word, topic, document, delta) -- confirm against add_count.
     adder = add_count(xcounts, ycounts)
     vocabulary_size = len(different_word)
     for doc_index in range(len(self.xcorpus)):
         for j in range(len(self.xcorpus[doc_index])):
             x = self.xcorpus[doc_index][j]
             y = self.ycorpus[doc_index][j]
             # Remove the current assignment before scoring the topics.
             adder.add_counter(x, y, doc_index, -1)
             # Start from an empty distribution for every word so weights
             # of a previous word can never leak into this draw.
             probs = {}
             doc_total = ycounts.get(doc_index, 0) + self.beta * TOPICS
             for k in range(TOPICS):
                 word_total = xcounts.get(k, 0) + self.alpha * vocabulary_size
                 if word_total <= 0 or doc_total <= 0:
                     continue
                 # Smoothed ratios: (count + prior) / (total + prior * size).
                 p_x_y = (xcounts.get((x, k), 0) + self.alpha) / float(word_total)
                 p_y_Y = (ycounts.get((k, doc_index), 0) + self.beta) / float(doc_total)
                 weight = p_x_y * p_y_Y
                 if weight > 0:
                     probs[k] = weight
             if not probs:
                 # Nothing to draw from: put the old assignment back.
                 adder.add_counter(x, y, doc_index, 1)
                 continue
             new_y = Sampling.sampleOne(probs)
             print(new_y)
             ll = ll + log(probs[new_y])
             adder.add_counter(x, new_y, doc_index, 1)
             self.ycorpus[doc_index][j] = new_y
     print(ll)
     return ll
 def initilize(self, path="07-train.txt"):
     """Load the training corpus and give every word a random topic.

     Every line of ``path`` is treated as one document and split on single
     spaces. Each word gets a random topic and is registered through
     ``add_count(self.xcounts, self.ycounts).add_counter`` under the
     document's 0-based line index.

     After the call:
         * ``self.xcorpus`` is a list with one list of words per document,
         * ``self.ycorpus`` is a list with one list of topics per document
           (same shape as ``self.xcorpus``; rows may differ in length),
         * ``self.different_word`` is the set of distinct words seen in the
           whole file,
         * ``self.docid`` is the index of the last document read (0 when
           the file is empty).

     Args:
         path: Corpus file to read; defaults to the file the original
             implementation hard-coded.
     """
     adder = add_count(self.xcounts, self.ycounts)
     documents = []
     assignments = []
     vocabulary = set()
     self.docid = 0
     with open(path, "r") as corpus_file:
         for docid, line in enumerate(corpus_file):
             # Strip the real newline; the literal two-character string
             # that was here before stripped trailing "n" letters instead.
             words = line.rstrip("\n").split(" ")
             topics_vector = []
             for word in words:
                 # NOTE(review): assumes a numpy.random.randint-style call
                 # returning a value in [0, TOPICS), matching the
                 # range(TOPICS) candidates used by sampler -- confirm the
                 # import.
                 topic = randint(self.TOPICS)
                 topics_vector.append(topic)
                 adder.add_counter(word, topic, docid, 1)
             vocabulary.update(words)
             documents.append(words)
             assignments.append(topics_vector)
             self.docid = docid
     self.different_word = vocabulary
     # Plain nested lists: documents rarely share a length, which makes
     # stacking them into a rectangular array fail.
     self.xcorpus = documents
     self.ycorpus = assignments
 def initilize(self, path="07-train.txt"):
     """Load the training corpus and give every word a random topic.

     Every line of ``path`` is treated as one document and split on single
     spaces. Each word gets a random topic and is registered through
     ``add_count(self.xcounts, self.ycounts).add_counter`` under the
     document's 0-based line index.

     After the call:
         * ``self.xcorpus`` is a list with one list of words per document,
         * ``self.ycorpus`` is a list with one list of topics per document
           (same shape as ``self.xcorpus``; rows may differ in length),
         * ``self.different_word`` is the set of distinct words seen in the
           whole file,
         * ``self.docid`` is the index of the last document read (0 when
           the file is empty).

     Args:
         path: Corpus file to read; defaults to the file the original
             implementation hard-coded.
     """
     adder = add_count(self.xcounts, self.ycounts)
     documents = []
     assignments = []
     vocabulary = set()
     self.docid = 0
     with open(path, "r") as corpus_file:
         for docid, line in enumerate(corpus_file):
             # Strip the real newline; the literal two-character string
             # that was here before stripped trailing "n" letters instead.
             words = line.rstrip("\n").split(" ")
             topics_vector = []
             for word in words:
                 # NOTE(review): assumes a numpy.random.randint-style call
                 # returning a value in [0, TOPICS), matching the
                 # range(TOPICS) candidates used by sampler -- confirm the
                 # import.
                 topic = randint(self.TOPICS)
                 topics_vector.append(topic)
                 adder.add_counter(word, topic, docid, 1)
             vocabulary.update(words)
             documents.append(words)
             assignments.append(topics_vector)
             self.docid = docid
     self.different_word = vocabulary
     # Plain nested lists: documents rarely share a length, which makes
     # stacking them into a rectangular array fail.
     self.xcorpus = documents
     self.ycorpus = assignments
 def test_add(self):
     """Create an ``add_count`` helper with no constructor arguments.

     NOTE(review): this snippet appears truncated -- nothing is asserted
     and ``adder`` is never used. Other call sites in this file construct
     ``add_count`` with two count tables; confirm that a zero-argument
     call is supported.
     """
     adder = add_count()
Exemple #6
0
 def test_add(self):
     adder = add_count()