Exemple #1
0
    def test_read_and_writer_pb(self):
        """Round-trip WordTopicHistogramPB records through RecordWriter/RecordReader.

        Writes 20 serialized WordTopicHistogramPB messages (word ``i``, each
        with 20 non-zero entries ``topic=j, count=j + 1``) to a data file,
        then reads them back until ``read()`` returns ``None`` and checks that
        every field and the total record count match what was written.
        """
        path = '../testdata/recordio.dat'
        num_records = 20
        num_topics = 20

        # 'with' guarantees the file is closed even when an assertion fails or
        # the writer raises, so a failing test does not leak the descriptor.
        with open(path, 'wb') as fp:
            record_writer = RecordWriter(fp)
            for i in xrange(num_records):
                word_topic_hist = WordTopicHistogramPB()
                word_topic_hist.word = i
                for j in xrange(num_topics):
                    non_zero = \
                        word_topic_hist.sparse_topic_hist.non_zeros.add()
                    non_zero.topic = j
                    non_zero.count = j + 1
                self.assertTrue(
                    record_writer.write(word_topic_hist.SerializeToString()))

        # The write handle is closed (and therefore flushed) before reading.
        records_read = 0
        with open(path, 'rb') as fp:
            record_reader = RecordReader(fp)
            while True:
                blob = record_reader.read()
                # Identity comparison: None marks the end of the records.
                if blob is None:
                    break
                word_topic_hist = WordTopicHistogramPB()
                word_topic_hist.ParseFromString(blob)
                # Records must come back in the order they were written.
                self.assertEqual(records_read, word_topic_hist.word)
                non_zeros = word_topic_hist.sparse_topic_hist.non_zeros
                self.assertEqual(num_topics, len(non_zeros))
                for j, non_zero in enumerate(non_zeros):
                    self.assertEqual(j, non_zero.topic)
                    self.assertEqual(j + 1, non_zero.count)
                records_read += 1
        self.assertEqual(num_records, records_read)
Exemple #2
0
 def _save_word_topic_hist(self, filename):
     """Write every entry of ``self.word_topic_hist`` to ``filename``.

     Each ``(word, histogram)`` pair is wrapped in a WordTopicHistogramPB:
     ``word`` is stored directly, and ``sparse_topic_hist`` is populated by
     parsing the bytes returned by the histogram's
     ``serialize_to_string()``. The serialized message is then appended to
     the file through a RecordWriter.

     Args:
         filename: path of the output file; it is opened in binary write
             mode, so any existing content is replaced.
     """
     # 'with' closes the file even if serialization or a write raises
     # part-way through the loop.
     with open(filename, 'wb') as fp:
         record_writer = RecordWriter(fp)
         word_topic_items = self.word_topic_hist.iteritems()
         for word, ordered_sparse_topic_hist in word_topic_items:
             word_topic_hist_pb = WordTopicHistogramPB()
             word_topic_hist_pb.word = word
             word_topic_hist_pb.sparse_topic_hist.ParseFromString(
                 ordered_sparse_topic_hist.serialize_to_string())
             # NOTE(review): the result of write() is ignored here; confirm
             # whether it signals failure and should be checked.
             record_writer.write(word_topic_hist_pb.SerializeToString())