def test_read_and_writer_pb(self):
    """Round-trip 20 WordTopicHistogramPB records through the record I/O.

    Writes 20 serialized protobuf messages (each with 20 non-zero topic
    entries) using RecordWriter, then reads them back with RecordReader
    and checks every field and the total number of records.
    """
    # 'with' guarantees the file is flushed and closed before it is
    # reopened for reading, even when an assertion fails mid-way.
    with open('../testdata/recordio.dat', 'wb') as fp:
        record_writer = RecordWriter(fp)
        for i in xrange(20):
            word_topic_hist = WordTopicHistogramPB()
            word_topic_hist.word = i
            for j in xrange(20):
                non_zero = word_topic_hist.sparse_topic_hist.non_zeros.add()
                non_zero.topic = j
                non_zero.count = j + 1
            self.assertTrue(
                record_writer.write(word_topic_hist.SerializeToString()))

    with open('../testdata/recordio.dat', 'rb') as fp:
        record_reader = RecordReader(fp)
        i = 0
        while True:
            blob = record_reader.read()
            # read() yielding None is treated as the end of the stream.
            if blob is None:
                break

            word_topic_hist = WordTopicHistogramPB()
            word_topic_hist.ParseFromString(blob)
            self.assertEqual(i, word_topic_hist.word)

            sparse_topic_hist = word_topic_hist.sparse_topic_hist
            self.assertEqual(20, len(sparse_topic_hist.non_zeros))
            for j, non_zero in enumerate(sparse_topic_hist.non_zeros):
                self.assertEqual(j, non_zero.topic)
                self.assertEqual(j + 1, non_zero.count)
            i += 1

        # Exactly the 20 records written above must have been read back.
        self.assertEqual(20, i)
def _save_word_topic_hist(self, filename):
    """Write every entry of self.word_topic_hist to `filename`.

    Each (word, histogram) pair is converted into a WordTopicHistogramPB
    message and appended to the file as one record via RecordWriter.

    Args:
        filename: path of the output file; it is opened in 'wb' mode, so
            any existing content is overwritten.
    """
    # 'with' closes the file even if serialization or writing raises.
    with open(filename, 'wb') as fp:
        record_writer = RecordWriter(fp)
        for word, ordered_sparse_topic_hist in \
                self.word_topic_hist.iteritems():
            word_topic_hist_pb = WordTopicHistogramPB()
            word_topic_hist_pb.word = word
            # The in-memory histogram is copied into the protobuf field by
            # passing its serialized form through ParseFromString.
            word_topic_hist_pb.sparse_topic_hist.ParseFromString(
                ordered_sparse_topic_hist.serialize_to_string())
            record_writer.write(word_topic_hist_pb.SerializeToString())
def _save_word_topic_hist(self, filename):
    """Write every entry of self.word_topic_hist to `filename`.

    Each (word, histogram) pair is converted into a WordTopicHistogramPB
    message and appended to the file as one record via RecordWriter.

    Args:
        filename: path of the output file; it is opened in 'wb' mode, so
            any existing content is overwritten.
    """
    # 'with' closes the file even if serialization or writing raises.
    with open(filename, 'wb') as fp:
        record_writer = RecordWriter(fp)
        for word, ordered_sparse_topic_hist in \
                self.word_topic_hist.iteritems():
            word_topic_hist_pb = WordTopicHistogramPB()
            word_topic_hist_pb.word = word
            # The in-memory histogram is copied into the protobuf field by
            # passing its serialized form through ParseFromString.
            word_topic_hist_pb.sparse_topic_hist.ParseFromString(
                ordered_sparse_topic_hist.serialize_to_string())
            record_writer.write(word_topic_hist_pb.SerializeToString())
def _load_word_topic_hist(self, filename):
    """Replace self.word_topic_hist with the records stored in `filename`.

    The current contents of self.word_topic_hist are cleared first. Each
    record is parsed as a WordTopicHistogramPB and converted into an
    OrderedSparseTopicHistogram keyed by the record's word.

    Args:
        filename: path of the record file to read (opened in 'rb' mode).

    Returns:
        True if at least one entry is present after loading, else False.
    """
    logging.info('Loading word_topic_hist matrix N(w|z).')
    self.word_topic_hist.clear()

    # 'with' closes the file even if a record fails to parse.
    with open(filename, "rb") as fp:
        record_reader = RecordReader(fp)
        while True:
            blob = record_reader.read()
            # read() yielding None is treated as the end of the stream.
            if blob is None:
                break

            word_topic_hist_pb = WordTopicHistogramPB()
            word_topic_hist_pb.ParseFromString(blob)

            # The protobuf field is copied into the in-memory histogram by
            # passing its serialized form through parse_from_string.
            ordered_sparse_topic_hist = \
                OrderedSparseTopicHistogram(self.num_topics)
            ordered_sparse_topic_hist.parse_from_string(
                word_topic_hist_pb.sparse_topic_hist.SerializeToString())
            self.word_topic_hist[word_topic_hist_pb.word] = \
                ordered_sparse_topic_hist

    return (len(self.word_topic_hist) > 0)