def test_save(self):
    """Save the fixture model to a temporary file, load it back and compare.

    The reloaded model must expose the same tokens, and the element-wise
    inequality of the two matrices must contain no stored entries.
    """
    with tempfile.NamedTemporaryFile(
            prefix="sourced.ml-topics-test-") as tmp:
        path = tmp.name
        self.model.save(path)
        restored = Topics().load(path)
        self.assertEqual(self.model.tokens, restored.tokens)
        # A sparse "!=" yields a matrix holding only the differing cells.
        differing = (self.model.matrix != restored.matrix).getnnz()
        self.assertEqual(differing, 0)
def bigartm2asdf(args):
    """
    BigARTM "readable" model -> Topics -> Modelforge ASDF.

    The input is parsed as ";"-separated text. The first line is a header and
    is skipped. In every following line the first field is the token, the
    second field is ignored and each remaining field is the value of one
    topic for that token. Blank lines are skipped and zero values are not
    stored in the sparse matrix.

    :param args: namespace with two attributes: `input` - path to the text \
                 model, or "-" to read it from stdin; `output` - path which \
                 is passed to `Topics.save()`.
    :raises ValueError: if the input has no token rows after the header, or \
                        if a topic field is not a valid float.
    """
    tokens = []
    data = []
    indices = []
    indptr = [0]
    num_topics = 0
    read_stdin = args.input == "-"
    fin = sys.stdin if read_stdin else open(args.input)
    try:
        # the first line is the header
        fin.readline()
        for line in fin:
            # Drop the line terminator: otherwise the last field reads "0\n"
            # and a zero in the last topic column is stored explicitly.
            line = line.rstrip("\r\n")
            if not line:
                # e.g. a trailing empty line at the end of the file
                continue
            items = line.split(";")
            tokens.append(items[0])
            # The matrix width follows the most recently read row.
            num_topics = len(items) - 2
            for i, v in enumerate(items[2:]):
                value = float(v)
                if value == 0:
                    continue
                data.append(value)
                indices.append(i)
            # CSR row pointer: the running count of stored values.
            indptr.append(len(data))
    finally:
        # stdin is not ours to close
        if not read_stdin:
            fin.close()
    if not tokens:
        raise ValueError(
            "no token rows found in the BigARTM model: %s" % args.input)
    data = numpy.array(data, dtype=numpy.float32)
    indices = numpy.array(indices, dtype=numpy.int32)
    # Rows are tokens while parsing; transpose so that rows become topics.
    matrix = csr_matrix((data, indices, indptr),
                        shape=(len(tokens), num_topics)).T
    Topics().construct(tokens, None, matrix).save(args.output)
def test_bigartm2asdf(self):
    """Convert the BigARTM fixture into a temporary file and check its size."""
    fixture = os.path.join(os.path.dirname(__file__), paths.TOPICS_SRC)
    with tempfile.NamedTemporaryFile(
            prefix="sourced.ml-topics-test-") as out:
        bigartm2asdf(argparse.Namespace(input=fixture, output=out.name))
        converted = Topics().load(out.name)
        self.assertEqual(len(converted), 320)
        self.assertEqual(len(converted.tokens), 1000)
class TopicsTests(unittest.TestCase):
    """Checks of the Topics model against the bundled fixture files."""

    def setUp(self):
        """Load a fresh copy of the fixture model before every test."""
        fixture = Topics()
        self.model = fixture.load(source=paths.TOPICS)

    def test_dump(self):
        """dump() must return the expected textual summary."""
        # NOTE(review): the literal is kept exactly as found in this file;
        # confirm its whitespace/line breaks against the Topics.dump() output.
        expected = """320 topics, 1000 tokens First 10 tokens: ['ulcancel', 'domainlin', 'trudi', 'fncreateinstancedbaselin', 'wbnz', 'lmultiplicand', 'otronumero', 'qxln', 'gvgq', 'polaroidish'] Topics: unlabeled non-zero elements: 6211 (0.019409)"""  # nopep8
        self.assertEqual(self.model.dump(), expected)

    def test_props(self):
        """Model length, tokens, topic labels and indexing of topic 0."""
        model = self.model
        self.assertEqual(len(model), 320)
        self.assertEqual(len(model.tokens), 1000)
        self.assertIsNone(model.topics)
        first_topic = model[0]
        self.assertEqual(len(first_topic), 8)
        leading = first_topic[0]
        self.assertEqual(leading[0], "olcustom")
        self.assertAlmostEqual(leading[1], 1.23752e-06, places=6)

    def test_label(self):
        """label_topics() rejects bad input and stores valid labels."""
        label = self.model.label_topics
        # Three labels for 320 topics.
        self.assertRaises(ValueError, label, [1, 2, 3])
        # 320 labels, but integers instead of strings.
        self.assertRaises(TypeError, label, list(range(320)))
        label(list(map(str, range(320))))
        self.assertEqual(self.model.topics[0], "0")

    def test_save(self):
        """A saved and reloaded model equals the original one."""
        with tempfile.NamedTemporaryFile(
                prefix="sourced.ml-topics-test-") as tmp:
            path = tmp.name
            self.model.save(path)
            restored = Topics().load(path)
            self.assertEqual(self.model.tokens, restored.tokens)
            differing = (self.model.matrix != restored.matrix).getnnz()
            self.assertEqual(differing, 0)

    def test_bigartm2asdf_entry(self):
        """The converter entry point produces a loadable model."""
        fixture = os.path.join(os.path.dirname(__file__), paths.TOPICS_SRC)
        with tempfile.NamedTemporaryFile(
                prefix="sourced.ml-topics-test-") as out:
            bigartm2asdf_entry(
                argparse.Namespace(input=fixture, output=out.name))
            converted = Topics().load(out.name)
            self.assertEqual(len(converted), 320)
            self.assertEqual(len(converted.tokens), 1000)
def setUp(self):
    """Load the fixture model from paths.TOPICS into self.model."""
    fixture = Topics()
    self.model = fixture.load(source=paths.TOPICS)