Example #1
0
 def save(self, output, deps=None):
     """
     Serialize the model to `output`.

     Regenerates `self._meta` from `self.NAME`, the ast2vec version and the
     dependencies, then hands the metadata and the tree made of the merged
     tokens and the disassembled matrix over to `write_model`.

     :param output: Destination passed to `write_model` unchanged.
     :param deps: Optional dependencies unpacked into `generate_meta`; \
                  any falsy value means "no dependencies".
     """
     dependencies = deps or ()
     self._meta = generate_meta(self.NAME, ast2vec.__version__, *dependencies)
     tree = {
         "tokens": merge_strings(self.tokens),
         "matrix": disassemble_sparse_matrix(self.matrix),
     }
     write_model(self._meta, tree, output)
Example #2
0
 def save(self, output, deps=None):
     """
     Serialize the model to `output`; at least two dependencies are required.

     :param output: Destination passed to `write_model` unchanged.
     :param deps: Dependencies unpacked into `generate_meta`. The error \
                  message names DocumentFrequencies and Id2Vec.
     :raises ValueError: if `deps` is falsy or holds fewer than two items.
     """
     has_required_deps = bool(deps) and len(deps) >= 2
     if not has_required_deps:
         raise ValueError(
             "You must specify DocumentFrequencies and Id2Vec dependencies "
             "to save NBOW.")
     self._meta = generate_meta(self.NAME, ast2vec.__version__, *deps)
     tree = {
         "repos": merge_strings(self._repos),
         "matrix": disassemble_sparse_matrix(self._matrix),
     }
     write_model(self._meta, tree, output)
Example #3
0
 def save(self, output, deps: Union[None, list] = None) -> None:
     """
     Serialize the model to `output`.

     :param output: Destination passed to `write_model` unchanged.
     :param deps: Dependencies unpacked into `generate_meta`; when falsy, \
                  `self.meta["dependencies"]` is used instead.
     """
     dependencies = deps or self.meta["dependencies"]
     self._meta = generate_meta(self.NAME, ast2vec.__version__, *dependencies)
     tokens = merge_strings(self.tokens)
     if self.topics is None:
         # A missing topic list is stored as the literal False.
         topics = False
     else:
         topics = merge_strings(self.topics)
     matrix = disassemble_sparse_matrix(self.matrix)
     tree = {"tokens": tokens, "topics": topics, "matrix": matrix}
     write_model(self._meta, tree, output)
Example #4
0
 def save(self, output, deps=None):
     """
     Serialize the model to `output`, unless the model has no tokens.

     `self._meta` is regenerated in either case; `write_model` is only
     called when `self.tokens` is truthy, otherwise a warning is logged.

     :param output: Destination passed to `write_model` unchanged.
     :param deps: Dependencies unpacked into `generate_meta`. The error \
                  message names DocumentFrequencies.
     :raises ValueError: if `deps` is falsy.
     """
     if not deps:
         raise ValueError(
             "You must specify DocumentFrequencies dependency to save BOW.")
     self._meta = generate_meta(self.NAME, ast2vec.__version__, *deps)
     if not self.tokens:
         # Empty model: report it and skip write_model entirely.
         self._log.warning("Did not write %s because the model is empty",
                           output)
         return
     tree = {
         "repos": merge_strings(self._repos),
         "matrix": disassemble_sparse_matrix(self.matrix),
         "tokens": merge_strings(self.tokens),
     }
     write_model(self._meta, tree, output)
Example #5
0
 def test_disassemble_sparse_matrix_empty(self):
     """Disassemble an all-zero 10x10 CSR matrix and check the result."""
     dense = numpy.zeros((10, 10), dtype=numpy.float32)
     tree = disassemble_sparse_matrix(csr_matrix(dense))
     self.assertIsInstance(tree, dict)
     for key in ("shape", "format", "data"):
         self.assertIn(key, tree)
     self.assertEqual(tree["shape"], dense.shape)
     self.assertEqual(tree["format"], "csr")
     parts = tree["data"]
     self.assertIsInstance(parts, list)
     self.assertEqual(len(parts), 3)
     # Nothing is stored, so the first two arrays must be empty and the
     # third one must consist of zeros only.
     for empty_part in parts[:2]:
         self.assertEqual(empty_part.size, 0)
     self.assertTrue((parts[2] == 0).all())
Example #6
0
 def test_disassemble_sparse_matrix(self):
     """
     Disassemble a non-empty CSR matrix and compare with its raw arrays.

     The result must be a dict with the "shape", "format" and "data" keys,
     where "data" holds the matrix's `data`, `indices` and `indptr` arrays
     in this order.
     """
     arr = numpy.zeros((10, 10), dtype=numpy.float32)
     numpy.random.seed(0)
     # Index rows and columns separately so that only individual cells are
     # set. `arr[coords] = 1` with a 2-D integer array indexes the first
     # axis only, i.e. it assigns whole rows and makes the matrix dense.
     coords = numpy.random.randint(0, 10, (50, 2))
     arr[coords[:, 0], coords[:, 1]] = 1
     mat = csr_matrix(arr)
     dis = disassemble_sparse_matrix(mat)
     self.assertIsInstance(dis, dict)
     self.assertIn("shape", dis)
     self.assertIn("format", dis)
     self.assertIn("data", dis)
     self.assertEqual(dis["shape"], arr.shape)
     self.assertEqual(dis["format"], "csr")
     self.assertIsInstance(dis["data"], (tuple, list))
     self.assertEqual(len(dis["data"]), 3)
     self.assertTrue((dis["data"][0] == mat.data).all())
     self.assertTrue((dis["data"][1] == mat.indices).all())
     self.assertTrue((dis["data"][2] == mat.indptr).all())
Example #7
0
 def _generate_tree(self):
     """
     Build the tree from the merged tokens and the disassembled matrix.

     :return: dict with the "tokens" and "matrix" keys.
     """
     tokens = merge_strings(self.tokens)
     matrix = disassemble_sparse_matrix(self.matrix)
     return {"tokens": tokens, "matrix": matrix}