Ejemplo n.º 1
0
 def convert_model(self, model: Snippet) -> BOW:
     """
     Convert a model holding several UASTs into one BOW with a row per UAST.

     Every item of ``model.uasts`` is passed through ``self._uasts2bow``; each
     returned bag is iterated for its keys and indexed for the matching
     values, which become the column indices and the stored weights of a
     float32 CSR matrix.

     :param model: object exposing ``uasts`` (iterable) and ``names``.
     :return: BOW constructed with ``repos=model.names``, the assembled \
              matrix and ``self._tokens``; its ``meta["dependencies"]`` is \
              set to ``[self._uasts2bow.docfreq]``.
     """
     bags = [self._uasts2bow(uast) for uast in model.uasts]
     # Collect the COO triplets in three parallel lists. Unzipping a single
     # list of triples leaves nothing to index when no bag has any entry
     # (IndexError); empty lists still describe a valid all-zero matrix.
     weights, rows, columns = [], [], []
     for row, bag in enumerate(bags):
         for column in bag:
             weights.append(bag[column])
             rows.append(row)
             columns.append(column)
     matrix = csr_matrix((weights, (rows, columns)),
                         shape=(len(bags), len(self._uasts2bow.vocabulary)),
                         dtype=numpy.float32)
     bow = BOW(log_level=logging.WARNING)
     bow.construct(repos=model.names, matrix=matrix, tokens=self._tokens)
     bow.meta["dependencies"] = [self._uasts2bow.docfreq]
     return bow
Ejemplo n.º 2
0
 def convert_model(self, model: UASTModel) -> BOW:
     """
     Convert a UAST model into a BOW model containing a single row.

     ``self._uasts2bow`` is applied to ``model.uasts``; the keys of the
     returned mapping become the int32 column indices and its values the
     float32 data of a one-row CSR matrix that is as wide as
     ``self._uasts2bow.vocabulary``.

     :param model: object exposing ``uasts`` and ``repository``.
     :return: BOW constructed with ``repos=[model.repository]``, the matrix \
              and ``self._tokens``; its ``meta["dependencies"]`` is set to \
              ``[self._uasts2bow.docfreq]``.
     """
     weights_by_column = self._uasts2bow(model.uasts)
     weights = numpy.array(list(weights_by_column.values()),
                           dtype=numpy.float32)
     columns = numpy.array(list(weights_by_column.keys()),
                           dtype=numpy.int32)
     # One row only: the row pointer spans every stored value.
     row_pointer = [0, len(weights)]
     row_shape = (1, len(self._uasts2bow.vocabulary))
     matrix = csr_matrix((weights, columns, row_pointer), shape=row_shape)
     result = BOW(log_level=logging.WARNING)
     result.construct(repos=[model.repository], matrix=matrix,
                      tokens=self._tokens)
     result.meta["dependencies"] = [self._uasts2bow.docfreq]
     return result
Ejemplo n.º 3
0
 def test_all(self):
     """
     Run ``source2bow_entry`` into a temporary directory and check its output.

     Every file written to the directory must load as a BOW with a non-empty
     matrix and exactly one repository, and exactly four files are expected.
     """
     with tempfile.TemporaryDirectory(
             prefix="ast2vec-test-source2bow-") as tmpdir:
         args = argparse.Namespace(processes=2,
                                   input=paths.DATA_DIR_SOURCE,
                                   output=tmpdir,
                                   filter="**/source_*.asdf",
                                   vocabulary_size=500,
                                   docfreq=os.path.join(
                                       os.path.dirname(__file__),
                                       paths.DOCFREQ),
                                   overwrite_existing=True)
         source2bow_entry(args)
         outputs = os.listdir(tmpdir)
         for name in outputs:
             bow = BOW().load(os.path.join(tmpdir, name))
             self.assertGreater(bow._matrix.getnnz(), 0)
             self.assertEqual(len(bow.repos), 1)
         # Assert on the listing length rather than on a loop counter: a
         # counter stays unbound when nothing was written, which surfaces as
         # a NameError instead of a readable assertion failure.
         self.assertEqual(len(outputs), 4)
Ejemplo n.º 4
0
def bow2vw_entry(args: argparse.Namespace):
    """
    Obtain a BOW model from the parsed arguments and hand it to
    ``convert_bow_to_vw``.

    :param args: namespace providing ``nbow``, ``id2vec``, ``bow`` and \
                 ``output``. When ``nbow`` is truthy the model comes from \
                 ``NBOW.as_bow(args.nbow, args.id2vec)``; otherwise it is \
                 loaded from ``args.bow``.
    """
    # NOTE(review): "vw" presumably stands for Vowpal Wabbit - confirm
    # against convert_bow_to_vw.
    bow = (NBOW.as_bow(args.nbow, args.id2vec)
           if args.nbow
           else BOW().load(source=args.bow))
    convert_bow_to_vw(bow, args.output)