Example #1
0
 def _load_tree(self, tree: dict) -> None:
     """Restore this model's state from a deserialized ASDF ``tree``.

     Re-inflates the token list, the word-frequency dictionary, the
     SymSpell spelling checker and the FastText keyed vectors from their
     flattened on-disk representations.

     :param tree: dict of serialized attributes; must contain "checker"
                  and the packed-string fields consumed below.
     :return: None — mutates ``self`` in place.
     """
     # Bulk-copy every stored attribute first; the fields below are then
     # rebuilt from their serialized (packed/flattened) forms.
     self.__dict__.update(tree)
     self.tokens = split_strings(self.tokens)
     # ``frequencies`` was stored as parallel arrays: "keys" (packed
     # strings) and "vals"; zip them back into a word -> value dict.
     self.frequencies = {
         w: self.frequencies["vals"][i]
         for i, w in enumerate(split_strings(self.frequencies["keys"]))}
     self.checker = SymSpell(max_dictionary_edit_distance=self.max_distance)
     self.checker.__dict__.update(tree["checker"])
     # ``_deletes`` was serialized as a ragged array: "indexes" are the
     # dict keys, "lengths"[i] is the size of the i-th value list, and
     # "data" holds indices into the packed "strings" word list.
     deletes = {}
     words = split_strings(self.checker._deletes["strings"])
     lengths = self.checker._deletes["lengths"]
     data = self.checker._deletes["data"]
     offset = 0
     for i, delindex in enumerate(self.checker._deletes["indexes"]):
         length = lengths[i]
         deletes[delindex] = [words[j] for j in data[offset:offset + length]]
         offset += length
     self.checker._deletes = deletes
     # ``_words`` was stored as an array parallel to ``words``; restore
     # the word -> value mapping.
     self.checker._words = {w: self.checker._words[i] for i, w in enumerate(words)}
     # Rebuild the FastText keyed vectors from the stored arrays.
     vectors = self.wv["vectors"]
     wv = FastTextKeyedVectors(vectors.shape[1], self.wv["min_n"], self.wv["max_n"],
                               self.wv["bucket"], True)
     wv.vectors = numpy.array(vectors)
     vocab = split_strings(self.wv["vocab"]["strings"])
     # Vocab entries were stored as packed strings plus a parallel
     # "counts" array, ordered by index.
     wv.vocab = {
         s: Vocab(index=i, count=self.wv["vocab"]["counts"][i])
         for i, s in enumerate(vocab)}
     wv.bucket = self.wv["bucket"]
     wv.index2word = wv.index2entity = vocab
     wv.num_ngram_vectors = self.wv["num_ngram_vectors"]
     wv.vectors_ngrams = numpy.array(self.wv["vectors_ngrams"])
     # "hash2index" was stored as the sequence of hashes ordered by
     # index; invert it back into hash -> index.
     wv.hash2index = {k: v for v, k in enumerate(self.wv["hash2index"])}
     self.wv = wv
Example #2
0
 def _load_tree(self, tree):
     """Reconstruct per-language library names and metadata from ``tree``.

     :param tree: deserialized ASDF tree with a "langs" list and one
                  sub-tree per language.
     :return: None — delegates to :meth:`construct`.
     """
     langs = tree["langs"]
     names = {}
     # Pre-seed every language so each one gets a metadata dict even if
     # its "library_metadata" sub-tree is empty.
     metadata = {lang: {} for lang in langs}
     for lang in langs:
         lang_node = tree[lang]
         names[lang] = set(split_strings(lang_node["library_names"]))
         for meta, libs in lang_node["library_metadata"].items():
             metadata[lang][meta] = set(split_strings(libs))
     self.construct(names, metadata)
Example #3
0
 def _load_tree(self, tree):
     """Rebuild the model from a deserialized ASDF ``tree``.

     Restores the sparse matrix, the file and dependency name lists and
     the index -> language / index -> repository lookup tables, then
     hands everything to :meth:`construct`.
     """
     matrix = assemble_sparse_matrix(tree["matrix"])
     files = split_strings(tree["files"])
     deps = split_strings(tree["deps"])
     # The lookup tables were stored as packed strings ordered by index;
     # dict(enumerate(...)) restores the position -> value mapping.
     ind_to_langs = dict(enumerate(split_strings(tree["ind_to_langs"])))
     ind_to_repos = dict(enumerate(split_strings(tree["ind_to_repos"])))
     self.construct(matrix, files, deps, ind_to_langs, ind_to_repos)
Example #4
0
 def _load_tree(self, tree: dict, tokens=None):
     """Restore the document-frequency model from ``tree``.

     :param tree: deserialized ASDF tree with "tokens", "freqs", "docs".
     :param tokens: optional pre-split token list; when None the packed
                    "tokens" field of ``tree`` is unpacked here.
     :return: None — delegates to :meth:`construct`.
     """
     if tokens is None:
         tokens = split_strings(tree["tokens"])
     freqs = tree["freqs"]
     self._log.info("Building the docfreq dictionary...")
     # Pair each token with its frequency, in stored order.
     tokfreq = {token: freq for token, freq in zip(tokens, freqs)}
     self.construct(docs=tree["docs"], tokfreqs=tokfreq)
Example #5
0
 def _load_tree(self, tree):
     """Restore the per-scheme class -> levels mapping from ``tree``.

     For every scheme key, splits the flat ``levels`` array into
     ``len(classes)`` equal chunks (``numpy.split`` requires the length
     to be an exact multiple) and maps each class name to its chunk.
     """
     self._levels = {}
     for key, vals in tree["schemes"].items():
         classes = split_strings(vals["classes"])
         levels = vals["levels"]
         # BUG FIX: the original initialized ``self._levels`` but then
         # populated ``self.levels``, so the private dict stayed empty
         # unless ``levels`` happens to be a property over ``_levels``.
         # Write to the attribute that was just initialized.
         # NOTE(review): confirm no caller relied on a plain
         # ``self.levels`` attribute being created here.
         self._levels[key] = dict(
             zip(classes, numpy.split(levels, len(classes))))
Example #6
0
def validate_asdf_file(obj, filename):
    """Assert that *filename* is a well-formed "uast" ASDF model file.

    :param obj: a unittest.TestCase (or any object exposing assert* methods).
    :param filename: path of the ASDF file to validate.
    :raises AssertionError: if any expected key or value is missing.
    """
    # Use a context manager so the file handle is released even when an
    # assertion fails (the original never closed the AsdfFile).
    with asdf.open(filename) as data:
        obj.assertIn("meta", data.tree)
        obj.assertIn("filenames", data.tree)
        obj.assertIn("uasts", data.tree)
        obj.assertIn("repository", data.tree)
        obj.assertEqual(data.tree["meta"]["model"], "uast")
        # Smoke-check that the first packed UAST blob deserializes.
        Node.FromString(split_strings(data.tree["uasts"])[0])
        obj.assertEqual(0, len(data.tree["meta"]["dependencies"]))
        # (The original repeated the model-name assertion here; the
        # duplicate was removed.)
def validate_asdf_file(obj, filename):
    """Assert that *filename* is a well-formed "snippet" ASDF model file.

    :param obj: a unittest.TestCase (or any object exposing assert* methods).
    :param filename: path of the ASDF file to validate.
    :raises AssertionError: if any expected key or value is missing.
    """
    # Use a context manager so the file handle is released even when an
    # assertion fails (the original never closed the AsdfFile).
    with asdf.open(filename) as data:
        obj.assertIn("meta", data.tree)
        obj.assertIn("filenames", data.tree)
        obj.assertIn("sources", data.tree)
        obj.assertIn("uasts", data.tree)
        obj.assertIn("repository", data.tree)
        obj.assertIn("positions", data.tree)
        # Smoke-check that the first packed UAST blob deserializes.
        Node.FromString(split_strings(data.tree["uasts"])[0])
        # Sources and UASTs must be parallel arrays.
        obj.assertEqual(data.tree["sources"]["lengths"].shape[0],
                        data.tree["uasts"]["lengths"].shape[0])
        obj.assertEqual(0, len(data.tree["meta"]["dependencies"]))
        obj.assertEqual(data.tree["meta"]["model"], "snippet")
Example #8
0
 def _load_tree(self, tree: dict) -> None:
     """Rebuild the model from packed tokens, optional topics and a
     sparse matrix stored in ``tree``."""
     tokens = split_strings(tree["tokens"])
     if tree["topics"]:
         topics = split_strings(tree["topics"])
     else:
         # Topics are optional; an empty/falsy field means none were saved.
         topics = None
     matrix = assemble_sparse_matrix(tree["matrix"])
     self.construct(tokens, topics, matrix)
Example #9
0
 def _load_tree(self, tree):
     """Restore the model from stored embeddings and packed token strings."""
     # Copy the embeddings so the model does not alias the ASDF-backed array.
     embeddings = tree["embeddings"].copy()
     tokens = split_strings(tree["tokens"])
     self.construct(embeddings=embeddings, tokens=tokens)
Example #10
0
 def _load_tree(self, tree):
     """Restore the base model, then record each token's stored position."""
     tokens = split_strings(tree["tokens"])
     # Pass the already-unpacked tokens so the parent does not re-split them.
     super()._load_tree(tree, tokens)
     self._log.info("Mapping the keys order...")
     order = {}
     for position, token in enumerate(tokens):
         order[token] = position
     self._order = order
Example #11
0
 def _load_tree(self, tree: dict) -> None:
     """Restore the base model and the set of known identifiers."""
     super()._load_tree(tree)
     identifiers = split_strings(tree["identifiers"])
     self._identifiers = set(identifiers)
Example #12
0
 def _load_tree(self, tree):
     """Restore element names and the CSR-style community partition."""
     self.id_to_element = split_strings(tree["elements"])
     data = tree["data"]
     indptr = tree["indptr"]
     # indptr holds slice boundaries: community i spans
     # data[indptr[i]:indptr[i + 1]].
     communities = []
     for start, stop in zip(indptr, indptr[1:]):
         communities.append(data[start:stop])
     self.communities = communities
Example #13
0
 def _load_tree_kwargs(self, tree: dict):
     """Unpack documents, matrix and tokens from ``tree`` and return them
     as keyword arguments for the constructor."""
     return dict(
         documents=split_strings(tree["documents"]),
         matrix=assemble_sparse_matrix(tree["matrix"]),
         tokens=split_strings(tree["tokens"]))
Example #14
0
 def _load_tree(self, tree):
     """Restore element names and the CSR-style community partition."""
     self.id_to_element = split_strings(tree["elements"])
     data, indptr = tree["data"], tree["indptr"]
     # Consecutive indptr entries delimit each community's slice of data.
     bounds = zip(indptr, indptr[1:])
     self.communities = [data[lo:hi] for lo, hi in bounds]
Example #15
0
 def _load_tree(self, tree):
     """Restore connected components, hash buckets and element names."""
     cc = tree["cc"]
     # Touch the first element to force the lazily loaded array off disk.
     cc[0]  # do not remove - loads the array from disk
     self.id_to_cc = cc
     self.id_to_buckets = assemble_sparse_matrix(tree["buckets"])
     self.id_to_element = split_strings(tree["elements"])
Example #16
0
 def _load_tree(self, tree):
     """Restore connected components, element names and hash buckets."""
     self.id_to_cc = tree["cc"]
     # Indexing forces the lazily loaded ASDF array to materialize.
     _ = self.id_to_cc[0]  # do not remove - loads the array from disk
     self.id_to_element = split_strings(tree["elements"])
     self.id_to_buckets = assemble_sparse_matrix(tree["buckets"])
Example #17
0
 def _load_tree_kwargs(self, tree: dict):
     """Unpack documents, matrix and tokens from ``tree`` and return them
     as keyword arguments for the constructor."""
     return {
         "documents": split_strings(tree["documents"]),
         "matrix": assemble_sparse_matrix(tree["matrix"]),
         "tokens": split_strings(tree["tokens"]),
     }