def _generate_tree(self):
    """
    Build the dictionary that represents this model for serialization.

    :return: dict with the keys "tokens", "topics" and "matrix". "topics" is \
             False when ``self.topics`` is None.
    """
    packed_tokens = merge_strings(self.tokens)
    if self.topics is None:
        # No topics available: store an explicit False marker instead.
        packed_topics = False
    else:
        packed_topics = merge_strings(self.topics)
    return {
        "tokens": packed_tokens,
        "topics": packed_topics,
        "matrix": disassemble_sparse_matrix(self.matrix),
    }
def _generate_tree(self) -> dict:
    """
    Build the dictionary that represents this model for serialization.

    The tree starts as a shallow copy of the instance attributes, minus every
    attribute that a freshly constructed ``Model()`` carries. The
    "frequencies", "checker", "tokens" and "wv" entries are then replaced
    with array / packed-string representations.

    :return: the tree dictionary.
    """
    tree = self.__dict__.copy()
    for base_attr in vars(Model()):
        del tree[base_attr]

    # Frequencies: parallel "keys" / "vals" sequences ordered by sorted items.
    sorted_freqs = sorted(self.frequencies.items())
    freq_tokens = [token for token, _ in sorted_freqs]
    freq_counts = numpy.zeros(len(sorted_freqs), dtype=numpy.uint32)
    for pos, (_, count) in enumerate(sorted_freqs):
        freq_counts[pos] = count
    tree["frequencies"] = {
        "keys": merge_strings(freq_tokens),
        "vals": freq_counts,
    }

    # Checker: shallow copy of its attributes, "_deletes" and "_words" are
    # overwritten below.
    checker = self.checker
    checker_tree = checker.__dict__.copy()
    tree["checker"] = checker_tree

    deletes = checker._deletes
    delete_keys = numpy.zeros(len(deletes), dtype=numpy.uint32)
    delete_sizes = numpy.zeros_like(delete_keys)
    unique_strings = set()
    for row, (delete_key, group) in enumerate(deletes.items()):
        delete_keys[row] = delete_key
        delete_sizes[row] = len(group)
        unique_strings.update(group)

    # Every distinct string gets an id equal to its rank in sorted order.
    ordered_strings = sorted(unique_strings)
    string_ids = dict(zip(ordered_strings, range(len(ordered_strings))))

    # Flatten the groups into one id array; "lengths" tells where each
    # group ends.
    flat_ids = numpy.zeros(sum(delete_sizes), dtype=numpy.uint32)
    cursor = 0
    for delete_key in delete_keys:
        for string in deletes[delete_key]:
            flat_ids[cursor] = string_ids[string]
            cursor += 1
    checker_tree["_deletes"] = {
        "strings": merge_strings(ordered_strings),
        "indexes": delete_keys,
        "lengths": delete_sizes,
        "data": flat_ids,
    }

    # Word values are stored at the id of the word in the sorted string list.
    word_values = numpy.zeros(len(checker._words), dtype=numpy.uint32)
    for word, value in checker._words.items():
        word_values[string_ids[word]] = value
    checker_tree["_words"] = word_values

    tree["tokens"] = merge_strings(self.tokens)

    # Word vectors: the vocabulary is laid out by each entry's ``index``.
    wv = self.wv
    vocab_size = len(wv.vocab)
    vocab_strings = [""] * vocab_size
    vocab_counts = numpy.zeros(vocab_size, dtype=numpy.uint32)
    for word, entry in wv.vocab.items():
        vocab_strings[entry.index] = word
        vocab_counts[entry.index] = entry.count

    # Invert the hash2index mapping into an array addressed by its values.
    inverted_hashes = numpy.zeros(len(wv.hash2index), dtype=numpy.uint32)
    for hash_key, slot in wv.hash2index.items():
        inverted_hashes[slot] = hash_key

    tree["wv"] = {
        "vocab": {
            "strings": merge_strings(vocab_strings),
            "counts": vocab_counts,
        },
        "vectors": wv.vectors,
        "min_n": wv.min_n,
        "max_n": wv.max_n,
        "bucket": wv.bucket,
        "num_ngram_vectors": wv.num_ngram_vectors,
        "vectors_ngrams": wv.vectors_ngrams,
        "hash2index": inverted_hashes,
    }
    return tree
def _generate_tree(self):
    """
    Build the dictionary that represents this model for serialization.

    :return: dict with a "langs" entry plus one entry per language holding \
             its sorted, packed "library_names" and "library_metadata".
    """
    tree = {"langs": self._langs}
    for lang, names in self._library_names.items():
        packed_names = merge_strings(sorted(names))
        packed_metadata = {
            meta: merge_strings(sorted(libs))
            for meta, libs in self._library_metadata[lang].items()
        }
        tree[lang] = {
            "library_names": packed_names,
            "library_metadata": packed_metadata,
        }
    return tree
def _generate_tree(self):
    """
    Build the dictionary that represents this model for serialization.

    :return: dict with the keys "matrix", "files", "deps", "ind_to_langs" \
             and "ind_to_repos". The last two are listed in file index order.
    """
    matrix = disassemble_sparse_matrix(self._matrix)
    files = merge_strings(self._files)
    deps = merge_strings(self._deps)
    # Look the values up by index so both lists follow the order of the files.
    langs = merge_strings(
        [self._ind_to_langs[index] for index in range(len(self._files))])
    repos = merge_strings(
        [self._ind_to_repos[index] for index in range(len(self._files))])
    return {
        "matrix": matrix,
        "files": files,
        "deps": deps,
        "ind_to_langs": langs,
        "ind_to_repos": repos,
    }
def _generate_tree(self):
    """
    Build the dictionary that represents this model for serialization.

    Tokens and their frequencies are placed at the positions given by
    ``self._order``.

    :return: dict with the keys "docs", "tokens" and "freqs".
    """
    size = len(self)
    ordered_tokens = [None] * size
    frequencies = numpy.zeros(size, dtype=numpy.float32)
    for token, position in self._order.items():
        ordered_tokens[position] = token
        frequencies[position] = self._df[token]
    return {
        "docs": self.docs,
        "tokens": merge_strings(ordered_tokens),
        "freqs": frequencies,
    }
def _generate_tree(self):
    """
    Build the dictionary that represents this model for serialization.

    :return: dict with the keys "docs", "tokens" and "freqs"; "freqs" holds \
             the ``self._df`` value of every token, in ``self.tokens()`` order.
    """
    ordered_tokens = self.tokens()
    values = []
    for token in ordered_tokens:
        values.append(self._df[token])
    frequencies = numpy.array(values, dtype=numpy.float32)
    tree = {"docs": self.docs}
    tree["tokens"] = merge_strings(ordered_tokens)
    tree["freqs"] = frequencies
    return tree
def _generate_tree(self):
    """
    Build the dictionary that represents this model for serialization.

    The communities are flattened into a single "data" array; "indptr" stores
    the boundaries so that community ``i`` is ``data[indptr[i]:indptr[i + 1]]``.

    :return: dict with the keys "data", "indptr" and "elements".
    """
    communities = self.communities
    total = sum(len(members) for members in communities)
    flat = numpy.zeros(total, dtype=numpy.uint32)
    boundaries = numpy.zeros(len(communities) + 1, dtype=numpy.int64)
    start = 0
    for row, members in enumerate(communities, start=1):
        stop = start + len(members)
        flat[start:stop] = members
        boundaries[row] = stop
        start = stop
    return {
        "data": flat,
        "indptr": boundaries,
        "elements": merge_strings(self.id_to_element),
    }
def _generate_tree(self):
    """
    Build the dictionary that represents this model for serialization.

    Every community is copied back to back into "data"; "indptr" records the
    running end offset after each community, starting from 0.

    :return: dict with the keys "data", "indptr" and "elements".
    """
    sizes = [len(community) for community in self.communities]
    data = numpy.zeros(sum(sizes), dtype=numpy.uint32)
    indptr = numpy.zeros(len(self.communities) + 1, dtype=numpy.int64)
    begin = 0
    for index, (community, length) in enumerate(zip(self.communities, sizes)):
        end = begin + length
        data[begin:end] = community
        indptr[index + 1] = end
        begin = end
    tree = {"data": data, "indptr": indptr}
    tree["elements"] = merge_strings(self.id_to_element)
    return tree
def _generate_tree(self):
    """
    Build the dictionary that represents this model for serialization.

    For every scheme in ``self.levels`` the per-class level sequences are
    concatenated into one flat int32 array ("levels"), and the class names
    are packed in the same order ("classes"). The width of each row is the
    length of the first class's sequence.

    :return: dict with the single key "schemes", mapping scheme name to \
             its "levels" / "classes" pair.
    """
    schemes = {}
    tree = {"schemes": schemes}
    for scheme_name, class_levels in self.levels.items():
        # All classes are written with the width of the first one.
        width = len(next(iter(class_levels.values())))
        class_names = [None] * len(class_levels)
        flat_levels = numpy.zeros(len(class_levels) * width, dtype=numpy.int32)
        for row, (class_name, row_levels) in enumerate(class_levels.items()):
            class_names[row] = class_name
            flat_levels[row * width:(row + 1) * width] = row_levels
        schemes[scheme_name] = {
            "levels": flat_levels,
            "classes": merge_strings(class_names),
        }
    return tree
def _generate_tree(self):
    """
    Build the dictionary that represents this model for serialization.

    :return: dict with the keys "cc", "elements" and "buckets".
    """
    tree = {"cc": self.id_to_cc}
    tree["elements"] = merge_strings(self.id_to_element)
    tree["buckets"] = disassemble_sparse_matrix(self.id_to_buckets)
    return tree
def _generate_tree(self):
    """
    Build the dictionary that represents this model for serialization.

    :return: dict with the keys "embeddings" (stored as is) and "tokens" \
             (packed with ``merge_strings``).
    """
    tree = {"embeddings": self.embeddings}
    tree["tokens"] = merge_strings(self.tokens)
    return tree
def _generate_tree(self):
    """
    Build the dictionary that represents this model for serialization.

    :return: dict holding ``id_to_cc`` under "cc", the packed \
             ``id_to_element`` under "elements" and the disassembled \
             ``id_to_buckets`` matrix under "buckets".
    """
    components = self.id_to_cc
    packed_elements = merge_strings(self.id_to_element)
    bucket_parts = disassemble_sparse_matrix(self.id_to_buckets)
    return {
        "cc": components,
        "elements": packed_elements,
        "buckets": bucket_parts,
    }
def _generate_tree(self):
    """
    Build the dictionary that represents this model for serialization.

    :return: dict with the keys "documents", "matrix" and "tokens".
    """
    tree = {"documents": merge_strings(self._documents)}
    tree["matrix"] = disassemble_sparse_matrix(self._matrix)
    tree["tokens"] = merge_strings(self.tokens)
    return tree
def _generate_tree(self) -> dict:
    """
    Extend the parent's tree with the sorted, packed identifiers.

    :return: the tree produced by the parent class with an additional \
             "identifiers" entry.
    """
    tree = super()._generate_tree()
    packed_identifiers = merge_strings(sorted(self._identifiers))
    tree["identifiers"] = packed_identifiers
    return tree