コード例 #1
0
    def to_disk(self, path: Path, **kwargs):
        """Write the CandidateGenerator's state into a directory.

        Every piece of state has its own writer, keyed by the name that is
        handed to the module-level ``to_disk`` helper together with ``path``:
        the index settings and both alias collections go out as JSON, the ANN
        index through its own ``saveIndex``, the vectorizer through joblib and
        the alias TF-IDF matrix as a float16 ``.npz`` file.

        path (Path): Directory to serialize to
        **kwargs: Accepted but not used by this method.
        """
        # Snapshot of the index settings, taken before any writer runs.
        settings = dict(
            k=self.k,
            m_parameter=self.m_parameter,
            ef_search=self.ef_search,
            ef_construction=self.ef_construction,
            n_threads=self.n_threads,
        )

        def write_settings(target):
            return srsly.write_json(target, settings)

        def write_aliases(target):
            return srsly.write_json(target.with_suffix(".json"), self.aliases)

        def write_short_aliases(target):
            return srsly.write_json(
                target.with_suffix(".json"), self.short_aliases)

        def write_ann_index(target):
            # saveIndex is given a plain string rather than a Path object.
            return self.ann_index.saveIndex(str(target.with_suffix(".bin")))

        def write_vectorizer(target):
            return joblib.dump(self.vectorizer, target.with_suffix(".joblib"))

        def write_tfidf_vectors(target):
            # The matrix is cast to float16 right before it is saved.
            return scipy.sparse.save_npz(
                target.with_suffix(".npz"),
                self.alias_tfidfs.astype(np.float16))

        serializers = {
            "cg_cfg": write_settings,
            "aliases": write_aliases,
            "short_aliases": write_short_aliases,
            "ann_index": write_ann_index,
            "tfidf_vectorizer": write_vectorizer,
            "tfidf_vectors_sparse": write_tfidf_vectors,
        }

        # Third argument is an empty collection (presumably the helper's
        # exclude list - confirm), so every writer above is eligible to run.
        to_disk(path, serializers, {})
コード例 #2
0
    def to_disk(self,
                path: Union[str, Path],
                *,
                exclude: Iterable[str] = tuple()) -> None:
        """Serialize the pipe to disk.

        Three fields are registered with ``util.to_disk``: "cfg" (written as
        JSON), "vocab" (delegated to the vocab's own ``to_disk``) and "model"
        (tokenizer, weights and config of the wrapped transformer).

        path (str / Path): Path to a directory.
        exclude (Iterable[str]): String names of serialization fields to exclude.

        DOCS: https://nightly.spacy.io/api/transformer#to_disk
        """
        def write_cfg(target):
            return srsly.write_json(target, self.cfg)

        def write_vocab(target):
            return self.vocab.to_disk(target)

        def write_transformer(target):
            model_dir = Path(target).absolute()
            # Create the directory only when nothing exists at that path yet.
            if not model_dir.exists():
                model_dir.mkdir()
            tokenizer = self.model.attrs["tokenizer"]
            tokenizer.save_pretrained(str(model_dir))
            wrapped = self.model.layers[0].shims[0]._model
            torch.save(wrapped.state_dict(), model_dir / WEIGHTS_NAME)
            wrapped.config.to_json_file(model_dir / CONFIG_NAME)

        writers = {
            "cfg": write_cfg,
            "vocab": write_vocab,
            "model": write_transformer,
        }
        util.to_disk(path, writers, exclude)
コード例 #3
0
    def to_disk(self, path: Path, exclude: Tuple = tuple(), **kwargs):
        """Serialize RemoteAnnLinker to disk.

        The only field written is the component config, stored as JSON under
        the "cfg" key.

        path (Path): directory to serialize to
        exclude (Tuple, optional): names of serialization fields to skip.
            Defaults to tuple().
        **kwargs: accepted but not used by this method.
        """
        path = ensure_path(path)
        serializers = {"cfg": lambda p: srsly.write_json(p, self.cfg)}

        # Forward the caller's exclusions instead of a hard-coded empty
        # collection, so the documented `exclude` argument is actually
        # honored. `or ()` keeps a None value working as it did before.
        to_disk(path, serializers, exclude or ())
コード例 #4
0
    def to_disk(
        self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
    ) -> None:
        """Serialize the pipe to disk.

        Registers one writer per field with ``util.to_disk``: "cfg" is written
        as JSON, while "vocab" and "model" are delegated to the ``to_disk``
        methods of the vocab and the model.

        path (str / Path): Path to a directory.
        exclude (Iterable[str]): String names of serialization fields to exclude.

        DOCS: https://spacy.io/api/transformer#to_disk
        """
        def write_cfg(target):
            return srsly.write_json(target, self.cfg)

        def write_vocab(target):
            return self.vocab.to_disk(target)

        def write_model(target):
            return self.model.to_disk(target)

        writers = {
            "cfg": write_cfg,
            "vocab": write_vocab,
            "model": write_model,
        }
        util.to_disk(path, writers, exclude)
コード例 #5
0
    def to_disk(self, path, exclude=tuple(), **kwargs):
        """Serialize the pipe and its model to disk.

        Three fields are registered with ``util.to_disk``: "cfg" (written as
        JSON), "vocab" (delegated to the vocab's own ``to_disk``) and "model"
        (tokenizer, weights and config of the wrapped transformer).

        path: Directory to serialize to.
        exclude: Names of serialization fields to skip. Defaults to tuple().
        **kwargs: Passed to ``util.get_serialization_exclude`` together with
            ``exclude`` to compute the final exclusion list.
        """

        def save_model(p):
            trf_dir = Path(p).absolute()
            # Saving over an existing model directory must not fail: a bare
            # mkdir() raises FileExistsError when the directory is already
            # there (e.g. when re-saving to the same path).
            trf_dir.mkdir(exist_ok=True)
            self.model.attrs["tokenizer"].save_pretrained(str(trf_dir))
            transformer = self.model.layers[0].shims[0]._model
            torch.save(transformer.state_dict(), trf_dir / WEIGHTS_NAME)
            transformer.config.to_json_file(trf_dir / CONFIG_NAME)

        serialize = {
            "cfg": lambda p: srsly.write_json(p, self.cfg),
            "vocab": lambda p: self.vocab.to_disk(p),
            "model": save_model,
        }
        exclude = util.get_serialization_exclude(serialize, exclude, kwargs)
        util.to_disk(path, serialize, exclude)
コード例 #6
0
 def to_disk(self, path, **_kwargs):
     """Write this component's config to `path` as JSON.

     The path is normalised with ``util.ensure_path`` and a single "cfg"
     writer is handed to ``util.to_disk`` with an empty exclude list.

     path: Directory to serialize to.
     **_kwargs: Accepted but not used by this method.
     RETURNS: Whatever ``util.to_disk`` returns.
     """
     target_dir = util.ensure_path(path)

     def write_cfg(target):
         # The config is computed at the moment the writer is invoked.
         return srsly.write_json(target, self._get_config())

     serializers = OrderedDict()
     serializers["cfg"] = write_cfg
     return util.to_disk(target_dir, serializers, [])
コード例 #7
0
ファイル: fi.py プロジェクト: aajanki/spacy-fi
 def to_disk(self,
             path: Union[str, Path],
             *,
             exclude: Iterable[str] = SimpleFrozenList()):
     """Serialize the component's lookups to disk.

     A single "lookups" writer, which delegates to ``self.lookups.to_disk``,
     is registered with ``util.to_disk``.

     path (str / Path): Path handed to ``util.to_disk``.
     exclude (Iterable[str]): Names of serialization fields to exclude.
     """
     def write_lookups(target):
         return self.lookups.to_disk(target)

     util.to_disk(path, {"lookups": write_lookups}, exclude)