def load_pkuseg_processors(path):
    """Restore the pkuseg pre-/post-processor settings from a msgpack file.

    `self` is taken from the enclosing scope (not visible in this block).

    path: Location of the msgpack file; it is expected to hold four items:
        user_dict, do_process, common_words and other_words.
    RAISES (ImportError): If pkuseg cannot be imported and self.use_pkuseg
        is truthy.
    """
    try:
        import pkuseg
    except ImportError:
        # A missing pkuseg is only an error when the tokenizer is configured
        # to use it; otherwise the failure is ignored here.
        if self.use_pkuseg:
            raise ImportError(self._pkuseg_install_msg)
    # Nothing to restore when no segmenter is set.
    if not self.pkuseg_seg:
        return
    user_dict, do_process, common_words, other_words = srsly.read_msgpack(path)
    self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(user_dict)
    postprocesser = self.pkuseg_seg.postprocesser
    postprocesser.do_process = do_process
    # The word lists are converted to sets before being assigned.
    postprocesser.common_words = set(common_words)
    postprocesser.other_words = set(other_words)
def from_disk(self, path: Union[Path, str], exclude: Sequence[str] = tuple()):
    """Populate this Sense2Vec object from a directory on disk.

    The vectors and the "cfg" file are always read. "freqs.json" is read
    whenever it exists. "strings.json" and "cache" are read only when their
    field name ("strings" / "cache") is not in exclude and the file exists.

    path (unicode / Path): The directory to load from.
    exclude (list): Names of serialization fields to skip.
    RETURNS (Sense2Vec): This object, after loading.
    """
    root = Path(path)
    self.vectors = Vectors().from_disk(root)
    self.cfg.update(srsly.read_json(root / "cfg"))

    freqs_file = root / "freqs.json"
    if freqs_file.exists():
        self.freqs = dict(srsly.read_json(freqs_file))

    if "strings" not in exclude:
        strings_file = root / "strings.json"
        if strings_file.exists():
            self.strings = StringStore().from_disk(strings_file)

    if "cache" not in exclude:
        cache_file = root / "cache"
        if cache_file.exists():
            self.cache = srsly.read_msgpack(cache_file)

    # NOTE(review): presumably _row2key is a derived lookup that must be
    # rebuilt for the newly loaded vectors; confirm against its users.
    self._row2key = None
    return self
def load_patterns(p):
    """Read msgpack data from p and hand it to self.add_patterns.

    `self` is taken from the enclosing scope (not visible in this block).

    p: Location of the msgpack file, passed unchanged to srsly.read_msgpack.
    """
    # Resolve the bound method first so the lookup order matches a direct
    # self.add_patterns(...) call.
    add_patterns = self.add_patterns
    loaded = srsly.read_msgpack(p)
    add_patterns(loaded)