Example #1
0
 def load_pkuseg_processors(path):
     """Restore the pkuseg pre-/post-processor state from a msgpack file.

     `self` is not a parameter of this function; it is resolved from the
     enclosing scope (not visible in this snippet).

     path: Location of the msgpack file. It must unpack to a 4-item
         sequence: (user_dict, do_process, common_words, other_words).
     RAISES (ImportError): If pkuseg cannot be imported while it is
         required, i.e. `self.use_pkuseg` is set or a segmenter object
         (`self.pkuseg_seg`) is present and needs its processors rebuilt.
     """
     try:
         import pkuseg
     except ImportError:
         if self.use_pkuseg:
             # `from None` hides the internal import traceback; the install
             # message is the actionable information for the user.
             raise ImportError(self._pkuseg_install_msg) from None
         # Remember that the import failed so the name is never left unbound.
         pkuseg = None
     if not self.pkuseg_seg:
         # No segmenter to configure: nothing is read from disk.
         return
     if pkuseg is None:
         # A segmenter exists but the package is gone: fail with the install
         # hint instead of an UnboundLocalError on `pkuseg` below.
         raise ImportError(self._pkuseg_install_msg)
     user_dict, do_process, common_words, other_words = srsly.read_msgpack(path)
     self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(user_dict)
     postprocesser = self.pkuseg_seg.postprocesser
     postprocesser.do_process = do_process
     # The word collections are converted to sets before being assigned.
     postprocesser.common_words = set(common_words)
     postprocesser.other_words = set(other_words)
Example #2
0
    def from_disk(self, path: Union[Path, str], exclude: Sequence[str] = tuple()):
        """Populate this object in place from a directory on disk.

        path (unicode / Path): The directory to read from.
        exclude (list): Names of serialization fields to skip. Only
            "strings" and "cache" are checked here.
        RETURNS (Sense2Vec): This object, after loading.
        """
        root = Path(path)

        # Vectors and config are always loaded.
        self.vectors = Vectors().from_disk(root)
        self.cfg.update(srsly.read_json(root / "cfg"))

        # Frequencies are optional on disk and not subject to `exclude`.
        freqs_file = root / "freqs.json"
        if freqs_file.exists():
            self.freqs = dict(srsly.read_json(freqs_file))

        if "strings" not in exclude:
            strings_file = root / "strings.json"
            if strings_file.exists():
                self.strings = StringStore().from_disk(strings_file)

        if "cache" not in exclude:
            cache_file = root / "cache"
            if cache_file.exists():
                self.cache = srsly.read_msgpack(cache_file)

        # Reset the row-to-key mapping after loading.
        self._row2key = None
        return self
Example #3
0
 def load_patterns(p):
     """Read msgpack-serialized patterns from `p` and register them.

     `self` is not a parameter of this function; it is resolved from the
     enclosing scope (not visible in this snippet). The loaded data is
     passed unchanged to `self.add_patterns`.
     """
     patterns = srsly.read_msgpack(p)
     self.add_patterns(patterns)