def from_bytes(self, data, **kwargs):
    """Restore the tokenizer state from a serialized byte string.

    ``data`` is handed to ``util.from_bytes`` together with one callback per
    known key: ``cfg`` (JSON, applied through ``self._set_config``),
    ``pkuseg_features`` and ``pkuseg_weights`` (raw bytes) and
    ``pkuseg_processors`` (msgpack). When both the features and the weights
    blob are non-empty they are written to a temporary directory, a pkuseg
    segmenter is built from that directory and stored on
    ``self.pkuseg_seg``, and any processor settings are applied to it.

    Args:
        data: The serialized payload, forwarded unchanged to
            ``util.from_bytes``.
        **kwargs: Accepted but not used by this method.

    Returns:
        ``self``.

    Raises:
        ImportError: If a pkuseg model is present in ``data`` but pkuseg
            cannot be imported.
    """
    # Filled in by the callbacks below while util.from_bytes walks the payload.
    received = {"features": b"", "weights": b"", "processors": None}

    def load_cfg(b):
        return self._set_config(srsly.json_loads(b))

    def keep_features(b):
        received["features"] = b

    def keep_weights(b):
        received["weights"] = b

    def keep_processors(b):
        received["processors"] = srsly.msgpack_loads(b)

    deserializers = OrderedDict()
    deserializers["cfg"] = load_cfg
    deserializers["pkuseg_features"] = keep_features
    deserializers["pkuseg_weights"] = keep_weights
    deserializers["pkuseg_processors"] = keep_processors
    util.from_bytes(data, deserializers, [])

    features_blob = received["features"]
    weights_blob = received["weights"]
    if not (features_blob and weights_blob):
        # No complete pkuseg model in the payload: only the config applies.
        return self

    # pkuseg is constructed from a directory path, so the two blobs are
    # written out to a throwaway directory first.
    with tempfile.TemporaryDirectory() as scratch:
        model_dir = Path(scratch)
        (model_dir / "features.pkl").write_bytes(features_blob)
        (model_dir / "weights.npz").write_bytes(weights_blob)
        try:
            import pkuseg
        except ImportError:
            raise ImportError(
                "pkuseg not installed. To use this model, " + _PKUSEG_INSTALL_MSG
            )
        self.pkuseg_seg = pkuseg.pkuseg(str(model_dir))

    processors = received["processors"]
    if processors:
        user_dict, do_process, common_words, other_words = processors
        self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(user_dict)
        postprocesser = self.pkuseg_seg.postprocesser
        postprocesser.do_process = do_process
        postprocesser.common_words = set(common_words)
        postprocesser.other_words = set(other_words)
    return self
def load_pkuseg_processors(path):
    """Restore the pkuseg pre-/post-processor settings from a msgpack file.

    Uses ``self`` from the enclosing scope. When ``self.pkuseg_seg`` is set,
    the file at ``path`` is read with ``srsly.read_msgpack`` and unpacked as
    ``(user_dict, do_process, common_words, other_words)``; these values
    replace the segmenter's preprocesser and configure its postprocesser.
    When no segmenter is set, nothing is read.

    Args:
        path: Location of the msgpack file holding the four settings.

    Raises:
        ImportError: If pkuseg cannot be imported while it is either
            requested (``self.use_pkuseg``) or already in use
            (``self.pkuseg_seg`` is set).
    """
    try:
        import pkuseg
    except ImportError:
        if self.use_pkuseg or self.pkuseg_seg:
            # Previously only ``use_pkuseg`` was checked here, so an existing
            # segmenter combined with a failed import fell through and died
            # later on the unbound ``pkuseg`` name. Fail with a clear
            # ImportError instead.
            # Keep an instance-level message when one is defined; otherwise
            # use the same wording as the other pkuseg loaders in this file.
            message = getattr(self, "_pkuseg_install_msg", None) or (
                "pkuseg not installed. To use this model, " + _PKUSEG_INSTALL_MSG
            )
            raise ImportError(message)
        # pkuseg is neither requested nor loaded: nothing to restore.
        return
    if not self.pkuseg_seg:
        return
    user_dict, do_process, common_words, other_words = srsly.read_msgpack(path)
    self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(user_dict)
    postprocesser = self.pkuseg_seg.postprocesser
    postprocesser.do_process = do_process
    postprocesser.common_words = set(common_words)
    postprocesser.other_words = set(other_words)
def pkuseg_update_user_dict(self, words, reset=False):
    """Insert words into the pkuseg user dictionary.

    Does nothing when ``self.pkuseg_seg`` is not set.

    Args:
        words: Iterable of strings. Each one is stripped of surrounding
            whitespace and inserted into the segmenter's preprocesser with
            an empty tag.
        reset: When true, the segmenter's preprocesser is first replaced by
            a fresh ``pkuseg.Preprocesser(None)``.

    Raises:
        ImportError: If ``reset`` is true, pkuseg cannot be imported and
            ``self.use_pkuseg`` is truthy. If ``self.use_pkuseg`` is falsy
            the reset is skipped and the words are still inserted.
    """
    segmenter = self.pkuseg_seg
    if not segmenter:
        return

    if reset:
        try:
            import pkuseg

            segmenter.preprocesser = pkuseg.Preprocesser(None)
        except ImportError:
            if self.use_pkuseg:
                raise ImportError(
                    "pkuseg not installed: unable to reset pkuseg "
                    "user dict. Please " + _PKUSEG_INSTALL_MSG
                )

    for entry in words:
        segmenter.preprocesser.insert(entry.strip(), "")