Example #1
# NOTE: imports assumed by this snippet; module paths may differ between
# pkuseg versions, and Preprocesser/Postprocesser are expected to be defined
# elsewhere in the same module.
import os
from typing import List

import pkuseg.inference as _inf
from pkuseg.config import config
from pkuseg.download import download_model
from pkuseg.feature_extractor import FeatureExtractor
from pkuseg.model import Model
from pkuseg.postag import Postag


class pkuseg:
    def __init__(self,
                 model_name="default",
                 user_dict="default",
                 postag=False):
        """初始化函数,加载模型及用户词典"""
        # print("loading model")
        # config = Config()
        # self.config = config
        self.postag = postag
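        # Resolve the model directory: "default" uses the model bundled with
        # the package, a known name in config.available_models is downloaded
        # into config.pkuseg_home, and anything else is treated as a path to
        # a local model directory.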
        if model_name in ["default"]:
            config.modelDir = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                "models",
                model_name,
            )
        elif model_name in config.available_models:
            config.modelDir = os.path.join(
                config.pkuseg_home,
                model_name,
            )
            download_model(config.model_urls[model_name], config.pkuseg_home,
                           config.model_hash[model_name])
        else:
            config.modelDir = model_name
        # config.fModel = os.path.join(config.modelDir, "model.txt")
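        # Resolve the user dictionary: None disables it entirely, a name in
        # config.available_models loads no extra user file, and any other
        # value is treated as the path to a user-supplied dictionary file.
        # Packaged dictionaries (other_names) are applied in post-processing.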
        if user_dict is None:
            file_name = None
            other_names = None
        else:
            if user_dict not in config.available_models:
                file_name = user_dict
            else:
                file_name = None
            if model_name in config.models_with_dict:
                other_name = os.path.join(
                    config.pkuseg_home,
                    model_name,
                    model_name + "_dict.pkl",
                )
                default_name = os.path.join(
                    os.path.dirname(os.path.realpath(__file__)),
                    "dicts",
                    "default.pkl",
                )
                other_names = [other_name, default_name]
            else:
                default_name = os.path.join(
                    os.path.dirname(os.path.realpath(__file__)),
                    "dicts",
                    "default.pkl",
                )
                other_names = [default_name]

        self.preprocesser = Preprocesser(file_name)
        # self.preprocesser = Preprocesser([])
        self.postprocesser = Postprocesser(None, other_names)

        self.feature_extractor = FeatureExtractor.load()
        self.model = Model.load()

        self.idx_to_tag = {
            idx: tag
            for tag, idx in self.feature_extractor.tag_to_idx.items()
        }

        self.n_feature = len(self.feature_extractor.feature_to_idx)
        self.n_tag = len(self.feature_extractor.tag_to_idx)

        if postag:
            # Download and load the part-of-speech tagging model; the checksum
            # should be the one registered for the "postag" model itself.
            download_model(config.model_urls["postag"], config.pkuseg_home,
                           config.model_hash["postag"])
            postag_dir = os.path.join(
                config.pkuseg_home,
                "postag",
            )
            self.tagger = Postag(postag_dir)

        # print("finish")

    def _cut(self, text):
        """
        直接对文本分词
        """

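        # Normalize the text, then collect the node-feature indices for each
        # character position.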
        examples = list(self.feature_extractor.normalize_text(text))
        length = len(examples)

        all_feature = []  # type: List[List[int]]
        for idx in range(length):
            node_feature_idx = self.feature_extractor.get_node_features_idx(
                idx, examples)
            # node_feature = self.feature_extractor.get_node_features(
            #     idx, examples
            # )

            # node_feature_idx = []
            # for feature in node_feature:
            #     feature_idx = self.feature_extractor.feature_to_idx.get(feature)
            #     if feature_idx is not None:
            #         node_feature_idx.append(feature_idx)
            # if not node_feature_idx:
            #     node_feature_idx.append(0)

            all_feature.append(node_feature_idx)

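        # Viterbi-decode the most likely tag sequence under the loaded model.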
        _, tags = _inf.decodeViterbi_fast(all_feature, self.model)

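        # Rebuild words from the predicted tags: a tag containing "B" starts
        # a new word, any other tag extends the current one.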
        words = []
        current_word = None
        is_start = True
        for tag, char in zip(tags, text):
            if is_start:
                current_word = char
                is_start = False
            elif "B" in self.idx_to_tag[tag]:
                words.append(current_word)
                current_word = char
            else:
                current_word += char
        if current_word:
            words.append(current_word)

        return words

    def cut(self, txt):
        """分词,结果返回一个list"""

        txt = txt.strip()

        ret = []

        if not txt:
            return ret

        imary = txt.split()  # split on whitespace into fragments

        # segment each fragment
        for w0 in imary:
            if not w0:
                continue

            # split further into pieces according to the user dictionary
            lst, isword = self.preprocesser.solve(w0)

            for w, isw in zip(lst, isword):
                if isw:
                    ret.append(w)
                    continue

                output = self._cut(w)
                ret.extend(self.postprocesser(output))

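        # Optionally attach part-of-speech tags to the segmented words.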
        if self.postag:
            tags = self.tagger.tag(ret)
            ret = list(zip(ret, tags))
        return ret
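
A minimal usage sketch for the class above, assuming it is exposed as pkuseg.pkuseg when the package is installed; the sample sentence is illustrative, postag=True downloads the part-of-speech model on first use, and the paths in the last example are hypothetical:

import pkuseg

# Plain segmentation with the bundled default model and default user dictionary.
seg = pkuseg.pkuseg()
print(seg.cut("我爱北京天安门"))  # a list of words

# Segmentation plus part-of-speech tagging: cut() then returns (word, tag) pairs.
seg_pos = pkuseg.pkuseg(postag=True)
print(seg_pos.cut("我爱北京天安门"))  # a list of (word, tag) tuples

# Any other string passed as model_name is treated as a local model directory,
# and any other user_dict value as a path to a user dictionary file
# (hypothetical paths):
# seg_local = pkuseg.pkuseg(model_name="./my_model_dir", user_dict="./my_dict.txt")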