예제 #1
0
    def __init__(self, config, dataset, feature_extractor):
        """Prepare the model, optimizer and chunk-tag lookup.

        Args:
            config: Settings object. When ``config.init_model`` is ``None``
                a fresh ``Model`` is created; otherwise the value is passed
                to ``Model.load`` and the loaded model is expanded to the
                dataset's dimensions.
            dataset: Data object exposing ``n_feature`` and ``n_tag``.
            feature_extractor: Object exposing a ``tag_to_idx`` mapping.
        """
        self.config = config
        self.X = dataset
        self.n_feature = dataset.n_feature  # total number of features
        self.n_tag = dataset.n_tag  # total number of tags

        if config.init_model is not None:
            self.model = Model.load(config.init_model)
            self.model.expand(self.n_feature, self.n_tag)
        else:
            self.model = Model(self.n_feature, self.n_tag)

        self.optim = self._get_optimizer(dataset, self.model)

        self.feature_extractor = feature_extractor

        # Map every tag index to its coarse chunk tag: any tag starting with
        # "I" becomes "I", any tag starting with "O" becomes "O", everything
        # else is kept unchanged. For example, a ``tag_to_idx`` of
        #   {'B': 0, 'B_single': 1, 'I': 2, 'I_end': 3, 'I_first': 4}
        # yields
        #   {0: 'B', 1: 'B_single', 2: 'I', 3: 'I', 4: 'I'}
        self.idx_to_chunk_tag = {
            idx: (tag[0] if tag.startswith(("I", "O")) else tag)
            for tag, idx in feature_extractor.tag_to_idx.items()
        }
예제 #2
0
    def __init__(self, config, dataset, feature_extractor):
        """Build the trainer state from a config, a dataset and an extractor.

        Args:
            config: Settings object; ``config.init_model`` selects between a
                new ``Model`` (``None``) and one restored via ``Model.load``.
            dataset: Data object providing ``n_feature`` and ``n_tag``.
            feature_extractor: Object providing the ``tag_to_idx`` mapping.
        """
        self.config = config
        self.X = dataset
        self.n_feature = dataset.n_feature
        self.n_tag = dataset.n_tag

        initial_model = config.init_model
        if initial_model is None:
            self.model = Model(self.n_feature, self.n_tag)
        else:
            # Restore a saved model, then grow it to the dataset's dimensions.
            self.model = Model.load(initial_model)
            self.model.expand(self.n_feature, self.n_tag)

        self.optim = self._get_optimizer(dataset, self.model)

        self.feature_extractor = feature_extractor

        # Index -> chunk tag, with every "I*" tag folded into "I" and every
        # "O*" tag folded into "O"; other tags are stored as they are.
        self.idx_to_chunk_tag = {}
        for full_tag, tag_index in feature_extractor.tag_to_idx.items():
            if full_tag.startswith("I"):
                chunk_tag = "I"
            elif full_tag.startswith("O"):
                chunk_tag = "O"
            else:
                chunk_tag = full_tag
            self.idx_to_chunk_tag[tag_index] = chunk_tag
예제 #3
0
    def __init__(self,
                 model_name="default",
                 user_dict="default",
                 postag=False):
        """Load the segmentation model and the user dictionaries.

        Sets ``config.modelDir`` on the shared ``config`` object as a side
        effect, and may download model archives into ``config.pkuseg_home``.

        Args:
            model_name: ``"default"`` selects the ``models/default``
                directory next to this file. A name listed in
                ``config.available_models`` is downloaded and resolved
                under ``config.pkuseg_home``. Any other value is used
                directly as the model directory.
            user_dict: ``None`` disables all dictionaries. Otherwise the
                value is handed to ``Preprocesser`` unless it is listed in
                ``config.available_models`` (then ``None`` is handed over),
                and ``Postprocesser`` receives the bundled
                ``dicts/default.pkl``, preceded by the model-specific
                ``<model_name>_dict.pkl`` when ``model_name`` is listed in
                ``config.models_with_dict``.
            postag: When true, download the ``"postag"`` model and create
                ``self.tagger`` from it.
        """
        self.postag = postag
        package_dir = os.path.dirname(os.path.realpath(__file__))

        if model_name == "default":
            config.modelDir = os.path.join(package_dir, "models", model_name)
        elif model_name in config.available_models:
            config.modelDir = os.path.join(config.pkuseg_home, model_name)
            download_model(config.model_urls[model_name], config.pkuseg_home,
                           config.model_hash[model_name])
        else:
            config.modelDir = model_name

        if user_dict is None:
            file_name = None
            other_names = None
        else:
            file_name = (None if user_dict in config.available_models
                         else user_dict)
            other_names = []
            if model_name in config.models_with_dict:
                # The model-specific dictionary goes before the default one.
                other_names.append(
                    os.path.join(
                        config.pkuseg_home,
                        model_name,
                        model_name + "_dict.pkl",
                    ))
            other_names.append(
                os.path.join(package_dir, "dicts", "default.pkl"))

        self.preprocesser = Preprocesser(file_name)
        self.postprocesser = Postprocesser(None, other_names)

        self.feature_extractor = FeatureExtractor.load()
        self.model = Model.load()

        self.idx_to_tag = {
            idx: tag
            for tag, idx in self.feature_extractor.tag_to_idx.items()
        }

        self.n_feature = len(self.feature_extractor.feature_to_idx)
        self.n_tag = len(self.feature_extractor.tag_to_idx)

        if postag:
            # The hash is looked up under the same "postag" key as the URL.
            # Keying it by model_name would check the archive against a
            # different model's hash, or fail the lookup when model_name
            # has no hash entry.
            download_model(config.model_urls["postag"], config.pkuseg_home,
                           config.model_hash["postag"])
            postag_dir = os.path.join(
                config.pkuseg_home,
                "postag",
            )
            self.tagger = Postag(postag_dir)
예제 #4
0
    def __init__(self, model_name="default", user_dict="default"):
        """Load the model and the user dictionary.

        Sets ``config.modelDir`` on the shared ``config`` object as a side
        effect.

        Args:
            model_name: ``"default"`` selects the ``models/default``
                directory next to this file; any other value is used
                directly as the model directory.
            user_dict: ``"default"`` passes the bundled
                ``dicts/default.txt`` to ``Postprocesser`` as its second
                argument; any other value is passed as its first argument
                instead.
        """
        config.modelDir = (
            os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                "models",
                model_name,
            )
            if model_name in ("default",)
            else model_name
        )

        # Start from the "custom dictionary" case and override it when the
        # bundled default dictionary is requested.
        file_name, other_name = user_dict, None
        if user_dict == "default":
            file_name = None
            other_name = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                "dicts",
                "default.txt",
            )

        # The preprocesser is built from an empty list; the dictionaries are
        # only given to the postprocesser.
        self.preprocesser = Preprocesser([])
        self.postprocesser = Postprocesser(file_name, other_name)

        self.feature_extractor = FeatureExtractor.load()
        self.model = Model.load()

        # Reverse lookup: tag index -> tag name.
        index_to_tag = {}
        for tag_name, tag_index in self.feature_extractor.tag_to_idx.items():
            index_to_tag[tag_index] = tag_name
        self.idx_to_tag = index_to_tag

        self.n_feature = len(self.feature_extractor.feature_to_idx)
        self.n_tag = len(self.feature_extractor.tag_to_idx)