def __init__(self, config, dataset, feature_extractor):
    """Prepare the model, the optimizer and the index-to-chunk-tag table.

    Args:
        config: settings object; only ``config.init_model`` is read here.
        dataset: dataset object exposing ``n_feature`` and ``n_tag``;
            kept on the instance as ``self.X``.
        feature_extractor: object exposing a ``tag_to_idx`` mapping
            (tag string -> integer index).
    """
    self.config = config
    self.X = dataset
    self.n_feature = dataset.n_feature  # total number of features
    # Total number of tags (the original note says this equals 5).
    self.n_tag = dataset.n_tag

    if config.init_model is not None:
        # Resume from a stored model and resize it to this dataset.
        self.model = Model.load(config.init_model)
        self.model.expand(self.n_feature, self.n_tag)
    else:
        # No initial model configured: start from a fresh one.
        self.model = Model(self.n_feature, self.n_tag)

    self.optim = self._get_optimizer(dataset, self.model)
    self.feature_extractor = feature_extractor

    # Tags are collapsed by prefix: anything starting with "I" becomes "I",
    # anything starting with "O" becomes "O", everything else is kept.
    # Example from the original notes, with
    #   tag_to_idx = {'B': 0, 'B_single': 1, 'I': 2, 'I_end': 3, 'I_first': 4}
    # the resulting table is
    #   {0: 'B', 1: 'B_single', 2: 'I', 3: 'I', 4: 'I'}
    chunk_tags = self.idx_to_chunk_tag = {}
    for label, index in feature_extractor.tag_to_idx.items():
        for prefix in ("I", "O"):
            if label.startswith(prefix):
                label = prefix
                break
        chunk_tags[index] = label
def __init__(self, config, dataset, feature_extractor):
    """Build the model, its optimizer and the chunk-tag lookup table.

    Args:
        config: settings object; ``config.init_model`` selects between a
            fresh model (``None``) and a loaded one.
        dataset: object providing ``n_feature`` and ``n_tag``; stored as
            ``self.X``.
        feature_extractor: object providing a ``tag_to_idx`` mapping.
    """
    self.config = config
    self.X = dataset
    self.n_feature = dataset.n_feature
    self.n_tag = dataset.n_tag

    init_path = config.init_model
    if init_path is None:
        self.model = Model(self.n_feature, self.n_tag)
    else:
        # A loaded model is expanded to the current feature/tag counts.
        self.model = Model.load(init_path)
        self.model.expand(self.n_feature, self.n_tag)

    self.optim = self._get_optimizer(dataset, self.model)
    self.feature_extractor = feature_extractor

    # Map each tag index to its chunk tag: every "I*" tag collapses to "I",
    # every "O*" tag collapses to "O", other tags are kept unchanged.
    self.idx_to_chunk_tag = {}
    for tag, idx in feature_extractor.tag_to_idx.items():
        if tag.startswith("I"):
            chunk = "I"
        elif tag.startswith("O"):
            chunk = "O"
        else:
            chunk = tag
        self.idx_to_chunk_tag[idx] = chunk
def __init__(self, model_name="default", user_dict="default", postag=False):
    """Load the model and dictionaries, and optionally the POS tagger.

    Args:
        model_name: ``"default"`` selects ``models/default`` next to this
            file. A name listed in ``config.available_models`` selects
            ``<config.pkuseg_home>/<model_name>`` and is fetched through
            ``download_model``. Any other value is used directly as the
            model directory.
        user_dict: ``None`` disables all dictionaries. A value that is not
            in ``config.available_models`` is handed to ``Preprocesser`` as
            the dictionary file name; otherwise ``Preprocesser`` receives
            ``None``. In both non-``None`` cases the packaged
            ``dicts/default.pkl`` is passed to ``Postprocesser``, preceded
            by the model's own ``<model_name>_dict.pkl`` when
            ``model_name`` is in ``config.models_with_dict``.
        postag: when true, the ``"postag"`` model is fetched and
            ``self.tagger`` is created from ``<config.pkuseg_home>/postag``.

    Side effects:
        Writes ``config.modelDir`` on the module-level ``config`` object.
    """
    self.postag = postag
    package_dir = os.path.dirname(os.path.realpath(__file__))

    # Resolve the model directory; it is stored on the shared config object.
    if model_name == "default":
        config.modelDir = os.path.join(package_dir, "models", model_name)
    elif model_name in config.available_models:
        config.modelDir = os.path.join(config.pkuseg_home, model_name)
        download_model(
            config.model_urls[model_name],
            config.pkuseg_home,
            config.model_hash[model_name],
        )
    else:
        config.modelDir = model_name

    # Resolve the dictionaries for the pre- and post-processing steps.
    if user_dict is None:
        file_name = None
        other_names = None
    else:
        file_name = (
            user_dict if user_dict not in config.available_models else None
        )
        other_names = []
        if model_name in config.models_with_dict:
            # The model-specific dictionary goes before the default one.
            other_names.append(
                os.path.join(
                    config.pkuseg_home,
                    model_name,
                    model_name + "_dict.pkl",
                )
            )
        other_names.append(os.path.join(package_dir, "dicts", "default.pkl"))

    self.preprocesser = Preprocesser(file_name)
    self.postprocesser = Postprocesser(None, other_names)

    self.feature_extractor = FeatureExtractor.load()
    self.model = Model.load()

    # Inverse of tag_to_idx: integer index -> tag string.
    self.idx_to_tag = {
        idx: tag for tag, idx in self.feature_extractor.tag_to_idx.items()
    }

    self.n_feature = len(self.feature_extractor.feature_to_idx)
    self.n_tag = len(self.feature_extractor.tag_to_idx)

    if postag:
        # The POS archive must be checked against its own hash entry, not
        # the hash of the segmentation model named by `model_name` (which
        # may not even be a key when `model_name` is a directory path).
        # NOTE(review): assumes config.model_hash has a "postag" entry
        # matching config.model_urls["postag"] -- confirm in config.
        download_model(
            config.model_urls["postag"],
            config.pkuseg_home,
            config.model_hash["postag"],
        )
        self.tagger = Postag(os.path.join(config.pkuseg_home, "postag"))
def __init__(self, model_name="default", user_dict="default"):
    """Load the model and the user dictionary.

    Args:
        model_name: ``"default"`` selects ``models/default`` next to this
            file; any other value is used directly as the model directory.
        user_dict: ``"default"`` selects the packaged ``dicts/default.txt``;
            any other value is passed to ``Postprocesser`` as the
            dictionary file name.

    Side effects:
        Writes ``config.modelDir`` on the module-level ``config`` object.
    """
    config.modelDir = (
        os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "models",
            model_name,
        )
        if model_name == "default"
        else model_name
    )

    if user_dict != "default":
        file_name, other_name = user_dict, None
    else:
        file_name = None
        other_name = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "dicts",
            "default.txt",
        )

    # NOTE: the preprocesser is always built from an empty list; the
    # dictionary names are handed to the postprocesser only.
    self.preprocesser = Preprocesser([])
    self.postprocesser = Postprocesser(file_name, other_name)

    extractor = FeatureExtractor.load()
    self.feature_extractor = extractor
    self.model = Model.load()

    # Inverse of tag_to_idx: integer index -> tag string.
    self.idx_to_tag = {
        index: label for label, index in extractor.tag_to_idx.items()
    }

    self.n_feature = len(extractor.feature_to_idx)
    self.n_tag = len(extractor.tag_to_idx)