def __init__(self, model_path, type_filter=False, save_label=False, batch=32,
             save_expand_subject=True):
    # mention -> entity_json_line
    self.subject_id_dict = subject_id_dict
    self._model = BLSTMCRFModel.load_model(model_path)
    # self._model = DDDDModel.load_model(model_path)
    self.type_filter = type_filter
    self.batch = batch
    self.save_label = save_label
    self.save_expand_subject = save_expand_subject
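# A minimal sketch (not from the original) of the mapping subject_id_dict is
# expected to hold, following the "mention -> entity_json_line" comment above:
# each surface mention points at the knowledge-base JSON line(s) it may refer
# to. The exact value shape is an assumption.
subject_id_dict = {
    '小米': ['{"subject_id": "10001", "subject": "小米", "type": ["Organization"]}'],
}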
import os
from configparser import ConfigParser

from kashgari.corpus import CoNLL2003Corpus
from kashgari.tasks.seq_labeling import BLSTMCRFModel


def main():
    # parse config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)
    # default config
    model_fold = cp["EVALUATION"].get("model_fold")
    output_dir = os.path.join('experiments', model_fold)
    test_x, test_y = CoNLL2003Corpus.get_sequence_tagging_data('test')
    model_path = os.path.join(output_dir, 'model')
    model = BLSTMCRFModel.load_model(model_path)
    report_evaluate = model.evaluate(test_x, test_y, debug_info=True)
    with open(os.path.join(output_dir, 'report_evaluate.log'), 'w') as f:
        f.write(f"The evaluation report is:\n{report_evaluate}\n")
import logging
import os
from configparser import ConfigParser

from kashgari.tasks.seq_labeling import BLSTMCRFModel


def main():
    # parse config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)
    # default config
    model_fold = cp["TEST"].get("model_fold")
    output_dir = os.path.join('experiments', model_fold)
    model_path = os.path.join(output_dir, 'model')
    model = BLSTMCRFModel.load_model(model_path)
    sentence = 'China and the United States are about the same size'
    sentence_list = sentence.split()
    result = model.predict(sentence_list)
    result_dict = model.predict(sentence_list, output_dict=True)
    print(f'the sentence is {sentence}')
    print(f'the result is {result}')
    print(f'the result dict is {result_dict}')
    logging.info('test predict: {} -> {}'.format(sentence_list, result))
    with open(os.path.join(output_dir, 'result_predict.log'), 'w') as f:
        f.write(f"The predict result is: {result}\n")
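# Both scripts above read model_fold from ./config.ini; a minimal sketch of a
# config that would satisfy them (the folder name is a placeholder, not from
# the original):
from configparser import ConfigParser

cp = ConfigParser()
cp["EVALUATION"] = {"model_fold": "blstm_crf_run0"}
cp["TEST"] = {"model_fold": "blstm_crf_run0"}
with open("./config.ini", "w") as f:
    cp.write(f)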
log_filepath = r"D:\data\biendata\ccks2019_el\ner\m0.1log" # emn_path = r'D:\data\bert\chinese_L-12_H-768_A-12' emn_path = r'D:\data\bert\chinese-bert_chinese_wwm_L-12_H-768_A-12' # check_point = ModelCheckpoint(model_path, monitor="val_loss", verbose=1, save_best_only=True, mode="min") early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=1) # early_stop = EarlyStopping(monitor="val_crf_accuracy", mode="max", patience=2) log = TensorBoard(log_dir=log_filepath, write_images=False, write_graph=True, histogram_freq=0) model = BLSTMCRFModel.load_model(model_path_o) model.fit(train_x, train_y, x_validate=validate_x, y_validate=validate_y, epochs=40, batch_size=512, labels_weight=True, fit_kwargs={'callbacks': [early_stop, log]}) model.evaluate(test_x, test_y) model.save(model_path_n) """ 继续训练
def __init__(self, model_path, type_filter=False):
    # mention -> entity_json_line
    self.subject_id_dict = subject_id_dict
    self._model = BLSTMCRFModel.load_model(model_path)
    self.type_filter = type_filter
def __init__(self, model_path):
    # mention -> entity_json_line
    self.subject_id_dict = subject_id_dict
    self._model = BLSTMCRFModel.load_model(model_path)
        x = []
        return datas

    def model_predict(self, model, text):
        x_test = self.build_input(text)
        result = model.predict(x_test)
        chars = [i for i in text]
        tags = []
        for i in range(len(result)):
            tags = tags + result[i]
        res = list(zip(chars, tags))
        print(res)
        return res


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--option', type=str, default='predict')
    args = parser.parse_args()
    config = Config()
    bertner = BERTNER()
    if args.option == 'train':
        bertner.train_model()
    else:
        model = BLSTMCRFModel.load_model(bertner.model_path)
        while True:
            s = input('enter a sentence: ').strip()
            bertner.model_predict(model, s)
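# build_input is truncated above; a plausible sketch (an assumption, not the
# original code): wrap the raw text as one character sequence, which matches
# the char-level zip in model_predict.
def build_input_sketch(text):
    return [[char for char in text]]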
def __init__(self, model_path):
    with tf.device('/gpu:0'):
        # mention -> entity_json_line
        self.subject_dic = super().get_kb_dic()
        self._model = BLSTMCRFModel.load_model(model_path)
from tqdm import tqdm
from kashgari.embeddings import BERTEmbedding
from kashgari.tasks.seq_labeling import BLSTMCRFModel

# get_train_data and loadData are project-local helpers defined elsewhere.
print('train start')
train_x, train_y = get_train_data('data/train_text.txt')
embedding = BERTEmbedding("bert-base-chinese", sequence_length=512)
model = BLSTMCRFModel(embedding)
length = int(len(train_x) * 0.9)
print(len(train_x[:length]), len(train_y[:length]))
model.fit(train_x[:length], train_y[:length],
          train_x[length:], train_y[length:],
          epochs=5, batch_size=20)
# model.fit(train_x[:length], train_y[:length], train_x[length:], train_y[length:],
#           epochs=5, batch_size=128, labels_weight=True, default_labels_weight=100)
valid_x = train_x[length:]
valid_y = train_y[length:]
model.save('models')
print('train end')

print('predict start')
try:
    model = BLSTMCRFModel.load_model('models')
except Exception:
    print('Failed to load the model')
newsId_set = set()
try:
    with open('data/result_bert.txt', 'r', encoding='utf-8') as file:
        for line in file:
            newsId_set.add(line.split('\t')[0])
except IOError:
    print('File does not exist')
test_data = loadData('data/coreEntityEmotion_test_stage1.txt')
test_data += loadData('data/coreEntityEmotion_train.txt')
with open('data/result_bert.txt', 'a', encoding='utf-8') as file:
    for news in tqdm(test_data):
        if news['newsId'] in newsId_set:
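# A minimal sketch (illustrative, not the real corpus) of the shape
# get_train_data must return: Kashgari sequence-labeling models consume
# parallel lists of token sequences and tag sequences.
sample_x = [['今', '天', '去', '北', '京'],
            ['我', '爱', '小', '米']]
sample_y = [['O', 'O', 'O', 'B-LOC', 'I-LOC'],
            ['O', 'O', 'B-ORG', 'I-ORG']]
assert all(len(xs) == len(ys) for xs, ys in zip(sample_x, sample_y))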
def eval_crf():
    model_path = r"D:\data\biendata\ccks2019_el\ner_model"
    model = BLSTMCRFModel.load_model(model_path)
    validate_x, validate_y = dload.load_json_data('validate')
    model.evaluate(validate_x, validate_y)
## Script-style usage
from kashgari.tasks.seq_labeling import BLSTMCRFModel
from util import InputHelper

# Load the trained model
new_model = BLSTMCRFModel.load_model('./model')

# Read the test set
with open('./data/test', 'r', encoding='utf-8') as g:
    test_data = g.readlines()

# Run prediction over the test set
with open('./keywords_test', 'w', encoding='utf-8') as g_key:
    for ids, line in enumerate(test_data):
        try:
            label = InputHelper().iob_iobes(
                new_model.predict(line.replace('\t', '。')))
            result = InputHelper().result_to_json(line, label)
            line_keys = [entity['word'] for entity in result['entities']]
            g_key.write(','.join(line_keys) + '\n')
        except Exception as e:
            g_key.write('\n')

# e.g. "人工智能" (artificial intelligence)
# Clean up some obvious errors and keep the first three keywords.
# This pass is rough.
with open('./keywords_test', 'r', encoding='utf-8') as g:
    data = g.readlines()
with open('./keywords', 'w', encoding='utf-8') as f:
    for line in data:
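        # The loop body is truncated in the original; a minimal sketch of what
        # the comment above describes (keep at most the first three keywords
        # per line). The exact filtering is an assumption, not the original code.
        keys = [k for k in line.strip().split(',') if k]
        f.write(','.join(keys[:3]) + '\n')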