# Assumed imports for this excerpt; config, TOI_BERT, Evaluate, ModelInRuntime,
# create_opt and adjust_learning_rate come from the repo's own modules.
import pickle
import time
from random import shuffle

import numpy as np
import torch
from tqdm import tqdm
from pytorch_pretrained_bert import BertModel, BertTokenizer


def train(all_batch=[], read_from_file=True, section=[]):
    if read_from_file:
        with open(config.get_pkl_path("train"), "rb") as f:
            (train_word_batches, train_char_batches, train_char_len_batches,
             train_pos_tag_batches, train_entity_batches, train_toi_batches,
             train_word_origin_batches) = pickle.load(f)
        with open(config.get_pkl_path("test"), "rb") as f:
            (test_word_batches, test_char_batches, test_char_len_batches,
             test_pos_tag_batches, test_entity_batches, test_toi_batches,
             test_word_origin_batches) = pickle.load(f)
    else:
        (train_word_batches, train_char_batches, train_char_len_batches,
         train_pos_tag_batches, train_entity_batches, train_toi_batches,
         train_toi_batch_layer0, train_toi_batch_layer1) = all_batch[0]
        (dev_word_batches, dev_char_batches, dev_char_len_batches,
         dev_pos_tag_batches, dev_entity_batches, dev_toi_batches,
         dev_toi_batch_layer0, dev_toi_batch_layer1) = all_batch[1]
        (test_word_batches, test_char_batches, test_char_len_batches,
         test_pos_tag_batches, test_entity_batches, test_toi_batches,
         test_toi_batch_layer0, test_toi_batch_layer1) = all_batch[2]

    misc_config = pickle.load(open(config.get_pkl_path("config"), "rb"))
    config.load_config(misc_config)

    ner_model = TOI_BERT(config)
    if config.if_DTE:
        ner_model.load_vector()
    if len(section):
        config.layer_maxlen = section
    if config.if_gpu and torch.cuda.is_available():
        ner_model = ner_model.cuda()

    evaluate = Evaluate(ner_model, config)
    parameters = filter(lambda p: p.requires_grad, ner_model.parameters())
    optimizer = create_opt(parameters, config.opt, config.lr)

    best_model = None
    best_per = 0
    pre_loss = 100000
    train_all_batches = list(
        zip(train_word_batches, train_char_batches, train_char_len_batches,
            train_pos_tag_batches, train_entity_batches, train_toi_batches,
            train_word_origin_batches))

    # BERT is frozen and used purely as a feature extractor.
    tokenizer = BertTokenizer.from_pretrained(f"bert-{config.bert_config}-uncased")
    bert_model = BertModel.from_pretrained(f"{config.bert_path}{config.bert_config}")
    bert_model.cuda()
    bert_model.eval()
    for parameter in bert_model.parameters():
        parameter.requires_grad = False

    for e_ in range(config.epoch):
        print("Epoch:", e_ + 1)
        cur_time = time.time()
        if config.if_shuffle:
            shuffle(train_all_batches)
        losses = []
        ner_model.train()
        config.mode = 'Train'
        runtimeModel = ModelInRuntime.instance(
            (ner_model, bert_model, tokenizer, config,
             len(train_all_batches) + len(test_word_batches)))
        runtimeModel.model = ner_model

        for each_batch in tqdm(train_all_batches):
            optimizer.zero_grad()
            runtimeModel.setTrainData(each_batch)
            result, _, aim = runtimeModel.runClassification()
            loss = ner_model.calc_loss(result, aim)
            loss.backward()
            optimizer.step()
            losses.append(loss.data.cpu().numpy())

        sub_loss = np.mean(losses)
        print(f'Avg loss = {sub_loss:.4f}')
        print(f"Training step took {time.time() - cur_time:.0f} seconds")

        if e_ >= 0:  # evaluate after every epoch
            print("dev:")  # note: the test split doubles as the dev set here
            cls_f1 = evaluate.get_f1(
                zip(test_word_batches, test_char_batches, test_char_len_batches,
                    test_pos_tag_batches, test_entity_batches, test_toi_batches,
                    test_word_origin_batches), bert_model)
            if cls_f1 > best_per and cls_f1 > config.score_th:
                best_per = cls_f1
                model_path = config.get_model_path() + f"/epoch{e_ + 1}_f1_{cls_f1:.4f}.pth"
                torch.save(ner_model.state_dict(), model_path)
                print("model saved to " + model_path)
            print('\n\n')

        if sub_loss >= pre_loss:
            adjust_learning_rate(optimizer)
        pre_loss = sub_loss
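# create_opt and adjust_learning_rate are repo helpers called above but not
# shown in this excerpt. A minimal sketch of plausible implementations, assuming
# create_opt dispatches on the optimizer name in config.opt and
# adjust_learning_rate decays the LR when the epoch loss stops improving;
# both are assumptions, not the repo's verified code.
def create_opt(parameters, opt, lr):
    # Map the configured optimizer name onto the matching torch.optim class.
    if opt == "Adam":
        return torch.optim.Adam(parameters, lr=lr)
    if opt == "SGD":
        return torch.optim.SGD(parameters, lr=lr)
    raise ValueError(f"unsupported optimizer: {opt}")


def adjust_learning_rate(optimizer, decay=0.5):
    # Scale the learning rate of every parameter group in place.
    for param_group in optimizer.param_groups:
        param_group["lr"] *= decay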
        # Per-batch training step from the TOICNN (no-BERT) variant of the
        # loop; the enclosing epoch/batch loops and batch construction are
        # outside this excerpt.
        pos_tag_batch_var = pos_tag_batch_var.cuda()
        gold_label_vec = gold_label_vec.cuda()

        ner_model.train()
        optimizer.zero_grad()
        cls_s, _ = ner_model(mask_batch_var, word_batch_var, char_batch_var,
                             char_len_batch, pos_tag_batch_var, toi_box_batch)
        loss = ner_model.calc_loss(cls_s, gold_label_vec)
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(ner_model.parameters(), 3, norm_type=2)
        optimizer.step()
        losses.append(loss.data.cpu().numpy())

    sub_loss = np.mean(losses)
    print(f'Avg loss = {sub_loss:.4f}')
    print(f"Training step took {time.time() - cur_time:.0f} seconds")

    if e_ >= 20:  # start evaluating on dev only after 20 warm-up epochs
        print("Dev:")
        cls_f1 = evaluate.get_f1(
            zip(dev_word_batches, dev_char_batches, dev_char_len_batches,
                dev_pos_tag_batches, dev_entity_batches, dev_toi_batches))
        if cls_f1 > best_per:
            best_per = cls_f1
            model_path = config.get_model_path() + f"epoch{e_ + 1}.pth"
            torch.save(ner_model.state_dict(), model_path)
            print("model saved to " + model_path)
        print('\n\n')

    if sub_loss >= pre_loss:
        adjust_learning_rate(optimizer)
    pre_loss = sub_loss
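# calc_loss is defined on the model elsewhere in the repo. A minimal sketch,
# assuming it is plain cross-entropy between per-span class scores of shape
# (num_spans, num_classes) and a gold label vector of shape (num_spans,);
# the real implementation may weight or filter candidate spans differently.
import torch.nn.functional as F


def calc_loss(cls_s, gold_label_vec):
    # Standard classification loss over candidate spans (TOIs).
    return F.cross_entropy(cls_s, gold_label_vec)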
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
mode = "test"  # "test" or "dev"
test_best = True
epoch_start = 1
epoch_end = 100

misc_config = pickle.load(open(config.get_pkl_path("config"), "rb"))
config.load_config(misc_config)

bert_model = BertModel.from_pretrained(f"{config.bert_path}{config.bert_config}")
bert_model.cuda()
bert_model.eval()

model_path = config.get_model_path() + "f1_0.771.pth"
with open(config.get_pkl_path(mode), "rb") as f:
    (word_batches, char_batches, char_len_batches, pos_tag_batches,
     entity_batches, toi_batches, word_origin_batches) = pickle.load(f)
print("load data from " + config.get_pkl_path(mode))

# print(model_path)
if not os.path.exists(model_path):
    print("load model error")
print("load model from " + model_path)

ner_model = TOI_BERT(config)
ner_model.load_state_dict(torch.load(model_path))
if config.if_gpu and torch.cuda.is_available():
    ner_model = ner_model.cuda()
evaluate = Evaluate(ner_model, config)
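# With the checkpoint and batches loaded, a single evaluation pass mirrors the
# call used during training above: get_f1 takes the zipped batches plus the
# frozen BERT encoder.
evaluate.get_f1(
    zip(word_batches, char_batches, char_len_batches, pos_tag_batches,
        entity_batches, toi_batches, word_origin_batches), bert_model)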
epoch_end = 50
config.if_detail = True
config.if_output = False
config.if_filter = True
config.score_th = 0.5

misc_config = pickle.load(open(config.get_pkl_path("config"), "rb"))
config.load_config(misc_config)

with open(config.get_pkl_path(mode), "rb") as f:
    (word_batches, char_batches, char_len_batches, pos_tag_batches,
     entity_batches, toi_batches) = pickle.load(f)
print("load data from " + config.get_pkl_path(mode))

for e in range(epoch_start, epoch_end + 1):
    model_path = config.get_model_path() + f"epoch{e}.pth"  # test trained model
    # model_path = config.get_model_path() + f"best.pth"  # test best model
    if not os.path.exists(model_path):
        continue
    print("load model from " + model_path)

    ner_model = TOICNN(config)
    ner_model.load_state_dict(torch.load(model_path))
    if config.if_gpu and torch.cuda.is_available():
        ner_model = ner_model.cuda()

    evaluate = Evaluate(ner_model, config)
    evaluate.get_f1(zip(word_batches, char_batches, char_len_batches,
                        pos_tag_batches, entity_batches, toi_batches))
    print("\n\n")