def train_model(self, num_epochs, train_data, eval):
    """Train ``self.model`` on ``train_data`` with SGD for ``num_epochs``.

    Args:
        num_epochs: number of full passes over the training batches.
        train_data: list of instances, batched via ``batching_list_instances``.
        eval: if truthy, evaluate on ``self.dev`` / ``self.test`` after each
            epoch. NOTE(review): the name shadows the builtin ``eval``; it is
            kept unchanged so existing keyword callers keep working.

    Returns:
        The trained model (``self.model``).
    """
    batched_data = batching_list_instances(self.config, train_data)
    self.optimizer = get_optimizer(self.config, self.model, 'sgd')
    for epoch in range(num_epochs):
        epoch_loss = 0
        self.model.zero_grad()
        # Visit batches in a fresh random order every epoch.
        for index in tqdm(np.random.permutation(len(batched_data))):
            self.model.train()
            sequence_loss = self.model(*batched_data[index][0:5],
                                       batched_data[index][-2],
                                       batched_data[index][-3])
            loss = sequence_loss
            # FIX: accumulate a detached Python float. The previous
            # `epoch_loss + loss.data` kept tensors alive for the whole epoch.
            epoch_loss = epoch_loss + loss.item()
            # FIX: no retain_graph — the graph is rebuilt by the forward pass
            # above on every iteration, so retaining it only wasted memory.
            loss.backward()
            self.optimizer.step()
            self.model.zero_grad()
        print(epoch_loss)
        if eval:
            self.model.eval()
            dev_batches = batching_list_instances(self.config, self.dev)
            test_batches = batching_list_instances(self.config, self.test)
            dev_metrics = self.evaluate_model_top(dev_batches, "dev", self.dev, self.triggers)
            test_metrics = self.evaluate_model_top(test_batches, "test", self.test, self.triggers)
        self.model.zero_grad()
    return self.model
def self_training(self, num_epochs, train_data, unlabeled_data):
    """Iterative self-training loop.

    Each round: train on the current merged (gold + weakly labeled) data,
    evaluate on dev/test, then move a selected slice of weakly labeled
    instances from the unlabeled pool into the training set.

    Args:
        num_epochs: number of self-training rounds.
        train_data: initial gold-labeled instances.
        unlabeled_data: pool of unlabeled instances to weakly label.

    Returns:
        The trained model (``self.model``).
    """
    self.optimizer = get_optimizer(self.config, self.model, 'sgd')
    merged_data = train_data
    unlabels = unlabeled_data
    for epoch in range(num_epochs):
        # Re-batch every round: merged_data grows as weak labels are added.
        batched_data = batching_list_instances(self.config, merged_data)
        epoch_loss = 0
        self.model.zero_grad()
        for index in tqdm(np.random.permutation(len(batched_data))):
            self.model.train()
            sequence_loss = self.model(*batched_data[index][0:5],
                                       batched_data[index][-2],
                                       batched_data[index][-3])
            loss = sequence_loss
            # NOTE(review): `.item()` would be the usual choice over `.data`
            # here (avoids holding tensors) — left unchanged in this pass.
            epoch_loss = epoch_loss + loss.data
            loss.backward(retain_graph=True)
            self.optimizer.step()
            self.model.zero_grad()
        print(epoch_loss)
        self.model.eval()
        dev_batches = batching_list_instances(self.config, self.dev)
        test_batches = batching_list_instances(self.config, self.test)
        dev_metrics = self.evaluate_model_top(dev_batches, "dev", self.dev, self.triggers)
        test_metrics = self.evaluate_model_top(test_batches, "test", self.test, self.triggers)
        self.model.zero_grad()
        # Select weakly labeled instances to merge into training; the rest
        # remain in the unlabeled pool for the next round.
        weaklabel, unlabel = self.weak_label_selftrain(unlabels, self.triggers)
        merged_data = merged_data + weaklabel
        unlabels = unlabel
        print(len(merged_data), len(weaklabel), len(unlabels))
    return self.model
def train_one(config: Config, train_insts: List[Instance], dev_insts: List[Instance], model_name: str, test_insts: List[Instance] = None, config_name: str = None, result_filename: str = None) -> NNCRF:
    """Train a single NNCRF model, keeping the checkpoint with the best dev F1.

    Args:
        config: experiment configuration (epochs, optimizer, batching).
        train_insts: training instances.
        dev_insts: development instances used for model selection.
        model_name: path where the best state_dict is saved.
        test_insts: optional test instances, evaluated every epoch.
        config_name: optional path to pickle the config alongside the model.
        result_filename: optional path for writing test predictions.

    Returns:
        The trained model (last-epoch weights; the best ones are on disk).
    """
    train_batches = batching_list_instances(config, train_insts)
    dev_batches = batching_list_instances(config, dev_insts)
    if test_insts:
        # NOTE(review): test_batches is never read below — evaluate_model is
        # called with the raw instances. Kept in case simple_batching has
        # side effects on the instances; verify and remove if it is pure.
        test_batches = simple_batching(config, test_insts)
    else:
        test_batches = None
    model = NNCRF(config)
    model.train()
    optimizer = get_optimizer(config, model)
    epoch = config.num_epochs
    best_dev_f1 = -1
    saved_test_metrics = None
    for i in range(1, epoch + 1):
        epoch_loss = 0
        start_time = time.time()
        model.zero_grad()
        # SGD gets an explicit per-epoch learning-rate decay.
        if config.optimizer.lower() == "sgd":
            optimizer = lr_decay(config, optimizer, i)
        for index in np.random.permutation(len(train_batches)):
            model.train()
            loss = model(*train_batches[index])
            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()
            model.zero_grad()
        end_time = time.time()
        print("Epoch %d: %.5f, Time is %.2fs" % (i, epoch_loss, end_time - start_time), flush=True)
        model.eval()
        # metric is [precision, recall, f_score]
        dev_metrics = evaluate_model(config, model, "dev", dev_insts)
        if test_insts is not None:
            test_metrics = evaluate_model(config, model, "test", test_insts)
        if dev_metrics[2] > best_dev_f1:
            print("saving the best model...")
            best_dev_f1 = dev_metrics[2]
            if test_insts is not None:
                saved_test_metrics = test_metrics
            torch.save(model.state_dict(), model_name)
            # Save the corresponding config as well.
            if config_name:
                # FIX: context manager guarantees the file is closed even if
                # pickling raises (was open/dump/close).
                with open(config_name, 'wb') as f:
                    pickle.dump(config, f)
            if result_filename:
                write_results(result_filename, test_insts)
        model.zero_grad()
    if test_insts is not None:
        print(f"The best dev F1: {best_dev_f1}")
        print(f"The corresponding test: {saved_test_metrics}")
    return model
def train_model(config: Config, train_insts: List[List[Instance]], dev_insts: List[Instance]):
    """Single-pass trainer: batch all data and train one BERT-CRF model.

    Args:
        config: experiment configuration (model folder, batching params).
        train_insts: training instances, batched and passed to ``train_one``.
        dev_insts: development instances for model selection.
    """
    train_num = len(train_insts)
    logging.info(("[Training Info] number of instances: %d" % (train_num)))
    # The dev set never changes during training.
    dev_batches = batching_list_instances(config, dev_insts)
    model_folder = config.model_folder
    logging.info("[Training Info] The model will be saved to: %s" % (model_folder))
    if not os.path.exists(model_folder):
        os.makedirs(model_folder)
    # BUG FIX: the old f-string interpolated the *builtin* `iter` (there is
    # no loop variable here), logging "<built-in function iter>".
    logging.info("-" * 20 + " [Training Info] Running a single training pass. " + "-" * 20)
    train_batches = batching_list_instances(config, train_insts)
    logging.info("\n" + "-------- [Training Info] Training fold 0. Initialized from pre-trained Model -------")
    model_name = model_folder + "/bert_crf_simple"
    train_one(config=config,
              train_batches=train_batches,  # Initialize bert model
              dev_insts=dev_insts,
              dev_batches=dev_batches,
              model_name=model_name)
def train_model(config: Config, train_insts: List[List[Instance]], dev_insts: List[Instance]):
    """Cross-fold self-training for BERT-CRF.

    For each outer iteration: train one model per fold, then let each fold's
    model relabel the *other* fold's training data. Finally a single model is
    trained on the merged (relabeled) data.

    Args:
        config: experiment configuration.
        train_insts: training instances split into 2 folds.
        dev_insts: development instances (never relabeled).
    """
    train_num = sum([len(insts) for insts in train_insts])
    logging.info(("[Training Info] number of instances: %d" % (train_num)))
    # The dev set never changes during training.
    dev_batches = batching_list_instances(config, dev_insts)
    model_folder = config.model_folder
    logging.info("[Training Info] The model will be saved to: %s" % (model_folder))
    if not os.path.exists(model_folder):
        os.makedirs(model_folder)
    num_outer_iterations = config.num_outer_iterations
    for iter in range(num_outer_iterations):
        logging.info("-" * 20 + f" [Training Info] Running for {iter}th large iterations. " + "-" * 20)
        # Re-batch each iteration: fold labels change between iterations.
        train_batches = [batching_list_instances(config, insts) for insts in train_insts]
        for fold_id in range(2):  # train 2 models in 2 folds
            logging.info("\n" + f"-------- [Training Info] Training fold {fold_id}. Initialized from pre-trained Model -------")
            model_name = model_folder + f"/bert_crf_{fold_id}"
            train_one(config=config,
                      train_batches=train_batches[fold_id],  # Initialize bert model
                      dev_insts=dev_insts,
                      dev_batches=dev_batches,
                      model_name=model_name)
        logging.info("\n\n[Data Info] Assigning labels")
        # model 0 relabels training data 1; model 1 relabels training data 0
        for fold_id in range(2):
            model = load_model(config)
            model_name = model_folder + f"/bert_crf_{fold_id}"
            utils.load_checkpoint(os.path.join(model_name, 'best.pth.tar'), model)
            # Sanity metrics: how the fold model scores on its own fold ...
            dev_metrics = evaluate_model(config, model, train_batches[fold_id], train_insts[fold_id])
            logging.info(str(fold_id) + " self [train set] Precision: %.2f, Recall: %.2f, F1: %.2f" % (dev_metrics[0], dev_metrics[1], dev_metrics[2]))
            # ... and on the sibling fold it is about to relabel.
            dev_metrics = evaluate_model(config, model, train_batches[1-fold_id], train_insts[1-fold_id])
            logging.info(str(fold_id) + " other [train set] Precision: %.2f, Recall: %.2f, F1: %.2f" % (dev_metrics[0], dev_metrics[1], dev_metrics[2]))
            hard_constraint_predict(config=config,
                                    model=model,
                                    fold_batches=train_batches[1 - fold_id],
                                    folded_insts=train_insts[1 - fold_id])  # set a new label id, k is set to 2, so 1 - fold_id can be used
    # train the final model
    logging.info("\n\n")
    logging.info("-------- [Training Info] Training the final model-------- ")
    # merge the result data to training the final model
    all_train_insts = list(itertools.chain.from_iterable(train_insts))
    logging.info("Initialized from pre-trained Model")
    model_name = model_folder + "/final_bert_crf"
    config_name = model_folder + "/config.conf"
    all_train_batches = batching_list_instances(config=config, insts=all_train_insts)
    train_one(config=config,
              train_batches=all_train_batches,
              dev_insts=dev_insts,
              dev_batches=dev_batches,
              model_name=model_name,
              config_name=config_name)
    # load the best final model
    # utils.load_checkpoint(os.path.join(model_name, 'best.pth.tar'), model)
    # model.eval()
    # logging.info("\n")
    # result = evaluate_model(config, model, dev_batches, "dev", dev_insts)
    logging.info("\n\n")
def train_model(self, num_epochs, train_data):
    """Jointly train trigger classification and trigger/sentence matching.

    Per batch: an NLL loss over predicted trigger types plus a contrastive
    loss that pulls matched trigger/sentence pairs together (first half of
    the pairs are labeled 1, second half 0).

    Args:
        num_epochs: number of passes over the training batches.
        train_data: list of instances; also used for per-epoch testing.

    Returns:
        The trained model (``self.model``).
    """
    batched_data = batching_list_instances(self.config, train_data)
    self.optimizer = get_optimizer(self.config, self.model, 'adam')
    criterion = nn.NLLLoss()
    for epoch in range(num_epochs):
        epoch_loss = 0
        self.model.zero_grad()
        for index in tqdm(np.random.permutation(len(batched_data))):
            self.model.train()
            trig_rep, trig_type_probas, match_trig, match_sent = self.model(
                *batched_data[index][0:5], batched_data[index][-2])
            trigger_loss = criterion(trig_type_probas, batched_data[index][-1])
            # Targets: 1 for the first trig_rep.size(0) pairs, 0 for the rest.
            soft_matching_loss = self.contrastive_loss(
                match_trig, match_sent,
                torch.stack([torch.tensor(1)] * trig_rep.size(0) +
                            [torch.tensor(0)] * trig_rep.size(0)))
            loss = trigger_loss + soft_matching_loss
            # FIX: accumulate a detached float instead of `loss.data`,
            # which kept tensors alive for the whole epoch.
            epoch_loss = epoch_loss + loss.item()
            # FIX: no retain_graph — a fresh graph is built each iteration,
            # so retaining the old one only wasted memory.
            loss.backward()
            self.optimizer.step()
            self.model.zero_grad()
        print(epoch_loss)
        self.test_model(train_data)
        self.model.zero_grad()
    return self.model
def test_model(self, test_data):
    """Print trigger-classification and soft-matching accuracy on test_data."""
    batched_data = batching_list_instances(self.config, test_data)
    self.model.eval()
    predicted_list = []
    target_list = []
    match_target_list = []
    matched_list = []
    for batch_idx in tqdm(np.random.permutation(len(batched_data))):
        batch = batched_data[batch_idx]
        trig_rep, trig_type_probas, match_trig, match_sent = self.model(
            *batch[0:5], batch[-2])
        _, predictions = torch.max(trig_type_probas, 1)
        target_list.extend(batch[-1].tolist())
        predicted_list.extend(predictions.tolist())
        # By construction the first half of the pairs are positives,
        # the second half negatives.
        n_pairs = trig_rep.size(0)
        match_target_list.extend(
            [torch.tensor(1)] * n_pairs + [torch.tensor(0)] * n_pairs)
        # Euclidean distance between trigger and sentence embeddings;
        # pairs closer than 1.0 count as matched.
        distances = torch.sqrt((match_trig - match_sent).pow(2).sum(1))
        matched_list.extend((distances < 1.0).long().tolist())
    print("trigger classification accuracy ",
          accuracy_score(predicted_list, target_list))
    print("soft matching accuracy ",
          accuracy_score(matched_list, match_target_list))
def get_triggervec(self, data):
    """Run the model over `data`, attach each instance's trigger vector,
    and collect (trigger representations, predicted types, trigger strings).
    """
    batched_data = batching_list_instances(self.config, data)
    self.model.eval()
    logits_list = []
    predicted_list = []
    trigger_list = []
    batch_size = self.config.batch_size
    for batch_idx in tqdm(range(len(batched_data))):
        batch = batched_data[batch_idx]
        trig_rep, trig_type_probas, match_trig, match_sent = self.model(
            *batch[0:5], batch[-2])
        _, predictions = torch.max(trig_type_probas, 1)
        # Instances of this batch, in the same order as trig_rep rows.
        start = batch_idx * batch_size
        ne_batch_insts = data[start:start + batch_size]
        for row, vec in enumerate(trig_rep):
            ne_batch_insts[row].trigger_vec = vec
        logits_list.extend(trig_rep)
        predicted_list.extend(predictions)
        word_seq = batch[0]
        trigger_positions = batch[-2]
        # Recover the surface form of each trigger from word ids.
        for ws, tp in zip(word_seq, trigger_positions):
            trigger_list.append(
                " ".join(self.config.idx2word[ws[pos]] for pos in tp))
    return logits_list, predicted_list, trigger_list
def evaluate_model(config: Config, model: NNCRF, name: str, insts: List[Instance]):
    """Decode `insts` with `model`, print and return [precision, recall, fscore]."""
    batch_insts_ids = batching_list_instances(config, insts)
    # Accumulated [correct, total_predicted, total_gold] entity counts.
    counts = np.asarray([0, 0, 0], dtype=int)
    bs = config.batch_size
    for pos, batch in enumerate(batch_insts_ids):
        gold_insts = insts[pos * bs:(pos + 1) * bs]
        with torch.no_grad():
            _, batch_max_ids = model.decode(batch)
        counts += evaluate_batch_insts(batch_insts=gold_insts,
                                       batch_pred_ids=batch_max_ids,
                                       batch_gold_ids=batch[-1],
                                       word_seq_lens=batch[1],
                                       idx2label=config.idx2labels)
    hits, total_predict, total_entity = counts[0], counts[1], counts[2]
    precision = hits * 1.0 / total_predict * 100 if total_predict != 0 else 0
    recall = hits * 1.0 / total_entity * 100 if total_entity != 0 else 0
    fscore = 2.0 * precision * recall / (precision + recall) if precision != 0 or recall != 0 else 0
    print("[%s set] Precision: %.2f, Recall: %.2f, F1: %.2f" % (name, precision, recall, fscore), flush=True)
    return [precision, recall, fscore]
def weak_label_selftrain(self, unlabeled_data, triggers):
    """Weakly label the pool and split it by confidence rank.

    The 1% of instances ranked first by ascending confidence score are kept
    as weak labels; the remainder is returned to the unlabeled pool.
    """
    batched_data = batching_list_instances(self.config, unlabeled_data,
                                           is_soft=False, is_naive=True)
    weakly_labeled, unlabeled, confidence = self.weakly_labeling(
        batched_data, unlabeled_data, triggers)
    # Instance indices ordered by ascending confidence score (stable).
    ranked = sorted(range(len(confidence)), key=lambda i: confidence[i])
    cutoff = int(len(ranked) * 0.01)
    kept = ranked[:cutoff]
    returned = ranked[cutoff:]
    final_weakly_labeled = [weakly_labeled[i] for i in kept]
    unlabeled = unlabeled + [weakly_labeled[i] for i in returned]
    return final_weakly_labeled, unlabeled
def update_train_insts(config: Config, train_insts: List[List[Instance]], model_names):
    """Cross-fold relabeling: each fold's saved model predicts labels for the
    other fold's instances (in place), then the folds are returned.
    """
    # assign hard prediction to other folds
    if config.variant == "hard":
        print("\n\n[Data Info] Assigning labels for the HARD approach")
    else:
        print(
            "\n\n[Data Info] Performing marginal decoding to assign the marginals"
        )
    train_batches = [
        batching_list_instances(config, insts) for insts in train_insts
    ]
    for fold_id in range(len(train_insts)):
        model = NNCRF(config)
        model.load_state_dict(torch.load(model_names[fold_id]))
        sibling = 1 - fold_id  # the other fold (two folds assumed)
        predict_with_constraints(
            config=config,
            model=model,
            fold_batches=train_batches[sibling],
            folded_insts=train_insts[sibling])  ## set a new label id
    print("\n\n")
    return train_insts
def train_model(config: Config, epoch: int, train_insts: List[Instance], dev_insts: List[Instance], test_insts: List[Instance]):
    """Train an NNCRF with dev-based model selection and early stopping.

    Saves the best checkpoint (by dev F1) plus config and test predictions
    under model_files/<model_folder>, archives the folder, then reloads the
    best weights for a final test evaluation.

    Args:
        config: experiment configuration.
        epoch: maximum number of training epochs.
        train_insts: training instances (shuffled in place).
        dev_insts: development instances used for model selection.
        test_insts: test instances, evaluated every epoch.
    """
    model = NNCRF(config)
    optimizer = get_optimizer(config, model)
    train_num = len(train_insts)
    print("number of instances: %d" % (train_num))
    print(colored("[Shuffled] Shuffle the training instance ids", "red"))
    random.shuffle(train_insts)
    batched_data = batching_list_instances(config, train_insts)
    dev_batches = batching_list_instances(config, dev_insts)
    test_batches = batching_list_instances(config, test_insts)
    # [best F1 so far, epoch it was reached at]
    best_dev = [-1, 0]
    best_test = [-1, 0]
    model_folder = config.model_folder
    res_folder = "results"
    if os.path.exists("model_files/" + model_folder):
        raise FileExistsError(
            f"The folder model_files/{model_folder} exists. Please either delete it or create a new one "
            f"to avoid override.")
    model_path = f"model_files/{model_folder}/lstm_crf.m"
    config_path = f"model_files/{model_folder}/config.conf"
    res_path = f"{res_folder}/{model_folder}.results"
    print("[Info] The model will be saved to: %s.tar.gz" % (model_folder))
    os.makedirs(f"model_files/{model_folder}", exist_ok=True)  ## create model files. not raise error if exist
    os.makedirs(res_folder, exist_ok=True)
    no_incre_dev = 0  # consecutive epochs without a dev-F1 improvement
    for i in tqdm(range(1, epoch + 1), desc="Epoch"):
        epoch_loss = 0
        start_time = time.time()
        model.zero_grad()
        # SGD gets an explicit per-epoch learning-rate decay.
        if config.optimizer.lower() == "sgd":
            optimizer = lr_decay(config, optimizer, i)
        for index in tqdm(np.random.permutation(len(batched_data)),
                          desc="--training batch",
                          total=len(batched_data)):
            model.train()
            loss = model(*batched_data[index])
            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()
            model.zero_grad()
        end_time = time.time()
        print("Epoch %d: %.5f, Time is %.2fs" % (i, epoch_loss, end_time - start_time), flush=True)
        model.eval()
        dev_metrics = evaluate_model(config, model, dev_batches, "dev", dev_insts)
        test_metrics = evaluate_model(config, model, test_batches, "test", test_insts)
        # New best dev F1: persist model, config and test predictions.
        if dev_metrics[2] > best_dev[0]:
            print("saving the best model...")
            no_incre_dev = 0
            best_dev[0] = dev_metrics[2]
            best_dev[1] = i
            best_test[0] = test_metrics[2]
            best_test[1] = i
            torch.save(model.state_dict(), model_path)
            # Save the corresponding config as well.
            f = open(config_path, 'wb')
            pickle.dump(config, f)
            f.close()
            write_results(res_path, test_insts)
        else:
            no_incre_dev += 1
        model.zero_grad()
        if no_incre_dev >= config.max_no_incre:
            print("early stop because there are %d epochs not increasing f1 on dev" % no_incre_dev)
            break
    print("Archiving the best Model...")
    with tarfile.open(f"model_files/{model_folder}/{model_folder}.tar.gz", "w:gz") as tar:
        tar.add(f"model_files/{model_folder}", arcname=os.path.basename(model_folder))
    print("Finished archiving the models")
    print("The best dev: %.2f" % (best_dev[0]))
    print("The corresponding test: %.2f" % (best_test[0]))
    print("Final testing.")
    # Reload the best-on-dev weights for the final test run.
    model.load_state_dict(torch.load(model_path))
    model.eval()
    evaluate_model(config, model, test_batches, "test", test_insts)
    write_results(res_path, test_insts)
def train_model(self, num_epochs, train_data, output_count="", is_paint=True):
    """Train ``self.model``, track test metrics per epoch, and plot curves.

    Args:
        num_epochs: number of training epochs.
        train_data: training instances.
        output_count: suffix appended to the saved figure filenames.
        is_paint: if True, also plot precision/recall/F1 curves; the loss
            curve is always plotted.

    Returns:
        The trained model (``self.model``).
    """
    batched_data, batch_insts = batching_list_instances(
        self.config, train_data)
    # Sample the loss curve roughly 10 times per epoch.
    size = len(batched_data) // 10
    self.optimizer = get_optimizer(self.config, self.model,
                                   self.config.optimizer)
    start = time.gmtime()
    losses = []
    train_precisions = []
    train_recalls = []
    train_fscores = []
    test_precisions = []
    test_recalls = []
    test_fscores = []
    for epoch in range(num_epochs):
        epoch_loss = 0
        self.model.zero_grad()
        print(f"------------------epoch: {(epoch+1)}------------------")
        for index in tqdm(np.random.permutation(len(batched_data))):
            self.model.train()
            sequence_loss = self.model(*batched_data[index][0:5],
                                       batched_data[index][-1],
                                       batch_insts[index])
            loss = sequence_loss
            if index % size == 0:
                losses.append(loss.data)
            epoch_loss = epoch_loss + loss.data
            loss.backward(retain_graph=True)
            self.optimizer.step()
            self.model.zero_grad()
        print(epoch_loss)
        self.model.eval()
        # train_batches, train_insts = batching_list_instances(self.config, train_data)
        # train_metrics = self.evaluate_model(train_batches, "train", train_data, train_insts)
        # train_precisions.append(train_metrics[0])
        # train_recalls.append(train_metrics[1])
        # train_fscores.append(train_metrics[2])
        test_batches, test_insts = batching_list_instances(
            self.config, self.test)
        test_metrics = self.evaluate_model(test_batches, "test", self.test,
                                           test_insts)
        test_precisions.append(test_metrics[0])
        test_recalls.append(test_metrics[1])
        test_fscores.append(test_metrics[2])
        self.model.zero_grad()
    end = time.gmtime()
    # Shift UTC to local wall-clock time (UTC+8) for the printed timestamps.
    start = time.strftime("%H:%M:%S", start).split(":")
    start = [str((int(start[0]) + 8) % 24)] + start[1:]
    end = time.strftime("%H:%M:%S", end).split(":")
    end = [str((int(end[0]) + 8) % 24)] + end[1:]
    print(f"startTime: {start}")
    print(f"endTime: {end}")
    # print("Train")
    # print("precisions", train_precisions)
    # print("recalls", train_recalls)
    # print("fscores:", train_fscores)
    # print("Test")
    print("precisions", test_precisions)
    print("recalls", test_recalls)
    print("fscores:", test_fscores)
    # x axis for per-epoch metrics; x_list for the sub-epoch loss samples.
    x = list(range(1, num_epochs + 1))
    x_list = [
        i / (len(losses) / num_epochs)
        for i in list(range(1, len(losses) + 1))
    ]
    # for i, v in enumerate(epoch_list):
    #     if ((i + 1) % train_plt_size) == 0:
    #         epoch_list[i] = (i // train_plt_size) + 1
    if is_paint:
        plt.figure()
        plt.grid(linestyle="--")  # dashed background grid lines
        ax = plt.gca()
        ax.spines['top'].set_visible(False)  # hide the top border
        ax.spines['right'].set_visible(False)  # hide the right border
        plt.plot(x, test_precisions, marker='o', color="red",
                 label="precision", linewidth=1.5)
        plt.plot(x, test_recalls, marker='o', color="green",
                 label="recall", linewidth=1.5)
        plt.plot(x, test_fscores, marker='o', color="blue",
                 label="fscore", linewidth=1.5)
        plt.xlabel('epoch')
        plt.ylabel('Performance Percentile')
        plt.legend(loc=0, numpoints=1)
        leg = plt.gca().get_legend()
        ltext = leg.get_texts()
        plt.setp(ltext, fontsize=12, fontweight='bold')  # legend font size/weight
        plt.savefig(
            f'per-{self.config.dataset}-{self.config.optimizer}-{num_epochs}-{self.config.learning_rate}-{output_count}.pdf',
            format='pdf')
        plt.savefig(
            f'per-{self.config.dataset}-{self.config.optimizer}-{num_epochs}-{self.config.learning_rate}-{output_count}.svg',
            format='svg')
        # plt.show()
        plt.figure()
        plt.grid(linestyle="--")  # dashed background grid lines
        ax = plt.gca()
        ax.spines['top'].set_visible(False)  # hide the top border
        ax.spines['right'].set_visible(False)  # hide the right border
        plt.plot(x_list, losses)
        plt.xlabel('epoch')
        plt.ylabel('Train Loss')
        plt.legend(loc=0, numpoints=1)
        leg = plt.gca().get_legend()
        ltext = leg.get_texts()
        plt.setp(ltext, fontsize=12, fontweight='bold')  # legend font size/weight
        plt.savefig(
            f'loss-{self.config.dataset}-{self.config.optimizer}-{num_epochs}-{self.config.learning_rate}-{output_count}.pdf',
            format='pdf')
        plt.savefig(
            f'loss-{self.config.dataset}-{self.config.optimizer}-{num_epochs}-{self.config.learning_rate}-{output_count}.svg',
            format='svg')
        # plt.show()
    else:
        plt.figure()
        plt.grid(linestyle="--")  # dashed background grid lines
        ax = plt.gca()
        ax.spines['top'].set_visible(False)  # hide the top border
        ax.spines['right'].set_visible(False)  # hide the right border
        plt.plot(x_list, losses)
        plt.xlabel('epoch')
        plt.ylabel('Train Loss')
        plt.legend(loc=0, numpoints=1)
        leg = plt.gca().get_legend()
        ltext = leg.get_texts()
        plt.setp(ltext, fontsize=12, fontweight='bold')  # legend font size/weight
        plt.savefig(
            f'loss-{self.config.dataset}-{self.config.optimizer}-{num_epochs}-{self.config.learning_rate}-{output_count}.pdf',
            format='pdf')
        plt.savefig(
            f'loss-{self.config.dataset}-{self.config.optimizer}-{num_epochs}-{self.config.learning_rate}-{output_count}.svg',
            format='svg')
    return self.model
def train_model(config: Config, train_insts: List[List[Instance]], dev_insts: List[Instance]):
    """Outer-loop cross-fold training for BERT-CRF, then a final merged model.

    Each outer iteration trains one model per fold, then each fold's best
    checkpoint relabels the other fold's instances. After all iterations a
    final model is trained on the merged folds and evaluated on dev.

    Args:
        config: experiment configuration.
        train_insts: training instances split into folds (2 assumed below).
        dev_insts: development instances used for model selection.
    """
    train_num = sum([len(insts) for insts in train_insts])
    logging.info(("[Training Info] number of instances: %d" % (train_num)))
    # get the batched data
    dev_batches = batching_list_instances(config, dev_insts)
    model_folder = config.model_folder
    logging.info("[Training Info] The model will be saved to: %s" % (model_folder))
    if not os.path.exists(model_folder):
        os.makedirs(model_folder)
    num_outer_iterations = config.num_outer_iterations
    for iter in range(num_outer_iterations):
        logging.info(f"[Training Info] Running for {iter}th large iterations.")
        model_names = []  # model names for each fold
        # Re-batch each iteration: fold labels change between iterations.
        train_batches = [batching_list_instances(config, insts) for insts in train_insts]
        logging.info("length of train_insts:%d" % len(train_insts))
        # train 2 models in 2 folds
        for fold_id, folded_train_insts in enumerate(train_insts):
            logging.info(f"[Training Info] Training fold {fold_id}.")
            # Initialize bert model
            logging.info("Initialized from pre-trained Model")
            model_name = model_folder + f"/bert_crf_{fold_id}"
            model_names.append(model_name)
            train_one(config=config,
                      train_batches=train_batches[fold_id],
                      dev_insts=dev_insts,
                      dev_batches=dev_batches,
                      model_name=model_name)
        # assign prediction to other folds
        logging.info("\n\n")
        logging.info("[Data Info] Assigning labels")
        # using the model trained in one fold to predict the result of another fold's data
        # and update the label of another fold with the predict result
        for fold_id, folded_train_insts in enumerate(train_insts):
            # Rebuild a BertCRF from the on-disk BERT config, copying over
            # the label vocabulary from the experiment config.
            cfig_path = os.path.join(config.bert_model_dir, 'bert_config.json')
            cfig = BertConfig.from_json_file(cfig_path)
            cfig.device = config.device
            cfig.label2idx = config.label2idx
            cfig.label_size = config.label_size
            cfig.idx2labels = config.idx2labels
            model_name = model_folder + f"/bert_crf_{fold_id}"
            model = BertCRF(cfig=cfig)
            model.to(cfig.device)
            # Load this fold's best checkpoint before relabeling the sibling fold.
            utils.load_checkpoint(os.path.join(model_name, 'best.pth.tar'), model)
            hard_constraint_predict(
                config=config,
                model=model,
                fold_batches=train_batches[1 - fold_id],
                folded_insts=train_insts[1 - fold_id]
            )  # set a new label id, k is set to 2, so 1 - fold_id can be used
    logging.info("\n\n")
    logging.info("[Training Info] Training the final model")
    # merge the result data to training the final model
    all_train_insts = list(itertools.chain.from_iterable(train_insts))
    logging.info("Initialized from pre-trained Model")
    model_name = model_folder + "/final_bert_crf"
    config_name = model_folder + "/config.conf"
    all_train_batches = batching_list_instances(config=config, insts=all_train_insts)
    # train the final model
    model = train_one(config=config,
                      train_batches=all_train_batches,
                      dev_insts=dev_insts,
                      dev_batches=dev_batches,
                      model_name=model_name,
                      config_name=config_name)
    # load the best final model
    utils.load_checkpoint(os.path.join(model_name, 'best.pth.tar'), model)
    model.eval()
    logging.info("\n")
    result = evaluate_model(config, model, dev_batches, "dev", dev_insts)
    logging.info("\n\n")
def train_model(config: Config, train_insts: List[List[Instance]], dev_insts: List[Instance], test_insts: List[Instance]):
    """Cross-fold training with noise-rate (forget-rate) schedules.

    Each outer iteration: optionally reshuffle the folds, estimate or read
    label-noise rates, build per-epoch forget-rate schedules, train one model
    per fold, then use each fold's model to relabel the sibling fold. A final
    model is trained on the merged folds, archived, and tested.

    Args:
        config: experiment configuration (noise rates, folds, epochs, paths).
        train_insts: training instances split into folds.
        dev_insts: development instances used for model selection.
        test_insts: test instances for the final evaluation.
    """
    train_num = sum([len(insts) for insts in train_insts])
    print("[Training Info] number of instances: %d" % (train_num))
    dev_batches = batching_list_instances(config, dev_insts)
    test_batches = batching_list_instances(config, test_insts)
    best_dev = [-1, 0]
    best_test = [-1, 0]
    model_folder = config.model_folder
    res_folder = config.res_folder
    if os.path.exists(model_folder):
        raise FileExistsError(
            f"The folder {model_folder} exists. Please either delete it or create a new one "
            f"to avoid override.")
    print("[Training Info] The model will be saved to: %s.tar.gz" % (model_folder))
    if not os.path.exists(model_folder):
        os.makedirs(model_folder)
    if not os.path.exists(res_folder):
        os.makedirs(res_folder)
    num_outer_iterations = config.num_outer_iterations
    # NOTE(review): SL_warmup is never read below — dead variable?
    SL_warmup = 2
    for iter in range(num_outer_iterations):
        print(f"[Training Info] Running for {iter}th large iterations.")
        # change fold devision every two iter
        if (iter > 0 and iter // 2 != (iter - 1) // 2):
            train_insts = train_insts[0] + train_insts[1]
            random.shuffle(train_insts)
            num_insts_in_fold = math.ceil(len(train_insts) / config.num_folds)
            train_insts = [
                train_insts[i * num_insts_in_fold:(i + 1) * num_insts_in_fold]
                for i in range(config.num_folds)
            ]
        model_names = []  # model names for each fold
        train_batches = [
            batching_list_instances(config, insts) for insts in train_insts
        ]
        # Estimated noise rates are fallbacks when the config does not fix them.
        neg_noise_rate_gold, pos_noise_rate_gold = ratio_estimation(
            config, train_insts)
        if (config.neg_noise_rate >= 0):
            neg_noise_rate = config.neg_noise_rate
        else:
            neg_noise_rate = neg_noise_rate_gold
        if (config.pos_noise_rate >= 0):
            pos_noise_rate = config.pos_noise_rate
        else:
            pos_noise_rate = pos_noise_rate_gold
        # After the first iteration, fixed (hard-coded) rates are used.
        if (iter > 0):
            neg_noise_rate = 0.005
            pos_noise_rate = 0.15
        print('negative noise rate: ' + str(neg_noise_rate))
        print('positve noise rate: ' + str(pos_noise_rate))
        # Per-epoch forget-rate schedules, with or without a warm-up phase.
        if (config.warm_up_num == 0):
            rate_schedule_neg, rate_schedule_pos = gen_forget_rate(
                config.num_epochs, neg_noise_rate, pos_noise_rate,
                config.num_gradual_neg, config.num_gradual_pos)
        else:
            rate_schedule_neg, rate_schedule_pos = gen_forget_rate_warmup(
                config.num_epochs, neg_noise_rate, pos_noise_rate,
                config.warm_up_num, config.num_gradual_neg,
                config.num_gradual_pos)
        for fold_id, folded_train_insts in enumerate(train_insts):
            print(f"[Training Info] Training fold {fold_id}.")
            model_name = model_folder + f"/lstm_crf_{fold_id}.m"
            model_names.append(model_name)
            train_one(config=config,
                      train_batches=train_batches[fold_id],
                      dev_insts=dev_insts,
                      dev_batches=dev_batches,
                      model_name=model_name,
                      rate_schedule_neg=rate_schedule_neg,
                      rate_schedule_pos=rate_schedule_pos)
        # assign hard prediction to other folds
        print("\n\n[Data Info] Assigning labels for the HARD approach")
        for fold_id, folded_train_insts in enumerate(train_insts):
            model = NNCRF_sl(config)
            model_name = model_names[fold_id]
            model.load_state_dict(torch.load(model_name))
            hard_constraint_predict(
                config=config,
                model=model,
                fold_batches=train_batches[1 - fold_id],
                folded_insts=train_insts[1 - fold_id])  ## set a new label id
        print("\n\n")
    print("[Training Info] Training the final model")
    all_train_insts = list(itertools.chain.from_iterable(train_insts))
    model_name = model_folder + "/num_outer_iterations_final_lstm_crf.m"
    config_name = model_folder + "/num_outer_iterations_config.conf"
    res_name = res_folder + "/num_outer_iterations_lstm_crf.results".format()
    all_train_batches = batching_list_instances(config=config, insts=all_train_insts)
    neg_noise_rate, pos_noise_rate = ratio_estimation(config, train_insts)
    # The final model is trained with all-zero forget-rate schedules.
    rate_schedule_neg = np.zeros(config.num_epochs)
    rate_schedule_pos = np.zeros(config.num_epochs)
    model = train_one(config=config,
                      train_batches=all_train_batches,
                      dev_insts=dev_insts,
                      dev_batches=dev_batches,
                      model_name=model_name,
                      config_name=config_name,
                      test_insts=test_insts,
                      test_batches=test_batches,
                      result_filename=res_name,
                      rate_schedule_neg=rate_schedule_neg,
                      rate_schedule_pos=rate_schedule_pos)
    print("Archiving the best Model...")
    with tarfile.open(
            model_folder + "/" + str(num_outer_iterations) + model_folder + ".tar.gz",
            "w:gz") as tar:
        tar.add(model_folder, arcname=os.path.basename(model_folder))
    # Reload the best checkpoint for the final test run.
    model.load_state_dict(torch.load(model_name))
    model.eval()
    evaluate_model(config, model, test_batches, "test", test_insts)
    write_results(res_name, test_insts)
def train_model(config: Config, epoch: int, train_insts: List[Instance], dev_insts: List[Instance], test_insts: List[Instance]):
    """Train an NNCRF, saving and archiving the checkpoint with the best test F1.

    NOTE(review): model selection here compares ``test_metrics`` against
    ``best_test`` (not dev) — the test set steers checkpointing; confirm this
    is intentional.

    Args:
        config: experiment configuration.
        epoch: number of training epochs.
        train_insts: training instances (shuffled in place).
        dev_insts: development instances (evaluated, but not used for selection).
        test_insts: test instances used for selection and final evaluation.
    """
    model = NNCRF(config)
    optimizer = get_optimizer(config, model)
    train_num = len(train_insts)
    print("number of instances: %d" % (train_num))
    print(colored("[Shuffled] Shuffle the training instance ids", "red"))
    random.shuffle(train_insts)
    batched_data = batching_list_instances(config, train_insts)
    dev_batches = batching_list_instances(config, dev_insts)
    test_batches = batching_list_instances(config, test_insts)
    # [best F1 so far, epoch it was reached at]
    best_dev = [-1, 0]
    best_test = [-1, 0]
    model_folder = config.model_folder
    res_folder = "results"
    if os.path.exists(model_folder):
        raise FileExistsError(
            f"The folder {model_folder} exists. Please either delete it or create a new one "
            f"to avoid override.")
    model_name = model_folder + "/lstm_crf.m".format()
    config_name = model_folder + "/config.conf"
    res_name = res_folder + "/lstm_crf.results".format()
    print("[Info] The model will be saved to: %s.tar.gz" % (model_folder))
    if not os.path.exists(model_folder):
        os.makedirs(model_folder)
    if not os.path.exists(res_folder):
        os.makedirs(res_folder)
    for i in range(1, epoch + 1):
        epoch_loss = 0
        start_time = time.time()
        model.zero_grad()
        # SGD gets an explicit per-epoch learning-rate decay.
        if config.optimizer.lower() == "sgd":
            optimizer = lr_decay(config, optimizer, i)
        for index in np.random.permutation(len(batched_data)):
            model.train()
            loss = model(*batched_data[index])
            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()
            model.zero_grad()
            # NOTE(review): the detached result is discarded, so this call
            # is a no-op.
            loss.detach()
        end_time = time.time()
        print("Epoch %d: %.5f, Time is %.2fs" % (i, epoch_loss, end_time - start_time), flush=True)
        model.eval()
        dev_metrics = evaluate_model(config, model, dev_batches, "dev", dev_insts)
        test_metrics = evaluate_model(config, model, test_batches, "test", test_insts)
        # metrics[1] holds [precision, recall, f1]; [-2]/[-1] are the
        # exact / overlap reports passed to print_report below.
        if test_metrics[1][2] > best_test[0]:
            print("saving the best model...")
            best_dev[0] = dev_metrics[1][2]
            best_dev[1] = i
            best_test[0] = test_metrics[1][2]
            best_test[1] = i
            torch.save(model.state_dict(), model_name)
            # Save the corresponding config as well.
            f = open(config_name, 'wb')
            pickle.dump(config, f)
            f.close()
            print('Exact\n')
            print_report(test_metrics[-2])
            print('Overlap\n')
            print_report(test_metrics[-1])
            write_results(res_name, test_insts)
            # Archive the folder each time a new best checkpoint is saved.
            print("Archiving the best Model...")
            with tarfile.open(model_folder + "/" + model_folder + ".tar.gz", "w:gz") as tar:
                tar.add(model_folder, arcname=os.path.basename(model_folder))
        model.zero_grad()
    print("Finished archiving the models")
    print("The best dev: %.2f" % (best_dev[0]))
    print("The corresponding test: %.2f" % (best_test[0]))
    print("Final testing.")
    # Reload the best checkpoint for the final test run.
    model.load_state_dict(torch.load(model_name))
    model.eval()
    evaluate_model(config, model, test_batches, "test", test_insts)
    write_results(res_name, test_insts)
def train_model(config: Config, epoch: int, train_insts: List[Instance],
                dev_insts: List[Instance], test_insts: List[Instance]):
    """Train an NNCRF or TransformersCRF model with early stopping on dev F1.

    Picks the model/optimizer pair based on `config.embedder_type`
    ("normal" -> NNCRF + project optimizer; otherwise TransformersCRF with the
    huggingface optimizer/scheduler), trains with optional gradient clipping,
    checkpoints on dev-F1 improvement, stops early after
    `config.max_no_incre` non-improving epochs, and finally reloads and
    evaluates the best checkpoint.

    :param config: project Config (hyper-parameters, device, paths, ...).
    :param epoch: number of training epochs.
    :param train_insts: training instances (shuffled in place).
    :param dev_insts: development instances; F1 on these drives checkpointing.
    :param test_insts: test instances evaluated every epoch for reporting.
    :raises FileExistsError: if model_files/<model_folder> already exists.
    """
    ### Data Processing Info
    train_num = len(train_insts)
    print("number of instances: %d" % (train_num))
    print(colored("[Shuffled] Shuffle the training instance ids", "red"))
    random.shuffle(train_insts)
    batched_data = batching_list_instances(config, train_insts)
    dev_batches = batching_list_instances(config, dev_insts)
    test_batches = batching_list_instances(config, test_insts)

    if config.embedder_type == "normal":
        model = NNCRF(config)
        optimizer = get_optimizer(config, model)
        scheduler = None
    else:
        print(
            colored(
                f"[Model Info]: Working with transformers package from huggingface with {config.embedder_type}",
                'red'))
        print(
            colored(
                f"[Optimizer Info]: You should be aware that you are using the optimizer from huggingface.",
                'red'))
        print(
            colored(
                f"[Optimizer Info]: Change the optimier in transformers_util.py if you want to make some modifications.",
                'red'))
        model = TransformersCRF(config)
        optimizer, scheduler = get_huggingface_optimizer_and_scheduler(
            config, model, num_training_steps=len(batched_data) * epoch,
            weight_decay=0.0, eps=1e-8, warmup_step=0)
        print(
            colored(f"[Optimizer Info] Modify the optimizer info as you need.",
                    'red'))
        print(optimizer)
    model.to(config.device)

    # [best F1 so far, epoch at which it occurred]
    best_dev = [-1, 0]
    best_test = [-1, 0]
    model_folder = config.model_folder
    res_folder = "results"
    if os.path.exists("model_files/" + model_folder):
        raise FileExistsError(
            f"The folder model_files/{model_folder} exists. Please either delete it or create a new one "
            f"to avoid override.")
    model_path = f"model_files/{model_folder}/lstm_crf.m"
    config_path = f"model_files/{model_folder}/config.conf"
    res_path = f"{res_folder}/{model_folder}.results"
    print("[Info] The model will be saved to: %s.tar.gz" % (model_folder))
    os.makedirs(f"model_files/{model_folder}", exist_ok=True)  ## create model files. not raise error if exist
    os.makedirs(res_folder, exist_ok=True)

    no_incre_dev = 0  # consecutive epochs without dev-F1 improvement
    print(
        colored(
            f"[Train Info] Start training, you have set to stop if performace not increase for {config.max_no_incre} epochs",
            'red'))
    for i in tqdm(range(1, epoch + 1), desc="Epoch"):
        epoch_loss = 0
        start_time = time.time()
        model.zero_grad()
        if config.optimizer.lower() == "sgd":
            # SGD uses a hand-rolled per-epoch learning-rate decay.
            optimizer = lr_decay(config, optimizer, i)
        for index in tqdm(np.random.permutation(len(batched_data)),
                          desc="--training batch", total=len(batched_data)):
            model.train()
            loss = model(**batched_data[index])
            epoch_loss += loss.item()
            loss.backward()
            if config.max_grad_norm > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               config.max_grad_norm)
            optimizer.step()
            optimizer.zero_grad()
            model.zero_grad()
            if scheduler is not None:
                scheduler.step()
        end_time = time.time()
        print("Epoch %d: %.5f, Time is %.2fs" %
              (i, epoch_loss, end_time - start_time), flush=True)

        model.eval()
        dev_metrics = evaluate_model(config, model, dev_batches, "dev", dev_insts)
        test_metrics = evaluate_model(config, model, test_batches, "test", test_insts)
        if dev_metrics[2] > best_dev[0]:
            print("saving the best model...")
            no_incre_dev = 0
            best_dev[0] = dev_metrics[2]
            best_dev[1] = i
            best_test[0] = test_metrics[2]
            best_test[1] = i
            torch.save(model.state_dict(), model_path)
            # Save the corresponding config as well (context manager replaces
            # the original open/close pair so the handle is always released).
            with open(config_path, 'wb') as f:
                pickle.dump(config, f)
            write_results(res_path, test_insts)
        else:
            no_incre_dev += 1
        model.zero_grad()
        if no_incre_dev >= config.max_no_incre:
            print(
                "early stop because there are %d epochs not increasing f1 on dev"
                % no_incre_dev)
            break

    print("Archiving the best Model...")
    with tarfile.open(f"model_files/{model_folder}/{model_folder}.tar.gz",
                      "w:gz") as tar:
        tar.add(f"model_files/{model_folder}",
                arcname=os.path.basename(model_folder))
    print("Finished archiving the models")
    print("The best dev: %.2f" % (best_dev[0]))
    print("The corresponding test: %.2f" % (best_test[0]))
    print("Final testing.")
    # Reload the best checkpoint for the final evaluation pass.
    model.load_state_dict(torch.load(model_path))
    model.eval()
    evaluate_model(config, model, test_batches, "test", test_insts)
    write_results(res_path, test_insts)
def train_model(config: Config, train_insts: List[List[Instance]],
                dev_insts: List[Instance], test_insts: List[Instance]):
    """Cross-fold self-training: train one model per fold, cross-label folds,
    then train and evaluate a final model on the merged data.

    For each of `config.num_outer_iterations` rounds: train a model on each
    fold, use each fold's model to hard-label the OTHER fold
    (`hard_constraint_predict`), then train a final model on all folds merged
    and archive/evaluate it.

    :param config: project Config (hyper-parameters, paths, ...).
    :param train_insts: training instances partitioned into folds — the
        `1 - fold_id` cross-labeling below assumes exactly TWO folds.
    :param dev_insts: development instances passed to `train_one`.
    :param test_insts: test instances for the final evaluation.
    """
    train_num = sum([len(insts) for insts in train_insts])
    print(f"[Training Info] number of instances: {train_num:d}")
    dev_batches = batching_list_instances(config, dev_insts)
    test_batches = batching_list_instances(config, test_insts)
    # Note: removed the unused best_dev/best_test locals and the dead
    # commented-out FileExistsError guard from the original.
    model_folder = config.model_folder
    res_folder = "results"
    print(f"[Training Info] The model will be saved to: {model_folder}.tar.gz")
    os.makedirs(model_folder, exist_ok=True)
    os.makedirs(res_folder, exist_ok=True)

    num_outer_iterations = config.num_outer_iterations
    # Renamed the original loop variable `iter`, which shadowed the builtin.
    for outer_iter in range(num_outer_iterations):
        print(f"[Training Info] Running for {outer_iter}th large iterations.")
        model_names = []  # model names for each fold
        train_batches = [
            batching_list_instances(config, insts) for insts in train_insts
        ]
        for fold_id, folded_train_insts in enumerate(train_insts):
            print(f"[Training Info] Training fold {fold_id}.")
            model_name = model_folder + f"/lstm_crf_{fold_id}.m"
            model_names.append(model_name)
            train_one(config=config, train_batches=train_batches[fold_id],
                      dev_insts=dev_insts, dev_batches=dev_batches,
                      model_name=model_name)
        # assign hard prediction to other folds
        print("\n\n[Data Info] Assigning labels for the HARD approach")
        for fold_id, folded_train_insts in enumerate(train_insts):
            model = NNCRF(config)
            model_name = model_names[fold_id]
            model.load_state_dict(torch.load(model_name))
            # `1 - fold_id` targets the opposite fold — valid only for 2 folds.
            hard_constraint_predict(
                config=config, model=model,
                fold_batches=train_batches[1 - fold_id],
                folded_insts=train_insts[1 - fold_id])  # set a new label id
        print("\n\n")

        print("[Training Info] Training the final model")
        all_train_insts = list(itertools.chain.from_iterable(train_insts))
        model_name = model_folder + "/final_lstm_crf.m"
        config_name = model_folder + "/config.conf"
        # Fixed: dropped the no-op `.format()` on a plain string literal.
        res_name = res_folder + "/lstm_crf.results"
        all_train_batches = batching_list_instances(config=config,
                                                    insts=all_train_insts)
        model = train_one(config=config, train_batches=all_train_batches,
                          dev_insts=dev_insts, dev_batches=dev_batches,
                          model_name=model_name, config_name=config_name,
                          test_insts=test_insts, test_batches=test_batches,
                          result_filename=res_name)
        print("Archiving the best Model...")
        with tarfile.open(model_folder + "/" + model_folder + ".tar.gz",
                          "w:gz") as tar:
            tar.add(model_folder, arcname=os.path.basename(model_folder))
        # Reload the best checkpoint saved by train_one for a final eval.
        model.load_state_dict(torch.load(model_name))
        model.eval()
        evaluate_model(config, model, test_batches, "test", test_insts)
        write_results(res_name, test_insts)