def predict_with_constraints(config: Config, model: NNCRF, fold_batches: List[Tuple], folded_insts: List[Instance]):
    batch_id = 0
    batch_size = config.batch_size
    model.eval()
    for batch in fold_batches:
        one_batch_insts = folded_insts[batch_id * batch_size:(batch_id + 1) * batch_size]
        word_seq_lens = batch[1].cpu().numpy()
        if config.variant == "hard":
            with torch.no_grad():
                batch_max_scores, batch_max_ids = model.decode(batch)
            batch_max_ids = batch_max_ids.cpu().numpy()
            for idx in range(len(batch_max_ids)):
                length = word_seq_lens[idx]
                prediction = batch_max_ids[idx][:length].tolist()
                prediction = prediction[::-1]  ## the decoder returns label ids in reverse order
                one_batch_insts[idx].output_ids = prediction
        else:  ## soft variant: assign the marginal probabilities instead of hard labels
            with torch.no_grad():
                marginals = model.get_marginal(batch)
            marginals = marginals.cpu().numpy()
            for idx in range(len(marginals)):
                length = word_seq_lens[idx]
                one_batch_insts[idx].marginals = marginals[idx, :length, :]
        batch_id += 1
def hard_constraint_predict(config: Config, model: NNCRF, fold_batches: List[Tuple],
                            folded_insts: List[Instance], model_type: str = "hard"):
    ## note: `model_type` is currently unused; hard (Viterbi) decoding is always performed
    batch_id = 0
    batch_size = config.batch_size
    model.eval()
    for batch in fold_batches:
        one_batch_insts = folded_insts[batch_id * batch_size:(batch_id + 1) * batch_size]
        _, batch_max_ids = model.decode(batch)
        batch_max_ids = batch_max_ids.cpu().numpy()
        word_seq_lens = batch[1].cpu().numpy()
        for idx in range(len(batch_max_ids)):
            length = word_seq_lens[idx]
            prediction = batch_max_ids[idx][:length].tolist()
            prediction = prediction[::-1]  ## the decoder returns label ids in reverse order
            one_batch_insts[idx].output_ids = prediction
        batch_id += 1
def evaluate_model(config: Config, model: NNCRF, batch_insts_ids, name: str, insts: List[Instance]):
    ## evaluation
    metrics = np.asarray([0, 0, 0], dtype=int)
    batch_id = 0
    batch_size = config.batch_size
    for batch in batch_insts_ids:
        one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) * batch_size]
        batch_max_scores, batch_max_ids = model.decode(batch)
        metrics += evaluate_batch_insts(batch_insts=one_batch_insts,
                                        batch_pred_ids=batch_max_ids,
                                        batch_gold_ids=batch[-1],
                                        word_seq_lens=batch[1],
                                        idx2label=config.idx2labels)
        batch_id += 1
    p, total_predict, total_entity = metrics[0], metrics[1], metrics[2]
    precision = p * 1.0 / total_predict * 100 if total_predict != 0 else 0
    recall = p * 1.0 / total_entity * 100 if total_entity != 0 else 0
    fscore = 2.0 * precision * recall / (precision + recall) if precision != 0 or recall != 0 else 0
    print("[%s set] Precision: %.2f, Recall: %.2f, F1: %.2f" % (name, precision, recall, fscore), flush=True)
    return [precision, recall, fscore]
def update_train_insts(config: Config, train_insts: List[List[Instance]], model_names):
    # assign hard (or soft) predictions from the model trained on one fold to the instances of the other fold
    if config.variant == "hard":
        print("\n\n[Data Info] Assigning labels for the HARD approach")
    else:
        print("\n\n[Data Info] Performing marginal decoding to assign the marginals")
    train_batches = [batching_list_instances(config, insts) for insts in train_insts]
    for fold_id, folded_train_insts in enumerate(train_insts):
        model = NNCRF(config)
        model_name = model_names[fold_id]
        model.load_state_dict(torch.load(model_name))
        ## `1 - fold_id` assumes exactly two folds: the model of fold 0 labels fold 1 and vice versa
        predict_with_constraints(config=config,
                                 model=model,
                                 fold_batches=train_batches[1 - fold_id],
                                 folded_insts=train_insts[1 - fold_id])  ## set a new label id
    print("\n\n")
    return train_insts
def evaluate_model(config: Config, model: NNCRF, batch_insts_ids, name: str, insts: List[Instance]):
    ## evaluation with per-entity-type precision/recall/F1
    p_dict, total_predict_dict, total_entity_dict = Counter(), Counter(), Counter()
    batch_id = 0
    batch_size = config.batch_size
    for batch in batch_insts_ids:
        one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) * batch_size]
        batch_max_scores, batch_max_ids = model.decode(batch)
        batch_p, batch_predict, batch_total = evaluate_batch_insts(one_batch_insts, batch_max_ids,
                                                                   batch[-1], batch[1],
                                                                   config.idx2labels, config.use_crf_layer)
        p_dict += batch_p
        total_predict_dict += batch_predict
        total_entity_dict += batch_total
        batch_id += 1
    for key in total_entity_dict:
        precision_key, recall_key, fscore_key = get_metric(p_dict[key], total_entity_dict[key], total_predict_dict[key])
        print("[%s] Prec.: %.2f, Rec.: %.2f, F1: %.2f" % (key, precision_key, recall_key, fscore_key))
        if key == config.new_type:
            precision_new_type, recall_new_type, fscore_new_type = get_metric(p_dict[key], total_entity_dict[key], total_predict_dict[key])
    total_p = sum(list(p_dict.values()))
    total_predict = sum(list(total_predict_dict.values()))
    total_entity = sum(list(total_entity_dict.values()))
    precision, recall, fscore = get_metric(total_p, total_entity, total_predict)
    print(colored("[%s set Total] Prec.: %.2f, Rec.: %.2f, F1: %.2f" % (name, precision, recall, fscore), 'blue'), flush=True)
    if config.choose_by_new_type:
        return [precision_new_type, recall_new_type, fscore_new_type]
    else:
        return [precision, recall, fscore]
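## `get_metric` is not defined in this file. Below is a minimal sketch consistent with how it is
## called above (argument order: correct count, gold entity count, predicted entity count) and with
## the precision/recall/F1 formulas used elsewhere in this file; treat it as an assumption about the
## helper, not the repository's actual implementation.
def get_metric(p_num: int, total_num: int, total_predicted_num: int) -> Tuple[float, float, float]:
    precision = p_num * 1.0 / total_predicted_num * 100 if total_predicted_num != 0 else 0
    recall = p_num * 1.0 / total_num * 100 if total_num != 0 else 0
    fscore = 2.0 * precision * recall / (precision + recall) if precision != 0 or recall != 0 else 0
    return precision, recall, fscore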
def evaluate_model(config: Config, model: NNCRF, batch_insts_ids, name: str, insts: List[Instance]):
    ## evaluation
    metrics_exact = np.asarray([0, 0, 0], dtype=int)
    metrics_overlap = np.asarray([0, 0, 0], dtype=int)
    dict_exact = {}
    dict_overlap = {}
    batch_id = 0
    batch_size = config.batch_size
    for batch in batch_insts_ids:
        one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) * batch_size]
        batch_max_scores, batch_max_ids = model.decode(batch)
        results = evaluate_batch_insts(one_batch_insts, batch_max_ids, batch[-1], batch[1], config.idx2labels)
        metrics_exact += results[0]
        metrics_overlap += results[1]
        for key in results[2]:
            if key not in dict_exact:
                dict_exact[key] = [0, 0, 0]
            dict_exact[key][0] += results[2][key][0]
            dict_exact[key][1] += results[2][key][1]
            dict_exact[key][2] += results[2][key][2]
        for key in results[3]:
            if key not in dict_overlap:
                dict_overlap[key] = [0, 0, 0]
            dict_overlap[key][0] += results[3][key][0]
            dict_overlap[key][1] += results[3][key][1]
            dict_overlap[key][2] += results[3][key][2]
        batch_id += 1
    p_exact, total_predict, total_entity = metrics_exact[0], metrics_exact[1], metrics_exact[2]
    precision_exact = p_exact * 1.0 / total_predict * 100 if total_predict != 0 else 0
    recall_exact = p_exact * 1.0 / total_entity * 100 if total_entity != 0 else 0
    fscore_exact = 2.0 * precision_exact * recall_exact / (precision_exact + recall_exact) if precision_exact != 0 or recall_exact != 0 else 0
    print("[%s set - Exact] Precision: %.2f, Recall: %.2f, F1: %.2f" % (name, precision_exact, recall_exact, fscore_exact), flush=True)
    # print_report(dict_exact)
    p_overlap, total_predict, total_entity = metrics_overlap[0], metrics_overlap[1], metrics_overlap[2]
    precision_overlap = p_overlap * 1.0 / total_predict * 100 if total_predict != 0 else 0
    recall_overlap = p_overlap * 1.0 / total_entity * 100 if total_entity != 0 else 0
    fscore_overlap = 2.0 * precision_overlap * recall_overlap / (precision_overlap + recall_overlap) if precision_overlap != 0 or recall_overlap != 0 else 0
    print("[%s set - Overlap] Precision: %.2f, Recall: %.2f, F1: %.2f" % (name, precision_overlap, recall_overlap, fscore_overlap), flush=True)
    # print_report(dict_overlap)
    return [precision_exact, recall_exact, fscore_exact], [precision_overlap, recall_overlap, fscore_overlap], dict_exact, dict_overlap
def train_model(config: Config, epoch: int, train_insts: List[Instance], dev_insts: List[Instance], test_insts: List[Instance]): model = NNCRF(config) optimizer = get_optimizer(config, model) train_num = len(train_insts) print("number of instances: %d" % (train_num)) print(colored("[Shuffled] Shuffle the training instance ids", "red")) random.shuffle(train_insts) batched_data = batching_list_instances(config, train_insts) dev_batches = batching_list_instances(config, dev_insts) test_batches = batching_list_instances(config, test_insts) best_dev = [-1, 0] best_test = [-1, 0] model_folder = config.model_folder res_folder = "results" if os.path.exists(model_folder): raise FileExistsError( f"The folder {model_folder} exists. Please either delete it or create a new one " f"to avoid override.") model_name = model_folder + "/lstm_crf.m".format() config_name = model_folder + "/config.conf" res_name = res_folder + "/lstm_crf.results".format() print("[Info] The model will be saved to: %s.tar.gz" % (model_folder)) if not os.path.exists(model_folder): os.makedirs(model_folder) if not os.path.exists(res_folder): os.makedirs(res_folder) for i in range(1, epoch + 1): epoch_loss = 0 start_time = time.time() model.zero_grad() if config.optimizer.lower() == "sgd": optimizer = lr_decay(config, optimizer, i) for index in np.random.permutation(len(batched_data)): model.train() loss = model(*batched_data[index]) epoch_loss += loss.item() loss.backward() optimizer.step() model.zero_grad() loss.detach() end_time = time.time() print("Epoch %d: %.5f, Time is %.2fs" % (i, epoch_loss, end_time - start_time), flush=True) model.eval() dev_metrics = evaluate_model(config, model, dev_batches, "dev", dev_insts) test_metrics = evaluate_model(config, model, test_batches, "test", test_insts) if test_metrics[1][2] > best_test[0]: print("saving the best model...") best_dev[0] = dev_metrics[1][2] best_dev[1] = i best_test[0] = test_metrics[1][2] best_test[1] = i torch.save(model.state_dict(), model_name) # Save the corresponding config as well. f = open(config_name, 'wb') pickle.dump(config, f) f.close() print('Exact\n') print_report(test_metrics[-2]) print('Overlap\n') print_report(test_metrics[-1]) write_results(res_name, test_insts) print("Archiving the best Model...") with tarfile.open(model_folder + "/" + model_folder + ".tar.gz", "w:gz") as tar: tar.add(model_folder, arcname=os.path.basename(model_folder)) model.zero_grad() print("Finished archiving the models") print("The best dev: %.2f" % (best_dev[0])) print("The corresponding test: %.2f" % (best_test[0])) print("Final testing.") model.load_state_dict(torch.load(model_name)) model.eval() evaluate_model(config, model, test_batches, "test", test_insts) write_results(res_name, test_insts)
def train_model(config: Config, epoch: int, train_insts: List[Instance], dev_insts: List[Instance], test_insts: List[Instance]): model = NNCRF(config) optimizer = get_optimizer(config, model) train_num = len(train_insts) print("number of instances: %d" % (train_num)) print(colored("[Shuffled] Shuffle the training instance ids", "red")) random.shuffle(train_insts) batched_data = batching_list_instances(config, train_insts) dev_batches = batching_list_instances(config, dev_insts) test_batches = batching_list_instances(config, test_insts) best_dev = [-1, 0] best_test = [-1, 0] model_folder = config.model_folder res_folder = "results" if os.path.exists("model_files/" + model_folder): raise FileExistsError( f"The folder model_files/{model_folder} exists. Please either delete it or create a new one " f"to avoid override.") model_path = f"model_files/{model_folder}/lstm_crf.m" config_path = f"model_files/{model_folder}/config.conf" res_path = f"{res_folder}/{model_folder}.results" print("[Info] The model will be saved to: %s.tar.gz" % (model_folder)) os.makedirs(f"model_files/{model_folder}", exist_ok=True) ## create model files. not raise error if exist os.makedirs(res_folder, exist_ok=True) no_incre_dev = 0 for i in tqdm(range(1, epoch + 1), desc="Epoch"): epoch_loss = 0 start_time = time.time() model.zero_grad() if config.optimizer.lower() == "sgd": optimizer = lr_decay(config, optimizer, i) for index in tqdm(np.random.permutation(len(batched_data)), desc="--training batch", total=len(batched_data)): model.train() loss = model(*batched_data[index]) epoch_loss += loss.item() loss.backward() optimizer.step() model.zero_grad() end_time = time.time() print("Epoch %d: %.5f, Time is %.2fs" % (i, epoch_loss, end_time - start_time), flush=True) model.eval() dev_metrics = evaluate_model(config, model, dev_batches, "dev", dev_insts) test_metrics = evaluate_model(config, model, test_batches, "test", test_insts) if dev_metrics[2] > best_dev[0]: print("saving the best model...") no_incre_dev = 0 best_dev[0] = dev_metrics[2] best_dev[1] = i best_test[0] = test_metrics[2] best_test[1] = i torch.save(model.state_dict(), model_path) # Save the corresponding config as well. f = open(config_path, 'wb') pickle.dump(config, f) f.close() write_results(res_path, test_insts) else: no_incre_dev += 1 model.zero_grad() if no_incre_dev >= config.max_no_incre: print( "early stop because there are %d epochs not increasing f1 on dev" % no_incre_dev) break print("Archiving the best Model...") with tarfile.open(f"model_files/{model_folder}/{model_folder}.tar.gz", "w:gz") as tar: tar.add(f"model_files/{model_folder}", arcname=os.path.basename(model_folder)) print("Finished archiving the models") print("The best dev: %.2f" % (best_dev[0])) print("The corresponding test: %.2f" % (best_test[0])) print("Final testing.") model.load_state_dict(torch.load(model_path)) model.eval() evaluate_model(config, model, test_batches, "test", test_insts) write_results(res_path, test_insts)
def train_model(config: Config, train_insts: List[List[Instance]], dev_insts: List[Instance], test_insts: List[Instance]):
    train_num = sum([len(insts) for insts in train_insts])
    print(f"[Training Info] number of instances: {train_num:d}")
    dev_batches = batching_list_instances(config, dev_insts)
    test_batches = batching_list_instances(config, test_insts)
    best_dev = [-1, 0]
    best_test = [-1, 0]
    model_folder = config.model_folder
    res_folder = "results"
    # if os.path.exists(model_folder):
    #     raise FileExistsError(f"The folder {model_folder} exists. Please either delete it or create a new one "
    #                           f"to avoid overriding it.")
    print(f"[Training Info] The model will be saved to: {model_folder}.tar.gz")
    if not os.path.exists(model_folder):
        os.makedirs(model_folder)
    if not os.path.exists(res_folder):
        os.makedirs(res_folder)
    num_outer_iterations = config.num_outer_iterations
    for outer_iter in range(num_outer_iterations):
        print(f"[Training Info] Running the {outer_iter}-th outer iteration.")
        model_names = []  # model names for each fold
        train_batches = [batching_list_instances(config, insts) for insts in train_insts]
        for fold_id, folded_train_insts in enumerate(train_insts):
            print(f"[Training Info] Training fold {fold_id}.")
            model_name = model_folder + f"/lstm_crf_{fold_id}.m"
            model_names.append(model_name)
            train_one(config=config, train_batches=train_batches[fold_id], dev_insts=dev_insts,
                      dev_batches=dev_batches, model_name=model_name)
        # assign hard predictions to the other fold (assumes exactly two folds)
        print("\n\n[Data Info] Assigning labels for the HARD approach")
        for fold_id, folded_train_insts in enumerate(train_insts):
            model = NNCRF(config)
            model_name = model_names[fold_id]
            model.load_state_dict(torch.load(model_name))
            hard_constraint_predict(config=config, model=model,
                                    fold_batches=train_batches[1 - fold_id],
                                    folded_insts=train_insts[1 - fold_id])  # set a new label id
        print("\n\n")
        print("[Training Info] Training the final model")
        all_train_insts = list(itertools.chain.from_iterable(train_insts))
        model_name = model_folder + "/final_lstm_crf.m"
        config_name = model_folder + "/config.conf"
        res_name = res_folder + "/lstm_crf.results"
        all_train_batches = batching_list_instances(config=config, insts=all_train_insts)
        model = train_one(config=config, train_batches=all_train_batches, dev_insts=dev_insts,
                          dev_batches=dev_batches, model_name=model_name, config_name=config_name,
                          test_insts=test_insts, test_batches=test_batches, result_filename=res_name)
        print("Archiving the best Model...")
        with tarfile.open(model_folder + "/" + model_folder + ".tar.gz", "w:gz") as tar:
            tar.add(model_folder, arcname=os.path.basename(model_folder))
        # print("The best dev: %.2f" % (best_dev[0]))
        # print("The corresponding test: %.2f" % (best_test[0]))
        # print("Final testing.")
        model.load_state_dict(torch.load(model_name))
        model.eval()
        evaluate_model(config, model, test_batches, "test", test_insts)
        write_results(res_name, test_insts)
def train_one(config: Config, train_batches: List[Tuple], dev_insts: List[Instance], dev_batches: List[Tuple],
              model_name: str, test_insts: List[Instance] = None, test_batches: List[Tuple] = None,
              config_name: str = None, result_filename: str = None) -> NNCRF:
    model = NNCRF(config)
    model.train()
    optimizer = get_optimizer(config, model)
    epoch = config.num_epochs
    best_dev_f1 = -1
    saved_test_metrics = None
    for i in range(1, epoch + 1):
        epoch_loss = 0
        start_time = time.time()
        model.zero_grad()
        if config.optimizer.lower() == "sgd":
            optimizer = lr_decay(config, optimizer, i)
        for index in np.random.permutation(len(train_batches)):
            model.train()
            loss = model(*train_batches[index])
            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()
            model.zero_grad()
        end_time = time.time()
        print(f"Epoch {i:d}: {epoch_loss:.5f}, Time is {end_time - start_time:.2f}s", flush=True)
        model.eval()
        # metric is [precision, recall, f_score]
        dev_metrics = evaluate_model(config, model, dev_batches, "dev", dev_insts)
        if test_insts is not None:
            test_metrics = evaluate_model(config, model, test_batches, "test", test_insts)
        if dev_metrics[2] > best_dev_f1:
            print("saving the best model...")
            best_dev_f1 = dev_metrics[2]
            if test_insts is not None:
                saved_test_metrics = test_metrics
            torch.save(model.state_dict(), model_name)
            # Save the corresponding config as well.
            if config_name:
                with open(config_name, 'wb') as f:
                    pickle.dump(config, f)
            if result_filename:
                write_results(result_filename, test_insts)
        model.zero_grad()
    if test_insts is not None:
        print(f"The best dev F1: {best_dev_f1}")
        print(f"The corresponding test: {saved_test_metrics}")
    return model
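## `lr_decay` (used in the SGD branch above) is not defined in this file. Below is a minimal sketch
## of a typical per-epoch learning-rate decay for SGD; the `config.learning_rate` and `config.lr_decay`
## attribute names are assumptions for illustration, not the repository's actual definition.
def lr_decay(config: Config, optimizer: torch.optim.Optimizer, epoch: int) -> torch.optim.Optimizer:
    lr = config.learning_rate / (1 + config.lr_decay * (epoch - 1))  # decay the base learning rate each epoch
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    print("learning rate is set to:", lr)
    return optimizer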
def evaluate_model(config: Config, model: NNCRF, batch_insts_ids, name: str, insts: List[Instance]):
    ## evaluation with an additional error breakdown (boundary errors, O/misc confusions, and entity-length buckets)
    i = 0
    metrics = np.zeros(26, dtype=int)
    batch_id = 0
    batch_size = config.batch_size
    for batch in batch_insts_ids:
        i += 1
        flag = 0
        one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) * batch_size]
        batch_max_scores, batch_max_ids = model.decode(batch)
        if i == len(batch_insts_ids) - 1:  ## note: since i is incremented first, this triggers on the second-to-last batch
            flag = 1
        metrics += evaluate_batch_insts(one_batch_insts, batch_max_ids, batch[-1], batch[1],
                                        config.idx2labels, config.use_crf_layer, config.test_kind, flag)
        batch_id += 1
    p, p_special, total_predict, total_entity, special_predict, special_entity = metrics[0], metrics[1], metrics[2], metrics[3], metrics[4], metrics[5]
    wrong_prediction = {
        "BLater": metrics[6],
        "BEarlier": metrics[7],
        "ILater": metrics[8],
        "IEarlier": metrics[9],
        "O2misc": metrics[10],
        "misc2O": metrics[11],
        1: metrics[12],
        2: metrics[13],
        3: metrics[14],
        4: metrics[15],
        5: metrics[16],
        6: metrics[17],
        7: metrics[18],
        "length1": metrics[19],
        "length2": metrics[20],
        "length3": metrics[21],
        "length4": metrics[22],
        "length5": metrics[23],
        "length6": metrics[24],
        "length7": metrics[25],
    }
    precision = p * 1.0 / total_predict * 100 if total_predict != 0 else 0
    recall = p * 1.0 / total_entity * 100 if total_entity != 0 else 0
    fscore = 2.0 * precision * recall / (precision + recall) if precision != 0 or recall != 0 else 0
    precision_special = p_special * 1.0 / special_predict * 100 if special_predict != 0 else 0
    recall_special = p_special * 1.0 / special_entity * 100 if special_entity != 0 else 0
    fscore_special = 2.0 * precision_special * recall_special / (precision_special + recall_special) \
        if precision_special != 0 or recall_special != 0 else 0
    print("---[%s set] Precision: %.2f, Recall: %.2f, F1: %.2f" % (name, precision, recall, fscore), flush=True)
    print("---[%s of %s set] Precision: %.2f, Recall: %.2f, F1: %.2f" % (config.test_kind, name, precision_special, recall_special, fscore_special), flush=True)
    print(p_special, special_entity, special_predict)
    for inn in wrong_prediction.keys():
        if str(inn).startswith("length"):
            print(wrong_prediction[inn], end=" ")
    print()
    print(wrong_prediction)
    if name == "test":
        print()
    return [precision, recall, fscore]