def evaluate(model, criterion, data_loader, file_path, mode):
    """
    mode eval:
    eval on development set and compute P/R/F1, called between training epochs.
    mode predict:
    eval on development / test set, then write predictions to
    predictions.json and its zip archive under the args.data_path dir
    for later submission or evaluation.
    """
    example_all = []
    with open(file_path, "r", encoding="utf-8") as fp:
        for line in fp:
            example_all.append(json.loads(line))
    id2spo_path = os.path.join(os.path.dirname(file_path), "id2spo.json")
    with open(id2spo_path, 'r', encoding='utf8') as fp:
        id2spo = json.load(fp)

    model.eval()
    loss_all = 0
    eval_steps = 0
    formatted_outputs = []
    current_idx = 0
    for batch in tqdm(data_loader, total=len(data_loader)):
        eval_steps += 1
        input_ids, seq_len, tok_to_orig_start_index, \
            tok_to_orig_end_index, labels = batch
        logits = model(input_ids=input_ids)
        # mask out special tokens; ids 0/1/2 are assumed to be the
        # [PAD]/[CLS]/[SEP] ids of the vocabulary
        mask = (input_ids != 0).logical_and(input_ids != 1).logical_and(
            input_ids != 2)
        loss = criterion(logits, labels, mask)
        loss_all += loss.numpy().item()
        probs = F.sigmoid(logits)
        probs_batch = probs.numpy()
        seq_len_batch = seq_len.numpy()
        tok_to_orig_start_index_batch = tok_to_orig_start_index.numpy()
        tok_to_orig_end_index_batch = tok_to_orig_end_index.numpy()
        # decode this batch against its slice of the raw examples so that
        # predictions stay aligned with the input order
        formatted_outputs.extend(
            decoding(example_all[current_idx:current_idx + len(logits)],
                     id2spo, probs_batch, seq_len_batch,
                     tok_to_orig_start_index_batch,
                     tok_to_orig_end_index_batch))
        current_idx = current_idx + len(logits)
    loss_avg = loss_all / eval_steps
    print("eval loss: %f" % loss_avg)

    if mode == "predict":
        predict_file_path = os.path.join(args.data_path, 'predictions.json')
    else:
        predict_file_path = os.path.join(args.data_path, 'predict_eval.json')

    predict_zipfile_path = write_prediction_results(formatted_outputs,
                                                    predict_file_path)

    if mode == "eval":
        precision, recall, f1 = get_precision_recall_f1(file_path,
                                                        predict_zipfile_path)
        os.remove(predict_file_path)
        os.remove(predict_zipfile_path)
        return precision, recall, f1
    elif mode != "predict":
        raise ValueError("wrong mode for eval func")
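# A minimal sketch of a masked criterion compatible with the
# criterion(logits, labels, mask) call above, assuming logits and labels of
# shape [batch, seq_len, num_labels] and a [batch, seq_len] token mask.
# The class name BCELossForDuIE is an assumption for illustration; the real
# loss is defined elsewhere in the training script.
import paddle
import paddle.nn.functional as F

class BCELossForDuIE(paddle.nn.Layer):
    def forward(self, logits, labels, mask):
        # per-token, per-label binary cross entropy on raw logits
        loss = F.binary_cross_entropy_with_logits(
            logits, paddle.cast(labels, 'float32'), reduction='none')
        # average over the label dimension, then keep only real tokens
        loss = loss.mean(axis=-1)
        mask = paddle.cast(mask, 'float32')
        return (loss * mask).sum() / mask.sum()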
def evaluate(model, criterion, data_loader, test_loss, file_path, mode):
    """
    mode eval:
    eval on development set and compute P/R/F1, called between training epochs.
    mode predict:
    eval on development / test set, then write predictions to
    predictions.json and its zip archive under the args.data_path dir
    for later submission or evaluation.
    """
    probs_all = []
    seq_len_all = []
    tok_to_orig_start_index_all = []
    tok_to_orig_end_index_all = []
    for batch in tqdm(data_loader):
        input_ids, seq_len, tok_to_orig_start_index, \
            tok_to_orig_end_index, labels = batch
        logits = model(input_ids=input_ids)
        # mask out special tokens; ids 0/1/2 are assumed to be the
        # [PAD]/[CLS]/[SEP] ids of the vocabulary
        mask = (input_ids != 0) & (input_ids != 1) & (input_ids != 2)
        loss = criterion(logits, labels, mask)
        # test_loss is assumed to be a running-mean metric (e.g.
        # tf.keras.metrics.Mean): update it once per step here and read
        # the averaged result after the loop
        test_loss(loss)
        # turn logits into probabilities with a numpy sigmoid so that
        # decoding sees the same scale as in the other variants
        probs = 1.0 / (1.0 + np.exp(-logits.numpy()))
        probs_all.append(probs)
        seq_len_all.append(seq_len.numpy())
        tok_to_orig_start_index_all.append(tok_to_orig_start_index.numpy())
        tok_to_orig_end_index_all.append(tok_to_orig_end_index.numpy())
    # concatenate per-batch arrays once instead of np.append in the loop
    probs_all = np.concatenate(probs_all, axis=0)
    seq_len_all = np.concatenate(seq_len_all, axis=0)
    tok_to_orig_start_index_all = np.concatenate(
        tok_to_orig_start_index_all, axis=0)
    tok_to_orig_end_index_all = np.concatenate(
        tok_to_orig_end_index_all, axis=0)
    loss_avg = float(test_loss.result())
    print("eval loss: %f" % loss_avg)

    id2spo_path = os.path.join(os.path.dirname(file_path), "id2spo.json")
    with open(id2spo_path, 'r', encoding='utf8') as fp:
        id2spo = json.load(fp)

    formatted_outputs = decoding(file_path, id2spo, probs_all, seq_len_all,
                                 tok_to_orig_start_index_all,
                                 tok_to_orig_end_index_all)
    if mode == "predict":
        predict_file_path = os.path.join(args.data_path, 'predictions.json')
    else:
        predict_file_path = os.path.join(args.data_path, 'predict_eval.json')

    predict_zipfile_path = write_prediction_results(formatted_outputs,
                                                    predict_file_path)

    if mode == "eval":
        precision, recall, f1 = get_precision_recall_f1(file_path,
                                                        predict_zipfile_path)
        os.remove(predict_file_path)
        os.remove(predict_zipfile_path)
        return precision, recall, f1
    elif mode != "predict":
        raise ValueError("wrong mode for eval func")
def evaluate(model, criterion, data_loader, file_path, mode, logger):
    """
    mode eval:
    eval on development set and compute P/R/F1, called between training epochs.
    mode predict:
    eval on development / test set, then write predictions to
    predictions.json and its zip archive under the args.data_path dir
    for later submission or evaluation.
    """
    model.eval()
    probs_all = []
    seq_len_all = []
    tok_to_orig_start_index_all = []
    tok_to_orig_end_index_all = []
    loss_all = 0
    eval_steps = 0
    logger.info(
        "\n----------------------------------IN Evaluate func-----------------------------------\n"
    )
    # no gradients are needed during evaluation
    with torch.no_grad():
        for batch in tqdm(data_loader, total=len(data_loader)):
            eval_steps += 1
            input_ids, seq_len, tok_to_orig_start_index, \
                tok_to_orig_end_index, labels = batch
            if args.device == 'cuda':
                input_ids = input_ids.cuda()
                labels = labels.cuda()
            logits = model(input_ids=input_ids)
            # mask out special tokens; ids 0/1/2 are assumed to be the
            # [PAD]/[CLS]/[SEP] ids of the vocabulary
            mask = (input_ids != 0) & (input_ids != 1) & (input_ids != 2)
            loss = criterion(logits, labels, mask)
            loss_all += loss.item()
            probs = torch.sigmoid(logits).cpu()
            probs_all.append(probs.numpy())
            seq_len_all.append(seq_len.numpy())
            tok_to_orig_start_index_all.append(tok_to_orig_start_index.numpy())
            tok_to_orig_end_index_all.append(tok_to_orig_end_index.numpy())
    # concatenate per-batch arrays once instead of np.append in the loop
    probs_all = np.concatenate(probs_all, axis=0)
    seq_len_all = np.concatenate(seq_len_all, axis=0)
    tok_to_orig_start_index_all = np.concatenate(
        tok_to_orig_start_index_all, axis=0)
    tok_to_orig_end_index_all = np.concatenate(
        tok_to_orig_end_index_all, axis=0)
    loss_avg = loss_all / eval_steps
    logger.info("eval loss: %f", loss_avg)

    id2spo_path = os.path.join(os.path.dirname(file_path), "id2spo.json")
    with open(id2spo_path, 'r', encoding='utf8') as fp:
        id2spo = json.load(fp)

    formatted_outputs = decoding(file_path, id2spo, probs_all, seq_len_all,
                                 tok_to_orig_start_index_all,
                                 tok_to_orig_end_index_all)
    if mode == "predict":
        predict_file_path = os.path.join(args.data_path, 'predictions.json')
    else:
        predict_file_path = os.path.join(args.data_path, 'predict_eval.json')

    predict_zipfile_path = write_prediction_results(formatted_outputs,
                                                    predict_file_path)

    if mode == "eval":
        precision, recall, f1 = get_precision_recall_f1(file_path,
                                                        predict_zipfile_path)
        os.remove(predict_file_path)
        os.remove(predict_zipfile_path)
        return precision, recall, f1
    elif mode != "predict":
        logger.error("wrong mode for eval func")
        raise ValueError("wrong mode for eval func")
    logger.info("Finish evaluating.")
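# A minimal usage sketch for the torch variant above, not a definitive
# recipe: dev_loader, test_loader, criterion, args.dev_data_file and
# args.test_data_file are assumed to be defined by the surrounding
# training script.
#
#   # between epochs: score the development set
#   precision, recall, f1 = evaluate(
#       model, criterion, dev_loader, args.dev_data_file, "eval", logger)
#
#   # after training: write predictions.json and its zip for submission
#   evaluate(model, criterion, test_loader, args.test_data_file,
#            "predict", logger)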