def eval_one_epoch(sess, valid_loss, valid_num_batches):
    """Run one full validation epoch and return the mean per-batch loss.

    Args:
        sess: active TF session used to evaluate ``valid_loss``.
        valid_loss: loss tensor backed by a dataset iterator that signals
            end-of-data by raising ``tf.errors.OutOfRangeError``.
        valid_num_batches: expected number of batches; used only to size the
            optional progress bar.

    Returns:
        Mean validation loss over the batches actually consumed, or 0.0 if
        the iterator yielded no batches at all.
    """
    if FLAGS.show:
        bar = ProgressBar('Validation', max=valid_num_batches)
    val_loss = num_batches = 0
    while True:
        if FLAGS.show:
            bar.next()
        # Keep the try body minimal: only sess.run can raise OutOfRangeError.
        try:
            loss = sess.run(valid_loss)
        except tf.errors.OutOfRangeError:
            break
        val_loss += loss
        num_batches += 1
    if FLAGS.show:
        bar.finish()
    # BUG FIX: guard the division — an empty/exhausted validation set used
    # to raise ZeroDivisionError here.
    return val_loss / num_batches if num_batches else 0.0
def train_one_epoch(sess, summary_writer, merged, global_step, train_step,
                    train_loss, train_num_batches):
    """Run one full training epoch and return the mean per-batch loss.

    Args:
        sess: active TF session.
        summary_writer: ``tf.summary.FileWriter`` receiving merged summaries.
        merged: merged-summary op, fetched every 50th batch.
        global_step: global-step tensor used to tag summaries.
        train_step: training op to run each batch.
        train_loss: loss tensor; its iterator raises
            ``tf.errors.OutOfRangeError`` at end of data.
        train_num_batches: expected batch count; only sizes the progress bar.

    Returns:
        Mean training loss over the batches actually consumed, or 0.0 if
        the iterator yielded no batches at all.
    """
    if FLAGS.show:
        bar = ProgressBar('Training', max=train_num_batches)
    tr_loss = num_batches = 0
    while True:
        if FLAGS.show:
            bar.next()
        try:
            if num_batches % 50 == 49:
                # Every 50th batch also fetch summaries for TensorBoard.
                _, loss, summary, step = sess.run(
                    [train_step, train_loss, merged, global_step])
                summary_writer.add_summary(summary, step)
            else:
                _, loss = sess.run([train_step, train_loss])
        except tf.errors.OutOfRangeError:
            break
        tr_loss += loss
        num_batches += 1
    if FLAGS.show:
        bar.finish()
    # BUG FIX: guard the division — an empty/exhausted training set used
    # to raise ZeroDivisionError here (consistent with eval_one_epoch).
    return tr_loss / num_batches if num_batches else 0.0
def inference(model, cfg, during_training=False):
    """Run detection inference over the validation set, dump predictions to a
    COCO-format JSON file, and evaluate them with COCO (or an "Improved COCO")
    bbox metric.

    Args:
        model: detection model; called as model(img_tensor_batch) and returning
            (c_pred, box_pred, iou_pred, anchors).
        cfg: project config; fields used here: weight, val_api.
        during_training: forwarded to make_data_loader; when True the final
            per-class-threshold / curve reporting is skipped.
    """
    model.eval()
    predictions, coco_results = {}, []  # NOTE(review): `predictions` is never used below
    val_loader = make_data_loader(cfg, during_training=during_training)
    dataset = val_loader.dataset
    dl = len(val_loader)
    bar = ProgressBar(length=40, max_val=dl)
    timer.reset()
    with torch.no_grad():
        for i, (img_list_batch, _) in enumerate(val_loader):
            # Skip the first batch when timing: it includes warm-up costs.
            if i == 1:
                timer.start()
            with timer.counter('forward'):
                img_tensor_batch = torch.stack(
                    [aa.img for aa in img_list_batch], dim=0).cuda()
                c_pred, box_pred, iou_pred, anchors = model(img_tensor_batch)
            with timer.counter('post_process'):
                resized_size = [aa.resized_size for aa in img_list_batch]
                pred_batch = post_process(cfg, c_pred, box_pred, iou_pred,
                                          anchors, resized_size)
            with timer.counter('accumulate'):
                for pred in pred_batch:
                    pred.to_cpu()
                for img_list, pred in zip(img_list_batch, pred_batch):
                    # Skip images with no detections.
                    if pred.box.shape[0] == 0:
                        continue
                    original_id = dataset.id_img_map[img_list.id]
                    # Map boxes back to the original image size and to the
                    # COCO (x1, y1, w, h) box convention.
                    pred.resize(img_list.ori_size)
                    pred.convert_mode("x1y1wh")
                    boxes = pred.box.tolist()
                    score = pred.score.tolist()
                    label = pred.label.tolist()
                    # Translate contiguous training labels to COCO category ids.
                    mapped_labels = [dataset.to_category_id[i] for i in label]
                    coco_results.extend([{
                        "image_id": original_id,
                        "category_id": mapped_labels[k],
                        "bbox": box,
                        "score": score[k]
                    } for k, box in enumerate(boxes)])
            # Wall-clock bookkeeping: `temp` holds the previous iteration's
            # timestamp, so batch_time is only available from i >= 1.
            aa = time.perf_counter()
            if i > 0:
                batch_time = aa - temp
                timer.add_batch_time(batch_time)
                time_name = [
                    'batch', 'data', 'forward', 'post_process', 'accumulate'
                ]
                t_t, t_d, t_f, t_pp, t_acc = timer.get_times(time_name)
                # fps excludes batch-time overhead; t_fps is end-to-end.
                fps, t_fps = 1 / (t_d + t_f + t_pp), 1 / t_t
                bar_str = bar.get_bar(i + 1)
                print(
                    f'\rTesting: {bar_str} {i + 1}/{dl}, fps: {fps:.2f} | total fps: {t_fps:.2f} | t_t: {t_t:.3f} | '
                    f't_d: {t_d:.3f} | t_f: {t_f:.3f} | t_pp: {t_pp:.3f} | t_acc: {t_acc:.3f}',
                    end='')
            temp = aa
    print('\n\nTest ended, doing evaluation...')
    # Result file is named after the weight checkpoint's basename.
    json_name = cfg.weight.split('/')[-1].split('.')[0]
    file_path = f'results/{json_name}.json'
    with open(file_path, "w") as f:
        json.dump(coco_results, f)
    coco_dt = dataset.coco.loadRes(file_path)
    if cfg.val_api == 'Improved COCO':
        from my_cocoeval.cocoeval import SelfEval
        bbox_eval = SelfEval(dataset.coco, coco_dt, all_points=True)
    else:
        from pycocotools.cocoeval import COCOeval
        bbox_eval = COCOeval(dataset.coco, coco_dt, iouType='bbox')
    bbox_eval.evaluate()
    bbox_eval.accumulate()
    bbox_eval.summarize()
    # Extra reporting only for standalone evaluation runs.
    if not during_training:
        if cfg.val_api == 'Improved COCO':
            bbox_eval.draw_curve()
        else:
            compute_thre_per_class(bbox_eval)
def train(args, train_iter, model):
    """Train the BERT MRC model for one pass over ``train_iter`` with FGM
    adversarial training: each batch does a normal backward pass, then a
    second backward pass on embedding-perturbed inputs so the adversarial
    gradients accumulate on top of the normal ones before the optimizer step.

    Args:
        args: namespace with weight_decay, learning_rate, linear_learning_rate,
            adam_epsilon, device.
        train_iter: iterable of dict batches (all_input_ids, all_attention_mask,
            all_token_type_ids, all_start_positions, all_end_positions,
            all_answerable_label).
        model: module exposing .bert, .classifier and .classifier_cls.
    """
    logger.info("***** Running train *****")
    # Optimizer: no weight decay for bias/LayerNorm params; the BERT encoder
    # and the linear heads get separate learning rates.
    no_decay = ["bias", "LayerNorm.weight"]
    bert_param_optimizer = list(model.bert.named_parameters())
    linear_param_optimizer = list(model.classifier.named_parameters())
    linear_param_optimizer.extend(list(
        model.classifier_cls.named_parameters()))
    optimizer_grouped_parameters = [
        {
            'params': [
                p for n, p in bert_param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay': args.weight_decay,
            'lr': args.learning_rate
        },
        {
            'params': [
                p for n, p in bert_param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay': 0.0,
            'lr': args.learning_rate
        },
        {
            'params': [
                p for n, p in linear_param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay': args.weight_decay,
            'lr': args.linear_learning_rate
        },
        {
            'params': [
                p for n, p in linear_param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay': 0.0,
            'lr': args.linear_learning_rate
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    # Loss function
    criterion = CrossEntropyLossForChecklist().to(args.device)
    batch_mrc_loss = 0
    batch_cls_loss = 0
    pbar = ProgressBar(n_total=len(train_iter), desc='Training')
    print("****" * 20)
    fgm = FGM(model, epsilon=1, emb_name='word_embeddings.weight')
    for step, batch in enumerate(train_iter):
        for key in batch.keys():
            batch[key] = batch[key].to(args.device)
        logits = model(input_ids=batch['all_input_ids'],
                       attention_mask=batch['all_attention_mask'],
                       token_type_ids=batch['all_token_type_ids'])
        # Normal training pass.
        loss, mrc_loss, cls_loss = criterion(
            logits, (batch["all_start_positions"], batch["all_end_positions"],
                     batch["all_answerable_label"]))
        loss.backward()
        # Adversarial training pass (FGM).
        fgm.attack()  # add adversarial perturbation to the embeddings
        logits_adv = model(input_ids=batch['all_input_ids'],
                           attention_mask=batch['all_attention_mask'],
                           token_type_ids=batch['all_token_type_ids'])
        # NOTE(review): mrc_loss/cls_loss from the normal pass are overwritten
        # here, so the progress bar reports the adversarial-pass losses.
        loss_adv, mrc_loss, cls_loss = criterion(
            logits_adv,
            (batch["all_start_positions"], batch["all_end_positions"],
             batch["all_answerable_label"]))
        # Backprop: adversarial gradients accumulate on top of the normal ones.
        loss_adv.backward()
        fgm.restore()  # restore the original embedding parameters
        # NOTE(review): the mrc accumulator is disabled, so the reported
        # batch_mrc_loss is always 0 — confirm this is intentional.
        # batch_mrc_loss += mrc_loss.item()
        batch_cls_loss += cls_loss.item()
        pbar(
            step, {
                'batch_mrc_loss': batch_mrc_loss / (step + 1),
                'batch_cls_loss': batch_cls_loss / (step + 1)
            })
        optimizer.step()
        model.zero_grad()
def evaluate(args, eval_iter, model, prefix):
    """Evaluate the MRC model on ``eval_iter``, write prediction JSON files
    into ``args.output_dir`` (prefixed with ``prefix``), and — when
    ``prefix == "eval"`` — compute exact match and rouge-1 F1.

    Args:
        args: namespace with device, output_dir, n_best_size,
            max_answer_length, cls_threshold.
        eval_iter: data loader whose ``dataset`` exposes ``examples`` and
            ``tokenized_examples``.
        model: returns (start_logits, end_logits, cls_logits).
        prefix: filename prefix; ``"eval"`` additionally triggers metric
            computation.

    Returns:
        (F1_score, EM) when ``prefix == "eval"``; otherwise None (implicit).
    """
    logger.info("***** Running Evalation *****")
    all_start_logits = []
    all_end_logits = []
    all_cls_logits = []
    batch_mrc_loss = 0
    batch_cls_loss = 0
    pbar = ProgressBar(n_total=len(eval_iter), desc="Evaluating")
    model.eval()
    criterion = CrossEntropyLossForChecklist().to(args.device)
    with torch.no_grad():
        for step, batch in enumerate(eval_iter):
            for key in batch.keys():
                batch[key] = batch[key].to(args.device)
            start_logits_tensor, end_logits_tensor, cls_logits_tensor = model(
                input_ids=batch['all_input_ids'],
                attention_mask=batch['all_attention_mask'],
                token_type_ids=batch['all_token_type_ids'])
            # NOTE(review): the loss is computed but never accumulated or
            # reported (batch_mrc_loss / batch_cls_loss stay 0) — confirm
            # whether per-batch loss reporting was intended here.
            loss, mrc_loss, cls_loss = criterion(
                (start_logits_tensor, end_logits_tensor, cls_logits_tensor),
                (batch["all_start_positions"], batch["all_end_positions"],
                 batch["all_answerable_label"]))
            # Collect per-example logits on CPU for prediction decoding.
            for idx in range(start_logits_tensor.shape[0]):
                all_start_logits.append(start_logits_tensor.cpu().numpy()[idx])
                all_end_logits.append(end_logits_tensor.cpu().numpy()[idx])
                all_cls_logits.append(cls_logits_tensor.cpu().numpy()[idx])
            pbar(step)
    all_predictions, all_nbest_json, all_cls_predictions = compute_prediction_checklist(
        eval_iter.dataset.examples, eval_iter.dataset.tokenized_examples,
        (all_start_logits, all_end_logits, all_cls_logits), True,
        args.n_best_size, args.max_answer_length, args.cls_threshold)
    with open(os.path.join(args.output_dir, prefix + '_predictions.json'),
              "w", encoding='utf-8') as writer:
        writer.write(
            json.dumps(all_predictions, ensure_ascii=False, indent=4) + "\n")
    with open(os.path.join(args.output_dir,
                           prefix + '_nbest_predictions.json'),
              "w", encoding="utf8") as writer:
        writer.write(
            json.dumps(all_nbest_json, indent=4, ensure_ascii=False) + u"\n")
    if prefix == "eval":
        df = pd.DataFrame(eval_iter.dataset.examples)
        df = df.drop_duplicates(subset=["id"])
        df["answers"] = df["answers"].apply(
            lambda x: x[0] if len(x[0]) != 0 else "no answer")
        df_pre_answers = pd.DataFrame.from_dict(all_predictions,
                                                orient="index",
                                                columns=["answers_pre"])
        df_pre_cls = pd.DataFrame(
            all_cls_predictions,
            columns=["id", "is_impossible_pre", "pre_0", "pre_1"])
        df_pre_cls = df_pre_cls.drop_duplicates(subset=["id"])
        # BUG FIX: this merge was commented out, yet df["answers_pre"] is
        # consumed below by accuracy_score and the rouge F1 computation,
        # which would raise KeyError — re-enabled so the predicted answers
        # are joined onto the example frame.
        df = df.merge(df_pre_answers, how="left", left_on="id",
                      right_index=True)
        df = df.merge(df_pre_cls, how="left", on="id")
        df = df.set_index("id")
        predictions_details = df.to_dict("index")
        rouge = Rouge()
        with open(os.path.join(args.output_dir,
                               prefix + '_predictions_details.json'),
                  "w", encoding='utf-8') as writer:
            writer.write(
                json.dumps(predictions_details, ensure_ascii=False, indent=4)
                + "\n")
        # Exact match over the de-duplicated examples.
        EM = accuracy_score(df["answers"], df["answers_pre"])
        # df_f1 = df[df["is_impossible"] == False].copy()
        df_f1 = df.copy()
        # Space-join characters so rouge scores at character granularity.
        df_f1["answers"] = df_f1["answers"].apply(lambda x: " ".join(list(x)))
        df_f1["answers_pre"] = df_f1["answers_pre"].apply(
            lambda x: " ".join(list(x)))
        F1_score = rouge.get_scores(df_f1["answers_pre"], df_f1["answers"],
                                    avg=True)["rouge-1"]["f"]
        return F1_score, EM