import os
import pickle
from collections import defaultdict

import numpy as np
import pandas as pd
import torch

# batch_trainer, valid_trainer, get_pedestrian_metrics, get_multilabel_metrics,
# save_ckpt, time_str, return_attr_name_list, test_alm, and distribute_bn are
# assumed to be defined elsewhere in the repo. The four `trainer` definitions
# below are alternative variants of the same training loop, shown side by side.


def trainer(epoch, model, train_loader, valid_loader, criterion, optimizer,
            lr_scheduler, path, dataset):
    maximum = float('-inf')
    best_epoch = 0

    result_list = defaultdict()

    for i in range(epoch):
        train_loss, train_gt, train_probs = batch_trainer(
            epoch=i,
            model=model,
            train_loader=train_loader,
            criterion=criterion,
            optimizer=optimizer,
        )

        valid_loss, valid_gt, valid_probs = valid_trainer(
            epoch=i,
            model=model,
            valid_loader=valid_loader,
            criterion=criterion,
        )

        # metric-driven scheduler: step on the validation loss
        lr_scheduler.step(metrics=valid_loss)

        train_result = get_pedestrian_metrics(train_gt, train_probs)
        valid_result = get_pedestrian_metrics(valid_gt, valid_probs)

        print(f'Evaluation on test set, \n',
              'ma: {:.4f}, pos_recall: {:.4f} , neg_recall: {:.4f} \n'.format(
                  valid_result.ma, np.mean(valid_result.label_pos_recall),
                  np.mean(valid_result.label_neg_recall)),
              'Acc: {:.4f}, Prec: {:.4f}, Rec: {:.4f}, F1: {:.4f}'.format(
                  valid_result.instance_acc, valid_result.instance_prec,
                  valid_result.instance_recall, valid_result.instance_f1))

        # print per-attribute label mA
        attr_name_list = return_attr_name_list(dataset)
        for attr_name, _ma in zip(attr_name_list, valid_result.label_ma):
            print(f'{attr_name}: {_ma}')

        print(f'{time_str()}')
        print('-' * 60)

        cur_metric = valid_result.ma
        if cur_metric > maximum:
            maximum = cur_metric
            best_epoch = i
            save_ckpt(model, path, i, maximum)

        result_list[i] = [train_result, valid_result]

    torch.save(result_list, os.path.join(os.path.dirname(path), 'metric_log.pkl'))

    return maximum, best_epoch
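
# Why `lr_scheduler.step(metrics=valid_loss)` works above: the call matches a
# metric-driven scheduler such as torch.optim.lr_scheduler.ReduceLROnPlateau,
# which lowers the LR once the monitored value stops improving. A
# self-contained sketch (the tiny model and fake losses are made up for
# illustration, not part of the repo):
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import ReduceLROnPlateau

toy_model = nn.Linear(8, 4)  # stand-in for the attribute model
toy_optimizer = torch.optim.Adam(toy_model.parameters(), lr=1e-3)
toy_scheduler = ReduceLROnPlateau(toy_optimizer, mode='min', factor=0.1, patience=2)

for fake_valid_loss in [0.9, 0.8, 0.8, 0.8, 0.8]:  # plateaus after the 2nd step
    toy_scheduler.step(metrics=fake_valid_loss)
    print(toy_optimizer.param_groups[0]['lr'])  # stays 1e-3 until patience runs out, then 1e-4
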
def trainer(epoch, model, train_loader, valid_loader, criterion, optimizer,
            lr_scheduler, path, dataset):
    maximum = float('-inf')
    best_epoch = 0

    result_list = defaultdict()

    for i in range(epoch):
        train_loss, train_gt, train_probs = batch_trainer(
            epoch=i,
            model=model,
            train_loader=train_loader,
            criterion=criterion,
            optimizer=optimizer,
        )

        # eval on the train set
        # test_alm(train_loader, model, attr_num=dataset.attr_num,
        #          description=dataset.attr_id, set='train')

        # eval on the test set
        test_alm(valid_loader, model, attr_num=dataset.attr_num,
                 description=dataset.attr_id, set='test', threshold=0.5)

        valid_loss, valid_gt, valid_probs = valid_trainer(
            model=model,
            valid_loader=valid_loader,
            criterion=criterion,
        )

        lr_scheduler.step(metrics=valid_loss, epoch=i)

        train_result = get_pedestrian_metrics(train_gt, train_probs, threshold=0.5)
        valid_result = get_pedestrian_metrics(valid_gt, valid_probs, threshold=0.5)

        print(f'Evaluation on test set, \n',
              'ma: {:.4f}, pos_recall: {:.4f} , neg_recall: {:.4f} \n'.format(
                  valid_result.ma, np.mean(valid_result.label_pos_recall),
                  np.mean(valid_result.label_neg_recall)),
              'Acc: {:.4f}, Prec: {:.4f}, Rec: {:.4f}, F1: {:.4f}'.format(
                  valid_result.instance_acc, valid_result.instance_prec,
                  valid_result.instance_recall, valid_result.instance_f1))

        print(f'{time_str()}')
        print('-' * 60)

        cur_metric = valid_result.ma
        if cur_metric > maximum:
            maximum = cur_metric
            best_epoch = i
            save_ckpt(model, path, i, maximum)

        result_list[i] = [train_result, valid_result]

    torch.save(result_list, os.path.join(os.path.dirname(path), 'metric_log.pkl'))

    return maximum, best_epoch
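
# What `threshold=0.5` means above: per-attribute probabilities are binarized
# into 0/1 predictions before computing instance metrics. A self-contained
# illustration (the repo's get_pedestrian_metrics/test_alm do this internally;
# `binarize` is a hypothetical helper, not a repo function):
import numpy as np

def binarize(probs, threshold=0.5):
    """Turn per-attribute probabilities into 0/1 predictions."""
    return (np.asarray(probs) > threshold).astype(int)

demo_probs = np.array([[0.9, 0.2, 0.55],
                       [0.4, 0.7, 0.05]])
print(binarize(demo_probs))
# [[1 0 1]
#  [0 1 0]]
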
def trainer(cfg, args, epoch, model, model_ema, train_loader, valid_loader,
            criterion, optimizer, lr_scheduler, path, loss_w, viz, tb_writer):
    maximum = float('-inf')
    maximum_ema = float('-inf')
    best_epoch = 0

    result_list = defaultdict()

    for e in range(epoch):
        if args.distributed:
            # seed the sampler with the current epoch so shuffling differs per
            # epoch (the original passed the total epoch count, which froze
            # the shuffle order)
            train_loader.sampler.set_epoch(e)

        lr = optimizer.param_groups[1]['lr']

        train_loss, train_gt, train_probs, train_imgs = batch_trainer(
            cfg,
            args=args,
            epoch=e,
            model=model,
            model_ema=model_ema,
            train_loader=train_loader,
            criterion=criterion,
            optimizer=optimizer,
            loss_w=loss_w,
            scheduler=lr_scheduler,
        )

        if args.distributed:
            if args.local_rank == 0:
                print("Distributing BatchNorm running means and vars")
            distribute_bn(model, args.world_size, args.dist_bn == 'reduce')

        # valid_loss, valid_gt, valid_probs, valid_imgs = valid_trainer(
        #     args=args,
        #     model=model,
        #     valid_loader=valid_loader,
        #     criterion=criterion,
        #     loss_w=loss_w
        # )

        # note: this variant assumes model_ema is always provided; the
        # validation outputs, EMA metrics, and logging below all depend on it
        if model_ema is not None:  # and not cfg.TRAIN.EMA.FORCE_CPU:
            if args.local_rank == 0:
                print('using model_ema to validate')
            if args.distributed:
                distribute_bn(model_ema, args.world_size,
                              args.dist_bn == 'reduce')
            valid_loss, valid_gt, valid_probs, valid_probs_ema, valid_imgs = valid_trainer(
                args=args,
                model=model,
                ema_model=model_ema.module,
                valid_loader=valid_loader,
                criterion=criterion,
                loss_w=loss_w)

        # if cfg.TRAIN.LR_SCHEDULER.TYPE == 'plateau':
        #     lr_scheduler.step(metrics=valid_loss)
        # elif cfg.TRAIN.LR_SCHEDULER.TYPE == 'warmup_cosine':
        #     lr_scheduler.step(epoch=e + 1)
        # else:
        #     lr_scheduler.step()

        if cfg.METRIC.TYPE == 'multi_label':
            train_metric = get_multilabel_metrics(train_gt, train_probs)
            valid_metric = get_multilabel_metrics(valid_gt, valid_probs)
            if model_ema is not None:  # and not cfg.TRAIN.EMA.FORCE_CPU:
                valid_metric_ema = get_multilabel_metrics(valid_gt,
                                                          valid_probs_ema)

            if args.local_rank == 0:
                print('Performance : mAP: {:.4f}, OP: {:.4f}, OR: {:.4f}, '
                      'OF1: {:.4f} CP: {:.4f}, CR: {:.4f}, CF1: {:.4f}'.format(
                          valid_metric.map, valid_metric.OP, valid_metric.OR,
                          valid_metric.OF1, valid_metric.CP, valid_metric.CR,
                          valid_metric.CF1))
                print('EMA Performance : mAP: {:.4f}, OP: {:.4f}, OR: {:.4f}, '
                      'OF1: {:.4f} CP: {:.4f}, CR: {:.4f}, CF1: {:.4f}'.format(
                          valid_metric_ema.map, valid_metric_ema.OP,
                          valid_metric_ema.OR, valid_metric_ema.OF1,
                          valid_metric_ema.CP, valid_metric_ema.CR,
                          valid_metric_ema.CF1))
                print(f'{time_str()}')
                print('-' * 60)

                tb_writer.add_scalars('train/lr', {'lr': lr}, e)
                tb_writer.add_scalars('train/loss', {
                    'train': train_loss,
                    'test': valid_loss
                }, e)
                tb_writer.add_scalars(
                    'train/perf', {
                        'mAP': train_metric.map,
                        'OP': train_metric.OP,
                        'OR': train_metric.OR,
                        'OF1': train_metric.OF1,
                        'CP': train_metric.CP,
                        'CR': train_metric.CR,
                        'CF1': train_metric.CF1
                    }, e)
                tb_writer.add_scalars(
                    'test/perf', {
                        'mAP': valid_metric.map,
                        'OP': valid_metric.OP,
                        'OR': valid_metric.OR,
                        'OF1': valid_metric.OF1,
                        'CP': valid_metric.CP,
                        'CR': valid_metric.CR,
                        'CF1': valid_metric.CF1
                    }, e)
                tb_writer.add_scalars(
                    'test/ema_perf', {
                        'mAP': valid_metric_ema.map,
                        'OP': valid_metric_ema.OP,
                        'OR': valid_metric_ema.OR,
                        'OF1': valid_metric_ema.OF1,
                        'CP': valid_metric_ema.CP,
                        'CR': valid_metric_ema.CR,
                        'CF1': valid_metric_ema.CF1
                    }, e)

            cur_metric = valid_metric.map
            if cur_metric > maximum:
                maximum = cur_metric
                best_epoch = e
                save_ckpt(model, path, e, maximum)

            cur_metric = valid_metric_ema.map
            if cur_metric > maximum_ema:
                maximum_ema = cur_metric
                best_epoch = e
                # save the EMA weights (the original passed `model` here,
                # which overwrote the checkpoint with non-EMA weights); note
                # both branches still write to the same `path`
                save_ckpt(model_ema.module, path, e, maximum_ema)

            result_list[e] = {
                'train_result': train_metric,
                'valid_result': valid_metric,
                'train_gt': train_gt,
                'train_probs': train_probs,
                'valid_gt': valid_gt,
                'valid_probs': valid_probs
            }
        else:
            assert False, f'{cfg.METRIC.TYPE} is unavailable'

    with open(os.path.join(os.path.dirname(path), 'metric_log.pkl'), 'wb') as f:
        pickle.dump(result_list, f)

    return maximum, best_epoch
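
# The EMA branch above reads the averaged weights via model_ema.module (the
# update itself presumably happens inside batch_trainer, which receives
# model_ema). That `.module` convention matches timm-style EMA wrappers such
# as timm.utils.ModelEmaV2. A minimal sketch of such a wrapper, assuming
# nothing beyond plain PyTorch; it stands in for whatever the repo actually
# uses and only mirrors the `.module` / `update()` interface:
import copy
import torch

class ModelEmaSketch:
    """Keeps an exponential moving average of a model's weights."""

    def __init__(self, model: torch.nn.Module, decay: float = 0.9998):
        self.module = copy.deepcopy(model).eval()  # the averaged copy
        self.decay = decay
        for p in self.module.parameters():
            p.requires_grad_(False)

    @torch.no_grad()
    def update(self, model: torch.nn.Module):
        # ema_w <- decay * ema_w + (1 - decay) * w for floating tensors;
        # integer buffers (e.g. BatchNorm's num_batches_tracked) are copied
        ema_state = self.module.state_dict()
        for k, v in model.state_dict().items():
            if v.dtype.is_floating_point:
                ema_state[k].mul_(self.decay).add_(v, alpha=1 - self.decay)
            else:
                ema_state[k].copy_(v)

# typical use inside the batch loop: after optimizer.step(), call
# model_ema.update(model); validate with model_ema.module as done above.
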
def trainer(epoch, model, train_loader, valid_loader, criterion, optimizer,
            lr_scheduler, path):
    maximum = float('-inf')
    best_epoch = 0

    result_list = defaultdict()

    # `writer` (a TensorBoard SummaryWriter) and `csv_file_name` are assumed
    # to be defined at module level.
    df_metrics = pd.DataFrame(columns=[
        'epoch', 'train_loss', 'train_instance_acc', 'train_instance_prec',
        'train_instance_recall', 'train_instance_f1', 'train_ma',
        'train_pos_recall', 'train_neg_recall', 'valid_loss',
        'valid_instance_acc', 'valid_instance_prec', 'valid_instance_recall',
        'valid_instance_f1', 'valid_ma', 'valid_pos_recall', 'valid_neg_recall'
    ])

    for i in range(epoch):
        train_loss, train_gt, train_probs = batch_trainer(
            epoch=i,
            model=model,
            train_loader=train_loader,
            criterion=criterion,
            optimizer=optimizer,
        )

        valid_loss, valid_gt, valid_probs = valid_trainer(
            model=model,
            valid_loader=valid_loader,
            criterion=criterion,
        )

        lr_scheduler.step(metrics=valid_loss, epoch=i)

        train_result = get_pedestrian_metrics(train_gt, train_probs)
        valid_result = get_pedestrian_metrics(valid_gt, valid_probs)

        # TensorBoard logging: writer.add_scalars(tag, {series: value}, step)
        writer_step = i
        writer.add_scalars('Loss', {'Train': train_loss,
                                    'Valid': valid_loss}, writer_step)
        writer.add_scalars('Accuracy', {'Train': train_result.instance_acc,
                                        'Valid': valid_result.instance_acc}, writer_step)
        writer.add_scalars('Precision', {'Train': train_result.instance_prec,
                                         'Valid': valid_result.instance_prec}, writer_step)
        writer.add_scalars('Recall', {'Train': train_result.instance_recall,
                                      'Valid': valid_result.instance_recall}, writer_step)
        writer.add_scalars('F1', {'Train': train_result.instance_f1,
                                  'Valid': valid_result.instance_f1}, writer_step)
        writer.add_scalars('Mean Accuracy', {'Train': train_result.ma,
                                             'Valid': valid_result.ma}, writer_step)
        writer.add_scalars('Pos Recall', {'Train': np.mean(train_result.label_pos_recall),
                                          'Valid': np.mean(valid_result.label_pos_recall)}, writer_step)
        writer.add_scalars('Neg Recall', {'Train': np.mean(train_result.label_neg_recall),
                                          'Valid': np.mean(valid_result.label_neg_recall)}, writer_step)

        print(f'Evaluation on test set, \n',
              'ma: {:.4f}, pos_recall: {:.4f} , neg_recall: {:.4f} \n'.format(
                  valid_result.ma, np.mean(valid_result.label_pos_recall),
                  np.mean(valid_result.label_neg_recall)),
              'Acc: {:.4f}, Prec: {:.4f}, Rec: {:.4f}, F1: {:.4f}'.format(
                  valid_result.instance_acc, valid_result.instance_prec,
                  valid_result.instance_recall, valid_result.instance_f1))

        print(f'{time_str()}')
        print('-' * 60)

        # collect this epoch's metrics as one row of the CSV log
        new_metrics = {
            'epoch': i,
            'train_loss': train_loss,
            'train_instance_acc': train_result.instance_acc,
            'train_instance_prec': train_result.instance_prec,
            'train_instance_recall': train_result.instance_recall,
            'train_instance_f1': train_result.instance_f1,
            'train_ma': train_result.ma,
            'train_pos_recall': np.mean(train_result.label_pos_recall),
            'train_neg_recall': np.mean(train_result.label_neg_recall),
            'valid_loss': valid_loss,
            'valid_instance_acc': valid_result.instance_acc,
            'valid_instance_prec': valid_result.instance_prec,
            'valid_instance_recall': valid_result.instance_recall,
            'valid_instance_f1': valid_result.instance_f1,
            'valid_ma': valid_result.ma,
            'valid_pos_recall': np.mean(valid_result.label_pos_recall),
            'valid_neg_recall': np.mean(valid_result.label_neg_recall)
        }
        # append the row (DataFrame.append was removed in pandas 2.0)
        df_metrics = pd.concat([df_metrics, pd.DataFrame([new_metrics])],
                               ignore_index=True)
        df_metrics.to_csv(csv_file_name, index=False)

        cur_metric = valid_result.ma
        if cur_metric > maximum:
            maximum = cur_metric
            best_epoch = i
            save_ckpt(model, path, i, maximum)

        result_list[i] = [train_result, valid_result]

    writer.close()

    torch.save(result_list, os.path.join(os.path.dirname(path), 'metric_log.pkl'))

    return maximum, best_epoch
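
# Reading back the metric log written by the torch.save-based variants: a
# small sketch, assuming training has already produced metric_log.pkl next to
# the checkpoint (the 'exp' directory here is a made-up example path).
import os
import torch

log_path = os.path.join('exp', 'metric_log.pkl')
# weights_only=False is needed on recent PyTorch to unpickle non-tensor objects
logged = torch.load(log_path, weights_only=False)
for epoch_idx, (train_result, valid_result) in logged.items():
    print(f'epoch {epoch_idx}: valid ma {valid_result.ma:.4f}')
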