def test_update(self):
    meter = MetricLogger()
    for i in range(10):
        meter.update(metric=float(i))

    m = meter.meters["metric"]
    self.assertEqual(m.count, 10)
    self.assertEqual(m.total, 45)
    self.assertEqual(m.median, 4)
    self.assertEqual(m.avg, 4.5)
def test_no_attr(self):
    meter = MetricLogger()
    _ = meter.meters
    _ = meter.delimiter

    def broken():
        _ = meter.not_existent

    self.assertRaises(AttributeError, broken)
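# The two tests above only pin down the public interface of MetricLogger:
# update(**kwargs) feeds one smoothed meter per keyword, meters and delimiter
# are ordinary attributes, and any other attribute lookup must raise
# AttributeError. A minimal sketch of such an interface is shown below purely
# for reference; it is an assumption, not the project's actual implementation.
#
#   from collections import defaultdict, deque
#
#   class SmoothedValue:
#       """Track a series of values and expose count/total/median/avg."""
#       def __init__(self, window_size=20):
#           self.deque = deque(maxlen=window_size)
#           self.total = 0.0
#           self.count = 0
#
#       def update(self, value):
#           self.deque.append(value)
#           self.count += 1
#           self.total += value
#
#       @property
#       def median(self):
#           # lower median, so 0..9 gives 4 as the test expects
#           return sorted(self.deque)[(len(self.deque) - 1) // 2]
#
#       @property
#       def avg(self):
#           return sum(self.deque) / len(self.deque)
#
#   class MetricLogger:
#       def __init__(self, delimiter="\t"):
#           self.meters = defaultdict(SmoothedValue)
#           self.delimiter = delimiter
#
#       def update(self, **kwargs):
#           for name, value in kwargs.items():
#               self.meters[name].update(float(value))
#
#       def __getattr__(self, attr):
#           # only called when normal lookup fails, so meters/delimiter resolve
#           # normally and unknown names raise AttributeError as tested
#           if attr in self.meters:
#               return self.meters[attr]
#           raise AttributeError(attr)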
def do_train(
    model,
    data_loader,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    arguments,
):
    logger = logging.getLogger("fcos_core.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter=" ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()
    pytorch_1_1_0_or_later = is_pytorch_1_1_0_or_later()
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        # in pytorch >= 1.1.0, scheduler.step() should be run after optimizer.step()
        if not pytorch_1_1_0_or_later:
            scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())
        if losses > 1e5:
            import pdb
            pdb.set_trace()

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if pytorch_1_1_0_or_later:
            scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
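# The trainers in this file log losses through reduce_loss_dict() so that every
# process reports the same averaged values in a multi-GPU run; the helper is
# imported from the project's comm utilities. A rough sketch of what such a
# reduction typically looks like is given below as an assumption for
# illustration only, not the project's exact code:
#
#   import torch
#   import torch.distributed as dist
#
#   def reduce_loss_dict_sketch(loss_dict):
#       """Average each loss tensor across processes, for logging only."""
#       if not dist.is_available() or not dist.is_initialized():
#           return loss_dict
#       world_size = dist.get_world_size()
#       if world_size < 2:
#           return loss_dict
#       with torch.no_grad():
#           names = sorted(loss_dict.keys())
#           stacked = torch.stack([loss_dict[k] for k in names], dim=0)
#           dist.all_reduce(stacked)   # sum over all processes
#           stacked /= world_size      # turn the sum into a mean
#           return {k: v for k, v in zip(names, stacked)}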
def do_train(
    model,
    arch,
    data_loader,
    val_loader,
    optimizer,
    alpha_optim,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    arguments,
    cfg,
    tb_info={},
    first_order=True,
):
    logger = logging.getLogger("fad_core.trainer")
    logger.info("Start the architecture search")
    meters = MetricLogger(delimiter=" ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()
    pytorch_1_1_0_or_later = is_pytorch_1_1_0_or_later()

    Genotype = model.genotype()
    iteration = 0
    for n_m, genotype in enumerate(Genotype):
        logger.info("genotype = {}".format(genotype))

    for iteration, ((images, targets, _), (images_val, targets_val, _)) in enumerate(
            zip(data_loader, val_loader), start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration
        scheduler.step()

        if len(targets) == cfg.SOLVER.IMS_PER_BATCH and len(
                targets_val) == cfg.SOLVER.IMS_PER_BATCH:
            images = images.to(device)
            targets = [target.to(device) for target in targets]
            images_val = images_val.to(device)
            targets_val = [target.to(device) for target in targets_val]

            # -------------- update alpha
            lr = scheduler.get_lr()[0]
            alpha_optim.zero_grad()
            if not first_order:
                # ----- 2nd order
                arch.unrolled_backward(images, targets, images_val, targets_val,
                                       lr, optimizer)
            else:
                # ----- 1st order
                arch.first_order_backward(images_val, targets_val)
            alpha_optim.step()

            # --------------- update w
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = reduce_loss_dict(loss_dict)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            for lkey, lval in loss_dict_reduced.items():
                loss_dict_reduced[lkey] = lval.mean()
            meters.update(loss=losses_reduced.mean(), **loss_dict_reduced)

            # --------- tensorboard logger
            tb_logger = tb_info.get('tb_logger', None)
            if tb_logger:
                tb_prefix = '{}loss'.format(tb_info['prefix'])
                tb_logger.add_scalar(tb_prefix, losses_reduced.mean(), iteration)
                for key, value in loss_dict_reduced.items():
                    tb_prefix = "{}{}".format(tb_info['prefix'], key)
                    tb_logger.add_scalar(tb_prefix, value, iteration)
                tb_prefix = '{}loss'.format(tb_info['prefix'])
                tb_logger.add_scalar(tb_prefix + '_z_lr', lr, iteration)

            optimizer.zero_grad()
            losses.mean().backward()
            torch.nn.utils.clip_grad_norm_(model.weights(), 20)
            optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % (checkpoint_period) == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

        # ---------- save genotype
        if cfg.MODEL.FAD.PLOT and (iteration % checkpoint_period == 0):
            Genotype = model.genotype()
            fw = open(f"{cfg.OUTPUT_DIR}/genotype.log", "w")
            for n_m, genotype in enumerate(Genotype):
                logger.info("genotype = {}".format(genotype))
                # write genotype for augment
                fw.write(f"{genotype}\n")
                # genotype as an image
                plot_path = os.path.join(cfg.OUTPUT_DIR + '/plots',
                                         "Module%d" % n_m,
                                         "Iter{:06d}".format(iteration))
                caption = "Iteration {}".format(iteration)
                plot(genotype.normal, plot_path + "-normal", caption)
            model.print_alphas(logger)
            fw.close()

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
def do_train(model, data_loader, optimizer, scheduler, checkpointer, device,
             checkpoint_period, arguments, cfg, run_test, distributed, writer,
             seperate_dis):
    USE_DIS_GLOBAL = arguments["use_dis_global"]
    USE_DIS_CENTER_AWARE = arguments["use_dis_ca"]
    USE_DIS_CONDITIONAL = arguments["use_dis_conditional"]
    USE_DIS_HEAD = arguments["use_dis_ha"]

    used_feature_layers = arguments["use_feature_layers"]
    used_feature_layers = ['P7', 'P6', 'P5', 'P4', 'P3']

    # dataloader
    data_loader_source = data_loader["source"]
    data_loader_target = data_loader["target"]

    # classified label of source domain and target domain
    source_label = 1.0
    target_label = 0.0

    # dis_lambda
    if USE_DIS_GLOBAL:
        ga_dis_lambda = arguments["ga_dis_lambda"]
    if USE_DIS_CENTER_AWARE:
        ca_dis_lambda = arguments["ca_dis_lambda"]
    if USE_DIS_CONDITIONAL:
        cond_dis_lambda = arguments["cond_dis_lambda"]
    if USE_DIS_HEAD:
        ha_dis_lambda = arguments["ha_dis_lambda"]

    # Start training
    logger = logging.getLogger("fcos_core.trainer")
    logger.info("Start training")

    # model.train()
    for k in model:
        model[k].train()

    meters = MetricLogger(delimiter=" ")
    assert len(data_loader_source) == len(data_loader_target)
    max_iter = max(len(data_loader_source), len(data_loader_target))
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()
    pytorch_1_1_0_or_later = is_pytorch_1_1_0_or_later()
    best_map50 = 0.0

    # results = run_test(cfg, model, distributed)
    # exit()

    for iteration, ((images_s, targets_s, _), (images_t, _, _)) \
            in enumerate(zip(data_loader_source, data_loader_target), start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        alpha = max(
            1 - iteration / cfg.MODEL.ADV.COND_WARMUP_ITER,
            cfg.MODEL.ADV.COND_ALPHA
        ) if cfg.MODEL.ADV.COND_SMOOTH else cfg.MODEL.ADV.COND_ALPHA
        cf_th = cfg.MODEL.ADV.COND_CONF

        # in pytorch >= 1.1.0, scheduler.step() should be run after optimizer.step()
        if not pytorch_1_1_0_or_later:
            # scheduler.step()
            for k in scheduler:
                scheduler[k].step()

        images_s = images_s.to(device)
        targets_s = [target_s.to(device) for target_s in targets_s]
        images_t = images_t.to(device)
        # targets_t = [target_t.to(device) for target_t in targets_t]

        # optimizer.zero_grad()
        for k in optimizer:
            optimizer[k].zero_grad()

        ##########################################################################
        #################### (1): train G with source domain #####################
        ##########################################################################

        loss_dict, features_s, score_maps_s = foward_detector(
            model, images_s, targets=targets_s, return_maps=True)
        labels = loss_dict['labels']
        reg_targets = loss_dict['reg_targets']

        # rename loss to indicate domain
        loss_dict = {k + "_gs": loss_dict[k] for k in loss_dict if 'loss' in k}

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss_gs=losses_reduced, **loss_dict_reduced)

        writer.add_scalar('Loss_FCOS/gs', losses, iteration)
        writer.add_scalar('Loss_FCOS/cls_gs', loss_dict['loss_cls_gs'], iteration)
        writer.add_scalar('Loss_FCOS/reg_gs', loss_dict['loss_reg_gs'], iteration)
        writer.add_scalar('Loss_FCOS/centerness_gs',
                          loss_dict['loss_centerness_gs'], iteration)

        # losses.backward(retain_graph=True)
        del loss_dict, losses

        ##########################################################################
        #################### (2): train D with source domain #####################
        ##########################################################################

        loss_dict = {}
        stat = {}
        for layer in used_feature_layers:
            # detach score_map
            for map_type in score_maps_s[layer]:
                score_maps_s[layer][map_type] = score_maps_s[layer][map_type].detach()
            if seperate_dis:
                if USE_DIS_GLOBAL:
                    loss_dict["loss_adv_%s_ds" % layer] = \
                        ga_dis_lambda * model["dis_%s" % layer](
                            features_s[layer], source_label, domain='source')
                if USE_DIS_CENTER_AWARE:
                    loss_dict["loss_adv_%s_CA_ds" % layer] = \
                        ca_dis_lambda * model["dis_%s_CA" % layer](
                            features_s[layer], source_label,
                            score_maps_s[layer], domain='source')
                if USE_DIS_CONDITIONAL:
                    loss_cond_l, cur_stat, idx = \
                        model["dis_%s_Cond" % layer](
                            features_s[layer], source_label, score_maps_s[layer],
                            domain='source', alpha=alpha,
                            labels=labels[int(layer[1]) - 3],
                            reg_targets=reg_targets[int(layer[1]) - 3],
                            conf_th=cf_th)
                    stat["%s_source_left" % layer] = [s / idx for s in cur_stat]
                    loss_cond_t, cur_stat, idx = \
                        model["dis_%s_Cond_t" % layer](
                            features_s[layer], source_label, score_maps_s[layer],
                            domain='source', alpha=alpha,
                            labels=labels[int(layer[1]) - 3],
                            reg_targets=reg_targets[int(layer[1]) - 3],
                            conf_th=cf_th)
                    stat["%s_source_top" % layer] = [s / idx for s in cur_stat]
                    loss_dict["loss_adv_%s_Cond_ds" % layer] = \
                        cond_dis_lambda * (loss_cond_l + loss_cond_t)
                if USE_DIS_HEAD:
                    loss_dict["loss_adv_%s_HA_ds" % layer] = \
                        ha_dis_lambda * model["dis_%s_HA" % layer](
                            source_label, score_maps_s[layer], domain='source')
            else:
                if USE_DIS_GLOBAL:
                    loss_dict["loss_adv_%s_ds" % layer] = \
                        ga_dis_lambda * model["dis_P7"](
                            features_s[layer], source_label, domain='source')
                if USE_DIS_CENTER_AWARE:
                    loss_dict["loss_adv_%s_CA_ds" % layer] = \
                        ca_dis_lambda * model["dis_P7_CA"](
                            features_s[layer], source_label,
                            score_maps_s[layer], domain='source')
                if USE_DIS_CONDITIONAL:
                    loss_dict["loss_adv_%s_Cond_ds" % layer] = \
                        cond_dis_lambda * model["dis_P7_Cond"](
                            features_s[layer], source_label,
                            score_maps_s[layer], domain='source', alpha=alpha)
                if USE_DIS_HEAD:
                    loss_dict["loss_adv_%s_HA_ds" % layer] = \
                        ha_dis_lambda * model["dis_P7_HA"](
                            source_label, score_maps_s[layer], domain='source')

        losses = sum(loss for loss in loss_dict.values())

        writer.add_scalar('Loss_DISC/ds', losses, iteration)
        if USE_DIS_GLOBAL:
            writer.add_scalar('Loss_DISC/P3_ds', loss_dict['loss_adv_P3_ds'], iteration)
            writer.add_scalar('Loss_DISC/P4_ds', loss_dict['loss_adv_P4_ds'], iteration)
            writer.add_scalar('Loss_DISC/P5_ds', loss_dict['loss_adv_P5_ds'], iteration)
            writer.add_scalar('Loss_DISC/P6_ds', loss_dict['loss_adv_P6_ds'], iteration)
            writer.add_scalar('Loss_DISC/P7_ds', loss_dict['loss_adv_P7_ds'], iteration)
        if USE_DIS_CENTER_AWARE:
            writer.add_scalar('Loss_DISC/P3_CA_ds', loss_dict['loss_adv_P3_CA_ds'], iteration)
            writer.add_scalar('Loss_DISC/P4_CA_ds', loss_dict['loss_adv_P4_CA_ds'], iteration)
            writer.add_scalar('Loss_DISC/P5_CA_ds', loss_dict['loss_adv_P5_CA_ds'], iteration)
            writer.add_scalar('Loss_DISC/P6_CA_ds', loss_dict['loss_adv_P6_CA_ds'], iteration)
            writer.add_scalar('Loss_DISC/P7_CA_ds', loss_dict['loss_adv_P7_CA_ds'], iteration)
        if USE_DIS_CONDITIONAL:
            writer.add_scalar('Loss_DISC/P3_Cond_ds', loss_dict['loss_adv_P3_Cond_ds'], iteration)
            writer.add_scalar('Loss_DISC/P4_Cond_ds', loss_dict['loss_adv_P4_Cond_ds'], iteration)
            writer.add_scalar('Loss_DISC/P5_Cond_ds', loss_dict['loss_adv_P5_Cond_ds'], iteration)
            writer.add_scalar('Loss_DISC/P6_Cond_ds', loss_dict['loss_adv_P6_Cond_ds'], iteration)
            writer.add_scalar('Loss_DISC/P7_Cond_ds', loss_dict['loss_adv_P7_Cond_ds'], iteration)
            for layer in used_feature_layers:
                for i in range(3):
                    writer.add_scalar('Stat/{}/Source_{}_left'.format(layer, i),
                                      stat['%s_source_left' % layer][i], iteration)
                    writer.add_scalar('Stat/{}/Source_{}_top'.format(layer, i),
                                      stat['%s_source_top' % layer][i], iteration)
        if USE_DIS_HEAD:
            writer.add_scalar('Loss_DISC/P3_HA_ds', loss_dict['loss_adv_P3_HA_ds'], iteration)
            writer.add_scalar('Loss_DISC/P4_HA_ds', loss_dict['loss_adv_P4_HA_ds'], iteration)
            writer.add_scalar('Loss_DISC/P5_HA_ds', loss_dict['loss_adv_P5_HA_ds'], iteration)
            writer.add_scalar('Loss_DISC/P6_HA_ds', loss_dict['loss_adv_P6_HA_ds'], iteration)
            writer.add_scalar('Loss_DISC/P7_HA_ds', loss_dict['loss_adv_P7_HA_ds'], iteration)

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss_ds=losses_reduced, **loss_dict_reduced)

        # losses.backward()
        del loss_dict, losses

        ##########################################################################
        #################### (3): train D with target domain #####################
        ##########################################################################

        loss_dict, features_t, score_maps_t = foward_detector(
            model, images_t, return_maps=True)
        assert len(loss_dict) == 1 and loss_dict["zero"] == 0  # loss_dict should be empty dict

        # loss_dict["loss_adv_Pn"] = model_dis_Pn(features_t["Pn"], target_label, domain='target')
        for layer in used_feature_layers:
            # detach score_map
            for map_type in score_maps_t[layer]:
                score_maps_t[layer][map_type] = score_maps_t[layer][map_type].detach()
            if seperate_dis:
                if USE_DIS_GLOBAL:
                    loss_dict["loss_adv_%s_dt" % layer] = \
                        ga_dis_lambda * model["dis_%s" % layer](
                            features_t[layer], target_label, domain='target')
                if USE_DIS_CENTER_AWARE:
                    loss_dict["loss_adv_%s_CA_dt" % layer] = \
                        ca_dis_lambda * model["dis_%s_CA" % layer](
                            features_t[layer], target_label,
                            score_maps_t[layer], domain='target')
                if USE_DIS_CONDITIONAL:
                    loss_cond_l, cur_stat, idx = \
                        model["dis_%s_Cond" % layer](
                            features_t[layer], target_label, score_maps_t[layer],
                            domain='target', alpha=alpha, conf_th=cf_th)
                    stat["%s_target_left" % layer] = [s / idx for s in cur_stat]
                    loss_cond_t, cur_stat, idx = \
                        model["dis_%s_Cond_t" % layer](
                            features_t[layer], target_label, score_maps_t[layer],
                            domain='target', alpha=alpha, conf_th=cf_th)
                    stat["%s_target_top" % layer] = [s / idx for s in cur_stat]
                    loss_dict["loss_adv_%s_Cond_dt" % layer] = \
                        cond_dis_lambda * (loss_cond_l + loss_cond_t)
                if USE_DIS_HEAD:
                    loss_dict["loss_adv_%s_HA_dt" % layer] = \
                        ha_dis_lambda * model["dis_%s_HA" % layer](
                            target_label, score_maps_t[layer], domain='target')
            else:
                if USE_DIS_GLOBAL:
                    loss_dict["loss_adv_%s_dt" % layer] = \
                        ga_dis_lambda * model["dis_P7"](
                            features_s[layer], source_label, domain='target')
                if USE_DIS_CENTER_AWARE:
                    loss_dict["loss_adv_%s_CA_dt" % layer] = \
                        ca_dis_lambda * model["dis_P7_CA"](
                            features_s[layer], source_label,
                            score_maps_s[layer], domain='target')
                if USE_DIS_CONDITIONAL:
                    loss_dict["loss_adv_%s_Cond_dt" % layer] = \
                        cond_dis_lambda * model["dis_P7_Cond"](
                            features_s[layer], source_label,
                            score_maps_s[layer], domain='target', alpha=alpha)
                if USE_DIS_HEAD:
                    loss_dict["loss_adv_%s_HA_dt" % layer] = \
                        ha_dis_lambda * model["dis_P7_HA"](
                            source_label, score_maps_s[layer], domain='target')

        losses = sum(loss for loss in loss_dict.values())

        writer.add_scalar('Loss_DISC/dt', losses, iteration)
        if USE_DIS_GLOBAL:
            writer.add_scalar('Loss_DISC/P3_dt', loss_dict['loss_adv_P3_dt'], iteration)
            writer.add_scalar('Loss_DISC/P4_dt', loss_dict['loss_adv_P4_dt'], iteration)
            writer.add_scalar('Loss_DISC/P5_dt', loss_dict['loss_adv_P5_dt'], iteration)
            writer.add_scalar('Loss_DISC/P6_dt', loss_dict['loss_adv_P6_dt'], iteration)
            writer.add_scalar('Loss_DISC/P7_dt', loss_dict['loss_adv_P7_dt'], iteration)
        if USE_DIS_CENTER_AWARE:
            writer.add_scalar('Loss_DISC/P3_CA_dt', loss_dict['loss_adv_P3_CA_dt'], iteration)
            writer.add_scalar('Loss_DISC/P4_CA_dt', loss_dict['loss_adv_P4_CA_dt'], iteration)
            writer.add_scalar('Loss_DISC/P5_CA_dt', loss_dict['loss_adv_P5_CA_dt'], iteration)
            writer.add_scalar('Loss_DISC/P6_CA_dt', loss_dict['loss_adv_P6_CA_dt'], iteration)
            writer.add_scalar('Loss_DISC/P7_CA_dt', loss_dict['loss_adv_P7_CA_dt'], iteration)
        if USE_DIS_CONDITIONAL:
            writer.add_scalar('Loss_DISC/P3_Cond_dt', loss_dict['loss_adv_P3_Cond_dt'], iteration)
            writer.add_scalar('Loss_DISC/P4_Cond_dt', loss_dict['loss_adv_P4_Cond_dt'], iteration)
            writer.add_scalar('Loss_DISC/P5_Cond_dt', loss_dict['loss_adv_P5_Cond_dt'], iteration)
            writer.add_scalar('Loss_DISC/P6_Cond_dt', loss_dict['loss_adv_P6_Cond_dt'], iteration)
            writer.add_scalar('Loss_DISC/P7_Cond_dt', loss_dict['loss_adv_P7_Cond_dt'], iteration)
            for layer in used_feature_layers:
                for i in range(3):
                    writer.add_scalar('Stat/{}/Target_{}_left'.format(layer, i),
                                      stat['%s_target_left' % layer][i], iteration)
                    writer.add_scalar('Stat/{}/Target_{}_top'.format(layer, i),
                                      stat['%s_target_top' % layer][i], iteration)
        if USE_DIS_HEAD:
            writer.add_scalar('Loss_DISC/P3_HA_dt', loss_dict['loss_adv_P3_HA_dt'], iteration)
            writer.add_scalar('Loss_DISC/P4_HA_dt', loss_dict['loss_adv_P4_HA_dt'], iteration)
            writer.add_scalar('Loss_DISC/P5_HA_dt', loss_dict['loss_adv_P5_HA_dt'], iteration)
            writer.add_scalar('Loss_DISC/P6_HA_dt', loss_dict['loss_adv_P6_HA_dt'], iteration)
            writer.add_scalar('Loss_DISC/P7_HA_dt', loss_dict['loss_adv_P7_HA_dt'], iteration)

        # del "zero" (useless after backward)
        del loss_dict['zero']

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss_dt=losses_reduced, **loss_dict_reduced)

        # # saved GRL gradient
        # grad_list = []
        # for layer in used_feature_layers:
        #     def save_grl_grad(grad):
        #         grad_list.append(grad)
        #     features_t[layer].register_hook(save_grl_grad)
        #
        # losses.backward()
        #
        # ##########################################################################
        # ##########################################################################
        # ##########################################################################
        # max_norm = 5
        # for k in model:
        #     torch.nn.utils.clip_grad_norm_(model[k].parameters(), max_norm)
        #
        # # optimizer.step()
        # for k in optimizer:
        #     optimizer[k].step()
        #
        # if pytorch_1_1_0_or_later:
        #     # scheduler.step()
        #     for k in scheduler:
        #         scheduler[k].step()

        # End of training
        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        sample_layer = used_feature_layers[0]  # sample any one of used feature layer
        if USE_DIS_GLOBAL:
            if seperate_dis:
                sample_optimizer = optimizer["dis_%s" % sample_layer]
            else:
                sample_optimizer = optimizer["dis_P7"]
        if USE_DIS_CENTER_AWARE:
            if seperate_dis:
                sample_optimizer = optimizer["dis_%s_CA" % sample_layer]
            else:
                sample_optimizer = optimizer["dis_P7_CA"]

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr_backbone: {lr_backbone:.6f}",
                    "lr_fcos: {lr_fcos:.6f}",
                    "lr_dis: {lr_dis:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr_backbone=optimizer["backbone"].param_groups[0]["lr"],
                    lr_fcos=optimizer["fcos"].param_groups[0]["lr"],
                    lr_dis=sample_optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_final", **arguments)
            results = run_test(cfg, model, distributed)
            for ap_key in results[0][0].results['bbox'].keys():
                writer.add_scalar('mAP_val/{}'.format(ap_key),
                                  results[0][0].results['bbox'][ap_key], iteration)
            map50 = results[0][0].results['bbox']['AP50']
            if map50 > best_map50:
                checkpointer.save("model_best", **arguments)
                best_map50 = map50
            for k in model:
                model[k].train()

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
def do_train_base(model, data_loader, optimizer, scheduler, checkpointer, device,
                  checkpoint_period, arguments, cfg, run_test, distributed, writer):
    # Start training
    logger = logging.getLogger("fcos_core.trainer")
    logger.info("Start training")

    # model.train()
    for k in model:
        model[k].train()

    meters = MetricLogger(delimiter=" ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()
    pytorch_1_1_0_or_later = is_pytorch_1_1_0_or_later()
    best_map50 = 0.0

    for iteration, (images_s, targets_s, _) in enumerate(data_loader, start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        # in pytorch >= 1.1.0, scheduler.step() should be run after optimizer.step()
        if not pytorch_1_1_0_or_later:
            # scheduler.step()
            for k in scheduler:
                scheduler[k].step()

        images_s = images_s.to(device)
        targets_s = [target_s.to(device) for target_s in targets_s]

        # optimizer.zero_grad()
        for k in optimizer:
            optimizer[k].zero_grad()

        ##########################################################################
        #################### (1): train G #####################
        ##########################################################################

        loss_dict, features_s, score_maps_s = foward_detector(
            model, images_s, targets=targets_s, return_maps=True)

        # rename loss to indicate domain
        # loss_dict = {k + "_gs": loss_dict[k] for k in loss_dict}
        loss_dict = {k + "_gs": loss_dict[k] for k in loss_dict if 'loss' in k}

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss_gs=losses_reduced, **loss_dict_reduced)

        writer.add_scalar('Loss_FCOS/gs', losses, iteration)
        writer.add_scalar('Loss_FCOS/cls_gs', loss_dict['loss_cls_gs'], iteration)
        writer.add_scalar('Loss_FCOS/reg_gs', loss_dict['loss_reg_gs'], iteration)
        writer.add_scalar('Loss_FCOS/centerness_gs',
                          loss_dict['loss_centerness_gs'], iteration)

        losses.backward(retain_graph=True)
        del loss_dict, losses

        ##########################################################################
        ##########################################################################
        ##########################################################################

        # max_norm = 5
        # for k in model:
        #     torch.nn.utils.clip_grad_norm_(model[k].parameters(), max_norm)

        # optimizer.step()
        for k in optimizer:
            optimizer[k].step()

        if pytorch_1_1_0_or_later:
            # scheduler.step()
            for k in scheduler:
                scheduler[k].step()

        # End of training
        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr_backbone: {lr_backbone:.6f}",
                    "lr_fcos: {lr_fcos:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr_backbone=optimizer["backbone"].param_groups[0]["lr"],
                    lr_fcos=optimizer["fcos"].param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_final", **arguments)
            results = run_test(cfg, model, distributed)
            for ap_key in results[0][0].results['bbox'].keys():
                writer.add_scalar('mAP_val/{}'.format(ap_key),
                                  results[0][0].results['bbox'][ap_key], iteration)
            map50 = results[0][0].results['bbox']['AP50']
            if map50 > best_map50:
                checkpointer.save("model_best", **arguments)
                best_map50 = map50
            for k in model:
                model[k].train()

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))