def test_TensorBoard():
    """Smoke test: fit for two epochs with the TensorBoard callback attached.

    The test passes as long as nothing raises; it does not inspect what the
    callback writes into ``TMP_PATH``.
    """
    tb_logger = pt_clb.TensorBoard(log_dir=TMP_PATH)
    Runner(
        model=TestModel,
        optimizer=TestOptimizer,
        criterion=TestCriterion,
        metrics=TestMetric,
        callbacks=tb_logger,
    ).fit(TestLoader, epochs=2)
def test_tensorboar_CM():
    """Smoke test: fit for two epochs with TensorBoardCM and TensorBoard callbacks.

    The callbacks are passed in the order ``TensorBoardCM`` then
    ``TensorBoard``. The test passes as long as nothing raises.
    """
    callbacks = [
        pt_clb.TensorBoardCM(),
        pt_clb.TensorBoard(log_dir=TMP_PATH),
    ]
    trainer = Runner(
        model=TEST_MODEL,
        optimizer=TEST_OPTIMIZER,
        criterion=TEST_CRITERION,
        callbacks=callbacks,
    )
    trainer.fit(TEST_LOADER, epochs=2)
# We only test that callbacks don't crash NOT that they do what they should do TMP_PATH = "/tmp/pt_tools2/" os.makedirs(TMP_PATH, exist_ok=True) @pytest.mark.parametrize( "callback", [ pt_clb.Timer(), pt_clb.ReduceLROnPlateau(), pt_clb.CheckpointSaver(TMP_PATH, save_name="model.chpn"), pt_clb.CheckpointSaver( TMP_PATH, save_name="model.chpn", monitor=TEST_METRIC.name, mode="max" ), pt_clb.TensorBoard(log_dir=TMP_PATH), pt_clb.TensorBoardWithCM(log_dir=TMP_PATH), pt_clb.ConsoleLogger(), pt_clb.FileLogger(TMP_PATH), pt_clb.Mixup(0.2, NUM_CLASSES), pt_clb.Cutmix(1.0, NUM_CLASSES), pt_clb.ScheduledDropout(), ], ) def test_callback(callback): runner = Runner( model=TEST_MODEL, optimizer=TEST_OPTIMIZER, criterion=TEST_CRITERION, metrics=TEST_METRIC, callbacks=callback,
def main():
    """Run one training experiment end to end and record its final metrics.

    Steps, in order:

    1. Parse hyper-parameters, create the output directory, configure logging
       (stdout + ``<outdir>/logs.txt``), seed the RNGs and dump the config to
       ``<outdir>/config.yaml``.
    2. Build the model, the loss and the optimizer (optionally resuming weights
       from ``hparams.resume`` and freezing batch norm).
    3. If ``hparams.head_warmup_epochs > 0``, train for that many epochs with
       every ``model`` parameter frozen, then unfreeze and re-create the
       optimizer.
    4. Run the main training schedule defined by ``hparams.phases``.
    5. Reload ``<outdir>/model.chpn``, evaluate on the validation loader and
       write hyper-parameters plus final metrics to TensorBoard.

    Takes no arguments and returns ``None``; all results are written to
    ``hparams.outdir`` and to the logger.
    """
    # Get config for this run
    hparams = parse_args()

    # Create the output directory up front: the log file, the config dump and
    # the checkpoints below are all written into it.
    os.makedirs(hparams.outdir, exist_ok=True)

    # Setup logger: the same line format is used for stdout and the log file
    log_format = "{time:[MM-DD HH:mm]} - {message}"
    logger.configure(
        handlers=[
            {"sink": sys.stdout, "format": log_format},
            {"sink": f"{hparams.outdir}/logs.txt", "format": log_format},
        ]
    )
    logger.info(f"Parameters used for training: {hparams}")

    # Fix seeds for reproducibility
    pt.utils.misc.set_random_seed(hparams.seed)

    # Save config. The context manager closes (and therefore flushes) the file
    # instead of leaving the handle to the garbage collector.
    with open(hparams.outdir + "/config.yaml", "w") as config_file:
        yaml.dump(vars(hparams), config_file)

    # Get model
    model = Model(
        arch=hparams.arch,
        model_params=hparams.model_params,
        embedding_size=hparams.embedding_size,
        pooling=hparams.pooling,
    ).cuda()

    # Get loss
    # loss = LOSS_FROM_NAME[hparams.criterion](in_features=hparams.embedding_size, **hparams.criterion_params).cuda()
    # NOTE(review): the criterion is hard-coded to "cross_entropy" and
    # hparams.criterion / hparams.criterion_params are ignored here — confirm
    # this is intended.
    loss = LOSS_FROM_NAME["cross_entropy"].cuda()
    logger.info(f"Loss for this run is: {loss}")

    if hparams.resume:
        checkpoint = torch.load(
            hparams.resume, map_location=lambda storage, loc: storage.cuda()
        )
        model.load_state_dict(checkpoint["state_dict"], strict=True)
        loss.load_state_dict(checkpoint["loss"], strict=True)

    if hparams.freeze_bn:
        freeze_batch_norm(model)

    def build_optimizer():
        """Create a fresh optimizer over the loss and model parameters."""
        # optim_params = pt.utils.misc.filter_bn_from_wd(model)
        # Loss parameters are added so they are optimized together with the model.
        optim_params = list(loss.parameters()) + list(model.parameters())
        # lr=0 on purpose here; presumably the PhasesScheduler callback sets the
        # actual learning rate during training — TODO confirm.
        return optimizer_from_name(hparams.optim)(
            optim_params, lr=0, weight_decay=hparams.weight_decay, amsgrad=True
        )

    # Get optimizer
    optimizer = build_optimizer()

    num_params = pt.utils.misc.count_parameters(model)[0]
    logger.info(f"Model size: {num_params / 1e6:.02f}M")

    # Scheduler is an advanced way of planning experiment
    scheduler = pt.fit_wrapper.callbacks.PhasesScheduler(hparams.phases)

    # Save logs
    tb_callback = pt_clb.TensorBoard(hparams.outdir, log_every=20)

    # Get dataloaders
    train_loader, val_loader, val_indexes = get_dataloaders(
        root=hparams.root,
        augmentation=hparams.augmentation,
        size=hparams.size,
        val_size=hparams.val_size,
        batch_size=hparams.batch_size,
        workers=hparams.workers,
    )

    # Load validation query / gallery split and resort it according to indexes from sampler.
    # Builtin `bool` is used instead of the `np.bool` alias, which was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
    df_val = pd.read_csv(os.path.join(hparams.root, "train_val.csv"))
    df_val = df_val[~df_val["is_train"].astype(bool)]
    val_is_query = df_val.is_query.values[val_indexes].astype(bool)

    logger.info("Start training")

    # Init runner
    runner = pt.fit_wrapper.Runner(
        model,
        optimizer,
        criterion=loss,
        callbacks=[
            # pt_clb.BatchMetrics([pt.metrics.Accuracy(topk=1)]),
            ContestMetricsCallback(
                is_query=val_is_query[:1280] if hparams.debug else val_is_query
            ),
            pt_clb.Timer(),
            pt_clb.ConsoleLogger(),
            pt_clb.FileLogger(),
            tb_callback,
            CheckpointSaver(hparams.outdir, save_name="model.chpn", monitor="target", mode="max"),
            CheckpointSaver(hparams.outdir, save_name="model_mapr.chpn", monitor="mAP@R", mode="max"),
            CheckpointSaver(hparams.outdir, save_name="model_loss.chpn"),
            scheduler,
            # EMA must go after other checkpoints
            pt_clb.ModelEma(model, hparams.ema_decay) if hparams.ema_decay else pt_clb.Callback(),
        ],
        use_fp16=hparams.use_fp16,  # use mixed precision by default.  # hparams.opt_level != "O0",
    )

    # In debug mode every fit / evaluate call is capped at 20 steps.
    debug_steps = 20 if hparams.debug else None

    if hparams.head_warmup_epochs > 0:
        # Freeze model
        for p in model.parameters():
            p.requires_grad = False

        runner.fit(
            train_loader,
            # val_loader=val_loader,
            epochs=hparams.head_warmup_epochs,
            steps_per_epoch=debug_steps,
            # val_steps=20 if hparams.debug else None,
        )

        # Unfreeze model
        for p in model.parameters():
            p.requires_grad = True

        # Unfreezing above re-enabled gradients everywhere, so batch norm has
        # to be frozen again if requested.
        if hparams.freeze_bn:
            freeze_batch_norm(model)

        # Re-init to avoid nan's in loss
        optimizer = build_optimizer()

        runner.state.model = model
        runner.state.optimizer = optimizer
        runner.state.criterion = loss

    # Train
    runner.fit(
        train_loader,
        # val_loader=val_loader,
        start_epoch=hparams.head_warmup_epochs,
        epochs=scheduler.tot_epochs,
        steps_per_epoch=debug_steps,
        # val_steps=20 if hparams.debug else None,
    )

    logger.info("Loading best model")
    checkpoint = torch.load(os.path.join(hparams.outdir, "model.chpn"))
    model.load_state_dict(checkpoint["state_dict"], strict=True)
    # runner.state.model = model
    # loss.load_state_dict(checkpoint["loss"], strict=True)

    # Evaluate
    _, [acc1, map10, target, mapR] = runner.evaluate(
        val_loader,
        steps=debug_steps,
    )

    logger.info(
        f"Val: Acc@1 {acc1:0.5f}, mAP@10 {map10:0.5f}, Target {target:0.5f}, mAP@R {mapR:0.5f}"
    )

    # Save params used for training and final metrics into separate TensorBoard file.
    # Keys are paired with the values exactly as unpacked and logged above
    # (previously "mAP@R" and "Target" were swapped).
    metric_dict = {
        "hparam/Acc@1": acc1,
        "hparam/mAP@10": map10,
        "hparam/mAP@R": mapR,
        "hparam/Target": target,
    }

    # Convert all lists / dicts to avoid TB error
    hparams.phases = str(hparams.phases)
    hparams.model_params = str(hparams.model_params)
    hparams.criterion_params = str(hparams.criterion_params)
    with pt.utils.tensorboard.CorrectedSummaryWriter(hparams.outdir) as writer:
        writer.add_hparams(hparam_dict=vars(hparams), metric_dict=metric_dict)