def do_validate(conf, model, optimizer, criterion, scheduler, metrics, data_loader):
    """Evaluate the model on the test dataset and save to the checkpoint."""
    # wait until the whole group enters this function, and then evaluate.
    print("Enter validation phase.")
    performance = validate(
        conf, model, optimizer, criterion, scheduler, metrics, data_loader
    )

    # remember best performance and display the val info.
    scheduler.best_tracker.update(performance[0], scheduler.epoch_)
    dispaly_best_test_stat(conf, scheduler)

    # save to the checkpoint.
    if not conf.train_fast:
        save_to_checkpoint(
            conf,
            {
                "arch": conf.arch,
                "current_epoch": scheduler.epoch,
                "local_index": scheduler.local_index,
                "best_perf": scheduler.best_tracker.best_perf,
                "optimizer": optimizer.state_dict(),
                "state_dict": model.state_dict(),
            },
            scheduler.best_tracker.is_best,
            dirname=conf.checkpoint_dir,
            filename="checkpoint.pth.tar",
            save_all=conf.save_all_models,
        )
    print("Finished validation.")
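
# `do_validate` above assumes a small best-performance tracker on the
# scheduler: `update(perf, index)` records a new score, `best_perf` holds
# the best score seen so far, and `is_best` flags whether the latest update
# set a new best. A minimal sketch of such a tracker follows; the class name
# `BestPerfTracker` and its exact fields are illustrative assumptions, not
# the repo's actual implementation.
class BestPerfTracker(object):
    """Track the best validation performance seen across epochs/rounds."""

    def __init__(self):
        self.best_perf = None
        self.best_index = None
        self.is_best = False

    def update(self, perf, index):
        # a new best is any strictly higher score (e.g. top-1 accuracy).
        self.is_best = self.best_perf is None or perf > self.best_perf
        if self.is_best:
            self.best_perf = perf
            self.best_index = index
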
def do_validation(
    conf,
    coordinator,
    model,
    criterion,
    metrics,
    data_loaders,
    performance=None,
    label=None,
):
    """Evaluate the model on the test dataset and save to the checkpoint."""
    # wait until the whole group enters this function, and then evaluate.
    conf.logger.log("Master enters the validation phase.")
    if performance is None:
        performance = get_avg_perf_on_dataloaders(
            conf, coordinator, model, criterion, metrics, data_loaders, label
        )

    # remember best performance and display the val info.
    coordinator.update_perf(performance)
    dispaly_best_test_stat(conf, coordinator)

    # save to the checkpoint.
    conf.logger.log("Master finished the validation.")
    if not conf.train_fast:
        checkpoint.save_to_checkpoint(
            conf,
            {
                "arch": conf.arch,
                "current_comm_round": conf.graph.comm_round,
                "best_perf": coordinator.best_trackers["top1"].best_perf,
                "state_dict": model.state_dict(),
            },
            coordinator.best_trackers["top1"].is_best,
            dirname=conf.checkpoint_root,
            filename="checkpoint.pth.tar",
            save_all=conf.save_all_models,
        )
        conf.logger.log("Master saved to checkpoint.")
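
# Both functions delegate persistence to `save_to_checkpoint`, which is
# defined elsewhere in the repo. A minimal sketch of the expected behavior
# follows, using the common PyTorch pattern of writing the latest checkpoint
# and copying it aside when it is the best so far. The signature mirrors the
# call sites above, but the body and the `model_best.pth.tar` /
# tagged-checkpoint names are assumptions, not the repo's actual
# implementation.
import os
import shutil

import torch


def save_to_checkpoint(conf, state, is_best, dirname, filename, save_all=False):
    # `conf` is unused in this sketch but kept to match the call sites above.
    # always persist the latest state.
    os.makedirs(dirname, exist_ok=True)
    checkpoint_path = os.path.join(dirname, filename)
    torch.save(state, checkpoint_path)

    # optionally keep one checkpoint per epoch / communication round.
    if save_all:
        tag = state.get("current_epoch", state.get("current_comm_round"))
        shutil.copyfile(
            checkpoint_path,
            os.path.join(dirname, "checkpoint_{}.pth.tar".format(tag)),
        )

    # keep a separate copy of the best-performing model.
    if is_best:
        shutil.copyfile(
            checkpoint_path, os.path.join(dirname, "model_best.pth.tar")
        )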