def test(self, key, out_expected):
    """Compare the measured error stats for *key* against expected values."""
    # Prepare the environment before measuring
    env.setup_env()
    print("\nTesting error of: {}".format(key))
    measured = test_error(key)
    print("expected = {}".format(out_expected))
    print("measured = {}".format(measured))
    # Every reported stat must agree to 2 decimal places
    for stat in measured:
        self.assertAlmostEqual(measured[stat], out_expected[stat], 2)
def time_model():
    """Times model."""
    # Setup training/testing environment
    env.setup_env()
    # Build the model and the loss function
    # NOTE(review): loss is moved with .npu() here while train_model() uses
    # .cuda() — device choice looks inconsistent; confirm intended target.
    model = setup_model()
    loss_fun = builders.build_loss_fun().npu()
    # Measure model timings only (no data loaders)
    benchmark.compute_time_model(model, loss_fun)
def time_model_and_loader():
    """Times model and data loader."""
    # Setup training/testing environment
    env.setup_env()
    # Build the model and the loss function
    # NOTE(review): .npu() here vs .cuda() in train_model() — confirm device.
    model = setup_model()
    loss_fun = builders.build_loss_fun().npu()
    # Build both loaders so loader timings can be measured as well
    loader_train = data_loader.construct_train_loader()
    loader_test = data_loader.construct_test_loader()
    # Measure combined model + loader timings
    benchmark.compute_time_full(model, loss_fun, loader_train, loader_test)
def test_model():
    """Evaluates a trained model.

    Constructs the model, loads the weights given by cfg.TEST.WEIGHTS, and
    runs a single evaluation pass over the test set.
    """
    # Setup training/testing environment
    env.setup_env()
    # Construct the model. BUGFIX: setup_model() returns only the model (see
    # train_model/time_model); the original `model, optimizer = setup_model()`
    # unpacking would raise at runtime, and `optimizer` was never used here.
    model = setup_model()
    # Load model weights
    cp.load_checkpoint(cfg.TEST.WEIGHTS, model)
    logger.info("Loaded model weights from: {}".format(cfg.TEST.WEIGHTS))
    # Create data loaders and meters
    test_loader = data_loader.construct_test_loader()
    test_meter = meters.TestMeter(len(test_loader))
    # Evaluate the model (epoch index 0 for a standalone evaluation)
    test_epoch(test_loader, model, test_meter, 0)
def train_model():
    """Trains the model.

    Optionally resumes from the last checkpoint (or loads initial weights),
    then runs the train/eval loop, writing a checkpoint after every epoch and
    tracking the best top-1 error seen so far.
    """
    # Setup training/testing environment
    env.setup_env()
    # Construct the model, loss function, and optimizer
    # NOTE(review): .cuda() here vs .npu() in time_model() — confirm device.
    model = setup_model()
    loss_fun = builders.build_loss_fun().cuda()
    optimizer = optim.construct_optimizer(model)
    # Resume from the latest checkpoint, or start from initial weights
    start_epoch = 0
    if cfg.TRAIN.AUTO_RESUME and cp.has_checkpoint():
        last_checkpoint = cp.get_last_checkpoint()
        resumed_epoch = cp.load_checkpoint(last_checkpoint, model, optimizer)
        logger.info("Loaded checkpoint from: {}".format(last_checkpoint))
        start_epoch = resumed_epoch + 1
    elif cfg.TRAIN.WEIGHTS:
        cp.load_checkpoint(cfg.TRAIN.WEIGHTS, model)
        logger.info("Loaded initial weights from: {}".format(cfg.TRAIN.WEIGHTS))
    # Create data loaders and meters
    train_loader = data_loader.construct_train_loader()
    test_loader = data_loader.construct_test_loader()
    train_meter = meters.TrainMeter(len(train_loader))
    test_meter = meters.TestMeter(len(test_loader))
    # GradScaler is a no-op unless mixed precision training is enabled
    scaler = amp.GradScaler(enabled=cfg.TRAIN.MIXED_PRECISION)
    # Compute model and loader timings (fresh runs only)
    if start_epoch == 0 and cfg.PREC_TIME.NUM_ITER > 0:
        benchmark.compute_time_full(model, loss_fun, train_loader, test_loader)
    # Perform the training loop
    logger.info("Start epoch: {}".format(start_epoch + 1))
    best_err = np.inf
    for cur_epoch in range(start_epoch, cfg.OPTIM.MAX_EPOCH):
        # Train for one epoch
        train_epoch(
            train_loader, model, loss_fun, optimizer, scaler, train_meter, cur_epoch
        )
        # Compute precise BN stats
        if cfg.BN.USE_PRECISE_STATS:
            net.compute_precise_bn_stats(model, train_loader)
        # Evaluate the model
        test_epoch(test_loader, model, test_meter, cur_epoch)
        # Check if checkpoint is best so far (note: should checkpoint meters as well)
        stats = test_meter.get_epoch_stats(cur_epoch)
        is_best = stats["top1_err"] <= best_err
        best_err = min(stats["top1_err"], best_err)
        # Save a checkpoint
        checkpoint_file = cp.save_checkpoint(model, optimizer, cur_epoch, is_best)
        logger.info("Wrote checkpoint to: {}".format(checkpoint_file))