def main():
    """Run the operation selected by ``args.mode``.

    The global config is loaded from ``args.cfg``, overridden with
    ``args.opts``, validated and frozen before any mode runs. Recognised
    modes are "info", "train", "test", "time" and "scale"; any other value
    falls through without doing anything.
    """
    args = parse_args()
    mode = args.mode
    # Assemble the global config: file first, then the override list
    config.load_cfg(args.cfg)
    cfg.merge_from_list(args.opts)
    config.assert_cfg()
    cfg.freeze()
    # Modes that only launch a trainer entry point via dist.multi_proc_run
    trainer_entry_points = {
        "train": "train_model",
        "test": "test_model",
        "time": "time_model",
    }
    if mode == "info":
        print(builders.get_model()())
        print("complexity:", net.complexity(builders.get_model()))
    elif mode in trainer_entry_points:
        dist.multi_proc_run(
            num_proc=cfg.NUM_GPUS,
            fun=getattr(trainer, trainer_entry_points[mode]),
        )
    elif mode == "scale":
        # Scaling rewrites the config, so it has to be unfrozen first
        cfg.defrost()
        complexity_before = net.complexity(builders.get_model())
        scaler.scale_model()
        complexity_after = net.complexity(builders.get_model())
        dumped_path = config.dump_cfg()
        print("Scaled config dumped to:", dumped_path)
        print("Original model complexity:", complexity_before)
        print("Scaled model complexity:", complexity_after)
def main():
    """Scale the configured model and print its complexity before and after.

    The config is loaded from the command line, the model complexity is
    measured, ``scaler.scale_model()`` is applied, the complexity is measured
    again, and the resulting config is dumped.
    """
    config.load_cfg_fom_args("Scale a model.")
    config.assert_and_infer_cfg()
    # Measure on either side of the scaling step
    complexity_before = net.complexity(builders.get_model())
    scaler.scale_model()
    complexity_after = net.complexity(builders.get_model())
    dumped_path = config.dump_cfg()
    report = (
        ("Scaled config dumped to:", dumped_path),
        ("Original model complexity:", complexity_before),
        ("Scaled model complexity:", complexity_after),
    )
    for label, value in report:
        print(label, value)
def get_model_data(name, timings, errors):
    """Assemble the summary record for the model called ``name``.

    Args:
        name: model name understood by ``model_zoo``; also the key used to
            index ``timings`` and ``errors``.
        timings: mapping ``name -> {"test_fw_time": ..., "train_fw_bw_time": ...}``.
        errors: mapping ``name -> {"top1_err": ...}``.

    Returns:
        A dict with the config URL, rounded complexity figures (flops in
        1e9, params and acts in 1e6), training batch size, inference time
        (ms), training time (hours), top-1 error, model id and weight URL.
    """
    # Point the global config at this model's config file
    reset_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(name))
    (
        config_url,
        _unused_first,
        model_id,
        _unused_second,
        weight_url_full,
    ) = model_zoo.get_model_info(name)
    cx = net.complexity(builders.get_model())
    # Inference time is reported in ms, normalised to batch size 64 on 1 GPU
    batch_size, num_gpus = 64, 1
    reference = batch_size / cfg.TEST.BATCH_SIZE * cfg.NUM_GPUS / num_gpus
    infer_time = timings[name]["test_fw_time"] * reference * 1000
    # Training time is reported in hours for 100 epochs of 1281167 images
    iterations = 1281167 / cfg.TRAIN.BATCH_SIZE * 100
    train_time = timings[name]["train_fw_bw_time"] * iterations / 3600
    record = {}
    record["config_url"] = "configs/" + config_url
    record["flops"] = round(cx["flops"] / 1e9, 1)
    record["params"] = round(cx["params"] / 1e6, 1)
    record["acts"] = round(cx["acts"] / 1e6, 1)
    record["batch_size"] = cfg.TRAIN.BATCH_SIZE
    record["infer_time"] = round(infer_time)
    record["train_time"] = round(train_time, 1)
    record["error"] = round(errors[name]["top1_err"], 1)
    record["model_id"] = model_id
    record["weight_url"] = weight_url_full
    return record
def test_complexity(self, cfg_file, cx_expected):
    """Test complexity of a single model with the specified config.

    Args:
        cfg_file: config file merged into the global ``cfg`` for the
            duration of the measurement.
        cx_expected: complexity value the measured result must equal.

    The global ``cfg`` is snapshotted up front and restored in a ``finally``
    clause, so a failure while merging the config or building/measuring the
    model does not leak this model's settings into subsequent tests.
    """
    cfg_init = cfg.clone()
    try:
        cfg.merge_from_file(cfg_file)
        cx = net.complexity(builders.get_model())
    finally:
        # Always put the global config back, even when the measurement raised
        cfg.merge_from_other_cfg(cfg_init)
    self.assertEqual(cx_expected, cx)
def check_complexity_constraints(constraints):
    """Check the model complexity against every bound in ``constraints.CX``.

    ``constraints.CX`` maps a complexity metric name (lower-cased before the
    lookup) to a ``[min, max]`` pair. A bound of 0 means "unbounded" on that
    side; a pair of two zeros is skipped entirely. The model complexity is
    computed at most once, and only if some constraint is active.

    Returns:
        True when every active constraint holds, False otherwise.
    """
    cx = None
    valid = True
    for name, bounds in constraints.CX.items():
        metric = name.lower()
        lower, upper = bounds[0], bounds[1]
        if lower == 0 and upper == 0:
            # Neither side is bounded, so there is nothing to check
            continue
        if not cx:
            cx = net.complexity(builders.get_model())
        # A zero bound is replaced by the measured value, so it always passes
        if lower == 0:
            lower = cx[metric]
        if upper == 0:
            upper = cx[metric]
        valid = valid and (lower <= cx[metric] <= upper)
    return valid
def main():
    """Build the configured model, run one random input through it, and print its complexity."""
    config.load_cfg_fom_args("Train a classification model.")
    config.assert_and_infer_cfg()
    cfg.freeze()
    print("building model {}".format(cfg.MODEL.TYPE))
    model = build_model()
    model.eval()
    # One random tensor of shape (1, 3, 224, 224) exercises the forward pass
    sample_input = torch.randn(1, 3, 224, 224)
    sample_output = model(sample_input)
    print(sample_output.shape)
    print(complexity(model))
def dump_complexity():
    """Measure the complexity of every model in the configs/ directory.

    Walks ``configs/`` recursively, keeps every path containing ".yaml",
    and for each one (in sorted order) merges it into the global ``cfg``,
    records ``net.complexity`` of the resulting model, and restores the
    config. The results, together with a "date-created" timestamp, are
    written as JSON to ``_COMPLEXITY_FILE``.

    The config restore runs in a ``finally`` clause so that a config which
    fails to load or measure does not leave the global ``cfg`` modified for
    the caller; the exception itself still propagates.
    """
    results = {"date-created": str(datetime.datetime.now())}
    cfg_files = [os.path.join(r, f) for r, _, fs in os.walk("configs/") for f in fs]
    cfg_files = sorted(f for f in cfg_files if ".yaml" in f)
    for cfg_file in cfg_files:
        cfg_init = cfg.clone()
        try:
            cfg.merge_from_file(cfg_file)
            results[cfg_file] = net.complexity(builders.get_model())
        finally:
            # Undo this config's overrides before moving on (or bailing out)
            cfg.merge_from_other_cfg(cfg_init)
    with open(_COMPLEXITY_FILE, "w") as out_file:
        json.dump(results, out_file, sort_keys=True, indent=4)
def setup_model():
    """Build the model, log its complexity, and place it on the current GPU.

    Returns:
        The model moved to the current CUDA device; when ``cfg.NUM_GPUS > 1``
        it is wrapped in ``DistributedDataParallel`` bound to that device.
    """
    model = builders.build_model()
    # The full model dump is only logged in verbose mode
    if cfg.VERBOSE:
        logger.info("Model:\n{}".format(model))
    complexity_stats = net.complexity(model)
    logger.info(logging.dump_log_data(complexity_stats, "complexity"))
    # Refuse to run with more GPUs configured than are visible
    err_str = "Cannot use more GPU devices than available"
    assert cfg.NUM_GPUS <= torch.cuda.device_count(), err_str
    cur_device = torch.cuda.current_device()
    model = model.cuda(device=cur_device)
    if cfg.NUM_GPUS <= 1:
        return model
    # Multi-GPU: each process wraps its own replica for the current device
    return torch.nn.parallel.DistributedDataParallel(
        module=model, device_ids=[cur_device], output_device=cur_device
    )
def test_model():
    """Evaluate the configured model on the test loader.

    Sets up logging and RNG seeds, builds the model, optionally measures
    precise timings (resetting BN stats afterwards), loads the weights from
    ``cfg.TEST.WEIGHTS`` and runs a single test epoch.
    """
    logging.setup_logging()
    logger.info("Config:\n{}".format(cfg))
    # Seed both RNGs (see RNG comment in core/config.py for discussion)
    seed = cfg.RNG_SEED
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.benchmark = cfg.CUDNN.BENCHMARK
    # The model is built before the loaders to speed up debugging
    model = builders.build_model()
    logger.info("Model:\n{}".format(model))
    complexity_stats = net.complexity(model)
    logger.info(logging.dump_json_stats(complexity_stats))
    if cfg.PREC_TIME.ENABLED:
        logger.info("Computing precise time...")
        loss_fun = builders.build_loss_fun()
        timing_stats = net.compute_precise_time(model, loss_fun)
        logger.info(logging.dump_json_stats(timing_stats))
        # BN stats are reset once the timing run is done
        net.reset_bn_stats(model)
    weights_path = cfg.TEST.WEIGHTS
    checkpoint.load_checkpoint(weights_path, model)
    logger.info("Loaded model weights from: {}".format(cfg.TEST.WEIGHTS))
    test_loader = loader.construct_test_loader()
    test_meter = meters.TestMeter(len(test_loader))
    # Single evaluation pass, reported as epoch 0
    test_epoch(test_loader, model, test_meter, 0)
def setup_model():
    """Build the model, log it with its complexity, and place it on the current GPU.

    Returns:
        The model moved to the current CUDA device. When ``cfg.NUM_GPUS > 1``
        it is wrapped in ``DistributedDataParallel`` (with
        ``find_unused_parameters=True``) and the wrapper is given a
        ``complexity`` attribute pointing at the wrapped module's own.
    """
    model = builders.build_model()
    logger.info("Model:\n{}".format(model))
    complexity_stats = net.complexity(model)
    logger.info(logging.dump_json_stats(complexity_stats))
    # Refuse to run with more GPUs configured than are visible
    err_str = "Cannot use more GPU devices than available"
    assert cfg.NUM_GPUS <= torch.cuda.device_count(), err_str
    cur_device = torch.cuda.current_device()
    model = model.cuda(device=cur_device)
    if cfg.NUM_GPUS <= 1:
        return model
    # Multi-GPU: each process wraps its own replica for the current device
    wrapped = torch.nn.parallel.DistributedDataParallel(
        module=model,
        device_ids=[cur_device],
        output_device=cur_device,
        find_unused_parameters=True,
    )
    # Expose the inner module's complexity function on the wrapper
    wrapped.complexity = wrapped.module.complexity
    return wrapped
def setup_model():
    """Build the model and its optimizer on the current NPU device.

    The model is built, its complexity is logged, and it is moved to the
    device reported by ``torch.npu.current_device()``. The optimizer is then
    constructed and both are passed through ``amp.initialize`` with
    ``opt_level="O2"`` and ``loss_scale=128``. When ``cfg.NUM_GPUS > 1`` the
    model is wrapped in ``DistributedDataParallel`` with
    ``broadcast_buffers=False``.

    Returns:
        A ``(model, optimizer)`` tuple.
    """
    model = builders.build_model()
    # The full model dump is only logged in verbose mode
    if cfg.VERBOSE:
        logger.info("Model:\n{}".format(model))
    complexity_stats = net.complexity(model)
    logger.info(logging.dump_log_data(complexity_stats, "complexity"))
    # Refuse to run with more devices configured than are visible
    err_str = "Cannot use more GPU devices than available"
    assert cfg.NUM_GPUS <= torch.npu.device_count(), err_str
    cur_device = torch.npu.current_device()
    model = model.to(cur_device)
    optimizer = optim.construct_optimizer(model)
    model, optimizer = amp.initialize(
        model, optimizer, opt_level="O2", loss_scale=128
    )
    if cfg.NUM_GPUS > 1:
        # Multi-device: each process wraps its own replica for the current device
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[cur_device], broadcast_buffers=False
        )
    return model, optimizer
def train_model():
    """Train the configured model.

    Sets up logging and RNG seeds, builds the model, loss and optimizer,
    resumes from the last checkpoint (or loads ``cfg.TRAIN.WEIGHTS``) when
    configured, optionally measures precise timings, and then runs the epoch
    loop up to ``cfg.OPTIM.MAX_EPOCH`` with optional precise BN stats,
    checkpointing and evaluation.
    """
    logging.setup_logging()
    logger.info("Config:\n{}".format(cfg))
    # Seed both RNGs (see RNG comment in core/config.py for discussion)
    seed = cfg.RNG_SEED
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.benchmark = cfg.CUDNN.BENCHMARK
    # The model is built before the loaders to speed up debugging
    model = builders.build_model()
    logger.info("Model:\n{}".format(model))
    complexity_stats = net.complexity(model)
    logger.info(logging.dump_json_stats(complexity_stats))
    loss_fun = builders.build_loss_fun()
    optimizer = optim.construct_optimizer(model)
    # Resume from the latest checkpoint if allowed, else load initial weights
    start_epoch = 0
    if cfg.TRAIN.AUTO_RESUME and checkpoint.has_checkpoint():
        resume_path = checkpoint.get_last_checkpoint()
        resumed_epoch = checkpoint.load_checkpoint(resume_path, model, optimizer)
        logger.info("Loaded checkpoint from: {}".format(resume_path))
        start_epoch = resumed_epoch + 1
    elif cfg.TRAIN.WEIGHTS:
        checkpoint.load_checkpoint(cfg.TRAIN.WEIGHTS, model)
        logger.info("Loaded initial weights from: {}".format(cfg.TRAIN.WEIGHTS))
    # Precise timing is only measured when starting from epoch 0
    if start_epoch == 0 and cfg.PREC_TIME.ENABLED:
        logger.info("Computing precise time...")
        timing_stats = net.compute_precise_time(model, loss_fun)
        logger.info(logging.dump_json_stats(timing_stats))
        net.reset_bn_stats(model)
    train_loader = loader.construct_train_loader()
    test_loader = loader.construct_test_loader()
    train_meter = meters.TrainMeter(len(train_loader))
    test_meter = meters.TestMeter(len(test_loader))
    logger.info("Start epoch: {}".format(start_epoch + 1))
    for cur_epoch in range(start_epoch, cfg.OPTIM.MAX_EPOCH):
        train_epoch(
            train_loader, model, loss_fun, optimizer, train_meter, cur_epoch
        )
        if cfg.BN.USE_PRECISE_STATS:
            net.compute_precise_bn_stats(model, train_loader)
        if checkpoint.is_checkpoint_epoch(cur_epoch):
            saved_path = checkpoint.save_checkpoint(model, optimizer, cur_epoch)
            logger.info("Wrote checkpoint to: {}".format(saved_path))
        if is_eval_epoch(cur_epoch):
            test_epoch(test_loader, model, test_meter, cur_epoch)
def test_complexity(key):
    """Return the complexity of the model whose config lives at ``key``.

    ``key`` is joined onto ``_PYCLS_DIR`` to form the config path; the global
    config is reset before that file is merged in.
    """
    reset_cfg()
    merge_from_file(os.path.join(_PYCLS_DIR, key))
    model_fun = builders.get_model()
    return net.complexity(model_fun)