def load_model(model, pretrained=True, num_classes=1000, model_params=None, weights_path: str = None) -> torch.nn.Module:
    """Instantiate a model by name from torchvision or the custom model registry.

    ** WARNING: This is implemented using torch.load functionality,
    which itself uses Python's pickling facilities that may be used to perform
    arbitrary code execution during unpickling. Only load the data you trust.

    Args:
        model: Name of the model; looked up first in ``torchvision.models``,
            then in ``custom_models``.
        pretrained: Whether to request the registry's own pretrained weights.
        num_classes: Number of output classes passed to the constructor.
        model_params: Extra keyword arguments for the model constructor.
        weights_path: Optional path to a local checkpoint; only consulted
            when ``pretrained`` is False.

    Returns:
        The instantiated (and optionally weight-loaded) ``torch.nn.Module``.

    Raises:
        Exception: If ``model`` is not found in either registry.
    """
    logger.info("Loading model: {}".format(model))
    if model_params is None:
        model_params = {}
    if model in torchvision.models.__dict__:
        load_model_fn = partial(torchvision.models.__dict__[model], num_classes=num_classes,
                                pretrained=pretrained, **model_params)
    elif model in custom_models.__dict__:
        load_model_fn = partial(custom_models.__dict__[model], num_classes=num_classes,
                                pretrained=pretrained, **model_params)
    else:
        raise Exception("Undefined model name")
    # Construction is funneled through safe_thread_call — presumably to
    # serialize concurrent downloads of pretrained weights; confirm in helper.
    loaded_model = safe_thread_call(load_model_fn)
    if not pretrained and weights_path is not None:
        # A local checkpoint is only applied when registry-pretrained weights
        # were not requested.
        sd = torch.load(weights_path, map_location='cpu')
        load_state(loaded_model, sd, is_resume=False)
    return loaded_model
def load_model(model, pretrained=True, num_classes=1000, model_params=None, weights_path: str = None) -> torch.nn.Module:
    """Look up a model constructor by name and build the model, optionally
    restoring weights from a local checkpoint file.

    ** WARNING: This is implemented using torch.load functionality, which
    itself uses Python's pickling facilities that may be used to perform
    arbitrary code execution during unpickling. Only load the data you trust.
    """
    logger.info("Loading model: {}".format(model))
    extra_kwargs = model_params if model_params is not None else {}

    # Resolve the constructor: torchvision first, then the custom registry.
    for registry in (torchvision.models.__dict__, custom_models.__dict__):
        if model in registry:
            model_ctor = partial(registry[model], num_classes=num_classes,
                                 pretrained=pretrained, **extra_kwargs)
            break
    else:
        raise Exception("Undefined model name")

    built_model = safe_thread_call(model_ctor)

    # A checkpoint on disk is only applied when registry-pretrained weights
    # were not requested.
    if weights_path is not None and not pretrained:
        state = torch.load(weights_path, map_location='cpu')
        load_state(built_model, state, is_resume=False)
    return built_model
def create_model(config):
    """Build the SSD network described by *config*, wrap it for compression,
    optionally load pretrained weights, and prepare it for execution.

    Returns:
        Tuple of (compression_ctrl, model ready for execution).
    """
    # The input image side is taken from the last dimension of the first
    # configured input shape.
    img_side = create_input_infos(config)[0].shape[-1]
    net = build_ssd(config.model, config.ssd_params, img_side, config.num_classes, config)
    ctrl, net = create_compressed_model(net, config)

    checkpoint_path = config.get('weights')
    if checkpoint_path:
        # NOTE: torch.load unpickles arbitrary objects — only load trusted files.
        load_state(net, torch.load(checkpoint_path, map_location='cpu'))

    net.train()
    prepared, _ = prepare_model_for_execution(net, config)
    return ctrl, prepared
def create_model(config: SampleConfig, resuming_model_sd: dict = None):
    """Build the SSD network from *config*, load optional pretrained weights,
    then compress it (optionally resuming from *resuming_model_sd*) and
    prepare it for execution.

    Returns:
        Tuple of (compression_ctrl, compressed model in train mode).
    """
    # Input resolution comes from the last dimension of the first input shape.
    img_side = create_input_infos(config.nncf_config)[0].shape[-1]
    net = build_ssd(config.model, config.ssd_params, img_side, config.num_classes, config)

    checkpoint_path = config.get('weights')
    if checkpoint_path:
        # NOTE: torch.load unpickles arbitrary objects — only load trusted files.
        load_state(net, torch.load(checkpoint_path, map_location='cpu'))

    # Move to the target device before wrapping with compression.
    net.to(config.device)
    ctrl, wrapped = create_compressed_model(net, config.nncf_config, resuming_model_sd)
    wrapped, _ = prepare_model_for_execution(wrapped, config)
    wrapped.train()
    return ctrl, wrapped
def resume_from_checkpoint(resuming_checkpoint, model, config, optimizer, compression_ctrl):
    """Restore model weights from a checkpoint file and, when training,
    also restore epoch counter, best metric, optimizer and compression
    scheduler state.

    Raises:
        FileNotFoundError: If *resuming_checkpoint* is not an existing file.

    Returns:
        Tuple of (model, config, optimizer, compression_ctrl, best_acc1).
    """
    if not osp.isfile(resuming_checkpoint):
        raise FileNotFoundError("no checkpoint found at '{}'".format(resuming_checkpoint))

    logger.info("=> loading checkpoint '{}'".format(resuming_checkpoint))
    # NOTE: torch.load unpickles arbitrary objects — only load trusted files.
    checkpoint = torch.load(resuming_checkpoint, map_location='cpu')
    load_state(model, checkpoint['state_dict'], is_resume=True)

    best_acc1 = 0
    resuming_training = config.mode.lower() == 'train' and config.to_onnx is None
    if resuming_training:
        # Full resume: bring back training-loop bookkeeping as well.
        config.start_epoch = checkpoint['epoch']
        best_acc1 = checkpoint['best_acc1']
        compression_ctrl.scheduler.load_state_dict(checkpoint['scheduler'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger.info("=> loaded checkpoint '{}' (epoch: {}, best_acc1: {:.3f})"
                    .format(resuming_checkpoint, checkpoint['epoch'], best_acc1))
    else:
        logger.info("=> loaded checkpoint '{}'".format(resuming_checkpoint))
    return model, config, optimizer, compression_ctrl, best_acc1
def load_checkpoint(model, filename, map_location=None, strict=False):
    """Load checkpoint from a file or URI.

    Args:
        model (Module): Module to load checkpoint.
        filename (str): Either a filepath or URL or modelzoo://xxxxxxx.
        map_location (str): Same as :func:`torch.load`.
        strict (bool): Whether to allow different params for the model and
            checkpoint.

    Returns:
        dict or OrderedDict: The loaded checkpoint.

    Raises:
        RuntimeError: If no state dict can be located in the checkpoint.
    """
    loaded = torch.load(filename, map_location=map_location)
    # A raw OrderedDict is treated as the state dict itself; otherwise the
    # checkpoint must be a dict wrapping the weights under 'state_dict'.
    if isinstance(loaded, OrderedDict):
        weights = loaded
    else:
        if not (isinstance(loaded, dict) and 'state_dict' in loaded):
            raise RuntimeError(
                'No state_dict found in checkpoint file {}'.format(filename))
        weights = loaded['state_dict']
    load_state(model, weights, strict)
    return loaded
def main_worker(current_gpu, config):
    """Per-process entry point for the segmentation sample.

    Sets up (optionally distributed) execution, builds and compresses the
    model, optionally resumes from a checkpoint, then dispatches to ONNX
    export, test, or train according to ``config``.
    """
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    # Logging is set up only on the main process to avoid duplicated output.
    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    logger.info(config)

    config.device = get_device(config)
    dataset = get_dataset(config.dataset)
    color_encoding = dataset.color_encoding
    num_classes = len(color_encoding)

    # Pre-create the metrics file with a zero value so it exists even if the
    # run aborts before producing a real metric.
    if config.metrics_dump is not None:
        write_metrics(0, config.metrics_dump)

    # When explicit weights are given, registry-pretrained weights are
    # disabled and the checkpoint is applied after the compression wrap.
    weights = config.get('weights')
    model = load_model(config.model,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=num_classes,
                       model_params=config.get('model_params', {}))
    compression_ctrl, model = create_compressed_model(model, config)
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(model, sd)

    model, model_without_dp = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_ctrl.distributed()

    resuming_checkpoint = config.resuming_checkpoint

    if resuming_checkpoint is not None:
        if not config.pretrained:
            # Load the previously saved model state
            model, _, _, _, _ = \
                load_checkpoint(model, resuming_checkpoint, config.device,
                                compression_scheduler=compression_ctrl.scheduler)

    # ONNX export is terminal — nothing else runs after it.
    if config.to_onnx is not None:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if config.mode.lower() == 'test':
        logger.info(model)
        model_parameters = filter(lambda p: p.requires_grad, model.parameters())
        params = sum([np.prod(p.size()) for p in model_parameters])
        logger.info("Trainable argument count:{params}".format(params=params))

        model = model.to(config.device)
        loaders, w_class = load_dataset(dataset, config)
        _, val_loader = loaders
        test(model, val_loader, w_class, color_encoding, config)
        print_statistics(compression_ctrl.statistics())
    elif config.mode.lower() == 'train':
        loaders, w_class = load_dataset(dataset, config)
        train_loader, val_loader = loaders
        # Compression initialization (e.g. statistics collection) only runs
        # for a fresh start, not when resuming from a checkpoint.
        if not resuming_checkpoint:
            compression_ctrl.initialize(train_loader)
        train(model, model_without_dp, compression_ctrl, train_loader, val_loader,
              w_class, color_encoding, config)
    else:
        # Should never happen...but just in case it does
        raise RuntimeError(
            "\"{0}\" is not a valid choice for execution mode.".format(
                config.mode))
def main_worker(current_gpu, config):
    """Per-process entry point for the SSD object-detection sample.

    Sets up (optionally distributed) execution, builds the compressed SSD
    model, restores checkpoint state if requested, then dispatches to ONNX
    export, test, or train according to ``config``.
    """
    #################################
    # Setup experiment environment
    #################################
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)
    # Logging is set up only on the first rank to avoid duplicated output.
    if is_on_first_rank(config):
        configure_logging(logger, config)
        print_args(config)

    config.device = get_device(config)
    config.start_iter = 0

    ##########################
    # Prepare metrics log file
    ##########################
    # Pre-create the metrics file with a zero value so it exists even if the
    # run aborts before producing a real metric.
    if config.metrics_dump is not None:
        write_metrics(0, config.metrics_dump)

    ##################
    # Prepare model
    ##################
    compression_ctrl, net = create_model(config)
    if config.distributed:
        # Split the global batch and worker counts across the GPUs of a node.
        config.batch_size //= config.ngpus_per_node
        config.workers //= config.ngpus_per_node
        compression_ctrl.distributed()

    ###########################
    # Criterion and optimizer
    ###########################
    params_to_optimize = get_parameter_groups(net, config)
    optimizer, lr_scheduler = make_optimizer(params_to_optimize, config)

    criterion = MultiBoxLoss(
        config,
        config['num_classes'],
        overlap_thresh=0.5,
        prior_for_matching=True,
        bkg_label=0,
        neg_mining=True,
        neg_pos=3,
        neg_overlap=0.5,
        encode_target=False,
        device=config.device)

    ###########################
    # Load checkpoint
    ###########################
    resuming_checkpoint = config.resuming_checkpoint
    if resuming_checkpoint:
        logger.info('Resuming training, loading {}...'.format(resuming_checkpoint))
        checkpoint = torch.load(resuming_checkpoint, map_location='cpu')
        # use checkpoint itself in case of only state dict is saved
        # i.e. checkpoint is created with `torch.save(module.state_dict())`
        state_dict = checkpoint.get('state_dict', checkpoint)
        load_state(net, state_dict, is_resume=True)
        if config.mode.lower() == 'train' and config.to_onnx is None:
            # Full training resume: scheduler and optimizer state, plus the
            # iteration counter (falling back to defaults if absent).
            compression_ctrl.scheduler.load_state_dict(checkpoint['scheduler'])
            optimizer.load_state_dict(
                checkpoint.get('optimizer', optimizer.state_dict()))
            config.start_iter = checkpoint.get('iter', 0) + 1

    # ONNX export is terminal — nothing else runs after it.
    if config.to_onnx:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    ###########################
    # Prepare data
    ###########################
    test_data_loader, train_data_loader = create_dataloaders(config)

    if config.mode.lower() == 'test':
        with torch.no_grad():
            print_statistics(compression_ctrl.statistics())
            net.eval()
            mAp = test_net(net, config.device, test_data_loader,
                           distributed=config.distributed)
            if config.metrics_dump is not None:
                write_metrics(mAp, config.metrics_dump)
            return

    # Compression initialization only runs for a fresh start, not on resume.
    if not resuming_checkpoint:
        compression_ctrl.initialize(train_data_loader)
    train(net, compression_ctrl, train_data_loader, test_data_loader,
          criterion, optimizer, config, lr_scheduler)
def main_worker(current_gpu, config):
    """Per-process entry point for the image-classification sample.

    Sets up (optionally distributed) execution, builds and compresses the
    model, optionally resumes from a checkpoint, then dispatches to ONNX
    export, test, or train according to ``config``.
    """
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED,
                                                   ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    # Logging is set up only on the main process to avoid duplicated output.
    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    if config.seed is not None:
        # Reproducibility: fixed seed plus deterministic cuDNN kernels
        # (benchmark mode is disabled because it picks kernels non-deterministically).
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # create model
    # When explicit weights are given, registry-pretrained weights are
    # disabled and the checkpoint is applied after the compression wrap.
    model_name = config['model']
    weights = config.get('weights')
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    compression_ctrl, model = create_compressed_model(model, config)
    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))
    model, _ = prepare_model_for_execution(model, config)
    if config.distributed:
        compression_ctrl.distributed()

    # Inception models need special handling during training (aux logits).
    is_inception = 'inception' in model_name

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    params_to_optimize = get_parameter_groups(model, config)
    optimizer, lr_scheduler = make_optimizer(params_to_optimize, config)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, config, optimizer, compression_ctrl, best_acc1 = \
            resume_from_checkpoint(resuming_checkpoint, model, config,
                                   optimizer, compression_ctrl)

    # ONNX export is terminal — nothing else runs after it.
    if config.to_onnx is not None:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    # Outside the seeded (deterministic) path, let cuDNN auto-tune kernels.
    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading code
    train_dataset, val_dataset = create_datasets(config)
    train_loader, train_sampler, val_loader = create_data_loaders(config, train_dataset, val_dataset)

    if config.mode.lower() == 'test':
        print_statistics(compression_ctrl.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        # Compression initialization only runs for a fresh start, not on resume.
        if not resuming_checkpoint:
            compression_ctrl.initialize(data_loader=train_loader, criterion=criterion)
        train(config, compression_ctrl, model, criterion, is_inception,
              lr_scheduler, model_name, optimizer, train_loader, train_sampler,
              val_loader, best_acc1)