Example #1
def load_model(model,
               pretrained=True,
               num_classes=1000,
               model_params=None,
               weights_path: str = None) -> torch.nn.Module:
    logger.info("Loading model: {}".format(model))
    if model_params is None:
        model_params = {}
    if model in torchvision.models.__dict__:
        load_model_fn = partial(torchvision.models.__dict__[model],
                                num_classes=num_classes,
                                pretrained=pretrained,
                                **model_params)
    elif model in custom_models.__dict__:
        load_model_fn = partial(custom_models.__dict__[model],
                                num_classes=num_classes,
                                pretrained=pretrained,
                                **model_params)
    else:
        raise Exception("Undefined model name")
    loaded_model = safe_thread_call(load_model_fn)
    if not pretrained and weights_path is not None:
        sd = torch.load(weights_path, map_location='cpu')
        load_state(loaded_model, sd, is_resume=False)
    return loaded_model
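
A hypothetical usage sketch for the loader above; 'resnet18' stands in for any name defined in torchvision.models or custom_models, and the weights path is a placeholder:

# ImageNet-pretrained torchvision model:
model = load_model('resnet18', pretrained=True, num_classes=1000)

# Untrained variant restored from a locally stored state dict:
model = load_model('resnet18', pretrained=False, num_classes=10,
                   weights_path='weights.pth')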
Example #2
def load_model(model,
               pretrained=True,
               num_classes=1000,
               model_params=None,
               weights_path: str = None) -> torch.nn.Module:
    """

       ** WARNING: This is implemented with torch.load, which in turn relies on
       Python's pickling facilities; unpickling can execute arbitrary code.
       Only load data you trust.

    """
    logger.info("Loading model: {}".format(model))
    if model_params is None:
        model_params = {}
    if model in torchvision.models.__dict__:
        load_model_fn = partial(torchvision.models.__dict__[model],
                                num_classes=num_classes,
                                pretrained=pretrained,
                                **model_params)
    elif model in custom_models.__dict__:
        load_model_fn = partial(custom_models.__dict__[model],
                                num_classes=num_classes,
                                pretrained=pretrained,
                                **model_params)
    else:
        raise Exception("Undefined model name")
    loaded_model = safe_thread_call(load_model_fn)
    if not pretrained and weights_path is not None:
        sd = torch.load(weights_path, map_location='cpu')
        load_state(loaded_model, sd, is_resume=False)
    return loaded_model
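
On PyTorch 1.13 and later, the risk this warning describes can be reduced by passing weights_only=True to torch.load, which restricts unpickling to tensors and other primitive types; a hedged sketch of the loading line above:

# Refuses to unpickle arbitrary objects; only tensors and primitives load.
sd = torch.load(weights_path, map_location='cpu', weights_only=True)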
Example #3
def create_model(config):
    input_info_list = create_input_infos(config)
    image_size = input_info_list[0].shape[-1]
    ssd_net = build_ssd(config.model, config.ssd_params, image_size,
                        config.num_classes, config)
    compression_ctrl, ssd_net = create_compressed_model(ssd_net, config)
    weights = config.get('weights')
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(ssd_net, sd)
    ssd_net.train()
    model, _ = prepare_model_for_execution(ssd_net, config)
    return compression_ctrl, model
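
A hypothetical call for this variant; note that load_state runs after create_compressed_model has rebound ssd_net, so the state dict in config['weights'] is applied to the compressed model:

compression_ctrl, model = create_model(config)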
Example #4
def create_model(config: SampleConfig, resuming_model_sd: dict = None):
    input_info_list = create_input_infos(config.nncf_config)
    image_size = input_info_list[0].shape[-1]
    ssd_net = build_ssd(config.model, config.ssd_params, image_size, config.num_classes, config)
    weights = config.get('weights')
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(ssd_net, sd)

    ssd_net.to(config.device)

    compression_ctrl, compressed_model = create_compressed_model(ssd_net, config.nncf_config, resuming_model_sd)
    compressed_model, _ = prepare_model_for_execution(compressed_model, config)

    compressed_model.train()
    return compression_ctrl, compressed_model
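
Compared with Example #3, this variant restores the raw weights into ssd_net before the compression wrapper is applied, and hands any resuming state dict to create_compressed_model instead of loading it afterwards. A hypothetical call (the path is a placeholder):

sd = torch.load('previous_run.pth', map_location='cpu')
compression_ctrl, net = create_model(config, resuming_model_sd=sd)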
Example #5
def resume_from_checkpoint(resuming_checkpoint, model, config, optimizer, compression_ctrl):
    best_acc1 = 0
    if osp.isfile(resuming_checkpoint):
        logger.info("=> loading checkpoint '{}'".format(resuming_checkpoint))
        checkpoint = torch.load(resuming_checkpoint, map_location='cpu')
        load_state(model, checkpoint['state_dict'], is_resume=True)
        if config.mode.lower() == 'train' and config.to_onnx is None:
            config.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            compression_ctrl.scheduler.load_state_dict(checkpoint['scheduler'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch: {}, best_acc1: {:.3f})"
                        .format(resuming_checkpoint, checkpoint['epoch'], best_acc1))
        else:
            logger.info("=> loaded checkpoint '{}'".format(resuming_checkpoint))
    else:
        raise FileNotFoundError("no checkpoint found at '{}'".format(resuming_checkpoint))
    return model, config, optimizer, compression_ctrl, best_acc1
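
resume_from_checkpoint expects the keys read above to be present when resuming training; a minimal sketch of a compatible save call (the path is a placeholder, and epoch and best_acc1 come from the training loop):

torch.save({'state_dict': model.state_dict(),
            'epoch': epoch,
            'best_acc1': best_acc1,
            'optimizer': optimizer.state_dict(),
            'scheduler': compression_ctrl.scheduler.state_dict()},
           'checkpoint.pth')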
Example #6
def load_checkpoint(model, filename, map_location=None, strict=False):
    """Load checkpoint from a file or URI.

    Args:
        model (Module): Module to load the checkpoint into.
        filename (str): Either a filepath, a URL, or ``modelzoo://xxxxxxx``.
        map_location (str): Same as :func:`torch.load`.
        strict (bool): Whether to strictly enforce that the keys in the
            checkpoint match the keys of the model.

    Returns:
        dict or OrderedDict: The loaded checkpoint.
    """
    checkpoint = torch.load(filename, map_location=map_location)
    # get state_dict from checkpoint
    if isinstance(checkpoint, OrderedDict):
        state_dict = checkpoint
    elif isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    else:
        raise RuntimeError(
            'No state_dict found in checkpoint file {}'.format(filename))
    _ = load_state(model, state_dict, strict)
    return checkpoint
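
A hypothetical call; the function accepts either a bare OrderedDict state dict or any dict wrapping one under a 'state_dict' key, and returns the checkpoint unchanged so callers can read extra metadata from it:

checkpoint = load_checkpoint(model, 'checkpoint.pth', map_location='cpu')  # placeholder path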
Example #7
def main_worker(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    logger.info(config)

    config.device = get_device(config)
    dataset = get_dataset(config.dataset)
    color_encoding = dataset.color_encoding
    num_classes = len(color_encoding)

    if config.metrics_dump is not None:
        write_metrics(0, config.metrics_dump)

    weights = config.get('weights')
    model = load_model(config.model,
                       pretrained=config.get('pretrained', True)
                       if weights is None else False,
                       num_classes=num_classes,
                       model_params=config.get('model_params', {}))
    compression_ctrl, model = create_compressed_model(model, config)
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(model, sd)

    model, model_without_dp = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_ctrl.distributed()

    resuming_checkpoint = config.resuming_checkpoint

    if resuming_checkpoint is not None:
        if not config.pretrained:
            # Load the previously saved model state
            model, _, _, _, _ = \
                load_checkpoint(model, resuming_checkpoint, config.device,
                                compression_scheduler=compression_ctrl.scheduler)

    if config.to_onnx is not None:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if config.mode.lower() == 'test':
        logger.info(model)
        model_parameters = filter(lambda p: p.requires_grad,
                                  model.parameters())
        params = sum([np.prod(p.size()) for p in model_parameters])
        logger.info("Trainable argument count:{params}".format(params=params))

        model = model.to(config.device)
        loaders, w_class = load_dataset(dataset, config)
        _, val_loader = loaders
        test(model, val_loader, w_class, color_encoding, config)
        print_statistics(compression_ctrl.statistics())
    elif config.mode.lower() == 'train':
        loaders, w_class = load_dataset(dataset, config)
        train_loader, val_loader = loaders
        if not resuming_checkpoint:
            compression_ctrl.initialize(train_loader)
        train(model, model_without_dp, compression_ctrl, train_loader,
              val_loader, w_class, color_encoding, config)
    else:
        # Should never happen...but just in case it does
        raise RuntimeError(
            "\"{0}\" is not a valid choice for execution mode.".format(
                config.mode))
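
The numpy-based parameter count in the test branch above has a torch-only equivalent; a minimal sketch:

params = sum(p.numel() for p in model.parameters() if p.requires_grad)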
Example #8
def main_worker(current_gpu, config):
    #################################
    # Setup experiment environment
    #################################
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)
    if is_on_first_rank(config):
        configure_logging(logger, config)
        print_args(config)

    config.device = get_device(config)
    config.start_iter = 0

    ##########################
    # Prepare metrics log file
    ##########################

    if config.metrics_dump is not None:
        write_metrics(0, config.metrics_dump)

    ##################
    # Prepare model
    ##################

    compression_ctrl, net = create_model(config)
    if config.distributed:
        config.batch_size //= config.ngpus_per_node
        config.workers //= config.ngpus_per_node
        compression_ctrl.distributed()

    ###########################
    # Criterion and optimizer
    ###########################

    params_to_optimize = get_parameter_groups(net, config)
    optimizer, lr_scheduler = make_optimizer(params_to_optimize, config)

    criterion = MultiBoxLoss(config,
                             config['num_classes'],
                             overlap_thresh=0.5,
                             prior_for_matching=True,
                             bkg_label=0,
                             neg_mining=True,
                             neg_pos=3,
                             neg_overlap=0.5,
                             encode_target=False,
                             device=config.device)

    ###########################
    # Load checkpoint
    ###########################

    resuming_checkpoint = config.resuming_checkpoint
    if resuming_checkpoint:
        logger.info(
            'Resuming training, loading {}...'.format(resuming_checkpoint))
        checkpoint = torch.load(resuming_checkpoint, map_location='cpu')
        # use the checkpoint itself in case only a state dict was saved,
        # i.e. the checkpoint was created with `torch.save(module.state_dict())`
        state_dict = checkpoint.get('state_dict', checkpoint)
        load_state(net, state_dict, is_resume=True)
        if config.mode.lower() == 'train' and config.to_onnx is None:
            compression_ctrl.scheduler.load_state_dict(checkpoint['scheduler'])
            optimizer.load_state_dict(
                checkpoint.get('optimizer', optimizer.state_dict()))
            config.start_iter = checkpoint.get('iter', 0) + 1

    if config.to_onnx:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    ###########################
    # Prepare data
    ###########################

    test_data_loader, train_data_loader = create_dataloaders(config)

    if config.mode.lower() == 'test':
        with torch.no_grad():
            print_statistics(compression_ctrl.statistics())
            net.eval()
            mAp = test_net(net,
                           config.device,
                           test_data_loader,
                           distributed=config.distributed)
            if config.metrics_dump is not None:
                write_metrics(mAp, config.metrics_dump)
            return

    if not resuming_checkpoint:
        compression_ctrl.initialize(train_data_loader)

    train(net, compression_ctrl, train_data_loader, test_data_loader,
          criterion, optimizer, config, lr_scheduler)
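
Because the resume logic above falls back to the checkpoint itself when no 'state_dict' key is present, either save style below can be resumed (paths are placeholders and step is a hypothetical iteration counter):

torch.save(net.state_dict(), 'bare.pth')           # bare state dict
torch.save({'state_dict': net.state_dict(),        # full training checkpoint
            'optimizer': optimizer.state_dict(),
            'scheduler': compression_ctrl.scheduler.state_dict(),
            'iter': step},
           'full.pth')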
Example #9
def main_worker(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # create model
    model_name = config['model']
    weights = config.get('weights')
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    compression_ctrl, model = create_compressed_model(model, config)
    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))
    model, _ = prepare_model_for_execution(model, config)
    if config.distributed:
        compression_ctrl.distributed()

    is_inception = 'inception' in model_name

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    params_to_optimize = get_parameter_groups(model, config)
    optimizer, lr_scheduler = make_optimizer(params_to_optimize, config)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, config, optimizer, compression_ctrl, best_acc1 = \
            resume_from_checkpoint(resuming_checkpoint, model,
                                   config, optimizer, compression_ctrl)

    if config.to_onnx is not None:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading code
    train_dataset, val_dataset = create_datasets(config)
    train_loader, train_sampler, val_loader = create_data_loaders(config, train_dataset, val_dataset)

    if config.mode.lower() == 'test':
        print_statistics(compression_ctrl.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        if not resuming_checkpoint:
            compression_ctrl.initialize(data_loader=train_loader, criterion=criterion)
        train(config, compression_ctrl, model, criterion, is_inception, lr_scheduler, model_name, optimizer,
              train_loader, train_sampler, val_loader, best_acc1)
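
The seeding block near the top of this example relies on a manual_seed helper defined elsewhere; a minimal sketch of what it might cover, together with the cuDNN flags from the example (an assumption: full determinism on recent PyTorch may additionally require torch.use_deterministic_algorithms(True)):

import random
import numpy as np
import torch
import torch.backends.cudnn as cudnn

def manual_seed(seed):
    # Seed every RNG the run may touch; torch.manual_seed also seeds CUDA.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

manual_seed(42)
cudnn.deterministic = True   # prefer deterministic cuDNN kernels
cudnn.benchmark = False      # disable nondeterministic autotuning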