Example #1
def test_load_state_interoperability(_algos, _model_wrapper, is_resume):
    config_save = get_empty_config()
    config_save['compression'] = [{
        'algorithm': algo
    } for algo in _algos['save_algos']]
    compressed_model_save, _ = create_compressed_model_and_algo_for_test(
        BasicConvTestModel(), config_save)
    model_save = _model_wrapper['save_model'](compressed_model_save)
    saved_model_state = model_save.state_dict()
    ref_num_loaded = len(saved_model_state)

    config_resume = get_empty_config()
    config_resume['compression'] = [{
        'algorithm': algo
    } for algo in _algos['load_algos']]
    compressed_model_resume, _ = create_compressed_model_and_algo_for_test(
        BasicConvTestModel(), config_resume)
    model_resume = _model_wrapper['resume_model'](compressed_model_resume)

    if not is_resume or (is_resume and _algos['is_resume_ok']):
        act_num_loaded = load_state(model_resume, saved_model_state, is_resume)

        if ('magnitude_sparsity' in _algos['load_algos'] or 'const_sparsity' in _algos['load_algos']) \
            and 'rb_sparsity' in _algos['save_algos']:
            # no need to load _mask and _uniform
            ref_num_loaded -= 2
        assert act_num_loaded == ref_num_loaded
    else:
        with pytest.raises(RuntimeError):
            load_state(model_resume, saved_model_state, is_resume)
Example #2
    def test_scale_and_sign_init_for_quant_algo__after_load_state(
            self, wrap_dataloader):
        config = create_config()
        data_loader = self.create_dataloader(wrap_dataloader, config)
        config.register_extra_structs([QuantizationRangeInitArgs(data_loader)])
        _, compressed_model = self.create_algo_and_compressed_model(config)
        ref_loaded_scale_val = torch.ones((1, 1, 1, 1)) * 100
        load_state(
            compressed_model,
            {
                'module.features.0.0.pre_ops.0.op.signed_tensor':
                torch.tensor([0.]),  # quantizer of 1st conv's weights
                'module.features.1.0.pre_ops.0.op.scale':
                ref_loaded_scale_val  # quantizer of 2nd conv's weights
            })

        self.check_sign_and_scale(
            compressed_model, {
                '.*Sequential\\[0\\].*UpdateWeight.*':
                (False, torch.ones(2, 1, 1, 1)),
                '.*Sequential\\[1\\].*UpdateWeight.*':
                (True, ref_loaded_scale_val),
                '.*activation_quantizers.*Sequential\\[0\\].*': (True, 4),
                '.*activation_quantizers.*nncf_model_input*': (False, 1)
            })
Example #3
def test_can_restore_binary_mask_on_magnitude_algo_resume():
    config = get_empty_config()
    config['compression'] = {
        "algorithm": "magnitude_sparsity",
        "params": {
            "weight_importance": "abs",
            "schedule": "multistep",
            "multistep_sparsity_levels": [0.3, 0.5]
        }
    }

    sparse_model, _ = create_compressed_model_and_algo_for_test(
        MagnitudeTestModel(), config)
    with torch.no_grad():
        sparse_model(torch.ones([1, 1, 10, 10]))

    config = get_empty_config()
    config["compression"] = {"algorithm": "const_sparsity"}
    const_sparse_model, _ = create_compressed_model_and_algo_for_test(
        MagnitudeTestModel(), config)

    load_state(const_sparse_model, sparse_model.state_dict())

    op = const_sparse_model.conv1.pre_ops['0']
    check_equal(ref_mask_1, op.operand.binary_mask)

    op = const_sparse_model.conv2.pre_ops['0']
    check_equal(ref_mask_2, op.operand.binary_mask)
Example #4
def test_hawq_on_single_conv_without_quantizers(_seed, dataset_dir, tmp_path,
                                                params: HAWQTestParams):
    config = get_squeezenet_quantization_config(batch_size=params.batch_size)
    iter_number = params.iter_number
    tolerance = 4e-4

    model = squeezenet1_1(num_classes=10, dropout=0)
    from torchvision.models.squeezenet import model_urls
    load_state(model, model_zoo.load_url(model_urls['squeezenet1_1']))
    model = model.cuda()

    criterion = nn.CrossEntropyLoss().cuda()

    if not dataset_dir:
        dataset_dir = str(tmp_path)
    data_loader, _ = create_test_dataloaders(config, dataset_dir)
    device = next(model.parameters()).device

    for _, param in model.named_parameters():
        param.requires_grad = False
    first_conv = next(iter(get_all_modules_by_type(model, 'Conv2d').values()))
    first_conv.weight.requires_grad = True

    trace_estimator = HessianTraceEstimator(model, default_criterion_fn,
                                            criterion, device, data_loader,
                                            params.num_data_points)
    actual_state = trace_estimator.get_average_traces(max_iter=iter_number,
                                                      tolerance=tolerance)
    assert math.isclose(actual_state.item(), params.ref_trace, rel_tol=1e-09)
Example #5
def test_model_can_be_loaded_with_resume(_params):
    p = _params
    sample_config_path = p['sample_config_path']
    checkpoint_path = p['checkpoint_path']

    config = SampleConfig.from_json(str(sample_config_path))
    nncf_config = NNCFConfig.from_json(str(sample_config_path))

    config.execution_mode = p['execution_mode']

    config.current_gpu = 0
    config.device = get_device(config)
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        config.dist_url = "tcp://127.0.0.1:9898"
        config.dist_backend = "nccl"
        config.rank = 0
        config.world_size = 1
        configure_distributed(config)

    model_name = config['model']
    model = load_model(model_name,
                       pretrained=False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))

    model.to(config.device)
    model, compression_ctrl = create_compressed_model_and_algo_for_test(model, nncf_config)
    model, _ = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_ctrl.distributed()

    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    load_state(model, checkpoint['state_dict'], is_resume=True)
Example #6
def test_hawq_precision_init(_seed, dataset_dir, tmp_path, mocker,
                             config_creator: Callable, filename_suffix: str):
    num_data_points = 100
    batch_size = 10
    config = config_creator(batch_size, num_data_points)
    model = MobileNetV2(num_classes=10)
    model.eval()

    criterion = nn.CrossEntropyLoss().cuda()
    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config.get("model_size"),
                                              dataset_dir, batch_size)
    config = register_default_init_args(config, criterion, train_loader)

    mocked_trace = mocker.patch(
        'nncf.quantization.hessian_trace.HessianTraceEstimator.get_average_traces'
    )

    mock_avg_traces = get_mock_avg_traces(model)
    mocked_trace.return_value = mock_avg_traces
    from torchvision.models.mobilenet import model_urls
    load_state(model, model_zoo.load_url(model_urls['mobilenet_v2']))
    model, algo_ctrl = create_compressed_model_and_algo_for_test(model, config)
    model = model.cuda()

    all_quantizers_per_full_scope = get_all_quantizers_per_full_scope(model)
    graph = get_bitwidth_graph(algo_ctrl, model, all_quantizers_per_full_scope)

    path_to_dot = 'mobilenet_v2_mixed_bitwidth_graph_{}.dot'.format(
        filename_suffix)
    check_graph(graph,
                path_to_dot,
                os.path.join('quantized', 'hawq'),
                sort_dot_graph=False)
Example #7
def test_hawq_precision_init(_seed, dataset_dir, tmp_path, mocker):
    num_data_points = 100
    batch_size = 10
    config = create_hawq_test_config(batch_size, num_data_points)
    model = squeezenet1_1_custom(num_classes=10, pretrained=False, dropout=0)

    model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)
    load_state(model, model_zoo.load_url(model_urls['squeezenet1_1']))
    model = model.cuda()
    device = next(model.parameters()).device

    criterion = nn.CrossEntropyLoss().cuda()

    if not dataset_dir:
        dataset_dir = str(tmp_path)
    train_loader, _ = create_test_dataloaders(config.model_size, dataset_dir, batch_size)
    mocked_trace = mocker.patch('nncf.quantization.hessian_trace.HessianTraceEstimator.get_average_traces')
    num_traces = len(get_all_modules_by_type(model, 'NNCFConv2d'))
    mock_avg_traces = [torch.Tensor([num_traces - i]).to(device) for i in range(num_traces)]
    mocked_trace.return_value = mock_avg_traces

    compression_ctrl.initialize(criterion=criterion, data_loader=train_loader)
    act_bitwidth_per_scope = get_bitwidth_per_scope(model)
    path_to_ref = str(TEST_ROOT / 'data/hawq_reference/squeezenet1_1_mixed_bitwidth_per_scope.json')
    compare_with_ref_if_exists(act_bitwidth_per_scope, path_to_ref)
Example #8
def build_ssd_vgg(cfg, size, num_classes, config):
    ssd_vgg = SSD_VGG(cfg, size, num_classes, batch_norm=config.get('batchnorm', False))

    if config.basenet and (config.resuming_checkpoint_path is None) and (config.weights is None):
        logger.debug('Loading base network...')
        basenet_weights = torch.load(config.basenet)
        new_weights = {}
        for wn, wv in basenet_weights.items():
            wn = wn.replace('features.', '')
            new_weights[wn] = wv

        load_state(ssd_vgg.basenet, new_weights, is_resume=False)
    return ssd_vgg
Example #9
def build_ssd_mobilenet(cfg, size, num_classes, config):
    if size != 300:
        raise ValueError("Only Mobilenet-SSD with input size 300 is supported")
    mobilenet_ssd = MobileNetSSD(num_classes, cfg)

    if config.basenet and (config.resuming_checkpoint is None) and (config.weights is None):
        logger.debug('Loading base network...')
        basenet_weights = torch.load(config.basenet)['state_dict']
        new_weights = {}
        for wn, wv in basenet_weights.items():
            wn = wn.replace('model.', '')
            new_weights[wn] = wv

        load_state(mobilenet_ssd.basenet, new_weights, is_resume=False)
    return mobilenet_ssd
Example #10
def load_torch_model(config, cuda=False):
    weights = config.get('weights')
    model = load_model(config.get('model'),
                       pretrained=config.get('pretrained', True)
                       if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params', {}))
    compression_ctrl, model = create_compressed_model(model, config)
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(model, sd)
    if cuda:
        model = model.cuda()
        model = torch.nn.DataParallel(model)
    print_statistics(compression_ctrl.statistics())
    return model
Example #11
def test_load_state_sets_initialized_flag():
    config = get_basic_quantization_config()

    model = TwoConvTestModel()
    quant_model, _ = create_compressed_model_and_algo_for_test(model, config)

    load_state(quant_model, {
        'module.features.0.0.pre_ops.0.op.signed_tensor': torch.tensor([1.0]),  # quantizer of 1st conv's weights
        'module.features.1.0.pre_ops.0.op.scale': torch.tensor([1.0])  # quantizer of 2nd conv's weights
    })

    quantizers = get_all_modules_by_type(quant_model, 'SymmetricQuantizer')
    for scope, module in quantizers.items():
        if 'activation_quantizers' in str(scope) or 'UpdateInputs' in str(scope):
            assert not module.initialized
        else:
            assert module.initialized
Example #12
    def test_scale_and_sign_init_for_quant_algo__after_load_state(self, wrap_dataloader):
        config = self.create_config()
        algo, compressed_model = self.create_algo_and_compressed_model(config)
        load_state(compressed_model, {
            'module.features.0.0.pre_ops.0.op.signed_tensor': torch.tensor([0.]),  # quantizer of 1st conv's weights
            'module.features.1.0.pre_ops.0.op.scale': torch.tensor([100])  # quantizer of 2nd conv's weights
        })

        device = next(compressed_model.parameters()).device
        data_loader = self.create_dataloader(wrap_dataloader, config, device)

        algo.initialize(data_loader)

        self.check_sign_and_scale(compressed_model, {
            '.*Sequential\\[0\\].*UpdateWeight.*': (False, 1),
            '.*Sequential\\[1\\].*UpdateWeight.*': (False, 100),
            '.*activation_quantizers.*Sequential\\[0\\].*': (True, 4),
            '.*activation_quantizers.*Sequential\\[1\\].*': (True, 24)
        })
Example #13
def build_ssd_mobilenet(cfg, size, num_classes, config):
    if size != 300:
        raise ValueError("Only Mobilenet-SSD with input size 300 is supported")
    mobilenet_ssd = MobileNetSSD(num_classes, cfg)

    if config.basenet and (config.resuming_checkpoint_path is None) and (config.weights is None):
        logger.debug('Loading base network...')
        #
        # ** WARNING: torch.load functionality uses Python's pickling facilities that
        # may be used to perform arbitrary code execution during unpickling. Only load the data you
        # trust.
        #
        basenet_weights = torch.load(config.basenet)['state_dict']
        new_weights = {}
        for wn, wv in basenet_weights.items():
            wn = wn.replace('model.', '')
            new_weights[wn] = wv

        load_state(mobilenet_ssd.basenet, new_weights, is_resume=False)
    return mobilenet_ssd
Example #14
def load_checkpoint(model, model_path, device_name, optimizer=None, compression_scheduler=None):
    """Loads the model from a specified directory with a specified name

    Keyword arguments:
    - model (``nn.Module``): The stored model state is copied to this model
    instance.
    - model_path: The model filename.
    - device_name: Device name for the model to be loaded into.
    - optimizer (``torch.optim``): The stored optimizer state is copied to this
    optimizer instance.
    - compression_scheduler: The compression scheduler into which the saved
                             scheduler state is loaded.

    Returns:
    The ``model``, ``optimizer``, epoch, mean IoU and ``compression_scheduler``, loaded from the
    checkpoint.

    """
    assert os.path.isfile(
        model_path), "The model file \"{0}\" doesn't exist.".format(model_path)

    # Load the stored model parameters to the model instance

    #
    # ** WARNING: torch.load functionality uses Python's pickling facilities that
    # may be used to perform arbitrary code execution during unpickling. Only load the data you
    # trust.
    #
    checkpoint = torch.load(model_path, map_location=device_name)
    load_state(model, checkpoint['state_dict'], is_resume=True)
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer'])
    epoch = checkpoint['epoch']
    miou = checkpoint['miou']

    if "scheduler" in checkpoint and compression_scheduler is not None:
        compression_scheduler.load_state_dict(checkpoint['scheduler'])

    return model, optimizer, epoch, miou, compression_scheduler
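For reference, here is a minimal sketch (an assumption, not code taken from the project) of a save_checkpoint counterpart that writes a checkpoint in the format load_checkpoint above expects, using the same 'state_dict', 'optimizer', 'epoch', 'miou' and 'scheduler' keys:

import torch

def save_checkpoint(model, optimizer, epoch, miou, compression_scheduler, model_path):
    # Store all pieces of state under the keys that load_checkpoint reads back.
    checkpoint = {
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epoch,
        'miou': miou,
        'scheduler': compression_scheduler.state_dict(),
    }
    torch.save(checkpoint, model_path)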
Example #15
def resume_from_checkpoint(resuming_checkpoint, model, config, optimizer, optimizer_scheduler, kd_loss_calculator,
                           compression_ctrl):
    best_acc1 = 0
    if osp.isfile(resuming_checkpoint):
        logger.info("=> loading checkpoint '{}'".format(resuming_checkpoint))
        checkpoint = torch.load(resuming_checkpoint, map_location='cpu')
        load_state(model, checkpoint['state_dict'], is_resume=True)
        if config.mode.lower() == 'train' and config.to_onnx is None:
            config.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            kd_loss_calculator.original_model.load_state_dict(checkpoint['original_model_state_dict'])
            compression_ctrl.scheduler.load_state_dict(checkpoint['compression_scheduler'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            optimizer_scheduler.load_state_dict(checkpoint['optimizer_scheduler'])
            logger.info("=> loaded checkpoint '{}' (epoch: {}, best_acc1: {:.3f})"
                        .format(resuming_checkpoint, checkpoint['epoch'], best_acc1))
        else:
            logger.info("=> loaded checkpoint '{}'".format(resuming_checkpoint))
    else:
        raise FileNotFoundError("no checkpoint found at '{}'".format(resuming_checkpoint))
    return model, config, optimizer, optimizer_scheduler, kd_loss_calculator, compression_ctrl, best_acc1
Example #16
File: ssd_vgg.py Project: xiaming9880/nncf
def build_ssd_vgg(cfg, size, num_classes, config):
    ssd_vgg = SSD_VGG(cfg,
                      size,
                      num_classes,
                      batch_norm=config.get('batchnorm', False))

    if config.basenet and (config.resuming_checkpoint_path is
                           None) and (config.weights is None):
        print('Loading base network...')
        #
        # ** WARNING: torch.load functionality uses Python's pickling facilities that
        # may be used to perform arbitrary code execution during unpickling. Only load the data you
        # trust.
        #
        basenet_weights = torch.load(config.basenet)
        new_weights = {}
        for wn, wv in basenet_weights.items():
            wn = wn.replace('features.', '')
            new_weights[wn] = wv

        load_state(ssd_vgg.basenet, new_weights, is_resume=False)
    return ssd_vgg
Example #17
def test_load_state_skips_not_matched_params__from_smaller_to_larger():
    ref_weights = torch.tensor([[[[3, 2], [2, 3]]]])
    ref_bias = torch.tensor([2.])
    model_save = BasicConvTestModel(out_channels=2)
    model_load = BasicConvTestModel(out_channels=1, weight_init=2, bias_init=2)

    num_loaded = load_state(model_load, model_save.state_dict())

    assert num_loaded == 0
    act_bias = model_load.conv.bias.data
    act_weights = model_load.conv.weight.data
    check_equal(act_bias, ref_bias)
    check_equal(act_weights, ref_weights)
Example #18
def test_load_state_skips_not_matched_params__from_larger_to_smaller():
    ref_weights = BasicConvTestModel.default_weight()
    ref_bias = BasicConvTestModel.default_bias()
    model_save = BasicConvTestModel(out_channels=1, weight_init=2, bias_init=2)
    model_load = BasicConvTestModel(out_channels=2)

    num_loaded = load_state(model_load, model_save.state_dict())

    act_bias = model_load.conv.bias.data
    act_weights = model_load.conv.weight.data
    assert num_loaded == 0
    check_equal(act_bias, ref_bias)
    check_equal(act_weights, ref_weights)
Example #19
def test_can_restore_binary_mask_on_magnitude_quant_algo_resume(tmp_path):
    config = get_empty_config()
    config["compression"] = [{
        "algorithm": "magnitude_sparsity",
        "params": {
            "schedule": "multistep",
            "multistep_sparsity_levels": [0.3, 0.5],
            "weight_importance": "abs"
        }
    }, {
        "algorithm": "quantization"
    }]

    sparse_model, _ = create_compressed_model_and_algo_for_test(
        MagnitudeTestModel(), config)

    # load_state doesn't support CPU + Quantization
    sparse_model = torch.nn.DataParallel(sparse_model)
    sparse_model.cuda()
    with torch.no_grad():
        sparse_model(torch.ones([1, 1, 10, 10]))

    config = get_empty_config()
    config["compression"] = [{
        "algorithm": "const_sparsity"
    }, {
        "algorithm": "quantization"
    }]
    const_sparse_model, _ = create_compressed_model_and_algo_for_test(
        MagnitudeTestModel(), config)

    load_state(const_sparse_model, sparse_model.state_dict())

    op = const_sparse_model.get_nncf_wrapped_model().conv1.pre_ops['0']
    check_equal(ref_mask_1, op.operand.binary_mask)

    op = const_sparse_model.get_nncf_wrapped_model().conv2.pre_ops['0']
    check_equal(ref_mask_2, op.operand.binary_mask)
Example #20
def test_ordinary_load(algo, _model_wrapper, is_resume):
    config = get_empty_config()
    if algo:
        config['compression'] = {'algorithm': algo}

    compressed_model_save, _ = create_compressed_model_and_algo_for_test(BasicConvTestModel(), config)
    model_save = _model_wrapper['save_model'](compressed_model_save)

    compressed_model_resume, _ = create_compressed_model_and_algo_for_test(BasicConvTestModel(), config)
    model_resume = _model_wrapper['resume_model'](compressed_model_resume)

    num_loaded = load_state(model_resume, model_save.state_dict(), is_resume)

    assert num_loaded == len(model_save.state_dict())
Example #21
def create_compressed_model(model: Module, config: NNCFConfig,
                            resuming_state_dict: dict = None,
                            dummy_forward_fn: Callable[[Module], Any] = None,
                            wrap_inputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None,
                            dump_graphs=True,) \
    -> Tuple[CompressionAlgorithmController, NNCFNetwork]:
    """
    The main function used to produce a model ready for compression fine-tuning from an original PyTorch
    model and a configuration object.
    :param model: The original model. Should have its parameters already loaded from a checkpoint or another
    source.
    :param config: A configuration object used to determine the exact compression modifications to be applied
    to the model
    :param resuming_state_dict: A PyTorch state dict object to load (strictly) into the compressed model after
    building.
    :param dummy_forward_fn: if supplied, will be used instead of a *forward* function call to build
    the internal graph representation via tracing. Specifying this is useful when the original training pipeline
    has special formats of data loader output or has additional *forward* arguments other than input tensors.
    Otherwise, the *forward* call of the model during graph tracing will be made with mock tensors according
    to the shape specified in the config object.
    :param wrap_inputs_fn: if supplied, will be used on the module's input arguments during a regular, non-dummy
    forward call before passing the inputs to the underlying compressed model. This is required if the model's input
    tensors that are important for compression are not supplied as arguments to the model's forward call directly, but
    instead are located in a container (such as list), and the model receives the container as an argument.
    wrap_inputs_fn should take as input two arguments - the tuple of positional arguments to the underlying
    model's forward call, and a dict of keyword arguments to the same. The function should wrap each tensor among the
    supplied model's args and kwargs that is important for compression (e.g. quantization) with an nncf.nncf_model_input
    function, which is a no-operation function and marks the tensors as inputs to be traced by NNCF in the internal
    graph representation. Output is the tuple of (args, kwargs), where args and kwargs are the same as were supplied in
    input, but with each compression-relevant tensor replaced by its version wrapped with nncf.nncf_model_input.
    :param dump_graphs: Whether to also dump the internal graph representations of the
    original and compressed models in the .dot format into the log directory.
    :return: A controller for the compression algorithm (or algorithms, in which case the controller
    is an instance of CompositeCompressionController) and the model ready for compression parameter training wrapped
    as an object of NNCFNetwork."""

    # Compress the model that will be deployed for inference on the target device. There is no need to compress parts
    # of the model that are used only during the training stage (e.g. AuxLogits of the Inception-v3 model) or unused
    # modules with weights. As a consequence, there is no need to worry about spoiling BN statistics, as they are
    # disabled in eval mode.
    model.eval()

    if dump_graphs:
        if dummy_forward_fn is None:
            input_info_list = create_input_infos(config)
            graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(
                    input_info_list, with_input_tracing=True))
        else:
            graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        if is_main_process():
            graph = graph_builder.build_graph(model)
            graph.visualize_graph(
                osp.join(config.get("log_dir", "."), "original_graph.dot"))

    set_debug_log_dir(config.get("log_dir", "."))

    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching',
                                               [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    compressed_model = NNCFNetwork(
        model,
        input_infos=input_info_list,
        dummy_forward_fn=dummy_forward_fn,
        wrap_inputs_fn=wrap_inputs_fn,
        ignored_scopes=ignored_scopes,
        target_scopes=target_scopes,
        scopes_without_shape_matching=scopes_without_shape_matching)

    should_init = resuming_state_dict is None
    compression_algo_builder_list = create_compression_algorithm_builders(
        config, should_init=should_init)

    for builder in compression_algo_builder_list:
        compressed_model = builder.apply_to(compressed_model)
    compression_ctrl = compressed_model.commit_compression_changes()

    try:
        if resuming_state_dict is not None:
            load_state(compressed_model, resuming_state_dict, is_resume=True)
    finally:
        if dump_graphs and is_main_process() and compression_algo_builder_list:
            if dummy_forward_fn is None:
                compressed_graph_builder = GraphBuilder(
                    custom_forward_fn=create_dummy_forward_fn(
                        input_info_list, with_input_tracing=False))
            else:
                compressed_graph_builder = GraphBuilder(
                    custom_forward_fn=dummy_forward_fn)

            graph = compressed_graph_builder.build_graph(
                compressed_model, compressed_model.get_tracing_context())
            graph.visualize_graph(
                osp.join(config.get("log_dir", "."), "compressed_graph.dot"))
    return compression_ctrl, compressed_model
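As a complement to the docstring above, here is a minimal sketch of a wrap_inputs_fn, assuming (hypothetically) that the model's forward receives a single list of input tensors as its first positional argument; the helper name is illustrative only:

from nncf import nncf_model_input  # the no-op marker function mentioned in the docstring

def wrap_inputs(args, kwargs):
    # Wrap every tensor in the input container so that NNCF traces it as a model
    # input in the internal graph representation; other args/kwargs pass through.
    wrapped_list = [nncf_model_input(tensor) for tensor in args[0]]
    return (wrapped_list,) + args[1:], kwargs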
Example #22
def main_worker_binarization(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # create model
    model_name = config['model']
    weights = config.get('weights')
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))

    original_model = copy.deepcopy(model)
    compression_ctrl, model = create_compressed_model(model, config)
    if not isinstance(compression_ctrl, BinarizationController):
        raise RuntimeError("The binarization sample worker may only be run with the binarization algorithm!")

    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))

    model, _ = prepare_model_for_execution(model, config)
    original_model.to(config.device)

    if config.distributed:
        compression_ctrl.distributed()

    is_inception = 'inception' in model_name

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    params_to_optimize = model.parameters()

    compression_config = config['compression']
    binarization_config = compression_config if isinstance(compression_config, dict) else compression_config[0]
    optimizer = get_binarization_optimizer(params_to_optimize, binarization_config)
    optimizer_scheduler = BinarizationOptimizerScheduler(optimizer, binarization_config)
    kd_loss_calculator = KDLossCalculator(original_model)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, config, optimizer, optimizer_scheduler, kd_loss_calculator, compression_ctrl, best_acc1 = \
            resume_from_checkpoint(resuming_checkpoint, model,
                                   config, optimizer, optimizer_scheduler, kd_loss_calculator, compression_ctrl)

    if config.to_onnx is not None:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading code
    train_dataset, val_dataset = create_datasets(config)
    train_loader, train_sampler, val_loader = create_data_loaders(config, train_dataset, val_dataset)

    if config.mode.lower() == 'test':
        print_statistics(compression_ctrl.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        if not resuming_checkpoint:
            compression_ctrl.initialize(data_loader=train_loader, criterion=criterion)

        batch_multiplier = (binarization_config.get("params", {})).get("batch_multiplier", 1)
        train_bin(config, compression_ctrl, model, criterion, is_inception, optimizer_scheduler, model_name, optimizer,
                  train_loader, train_sampler, val_loader, kd_loss_calculator, batch_multiplier, best_acc1)
Example #23
def create_compressed_model(model: Module, config: NNCFConfig,
                            resuming_state_dict: dict = None,
                            dummy_forward_fn: Callable[[Module], Any] = None,
                            dump_graphs=True,) \
    -> Tuple[CompressionAlgorithmController, NNCFNetwork]:
    """
    The main function used to produce a model ready for compression fine-tuning from an original PyTorch
    model and a configuration object.
    :param model: The original model. Should have its parameters already loaded from a checkpoint or another
    source.
    :param config: A configuration object used to determine the exact compression modifications to be applied
    to the model
    :param resuming_state_dict: A PyTorch state dict object to load (strictly) into the compressed model after
    building.
    :param dummy_forward_fn: will be used instead of a *forward* function call to build
    the internal graph representation via tracing. Specifying this is useful when the original training pipeline
    has special formats of data loader output or has additional *forward* arguments other than input tensors.
    Otherwise, the *forward* call of the model during graph tracing will be made with mock tensors according
    to the shape specified in the config object.
    :param dump_graphs: Whether to also dump the internal graph representations of the
    original and compressed models in the .dot format into the log directory.
    :return: A controller for the compression algorithm (or algorithms, in which case the controller
    is an instance of CompositeCompressionController) and the model ready for compression parameter training wrapped
    as an object of NNCFNetwork."""

    if dump_graphs:
        if dummy_forward_fn is None:
            input_info_list = create_input_infos(config)
            graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(
                    input_info_list, with_input_tracing=True))
        else:
            graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        if is_main_process():
            graph = graph_builder.build_graph(model)
            graph.dump_graph(osp.join(config.get("log_dir", "."),
                                      "original_graph.dot"),
                             extended=True)

    if is_debug():
        set_debug_log_dir(config.get("log_dir", "."))

    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching',
                                               [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    compressed_model = NNCFNetwork(
        model,
        input_infos=input_info_list,
        dummy_forward_fn=dummy_forward_fn,
        ignored_scopes=ignored_scopes,
        target_scopes=target_scopes,
        scopes_without_shape_matching=scopes_without_shape_matching)

    should_init = resuming_state_dict is None
    compression_algo_builder_list = create_compression_algorithm_builders(
        config, should_init=should_init)

    for builder in compression_algo_builder_list:
        compressed_model = builder.apply_to(compressed_model)
    compression_ctrl = compressed_model.commit_compression_changes()

    if dump_graphs and is_main_process() and compression_algo_builder_list:
        if dummy_forward_fn is None:
            compressed_graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(
                    input_info_list, with_input_tracing=False))
        else:
            compressed_graph_builder = GraphBuilder(
                custom_forward_fn=dummy_forward_fn)

        graph = compressed_graph_builder.build_graph(
            compressed_model, compressed_model.get_tracing_context())
        graph.dump_graph(osp.join(config.get("log_dir", "."),
                                  "compressed_graph.dot"),
                         extended=True)

    if resuming_state_dict is not None:
        load_state(compressed_model, resuming_state_dict, is_resume=True)

    return compression_ctrl, compressed_model
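Finally, a minimal usage sketch (an assumption, not an excerpt from the project) of passing resuming_state_dict so that create_compressed_model loads it strictly into the compressed model after building, instead of calling load_state manually as in the examples above:

import torch

# The checkpoint path and the 'state_dict' key are hypothetical; use whatever format your pipeline saves.
resuming_checkpoint = torch.load('checkpoint.pth', map_location='cpu')
compression_ctrl, compressed_model = create_compressed_model(
    model, nncf_config, resuming_state_dict=resuming_checkpoint['state_dict'])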