def test_gnmt_quantization(_case_config):
    model = GNMT(vocab_size=32)
    model = replace_lstm(model)
    forward_fn_ = gnmt_forward_fn(seq_len=10, batch_size=3, vocab_size=32)

    config = get_basic_quantization_config(_case_config.quant_type, input_sample_size=(3, 10))
    config["compression"].update({
        "quantizable_subgraph_patterns": [["linear", "__add__"],
                                          ["sigmoid", "__mul__", "__add__"],
                                          ["__add__", "tanh", "__mul__"],
                                          ["sigmoid", "__mul__"]],
        "disable_function_quantization_hooks": True,
        "ignored_scopes": ["GNMT/ResidualRecurrentEncoder[encoder]/Embedding[embedder]",
                           "GNMT/ResidualRecurrentDecoder[decoder]/Embedding[embedder]"]})

    compressed_model = NNCFNetwork(model,
                                   input_infos=create_input_infos(config),
                                   dummy_forward_fn=forward_fn_,
                                   scopes_without_shape_matching=[
                                       'GNMT/ResidualRecurrentDecoder[decoder]/RecurrentAttention[att_rnn]/'
                                       'BahdanauAttention[attn]'])

    compression_algo_builder_list = create_compression_algorithm_builders(config)

    for builder in compression_algo_builder_list:
        compressed_model = builder.apply_to(compressed_model)
    _ = compressed_model.commit_compression_changes()
    check_model_graph(compressed_model, 'gnmt_variable.dot', _case_config.graph_dir)
Example #2
def get_model_and_ctrl_with_applied_hw_config_quantization(model: torch.nn.Module, hw_config_dict: dict,
                                                           should_be_quantize_inputs: bool = True):
    nncf_config = get_quantization_config_without_range_init(model_size=1)
    nncf_config["compression"].update({"quantize_inputs": should_be_quantize_inputs})
    nncf_config["hw_config_type"] = "mock"

    net = NNCFNetwork(model, input_infos=[ModelInputInfo([1, 2, 1, 1])])
    hw_config = HWConfig.from_dict(hw_config_dict)
    qbuilder = QuantizationBuilder(nncf_config["compression"], should_init=False)
    qbuilder.quantizer_setup_type = QuantizerSetupType.PROPAGATION_BASED
    qbuilder.hw_config = hw_config
    net = qbuilder.apply_to(net)
    ctrl = net.commit_compression_changes()
    return net, ctrl
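
For reference, a hw_config_dict in roughly the shape HWConfig.from_dict expects might look like the
sketch below. The exact schema is NNCF-version-dependent, so the keys and values here (the "test"
target device, the "q8_a" alias, the "pertensor" granularity string) are illustrative assumptions,
not a definitive spec.

# A minimal, hypothetical mock HW config: one 8-bit symmetric per-tensor
# quantization flavor, allowed for convolution weights and activations.
mock_hw_config_dict = {
    "target_device": "test",
    "config": {
        "quantization": {
            "q8_a": {"bits": 8, "mode": "symmetric", "granularity": "pertensor"}
        }
    },
    "operations": [
        {"type": "Convolution",
         "quantization": {"activations": "q8_a", "weights": "q8_a"}}
    ]
}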
def create_test_quantization_env() -> QuantizationEnv:
    model = BasicConvTestModel()
    nncf_network = NNCFNetwork(model,
                               input_infos=[ModelInputInfo([1, 1, 4, 4])])
    hw_config_type = HWConfigType.VPU
    hw_config_path = HWConfig.get_path_to_hw_config(hw_config_type)
    hw_config = HWConfig.from_json(hw_config_path)
    setup = PropagationBasedQuantizerSetupGenerator(
        NNCFConfig(), nncf_network, hw_config=hw_config).generate_setup()
    experimental_builder = ExperimentalQuantizationBuilder(setup, {})
    experimental_builder.apply_to(nncf_network)
    # pylint:disable=line-too-long
    experimental_ctrl = nncf_network.commit_compression_changes()  # type: ExperimentalQuantizationController
    data_loader = create_mock_dataloader({
        "sample_size": [1, 1, 4, 4],
    })
    constraints = HardwareQuantizationConstraints()
    for qid in experimental_ctrl.all_quantizations:
        qconf_constraint_list = []
        qconf = experimental_ctrl.all_quantizations[qid].get_current_config()
        bit_set = [8, 4, 2] if 'conv' in str(qid) else [8, 4]
        for bits in bit_set:
            adj_qconf = deepcopy(qconf)
            adj_qconf.bits = bits
            qconf_constraint_list.append(adj_qconf)
        constraints.add(qid, qconf_constraint_list)

    return QuantizationEnv(nncf_network,
                           experimental_ctrl,
                           constraints,
                           data_loader,
                           lambda *x: 0,
                           hw_config_type=HWConfigType.VPU,
                           params=QuantizationEnvParams(
                               compression_ratio=0.15,
                               eval_subset_ratio=1.0,
                               skip_constraint=False,
                               finetune=False,
                               bits=[2, 4, 8],
                               dump_init_precision_data=False))
def create_compressed_model(model: Module, config: NNCFConfig,
                            resuming_state_dict: dict = None,
                            dummy_forward_fn: Callable[[Module], Any] = None,
                            dump_graphs=True,) \
    -> Tuple[CompressionAlgorithmController, NNCFNetwork]:
    """
    The main function used to produce a model ready for compression fine-tuning from an original PyTorch
    model and a configuration object.
    dummy_forward_fn
    :param model: The original model. Should have its parameters already loaded from a checkpoint or another
    source.
    :param config: A configuration object used to determine the exact compression modifications to be applied
    to the model
    :param resuming_state_dict: A PyTorch state dict object to load (strictly) into the compressed model after
    building.
    :param dummy_forward_fn: will be used instead of a *forward* function call to build
    the internal graph representation via tracing. Specifying this is useful when the original training pipeline
    has special formats of data loader output or has additional *forward* arguments other than input tensors.
    Otherwise, the *forward* call of the model during graph tracing will be made with mock tensors according
    to the shape specified in the config object.
    :param dump_graphs: Whether or not should also dump the internal graph representation of the
    original and compressed models in the .dot format into the log directory.
    :return: A controller for the compression algorithm (or algorithms, in which case the controller
    is an instance of CompositeCompressionController) and the model ready for compression parameter training wrapped
    as an object of NNCFNetwork."""

    if dump_graphs:
        if dummy_forward_fn is None:
            input_info_list = create_input_infos(config)
            graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(
                    input_info_list, with_input_tracing=True))
        else:
            graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        if is_main_process():
            graph = graph_builder.build_graph(model)
            graph.dump_graph(osp.join(config.get("log_dir", "."),
                                      "original_graph.dot"),
                             extended=True)

    if is_debug():
        set_debug_log_dir(config.get("log_dir", "."))

    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching',
                                               [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    compressed_model = NNCFNetwork(
        model,
        input_infos=input_info_list,
        dummy_forward_fn=dummy_forward_fn,
        ignored_scopes=ignored_scopes,
        target_scopes=target_scopes,
        scopes_without_shape_matching=scopes_without_shape_matching)

    should_init = resuming_state_dict is None
    compression_algo_builder_list = create_compression_algorithm_builders(
        config, should_init=should_init)

    for builder in compression_algo_builder_list:
        compressed_model = builder.apply_to(compressed_model)
    compression_ctrl = compressed_model.commit_compression_changes()

    if dump_graphs and is_main_process() and compression_algo_builder_list:
        if dummy_forward_fn is None:
            compressed_graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(
                    input_info_list, with_input_tracing=False))
        else:
            compressed_graph_builder = GraphBuilder(
                custom_forward_fn=dummy_forward_fn)

        graph = compressed_graph_builder.build_graph(
            compressed_model, compressed_model.get_tracing_context())
        graph.dump_graph(osp.join(config.get("log_dir", "."),
                                  "compressed_graph.dot"),
                         extended=True)

    if resuming_state_dict is not None:
        load_state(compressed_model, resuming_state_dict, is_resume=True)

    return compression_ctrl, compressed_model
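
A minimal usage sketch of the function above, assuming NNCFConfig can be constructed directly from a
plain dict and that the standard "input_info"/"compression" config layout applies; the toy model and
config contents are illustrative only.

import torch

toy_model = torch.nn.Sequential(torch.nn.Conv2d(1, 1, kernel_size=1))
toy_config = NNCFConfig({
    "input_info": {"sample_size": [1, 1, 4, 4]},
    "compression": {"algorithm": "quantization"}
})

# A custom dummy forward is only needed when mock-tensor tracing cannot
# reproduce the real forward call; otherwise it may be omitted entirely.
def toy_dummy_forward(model):
    return model(torch.ones([1, 1, 4, 4]))

compression_ctrl, compressed = create_compressed_model(
    toy_model, toy_config, dummy_forward_fn=toy_dummy_forward)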
Example #5
class TestInsertionCommands:
    @pytest.fixture()
    def setup(self):
        self.compressed_model = NNCFNetwork(
            InsertionPointTestModel(),
            [ModelInputInfo([1, 1, 10, 10])])  # type: NNCFNetwork

    conv1_module_scope = Scope.from_str(
        'InsertionPointTestModel/NNCFConv2d[conv1]')
    conv1_module_context = InputAgnosticOperationExecutionContext(
        '', conv1_module_scope, 0)
    point_for_conv1_weights = InsertionPoint(
        ia_op_exec_context=conv1_module_context,
        insertion_type=InsertionType.NNCF_MODULE_PRE_OP)
    point_for_conv1_inputs = InsertionPoint(
        ia_op_exec_context=conv1_module_context,
        insertion_type=InsertionType.NNCF_MODULE_PRE_OP)
    point_for_conv1_activations = InsertionPoint(
        ia_op_exec_context=conv1_module_context,
        insertion_type=InsertionType.NNCF_MODULE_POST_OP)

    conv2_module_scope = Scope.from_str(
        'InsertionPointTestModel/NNCFConv2d[conv2]')
    conv2_module_context = InputAgnosticOperationExecutionContext(
        '', conv2_module_scope, 0)
    point_for_conv2_weights = InsertionPoint(
        ia_op_exec_context=conv2_module_context,
        insertion_type=InsertionType.NNCF_MODULE_PRE_OP)
    point_for_conv2_inputs = InsertionPoint(
        ia_op_exec_context=conv2_module_context,
        insertion_type=InsertionType.NNCF_MODULE_PRE_OP)
    point_for_conv2_activations = InsertionPoint(
        ia_op_exec_context=conv2_module_context,
        insertion_type=InsertionType.NNCF_MODULE_POST_OP)

    linear_op_scope = Scope.from_str('InsertionPointTestModel/linear_0')
    linear_op_context = InputAgnosticOperationExecutionContext(
        'linear', linear_op_scope, 0)
    point_for_linear_weight_input = InsertionPoint(
        ia_op_exec_context=linear_op_context,
        insertion_type=InsertionType.OPERATOR_PRE_HOOK)
    point_for_linear_activation = InsertionPoint(
        ia_op_exec_context=linear_op_context,
        insertion_type=InsertionType.OPERATOR_POST_HOOK)

    relu_op_scope = Scope.from_str('InsertionPointTestModel/ReLU[relu]/relu')
    relu_op_context = InputAgnosticOperationExecutionContext(
        'relu', relu_op_scope, 0)
    point_for_relu_inputs = InsertionPoint(
        ia_op_exec_context=relu_op_context,
        insertion_type=InsertionType.OPERATOR_PRE_HOOK)
    point_for_relu_activations = InsertionPoint(
        ia_op_exec_context=relu_op_context,
        insertion_type=InsertionType.OPERATOR_POST_HOOK)

    available_points = [
        point_for_conv1_weights, point_for_conv2_weights,
        point_for_conv1_inputs, point_for_conv2_inputs,
        point_for_conv1_activations, point_for_conv2_activations,
        point_for_linear_activation, point_for_linear_weight_input,
        point_for_relu_activations, point_for_relu_inputs
    ]

    @pytest.mark.parametrize("insertion_point", available_points)
    def test_single_insertions(self, setup, insertion_point):
        if insertion_point.insertion_type in [
                InsertionType.OPERATOR_PRE_HOOK,
                InsertionType.OPERATOR_POST_HOOK
        ]:
            hook = lambda x: x
        else:
            hook = BaseOp(lambda x: x)

        command = InsertionCommand(insertion_point, hook)
        self.compressed_model.register_insertion_command(command)
        self.compressed_model.commit_compression_changes()

        #pylint:disable=protected-access
        if insertion_point.insertion_type == InsertionType.OPERATOR_PRE_HOOK:
            ctx = self.compressed_model.get_tracing_context()
            assert ctx._pre_hooks[
                command.insertion_point.ia_op_exec_context][0] is hook
        if insertion_point.insertion_type == InsertionType.OPERATOR_POST_HOOK:
            ctx = self.compressed_model.get_tracing_context()
            assert ctx._post_hooks[
                command.insertion_point.ia_op_exec_context][0] is hook
        if insertion_point.insertion_type == InsertionType.NNCF_MODULE_PRE_OP:
            module = self.compressed_model.get_module_by_scope(
                command.insertion_point.ia_op_exec_context.scope_in_model)
            assert module.pre_ops["0"] is hook

        if insertion_point.insertion_type == InsertionType.NNCF_MODULE_POST_OP:
            module = self.compressed_model.get_module_by_scope(
                command.insertion_point.ia_op_exec_context.scope_in_model)
            assert module.post_ops["0"] is hook

    priority_types = ["same", "different"]
    insertion_types = InsertionType
    priority_test_cases = list(
        itertools.product(priority_types, insertion_types))

    @staticmethod
    def check_order(iterable1: List, iterable2: List, ordering: List):
        for idx, order in enumerate(ordering):
            assert iterable1[idx] is iterable2[order]

    # pylint:disable=undefined-variable
    @pytest.mark.parametrize(
        "case",
        priority_test_cases,
        ids=[x[1].name + '-' + x[0] for x in priority_test_cases])
    def test_priority(self, case, setup):
        #pylint:disable=too-many-branches
        priority_type = case[0]
        insertion_type = case[1]
        if insertion_type in [
                InsertionType.NNCF_MODULE_PRE_OP,
                InsertionType.NNCF_MODULE_POST_OP
        ]:
            hook1 = BaseOp(lambda x: x)
            hook2 = BaseOp(lambda x: 2 * x)
            hook3 = BaseOp(lambda x: 3 * x)
        else:
            hook1 = lambda x: x
            hook2 = lambda x: 2 * x
            hook3 = lambda x: 3 * x

        if insertion_type == InsertionType.NNCF_MODULE_PRE_OP:
            point = self.point_for_conv2_weights
        elif insertion_type == InsertionType.NNCF_MODULE_POST_OP:
            point = self.point_for_conv1_activations
        elif insertion_type == InsertionType.OPERATOR_PRE_HOOK:
            point = self.point_for_linear_weight_input
        elif insertion_type == InsertionType.OPERATOR_POST_HOOK:
            point = self.point_for_relu_activations

        if priority_type == "same":
            # Same-priority commands will be executed in registration order
            command1 = InsertionCommand(point, hook1,
                                        OperationPriority.DEFAULT_PRIORITY)
            command2 = InsertionCommand(point, hook2,
                                        OperationPriority.DEFAULT_PRIORITY)
            command3 = InsertionCommand(point, hook3,
                                        OperationPriority.DEFAULT_PRIORITY)
        else:
            # Prioritized commands will be executed in ascending priority order
            command1 = InsertionCommand(
                point, hook1, OperationPriority.SPARSIFICATION_PRIORITY)
            command2 = InsertionCommand(
                point, hook2, OperationPriority.QUANTIZATION_PRIORITY)
            command3 = InsertionCommand(point, hook3,
                                        OperationPriority.DEFAULT_PRIORITY)

        self.compressed_model.register_insertion_command(command1)
        self.compressed_model.register_insertion_command(command2)
        self.compressed_model.register_insertion_command(command3)
        self.compressed_model.commit_compression_changes()

        hook_list = [hook1, hook2, hook3]

        if priority_type == "same":
            order = [0, 1, 2]
        elif priority_type == "different":
            order = [2, 0, 1]

        #pylint:disable=protected-access
        if insertion_type == InsertionType.OPERATOR_PRE_HOOK:
            ctx = self.compressed_model.get_tracing_context()
            self.check_order(ctx._pre_hooks[point.ia_op_exec_context],
                             hook_list, order)
        if insertion_type == InsertionType.OPERATOR_POST_HOOK:
            ctx = self.compressed_model.get_tracing_context()
            self.check_order(ctx._post_hooks[point.ia_op_exec_context],
                             hook_list, order)

        if insertion_type == InsertionType.NNCF_MODULE_PRE_OP:
            module = self.compressed_model.get_module_by_scope(
                point.ia_op_exec_context.scope_in_model)
            # Works because PyTorch ModuleDict is ordered
            self.check_order(list(module.pre_ops.values()), hook_list, order)

        if insertion_type == InsertionType.NNCF_MODULE_POST_OP:
            module = self.compressed_model.get_module_by_scope(
                point.ia_op_exec_context.scope_in_model)
            # Works because PyTorch ModuleDict is ordered
            self.check_order(list(module.post_ops.values()), hook_list, order)
Example #6
def create_compressed_model(model: Module, config: NNCFConfig,
                            resuming_state_dict: dict = None,
                            dummy_forward_fn: Callable[[Module], Any] = None,
                            wrap_inputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None,
                            dump_graphs=True,) \
    -> Tuple[CompressionAlgorithmController, NNCFNetwork]:
    """
    The main function used to produce a model ready for compression fine-tuning from an original PyTorch
    model and a configuration object.
    dummy_forward_fn
    :param model: The original model. Should have its parameters already loaded from a checkpoint or another
    source.
    :param config: A configuration object used to determine the exact compression modifications to be applied
    to the model
    :param resuming_state_dict: A PyTorch state dict object to load (strictly) into the compressed model after
    building.
    :param dummy_forward_fn: if supplied, will be used instead of a *forward* function call to build
    the internal graph representation via tracing. Specifying this is useful when the original training pipeline
    has special formats of data loader output or has additional *forward* arguments other than input tensors.
    Otherwise, the *forward* call of the model during graph tracing will be made with mock tensors according
    to the shape specified in the config object.
    :param wrap_inputs_fn: if supplied, will be used on the module's input arguments during a regular, non-dummy
    forward call before passing the inputs to the underlying compressed model. This is required if the model's input
    tensors that are important for compression are not supplied as arguments to the model's forward call directly, but
    instead are located in a container (such as list), and the model receives the container as an argument.
    wrap_inputs_fn should take as input two arguments - the tuple of positional arguments to the underlying
    model's forward call, and a dict of keyword arguments to the same. The function should wrap each tensor among the
    supplied model's args and kwargs that is important for compression (e.g. quantization) with an nncf.nncf_model_input
    function, which is a no-operation function and marks the tensors as inputs to be traced by NNCF in the internal
    graph representation. Output is the tuple of (args, kwargs), where args and kwargs are the same as were supplied in
    input, but each tensor in the original input.
    :param dump_graphs: Whether or not should also dump the internal graph representation of the
    original and compressed models in the .dot format into the log directory.
    :return: A controller for the compression algorithm (or algorithms, in which case the controller
    is an instance of CompositeCompressionController) and the model ready for compression parameter training wrapped
    as an object of NNCFNetwork."""

    # Compress the model that will be deployed for inference on the target device. There is no need to
    # compress parts of the model that are used only at the training stage (e.g. the AuxLogits of the
    # Inception-v3 model) or unused modules with weights. As a consequence, there is no need to worry
    # about spoiling BN statistics, since they are disabled in eval mode.
    model.eval()

    if dump_graphs:
        if dummy_forward_fn is None:
            input_info_list = create_input_infos(config)
            graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(
                    input_info_list, with_input_tracing=True))
        else:
            graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        if is_main_process():
            graph = graph_builder.build_graph(model)
            graph.visualize_graph(
                osp.join(config.get("log_dir", "."), "original_graph.dot"))

    set_debug_log_dir(config.get("log_dir", "."))

    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching',
                                               [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    compressed_model = NNCFNetwork(
        model,
        input_infos=input_info_list,
        dummy_forward_fn=dummy_forward_fn,
        wrap_inputs_fn=wrap_inputs_fn,
        ignored_scopes=ignored_scopes,
        target_scopes=target_scopes,
        scopes_without_shape_matching=scopes_without_shape_matching)

    should_init = resuming_state_dict is None
    compression_algo_builder_list = create_compression_algorithm_builders(
        config, should_init=should_init)

    for builder in compression_algo_builder_list:
        compressed_model = builder.apply_to(compressed_model)
    compression_ctrl = compressed_model.commit_compression_changes()

    try:
        if resuming_state_dict is not None:
            load_state(compressed_model, resuming_state_dict, is_resume=True)
    finally:
        if dump_graphs and is_main_process() and compression_algo_builder_list:
            if dummy_forward_fn is None:
                compressed_graph_builder = GraphBuilder(
                    custom_forward_fn=create_dummy_forward_fn(
                        input_info_list, with_input_tracing=False))
            else:
                compressed_graph_builder = GraphBuilder(
                    custom_forward_fn=dummy_forward_fn)

            graph = compressed_graph_builder.build_graph(
                compressed_model, compressed_model.get_tracing_context())
            graph.visualize_graph(
                osp.join(config.get("log_dir", "."), "compressed_graph.dot"))
    return compression_ctrl, compressed_model
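
To make the wrap_inputs_fn contract from the docstring concrete, here is a hedged sketch for a
hypothetical model that receives its input tensors inside a dict passed as the first positional
argument. The nncf_model_input name is taken from the docstring above; the import location and the
dict-shaped input are assumptions.

from nncf import nncf_model_input  # import path assumed

def wrap_inputs(args, kwargs):
    # args[0] is assumed to be a dict of input tensors; mark each tensor so
    # NNCF traces it as a model input, and pass everything else through as-is.
    batch = {name: nncf_model_input(tensor) for name, tensor in args[0].items()}
    return (batch,) + args[1:], kwargs

# It would then be supplied as:
#   create_compressed_model(model, config, wrap_inputs_fn=wrap_inputs)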