Example no. 1
  def deployable_model(self, src_dir, used_for_xmodel=False):
    if used_for_xmodel:
      device = torch.device('cpu')
      inputs = self._inputs.to(device)
    else:
      device = self._device
      inputs = self._inputs

    model = copy.deepcopy(self._model)
    model.load_state_dict(
        torch.load(os.path.join(src_dir, _DEPLOYABLE_MODEL_NAME)))
    qprocessor = qproc.TorchQuantProcessor(
        'test',
        model,
        inputs,
        output_dir=src_dir,
        bitwidth_w=self._bitwidth,
        bitwidth_a=self._bitwidth,
        mix_bit=self._mix_bit,
        device=device)
    self._qprocessor = qprocessor
    if used_for_xmodel:
      logging.info(
          'Forward the deployable model with data of batch_size=1 in cpu mode to dump xmodel.'
      )
    return qprocessor.quant_model()
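
A minimal usage sketch for the method above, assuming it lives on a QAT processor object; the names qat_processor, sample_input, and the './qat_output' directory are illustrative assumptions, not part of the example.

import torch

# Hedged usage sketch: 'qat_processor' and 'sample_input' are assumed names.
deployable = qat_processor.deployable_model('./qat_output', used_for_xmodel=True)

# As the log message above notes, forward a batch_size=1 input on CPU before
# dumping the xmodel.
with torch.no_grad():
    deployable(sample_input.cpu())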
Example no. 2
  def _to_deployable(self, trained_model, output_dir):
    if not self._quant_config or self._module_map is None:
      raise RuntimeError('Must call "trainable_model" first.')

    if hasattr(trained_model, 'conv_bn_fused') and getattr(
        trained_model, 'conv_bn_fused'):
      raise RuntimeError(
          'Not allowed to convert a fused model to a deployable model.')

    # Copy trained parameters from transformed model to original float model.
    orig_state_dict = self._model.state_dict()
    trained_state_dict = trained_model.state_dict()
    state_dict = {}
    for key in orig_state_dict:
      if '.' in key:
        module_name, weight_name = key.rsplit('.', 1)
      else:
        # Such as 'global_step'.
        module_name, weight_name = None, key
      if module_name in self._module_map:
        # Currently only for bn.
        # conv1.0.0.bn.weight -> conv1.0.1.weight
        trained_module_name = self._module_map[module_name]
        trained_key = '.'.join([trained_module_name, weight_name])
      else:
        trained_key = key
      state_dict[key] = trained_state_dict[trained_key]
      logging.vlog(3, 'state dict of {} is from {}'.format(key, trained_key))
    model = copy.deepcopy(self._model)
    model.load_state_dict(state_dict)
    model.eval()

    qprocessor = qproc.TorchQuantProcessor(
        'test',
        model,
        self._inputs,
        output_dir=self._tmp_qat_dir,
        bitwidth_w=self._bitwidth,
        bitwidth_a=self._bitwidth,
        mix_bit=self._mix_bit,
        device=self._device)

    quantizer = qprocessor.quantizer
    self._fill_in_quant_config(quantizer)

    sub_dir = os.path.join(output_dir, 'test')
    io_util.create_work_dir(sub_dir)
    # Must set adjust_pos=False first, because the quantizer modifies its
    # quant info in place when adjust_pos=True.
    # Export the original (not yet adjusted) quant info for testing the
    # deployable model; its accuracy should match the trainable model's.
    quantizer.export_quant_config(
        os.path.join(sub_dir, _QUANT_INFO_FILE_NAME), adjust_pos=False)
    quantizer.export_quant_config(
        os.path.join(output_dir, _QUANT_INFO_FILE_NAME), adjust_pos=True)

    self._qprocessor = qprocessor
    return model
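
A hedged usage sketch for this helper, assuming it is reached after training the model returned by trainable_model; qat_processor, trained_model, and the output directory are illustrative names.

# Hedged usage sketch: 'qat_processor' and 'trained_model' are assumed to come
# from an earlier trainable_model() call followed by a training loop.
deployable = qat_processor._to_deployable(trained_model, output_dir='./qat_output')

# The returned model carries the trained weights in the original float-model
# layout and is already in eval mode; the quant config has been exported to
# './qat_output' and './qat_output/test'.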
Example no. 3
    def convert_to_deployable(self, trained_model, mix_bit=False):
        if not self._qinfo_to_quantizer or not self._module_map:
            raise RuntimeError('Must call "trainable_model" first.')

        # Copy trained parameters from transformed model to original float model.
        orig_state_dict = self._model.state_dict()
        trained_state_dict = trained_model.state_dict()
        state_dict = {}
        for key in orig_state_dict.keys():
            module_name, weight_name = key.rsplit('.', 1)
            if module_name in self._module_map:
                trained_module_name = self._module_map[module_name]
                trained_key = '.'.join([trained_module_name, weight_name])
            else:
                trained_key = key
            state_dict[key] = trained_state_dict[trained_key]
        model = copy.deepcopy(self._model)
        model.load_state_dict(state_dict)
        model.eval()
        # inputs = dummy_inputs(self._input_specs)
        # qprocessor = qproc.TorchQuantProcessor(
        #     'test',
        #     model,
        #     [inp.cuda() for inp in inputs],
        #     mix_bit=mix_bit,
        #     device=torch.device('cuda'))
        inputs = self._input_args
        qprocessor = qproc.TorchQuantProcessor('test',
                                               model,
                                               inputs,
                                               mix_bit=mix_bit,
                                               device=torch.device('cuda'))

        quantizer = qprocessor.quantizer
        self._fill_in_quant_info(quantizer, self._qinfo_to_quantizer)
        quantizer.export_quant_config()

        quant_model = quantizer.quant_model
        quant_model.dump_xmodel = dump_xmodel
        self.deploy_quantizer = quantizer
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
        NndctScreenLogger().info("=>Deployable model is generated.")
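
A hedged usage sketch; apart from convert_to_deployable and deploy_quantizer, which appear in the example, the object and variable names are assumptions.

# Hedged usage sketch: 'qat_processor' and 'trained_model' are assumed names.
qat_processor.convert_to_deployable(trained_model, mix_bit=False)

# This variant does not return the model; it is held by the quantizer stored
# on the instance.
deployable = qat_processor.deploy_quantizer.quant_model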
Example no. 4
    def __init__(self, model, input_args, base_bit=8, mix_bit=False):
        self._model = model
        self._qinfo_to_quantizer = None

        # Original module name to transformed module name.
        # We can use it to convert the transformed model's state_dict keys
        # so that the original float model can load it.
        self._module_map = None
        # if not isinstance(input_specs, (tuple, list)):
        #   input_specs = [input_specs]
        # self._input_specs = input_specs
        self._input_args = input_args

        # Turn off optimization options for the subsequent quantization.
        option_util.set_option_value("nndct_quant_opt", 0)
        option_util.set_option_value("nndct_param_corr", False)
        option_util.set_option_value("nndct_equalization", False)

        #inputs = dummy_inputs(self._input_specs)
        inputs = self._input_args

        parser = parse.TorchParser()
        #graph = parser(self._model._get_name(), self._model, *inputs)
        graph = parser(self._model._get_name(), self._model, inputs)

        qprocessor = qproc.TorchQuantProcessor('calib',
                                               self._model,
                                               inputs,
                                               mix_bit=mix_bit,
                                               device=torch.device('cpu'))
        quantizer = qprocessor.quantizer

        # Use a hard-coded value to fill in fp_pos and export the quant config,
        # so that we can initialize a new TorchQuantProcessor in 'test' mode later.
        for _, group in quantizer.quant_config.items():
            for key in group:
                group[key][-1] = 4
        quantizer.export_quant_config()

        # Use the quantizer's graph to build param_to_node, as the quant_info is
        # generated from the quantizer's graph.
        # For example, the param 'ResNet::conv.bias' only exists in the quantizer's
        # graph because it comes from the fused conv + bias.
        param_to_node = {}
        for node in quantizer.Nndctgraph.nodes:
            for name, tensor in node.op.params.items():
                param_to_node[tensor.name] = node.name

        # Create quantizer modules and build qconfig for each node.
        node_to_qconfig = {}
        qinfo_to_quantizer = {}

        def get_num_bits(quant_info):
            return quant_info[0] if quant_info[0] == 8 else base_bit

        group_name = 'param'
        group = quantizer.quant_config[group_name]
        for param_name, info in group.items():
            # layer1.0.conv1.weight
            state_dict_key = get_short_name(param_name)
            node_name = param_to_node[param_name]
            qconfig = node_to_qconfig.get(node_name, QConfig())
            attr_name = state_dict_key.split('.')[-1]
            tqt_quantizer = TQTQuantizer(get_num_bits(info),
                                         tensor_type='param')
            setattr(qconfig, attr_name, tqt_quantizer)
            node_to_qconfig[node_name] = qconfig
            qinfo_to_quantizer[_quant_info_key(group_name,
                                               param_name)] = tqt_quantizer

        for group_name in ['input', 'output']:
            group = quantizer.quant_config[group_name]
            for node_name, info in group.items():
                qconfig = node_to_qconfig.get(node_name, QConfig())
                tqt_quantizer = TQTQuantizer(get_num_bits(info),
                                             tensor_type='blob')
                setattr(qconfig, group_name, tqt_quantizer)
                node_to_qconfig[node_name] = qconfig
                qinfo_to_quantizer[_quant_info_key(group_name,
                                                   node_name)] = tqt_quantizer
        self._qinfo_to_quantizer = qinfo_to_quantizer

        model_topo = ModelTopology()
        for node in graph.nodes:
            name = _topo_node_name(node)
            inputs = []
            for input_name in node.in_nodes:
                inputs.append(_topo_node_name(input_name))
            qconfig = node_to_qconfig.get(node.name, None)
            model_topo.add_node(TopoNode(name, qconfig, None, inputs, node.op))

        # TODO(yuwang): Output all transformed modules.
        transforms = [
            module_transform.FuseAndQuantizeConv2dBatchNorm(),
            module_transform.QuantizeLinear(),
            module_transform.ReplaceAdaptiveAvgPool2d(),
        ]

        transformer = ModuleTransformer(self._model, model_topo, transforms)
        model, self._module_map = transformer.transform()

        model.enable_quant = types.MethodType(enable_quant, model)
        model.disable_quant = types.MethodType(disable_quant, model)
        model.enable_warmup = types.MethodType(enable_warmup, model)
        model.disable_warmup = types.MethodType(disable_warmup, model)
        model.freeze_bn = types.MethodType(freeze_bn, model)
        insert_quantizer(model, node_to_qconfig)

        quantizer.quant_model = model
        self.quant_quantizer = quantizer
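
A hedged construction sketch for this __init__; the owning class name (QatProcessor below) is an assumption, and the ResNet-18 model and dummy input are illustrative only.

import torch
import torchvision

# Hedged sketch: 'QatProcessor' stands in for whatever class owns the __init__
# above; the model and input shape are illustrative.
float_model = torchvision.models.resnet18()
dummy_input = torch.randn(1, 3, 224, 224)

processor = QatProcessor(float_model, dummy_input, base_bit=8, mix_bit=False)

# After construction, the transformed model with quantizers inserted is held by
# the calibration quantizer (see the last two lines of the example).
trainable = processor.quant_quantizer.quant_model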
Example no. 5
  def __init__(self,
               model,
               inputs,
               bitwidth,
               mix_bit=False,
               device=torch.device("cuda")):

    if isinstance(model, torch.nn.DataParallel):
      raise ValueError('DataParallel object is not allowed.')

    # Turn off optimization options for the subsequent quantization.
    option_util.set_option_value("nndct_quant_opt", 0)
    option_util.set_option_value("nndct_param_corr", False)
    option_util.set_option_value("nndct_equalization", False)

    self._model = model
    self._inputs = inputs
    self._bitwidth = bitwidth
    self._mix_bit = mix_bit
    self._device = device

    # Original module name to transformed module name.
    # We can use it to convert the transformed model's state_dict keys
    # so that the original float model can load it.
    self._module_map = None

    self._trainable_model = None
    self._tmp_qat_dir = '.qat'

    qprocessor = qproc.TorchQuantProcessor(
        'calib',
        model,
        inputs,
        output_dir=self._tmp_qat_dir,
        bitwidth_w=self._bitwidth,
        bitwidth_a=self._bitwidth,
        mix_bit=mix_bit,
        device=device)
    quantizer = qprocessor.quantizer
    self._torch_quantizer = quantizer

    self._qinfo_keys = [
        TensorTypes.PARAM, TensorTypes.INPUT, TensorTypes.OUTPUT
    ]

    # Use a hard-coded value to fill in fp_pos and export the quant config,
    # so that we can initialize a new TorchQuantProcessor in 'test' mode later.
    quant_config = quantizer.quant_config
    for key, group in quant_config.items():
      if key not in self._qinfo_keys:
        continue
      for item in group:
        group[item][-1] = 4
    quantizer.export_quant_config(adjust_pos=False)

    # Use the quantizer's graph to build param_to_node, as the quant_info is
    # generated from the quantizer's graph.
    # For example, the param 'ResNet::conv.bias' only exists in the quantizer's
    # graph because it comes from the fused conv + bias.
    self._tensor_to_node = {}
    graph = quantizer.Nndctgraph
    for node in graph.nodes:
      for name, tensor in node.op.params.items():
        self._tensor_to_node[tensor.name] = (node.name, name)

    parser = parse.TorchParser()
    self._graph = parser(self._model._get_name(), self._model, self._inputs)

    quant_optimizer = QuantOptimizer()
    if NndctOption.nndct_partition_mode.value > 0:
      quant_optimizer._tag_quant_nodes_v2(self._graph)
    else:
      quant_optimizer._tag_quant_nodes(self._graph)

    def get_bitwidth(quant_info):
      return quant_info[0] if quant_info[0] == 8 else bitwidth

    # Create quantizer for each item in quant config.
    self._node_to_qconfig = {}
    self._quant_config = copy.deepcopy(quant_config)
    for name, group in self._quant_config.items():
      if name not in self._qinfo_keys:
        continue
      for key, qinfo in group.items():
        if name == TensorTypes.PARAM:
          node, param = self._tensor_to_node[key]
          attr = ModuleHooker._parameter_map[param]
          tensor_type = 'weight'
        else:
          node, attr = key, None
          tensor_type = 'act'

        tqt_quantizer = TQTQuantizer(get_bitwidth(qinfo), tensor_type)
        qconfig = self._node_to_qconfig.get(node, config_mod.LayerRuntimeSpec())
        if name == TensorTypes.PARAM:
          qconfig.add_weight_quantizer(attr, tqt_quantizer)
        elif name == TensorTypes.INPUT:
          qconfig.add_input_quantizer(tqt_quantizer)
        else:
          qconfig.add_output_quantizer(tqt_quantizer)

        self._node_to_qconfig[node] = qconfig
        self._quant_config[name][key] = (node, attr)
        logging.vlog(2, '[{}][{}] = ({}, {})'.format(name, key, node, attr))
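
A hedged construction sketch for this __init__ as well; QatProcessor is again an assumed class name, and the CUDA ResNet-18 model and input are illustrative.

import torch
import torchvision

# Hedged sketch: 'QatProcessor' is an assumed name for the owner of the
# __init__ above; model and input are illustrative.
float_model = torchvision.models.resnet18().cuda()
dummy_input = torch.randn(1, 3, 224, 224).cuda()

processor = QatProcessor(float_model, dummy_input, bitwidth=8,
                         mix_bit=False, device=torch.device('cuda'))

# At this point the quant config, parsed graph, and per-node quantizer specs
# live on the instance; the trainable and deployable models are produced by the
# other methods shown in the earlier examples.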