Example 1
  def _init_quant_env(self, quant_mode, output_dir, quant_strategy):
    if isinstance(quant_mode, int):
      NndctScreenLogger().warning(f"quant_mode will not support integer value in future version. It supports string values 'calib' and 'test'.")
      qmode = quant_mode
    elif isinstance(quant_mode, str):
      if quant_mode == 'calib':
        qmode = 1
      elif quant_mode == 'test':
        qmode = 2
      else:
        NndctScreenLogger().error(f"quant_mode supported values are 'calib' and 'test'. Change it to 'calib' as calibration mode")
        qmode = 1
    else:
      NndctScreenLogger().error(f"quant_mode supported values are string 'calib' and 'test'. Change it to 'calib' as calibration mode")
      qmode = 1

    if NndctOption.nndct_quant_mode.value > 0:
      qmode = NndctOption.nndct_quant_mode.value
    
    if qmode == 1:
      NndctScreenLogger().info(f"Quantization calibration process start up...")
    elif qmode == 2:
      NndctScreenLogger().info(f"Quantization test process start up...")
      
    quantizer = TORCHQuantizer.create_from_strategy(qmode, 
                                                    output_dir, 
                                                    quant_strategy)
    return quantizer, qmode
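The mode-handling branch above collapses into a small pure function. Below is a minimal standalone sketch of the same normalization logic; normalize_quant_mode is an illustrative name, not part of the NNDCT API.

def normalize_quant_mode(quant_mode):
    # Map 'calib'/'test' (or a legacy integer) to the internal 1/2 codes;
    # unknown values fall back to 1 (calibration), mirroring the method above.
    if isinstance(quant_mode, int):
        return quant_mode  # deprecated legacy path
    if quant_mode == 'calib':
        return 1
    if quant_mode == 'test':
        return 2
    return 1  # unknown values fall back to calibration mode

assert normalize_quant_mode('calib') == 1
assert normalize_quant_mode('test') == 2
assert normalize_quant_mode('unknown') == 1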
Example 2
 def deploy_model(self):
     if not self._qat_proc:
         NndctScreenLogger().warning(
             f"Only quant aware training process has deployable model.")
         return
     NndctScreenLogger().info(f"=>Get deployable module.")
     return self.processor.deploy_model()
Example 3
    def build_torch_graph(self, graph_name, module, input_args, train=False):
        self._module = module
        NndctScreenLogger().info("Start to trace model...")
        fw_graph, params = self._trace_graph_from_model(input_args, train)
        NndctScreenLogger().info("Finish tracing.")

        self._node_kinds = {
            node.kind().split(":")[-1]
            for node in fw_graph.nodes()
        }
        if NndctOption.nndct_parse_debug.value >= 1:
            NndctDebugLogger.write(f"jit graph:\n{fw_graph}")
            NndctDebugLogger.write(
                f"\nparsing nodes types:\n{self._node_kinds}\n")

        raw_graph, raw_params = self._build_raw_graph(graph_name, fw_graph,
                                                      params)
        if NndctOption.nndct_parse_debug.value >= 2:
            NndctDebugLogger.write(f"\ntorch raw graph:\n{raw_graph}")
        opt_graph = self._opt_raw_graph(raw_graph)
        if NndctOption.nndct_parse_debug.value >= 2:
            NndctDebugLogger.write(f"\ntorch opt graph:\n{raw_graph}")

        if NndctOption.nndct_parse_debug.value >= 3:
            self._check_stub_topology(opt_graph)

        return opt_graph, raw_params
Example 4
    def _check_calibration_completion(self):
        ret = True
        # Check node output tensors
        for node in self.Nndctgraph.nodes:
            if self.configer.is_node_quantizable(
                    node, self.lstm) and node.in_quant_part:
                qout = self.configer.quant_output(node.name).name
                bnfp = self.get_quant_config(qout, False)
                if bnfp[1] is None:
                    if node.op.type not in [NNDCT_OP.SIGMOID, NNDCT_OP.TANH]:
                        NndctScreenLogger().warning(
                            f'Node output tensor is not quantized: {node.name} type: {node.op.type}'
                        )
                        ret = False
        # Check node input tensors
        for item in self._QuantInfo['input']:
            bnfp = self._QuantInfo['input'][item]
            if bnfp[1] is None:
                NndctScreenLogger().warning(
                    f'Input tensor is not quantized: {item}')
                ret = False
        # Check node parameters
        for item in self._QuantInfo['param']:
            bnfp = self._QuantInfo['param'][item]
            if bnfp[1] is None:
                NndctScreenLogger().warning(
                    f'Parameter tensor is not quantized: {item}')
                ret = False

        return ret
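The checks above assume each quant-config entry is a [bitwidth, fix_pos] pair whose second element stays None until calibration has produced a fixed position. A standalone sketch of the same completeness test over a plain dict (names and layout here are illustrative assumptions):

def all_quantized(quant_info):
    # True only when every [bitwidth, fix_pos] entry has its fix_pos filled in.
    ok = True
    for section in ('input', 'param'):
        for name, bnfp in quant_info.get(section, {}).items():
            if bnfp[1] is None:
                print(f'{section} tensor is not quantized: {name}')
                ok = False
    return ok

print(all_quantized({'input': {'x': [8, 5]}, 'param': {'w': [8, None]}}))  # False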
Example 5
    def deploy(self, run_fn, run_args, fmt='xmodel'):
        NndctScreenLogger().info('Quantized model deployment begin:')
        # export quantized model
        # note: batch size must be 1 for deployment
        # check function input
        if fmt not in ['xmodel', 'onnx', 'torch_script']:
            NndctScreenLogger().error(
                f"Parameter deploy only can be set 'xmodel', 'onnx' and 'torch_script'."
            )

        # set quantizer status and run simple evaluation
        self.quantizer.quant_mode = 2
        register_output_hook(self.quantizer.quant_model, record_once=True)
        set_outputs_recorder_status(self.quantizer.quant_model, True)
        if self.quantizer.fast_finetuned:
            self.advanced_quant_setup()
        run_fn(*run_args)

        # export quantized model
        if fmt == 'xmodel':
            self.export_xmodel(self.quantizer.output_dir, deploy_check=False)
        elif fmt == 'onnx':
            self.export_onnx_model(self.quantizer.output_dir, verbose=True)
        elif fmt == 'torch_script':
            self.export_traced_torch_script(self.quantizer.output_dir,
                                            verbose=True)
        NndctScreenLogger().info('Quantized model deployment end.')
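For context, a hedged sketch of how this deploy step sits in the public vai_q_pytorch flow; torch_quantizer is the documented entry point, while the toy model and dummy shape are placeholders, and method availability can vary between releases.

import torch
from pytorch_nndct.apis import torch_quantizer

model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU()).eval()
dummy_input = torch.randn(1, 3, 32, 32)  # deployment assumes batch size 1
quantizer = torch_quantizer('test', model, (dummy_input,),
                            output_dir='quantize_result')

quantizer.quant_model(dummy_input)           # one forward pass to record outputs
quantizer.export_xmodel(deploy_check=False)  # or export_onnx_model(...)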
Example 6
def prepare_quantizable_module(
    module: torch.nn.Module,
    input_args: Union[torch.Tensor, Sequence[Any]],
    export_folder: str,
    state_dict_file: Optional[str] = None,
    quant_mode: int = 1,
    device: torch.device = torch.device("cuda")
) -> Tuple[torch.nn.Module, Graph]:

    nndct_utils.create_work_dir(export_folder)

    if isinstance(state_dict_file, str):
        state_dict = torch.load(state_dict_file)
        module.load_state_dict(state_dict)

    export_file = os.path.join(export_folder,
                               module._get_name() + TorchSymbol.SCRIPT_SUFFIX)

    # switch to specified device
    module, input_args = to_device(module, input_args, device)

    # parse origin module to graph
    NndctScreenLogger().info(f"=>Parsing {module._get_name()}...")
    graph = parse_module(module, input_args)
    NndctScreenLogger().info(
        f"=>Quantizable module is generated.({export_file})")
    # recreate quantizable module from graph
    quant_module = recreate_nndct_module(graph, True, export_file).to(device)
    quant_module.train(mode=module.training)
    # hook module with graph
    connect_module_with_graph(quant_module, graph)

    return quant_module, graph
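A hedged usage sketch of the helper above; the import path is an assumption based on the NNDCT package layout and may differ between releases.

import torch
from pytorch_nndct.qproc.utils import prepare_quantizable_module  # assumed path

float_model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU())
dummy_input = torch.randn(1, 3, 32, 32)
quant_module, graph = prepare_quantizable_module(
    float_model, dummy_input,
    export_folder='quantize_result',
    device=torch.device('cpu'))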
Example 7
    def cache_net_inpouts(self, run_fn, run_args):
        total_m, *_, available_m = list(
            map(lambda x: x / 1024,
                map(int,
                    os.popen('free -t -m').readlines()[1].split()[1:])))
        NndctScreenLogger().info(
            f"Mem status(total mem: {total_m:.2f}G, available mem: {available_m:.2f}G)."
        )
        cache_layers = []
        monitor_layers = []
        batch_layers = []
        for node in self.graph.nodes:
            # if node.op.type == NNDCT_OP.INPUT or node in end_nodes:
            if node.op.type == NNDCT_OP.INPUT or node in self._last_quant_nodes:
                cache_layers.append(node.module)
            elif self.quantizer.configer.is_conv_like(node):
                monitor_layers.append(node.module)
                if not batch_layers:
                    batch_layers.append(node.module)

        monitor_handlers = self.hook_memory_monitor(monitor_layers)
        batch_handlers = self.hook_batch_size(batch_layers)
        cache_handlers = self.hook_cache_output(cache_layers, monitor_mem=True)
        with torch.no_grad():
            run_fn(*run_args)
            # memory statistics
        total_memory_cost = 0.0
        for layer in cache_layers:
            total_memory_cost += self._mem_count[layer]
            del self._mem_count[layer]

        NndctScreenLogger().info(
            f"Memory cost by fast finetuning is {total_memory_cost:.2f} G.")
        if total_memory_cost > 0.8 * available_m:
            NndctScreenLogger().warning(
                f"There is not enought memory for fast finetuning and this process will be ignored!.Try to use a smaller calibration dataset."
            )
            return
        self.clean_hooks(monitor_handlers + cache_handlers + batch_handlers)
        net_inputs = []
        for node in self.input_nodes:
            cached_net_input = [
                out for out in self.cached_outputs[node.module]
            ]
            net_inputs.append(cached_net_input)
            del self.cached_outputs[node.module]

        net_outputs = {}
        for node in self._last_quant_nodes:
            cached_net_output = [
                out for out in self.cached_outputs[node.module]
            ]
            net_outputs[node.module] = cached_net_output
            del self.cached_outputs[node.module]

        torch.cuda.empty_cache()
        return net_inputs, net_outputs
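The memory probe at the top of this method is plain shell parsing; here is the same `free -t -m` readout as a standalone snippet (Linux only). Row 1 of the output is the "Mem:" line, and the columns after the label are total/used/free/shared/buff-cache/available, in MiB.

import os

fields = os.popen('free -t -m').readlines()[1].split()[1:]
total_m, *_, available_m = [int(x) / 1024 for x in fields]
print(f'total mem: {total_m:.2f}G, available mem: {available_m:.2f}G')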
Example 8
    def features_check(self):
        if self.fast_finetuned and not self._finetuned_para_loaded:
            NndctScreenLogger().warning(
                f'Fast finetuned parameters are not loaded. \
Call load_ft_param to load them.')
        if self.bias_corrected and not self._bias_corr_loaded:
            NndctScreenLogger().warning(
                f'Bias correction file is not loaded. Set \
command line option \"--nndct_param_corr\" to load it.')
Example 9
 def quantize(self, run_fn, run_args, ft_run_args):
     NndctScreenLogger().info(f'Model quantization calibration begin:')
     # calibration
     self.quantizer.quant_mode = 1
     if ft_run_args is not None:
         self.finetune(run_fn, ft_run_args)
         self.quantizer.fast_finetuned = True
     run_fn(*run_args)
     self.quantizer.export_quant_config()
     NndctScreenLogger().info(f'Model quantization calibration end.')
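A hedged sketch of how this calibration step is driven from user code via the public API; the toy model and single forward pass stand in for a real model and calibration loop.

import torch
from pytorch_nndct.apis import torch_quantizer

model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU()).eval()
dummy_input = torch.randn(1, 3, 32, 32)
quantizer = torch_quantizer('calib', model, (dummy_input,))
quantizer.quant_model(dummy_input)  # forward passes feed calibration
quantizer.export_quant_config()     # persist the calibrated quant steps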
Example 10
 def test(self, run_fn, run_args):
     NndctScreenLogger().info(f'Quantized model test begin:')
     # test and print log message
     self.quantizer.quant_mode = 2
     if self.quantizer.fast_finetuned:
         self.advanced_quant_setup()
     log_str = run_fn(*run_args)
     NndctScreenLogger().info(
         f'Quantized model evaluation returns metric:\n {log_str}')
     NndctScreenLogger().info('Quantized model test end.')
Example 11
    def _show_partition_result_on_screen(self, graph, output_dir,
                                         verbose_level):
        # pd.set_option("display.max_columns", None)
        # pd.set_option("display.max_rows", None)
        # pd.set_option("max_colwidth", 100)
        # pd.set_option("display.width", 5000)
        target_name = DPUTargetHelper.get_name(
            self._target.get_devices()[0].get_legacy_dpu_target())
        if verbose_level == 0:
            return
        elif verbose_level == 1:
            d = []
            for node in graph.nodes:
                if node.op.type in [NNDCT_OP.RETURN, NNDCT_OP.INPUT]:
                    continue
                if node.target_device is not None:
                    if node.target_device.get_device_type() == DeviceType.CPU:
                        d.append([
                            node.name, node.op.type,
                            node.target_device.get_filter_message()
                        ])
            if d:
                # df = pd.DataFrame(d, columns=["Node Name", "Op Type", "Hardware Constraints"])
                NndctScreenLogger().info(
                    f"The operators assigned to the CPU are as follows(see more details in '{os.path.join(output_dir, f'inspect_{target_name}.txt')}'):"
                )
                # print(df)
                print(
                    tabulate(d,
                             headers=[
                                 "node name", "op Type", "hardware constraints"
                             ]))
            else:
                NndctScreenLogger().info(
                    f"All the operators are assigned to the DPU(see more details in '{os.path.join(output_dir, f'inspect_{target_name}.txt')}')"
                )

        elif verbose_level == 2:
            d = []
            for node in graph.nodes:
                if node.op.type in [NNDCT_OP.RETURN, NNDCT_OP.INPUT]:
                    continue
                if node.target_device is not None:
                    d.append([
                        node.name, node.op.type,
                        node.target_device.get_device_type().value
                    ])
            # df = pd.DataFrame(d, columns=["Node_Name", "Op_Type", "Assgined_Device"])
            NndctScreenLogger().info(
                f"Operator device allocation table(see more details in '{os.path.join(output_dir, 'inspect.txt')}'):"
            )
            # print(df)
            print(
                tabulate(d,
                         headers=["node name", "op type", "assgined device"]))
Example 12
def dump_xmodel(output_dir="quantize_result", deploy_check=False):
    r"""converts module to xmodel for deployment
  compilation only works when quantm model = 2.
  The xmodel and some checking data will be generated under work dir.

  Args:
    deploy_check(bool): if true, can dump blobs and parameters of model for deployment verification

  Returns:
    None
  """
    quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
    if quantizer and quantizer.quant_mode > 1:
        nndct_utils.create_work_dir(output_dir)

        # compile to xmodel

        compiler = CompilerFactory.get_compiler("xmodel")

        NndctScreenLogger().info("=>Converting to xmodel ...")
        deploy_graphs = get_deploy_graph_list(quantizer.quant_model,
                                              quantizer.Nndctgraph)
        deploy_infos = compiler.get_deloy_graph_infos(quantizer, deploy_graphs)

        for deploy_info in deploy_infos:
            try:
                compiler.do_compile(deploy_info.dev_graph,
                                    quant_config_info=deploy_info.quant_info,
                                    output_file_name=os.path.join(
                                        output_dir,
                                        deploy_info.dev_graph.name))

            except AddXopError as e:
                NndctScreenLogger().error(
                    f"Failed to convert graph '{deploy_info.dev_graph.name}' to xmodel ({str(e)})."
                )

            # dump data for accuracy check
            if deploy_check:
                NndctScreenLogger().info(
                    f"=>Dumping '{deploy_info.dev_graph.name}' checking data..."
                )
                checker = DeployChecker(output_dir_name=output_dir)
                checker.update_dump_folder(f"{deploy_info.dev_graph.name}")
                checker.dump_nodes_output(
                    deploy_info.dev_graph,
                    deploy_info.quant_info,
                    round_method=quantizer.quant_opt['round_method'],
                    select_batch=False)

                NndctScreenLogger().info(
                    f"=>Finsh dumping data.({checker.dump_folder})")

        set_outputs_recorder_status(quantizer.quant_model, False)
Example 13
    def load_param(self):
        if self.quant_mode == 2:
            NndctScreenLogger().info(
                f"=>Loading quant model parameters.({self.param_file})")
            path = pathlib.Path(self.param_file)
            if not (path.exists()
                    and path.is_file()) or not self.fast_finetuned:
                NndctScreenLogger().error(
                    f"Fast finetuned parameter file does not exist. \
Please check calibration with fast finetune is done or not.")
                exit(2)
            self.quant_model.load_state_dict(torch.load(self.param_file))
            self._finetuned_para_loaded = True
Example 14
    def __init__(self,
                 graph,
                 model_type,
                 bitw,
                 bita,
                 lstm,
                 mix_bit,
                 custom_quant_ops=None):
        super().__init__(graph, model_type)
        self._QuantGroups = None
        if custom_quant_ops:
            for op in custom_quant_ops:
                if op not in self.QUANTIZABLE_OPS:
                    self.QUANTIZABLE_OPS.append(op)
                    NndctScreenLogger().info(
                        f"Convert `{op}` to quantizable op.")

        self.group_graph()
        quant_strategy = create_quant_strategy(bitw, bita, lstm, mix_bit)
        self._quant_info = quant_strategy.create_quant_config(self)
        if NndctOption.nndct_stat.value > 0:
            print('Quantization groups:')
            pp.pprint(self._QuantGroups)
            pp.pprint(self._quant_info)

        # check groups, only permit one quantizable node in one group in quant part
        ignored_list = [NNDCT_OP.SHAPE]
        for k, v in self._QuantGroups.items():
            if len(v) == 1:
                if len(self.Nndctgraph.parents(k)) == 0:
                    break
            findQuantizableNode = False
            isIgnored = False
            type_list = self.LSTM_QUANTIZABLE_OPS if lstm else self.QUANTIZABLE_OPS
            for n in v:
                node = self.get_Nndctnode(node_name=n)
                if node.op.type in type_list:
                    if findQuantizableNode:
                        NndctScreenLogger().warning(
                            'Multiple quantizable nodes found in one group:')
                        NndctScreenLogger().warning(f'{v}')
                    else:
                        findQuantizableNode = True
                elif node.op.type in ignored_list:
                    isIgnored = True
            if not findQuantizableNode and not isIgnored:
                NndctScreenLogger().warning(
                    'No quantizable node found in this group; confirm that its nodes perform no numerical calculation:'
                )
                NndctScreenLogger().warning(f'{v}')
Example 15
  def finetune_v2(self, run_fn, run_args):
    # check status
    if self.quantizer.quant_mode == 2:
      NndctScreenLogger().warning(f"Finetune function will be ignored in test mode!")
      return    
    
    # parameter finetuning
   
    with AdaQuant(processor=self):
      # calibration to get a set of quantization steps
      NndctScreenLogger().info(f"=>Preparing data for fast finetuning module parameters ...")   
      with NoQuant():
        net_inputs, net_outputs = self.cache_net_inpouts(run_fn, run_args)
      
      NndctScreenLogger().info(f"=>Find initial quantization steps for fast finetuning...")
      self.calibrate(run_fn, run_args)
      
      NndctScreenLogger().info(f"=>Fast finetuning module parameters for better quantization accuracy...")
      self.setup_test()    
      device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)  
      
      initial_net_loss = self.calc_net_loss(net_inputs, net_outputs, device)
      
      layer_act_pair = self.collect_layer_act_pair()  
      
      finetune_group = []
      for qmod, fmod in zip(self._quant_model.modules(), self._float_model.modules()):
        if hasattr(qmod, "node"):
          if (self.quantizer.configer.is_node_quantizable(qmod.node, False) and 
            len(qmod.node.op.params) > 0):     
            finetune_group.append([qmod.node, fmod])

      net_loss = initial_net_loss
      for idx, (qnode, fmod) in tqdm(enumerate(finetune_group), total=len(finetune_group)):
        is_cached = self.is_cached(qnode, len(net_inputs[0]))
        if (is_cached and idx < len(finetune_group) / 2) or (not is_cached):
          need_cache = False
        else:
          need_cache = True
                  
        net_loss = self.optimize_layer_v2(qnode, fmod, layer_act_pair, net_inputs, net_outputs, net_loss, device, need_cache)
      print(f"%%%%%%%%%%%%%%%%% final opt net loss:{net_loss.avg}")

        # print(f"{qnode.name}({need_cache}):{net_loss}")
            
    NndctScreenLogger().info(f"=>Export fast finetuned parameters ...")
    # export finetuned parameters
    self.quantizer.export_param()
Example 16
    def export_quant_config(self, export_file=None, adjust_pos=True):
        if NndctOption.nndct_param_corr.value > 0:
            if self.quant_mode == 1:
                # gather bias correction; how to get the nn module object?
                for node in self.Nndctgraph.nodes:
                    if node.op.type in [
                            NNDCT_OP.CONV1D, NNDCT_OP.CONV2D,
                            NNDCT_OP.CONVTRANSPOSE2D,
                            NNDCT_OP.DEPTHWISE_CONV2D, NNDCT_OP.DENSE,
                            NNDCT_OP.DEPTHWISE_CONVTRANSPOSE2D
                    ]:
                        if node.module.bias is not None:
                            self.bias_corr[node.name] = node.module.bias_corr()

                # export bias correction
                torch.save(self.bias_corr, self.bias_corr_file)
                self.bias_corrected = True

        # export quant steps
        file_name = export_file or self.export_file
        if isinstance(file_name, str):
            NndctScreenLogger().info(f"=>Exporting quant config.({file_name})")
            if adjust_pos:
                self.organize_quant_pos()
            with open(file_name, 'w') as f:
                f.write(nndct_utils.to_jsonstr(self.quant_config))
Example 17
    def __init__(self,
                 quant_mode: int,
                 output_dir: str,
                 quant_config: Dict[str, Union[str, int, bool]],
                 is_lstm=False):
        super().__init__(quant_mode, output_dir, quant_config, is_lstm)
        self._quant_model = None
        self._bias_corr_loaded = False
        self._finetuned_para_loaded = False
        if NndctOption.nndct_param_corr.value > 0:
            if self.quant_mode == 2:
                path = pathlib.Path(self.bias_corr_file)
                if not (path.exists() and path.is_file()):
                    NndctScreenLogger().error(
                        f"Bias correction result file does not exist. \
Please check calibration with bias correction is done or not.")
                    exit(2)
                self.bias_corr = torch.load(self.bias_corr_file)
                self._bias_corr_loaded = True
        self.exporting = False
        self.inplace = True
        self.output_dir = output_dir

        mix_bit = quant_config['mix_bit']
        if is_lstm:
            self.quant_strategy = LstmQstrategy(quant_config)
        else:
            if mix_bit:
                self.quant_strategy = TQTStrategy(quant_config)
            else:
                self.quant_strategy = DPUQstrategy(quant_config)
Example 18
                def forward(self, *args, **kwargs):

                    inputs = []

                    def collect_inputs(inputs, value):
                        if isinstance(value, torch.Tensor):
                            inputs.append(value)
                        elif isinstance(value, (tuple, list)):
                            for i in value:
                                collect_inputs(inputs, i)

                    for k, v in kwargs.items():
                        collect_inputs(inputs, v)

                    inputs, _ = process_inputs_and_params(self.node,
                                                          self.quantizer,
                                                          inputs=inputs)
                    try:
                        output = caller(*args, **kwargs)
                    except TypeError as e:
                        NndctScreenLogger().warning_once(
                            f"{str(e)}. The arguments of function will convert to positional arguments."
                        )
                        inputs = list(args) + list(kwargs.values())
                        output = caller(*inputs)

                    [output] = post_quant_process(self.node, [output])

                    return output
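The recursive collect_inputs helper simply flattens arbitrarily nested tensor containers; a standalone, runnable illustration:

import torch

def collect_inputs(inputs, value):
    if isinstance(value, torch.Tensor):
        inputs.append(value)
    elif isinstance(value, (tuple, list)):
        for i in value:
            collect_inputs(inputs, i)

flat = []
collect_inputs(flat, (torch.zeros(1), [torch.ones(2), (torch.ones(3),)]))
print([tuple(t.shape) for t in flat])  # [(1,), (2,), (3,)]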
Example 19
    def get_fp_and_quantize(self,
                            input_tensor,
                            fp_name,
                            fp_tensor,
                            fp_stat_tensor=None,
                            node=None,
                            tensor_type='output'):  #'input'|'output'|'param'
        # Forward the graph but not quantize parameter and activation
        if (self.quant_mode < 1 or NndctOption.nndct_quant_off.value):
            return input_tensor

        if input_tensor.dtype != tf.float32 and input_tensor.dtype != tf.float64:
            NndctScreenLogger().warning_once(
                f'The tensor type of {fp_name} is {str(input_tensor.dtype)}. Only float32/float64 quantization is supported.'
            )
            return input_tensor

        # get fixed position
        mth = 3
        if tensor_type != 'param':
            mth = 4
        bnfp = self.get_bnfp(fp_name, False, tensor_type)
        bw = bnfp[0]
        if self.quant_mode == 1:
            # must be in eager mode
            #print('---- Calculating fix pos of {}'.format(fp_name), flush=True)
            fp_tensor.assign(
                diffs_fix_pos(input=input_tensor,
                              bit_width=bw,
                              range=5,
                              method=mth))
            bnfp[1] = int(fp_tensor.numpy())
            # limit max fix pos to 12
            bnfp[1] = min(12, bnfp[1])
            # record fix pos of input/output by fp_stat_tensor
            if tensor_type != 'param':
                #fp_tensor.assign(stat_act_pos(fp_tensor,
                #                              fp_stat_tensor))
                self.fp_history[tensor_type][fp_name].append(bnfp[1])
                data = np.array(self.fp_history[tensor_type][fp_name])
                bnfp[1] = stats.mode(data)[0][0]
                bnfp[1] = bnfp[1].astype(np.int32).tolist()
                fp_tensor.assign(bnfp[1])
            bnfp = self.set_bnfp(fp_name, bnfp, tensor_type)

        if self.quant_mode > 0:
            # do quantization for parameter or activation
            tensor = fix_neuron(input_tensor, fp_tensor, bw, method=mth)
            if tensor_type == 'param':
                self.update_param_to_quantized(node, fp_name, tensor.numpy())

            # XXX: Temporary.
            if self._dump_input and tensor_type == 'output' and 'input' in fp_name:
                if fp_name not in self._quantized_input:
                    self._quantized_input[fp_name] = []
                self._quantized_input[fp_name].append([tensor.numpy()])

            return tensor
        else:
            return input_tensor
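For intuition, here is a reference sketch of what a power-of-two "fix position" quantizer computes, assuming fix_neuron performs symmetric round-and-clamp quantization (this mirrors the bnfp = [bit_width, fix_pos] convention above, not the library's exact kernel):

import numpy as np

def fix_neuron_ref(x, fix_pos, bit_width=8):
    # quantization step is 2**-fix_pos; values saturate at the integer range
    scale = 2.0 ** fix_pos
    qmin, qmax = -2 ** (bit_width - 1), 2 ** (bit_width - 1) - 1
    return np.clip(np.round(x * scale), qmin, qmax) / scale

print(fix_neuron_ref(np.array([0.1234, -1.5]), fix_pos=6))  # [ 0.125 -1.5  ]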
Example 20
                def forward(self, *args, **kwargs):

                    inputs = []

                    def collect_inputs(inputs, value):
                        if isinstance(value, torch.Tensor):
                            inputs.append(value)
                        elif isinstance(value, (tuple, list)):
                            for i in value:
                                collect_inputs(inputs, i)

                    for _, v in kwargs.items():
                        collect_inputs(inputs, v)

                    inputs = quantize_tensors(inputs,
                                              self.node,
                                              tensor_type='input')
                    try:
                        output = caller(*args, **kwargs)
                        if isinstance(output, torch.Tensor):
                            output = output.clone()
                    except TypeError as e:
                        NndctScreenLogger().warning_once(
                            f"{str(e)}. The arguments of function will convert to positional arguments."
                        )
                        inputs = list(args) + list(kwargs.values())
                        output = caller(*inputs)

                    output = quantize_tensors([output], self.node)[0]

                    return output
Example 21
    def forward(self, input):
        qinput = quantize_tensors([input], self.node, tensor_type='input')[0]
        # check input shape
        if self.node.out_tensors[0].is_complete_tensor(
        ) and self.node.out_tensors[0].ndim == 4:
            # py_utils.blob_to_torch_format(self.node.out_tensors[0])
            if not (self.node.out_tensors[0].shape[1:] == list(
                    input.size())[1:]):
                NndctScreenLogger().warning(
                    f"The shape of input ({input.shape[1:]}) should be the same with that of dummy input ({self.node.out_tensors[0].shape[1:]})"
                )
            # py_utils.blob_to_nndct_format(self.node.out_tensors[0])
        output = qinput

        if (self.node.in_quant_part and NndctOption.nndct_stat.value > 2):
            print('Channel number of input data: {}'.format(output.shape[1]))
            print('Input data histogram: {}'.format(
                output.histc(bins=10).cpu().detach().numpy()))
            print(
                'Network input channel-wise statistic [Min, Max, Mean, Std]:')
            t = output.transpose(0, 1)
            for c in range(t.shape[0]):
                print('[{}, {}, {}, {}]'.format(t[c].min(), t[c].max(),
                                                t[c].mean(), t[c].std()))
                print('histogram: {}'.format(
                    t[c].histc(bins=10).cpu().detach().numpy()))

        if self.node.in_quant_part:
            output = quantize_tensors([output], self.node)[0]

        return output
Example 22
 def inspect(self,
             module: torch.nn.Module,
             input_args: Union[torch.Tensor, Tuple[Any]],
             device: torch.device = torch.device("cuda"),
             output_dir: str = "quantize_result",
             verbose_level: int = 1,
             image_format: Optional[str] = None):
     NndctScreenLogger().info(f"=>Start to inspect model...")
     self._inspector_impl.inspect(module, input_args, device, output_dir,
                                  verbose_level)
     if image_format is not None:
         available_format = ["svg", "png"]
         NndctScreenLogger().check(f"Only support dump svg or png format.",
                                   image_format in available_format)
         self._inspector_impl.export_dot_image_v2(output_dir, image_format)
     NndctScreenLogger().info(f"=>Finish inspecting.")
Example 23
 def __init__(self):
     if not _enable_plot:
         NndctScreenLogger().warning(
             "Please install matplotlib for visualization.")
         sys.exit(1)
     self._dir = '.nndct_quant_stat_figures'
     io.create_work_dir(self._dir)
Example 24
 def get_op_output_shape(self, name: str) -> List[int]:
     op = self.get_op_by_name(name)
     if op:
         return op.get_output_tensor().dims
     else:
         NndctScreenLogger().warning(
             "{name} is not in xmodel. Please check it.")
Example 25
    def export_dot_image(self, output_dir, format):
        assert self._graph is not None
        file_name = os.path.join(output_dir, ".".join(["inspect", format]))
        device_type_node_sets = defaultdict(list)
        for node in self._graph.nodes:
            if node.op.type == NNDCT_OP.RETURN:
                continue
            if node.target_device:
                device_type_node_sets[
                    node.target_device.get_device_type()].append(node)
            else:
                raise RuntimeError(
                    f"{node}({node.op.type}) has no target device.")

        device_type_subgraph_node_sets = defaultdict(list)
        boundaries = []
        for device_type, node_set in device_type_node_sets.items():
            subgraph_node_sets, sub_boundaries = self._get_subgraphs_and_output_boundaries(
                node_set, device_type)
            device_type_subgraph_node_sets[device_type] = subgraph_node_sets
            boundaries += sub_boundaries

        dot_graph = self._create_dot_graph(output_dir,
                                           device_type_subgraph_node_sets,
                                           boundaries)

        file_name = dot_graph.render(outfile=file_name).replace('\\', '/')
        NndctScreenLogger().info(f"Dot image is generated.({file_name})")
Example 26
def insert_scale_after_conv2d(module: torch.nn.Module):
    def _insert_func(op):
        insert_name = None
        conv2d_cnt = 0
        find_conv2d = False
        for op_name, c_op in op.named_children():
            if find_conv2d:
                conv2d_cnt = conv2d_cnt + 1
            if isinstance(c_op, (torch.nn.Conv2d, torch.nn.ConvTranspose2d)):
                find_conv2d = True
                insert_name = op_name
            elif isinstance(c_op, torch.nn.BatchNorm2d) and find_conv2d:
                insert_name = op_name

            if conv2d_cnt == 1:
                op._modules[insert_name] = torch.nn.Sequential(
                    op._modules[insert_name],
                    channel_scale.ChannelScale(channel_scale=1.0))
                find_conv2d = False
                conv2d_cnt = 0
        if find_conv2d:
            op._modules[insert_name] = torch.nn.Sequential(
                op._modules[insert_name],
                channel_scale.ChannelScale(channel_scale=1.0))

    if any(isinstance(submodule, (torch.nn.Conv2d, torch.nn.ConvTranspose2d))
           for submodule in module.modules()):
        module.apply(_insert_func)
        NndctScreenLogger().warning(
            f"ChannelScale has been inserted after Conv2d.")
Example 27
def create_quant_algo(tensor_type, quant_strategy_info, node):
  algo_config = quant_strategy_info
  quant_algo = None
  
  granularity = algo_config.get("granularity")
  if granularity == "per_channel":
    if (int(torch.__version__.split('.')[0]) <= 1) and (int(torch.__version__.split('.')[1]) < 5):
      NndctScreenLogger().error("Torch should be updated to version 1.5.0 or higher for per_channel quantization.")
      raise RuntimeError("per_channel quantization requires torch >= 1.5.0")
    op_type = node.op.type
    axis = None
    #group = node.node_attr[node.op.AttrName.GROUP]
    if tensor_type != "weights":
      raise ValueError("Only support per_channel quantization for weights for now")
    if op_type in _CONV_LINEAR_TYPES:
      axis = 0
    elif op_type in _CONV_TRANSPOSE_TYPES:
      axis = 1
    quant_algo = PerChannelQuantAlgo(algo_config, axis)
  elif granularity == "per_tensor":
    method = algo_config.get("method")
    if method == "maxmin":
      quant_algo = MaxMinQuantPerTensorAlgo(algo_config)
    elif method == "percentile":
      quant_algo = PercentileQuantPerTensorAlgo(algo_config)
    elif method == "mse":
      quant_algo = MSEQuantPerTensorAlgo(algo_config)
    elif method == "entropy":
      quant_algo = EntropyQuantPerTensorAlgo(algo_config)
    
  return quant_algo
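The axis choice above follows PyTorch's weight layouts: Conv2d and Linear store weights with out_channels on dim 0, while ConvTranspose2d stores them as (in_channels, out_channels, kH, kW), so per-channel scales live on dim 1. A runnable check:

import torch

print(torch.nn.Conv2d(3, 8, 3).weight.shape)           # torch.Size([8, 3, 3, 3])
print(torch.nn.ConvTranspose2d(3, 8, 3).weight.shape)  # torch.Size([3, 8, 3, 3])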
Example 28
    def __init__(self,
                 quant_mode: int,
                 output_dir: str,
                 quant_config,
                 is_lstm=False):
        super().__init__(quant_mode, output_dir, quant_config, is_lstm)
        self._quant_model = None
        self._bias_corr_loaded = False
        if NndctOption.nndct_param_corr.value > 0:
            if self.quant_mode == 2:
                path = pathlib.Path(self.bias_corr_file)
                if not (path.exists() and path.is_file()):
                    NndctScreenLogger().error(
                        f"Bias correction result file does not exist. \
Please check calibration with bias correction is done or not.")
                    exit(2)
                self.bias_corr = torch.load(self.bias_corr_file)
                self._bias_corr_loaded = True

        self.exporting = False
        self.inplace = True
        self.serial = True
        #self._fast_finetuned = False
        self._finetuned_para_loaded = False
        self.output_dir = output_dir

        if NndctOption.nndct_tensorrt_strategy.value:
            self.quant_strategy = TensorRTCGQStrategy(quant_config)
        else:
            self.quant_strategy = NndctCGQstrategy(quant_config)
Example 29
        def _graph2module(op):
            node = getattr(op, "node", None)
            if node is None:
                return
            for param_type, tensor in node.op.params.items():
                py_tensor_util.param_to_torch_format(tensor)

                data = np.copy(tensor.data)
                if node.op.type in [
                        NNDCT_OP.CONVTRANSPOSE2D, NNDCT_OP.CONVTRANSPOSE3D
                ] and param_type == node.op.ParamName.WEIGHTS:
                    # data = data.transpose(1, 0, 2, 3)
                    data = data.swapaxes(0, 1)
                    data = np.ascontiguousarray(data)

                if node.op.type in [
                        NNDCT_OP.DEPTHWISE_CONV2D, NNDCT_OP.DEPTHWISE_CONV3D
                ] and param_type == node.op.ParamName.WEIGHTS:
                    out_channels = node.node_config("out_channels")
                    kernel_size = node.node_config("kernel_size")
                    data = data.reshape((out_channels, 1, *kernel_size))

                if node.op.type in [
                        NNDCT_OP.DEPTHWISE_CONVTRANSPOSE2D,
                        NNDCT_OP.DEPTHWISE_CONVTRANSPOSE3D
                ] and param_type == node.op.ParamName.WEIGHTS:
                    in_channels = node.node_config("in_channels")
                    kernel_size = node.node_config("kernel_size")
                    data = data.reshape((1, in_channels, *kernel_size))
                    data = data.swapaxes(0, 1)
                    data = np.ascontiguousarray(data)

                torch_tensor = torch.from_numpy(data)
                param_name = cls._parameter_map.get(param_type,
                                                    param_type.value)
                if node.has_bound_params():
                    if hasattr(op, param_name):
                        if isinstance(getattr(op, param_name), torch.Tensor):
                            torch_tensor = torch_tensor.to(
                                getattr(op, param_name))
                        else:
                            torch_tensor = torch_tensor.to(
                                getattr(op, param_name).data)

                        if param_name in op._buffers:
                            op._buffers[param_name] = torch_tensor
                        else:
                            op._parameters[param_name] = torch.nn.Parameter(
                                torch_tensor)
                    else:
                        NndctScreenLogger().warning(
                            f"new parameter: '{param_name}' is registered in {node.name}"
                        )
                        op.register_parameter(param_name,
                                              torch.nn.Parameter(torch_tensor))
                else:
                    torch_tensor = torch_tensor.to(
                        device=GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE))
                    module.register_parameter(param_name,
                                              torch.nn.Parameter(torch_tensor))

                py_tensor_util.param_to_nndct_format(tensor)
Example 30
    def _init_quant_env():
        nonlocal quant_mode
        if NndctOption.nndct_quant_mode.value > 0:
            quant_mode = NndctOption.nndct_quant_mode.value

        if quant_mode == 1:
            NndctScreenLogger().info(
                f"Quantization calibration process start up...")
        elif quant_mode == 2:
            NndctScreenLogger().info(f"Quantization test process start up...")

        quantizer = TORCHQuantizer(quant_mode, output_dir, bitwidth_w,
                                   bitwidth_a)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, quant_mode)
        return quantizer, quant_mode