Example 1
def nndct_warn_print(string):
  if GLOBAL_MAP.get_ele(NNDCT_KEYS.WARN_FLAG):
    logger = GLOBAL_MAP.get_ele(NNDCT_KEYS.LOGGER)
    if logger:
      logger.warning("[NNDCT_WARN] {}".format(string))
    else:
      print("[NNDCT_WARN] {}".format(string))
Example 2
 def _do_map(output_name, node_name):
     if output_name != node_name:
         if not GLOBAL_MAP.get_ele(NNDCT_KEYS.OUTPUT_TO_NODE_MAP):
             GLOBAL_MAP.set_map(NNDCT_KEYS.OUTPUT_TO_NODE_MAP, {})
         if not GLOBAL_MAP.get_ele(NNDCT_KEYS.NODE_TO_OUTPUT_MAP):
             GLOBAL_MAP.set_map(NNDCT_KEYS.NODE_TO_OUTPUT_MAP, {})
         #map output to node
         output_to_node_map = GLOBAL_MAP.get_ele(
             NNDCT_KEYS.OUTPUT_TO_NODE_MAP)
         if output_name not in output_to_node_map:
             nndct_debug_print(
                 "<map_output_and_node> map out {} and node {}".format(
                     output_name, node_name),
                 level=NNDCT_DEBUG_LVL.BUILD_GRAPH)
             output_to_node_map[output_name] = node_name
         else:
             assert output_to_node_map[
                 output_name] == node_name, "stored node name for output_name {} is {}, got new node name {}".format(
                     output_name, output_to_node_map[output_name],
                     node_name)
         #add output to list keyed by node_name
         node_to_output_map = GLOBAL_MAP.get_ele(
             NNDCT_KEYS.NODE_TO_OUTPUT_MAP)
         if node_name not in node_to_output_map:
             node_to_output_map[node_name] = [output_name]
         else:
             node_to_output_map[node_name].append(output_name)
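
A hypothetical walk-through of the invariants _do_map maintains (the names are made up, and a registry like the sketch after Example 1 is assumed): each output maps to exactly one node, while one node may own several outputs.

_do_map("conv1_out0", "conv1")
_do_map("conv1_out1", "conv1")

out2node = GLOBAL_MAP.get_ele(NNDCT_KEYS.OUTPUT_TO_NODE_MAP)
node2out = GLOBAL_MAP.get_ele(NNDCT_KEYS.NODE_TO_OUTPUT_MAP)
assert out2node["conv1_out0"] == "conv1"
assert node2out["conv1"] == ["conv1_out0", "conv1_out1"]

# remapping an existing output to a different node trips the assert:
# _do_map("conv1_out0", "conv2")  # AssertionError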
Example 3
def nndct_error_print(string):
  if GLOBAL_MAP.get_ele(NNDCT_KEYS.ERROR_FLAG):
    logger = GLOBAL_MAP.get_ele(NNDCT_KEYS.LOGGER)
    if logger:
      logger.error("[NNDCT_ERROR] {}".format(string))
    else:
      print("[NNDCT_ERROR] {}".format(string))
    sys.exit(1)
Example 4
def nndct_debug_print(string, title='', level=1):
  if GLOBAL_MAP.get_ele(NNDCT_KEYS.DEBUG_FLAG) and level <= GLOBAL_MAP.get_ele(
      NNDCT_KEYS.VERBOSE_LEVEL):
    logger = GLOBAL_MAP.get_ele(NNDCT_KEYS.LOGGER)
    if title == 'Start':
      string = "\n********************* <{} : {}> *********************".format(
          title, string)
    elif title == 'End':
      string = "\n********************* <{} : {}> *********************\n".format(
          title, string)
    if logger:
      logger.debug("[NNDCT_DEBUG_Lv_{}] {}".format(level, string))
    else:
      print("[NNDCT_DEBUG_Lv_{}] {}".format(level, string))
Example 5
        def _graph2module(op):
            node = getattr(op, "node", None)
            if node is None:
                return
            for param_type, tensor in node.op.params.items():
                py_tensor_util.param_to_torch_format(tensor)

                data = np.copy(tensor.data)
                if node.op.type in [
                        NNDCT_OP.CONVTRANSPOSE2D, NNDCT_OP.CONVTRANSPOSE3D
                ] and param_type == node.op.ParamName.WEIGHTS:
                    # data = data.transpose(1, 0, 2, 3)
                    data = data.swapaxes(0, 1)
                    data = np.ascontiguousarray(data)

                if node.op.type in [
                        NNDCT_OP.DEPTHWISE_CONV2D, NNDCT_OP.DEPTHWISE_CONV3D
                ] and param_type == node.op.ParamName.WEIGHTS:
                    out_channels = node.node_config("out_channels")
                    kernel_size = node.node_config("kernel_size")
                    data = data.reshape((out_channels, 1, *kernel_size))

                if node.op.type in [
                        NNDCT_OP.DEPTHWISE_CONVTRANSPOSE2D,
                        NNDCT_OP.DEPTHWISE_CONVTRANSPOSE3D
                ] and param_type == node.op.ParamName.WEIGHTS:
                    in_channels = node.node_config("in_channels")
                    kernel_size = node.node_config("kernel_size")
                    data = data.reshape((1, in_channels, *kernel_size))
                    data = data.swapaxes(0, 1)
                    data = np.ascontiguousarray(data)

                torch_tensor = torch.from_numpy(data)
                param_name = cls._parameter_map.get(param_type,
                                                    param_type.value)
                if node.has_bound_params():
                    if hasattr(op, param_name):
                        if isinstance(getattr(op, param_name), torch.Tensor):
                            torch_tensor = torch_tensor.to(
                                getattr(op, param_name))
                        else:
                            torch_tensor = torch_tensor.to(
                                getattr(op, param_name).data)

                        if param_name in op._buffers:
                            op._buffers[param_name] = torch_tensor
                        else:
                            op._parameters[param_name] = torch.nn.Parameter(
                                torch_tensor)
                    else:
                        NndctScreenLogger().warning(
                            f"new parameter: '{param_name}' is registered in {node.name}"
                        )
                        op.register_parameter(param_name,
                                              torch.nn.Parameter(torch_tensor))
                else:
                    torch_tensor = torch_tensor.to(
                        device=GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE))
                    module.register_parameter(param_name,
                                              torch.nn.Parameter(torch_tensor))

                py_tensor_util.param_to_nndct_format(tensor)
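
The swapaxes(0, 1) branches exist because PyTorch stores ConvTranspose weights as (in_channels, out_channels // groups, kH, kW), while the graph-side layout here is evidently (out_channels, in_channels, kH, kW). A standalone numpy sketch of that relayout:

import numpy as np

# graph-side layout: (out_channels, in_channels, kH, kW)
w = np.arange(2 * 3 * 5 * 5, dtype=np.float32).reshape(2, 3, 5, 5)

# PyTorch ConvTranspose2d layout: (in_channels, out_channels // groups, kH, kW)
w_torch = w.swapaxes(0, 1)
assert w_torch.shape == (3, 2, 5, 5)

# swapaxes returns a strided view; ascontiguousarray materializes it so
# torch.from_numpy gets a dense buffer
w_torch = np.ascontiguousarray(w_torch)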
Example 6
    def forward(self, input):

        [input], _ = process_inputs_and_params(self.node,
                                               self.quantizer,
                                               inputs=[input])

        if NndctOption.nndct_quant_off.value or NndctOption.nndct_cv_app.value:
            output = super().forward(input)
            # quantize output
            [output] = post_quant_process(self.node, [output])
        elif self.quant_mode > 0:
            output = torch.empty_like(input)
            if NndctOption.nndct_tanh_sigmoid_sim.value > 0:
                NndctSigmoidSimulation(input, output)
                [output] = post_quant_process(self.node, [output])
            else:
                input_name = self.node.in_nodes[0]
                fragpos = self.quantizer.get_bnfp(input_name, False)[1]
                quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
                Ttable = SIGMOID_TABLE.table.to(quant_device)
                output = output.to(quant_device)
                NndctSigmoidTableLookup(input, Ttable, output, fragpos)
        else:
            output = super().forward(input)

        return output
Example 7
 def __init__(self, file_name=None):
     file_name = file_name or GLOBAL_MAP.get_ele(
         NNDCT_KEYS.MODIFIER).nndct_prefix + '.py'
     Exception.__init__(
         self,
         "The rebuilt graph mismatch with original graph, please manually modify '{}' and run again"
         .format(file_name))
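
The enclosing class is outside the snippet; a hypothetical reconstruction showing how it would be used (GraphMismatchError is an assumed name):

class GraphMismatchError(Exception):  # hypothetical name

 def __init__(self, file_name=None):
     file_name = file_name or GLOBAL_MAP.get_ele(
         NNDCT_KEYS.MODIFIER).nndct_prefix + '.py'
     Exception.__init__(
         self,
         "The rebuilt graph does not match the original graph; "
         "please manually modify '{}' and run again".format(file_name))

# raise GraphMismatchError()  # message names the generated .py file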
Example 8
  def set_op_class_type(self, force_to_primitive: bool, schema: "Schema", class_type=None):
    if class_type is not None:
      self.op_class_type = TorchOpClassType.CUSTOM_FUNCTION
    elif schema is not None:
      schema2torchop = GLOBAL_MAP.get_ele(NNDCT_KEYS.TORCH_SCHEMA_OP_TABLE)
      schema_handler = SchemaHelper(schema)
      torchop = schema2torchop[schema_handler.toString()]
      self.op_class_type = torchop.op_class_type
    else:
      if force_to_primitive:
        self.op_class_type = TorchOpClassType.PRIMITIVE
      else:
        if self.op_name in dir(torch.nn):
          self.op_class_type = TorchOpClassType.NN_MODULE
          self.op_name = '.'.join(['torch', 'nn', self.op_name])

        elif self.op_name in dir(torch.nn.functional):
          self.op_class_type = TorchOpClassType.NN_FUNCTION
          self.op_name = '.'.join(['torch', 'nn', 'functional', self.op_name])

        elif self.op_name in dir(torch) and isinstance(getattr(torch, self.op_name), Callable):
          self.op_class_type = TorchOpClassType.TORCH_FUNCTION
          self.op_name = '.'.join(['torch', self.op_name])

        elif self.op_name in dir(torch.Tensor):
          self.op_class_type = TorchOpClassType.TENSOR

        else:
          self.op_class_type = TorchOpClassType.UNKNOWN
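
Because the chain is elif-based, a name that exists in several PyTorch namespaces resolves to the first match: nn.Module, then nn.functional, then torch, then Tensor methods. A quick check with stock PyTorch shows why the order matters for a name like 'relu':

import torch

print('relu' in dir(torch.nn))             # False -> not an nn.Module class
print('relu' in dir(torch.nn.functional))  # True  -> resolves to NN_FUNCTION
print('relu' in dir(torch))                # True  -> TORCH_FUNCTION, but shadowed
print('relu' in dir(torch.Tensor))         # True  -> TENSOR, but shadowed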
Example 9
 def custom_op(self, node, *args):
     node2caller = GLOBAL_MAP.get_ele(NNDCT_KEYS.NODE_CALLER_MAP)
     if node2caller is None:
         node2caller: Dict[str, Callable] = {}
         GLOBAL_MAP.set_map(NNDCT_KEYS.NODE_CALLER_MAP, node2caller)
     node2caller[node.name] = node.caller
     op = TorchCustomOperation(node.raw_kind, node.raw_kind)
     for i, arg in enumerate(args):
         op.set_config(str(i), arg)
     attrs = (GLOBAL_MAP.get_ele(NNDCT_KEYS.CUSTOM_OP_ATTRS_MAP) or {}).get(
         node.raw_kind, None)
     if attrs:
         attr_vals = args[len(args) - len(attrs):]
         for name, val in zip(attrs, attr_vals):
             op.set_attr_by_name(name, val)
     return op
Example 10
 def wrapper(*args, **kwargs):
   error_flag = GLOBAL_MAP.get_ele(NNDCT_KEYS.ERROR_FLAG)
   if error_flag:
     print("[NNDCT_ERROR]", end='')
   # run the wrapped function first, then exit if the error flag is set
   result = func(*args, **kwargs)
   if error_flag:
     exit(1)
   return result
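
This wrapper reads like the inner function of an error-tagging decorator; a hedged reconstruction of the enclosing factory (the decorator name is assumed):

import sys

def nndct_exit_on_error(func):  # hypothetical name for the enclosing factory
  def wrapper(*args, **kwargs):
    error_flag = GLOBAL_MAP.get_ele(NNDCT_KEYS.ERROR_FLAG)
    if error_flag:
      print("[NNDCT_ERROR]", end='')
    result = func(*args, **kwargs)
    if error_flag:
      sys.exit(1)
    return result
  return wrapper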
Example 11
 def export_quant_config(self):
   """
   `export bitwidth and fixpoint info of blobs and parameters under work dir`
   """
   quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
   if quantizer and quantizer.quant_mode == 1:
     quantizer.export_quant_config()
Example 12
  def dump_xmodel(self, deploy_check=False):
    """
    `dump xmodel for LSTM cell`
    """
    quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
    if quantizer and quantizer.quant_mode > 1:
      compiler = CompilerFactory.get_compiler("xmodel")
      xmodel_dir = os.path.join(self._export_folder, "xmodel")
      create_work_dir(xmodel_dir)
      for info in self._modules_info.values():
        for l_num, layer_graph in enumerate(info["layers_graph"]):
          for lstm_direction, graph in layer_graph.items():
            try:
              compiler.do_compile(
                  nndct_graph=graph,
                  quant_config_info=quantizer.quant_config,
                  output_file_name=os.path.join(xmodel_dir, graph.name),
                  graph_attr_kwargs={"direction": lstm_direction})
            except Exception as e:
              print(
                  f"[NNDCT_ERROR]: failed to convert nndct graph to xmodel ({str(e)})."
              )

            else:
              print("[NNDCT_NOTE]:Successfully convert nndct graph to xmodel!")

      if deploy_check:
        print("[NNDCT_NOTE]: Dumping checking data...")
        checker = DeployChecker(
            output_dir_name=self._export_folder, data_format="txt")     
        
        # get timestep output
        for name, info in self._layers_info.items():
          cell = info["cell_module"]
          layer = info["layer_module"]
          graph = info["graph"]
          if layer.input is None:
            warnings.warn(
                f"[NNDCT_WARNING]: Provide inputs for '{name}' when do deploy checking",
                RuntimeWarning)
            continue
          
          set_outputs_recorder_status(cell, True)
          layer(layer.input, layer.initial_state, layer.batch_lengths)

          for timestep in range(layer.input.size()[1]):
            enable_dump_weight = (timestep == 0)
            update_nndct_blob_data(cell, graph, timestep)
            checker.update_dump_folder(f"{graph.name}/frame_{timestep}")
            checker.dump_nodes_output(
                graph,
                quantizer.quant_config,
                round_method=quantizer.quant_opt['round_method'],
                enable_dump_weight=enable_dump_weight)
          
          set_outputs_recorder_status(cell, False)

        print("[NNDCT_NOTE]: Finsh dumping data.")
Example 13
def dump_xmodel(output_dir="quantize_result", deploy_check=False):
    r"""converts module to xmodel for deployment
  compilation only works when quantm model = 2.
  The xmodel and some checking data will be generated under work dir.

  Args:
    deploy_check(bool): if true, can dump blobs and parameters of model for deployment verification

  Returns:
    None
  """
    quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
    if quantizer and quantizer.quant_mode > 1:
        nndct_utils.create_work_dir(output_dir)

        # compile to xmodel

        compiler = CompilerFactory.get_compiler("xmodel")

        NndctScreenLogger().info("=>Converting to xmodel ...")
        deploy_graphs = get_deploy_graph_list(quantizer.quant_model,
                                              quantizer.Nndctgraph)
        deploy_infos = compiler.get_deloy_graph_infos(quantizer, deploy_graphs)

        for deploy_info in deploy_infos:
            try:
                compiler.do_compile(deploy_info.dev_graph,
                                    quant_config_info=deploy_info.quant_info,
                                    output_file_name=os.path.join(
                                        output_dir,
                                        deploy_info.dev_graph.name))

            except AddXopError as e:
                NndctScreenLogger().error(
                    f"Failed to convert graph '{deploy_info.dev_graph.name}' to xmodel ({str(e)})."
                )

            # dump data for accuracy check
            if deploy_check:
                NndctScreenLogger().info(
                    f"=>Dumping '{deploy_info.dev_graph.name}' checking data..."
                )
                checker = DeployChecker(output_dir_name=output_dir)
                checker.update_dump_folder(f"{deploy_info.dev_graph.name}")
                checker.dump_nodes_output(
                    deploy_info.dev_graph,
                    deploy_info.quant_info,
                    round_method=quantizer.quant_opt['round_method'],
                    select_batch=False)

                NndctScreenLogger().info(
                    f"=>Finished dumping data. ({checker.dump_folder})")

        set_outputs_recorder_status(quantizer.quant_model, False)
Example 14
 def calib_global_param(self):
     quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
     for tensor_type, algo_dict in self._QuantAlgo.items():
         for name, algo in algo_dict.items():
             if not algo.statistic_local:
                 q_config = self.get_quant_config(name, False, tensor_type)
                 if q_config[0] < 32:
                     algo.calib_global_statis(quant_device)
                     q_config[1], q_config[2], q_config[3] = (
                         algo.scale, algo.zero_point, algo.float_max)
                 self.set_quant_config(name, q_config, tensor_type)
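
For reference, the slot layout of q_config can be inferred from how this and later examples (e.g. Example 30) index it; this is an inference from the snippets, not documented API:

# q_config[0] -> bit width (quantization disabled when >= 32)
# q_config[1] -> scale
# q_config[2] -> zero point
# q_config[3] -> float max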
Example 15
    def default(self, node, *args):
        schema2torchop = GLOBAL_MAP.get_ele(NNDCT_KEYS.TORCH_SCHEMA_OP_TABLE)
        schema_handler = SchemaHelper(node.schema)
        torchop = schema2torchop.get(schema_handler.toString(), None)
        if torchop is None:
            op = TorchUnknownOperation(node.raw_kind)
            return op
        node2caller = GLOBAL_MAP.get_ele(NNDCT_KEYS.NODE_CALLER_MAP)
        if node2caller is None:
            node2caller: Dict[str, Callable] = {}
            GLOBAL_MAP.set_map(NNDCT_KEYS.NODE_CALLER_MAP, node2caller)
        node2caller[node.name] = torchop.caller
        op = TorchBaseOperation(schema_handler.op_name,
                                torchop.name,
                                schema=node.schema)
        # op.set_caller(torchop.caller)
        assert len(args) == len(schema_handler.get_arguments())
        if len(args) == 1:
            return op
        arg_name_convertor = {"self": "input"}
        for inp, arg in zip(args, schema_handler.get_arguments()):
            arg_name = schema_handler.arg_name(arg)
            if torchop.op_class_type == TorchOpClassType.TENSOR and arg_name == "self":
                continue
            if arg_name in ["layout", "memory_format", "pin_memory"]:
                continue
            config_name = arg_name_convertor.get(arg_name, arg_name)
            if convert_type_str(schema_handler.arg_type(arg)).replace(
                    "?", "") == "bool":
                inp = bool(inp) if inp is not None else inp
            if convert_type_str(schema_handler.arg_type(arg)).replace(
                    "?", "") == "str":
                inp = f"'{inp}'" if inp is not None else inp

            if arg_name == "device":
                inp = f"'{self._device_type}'"
            if arg_name == "dtype":
                inp = scalar_type_to_pytorch_type[
                    inp] if inp is not None else inp
            op.set_config(config_name, inp)
        return op
Example 16
    def do_quantize(self, blob, name, node=None, tensor_type='input'):
        # forward quant graph but not quantize parameter and activation
        if NndctOption.nndct_quant_off.value:
            return blob

        blob_save = blob
        if isinstance(blob.values, torch.Tensor):
            blob = blob.values

        quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
        if blob.device.type != quant_device.type:
            raise TypeError(
                "Device of quantizer is {}, device of model and data should match device of quantizer"
                .format(quant_device.type))

        if (NndctOption.nndct_quant_opt.value
                and NndctOption.nndct_logging_level.value > 0):
            quant_data = nndct_quant.QuantizeData(name,
                                                  blob.cpu().detach().numpy())
        # quantize the tensor
        bnfp = self.get_bnfp(name, True, tensor_type)
        #print('---- quant %s with 1/step = %g' % (name, bnfp[1]))
        # hardware cut method
        mth = 4 if self.lstm else 2
        if tensor_type == 'param':
            mth = 3

        res = py_nndct.nn.NndctFixNeuron(blob,
                                         blob,
                                         maxamp=[bnfp[0], bnfp[1]],
                                         method=mth)

        if (NndctOption.nndct_quant_opt.value
                and NndctOption.nndct_logging_level.value > 0):
            global global_snr_inv
            quant_efficiency, sqnr = quant_data.quant_efficiency(
                blob.cpu().detach().numpy(), 8)
            global_snr_inv += 1 / sqnr
            print(
                f"quant_efficiency={quant_efficiency}, global_snr_inv={global_snr_inv} {quant_data._name}\n"
            )

        # update param to nndct graph
        if tensor_type == 'param':
            self.update_param_to_nndct(node, name, res.cpu().detach().numpy())

        # NndctFixNeuron evidently quantizes blob in place (the same tensor is
        # passed as both input and output), so the original container already
        # holds the quantized values and is returned as-is
        blob = blob_save
        res = blob_save

        return res
Example 17
def node_from_output(output_name, model_type):
    if model_type == 'Nndct':
        return output_name
    if model_type == 'tensorflow':
        output_name = output_name.split(':')[0]
    elif model_type == 'torch':
        if output_name.split('_')[-1] in ['backward', 'forward']:
            output_name = '_'.join(output_name.split('_')[:-1])
    else:
        raise KeyError("node_from_output is not available for model type " +
                       str(model_type))
    output_map = GLOBAL_MAP.get_ele(NNDCT_KEYS.OUTPUT_TO_NODE_MAP)
    if output_map and output_name in output_map:
        return output_map[output_name]
    return output_name
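
A few illustrative lookups (the names are made up) showing the per-framework normalization that happens before the OUTPUT_TO_NODE_MAP lookup:

# assuming OUTPUT_TO_NODE_MAP has no entry for these names:
node_from_output("conv1_forward", "torch")         # -> "conv1"  (suffix stripped)
node_from_output("dense/BiasAdd:0", "tensorflow")  # -> "dense/BiasAdd"  (":0" dropped)
node_from_output("conv1", "Nndct")                 # -> "conv1"  (passes through)
node_from_output("conv1", "caffe")                 # raises KeyError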
Example 18
  def finetune_v2(self, run_fn, run_args):
    # check status
    if self.quantizer.quant_mode == 2:
      NndctScreenLogger().warning(f"Finetune function will be ignored in test mode!")
      return    
    
    # parameter finetuning
   
    with AdaQuant(processor=self):
      # calibration to get a set of quantization steps
      NndctScreenLogger().info(f"=>Preparing data for fast finetuning module parameters ...")   
      with NoQuant():
        net_inputs, net_outputs = self.cache_net_inpouts(run_fn, run_args)
      
      NndctScreenLogger().info(f"=>Find initial quantization steps for fast finetuning...")
      self.calibrate(run_fn, run_args)
      
      NndctScreenLogger().info(f"=>Fast finetuning module parameters for better quantization accuracy...")
      self.setup_test()    
      device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)  
      
      initial_net_loss = self.calc_net_loss(net_inputs, net_outputs, device)
      
      layer_act_pair = self.collect_layer_act_pair()  
      
      finetune_group = []
      for qmod, fmod in zip(self._quant_model.modules(), self._float_model.modules()):
        if hasattr(qmod, "node"):
          if (self.quantizer.configer.is_node_quantizable(qmod.node, False) and 
            len(qmod.node.op.params) > 0):     
            finetune_group.append([qmod.node, fmod])

      net_loss = initial_net_loss
      for idx, (qnode, fmod) in tqdm(enumerate(finetune_group), total=len(finetune_group)):
        is_cached = self.is_cached(qnode, len(net_inputs[0]))
        # cache only when the node is cached and falls in the second half of the group
        need_cache = is_cached and idx >= len(finetune_group) / 2
                  
        net_loss = self.optimize_layer_v2(qnode, fmod, layer_act_pair, net_inputs, net_outputs, net_loss, device, need_cache)
      print(f"%%%%%%%%%%%%%%%%% final opt net loss:{net_loss.avg}")

        # print(f"{qnode.name}({need_cache}):{net_loss}")
            
    NndctScreenLogger().info(f"=>Export fast finetuned parameters ...")
    # export finetuned parameters
    self.quantizer.export_param()
Example 19
  def clone_quant_module(cls, quant_module):
    quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
  
    if _is_module_hooked(quant_module):
      cls.detach_node_from_module(quant_module)
      cls.hook_module_with_quantizer(quant_module, None)
      new_quant_module = copy.deepcopy(quant_module)
      cls.hook_module_with_node(quant_module, quantizer.graph)
      cls.hook_module_with_quantizer(quant_module, quantizer)
      new_graph = Graph(graph_name=quantizer.graph.name)
      new_graph.clone_from(quantizer.graph)
      cls.hook_module_with_node(new_quant_module, new_graph)
      cls.hook_module_with_quantizer(new_quant_module, quantizer)
    else:
      new_quant_module = copy.deepcopy(quant_module)

    return new_quant_module
Example 20
def build_aten_torch_ops_table():
  op_gathering_fns = (_get_tensor_ops, 
                      _get_nn_functional_ops, 
                      _get_torchscript_builtins, 
                      _get_global_builtins, 
                      _get_math_builtins,
                      )
  schema2torchop = GLOBAL_MAP.get_ele(NNDCT_KEYS.TORCH_SCHEMA_OP_TABLE)
  # schema_lut = GLOBAL_MAP.get_ele(NNDCT_KEYS.SCHEMA_LUT)
  if not schema2torchop:
    
    schema2torchop: Dict[str, TorchOp] = {}
    GLOBAL_MAP.set_map(NNDCT_KEYS.TORCH_SCHEMA_OP_TABLE, schema2torchop)

    # schema_lut: Dict[Tuple(str, int), "Schema"] = {}
    for fn in op_gathering_fns:
      fn()
Example 21
def dump_xmodel(output_dir="quantize_result", deploy_check=False):
    r"""converts module to xmodel for deployment
  compilation only works when quantm model = 2.
  The xmodel and some checking data will be generated under work dir.

  Args:
    deploy_check(bool): if true, can dump blobs and parameters of model for deployment verification

  Returns:
    None
  """
    quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
    if quantizer and quantizer.quant_mode > 1:
        nndct_utils.create_work_dir(output_dir)

        # compile to xmodel
        try:
            compiler = CompilerFactory.get_compiler("xmodel")
            NndctScreenLogger().info("=>Converting to xmodel ...")
            compiler.do_compile(nndct_graph=quantizer.Nndctgraph,
                                quant_config_info=quantizer.quant_config,
                                output_file_name=os.path.join(
                                    output_dir, quantizer.Nndctgraph.name))

        except AddXopError as e:
            NndctScreenLogger().error(
                f"Failed to convert nndct graph to xmodel ({str(e)}).")
        else:
            NndctScreenLogger().info(
                f"=>Successfully convert to xmodel.({compiler.xmodel_file})")

        # dump data for accuracy check
        if deploy_check:
            NndctScreenLogger().info("=>Dumping checking data...")
            update_nndct_blob_data(quantizer.quant_model, quantizer.Nndctgraph)
            checker = DeployChecker(output_dir_name=output_dir)
            checker.dump_nodes_output(
                quantizer.Nndctgraph,
                quantizer.quant_config,
                round_method=quantizer.quant_opt['round_method'])

            set_outputs_recorder_status(quantizer.quant_model, False)
            NndctScreenLogger().info(
                f"=>Finished dumping data. ({checker.dump_folder})")
Example 22
    def export_onnx_model(self, output_dir, verbose=False):
        from torch.onnx import register_custom_op_symbolic
        from torch.onnx.symbolic_helper import parse_args
        import sys
        torch_version = torch.__version__.split('.')
        if int(torch_version[0]) == 1 and int(torch_version[1]) < 7:
            NndctScreenLogger().error(
                'Exporting an onnx model is only supported with pytorch 1.7 and later versions'
            )
            return

        @parse_args("v", "i", "i", "f", "i", "i", "i", "i")
        def symbolic_fix_neuron(g, input, valmin, valmax, valamp, zero_point,
                                method, device_id, inplace):
            #print(f'{valmax} {valamp} {method} {device_id}')
            if valamp < sys.float_info.min:
                # avoid the exporter generating double type
                scale = torch.tensor(sys.float_info.max).float()
            else:
                # avoid the exporter generating double type
                scale = torch.tensor(1.0 / valamp).float()
            # ONNX requires zero_point to be a tensor
            zero_point = torch.tensor(0, dtype=torch.int8)
            return g.op("DequantizeLinear",
                        g.op("QuantizeLinear", input, scale, zero_point),
                        scale, zero_point)

        register_custom_op_symbolic("vai::fix_neuron", symbolic_fix_neuron, 9)
        output_file = os.path.join(
            output_dir, f"{self.quantizer.quant_model._get_name()}_int.onnx")
        opset_version = torch.onnx.symbolic_helper._onnx_stable_opsets[-1]
        device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
        self.quantizer.reset_status_for_exporting()
        model, input_args = to_device(self.quantizer.quant_model,
                                      self._example_inputs, device)
        torch.onnx.export(self.quantizer.quant_model,
                          input_args,
                          output_file,
                          verbose=verbose,
                          opset_version=opset_version)
Example 23
 def export_traced_torch_script(self, output_dir, verbose=False):
     torch_version = torch.__version__.split('.')
     if int(torch_version[0]) == 1 and int(torch_version[1]) < 7:
         NndctScreenLogger().error(
             'Exporting torch script is only supported with pytorch 1.7 and later versions'
         )
         return
     self.quantizer.reset_status_for_exporting()
     device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
     force_cpu = os.getenv('NNDCT_FORCE_CPU_DUMP')
     if force_cpu is not None:
         device = torch.device('cpu')
         GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_DEVICE, device)
     model, input_args = to_device(self.quantizer.quant_model,
                                   self._example_inputs, device)
     script_module = torch.jit.trace(model, input_args, check_trace=False)
     output_file = os.path.join(
         output_dir, f"{self.quantizer.quant_model._get_name()}_int.pt")
     if verbose:
         print(script_module.inlined_graph)
     torch.jit.save(script_module, output_file)
Example 24
    def forward(self, input):

        qinput = quantize_tensors([input], self.node, tensor_type='input')[0]

        if NndctOption.nndct_quant_off.value or NndctOption.nndct_cv_app.value:
            output = super().forward(qinput)
            output = quantize_tensors([output], self.node)[0]
        elif self.quant_mode > 0:
            output = torch.empty_like(qinput)
            if NndctOption.nndct_tanh_sigmoid_sim.value > 0:
                NndctTanhSimulation(input, output)
                output = quantize_tensors([output], self.node)[0]
            else:
                input_name = self.node.in_nodes[0]
                fragpos = self.quantizer.get_quant_config(input_name, False)[1]
                quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
                Ttable = TANH_TABLE.table.to(quant_device)
                output = output.to(quant_device)
                NndctTanhTableLookup(input, Ttable, output, fragpos)
        else:
            output = super().forward(qinput)

        return output
Example 25
def maybe_get_quantizer(quantizer=None):
    quantizer = quantizer or GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
    if quantizer:
        return quantizer.quant_mode, quantizer
    else:
        return GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_MODE), None
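
A typical call-site pattern this helper enables (a sketch; the tensor name and node are made up): the same code path works whether a quantizer is passed explicitly, registered globally, or absent altogether.

quant_mode, quantizer = maybe_get_quantizer()
if quantizer is not None:
  x = quantizer.do_quantize(x, "conv1_out", node)  # hypothetical tensor name/node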
Example 26
    def do_scan(self, res, name, node=None, tensor_type='input'):
        # keep quantization steps after fast finetune
        if self.keep_fp:
            return self.do_quantize(res, name, node, tensor_type)

        # forward quant graph but not quantize parameter and activation
        if NndctOption.nndct_quant_off.value:
            if self.inplace:
                return res
            else:
                return res.clone().detach()

        res_save = None
        if isinstance(res.values, torch.Tensor):
            res_save = res
            res = res.values.data

        if res.dtype != torch.float32 and res.dtype != torch.double:
            NndctScreenLogger().warning_once(
                f'The tensor type of {node.name} is {str(res.dtype)}. Only float32/double quantization is supported.'
            )
            return res_save if res_save is not None else res

        quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
        if res.device.type != quant_device.type:
            raise TypeError(
                "Device of quantizer is {}, device of model and data should match device of quantizer"
                .format(quant_device.type))

        # get fixed position
        bnfp = self.get_quant_config(name, False, tensor_type)

        # hardware cut method
        mth = 4 if self.lstm else 2

        if NndctOption.nndct_use_torch_quantizer.value is True:
            mth = -1
        elif tensor_type == 'param':
            mth = 3

        scope = 5 if NndctOption.nndct_diffs_mode.value == "mse" else 1
        # set fix pos scanning scope to 1 for some type of tensors
        if (node.op.type in [NNDCT_OP.INPUT, NNDCT_OP.QUANT_STUB]):
            scope = 1
        if (self.lstm and tensor_type == 'input'):
            scope = 1
            res = res.detach().clone()

        Tbuffer = torch.empty_like(res).to(quant_device)
        Tfixpos = torch.tensor(
            [1], dtype=torch.get_default_dtype()).to(quant_device)

        # activations always calculate fix pos;
        # otherwise calculate fix pos only if it is None;
        # always calculate fix pos in finetune mode (quant_mode == 3)

        if tensor_type != 'param' or bnfp[1] is None or self.quant_mode == 3:
            py_nndct.nn.NndctDiffsFixPos(Tinput=res,
                                         Tbuffer=Tbuffer,
                                         Tfixpos=Tfixpos,
                                         bit_width=bnfp[0],
                                         range=scope,
                                         method=mth)
            bnfp[1] = int(Tfixpos.item())
            # limit max fix pos to 12 if bit width <= 8, others limit to 15
            if bnfp[0] <= 8 or self.lstm:
                max_fp = NndctOption.nndct_max_fix_position.value
                bnfp[1] = min(max_fp, bnfp[1])
            else:
                bnfp[1] = min(15, bnfp[1])
            # record fix pos of activation
            if tensor_type != 'param':
                self.config_history[tensor_type][name].append(bnfp[1])
                if (NndctOption.nndct_stat.value > 1):
                    print(
                        f'---- fp history: {stats.mode(np.array(self.config_history[tensor_type][name]))}'
                    )
                data = np.array(self.config_history[tensor_type][name])
                bnfp[1] = stats.mode(data)[0][0]
                bnfp[1] = bnfp[1].astype(np.int32).tolist()
            self.set_quant_config(name, bnfp, tensor_type)
            if (NndctOption.nndct_stat.value > 1):
                print('---- quant %s tensor: %s with bw = %d and fp = %g' %
                      (tensor_type, name, bnfp[0], bnfp[1]))

            # get 2^bit_width and 2^fracpos
            bnfp = self.get_quant_config(name, True, tensor_type)

            if (NndctOption.nndct_stat.value > 2):
                quant_data = nndct_quant.QuantizeData(
                    name,
                    res.cpu().detach().numpy())

            # do quantization for parameter or activation
            res = fake_quantize_per_tensor(res, bnfp[1], 0, -bnfp[0],
                                           bnfp[0] - 1, mth, self.inplace)

            if (NndctOption.nndct_stat.value > 2):
                #quant_data.all_close(res.cpu().detach().numpy())
                global global_snr_inv
                quant_efficiency, sqnr = quant_data.quant_efficiency(
                    res.cpu().detach().numpy(), math.log2(bnfp[0]))
                global_snr_inv += 1 / sqnr
                if quant_efficiency < 3.0:
                    print(
                        f"quant_efficiency={quant_efficiency}, {quant_data._name}\n"
                    )
                    print('Statistic [Min, Max, Mean, Std]:')
                    print('[{}, {}, {}, {}]'.format(res.min(), res.max(),
                                                    res.mean(), res.std()))
                    print('histogram: {}'.format(
                        res.histc(bins=10).cpu().detach().numpy()))
                    t = res
                    if tensor_type != 'param':
                        t = res.transpose(0, 1)
                    print('Channel number:{}'.format(t.shape[0]))
                    print('Channel-wise statistic [Min, Max, Mean, Std]:')
                    for c in range(t.shape[0]):
                        print('[{}, {}, {}, {}]'.format(
                            t[c].min(), t[c].max(), t[c].mean(), t[c].std()))
                        print('histogram: {}'.format(
                            t[c].histc(bins=10).cpu().detach().numpy()))

        if res_save is not None:
            res_save.values.data = res
            res = res_save

        return res
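
Given the '# get 2^bit_width and 2^fracpos' comment and the call fake_quantize_per_tensor(res, bnfp[1], 0, -bnfp[0], bnfp[0] - 1, ...), bnfp here is evidently [2^(bit_width-1), 2^fix_pos]. A toy version of the power-of-two fake quantization this implies (an illustration under those assumptions, not the nndct kernel, and ignoring the rounding-method argument):

import torch

def toy_fake_quantize_per_tensor(x, inv_step, zero_point, quant_min, quant_max):
  # inv_step  = 2 ** fix_pos            (bnfp[1], printed elsewhere as "1/step")
  # quant_min = -2 ** (bit_width - 1)   (-bnfp[0])
  # quant_max =  2 ** (bit_width - 1) - 1
  q = torch.clamp(torch.round(x * inv_step) + zero_point, quant_min, quant_max)
  return (q - zero_point) / inv_step

x = torch.randn(8)
xq = toy_fake_quantize_per_tensor(x, inv_step=2 ** 6, zero_point=0,
                                  quant_min=-128, quant_max=127)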
Example 27
    def do_quantize(self, blob, name, node=None, tensor_type='input'):
        # forward quant graph but not quantize parameter and activation
        if NndctOption.nndct_quant_off.value:
            if self.inplace:
                return blob
            else:
                return blob.clone().detach()

        blob_save = None
        if isinstance(blob.values, torch.Tensor):
            blob_save = blob
            blob = blob.values.data

        if blob.dtype != torch.float32 and blob.dtype != torch.double:
            NndctScreenLogger().warning_once(
                f'The tensor type of {node.name} is {str(blob.dtype)}. Only float32/double quantization is supported.'
            )
            return blob_save if blob_save is not None else blob

        quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
        if blob.device.type != quant_device.type:
            raise TypeError(
                "Device of quantizer is {}, device of model and data should match device of quantizer"
                .format(quant_device.type))

        if (NndctOption.nndct_stat.value > 2):
            quant_data = nndct_quant.QuantizeData(name,
                                                  blob.cpu().detach().numpy())
        # quantize the tensor
        bnfp = self.get_quant_config(name, True, tensor_type)
        if (NndctOption.nndct_stat.value > 1):
            print('---- quant %s tensor: %s with 1/step = %g' %
                  (tensor_type, name, bnfp[1]))
        # hardware cut method
        mth = 4 if self.lstm else 2

        if NndctOption.nndct_use_torch_quantizer.value is True:
            mth = -1
        elif tensor_type == 'param':
            mth = 3

        res = fake_quantize_per_tensor(blob, bnfp[1], 0, -bnfp[0], bnfp[0] - 1,
                                       mth, self.inplace)

        if (NndctOption.nndct_stat.value > 2):
            global global_snr_inv
            quant_efficiency, sqnr = quant_data.quant_efficiency(
                res.cpu().detach().numpy(), 8)
            global_snr_inv += 1 / sqnr
            if quant_efficiency < 3.0:
                print(
                    f"quant_efficiency={quant_efficiency}, global_snr_inv={global_snr_inv} {quant_data._name}\n"
                )
                print(
                    'Network input channel-wise statistic [Min, Max, Mean, Std]:'
                )
                print('[{}, {}, {}, {}]'.format(res.min(), res.max(),
                                                res.mean(), res.std()))
                print('histogram: {}'.format(
                    res.histc(bins=10).cpu().detach().numpy()))
                t = res
                if tensor_type != 'param':
                    t = res.transpose(0, 1)
                print('Channel number:{}'.format(t.shape[0]))
                print('Channel-wise statistic [Min, Max, Mean, Std]:')
                for c in range(t.shape[0]):
                    print('[{}, {}, {}, {}]'.format(t[c].min(), t[c].max(),
                                                    t[c].mean(), t[c].std()))
                    print('histogram: {}'.format(
                        t[c].histc(bins=10).cpu().detach().numpy()))

        # update param to nndct graph
        if tensor_type == 'param' and not self.exporting:
            self.update_param_to_nndct(node, name, res.cpu().detach().numpy())

        if blob_save is not None:
            blob_save.values.data = res
            res = blob_save

        return res
Example 28
    def finetune(self, run_fn, run_args):
        if self.quantizer.quant_mode == 2:
            NndctScreenLogger().warning(
                "Finetune function will be ignored in test mode!")
            return
        NndctScreenLogger().info(
            "=>Finetuning module parameters for better quantization accuracy... "
        )

        # backup option value
        opt_bak_param_corr = NndctOption.nndct_param_corr.value
        set_option_value("nndct_param_corr", 0)

        # cache input and output
        #print("**** cache input and output")
        last_quant_nodes = self.collect_last_quant_nodes()
        with torch.no_grad():
            hook_mods = []
            for node in self.graph.nodes:
                if node.op.type == NNDCT_OP.INPUT or \
                node in last_quant_nodes:
                    # (self.quantizer.configer.is_node_quantizable(node, False) and
                    # len(node.op.params) > 0):
                    hook_mods.append(node.module)

            handlers = self.hook_cache_output(hook_mods)

            set_option_value("nndct_quant_off", True)
            run_fn(*run_args)
            self.clean_hooks(handlers)

            # for mod in self.quant_model.modules():
            #   if hasattr(mod, "node") and mod.node.op.type in [NNDCT_OP.DENSE, NNDCT_OP.CONV2D, NNDCT_OP.DEPTHWISE_CONV2D, NNDCT_OP.CONVTRANSPOSE2D]:
            #     self._float_weights[mod.node].append(mod.weight.detach().cpu())

        torch.cuda.empty_cache()

        # calibration to get a set of quantization steps
        #print("****calibration to get float model tensor values")
        for mod in self.quant_model.modules():
            if hasattr(mod, "param_quantized"):
                setattr(mod, "param_quantized", False)

        # evaluation to get float model tensors
        set_option_value("nndct_quant_off", False)
        with torch.no_grad():
            run_fn(*run_args)
        torch.cuda.empty_cache()

        #print("****Parameter finetuning")
        device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
        graph_searcher = GraphSearcher(self.graph)
        node_sets = graph_searcher.find_nodes_from_type([
            PatternType(pattern=[NNDCT_OP.CONV2D, NNDCT_OP.RELU]),
            PatternType(pattern=[NNDCT_OP.CONV2D, NNDCT_OP.RELU6]),
            PatternType(pattern=[NNDCT_OP.DEPTHWISE_CONV2D, NNDCT_OP.RELU]),
            PatternType(pattern=[NNDCT_OP.DEPTHWISE_CONV2D, NNDCT_OP.RELU6]),
            PatternType(pattern=[NNDCT_OP.CONVTRANSPOSE2D, NNDCT_OP.RELU])
        ])

        layer_act_group = {}
        for _, node_list in node_sets.items():
            for nodeset in node_list:
                conv, act = nodeset
                layer_act_group[conv] = act

        # to avoid quantization steps change among parameter finetuning
        self.quantizer.quant_mode = 2

        net_inputs = []
        for node in self.input_nodes:
            cached_net_input = [
                out for out in self.cached_outputs[node.module]
            ]
            net_inputs.append(cached_net_input)

        # last_quant_nodes = self.collect_last_quant_nodes()
        last_quant_mods = [node.module for node in last_quant_nodes]

        handlers = self.hook_cache_output(last_quant_mods, hook_type="single")
        net_loss = self.eval_loss(net_inputs, last_quant_mods, device)
        self.clean_hooks(handlers)
        # model.clean_hooks()
        torch.cuda.empty_cache()

        finetune_group = {}
        # hook_mods = []
        for qmod, fmod in zip(self._quant_model.modules(),
                              self._float_model.modules()):
            if hasattr(qmod, "node"):
                if (self.quantizer.configer.is_node_quantizable(
                        qmod.node, False) and len(qmod.node.op.params) > 0):
                    finetune_group[qmod.node] = [qmod, fmod]

                    # hook_mods.append(fmod)
        # self.hook_cache_output(hook_mods, hook_type="single")

        for node, module_pair in finetune_group.items():
            # if self.quantizer.configer.is_node_quantizable(node, False) and \
            #   len(node.op.params) > 0:
            quant_layer, float_layer = module_pair
            pn_node = self.graph.parents(node)[0]
            handlers = self.hook_cache_output([pn_node.module],
                                              hook_type="single")
            layer_inputs = []
            with torch.no_grad():
                for input_args in zip(*net_inputs):
                    new_input_args = []
                    for ip in input_args:
                        if isinstance(ip, torch.Tensor):
                            new_input_args.append(ip.to(device))
                    _ = self.quant_model(*new_input_args)

                    layer_inputs.append(
                        self.cached_output[pn_node.module].detach().cpu())
            self.clean_hooks(handlers)
            del self.cached_output[pn_node.module]
            #print(f"Tuning {node.name}")
            net_loss = self.optimize_layer(node, float_layer, layer_inputs,
                                           layer_act_group, net_inputs,
                                           net_loss, last_quant_mods, device)
            del layer_inputs
            torch.cuda.empty_cache()

        # recover quantizer status
        for node in self.graph.nodes:
            for _, fp_history in self.quantizer.fp_history.items():
                if node.name in fp_history:
                    fp_history[node.name].clear()
        for mod in self.quant_model.modules():
            if hasattr(mod, "param_quantized"):
                setattr(mod, "param_quantized", False)
        for mod in self.quant_model.modules():
            if hasattr(mod, "param_saved"):
                setattr(mod, "param_saved", False)
        self.quantizer.quant_mode = 1
        set_option_value("nndct_param_corr", opt_bak_param_corr)

        # export finetuned parameters
        self.quantizer.export_param()
Example 29
def dump_xmodel(output_dir="quantize_result", deploy_check=False, lstm_app=False):
  r"""converts module to xmodel for deployment
  compilation only works when quantm model = 2.
  The xmodel and some checking data will be generated under work dir.

  Args:
    deploy_check(bool): if true, can dump blobs and parameters of model for deployment verification

  Returns:
    None
  """
  quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
  if quantizer and quantizer.quant_mode > 1:
    nndct_utils.create_work_dir(output_dir)
    
    # compile to xmodel
    
    compiler = CompilerFactory.get_compiler("xmodel")
      
    NndctScreenLogger().info("=>Converting to xmodel ...")
    deploy_graphs = get_deploy_graph_list(quantizer.quant_model, quantizer.Nndctgraph)
    #depoly_infos = compiler.get_deloy_graph_infos(quantizer, deploy_graphs)
    xmodel_deploy_infos, dump_deploy_infos = compiler.get_xmodel_and_dump_infos(quantizer, deploy_graphs)
    if not lstm_app:
      for node in xmodel_deploy_infos[0].dev_graph.nodes:
        error_out = False
        if node.op.type not in [NNDCT_OP.INPUT, NNDCT_OP.QUANT_STUB]:
          continue
        for i, tensor in enumerate(node.out_tensors):
          if tensor.shape and tensor.shape[0] != 1:
            NndctScreenLogger().error("Batch size must be 1 when exporting xmodel.")
            error_out = True
            break
        if error_out:
          break

    for deploy_info in dump_deploy_infos:
      # dump data for accuracy check
      if deploy_check:
        NndctScreenLogger().info(f"=>Dumping '{deploy_info.dev_graph.name}' checking data...")
        if lstm_app:
          checker = DeployChecker(output_dir_name=output_dir, data_format='txt')
          checker.update_dump_folder(f"{deploy_info.dev_graph.name}/frame_0")
          select_batch = True
        else:
          checker = DeployChecker(output_dir_name=output_dir)
          checker.update_dump_folder(f"{deploy_info.dev_graph.name}")
          select_batch = False
        checker.dump_nodes_output(
            deploy_info.dev_graph,
            deploy_info.quant_info,
            round_method=quantizer.quant_opt['round_method'], select_batch=select_batch)

        NndctScreenLogger().info(f"=>Finished dumping data. ({checker.dump_folder})")

    for deploy_info in xmodel_deploy_infos:
      try:
        xgraph = compiler.do_compile(
            deploy_info.dev_graph,
            quant_config_info=deploy_info.quant_info,
            output_file_name=os.path.join(output_dir, deploy_info.dev_graph.name))

      except AddXopError as e:
        NndctScreenLogger().error(f"Failed to convert graph '{deploy_info.dev_graph.name}' to xmodel.")
        raise e

      compiler.verify_xmodel(deploy_info.dev_graph, xgraph)
    set_outputs_recorder_status(quantizer.quant_model, False)
Example 30
    def do_scan(self, res, name, node=None, tensor_type='input'):
        # forward quant graph but not quantize parameter and activation
        if NndctOption.nndct_quant_off.value:
            if self.inplace:
                return res
            else:
                return res.clone().detach()

        res_save = None
        if isinstance(res.values, torch.Tensor):
            res_save = res
            res = res.values.data

        quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
        if res.device.type != quant_device.type:
            raise TypeError(
                "Device of quantizer is {}, device of model and data should match device of quantizer"
                .format(quant_device.type))

        # get fixed position
        q_config = self.get_quant_config(name, False, tensor_type)

        # turn off quantization if bit width is more than 32
        if q_config[0] >= 32:
            if self.inplace:
                return res
            else:
                return res.clone().detach()

        q_algorithm = self.get_quant_algo(name, tensor_type)
        # get quant algorithm
        #if tensor_type != 'param' or q_config[1] is None or q_config[2] is None:
        if q_algorithm.calib_or_not(tensor_type):
            #q_algorithm = self.get_quant_algo(name, tensor_type)
            q_algorithm.calibrate(res)

            if q_algorithm.statistic_local:
                # quant_tensor = q_algorithm.fake_quantize(res, self.inplace)
                # if self.inplace:
                #   res.data = quant_tensor.data.clone()
                # else:
                #   res = quant_tensor

                q_config[1] = q_algorithm.scale
                q_config[2] = q_algorithm.zero_point
                q_config[3] = q_algorithm.float_max
                if tensor_type != 'param':
                    self.config_history[tensor_type][name].append(
                        [q_config[1], q_config[2], q_config[3]])
                    data = np.array(
                        self.config_history[tensor_type][name]).transpose(1, 0)
                    q_config[1], q_config[2], q_config[3] = (
                        q_algorithm.act_scale_stats(data))
                    #q_algorithm.scale, q_algorithm.zero_point, q_algorithm.float_max = q_config[1], q_config[2], q_config[3]
                self.set_quant_config(name, q_config, tensor_type)

                quant_tensor = q_algorithm.fake_quantize(res, self.inplace)
                if self.inplace:
                    res.data = quant_tensor.data.clone()
                else:
                    res = quant_tensor

        if res_save is not None:
            res_save.values.data = res
            res = res_save
        return res