Example #1
    def __init__(self,
                 quant_mode: str,
                 module: torch.nn.Module,
                 input_args: Optional[Union[torch.Tensor, Sequence[Any]]] = None,
                 state_dict_file: Optional[str] = None,
                 output_dir: str = "quantize_result",
                 bitwidth_w: int = 8,
                 bitwidth_a: int = 8,
                 mix_bit: bool = False,
                 device: torch.device = torch.device("cuda"),
                 lstm_app: bool = False):
        # Check argument types
        self._check_args(module, input_args)

        # Check that the requested device is available
        if device.type == "cuda":
            if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
                device = torch.device("cpu")
                NndctScreenLogger().warning(
                    "CUDA is not available, changing device to CPU")

        # Create the output directory for quantization results
        nndct_utils.create_work_dir(output_dir)

        # Create a quantizer object, which controls the whole quantization flow
        quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
                                          bits_bias=bitwidth_a,
                                          bits_activation=bitwidth_a,
                                          mix_bit=mix_bit)
        quantizer, qmode = self._init_quant_env(quant_mode, output_dir,
                                                quant_strategy)
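        # Register the quantizer, mode, and device globally so that
        # downstream NNDCT passes can look them up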
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, qmode)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_DEVICE, device)
        # LSTM applications disable the CV-specific option
        option_util.set_option_value("nndct_cv_app", not lstm_app)

        # Prepare quantizable module
        quant_module, graph = prepare_quantizable_module(
            module=module,
            input_args=input_args,
            export_folder=output_dir,
            state_dict_file=state_dict_file,
            quant_mode=qmode,
            device=device)

        # enable recording of per-layer outputs
        if qmode > 1:
            register_output_hook(quant_module, record_once=True)
            set_outputs_recorder_status(quant_module, True)

        # initialize quantizer
        quantizer.setup(graph, False, lstm_app)

        # attach the quantized module to the quantizer
        quantizer.quant_model = quant_module

        self.quantizer = quantizer
        self.adaquant = None
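The constructor above appears to be the __init__ of Vitis AI's quantization processor (the class name is not shown in this snippet). The usual way to reach it is through the documented pytorch_nndct.apis.torch_quantizer wrapper; the sketch below shows that calibrate-then-export flow with a placeholder model and input shape, and passes a CPU device so it runs without CUDA:

import torch
from pytorch_nndct.apis import torch_quantizer

# Placeholder float model and calibration input; substitute your own.
model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU()).eval()
dummy_input = torch.randn(1, 3, 32, 32)

# "calib" collects quantization statistics; "test" evaluates the quantized model.
quantizer = torch_quantizer(quant_mode="calib",
                            module=model,
                            input_args=(dummy_input,),
                            output_dir="quantize_result",
                            device=torch.device("cpu"))
quant_model = quantizer.quant_model   # the quant_module prepared in __init__

quant_model(dummy_input)              # forward passes feed the calibration hooks
quantizer.export_quant_config()       # write quantization parameters to output_dir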
Example #2
    def __init__(self,
                 quant_mode: str,
                 module: torch.nn.Module,
                 input_args: Optional[Union[torch.Tensor, Sequence[Any]]] = None,
                 state_dict_file: Optional[str] = None,
                 output_dir: str = "quantize_result",
                 bitwidth_w: int = 8,
                 bitwidth_a: int = 8,
                 device: torch.device = torch.device("cuda"),
                 lstm_app: bool = True):
        self._export_folder = output_dir
        # Check argument types
        self._check_args(module)

        # Check that the requested device is available
        if device.type == "cuda":
            if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
                device = torch.device("cpu")
                NndctScreenLogger().warning(
                    "CUDA is not available, changing device to CPU")

        # Create the output directory for quantization results
        nndct_utils.create_work_dir(output_dir)

        # turn off weight equalization, bias correction, and the CV-app option
        option_util.set_option_value("nndct_quant_opt", 0)
        option_util.set_option_value("nndct_param_corr", False)
        option_util.set_option_value("nndct_equalization", False)
        option_util.set_option_value("nndct_cv_app", False)

        # Rewrite LSTM layers into a quantizable form, then script (rather
        # than trace) so the data-dependent timestep loop survives in the graph
        transformed_module = convert_lstm(module)
        script_module = torch.jit.script(transformed_module)
        quant_module, graph = prepare_quantizable_module(
            module=script_module,
            input_args=None,
            export_folder=output_dir,
            state_dict_file=state_dict_file,
            quant_mode=quant_mode,
            device=device)

        quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
                                          bits_bias=bitwidth_w,
                                          bits_activation=bitwidth_a)

        quantizer, qmode = self._init_quant_env(quant_mode, output_dir,
                                                quant_strategy)
        quantizer.quant_model = quant_module.to(device)

        quantizer.setup(graph, rnn_front_end=True, lstm=True)

        self.quantizer = quantizer
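Compared with Example #1, this variant first rewrites the LSTM with convert_lstm and then scripts the result, which is why no example inputs are passed to prepare_quantizable_module. A minimal stand-alone illustration of that scripting step, with a hypothetical TinyLstm standing in for the converted module:

import torch

class TinyLstm(torch.nn.Module):
    """Hypothetical stand-in for the module returned by convert_lstm."""

    def __init__(self):
        super().__init__()
        self.lstm = torch.nn.LSTM(input_size=16, hidden_size=32)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        out, _ = self.lstm(x)
        return out

module = TinyLstm().eval()
# Scripting, unlike tracing, needs no example inputs and keeps the
# data-dependent timestep loop visible to the graph parser.
script_module = torch.jit.script(module)
print(script_module.graph)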
Example #3
  def __init__(self,
               quant_mode: str,
               module: torch.nn.Module,
               input_args: Optional[Union[torch.Tensor, Sequence[Any]]] = None,
               state_dict_file: Optional[str] = None,
               output_dir: str = "quantize_result",
               bitwidth_w: int = 8,
               bitwidth_a: int = 8,
               device: torch.device = torch.device("cuda"),
               lstm_app: bool = True):
    self._export_folder = output_dir
    # Check argument types
    self._check_args(module)

    # Check that the requested device is available
    if device.type == "cuda":
      if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
        device = torch.device("cpu")
        NndctScreenLogger().warning("CUDA is not available, changing device to CPU")
    
    # Create the output directory for quantization results
    nndct_utils.create_work_dir(output_dir)
    
    # turn off weight equalization and bias correction
    option_util.set_option_value("nndct_quant_opt", 0)
    option_util.set_option_value("nndct_param_corr", False)
    option_util.set_option_value("nndct_equalization", False)
    
    # Create a quantizer object, which controls the whole quantization flow
    quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
                                      bits_bias=bitwidth_w,
                                      bits_activation=bitwidth_a)
    quantizer, qmode = self._init_quant_env(quant_mode, 
                                            output_dir,
                                            quant_strategy)
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, qmode)
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_DEVICE, device)
    
    standard_RNNs, customized_RNNs = self._analyse_module(module)

    if len(standard_RNNs) == 0 and len(customized_RNNs) == 0:
      raise RuntimeError(
          f"The top module '{module._get_name()}' must contain at least one LSTM module."
      )

    # Per-RNN metadata (graphs, sizes, stack mode), keyed by module name
    self._modules_info = defaultdict(dict)

    # process customized LSTM cells
    for layer_name, layer_module in customized_RNNs.items():
      for cell_name, cell_module in layer_module.named_children():
        lstm_direction = "forward" if layer_module.go_forward else "backward"
        full_cell_name = ".".join([layer_name, cell_name])
        layer_graph = self._get_customized_LSTM_graph(full_cell_name,
                                                      cell_module,
                                                      layer_module.input_size,
                                                      layer_module.hidden_size,
                                                      layer_module.memory_size)
        self._modules_info[full_cell_name]["layers_graph"] = [{
            lstm_direction: layer_graph
        }]
        self._modules_info[full_cell_name]["stack_mode"] = None
        self._modules_info[full_cell_name]["layer_module"] = layer_module

    # process standard LSTM/GRU modules
    for name, rnn_module in standard_RNNs.items():
      layers_graph = self._get_standard_RNN_graph(
          graph_name=name, lstm_module=rnn_module)
      self._modules_info[name]["layers_graph"] = layers_graph
      self._modules_info[name]["input_size"] = [rnn_module.input_size
                                                ] * rnn_module.num_layers
      self._modules_info[name]["hidden_size"] = [rnn_module.hidden_size
                                                 ] * rnn_module.num_layers
      self._modules_info[name]["memory_size"] = [rnn_module.hidden_size
                                                 ] * rnn_module.num_layers
      self._modules_info[name][
          "stack_mode"] = "bidirectional" if rnn_module.bidirectional else "unidirectional"
      self._modules_info[name][
          "batch_first"] = True if rnn_module.batch_first is True else False

      if rnn_module.mode == 'LSTM':
        self._modules_info[name]["mode"] = "LSTM"
      elif rnn_module.mode == "GRU": 
        self._modules_info[name]["mode"] = "GRU"

    # merge the per-layer graphs into a single top-level graph
    top_graph = self._merge_subgraphs()
    
    # set up the quantizer on the merged top-level graph
    quantizer.setup(top_graph, rnn_front_end=True, lstm=True)
    
    # write out and reload the quantizable cell modules
    module_graph_map = self._rebuild_layer_module()
    
    # replace each float module with its quantizable counterpart
    for name, info in self._modules_info.items():
      if info["stack_mode"] is not None:
        self._build_stack_lstm_module(info)
      else:
        info["QLSTM"] = list(info["layers_module"][0].values())[0]
      module = self._insert_QuantLstm_in_top_module(module, name, info)

    # convert per-module info into per-layer info
    self._convert_modules_info_to_layers(module_graph_map)

    # attach the quantized module to the quantizer
    quantizer.quant_model = module

    self.quantizer = quantizer
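Most of this constructor hinges on _analyse_module splitting the top module into standard torch RNNs and customized cells before the per-layer bookkeeping above. The rough, self-contained sketch below mimics only the standard-RNN half of that analysis (analyse_standard_rnns is a hypothetical name; the real _analyse_module is not shown in this snippet):

import torch

def analyse_standard_rnns(top: torch.nn.Module) -> dict:
    # Walk the module tree and collect built-in LSTM/GRU layers,
    # mirroring what _analyse_module presumably does for standard RNNs.
    return {name: m for name, m in top.named_modules()
            if isinstance(m, (torch.nn.LSTM, torch.nn.GRU))}

top = torch.nn.Sequential(
    torch.nn.LSTM(input_size=8, hidden_size=16, num_layers=2, bidirectional=True))

for name, rnn in analyse_standard_rnns(top).items():
    # The same per-layer lists Example #3 stores in self._modules_info.
    print(name, rnn.mode,
          "input_size:", [rnn.input_size] * rnn.num_layers,
          "stack_mode:", "bidirectional" if rnn.bidirectional else "unidirectional")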