def __init__(self,
             quant_mode: str,
             module: torch.nn.Module,
             input_args: Union[torch.Tensor, Sequence[Any]] = None,
             state_dict_file: Optional[str] = None,
             output_dir: str = "quantize_result",
             bitwidth_w: int = 8,
             bitwidth_a: int = 8,
             mix_bit: bool = False,
             device: torch.device = torch.device("cuda"),
             lstm_app: bool = False):
  # Check argument types
  self._check_args(module, input_args)

  # Check device availability; fall back to CPU if CUDA is unusable
  if device.type == "cuda":
    if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
      device = torch.device("cpu")
      NndctScreenLogger().warning(
          "CUDA is not available, change device to CPU")

  # Transform the torch module to the quantized module format
  nndct_utils.create_work_dir(output_dir)

  # Create a quantizer object, which controls the whole quantization flow
  quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
                                    bits_bias=bitwidth_a,
                                    bits_activation=bitwidth_a,
                                    mix_bit=mix_bit)
  quantizer, qmode = self._init_quant_env(quant_mode,
                                          output_dir,
                                          quant_strategy)

  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, qmode)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_DEVICE, device)
  option_util.set_option_value("nndct_cv_app", not lstm_app)

  # Prepare the quantizable module
  quant_module, graph = prepare_quantizable_module(
      module=module,
      input_args=input_args,
      export_folder=output_dir,
      state_dict_file=state_dict_file,
      quant_mode=qmode,
      device=device)

  # Enable recording of per-layer outputs
  if qmode > 1:
    register_output_hook(quant_module, record_once=True)
    set_outputs_recorder_status(quant_module, True)

  # Initialize the quantizer
  quantizer.setup(graph, False, lstm_app)

  # Hook the module up with the quantizer
  quantizer.quant_model = quant_module

  self.quantizer = quantizer
  self.adaquant = None
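# ---------------------------------------------------------------------------
# Usage sketch for the CNN-style flow above (assumption: this __init__ lives
# in a processor class, here called TorchQuantProcessor; the class name and
# the ResNet example are illustrative, not taken from this file):
#
#   import torch
#   from torchvision.models import resnet18
#
#   model = resnet18().eval()
#   dummy_input = torch.randn(1, 3, 224, 224)
#
#   # "calib" collects quantization statistics; a second pass with
#   # quant_mode="test" then evaluates/exports the calibrated model.
#   processor = TorchQuantProcessor(
#       quant_mode="calib",
#       module=model,
#       input_args=dummy_input,
#       output_dir="quantize_result",
#       bitwidth_w=8,
#       bitwidth_a=8,
#       device=torch.device("cuda"))
#   quant_model = processor.quantizer.quant_model  # run calibration data through this
# ---------------------------------------------------------------------------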
def __init__(self,
             quant_mode: str,
             module: torch.nn.Module,
             input_args: Union[torch.Tensor, Sequence[Any]] = None,
             state_dict_file: Optional[str] = None,
             output_dir: str = "quantize_result",
             bitwidth_w: int = 8,
             bitwidth_a: int = 8,
             device: torch.device = torch.device("cuda"),
             lstm_app: bool = True):
  self._export_folder = output_dir
  # Check argument types
  self._check_args(module)

  # Check device availability; fall back to CPU if CUDA is unusable
  if device.type == "cuda":
    if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
      device = torch.device("cpu")
      NndctScreenLogger().warning(
          "CUDA is not available, change device to CPU")

  # Transform the torch module to the quantized module format
  nndct_utils.create_work_dir(output_dir)

  # Turn off weights equalization and bias correction
  option_util.set_option_value("nndct_quant_opt", 0)
  option_util.set_option_value("nndct_param_corr", False)
  option_util.set_option_value("nndct_equalization", False)
  option_util.set_option_value("nndct_cv_app", False)

  # Convert the LSTM and script it so control flow is captured in the graph,
  # which is why no example inputs are needed here
  transformed_module = convert_lstm(module)
  script_module = torch.jit.script(transformed_module)

  quant_module, graph = prepare_quantizable_module(
      module=script_module,
      input_args=None,
      export_folder=output_dir,
      state_dict_file=state_dict_file,
      quant_mode=quant_mode,
      device=device)

  # Create a quantizer object, which controls the whole quantization flow
  quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
                                    bits_bias=bitwidth_w,
                                    bits_activation=bitwidth_a)
  quantizer, qmode = self._init_quant_env(quant_mode,
                                          output_dir,
                                          quant_strategy)

  quantizer.quant_model = quant_module.to(device)
  quantizer.setup(graph, rnn_front_end=True, lstm=True)
  self.quantizer = quantizer
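# ---------------------------------------------------------------------------
# Usage sketch for the scripted-LSTM flow above (assumption: the enclosing
# class is an LSTM quant processor, here called LSTMTorchQuantProcessor; the
# toy model is illustrative). Because the module is converted and scripted
# internally via torch.jit.script, no dummy inputs are required for tracing:
#
#   import torch
#
#   class SeqModel(torch.nn.Module):
#     def __init__(self):
#       super().__init__()
#       self.lstm = torch.nn.LSTM(input_size=64, hidden_size=128)
#
#     def forward(self, x):
#       out, _ = self.lstm(x)
#       return out
#
#   processor = LSTMTorchQuantProcessor(
#       quant_mode="calib",
#       module=SeqModel().eval(),
#       output_dir="quantize_result",
#       bitwidth_w=8,
#       bitwidth_a=8)
#   quant_model = processor.quantizer.quant_model
# ---------------------------------------------------------------------------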
def __init__(self,
             quant_mode: str,
             module: torch.nn.Module,
             input_args: Union[torch.Tensor, Sequence[Any]] = None,
             state_dict_file: Optional[str] = None,
             output_dir: str = "quantize_result",
             bitwidth_w: int = 8,
             bitwidth_a: int = 8,
             device: torch.device = torch.device("cuda"),
             lstm_app: bool = True):
  self._export_folder = output_dir
  # Check argument types
  self._check_args(module)

  # Check device availability; fall back to CPU if CUDA is unusable
  if device.type == "cuda":
    if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
      device = torch.device("cpu")
      NndctScreenLogger().warning(
          "CUDA is not available, change device to CPU")

  # Transform the torch module to the quantized module format
  nndct_utils.create_work_dir(output_dir)

  # Turn off weights equalization and bias correction
  option_util.set_option_value("nndct_quant_opt", 0)
  option_util.set_option_value("nndct_param_corr", False)
  option_util.set_option_value("nndct_equalization", False)

  # Create a quantizer object, which controls the whole quantization flow
  quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
                                    bits_bias=bitwidth_w,
                                    bits_activation=bitwidth_a)
  quantizer, qmode = self._init_quant_env(quant_mode,
                                          output_dir,
                                          quant_strategy)

  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, qmode)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_DEVICE, device)

  standard_RNNs, customized_RNNs = self._analyse_module(module)

  if len(standard_RNNs) == 0 and len(customized_RNNs) == 0:
    raise RuntimeError(
        f"The top module '{module._get_name()}' should have at least one LSTM module.")

  self._modules_info = defaultdict(dict)

  # Process customized LSTM cells
  for layer_name, layer_module in customized_RNNs.items():
    for cell_name, cell_module in layer_module.named_children():
      lstm_direction = "forward" if layer_module.go_forward else "backward"
      full_cell_name = ".".join([layer_name, cell_name])
      layer_graph = self._get_customized_LSTM_graph(full_cell_name,
                                                    cell_module,
                                                    layer_module.input_size,
                                                    layer_module.hidden_size,
                                                    layer_module.memory_size)
      self._modules_info[full_cell_name]["layers_graph"] = [{
          lstm_direction: layer_graph
      }]
      self._modules_info[full_cell_name]["stack_mode"] = None
      self._modules_info[full_cell_name]["layer_module"] = layer_module

  # Process standard LSTM/GRU layers
  for name, rnn_module in standard_RNNs.items():
    layers_graph = self._get_standard_RNN_graph(
        graph_name=name, lstm_module=rnn_module)
    self._modules_info[name]["layers_graph"] = layers_graph
    self._modules_info[name]["input_size"] = [rnn_module.input_size] * rnn_module.num_layers
    self._modules_info[name]["hidden_size"] = [rnn_module.hidden_size] * rnn_module.num_layers
    self._modules_info[name]["memory_size"] = [rnn_module.hidden_size] * rnn_module.num_layers
    self._modules_info[name]["stack_mode"] = (
        "bidirectional" if rnn_module.bidirectional else "unidirectional")
    self._modules_info[name]["batch_first"] = rnn_module.batch_first is True

    if rnn_module.mode == "LSTM":
      self._modules_info[name]["mode"] = "LSTM"
    elif rnn_module.mode == "GRU":
      self._modules_info[name]["mode"] = "GRU"

  # Merge the per-layer graphs into one top-level graph
  top_graph = self._merge_subgraphs()

  # Turn on the quantizer
  quantizer.setup(top_graph, rnn_front_end=True, lstm=True)

  # Write out and reload the quantizable cell modules
  module_graph_map = self._rebuild_layer_module()

  # Replace each float module with its quantizable counterpart
  for name, info in self._modules_info.items():
    if info["stack_mode"] is not None:
      self._build_stack_lstm_module(info)
    else:
      info["QLSTM"] = list(info["layers_module"][0].values())[0]
    module = self._insert_QuantLstm_in_top_module(module, name, info)

  # Move the modules info into layers info
  self._convert_modules_info_to_layers(module_graph_map)

  # Hook the module up with the quantizer
  quantizer.quant_model = module
  self.quantizer = quantizer
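# ---------------------------------------------------------------------------
# Usage sketch for the graph-stitching RNN flow above (assumption: the class
# name RNNTorchQuantProcessor and the toy model are illustrative). The top
# module must contain at least one standard torch.nn.LSTM/GRU or a customized
# LSTM, otherwise the RuntimeError above is raised:
#
#   import torch
#
#   class StackedRNN(torch.nn.Module):
#     def __init__(self):
#       super().__init__()
#       self.rnn = torch.nn.LSTM(input_size=32, hidden_size=64,
#                                num_layers=2, bidirectional=True)
#
#     def forward(self, x):
#       out, _ = self.rnn(x)
#       return out
#
#   processor = RNNTorchQuantProcessor(
#       quant_mode="calib",
#       module=StackedRNN().eval(),
#       output_dir="quantize_result")
#   # The float LSTM layers have been swapped for quantizable QuantLstm
#   # modules inside the returned model:
#   quant_model = processor.quantizer.quant_model
# ---------------------------------------------------------------------------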