def prepare_quantizable_module(
    module: torch.nn.Module,
    input_args: Union[torch.Tensor, Sequence[Any]],
    export_folder: str,
    state_dict_file: Optional[str] = None,
    quant_mode: int = 1,
    device: torch.device = torch.device("cuda")) -> Tuple[torch.nn.Module, Graph]:

  nndct_utils.create_work_dir(export_folder)

  if isinstance(state_dict_file, str):
    state_dict = torch.load(state_dict_file)
    module.load_state_dict(state_dict)

  export_file = os.path.join(export_folder,
                             module._get_name() + TorchSymbol.SCRIPT_SUFFIX)

  # switch to specified device
  module, input_args = to_device(module, input_args, device)

  # parse origin module to graph
  NndctScreenLogger().info(f"=>Parsing {module._get_name()}...")
  graph = parse_module(module, input_args)
  NndctScreenLogger().info(f"=>Quantizable module is generated.({export_file})")

  # recreate quantizable module from graph
  quant_module = recreate_nndct_module(graph, True, export_file).to(device)
  quant_module.train(mode=module.training)

  # hook module with graph
  connect_module_with_graph(quant_module, graph)

  return quant_module, graph
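# A hedged usage sketch for prepare_quantizable_module(); the toy model below is
# a placeholder introduced only for illustration and is not part of this library.
import torch


class _ToyModel(torch.nn.Module):

  def __init__(self):
    super().__init__()
    self.conv = torch.nn.Conv2d(3, 8, kernel_size=3)
    self.relu = torch.nn.ReLU()

  def forward(self, x):
    return self.relu(self.conv(x))


def _example_prepare_quantizable_module():
  model = _ToyModel().eval()
  dummy_input = torch.randn(1, 3, 32, 32)
  # parse the float module and rebuild it as a quantizable module
  quant_module, graph = prepare_quantizable_module(
      module=model,
      input_args=dummy_input,
      export_folder="quantize_result",
      quant_mode=1,
      device=torch.device("cpu"))
  return quant_module, graph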
def __init__(self,
             quant_mode: str,
             module: torch.nn.Module,
             input_args: Union[torch.Tensor, Sequence[Any]] = None,
             state_dict_file: Optional[str] = None,
             output_dir: str = "quantize_result",
             bitwidth_w: int = 8,
             bitwidth_a: int = 8,
             mix_bit: bool = False,
             device: torch.device = torch.device("cuda"),
             lstm_app: bool = False):
  # Check arguments type
  self._check_args(module, input_args)

  # Check device available
  if device.type == "cuda":
    if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
      device = torch.device("cpu")
      NndctScreenLogger().warning(
          f"CUDA is not available, change device to CPU")

  # Transform torch module to quantized module format
  nndct_utils.create_work_dir(output_dir)

  # Create a quantizer object, which can control all quantization flow
  quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
                                    bits_bias=bitwidth_a,
                                    bits_activation=bitwidth_a,
                                    mix_bit=mix_bit)
  quantizer, qmode = self._init_quant_env(quant_mode, output_dir,
                                          quant_strategy)

  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, qmode)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_DEVICE, device)
  if lstm_app:
    option_util.set_option_value("nndct_cv_app", False)
  else:
    option_util.set_option_value("nndct_cv_app", True)

  # Prepare quantizable module
  quant_module, graph = prepare_quantizable_module(
      module=module,
      input_args=input_args,
      export_folder=output_dir,
      state_dict_file=state_dict_file,
      quant_mode=qmode,
      device=device)

  # enable recording outputs of each layer
  if qmode > 1:
    register_output_hook(quant_module, record_once=True)
    set_outputs_recorder_status(quant_module, True)

  # initialize quantizer
  quantizer.setup(graph, False, lstm_app)

  # hook module with quantizer
  # connect_module_with_quantizer(quant_module, quantizer)
  quantizer.quant_model = quant_module

  self.quantizer = quantizer
  self.adaquant = None
def dump_xmodel(self, deploy_check=False):
  """Dump xmodel for LSTM cell."""
  quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
  if quantizer and quantizer.quant_mode > 1:
    compiler = CompilerFactory.get_compiler("xmodel")
    xmodel_dir = os.path.join(self._export_folder, "xmodel")
    create_work_dir(xmodel_dir)
    for info in self._modules_info.values():
      for l_num, layer_graph in enumerate(info["layers_graph"]):
        for lstm_direction, graph in layer_graph.items():
          try:
            compiler.do_compile(
                nndct_graph=graph,
                quant_config_info=quantizer.quant_config,
                output_file_name=os.path.join(xmodel_dir, graph.name),
                graph_attr_kwargs={"direction": lstm_direction})
          except Exception as e:
            print(
                f"[NNDCT_ERROR]: Failed to convert nndct graph to xmodel ({str(e)})."
            )
          else:
            print("[NNDCT_NOTE]: Successfully converted nndct graph to xmodel!")

    if deploy_check:
      print("[NNDCT_NOTE]: Dumping checking data...")
      checker = DeployChecker(
          output_dir_name=self._export_folder, data_format="txt")
      # get timestep output
      for name, info in self._layers_info.items():
        cell = info["cell_module"]
        layer = info["layer_module"]
        graph = info["graph"]
        if layer.input is None:
          warnings.warn(
              f"[NNDCT_WARNING]: Provide inputs for '{name}' when doing deploy checking",
              RuntimeWarning)
          continue
        set_outputs_recorder_status(cell, True)
        layer(layer.input, layer.initial_state, layer.batch_lengths)
        for timestep in range(layer.input.size()[1]):
          enable_dump_weight = True if timestep == 0 else False
          update_nndct_blob_data(cell, graph, timestep)
          checker.update_dump_folder(f"{graph.name}/frame_{timestep}")
          checker.dump_nodes_output(
              graph,
              quantizer.quant_config,
              round_method=quantizer.quant_opt['round_method'],
              enable_dump_weight=enable_dump_weight)
        set_outputs_recorder_status(cell, False)
      print("[NNDCT_NOTE]: Finished dumping data.")
def dump_xmodel(output_dir="quantize_result", deploy_check=False):
  r"""Convert the module to xmodel for deployment.

  Compilation only works when quant_mode is 2. The xmodel and some checking
  data will be generated under the work dir.

  Args:
    deploy_check (bool): if True, dump blobs and parameters of the model for
      deployment verification.

  Returns:
    None
  """
  quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
  if quantizer and quantizer.quant_mode > 1:
    nndct_utils.create_work_dir(output_dir)

    # compile to xmodel
    compiler = CompilerFactory.get_compiler("xmodel")
    NndctScreenLogger().info("=>Converting to xmodel ...")
    deploy_graphs = get_deploy_graph_list(quantizer.quant_model,
                                          quantizer.Nndctgraph)
    deploy_infos = compiler.get_deloy_graph_infos(quantizer, deploy_graphs)

    for deploy_info in deploy_infos:
      try:
        compiler.do_compile(
            deploy_info.dev_graph,
            quant_config_info=deploy_info.quant_info,
            output_file_name=os.path.join(output_dir,
                                          deploy_info.dev_graph.name))
      except AddXopError as e:
        NndctScreenLogger().error(
            f"Failed to convert graph '{deploy_info.dev_graph.name}' to xmodel ({str(e)})."
        )

      # dump data for accuracy check
      if deploy_check:
        NndctScreenLogger().info(
            f"=>Dumping '{deploy_info.dev_graph.name}' checking data...")
        checker = DeployChecker(output_dir_name=output_dir)
        checker.update_dump_folder(f"{deploy_info.dev_graph.name}")
        checker.dump_nodes_output(
            deploy_info.dev_graph,
            deploy_info.quant_info,
            round_method=quantizer.quant_opt['round_method'],
            select_batch=False)
        NndctScreenLogger().info(
            f"=>Finished dumping data.({checker.dump_folder})")

    set_outputs_recorder_status(quantizer.quant_model, False)
def __init__(self,
             quant_mode: str,
             module: torch.nn.Module,
             input_args: Union[torch.Tensor, Sequence[Any]] = None,
             state_dict_file: Optional[str] = None,
             output_dir: str = "quantize_result",
             bitwidth_w: int = 8,
             bitwidth_a: int = 8,
             device: torch.device = torch.device("cuda"),
             lstm_app: bool = True):
  self._export_folder = output_dir
  # Check arguments type
  self._check_args(module)

  # Check device available
  if device.type == "cuda":
    if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
      device = torch.device("cpu")
      NndctScreenLogger().warning(
          f"CUDA is not available, change device to CPU")

  # Transform torch module to quantized module format
  nndct_utils.create_work_dir(output_dir)

  # turn off weights equalization and bias correction
  option_util.set_option_value("nndct_quant_opt", 0)
  option_util.set_option_value("nndct_param_corr", False)
  option_util.set_option_value("nndct_equalization", False)
  option_util.set_option_value("nndct_cv_app", False)

  transformed_module = convert_lstm(module)
  script_module = torch.jit.script(transformed_module)

  quant_module, graph = prepare_quantizable_module(
      module=script_module,
      input_args=None,
      export_folder=output_dir,
      state_dict_file=state_dict_file,
      quant_mode=quant_mode,
      device=device)

  quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
                                    bits_bias=bitwidth_w,
                                    bits_activation=bitwidth_a)

  quantizer, qmode = self._init_quant_env(quant_mode, output_dir,
                                          quant_strategy)

  quantizer.quant_model = quant_module.to(device)
  quantizer.setup(graph, rnn_front_end=True, lstm=True)
  self.quantizer = quantizer
def dump_nodes_output(self,
                      nndct_graph: Graph,
                      quant_configs: NndctQuantInfo,
                      round_method: int,
                      enable_dump_weight: bool = True,
                      select_batch: bool = False) -> NoReturn:
  nndct_utils.create_work_dir(self._full_folder)
  if self._quant_off:
    self._dump_floating_model(nndct_graph, enable_dump_weight, round_method,
                              select_batch)
  else:
    self._dump_floating_model(nndct_graph, enable_dump_weight, round_method,
                              select_batch)
    self._dump_fixed_model(nndct_graph, quant_configs, enable_dump_weight,
                           round_method, select_batch)
    self._dump_graph_info(nndct_graph, quant_configs)
def dump_xmodel(output_dir="quantize_result", deploy_check=False):
  r"""Convert the module to xmodel for deployment.

  Compilation only works when quant_mode is 2. The xmodel and some checking
  data will be generated under the work dir.

  Args:
    deploy_check (bool): if True, dump blobs and parameters of the model for
      deployment verification.

  Returns:
    None
  """
  quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
  if quantizer and quantizer.quant_mode > 1:
    nndct_utils.create_work_dir(output_dir)

    # compile to xmodel
    try:
      compiler = CompilerFactory.get_compiler("xmodel")
      NndctScreenLogger().info("=>Converting to xmodel ...")
      compiler.do_compile(
          nndct_graph=quantizer.Nndctgraph,
          quant_config_info=quantizer.quant_config,
          output_file_name=os.path.join(output_dir, quantizer.Nndctgraph.name))
    except AddXopError as e:
      NndctScreenLogger().error(
          f"Failed to convert nndct graph to xmodel ({str(e)}).")
    else:
      NndctScreenLogger().info(
          f"=>Successfully converted to xmodel.({compiler.xmodel_file})")

    # dump data for accuracy check
    if deploy_check:
      NndctScreenLogger().info("=>Dumping checking data...")
      update_nndct_blob_data(quantizer.quant_model, quantizer.Nndctgraph)
      checker = DeployChecker(output_dir_name=output_dir)
      checker.dump_nodes_output(
          quantizer.Nndctgraph,
          quantizer.quant_config,
          round_method=quantizer.quant_opt['round_method'])
      set_outputs_recorder_status(quantizer.quant_model, False)
      NndctScreenLogger().info(
          f"=>Finished dumping data.({checker.dump_folder})")
def dump_xmodel(output_dir="quantize_result", deploy_check=False, lstm_app=False):
  r"""Convert the module to xmodel for deployment.

  Compilation only works when quant_mode is 2. The xmodel and some checking
  data will be generated under the work dir.

  Args:
    deploy_check (bool): if True, dump blobs and parameters of the model for
      deployment verification.

  Returns:
    None
  """
  quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
  if quantizer and quantizer.quant_mode > 1:
    nndct_utils.create_work_dir(output_dir)

    # compile to xmodel
    compiler = CompilerFactory.get_compiler("xmodel")
    NndctScreenLogger().info("=>Converting to xmodel ...")
    deploy_graphs = get_deploy_graph_list(quantizer.quant_model,
                                          quantizer.Nndctgraph)
    # depoly_infos = compiler.get_deloy_graph_infos(quantizer, deploy_graphs)
    xmodel_deploy_infos, dump_deploy_infos = compiler.get_xmodel_and_dump_infos(
        quantizer, deploy_graphs)

    if not lstm_app:
      for node in xmodel_deploy_infos[0].dev_graph.nodes:
        error_out = False
        if node.op.type not in [NNDCT_OP.INPUT, NNDCT_OP.QUANT_STUB]:
          continue
        for i, tensor in enumerate(node.out_tensors):
          if tensor.shape and tensor.shape[0] != 1:
            NndctScreenLogger().error(
                f"Batch size must be 1 when exporting xmodel.")
            error_out = True
            break
        if error_out:
          break

    for deploy_info in dump_deploy_infos:
      # dump data for accuracy check
      if deploy_check:
        NndctScreenLogger().info(
            f"=>Dumping '{deploy_info.dev_graph.name}' checking data...")
        if lstm_app:
          checker = DeployChecker(output_dir_name=output_dir, data_format='txt')
          checker.update_dump_folder(f"{deploy_info.dev_graph.name}/frame_0")
          select_batch = True
        else:
          checker = DeployChecker(output_dir_name=output_dir)
          checker.update_dump_folder(f"{deploy_info.dev_graph.name}")
          select_batch = False
        checker.dump_nodes_output(
            deploy_info.dev_graph,
            deploy_info.quant_info,
            round_method=quantizer.quant_opt['round_method'],
            select_batch=select_batch)
        NndctScreenLogger().info(
            f"=>Finished dumping data.({checker.dump_folder})")

    for deploy_info in xmodel_deploy_infos:
      try:
        xgraph = compiler.do_compile(
            deploy_info.dev_graph,
            quant_config_info=deploy_info.quant_info,
            output_file_name=os.path.join(output_dir,
                                          deploy_info.dev_graph.name))
      except AddXopError as e:
        NndctScreenLogger().error(
            f"Failed to convert graph '{deploy_info.dev_graph.name}' to xmodel.")
        raise e
      compiler.verify_xmodel(deploy_info.dev_graph, xgraph)

    set_outputs_recorder_status(quantizer.quant_model, False)
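# The loop over input/quant-stub nodes above rejects graphs whose input batch
# dimension is not 1. A hedged caller-side sketch of an input that satisfies the
# check; the helper name and tensor shape are illustrative only.
def _example_single_batch_dummy_input(channels=3, height=224, width=224):
  # xmodel export expects the traced graph to be built with batch size 1
  return torch.randn(1, channels, height, width)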
    torch.ops.load_library(lib_abspath)
  except ImportError as e:
    NndctScreenLogger().error(f"{str(e)}")
    sys.exit(1)
  else:
    NndctScreenLogger().info(f"Loading NNDCT kernels...")
else:
  if os.path.exists(os.path.join(_cur_dir, "kernel")):
    from .kernel import NN_PATH
  else:
    NN_PATH = _cur_dir
  try:
    cwd = NN_PATH
    lib_path = os.path.join(cwd, "lib")
    create_work_dir(lib_path)
    cpu_src_path = os.path.join(cwd, "../../../csrc/cpu")
    source_files = []
    for name in os.listdir(cpu_src_path):
      if name.split(".")[-1] in ["cpp", "cc", "c"]:
        source_files.append(os.path.join(cpu_src_path, name))

    extra_include_paths = [
        os.path.join(cwd, "../../../include/cpu"),
        os.path.join(cwd, "include")
    ]

    with_cuda = False
    # if torch.cuda.is_available() and "CUDA_HOME" in os.environ:
    if "CUDA_HOME" in os.environ:
      cuda_src_path = os.path.join(cwd, "../../../csrc/cuda")
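# The truncated block above collects C++/CUDA sources and include paths for an
# on-the-fly kernel build. A hedged sketch of how such a build is typically
# finished with torch.utils.cpp_extension.load(); the exact call in the
# truncated code is not shown, so this helper is illustrative only.
def _example_jit_build(source_files, extra_include_paths, lib_path, with_cuda):
  from torch.utils.cpp_extension import load

  # compiles the CPU (and optionally CUDA) kernels and loads them into torch
  return load(
      name="nndct_kernels",
      sources=source_files,
      extra_include_paths=extra_include_paths,
      with_cuda=with_cuda,
      build_directory=lib_path,
      verbose=False)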
def __init__(self,
             quant_mode: str,
             module: torch.nn.Module,
             input_args: Union[torch.Tensor, Sequence[Any]] = None,
             state_dict_file: Optional[str] = None,
             output_dir: str = "quantize_result",
             bitwidth_w: int = 8,
             bitwidth_a: int = 8,
             device: torch.device = torch.device("cuda"),
             lstm_app: bool = True):
  self._export_folder = output_dir
  # Check arguments type
  self._check_args(module)

  # Check device available
  if device.type == "cuda":
    if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
      device = torch.device("cpu")
      NndctScreenLogger().warning(
          f"CUDA is not available, change device to CPU")

  # Transform torch module to quantized module format
  nndct_utils.create_work_dir(output_dir)

  # turn off weights equalization and bias correction
  option_util.set_option_value("nndct_quant_opt", 0)
  option_util.set_option_value("nndct_param_corr", False)
  option_util.set_option_value("nndct_equalization", False)

  # Create a quantizer object, which can control all quantization flow
  # if quant_strategy == None:
  quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
                                    bits_bias=bitwidth_w,
                                    bits_activation=bitwidth_a)
  quantizer, qmode = self._init_quant_env(quant_mode, output_dir,
                                          quant_strategy)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, qmode)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_DEVICE, device)

  standard_RNNs, customized_RNNs = self._analyse_module(module)

  if len(standard_RNNs) == 0 and len(customized_RNNs) == 0:
    raise RuntimeError(
        f"The top module '{module._get_name()}' should have at least one LSTM module."
    )

  self._modules_info = defaultdict(dict)

  # process customized LSTM
  for layer_name, layer_module in customized_RNNs.items():
    for cell_name, cell_module in layer_module.named_children():
      lstm_direction = "forward" if layer_module.go_forward else "backward"
      full_cell_name = ".".join([layer_name, cell_name])
      layer_graph = self._get_customized_LSTM_graph(full_cell_name,
                                                    cell_module,
                                                    layer_module.input_size,
                                                    layer_module.hidden_size,
                                                    layer_module.memory_size)
      self._modules_info[full_cell_name]["layers_graph"] = [{
          lstm_direction: layer_graph
      }]
      self._modules_info[full_cell_name]["stack_mode"] = None
      self._modules_info[full_cell_name]["layer_module"] = layer_module

  # process standard LSTM
  for name, rnn_module in standard_RNNs.items():
    layers_graph = self._get_standard_RNN_graph(
        graph_name=name, lstm_module=rnn_module)
    self._modules_info[name]["layers_graph"] = layers_graph
    self._modules_info[name]["input_size"] = [rnn_module.input_size
                                             ] * rnn_module.num_layers
    self._modules_info[name]["hidden_size"] = [rnn_module.hidden_size
                                              ] * rnn_module.num_layers
    self._modules_info[name]["memory_size"] = [rnn_module.hidden_size
                                              ] * rnn_module.num_layers
    self._modules_info[name][
        "stack_mode"] = "bidirectional" if rnn_module.bidirectional else "unidirectional"
    self._modules_info[name]["batch_first"] = rnn_module.batch_first is True

    if rnn_module.mode == 'LSTM':
      self._modules_info[name]["mode"] = "LSTM"
    elif rnn_module.mode == "GRU":
      self._modules_info[name]["mode"] = "GRU"

  # merge multiple graphs into one graph
  top_graph = self._merge_subgraphs()

  # turn on quantizer
  # if quant_mode:
  quantizer.setup(top_graph, rnn_front_end=True, lstm=True)

  # write and reload quantizable cell module
  module_graph_map = self._rebuild_layer_module()

  # replace float module with quantizable module
  for name, info in self._modules_info.items():
    if info["stack_mode"] is not None:
      self._build_stack_lstm_module(info)
    else:
      info["QLSTM"] = list(info["layers_module"][0].values())[0]
    module = self._insert_QuantLstm_in_top_module(module, name, info)

  # move modules info into layers info
  self._convert_modules_info_to_layers(module_graph_map)

  # hook module with quantizer
  # connect_module_with_quantizer(quant_module, quantizer)
  quantizer.quant_model = module

  self.quantizer = quantizer
def quantize_modules(self, top_module: torch.nn.Module) -> torch.nn.Module:
  """Prepare quantizable LSTM sub-modules.

  Args:
    top_module (torch.nn.Module): top module in which the LSTM sub-modules
      need to be quantized.

  Raises:
    RuntimeError: the top module should have at least one LSTM.

  Returns:
    torch.nn.Module: top module in which LSTM sub-modules are transformed to
      quantizable modules.
  """
  standard_RNNs, customized_RNNs = self._analyse_module(top_module)

  if len(standard_RNNs) == 0 and len(customized_RNNs) == 0:
    raise RuntimeError(
        f"The top module '{top_module._get_name()}' should have at least one LSTM module."
    )

  nndct_utils.create_work_dir(self._export_folder)

  self._modules_info = defaultdict(dict)

  # process customized LSTM
  for layer_name, layer_module in customized_RNNs.items():
    for cell_name, cell_module in layer_module.named_children():
      lstm_direction = "forward" if layer_module.go_forward else "backward"
      full_cell_name = ".".join([layer_name, cell_name])
      layer_graph = self._get_customized_LSTM_graph(full_cell_name,
                                                    cell_module,
                                                    layer_module.input_size,
                                                    layer_module.hidden_size,
                                                    layer_module.memory_size)
      self._modules_info[full_cell_name]["layers_graph"] = [{
          lstm_direction: layer_graph
      }]
      self._modules_info[full_cell_name]["stack_mode"] = None
      self._modules_info[full_cell_name]["layer_module"] = layer_module

  # process standard LSTM
  for name, module in standard_RNNs.items():
    layers_graph = self._get_standard_RNN_graph(
        graph_name=name, lstm_module=module)
    self._modules_info[name]["layers_graph"] = layers_graph
    self._modules_info[name]["input_size"] = [module.input_size
                                             ] * module.num_layers
    self._modules_info[name]["hidden_size"] = [module.hidden_size
                                              ] * module.num_layers
    self._modules_info[name]["memory_size"] = [module.hidden_size
                                              ] * module.num_layers
    self._modules_info[name][
        "stack_mode"] = "bidirectional" if module.bidirectional else "unidirectional"
    self._modules_info[name]["batch_first"] = module.batch_first is True

    if module.mode == 'LSTM':
      self._modules_info[name]["mode"] = "LSTM"
    elif module.mode == "GRU":
      self._modules_info[name]["mode"] = "GRU"

  # merge multiple graphs into one graph
  top_graph = self._merge_subgraphs()

  # turn on quantizer
  if self._quant_mode:
    quantizer = TORCHQuantizer(self._quant_mode, self._export_folder,
                               self._bit_w, self._bit_a)
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, self._quant_mode)
    quantizer.setup(top_graph, lstm=True)

  # write and reload quantizable cell module
  module_graph_map = self._rebuild_layer_module()

  # hook quantizer and module
  if self._quant_mode is not None:
    self._hook_quant_module_with_quantizer(quantizer)

  # replace float module with quantizable module
  for name, info in self._modules_info.items():
    if info["stack_mode"] is not None:
      self._build_stack_lstm_module(info)
    else:
      info["QLSTM"] = list(info["layers_module"][0].values())[0]
    top_module = self._insert_QuantLstm_in_top_module(top_module, name, info)

  # move modules info into layers info
  self._convert_modules_info_to_layers(module_graph_map)

  return top_module
def torch_quantizer(quant_mode: int,
                    module: torch.nn.Module,
                    input_args: Union[torch.Tensor, Sequence[Any]],
                    state_dict_file: Optional[str] = None,
                    output_dir: str = "quantize_result",
                    bitwidth_w: int = 8,
                    bitwidth_a: int = 8) -> TORCHQuantizer:

  def _check_args():
    nonlocal module
    if not isinstance(module, torch.nn.Module):
      raise TypeError(f"type of 'module' should be 'torch.nn.Module'.")

    if not isinstance(input_args, (tuple, list, torch.Tensor)):
      raise TypeError(f"type of input_args should be tuple/list/torch.Tensor.")

    device = None
    if isinstance(input_args, torch.Tensor):
      device = input_args.device
    else:
      for inp in input_args:
        if isinstance(inp, torch.Tensor):
          device = inp.device
          break
    if device:
      module = module.to(device)

  def _init_quant_env():
    nonlocal quant_mode
    if NndctOption.nndct_quant_mode.value > 0:
      quant_mode = NndctOption.nndct_quant_mode.value

    if quant_mode == 1:
      NndctScreenLogger().info(f"Quantization calibration process start up...")
    elif quant_mode == 2:
      NndctScreenLogger().info(f"Quantization test process start up...")

    quantizer = TORCHQuantizer(quant_mode, output_dir, bitwidth_w, bitwidth_a)
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, quant_mode)
    return quantizer, quant_mode

  # Check arguments type
  _check_args()

  # Transform torch module to quantized module format
  nndct_utils.create_work_dir(output_dir)

  # Create a quantizer object, which can control all quantization flow
  quantizer, quant_mode = _init_quant_env()

  quant_module, graph = prepare_quantizable_module(
      module=module,
      input_args=input_args,
      export_folder=output_dir,
      state_dict_file=state_dict_file,
      quant_mode=quant_mode)

  # enable recording outputs of each layer
  if quant_mode > 1:
    set_outputs_recorder_status(quant_module, True)

  # initialize quantizer
  quantizer.setup(graph)

  # hook module with quantizer
  connect_module_with_quantizer(quant_module, quantizer)
  quantizer.quant_model = quant_module

  return quantizer
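# A hedged end-to-end sketch of the two-pass flow built on torch_quantizer() and
# dump_xmodel() defined in this file. The model, data loaders, and evaluate()
# are user-side placeholders, and export_quant_config() is assumed to be
# available on the returned quantizer as in the public vai_q_pytorch API.
def _example_two_pass_quantization(model, calib_loader, val_loader, evaluate):
  dummy_input = torch.randn(1, 3, 224, 224)

  # pass 1: calibration (quant_mode=1) collects activation statistics while the
  # quantizable model runs forward over representative data
  quantizer = torch_quantizer(1, model, dummy_input)
  evaluate(quantizer.quant_model, calib_loader)
  quantizer.export_quant_config()  # assumed API for saving calibration results

  # pass 2: test (quant_mode=2) evaluates the fixed-point model, after which the
  # xmodel can be dumped for compilation
  quantizer = torch_quantizer(2, model, dummy_input)
  evaluate(quantizer.quant_model, val_loader)
  dump_xmodel(output_dir="quantize_result", deploy_check=True)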
def dump_nodes_output(self,
                      nndct_graph: Graph,
                      quant_configs: NndctQuantInfo,
                      round_method: int,
                      enable_dump_weight=True) -> NoReturn:

  def _dump_floating_model() -> NoReturn:
    for node in nndct_graph.nodes:
      if enable_dump_weight:
        for _, param_tensor in node.op.params.items():
          self.dump_tensor_to_file(param_tensor.name,
                                   param_tensor.data,
                                   round_method=round_method)
      if len(node.out_tensors) > 1:
        raise RuntimeError(
            "Don't support multi-output op:'{} {}' for deploying!".format(
                node.name, node.op.type))
      for tensor in node.out_tensors:
        self.dump_tensor_to_file(node.name,
                                 tensor.data,
                                 round_method=round_method)

  def _dump_fixed_model() -> NoReturn:
    for node in nndct_graph.nodes:
      if enable_dump_weight:
        for _, param_tensor in node.op.params.items():
          if param_tensor.name in quant_configs['params']:
            bit_width, fix_point = quant_configs['params'][param_tensor.name]
            self.dump_tensor_to_file(
                param_tensor.name + NNDCT_KEYS.FIX_OP_SUFFIX,
                param_tensor.data,
                bit_width,
                fix_point,
                round_method=round_method)
      if len(node.out_tensors) > 1:
        raise RuntimeError(
            "Don't support multi-output op:'{} {}' for deploying!".format(
                node.name, node.op.type))
      if node.name in quant_configs['blobs']:
        for tensor in node.out_tensors:
          bit_width, fix_point = quant_configs['blobs'][node.name]
          self.dump_tensor_to_file(
              node.name + NNDCT_KEYS.FIX_OP_SUFFIX,
              tensor.data,
              bit_width,
              fix_point,
              round_method=round_method)

  def _dump_graph_info() -> NoReturn:
    # dump tensor shape information
    file_name = os.path.join(self._full_folder, "shape.txt")
    with open(file_name, "w") as file_obj:
      for node in nndct_graph.nodes:
        if node.name in quant_configs['blobs']:
          for tensor in node.out_tensors:
            try:
              file_obj.write("{}: {}\n".format(tensor.data.shape, node.name))
            except AttributeError as e:
              NndctScreenLogger().warning(
                  f"{tensor.name} is not a tensor. Its shape info is ignored.")

  nndct_utils.create_work_dir(self._full_folder)
  if self._quant_off:
    _dump_floating_model()
  else:
    _dump_floating_model()
    _dump_fixed_model()
    _dump_graph_info()
def __init__(self,
             quant_mode: str,
             module: torch.nn.Module,
             input_args: Union[torch.Tensor, Sequence[Any]] = None,
             state_dict_file: Optional[str] = None,
             output_dir: str = "quantize_result",
             bitwidth_w: int = 8,
             bitwidth_a: int = 8,
             device: torch.device = torch.device("cuda"),
             lstm_app: bool = True,
             quant_config_file: Optional[str] = None):
  self._export_folder = output_dir
  # Check arguments type
  self._check_args(module)

  # Check device available
  if device.type == "cuda":
    if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
      device = torch.device("cpu")
      NndctScreenLogger().warning(
          f"CUDA is not available, change device to CPU")

  # Transform torch module to quantized module format
  nndct_utils.create_work_dir(output_dir)

  # turn off weights equalization and bias correction
  option_util.set_option_value("nndct_quant_opt", 0)
  option_util.set_option_value("nndct_param_corr", False)
  option_util.set_option_value("nndct_equalization", False)
  option_util.set_option_value("nndct_cv_app", False)

  # Parse the quant config file
  QConfiger = RNNTorchQConfig()
  # if quant_config_file:
  QConfiger.parse_config_file(quant_config_file,
                              bit_width_w=bitwidth_w,
                              bit_width_a=bitwidth_a)
  qconfig = QConfiger.qconfig
  # bitwidth_w = qconfig['weight']['bit_width']
  # bitwidth_b = qconfig['bias']['bit_width']
  # bitwidth_a = qconfig['activation']['bit_width']
  # mix_bit = qconfig['mix_bit']

  transformed_module = convert_lstm(module)
  script_module = torch.jit.script(transformed_module)

  quant_module, graph = prepare_quantizable_module(
      module=script_module,
      input_args=None,
      export_folder=output_dir,
      state_dict_file=state_dict_file,
      quant_mode=quant_mode,
      device=device)

  # qstrategy_factory = QstrategyFactory()
  # quant_strategy = qstrategy_factory.create_qstrategy(qconfig)
  # quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
  #                                   bits_bias=bitwidth_w,
  #                                   bits_activation=bitwidth_a)

  quantizer, qmode = self._init_quant_env(quant_mode,
                                          output_dir,
                                          qconfig,
                                          is_lstm=True)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, qmode)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_DEVICE, device)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_CONFIG, qconfig)

  quantizer.quant_model = quant_module.to(device)
  quantizer.setup(graph, rnn_front_end=True, lstm=True)
  self.quantizer = quantizer
def __init__(self,
             quant_mode: str,
             module: torch.nn.Module,
             input_args: Union[torch.Tensor, Sequence[Any]] = None,
             state_dict_file: Optional[str] = None,
             output_dir: str = "quantize_result",
             bitwidth_w: int = 8,
             bitwidth_a: int = 8,
             mix_bit: bool = False,
             device: torch.device = torch.device("cuda"),
             lstm_app: bool = False,
             custom_quant_ops: Optional[List[str]] = None,
             quant_config_file: Optional[str] = None):
  # Check arguments type
  self._check_args(module, input_args)

  # Check device available
  if device.type == "cuda":
    if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
      device = torch.device("cpu")
      NndctScreenLogger().warning(
          f"CUDA is not available, change device to CPU")

  # Transform torch module to quantized module format
  nndct_utils.create_work_dir(output_dir)

  # Parse the quant config file
  QConfiger = TorchQConfig()
  # if quant_config_file:
  QConfiger.parse_config_file(quant_config_file,
                              bit_width_w=bitwidth_w,
                              bit_width_a=bitwidth_a,
                              mix_bit=mix_bit)
  qconfig = QConfiger.qconfig
  # bitwidth_w = qconfig['weights']['bit_width']
  # bitwidth_b = qconfig['bias']['bit_width']
  # bitwidth_a = qconfig['activation']['bit_width']
  # mix_bit = qconfig['mix_bit']

  # Create a quantizer object, which can control all quantization flow
  # qstrategy_factory = QstrategyFactory()
  # quant_strategy = qstrategy_factory.create_qstrategy(qconfig)
  # quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
  #                                   bits_bias=bitwidth_a,
  #                                   bits_activation=bitwidth_a,
  #                                   mix_bit=mix_bit)
  quantizer, qmode = self._init_quant_env(quant_mode, output_dir, qconfig)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, qmode)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_DEVICE, device)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_CONFIG, qconfig)
  if lstm_app:
    option_util.set_option_value("nndct_cv_app", False)
  else:
    option_util.set_option_value("nndct_cv_app", True)

  # Prepare quantizable module
  quant_module, graph = prepare_quantizable_module(
      module=module,
      input_args=input_args,
      export_folder=output_dir,
      state_dict_file=state_dict_file,
      quant_mode=qmode,
      device=device)

  # enable recording outputs of each layer
  if qmode > 1:
    register_output_hook(quant_module, record_once=True)
    set_outputs_recorder_status(quant_module, True)

  # initialize quantizer
  quantizer.setup(graph, False, lstm_app, custom_quant_ops=custom_quant_ops)
  # if qmode > 1:
  #   quantizer.features_check()

  # hook module with quantizer
  # connect_module_with_quantizer(quant_module, quantizer)
  quantizer.quant_model = quant_module

  self._example_inputs = input_args
  self._lstm_app = lstm_app
  self.quantizer = quantizer
  self.adaquant = None

  # dump blob distribution
  if NndctOption.nndct_visualize.value is True:
    visualize_tensors(quantizer.quant_model)