def _quantize(self, xgraph, stop=None, subgraphs_only=True):
    # (XGraph, str, bool) -> None
    """Start MSE quantization.

    Inserts MSE quantization layers into the graph, executes the resulting
    graph once on calibration inputs to determine thresholds, then retrieves
    the quantization parameters from the executed graph.

    Args:
        xgraph: the XGraph to quantize.
        stop: unused in this method -- presumably a layer name at which to
            stop quantization; TODO confirm against the base-class contract.
        subgraphs_only: if True, restrict quantization to the partitioned
            subgraphs rather than the whole graph.
    """
    # Graph pass to construct new graph with quantization layers
    graph_pass = XGraphPassAddMSEQuantLayers(
        bitwidth=self.bitwidth,
        mse_opt_num=self.mse_opt_num,
        subgraphs_only=subgraphs_only,
        # Only dump a PNG visualization when debug logging (level <= 10)
        # is enabled.
        output_png='tvm_mse_quant.png'
        if logger.getEffectiveLevel() <= 10 else None,
        name=xgraph.get_name())
    xgraph = graph_pass.execute(xgraph=xgraph)

    self.quant_xgraph = xgraph
    self.runtime = pyxir.build(self.quant_xgraph, target='cpu')

    # Run graph to set Variable layer thresholds in graph
    fancy_logger.banner("EXECUTE QUANTIZATION GRAPH")
    inpts = self.inputs_func(0)
    out, params = self.runtime.optimize(inpts)
    logger.info("Done executing graph")

    logger.info("Retrieving quantization parameters...")
    self._retrieve_quant_params(params, xgraph, subgraphs_only)
def _run_network_cpu(xgraph, inputs, batch_size=1):
    """Run `xgraph` on a CPU after applying the basic optimization passes.

    Args:
        xgraph: the XGraph to execute.
        inputs: the input data to feed the graph.
        batch_size: execution batch size (default 1).

    Returns:
        The result of executing the built runtime module on `inputs`.
    """
    optimized_graph = XGraphBasicOptimizer(xgraph).optimize()
    runtime_module = pyxir.build(optimized_graph, target='cpu')
    return pyxir.run(runtime_module, inputs, [], batch_size=batch_size)
def __init__(self,
             xgraph,
             inputs_func,
             work_dir=os.path.join(os.getcwd()),
             quant_iter=1):
    """Set up a subgraph quantizer for `xgraph`.

    Args:
        xgraph: the full XGraph whose partitioned subgraphs will be
            quantized.
        inputs_func: callable producing calibration inputs for the graph.
        work_dir: directory for intermediate artifacts; created if missing.
            NOTE(review): the default is just `os.getcwd()` evaluated at
            import time -- confirm this is intended.
        quant_iter: number of quantization (calibration) iterations.
    """
    # FIX: the base-class initializer call was commented out, but
    # `self.xgraph` (read just below) is only set by the base initializer.
    super(XGraphBaseSubgraphQuantizer, self).__init__(xgraph)

    self.subgraph_Xps = XGraphBaseSubgraphQuantizer.xgraph_partitioner\
        .get_subgraphs(self.xgraph)

    # Maps external (graph) to internal (subgraph) inputs for each subgraph
    self.subgraph_input_map = {}
    self.subgraph_inputs = {}
    self.subgraph_input_names = []
    for Xp in self.subgraph_Xps:
        sub_xgraph = XGraphBaseSubgraphQuantizer.xgraph_factory\
            .build_from_xlayer(Xp.subgraph_data, name=Xp.name)

        self.subgraph_input_map[Xp.name] = {}
        input_names = sub_xgraph.get_input_names()
        # Pair each external bottom tensor with the corresponding
        # internal subgraph input name.
        for b, in_name in zip(Xp.bottoms, input_names):
            self.subgraph_input_names.append(b)
            self.subgraph_inputs[b] = None
            self.subgraph_input_map[Xp.name][b] = in_name

    # Setup executable graph, stopping execution at the subgraph inputs so
    # the calibration data fed into each subgraph can be captured.
    self.runtime = pyxir.build(self.xgraph, target='cpu',
                               last_layers=self.subgraph_input_names)

    self.inputs_func = inputs_func
    self.work_dir = work_dir
    os.makedirs(self.work_dir, exist_ok=True)
    self.quant_iter = quant_iter
def __init__(self,
             xgraph,
             inputs_func,
             bitwidth=8,
             work_dir=os.path.join(os.getcwd(), 'work'),
             quant_iter=1):
    """Set up the default quantizer for `xgraph`.

    Args:
        xgraph: the XGraph to quantize.
        inputs_func: callable producing calibration inputs for the graph.
        bitwidth: quantization bit width (default 8).
        work_dir: directory for intermediate artifacts.
            NOTE(review): the default is resolved from `os.getcwd()` at
            import time -- confirm this is intended.
        quant_iter: number of quantization (calibration) iterations.
    """
    # FIX: the base-class initializer call was commented out, but
    # `self.xgraph` (used just below) is only set by the base initializer.
    super(XGraphDefaultQuantizer, self).__init__(xgraph)

    # Setup executable graph
    self.runtime = pyxir.build(self.xgraph, target='cpu')

    self.inputs_func = inputs_func
    self.work_dir = work_dir
    self._bitwidth = bitwidth
    self._quant_param = QuantParamFactory()
    self._quant_layers = {}
    # FIX: `quant_iter` was accepted but never stored; keep it like the
    # sibling quantizer does.
    self.quant_iter = quant_iter

    # All quantization handlers take the same three constructor arguments;
    # build the tuple once instead of repeating it for every op type.
    # Separate handler instances are kept per op, as before.
    quant_args = (self._quant_param, self._quant_layers, self._bitwidth)
    self.XFDNN_OP_2_QUANT_FUNC = {
        'Input': InputQuant(*quant_args),
        'Output': DefaultQuant(*quant_args),
        'Constant': SkipQuant(*quant_args),
        # BASIC NN OPS
        'Dense': DefaultQuant(*quant_args),
        'Softmax': DefaultQuant(*quant_args),
        'ReLU': DefaultQuant(*quant_args),
        'Tanh': DefaultQuant(*quant_args),
        # MATH
        'Scale': ScaleQuant(*quant_args),
        'Eltwise': EltwiseQuant(*quant_args),
        'Concat': ConcatQuant(*quant_args),
        'Mean': DefaultQuant(*quant_args),
        'BatchNorm': BatchNormQuant(*quant_args),
        # CONVOLUTION
        'Convolution': ConvQuant(*quant_args),
        'Conv2DTranspose': ConvQuant(*quant_args),
        'Pooling': PoolQuant(*quant_args),
        # OTHER
        'Reshape': DefaultQuant(*quant_args),
        'Squeeze': DefaultQuant(*quant_args),
        'Flatten': DefaultQuant(*quant_args),
        'Transpose': DefaultQuant(*quant_args),
    }
bn_handling=BatchNormHandling.MERGE_AND_QUANTIZE) # Finetune the model # . . . # Export to ONNX onnx_filename = 'dpuv2_resnet18.onnx' export_dpuv2_onnx(model, input_shape=IN_SIZE, input_t=inp, export_path=onnx_filename) # Load ONNX into PyXIR onnx_model = onnx.load(onnx_filename) xgraph = from_onnx(onnx_model) xgraph = pyxir.partition(xgraph, [target]) xgraph = pyxir.optimize(xgraph, target) work_dir = os.path.join(file_dir, f'{target}_quant_trained_resnet18_workdir') inputs = np.random.randn(*IN_SIZE) def inputs_func(iter): return {'inp.1': inputs} xgraph = pyxir.quantize(xgraph, target, inputs_func, work_dir=work_dir) pyxir.build(xgraph, target, work_dir=work_dir, build_dir=work_dir, runtime='cpu-np')