def _Convert(cls, ctx: Graph, node: Node, opset: int, **kwargs):
    formula = node.attrs["quantization_formula"]
    if formula == "cambricon":
        raise ValueError("unsupported quantization formula: " + formula)
    # Append a DequantizeLinear node right after the fake-quantization output so
    # downstream consumers receive float values again.
    dequant_node = ctx.InsertNewNodeOnOutput(
        "DequantizeLinear",
        node.output_tensor_names[0],
        name=id_util.UniqueStr(node.name),
    )
    if opset < 13:
        # Before opset 13, DequantizeLinear has no axis attribute, so only
        # per-tensor (scalar scale) quantization can be expressed.
        scale_node: Node = node.input_nodes[1]
        scale_np: np.ndarray = scale_node.get_tensor_value(as_list=False)
        if scale_np.size != 1:
            raise RuntimeError(
                "per-channel quantization requires opset 13 or higher")
    else:
        node.attrs["axis"] = 0
        dequant_node.attrs["axis"] = 0
    dequant_node.input_tensor_names = [
        node.output_tensor_names[0],
        node.input_tensor_names[1],
        node.input_tensor_names[2],
    ]
    ctx.set_dtype(
        dequant_node.output_tensor_names[0],
        ctx.get_dtype(node.input_tensor_names[0]),
    )
    ctx.CopyShape(node.output_tensor_names[0], dequant_node.output_tensor_names[0])
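# --- Illustrative sketch (not part of the converter above) ---
# A hedged example, using only onnx.helper, of the kind of DequantizeLinear node
# the handler above appends after the fake-quantization output. The tensor names
# below are hypothetical placeholders, not names produced by the converter.
import onnx.helper

example_dequant = onnx.helper.make_node(
    "DequantizeLinear",
    inputs=["fake_quant_out", "scale", "zero_point"],  # mirrors the three input_tensor_names above
    outputs=["dequantized_out"],
    name="example_dequantize",
    axis=0,  # only set for opset >= 13, as in the handler above
)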
def _Convert(cls, ctx: Graph, node: Node, opset: int, **kwargs):
    bit = node.attrs["quantization_bit"]
    scheme = node.attrs["quantization_scheme"]
    per_layer = node.attrs["per_layer_quantization"]
    formula = node.attrs["quantization_formula"]
    if not per_layer and opset == 10:
        raise NotImplementedError(
            "per-channel quantization is not supported with opset 10")
    input_node: Node = node.input_nodes[0]
    input_np: np.ndarray = input_node.get_tensor_value(as_list=False)
    # Per-layer (and cambricon) statistics are computed over the whole tensor;
    # per-channel statistics are computed over each output channel.
    input_np = (input_np.flatten()
                if formula == "cambricon" or per_layer
                else input_np.reshape((input_np.shape[0], -1)))

    def get_min_or_max_value(get_min: bool, pre_func: Optional[Callable] = None):
        data = input_np.copy()
        func = np.min if get_min else np.max
        if pre_func is not None:
            data = pre_func(data)
        result = func(data, axis=-1 if formula == "cambricon" or per_layer else 1)
        return result.flatten()

    input_np_abs_max = get_min_or_max_value(False, np.abs)
    if formula == "google":
        if scheme == "symmetric":
            denominator = 2.0 ** (bit - 1) - 1
            scale = input_np_abs_max / denominator
            zero_point = np.array([0] * scale.shape[0], dtype=np.int8)
        elif scheme == "affine":
            input_np_min = get_min_or_max_value(True)
            denominator = 2.0 ** bit - 1
            scale = (get_min_or_max_value(False) - input_np_min) / denominator
            zero_point = (-np.round(input_np_min / scale)).astype(np.uint8)
        else:
            raise ValueError("invalid quantization scheme: " + scheme)
    elif formula == "cambricon":
        scale = np.floor(np.log2(input_np_abs_max)) - (bit - 2)
        zero_point = np.array([0], dtype=np.int8)
    else:
        raise ValueError("invalid quantization formula: " + formula)
    # Replace the fake-quantization node with constant scale/zero_point tensors.
    ctx.RemoveNode(node.name)
    ctx.MakeConst(node.output_tensor_names[0], scale)
    ctx.MakeConst(node.output_tensor_names[1], zero_point)
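# --- Illustrative sketch (not part of the converter above) ---
# A minimal, self-contained numpy example of the "google"/"symmetric" per-layer
# branch above for bit = 8: scale = max(|w|) / (2**(bit - 1) - 1), zero_point = 0.
# The weight values are hypothetical.
import numpy as np

weight = np.array([[-0.5, 0.25], [0.75, -1.0]], dtype=np.float32)
bit = 8
scale = np.abs(weight).max() / (2.0 ** (bit - 1) - 1)          # == 1.0 / 127
zero_point = np.int8(0)
quantized = np.clip(np.round(weight / scale), -127, 127).astype(np.int8)
dequantized = quantized.astype(np.float32) * scale             # approximates the original weight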
def ProcessFlowGraph(
    flow_graph,
    model_save_dir,
    continue_on_error=False,
    opset=None,
    extra_opset=None,
    shape_override=None,
):
    opset = util.FindOpset(opset)
    logger.info("Using opset <onnx, %s>", opset)
    if opset > schemas.get_max_supported_opset_version():
        logger.warning(
            "Currently installed onnx package %s is too old to support opset %s, "
            "please upgrade the onnx package to avoid potential conversion issues.",
            util.get_onnx_version(),
            opset,
        )
    if shape_override is None:
        shape_override = {}
    (onnx_nodes, op_cnt, attr_cnt, dtypes, output_shapes) = FlowToOnnxNaive(
        flow_graph, shape_override
    )
    g = Graph(onnx_nodes, model_save_dir, output_shapes, dtypes, opset, extra_opset)
    # Create the op mapping for the desired opsets.
    ops_mapping = handler.flow_op.CreateMapping(g.opset, g.extra_opset)
    # Some nodes may already have been copied into an inner graph, so remove them
    # from the main graph before mapping.
    TopologicalSort(g, continue_on_error)
    mapped_op, unmapped_op, exceptions = FlowOnnxMapping(g, ops_mapping)
    if unmapped_op:
        logger.error("Unsupported ops: %s", unmapped_op)
    if exceptions and not continue_on_error:
        raise exceptions[0]
    # ONNX requires topological sorting.
    TopologicalSort(g, continue_on_error)
    g.UpdateProto()
    logger.debug(
        "Summary Stats:\n"
        "\toneflow ops: {}\n"
        "\toneflow attr: {}\n"
        "\tonnx mapped: {}\n"
        "\tonnx unmapped: {}".format(op_cnt, attr_cnt, mapped_op, unmapped_op)
    )
    return g
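# --- Illustrative sketch (not part of the converter above) ---
# A hedged usage example of ProcessFlowGraph based only on its signature above.
# How `flow_graph` and `model_save_dir` are produced by the oneflow front end is
# not shown here; both variables are placeholders.
onnx_graph = ProcessFlowGraph(
    flow_graph,          # oneflow graph proto (placeholder)
    model_save_dir,      # directory with saved variable values (placeholder)
    continue_on_error=False,
    opset=10,
)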
def Version_10(cls, ctx: Graph, node: Node, **kwargs):
    bit = node.attrs["quantization_bit"]
    scheme = node.attrs["quantization_scheme"]
    formula = node.attrs["quantization_formula"]
    moving_max_node: Node = node.input_nodes[2]
    moving_max_np: np.ndarray = moving_max_node.get_tensor_value(as_list=False)
    moving_min_node: Node = node.input_nodes[3]
    moving_min_np: np.ndarray = moving_min_node.get_tensor_value(as_list=False)
    _zero = np.array([0], dtype=np.int8)
    if formula == "google":
        if scheme == "symmetric":
            denominator = 2.0 ** (bit - 1) - 1
            scale = moving_max_np / denominator
            zero_point = _zero
        elif scheme == "affine":
            denominator = 2.0 ** bit - 1
            scale = (moving_max_np - moving_min_np) / denominator
            zero_point = (-np.round(moving_min_np / scale)).astype(np.uint8).flatten()
        else:
            raise ValueError("invalid quantization scheme: " + scheme)
    elif formula == "cambricon":
        scale = np.floor(np.log2(moving_max_np)) - (bit - 2)
        zero_point = _zero
    else:
        raise ValueError("invalid quantization formula: " + formula)
    ctx.RemoveNode(node.name)
    ctx.MakeConst(node.output_tensor_names[0], scale.flatten())
    ctx.MakeConst(node.output_tensor_names[1], zero_point)
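# --- Illustrative sketch (not part of the converter above) ---
# A minimal numpy example of the "google"/"affine" branch above for bit = 8:
# scale = (max - min) / (2**bit - 1) and zero_point = -round(min / scale), so
# activations in [min, max] map onto the unsigned range [0, 255].
# The moving statistics below are hypothetical.
import numpy as np

moving_min, moving_max = np.float32(-0.2), np.float32(1.0)
bit = 8
scale = (moving_max - moving_min) / (2.0 ** bit - 1)
zero_point = np.uint8(-np.round(moving_min / scale))
x = np.array([-0.2, 0.0, 0.5, 1.0], dtype=np.float32)
q = np.clip(np.round(x / scale) + zero_point, 0, 255).astype(np.uint8)
x_hat = (q.astype(np.float32) - zero_point) * scale            # approximate reconstruction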