def _Convert(cls, ctx: Graph, node: Node, opset: int, **kwargs):
    """Lower a min/max-observer quantization node to two ONNX constants.

    Reads the observed weight tensor from the node's first input, derives
    ``scale`` and ``zero_point`` from the node's quantization attributes,
    removes the node from the graph, and emits the two results as constant
    tensors bound to the node's output names.

    Raises:
        NotImplementedError: per-channel quantization requested with opset 10.
        ValueError: unknown quantization scheme or formula attribute.
    """
    bit = node.attrs["quantization_bit"]
    scheme = node.attrs["quantization_scheme"]
    per_layer = node.attrs["per_layer_quantization"]
    formula = node.attrs["quantization_formula"]

    # Guard clause: opset 10 only supports whole-tensor (per-layer) mode.
    if opset == 10 and not per_layer:
        raise NotImplementedError(
            "per-channel mode is not supported in version 10")

    weight: np.ndarray = node.input_nodes[0].get_tensor_value(as_list=False)

    # "cambricon" and per-layer modes reduce over the whole tensor; the
    # per-channel path keeps the leading (channel) axis separate and
    # collapses everything else into axis 1.
    collapse = formula == "cambricon" or per_layer
    if collapse:
        weight = weight.flatten()
        reduce_axis = -1
    else:
        weight = weight.reshape((weight.shape[0], -1))
        reduce_axis = 1

    def _reduce(get_min: bool, pre_func: Optional[Callable] = None):
        # Reduce (min or max) along the chosen axis, optionally after a
        # preprocessing step (e.g. np.abs), returning a 1-D array.
        values = weight.copy()
        if pre_func is not None:
            values = pre_func(values)
        reducer = np.min if get_min else np.max
        return reducer(values, axis=reduce_axis).flatten()

    abs_max = _reduce(False, np.abs)

    if formula == "google":
        if scheme == "symmetric":
            # Symmetric: scale maps |max| onto the top of the signed range.
            scale = abs_max / (2.0 ** (bit - 1) - 1)
            zero_point = np.zeros(scale.shape[0], dtype=np.int8)
        elif scheme == "affine":
            lo = _reduce(True)
            hi = _reduce(False)
            scale = (hi - lo) / (2.0 ** bit - 1)
            # Shift so that the observed minimum lands on unsigned zero.
            zero_point = (-np.round(lo / scale)).astype(np.uint8)
        else:
            raise ValueError("invalid quantization scheme: " + scheme)
    elif formula == "cambricon":
        # Power-of-two shift exponent derived from |max|.
        scale = np.floor(np.log2(abs_max)) - (bit - 2)
        zero_point = np.array([0], dtype=np.int8)
    else:
        raise ValueError("invalid quantization formula: " + formula)

    ctx.RemoveNode(node.name)
    ctx.MakeConst(node.output_tensor_names[0], scale)
    ctx.MakeConst(node.output_tensor_names[1], zero_point)
def Version_10(cls, ctx: Graph, node: Node, **kwargs):
    """Lower a moving-average min/max fake-quant node to ONNX constants.

    The moving max and moving min are read from the node's third and fourth
    constant inputs; ``scale`` and ``zero_point`` are computed from them
    according to the node's quantization attributes, after which the node is
    replaced by two constant tensors bound to its output names.

    Raises:
        ValueError: unknown quantization scheme or formula attribute.
    """
    bit = node.attrs["quantization_bit"]
    scheme = node.attrs["quantization_scheme"]
    formula = node.attrs["quantization_formula"]

    moving_max: np.ndarray = node.input_nodes[2].get_tensor_value(
        as_list=False)
    moving_min: np.ndarray = node.input_nodes[3].get_tensor_value(
        as_list=False)

    # Shared signed-zero constant for the symmetric / cambricon branches.
    zero_i8 = np.array([0], dtype=np.int8)

    if formula == "google":
        if scheme == "symmetric":
            # Symmetric: scale maps the moving max onto the signed range top.
            scale = moving_max / (2.0 ** (bit - 1) - 1)
            zero_point = zero_i8
        elif scheme == "affine":
            scale = (moving_max - moving_min) / (2.0 ** bit - 1)
            # Shift so the moving minimum lands on unsigned zero.
            zero_point = (
                (-np.round(moving_min / scale)).astype(np.uint8).flatten())
        else:
            raise ValueError("invalid quantization scheme: " + scheme)
    elif formula == "cambricon":
        # Power-of-two shift exponent derived from the moving max.
        scale = np.floor(np.log2(moving_max)) - (bit - 2)
        zero_point = zero_i8
    else:
        raise ValueError("invalid quantization formula: " + formula)

    ctx.RemoveNode(node.name)
    ctx.MakeConst(node.output_tensor_names[0], scale.flatten())
    ctx.MakeConst(node.output_tensor_names[1], zero_point)