def test_dump_tensor(self):
    """Dumped tensor maps expose an entry for every white-listed node."""
    # Case 1: CV model, two calibration iterations, activations only.
    model, dataloader = self.cv_session
    aug = ONNXRTAugment(ONNXModel(model),
                        dataloader,
                        [],
                        self.augment_path,
                        iterations=[0, 1],
                        white_nodes=["conv"])
    dumped = aug.dump_tensor()
    for it in (0, 1):
        assert "conv" in dumped["activation"][it]
        assert "C" in dumped["activation"][it]["conv"]
    # Case 2: CV model, single iteration, weights dumped as well.
    model, dataloader = self.cv_session
    aug = ONNXRTAugment(ONNXModel(model),
                        dataloader,
                        [],
                        self.augment_path,
                        iterations=[0],
                        white_nodes=["conv", "relu"])
    dumped = aug.dump_tensor(weight=True)
    assert "conv" in dumped["activation"][0]
    assert "relu" in dumped["activation"][0]
    assert "conv" in dumped["weight"]
    # Case 3: NLP model with a Gather node.
    model, dataloader = self.nlp_session
    aug = ONNXRTAugment(ONNXModel(model),
                        dataloader,
                        [],
                        self.augment_path,
                        iterations=[0],
                        white_nodes=["gather"])
    dumped = aug.dump_tensor()
    assert "gather" in dumped["activation"][0]
def test_dump_calibration(self):
    """dump_calibration yields quantization params for every graph tensor."""
    model, dataloader = self.cv_session
    aug = ONNXRTAugment(ONNXModel(model),
                        dataloader,
                        ["Conv", "Relu"],
                        self.augment_path,
                        iterations=[0])
    calib_params = aug.dump_calibration()
    # Each tensor of the small CV graph must receive calibration params.
    for tensor_name in ("A", "B", "D", "C"):
        assert tensor_name in calib_params
def set_tensor(self, model, tensor_dict):
    """Overwrite initializers of an already-quantized ONNX model.

    For each ``name -> array`` pair in ``tensor_dict``, re-quantize the
    fp32 array with the scale/zero-point already stored in the model and
    write the result back as the ``<name>_quantized`` initializer.

    Args:
        model: onnx.ModelProto or lpot ONNXModel; must contain at least
            one QuantizeLinear node (i.e. be an int8 model).
        tensor_dict: mapping of tensor name to new fp32 tensor value.

    Returns:
        The modified ONNXModel wrapper.
    """
    from onnx import numpy_helper
    from lpot.model.onnx_model import ONNXModel
    from lpot.adaptor.ox_utils.util import quantize_data_with_scale_zo
    from lpot.adaptor.ox_utils.util import quantize_data_per_channel
    if not isinstance(model, ONNXModel):
        model = ONNXModel(model)
    # Only quantized models carry the scale/zero-point info we need.
    assert "QuantizeLinear" in [node.op_type for node in model.model.graph.node], \
        'adaptor.set_tensor only accept int8 model'
    input_name_to_nodes = model.input_name_to_nodes
    for tensor_name, tensor_value in tensor_dict.items():
        # Quantized initializers follow the '<name>_quantized' convention.
        if not tensor_name.endswith('_quantized'):
            tensor_name += '_quantized'
        not_filter = False
        scale_tensor, zo_tensor = model.get_scale_zo(tensor_name)
        if scale_tensor is None or zo_tensor is None:
            # No stored scale/zo: treat as a bias-like tensor that must be
            # re-quantized from its producer's params instead.
            not_filter = True
        else:
            scale_value = numpy_helper.to_array(scale_tensor)
            zo_value = numpy_helper.to_array(zo_tensor)
        assert len(input_name_to_nodes[tensor_name]) == 1, \
            'quantized filter weight should be input of only one node'
        node = input_name_to_nodes[tensor_name][0]  # TBD only for conv bias
        # q_config keys use the original (pre-quantization) node names.
        node_name = node.name.replace('_quant', '')
        assert node_name in self.q_config
        q_type = self.q_config[node_name]['weight']['dtype']
        if not_filter:
            # Bias path: derive scale from input/weight scales.
            new_tensor_value = self._requantize_bias(model, tensor_name, tensor_value)
        elif self.q_config[node_name]['weight']['granularity'] == 'per_tensor':
            new_tensor_value = quantize_data_with_scale_zo(
                tensor_value, q_type, scale_value, zo_value)
        else:
            # Per-channel weights get one scale/zo pair per output channel.
            new_tensor_value = quantize_data_per_channel(
                tensor_value, q_type, scale_value, zo_value)
        model.set_initializer(tensor_name, new_tensor_value)
    return model
def _get_quantize_params(self, model, data_loader, q_config, iterations):
    """Collect per-tensor calibration (scale/zero-point) parameters.

    Nodes marked 'fp32' in q_config are excluded from calibration
    (black list); all other entries are calibrated (white list).
    """
    from lpot.adaptor.ox_utils.onnxrt_mid import ONNXRTAugment
    from lpot.model.onnx_model import ONNXModel
    if not isinstance(model, ONNXModel):
        model = ONNXModel(model)
    # Partition q_config keys into excluded (fp32) and calibrated nodes.
    black_nodes, white_nodes = [], []
    for node in q_config:
        if q_config[node] == 'fp32':
            black_nodes.append(node)
        else:
            white_nodes.append(node)
    augmented_path = os.path.join(self.work_space, 'augmented_model.onnx')
    augment = ONNXRTAugment(model,
                            data_loader,
                            self.quantizable_op_types,
                            augmented_path,
                            black_nodes=black_nodes,
                            white_nodes=white_nodes,
                            iterations=list(range(0, q_config['calib_iteration'])))
    return augment.dump_calibration()
def inspect_tensor(self, model, data_loader, op_list=[],
                   iteration_list=[], inspect_type='activation',
                   save_to_disk=False):
    """Dump intermediate tensor info; used by the tune strategy class.

    Args:
        model: onnx.ModelProto or lpot ONNXModel to inspect.
        data_loader: calibration data loader feeding the model.
        op_list: node names whose tensors should be dumped.
        iteration_list: data-loader iterations to run.
        inspect_type: 'activation', 'weight', or anything else for both.
        save_to_disk: when True, also persist the dump as an .npz file
            under the workspace.

    Returns:
        dict with 'activation' and/or 'weight' entries as produced by
        ONNXRTAugment.dump_tensor.
    """
    from lpot.adaptor.ox_utils.onnxrt_mid import ONNXRTAugment
    from lpot.model.onnx_model import ONNXModel
    if not isinstance(model, ONNXModel):
        model = ONNXModel(model)
    augment = ONNXRTAugment(model, data_loader, [],
                            os.path.join(self.work_space, 'augment_for_inspect.onnx'),
                            iterations=iteration_list,
                            white_nodes=op_list)
    tensors = augment.dump_tensor(activation=(inspect_type != 'weight'),
                                  weight=(inspect_type != 'activation'))
    if save_to_disk:
        # BUG FIX: np.savez takes the destination file as its FIRST
        # argument; the original call passed (tensors, path), which raised
        # at runtime. Expand the dump dict so each top-level entry
        # ('activation'/'weight') is stored under its own key.
        np.savez(os.path.join(self.work_space, 'dumped_tensors.npz'),
                 **tensors)
    return tensors
def dump_tensor(self, activation=True, weight=False):
    """Run the augmented model and map dumped tensors back to white nodes.

    Args:
        activation: include per-iteration activation tensors in the result.
        weight: include weight (initializer) tensors in the result.

    Returns:
        dict with optional keys:
            'activation': list (one dict per iteration) of
                {node_name: {tensor_name: tensor}}
            'weight': {node_name: {tensor_name: tensor}}
    """
    # For an already-quantized model, dump via DequantizeLinear so the
    # collected tensors are in fp32.
    if "QuantizeLinear" in [node.op_type for node in self.model.graph.node]:
        self.augment_nodes = ["DequantizeLinear"]
        self.already_quantized = True
    activation_only = not weight
    self.augment_graph(activation_only=activation_only, output_only=True)
    _, output_dicts_list = self.get_intermediate_outputs()
    # Transpose per-iteration dicts into {output_name: [tensor per iter]}.
    output_dicts = {}
    for output_dicts_iter in output_dicts_list:
        for output_name in output_dicts_iter:
            if output_name not in output_dicts:
                output_dicts[output_name] = []
            output_dicts[output_name].append(output_dicts_iter[output_name])
    iters = len(output_dicts_list)
    map_node_activation = [{} for _ in range(iters)]
    map_node_weight = {}
    # Normalize white-node names: quantized nodes carry a '_quant' suffix.
    self.white_nodes = [node.replace('_quant', '') for node in self.white_nodes]
    # NOTE(review): 'augmengted_wrapper' is a pre-existing local-name typo,
    # kept as-is here.
    augmengted_wrapper = ONNXModel(self.augmented_model)
    map_output = augmengted_wrapper.output_name_to_node
    map_input = augmengted_wrapper.input_name_to_nodes
    model_output_names = [t.name for t in self.model.graph.output]
    model_initializer_names = [t.name for t in self.model.graph.initializer]
    for tensor_name, tensors in output_dicts.items():
        if tensor_name.endswith('_scale') or tensor_name.endswith('_zero_point'):
            continue  # don't dump scale and zero_point
        if tensor_name in model_initializer_names:
            # Weight tensor: attribute it to each white consumer node.
            nodes = [node for node in map_input[tensor_name] \
                     if node.name.replace('_quant', '') in self.white_nodes]
        else:
            # Activation tensor: attribute it to its producer node.
            nodes = [map_output[tensor_name]]
        for node in nodes:
            node_name = node.name.replace('_quant', '')
            if tensor_name in model_output_names and node_name not in self.white_nodes:
                continue
            # Climb producers until we reach a white node (skips helper
            # nodes such as inserted DequantizeLinear).
            while node_name not in self.white_nodes:
                node = augmengted_wrapper.get_parents(
                    node, output_name_to_node=map_output)[0]
                node_name = node.name.replace('_quant', '')
            if node_name not in map_node_weight:
                map_node_weight[node_name] = {}
            if tensor_name not in model_initializer_names:
                # One activation entry per calibration iteration.
                for i in range(iters):
                    map_node_activation[i][node_name] = \
                        {tensor_name.replace('_quantized', ''): tensors[i]}
            else:
                # Weights do not vary across iterations; keep the first.
                map_node_weight[node_name].update(
                    {tensor_name.replace('_quantized', ''): tensors[0]})
    dumped_tensors_map = {}
    if weight:
        dumped_tensors_map.update({"weight": map_node_weight})
    if activation:
        dumped_tensors_map.update({"activation": map_node_activation})
    return dumped_tensors_map
def test_quant_param_calculation(self):
    '''TEST_CONFIG_6'''
    # Verify zero-point/scale computation against a hand-built graph:
    #   Relu
    #   |  \
    #   Conv \
    #   |     \
    #   Relu   |
    #   |     Conv
    #   Conv  /
    #     \  /
    #      |
    #     Add
    input0 = helper.make_tensor_value_info('input0', TensorProto.FLOAT,
                                           [1, 3, 1, 3])
    output = helper.make_tensor_value_info('output', TensorProto.FLOAT,
                                           [1, 3, 1, 3])
    X1_weight = generate_input_initializer([3, 3, 1, 1], np.float32, 'X1_weight')
    X1_bias = generate_input_initializer([3], np.float32, 'X1_bias')
    X3_weight = generate_input_initializer([3, 3, 1, 1], np.float32, 'X3_weight')
    X3_bias = generate_input_initializer([3], np.float32, 'X3_bias')
    X5_weight = generate_input_initializer([3, 3, 1, 1], np.float32, 'X5_weight')
    X5_bias = generate_input_initializer([3], np.float32, 'X5_bias')
    relu_node_1 = onnx.helper.make_node('Relu', ['input0'], ['X1'], name='Relu1')
    conv_node_1 = onnx.helper.make_node('Conv',
                                        ['X1', 'X1_weight', 'X1_bias'],
                                        ['X2'], name='Conv1')
    relu_node_2 = onnx.helper.make_node('Relu', ['X2'], ['X3'], name='Relu2')
    conv_node_2 = onnx.helper.make_node('Conv',
                                        ['X3', 'X3_weight', 'X3_bias'],
                                        ['X4'], name='Conv2')
    conv_node_3 = onnx.helper.make_node('Conv',
                                        ['X1', 'X5_weight', 'X5_bias'],
                                        ['X5'], name='Conv3')
    add_node = onnx.helper.make_node('Add', ['X4', 'X5'], ['output'], name='Add')
    graph = helper.make_graph([
        relu_node_1, conv_node_1, relu_node_2, conv_node_2, conv_node_3,
        add_node
    ], 'test_graph_5', [input0], [output])
    graph.initializer.add().CopyFrom(X1_weight)
    graph.initializer.add().CopyFrom(X1_bias)
    graph.initializer.add().CopyFrom(X3_weight)
    graph.initializer.add().CopyFrom(X3_bias)
    graph.initializer.add().CopyFrom(X5_weight)
    graph.initializer.add().CopyFrom(X5_bias)
    model = helper.make_model(graph)
    data_reader = TestDataset()
    augmented_model_path = os.path.join(self.work_space,
                                        './augmented_test_model_5.onnx')
    augment = ONNXRTAugment(ONNXModel(model), data_reader,
                            ['Conv', 'MatMul'], augmented_model_path)
    # test calculation of quantization params
    # TO_DO: check rmin/rmax
    quantization_params_dict = augment.dump_calibration()
    node_output_names, output_dicts_list = augment.get_intermediate_outputs()
    dict_for_quantization = augment._map_calibration(node_output_names,
                                                     output_dicts_list)
    # check the size of the quantization dictionary
    self.assertEqual(len(quantization_params_dict), 11)
    # check the computation of zp and scale for every tensor
    for key, value in quantization_params_dict.items():
        self.assertTrue(value is not None)
        self.assertTrue(len(value) == 2)  # (zero_point, scale) pair
        thresholds = dict_for_quantization[key]
        rmin = min(thresholds[0], 0)
        rmax = max(thresholds[1], 0)
        if key == 'X2':  # next_node is Relu, so the range is clipped at 0
            if rmin < 0:
                rmin = 0
        # uint8 asymmetric quantization over [rmin, rmax].
        scale_expected = np.float32((rmax - rmin) / 255 if rmin != rmax else 1)
        zp_expected = np.uint8(
            round(max(0, min(255, (0 - rmin) / scale_expected))))
        zp_actual = value[0]
        scale_actual = value[1]
        self.assertEqual(zp_expected, zp_actual)
        self.assertEqual(scale_expected, scale_actual)
    print('Finished' + ' test calculation of quantization params.')
def test_augment_graph(self):
    ''' TEST_CONFIG_1'''
    # Conv
    # |
    # Clip
    # |
    # MatMul
    A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5])
    B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 3, 3])
    E = helper.make_tensor_value_info('E', TensorProto.FLOAT, [1, 1, 5, 1])
    F = helper.make_tensor_value_info('F', TensorProto.FLOAT, [1, 1, 5, 1])
    conv_node = onnx.helper.make_node('Conv', ['A', 'B'], ['C'],
                                      name='Conv',
                                      kernel_shape=[3, 3],
                                      pads=[1, 1, 1, 1])
    clip_node = onnx.helper.make_node('Clip', ['C'], ['D'], name='Clip')
    matmul_node = onnx.helper.make_node('MatMul', ['D', 'E'], ['F'],
                                        name='MatMul')
    graph = helper.make_graph([conv_node, clip_node, matmul_node],
                              'test_graph_1', [A, B, E], [F])
    model = helper.make_model(graph)
    # Augmenting graph
    data_reader = None
    augmented_model_path = os.path.join(self.work_space,
                                        './augmented_test_model_1.onnx')
    augment = ONNXRTAugment(ONNXModel(model), data_reader,
                            ['Conv', 'MatMul'], augmented_model_path)
    augment.augment_nodes = ["ReduceMin", "ReduceMax"]
    augment.augment_graph()
    augmented_model = augment.augmented_model
    onnx.save(augmented_model, augmented_model_path)
    # Checking if each added ReduceMin and ReduceMax node and its output exists
    augmented_model_node_names = [
        node.name for node in augmented_model.graph.node
    ]
    augmented_model_outputs = [
        output.name for output in augmented_model.graph.output
    ]
    added_node_names = ['A_ReduceMin', 'A_ReduceMax', 'B_ReduceMin', 'B_ReduceMax', 'C_ReduceMin', \
        'C_ReduceMax', 'D_ReduceMin', 'D_ReduceMax', 'F_ReduceMin', 'F_ReduceMax']
    added_outputs = ['A_ReduceMin', 'A_ReduceMax', 'B_ReduceMin', 'B_ReduceMax', 'C_ReduceMin', \
        'C_ReduceMax', 'D_ReduceMin', 'D_ReduceMax', 'F_ReduceMin', 'F_ReduceMax']
    # Original 3 nodes + added ReduceMin/Max nodes * 6 (exclude graph input/output)
    self.assertEqual(len(augmented_model_node_names), 15)
    # Original 1 graph output + added outputs * 6
    self.assertEqual(len(augmented_model_outputs), 13)
    for name in added_node_names:
        self.assertTrue(name in augmented_model_node_names)
    for output in added_outputs:
        self.assertTrue(output in augmented_model_outputs)
    print('Finished TEST_CONFIG_1')

    '''TEST_CONFIG_2'''
    # Conv
    # |
    # Conv
    G = helper.make_tensor_value_info('G', TensorProto.FLOAT, [1, 1, 5, 5])
    H = helper.make_tensor_value_info('H', TensorProto.FLOAT, [1, 1, 3, 3])
    J = helper.make_tensor_value_info('J', TensorProto.FLOAT, [1, 1, 3, 3])
    K = helper.make_tensor_value_info('K', TensorProto.FLOAT, [1, 1, 5, 5])
    conv_node_1 = onnx.helper.make_node('Conv', ['G', 'H'], ['I'],
                                        name='Conv',
                                        kernel_shape=[3, 3],
                                        pads=[1, 1, 1, 1])
    conv_node_2 = onnx.helper.make_node('Conv', ['I', 'J'], ['K'],
                                        name='Conv',
                                        kernel_shape=[3, 3],
                                        pads=[1, 1, 1, 1])
    graph = helper.make_graph([conv_node_1, conv_node_2], 'test_graph_2',
                              [G, H, J], [K])
    model = helper.make_model(graph)
    # Augmenting graph
    data_reader = None
    augmented_model_path = os.path.join(self.work_space,
                                        './augmented_test_model_2.onnx')
    augment = ONNXRTAugment(ONNXModel(model), data_reader,
                            ['Conv', 'MatMul'], augmented_model_path)
    augment.augment_nodes = ["ReduceMin", "ReduceMax"]
    augment.augment_graph()
    augmented_model = augment.augmented_model
    onnx.save(augmented_model, augmented_model_path)
    augmented_model_node_names = [
        node.name for node in augmented_model.graph.node
    ]
    augmented_model_outputs = [
        output.name for output in augmented_model.graph.output
    ]
    added_node_names = ['I_ReduceMin', 'I_ReduceMax', 'J_ReduceMin', 'J_ReduceMax', 'H_ReduceMin', 'H_ReduceMax', \
        'G_ReduceMin', 'G_ReduceMax', 'K_ReduceMin', 'K_ReduceMax']
    added_outputs = ['I_ReduceMin', 'I_ReduceMax', 'J_ReduceMin', 'J_ReduceMax', 'H_ReduceMin', 'H_ReduceMax',\
        'G_ReduceMin', 'G_ReduceMax', 'K_ReduceMin', 'K_ReduceMax']
    # Original 2 nodes + added ReduceMin/Max nodes * 4
    self.assertEqual(len(augmented_model_node_names), 12)
    # Original 1 graph output + added outputs * 4
    self.assertEqual(len(augmented_model_outputs), 11)
    for name in added_node_names:
        self.assertTrue(name in augmented_model_node_names)
    for output in added_outputs:
        self.assertTrue(output in augmented_model_outputs)
    print('Finished TEST_CONFIG_2')

    '''TEST_CONFIG_3'''
    # Relu
    # |
    # Conv \
    # |     |
    # Clip  |
    # |    /
    # MatMul
    L = helper.make_tensor_value_info('L', TensorProto.FLOAT, [1, 1, 5, 5])
    N = helper.make_tensor_value_info('N', TensorProto.FLOAT, [1, 1, 3, 3])
    Q = helper.make_tensor_value_info('Q', TensorProto.FLOAT, [1, 1, 5, 5])
    relu_node = onnx.helper.make_node('Relu', ['L'], ['M'], name='Relu')
    conv_node = onnx.helper.make_node('Conv', ['M', 'N'], ['O'],
                                      name='Conv',
                                      kernel_shape=[3, 3],
                                      pads=[1, 1, 1, 1])
    clip_node = onnx.helper.make_node('Clip', ['O'], ['P'], name='Clip')
    matmul_node = onnx.helper.make_node('MatMul', ['P', 'M'], ['Q'],
                                        name='MatMul')
    graph = helper.make_graph(
        [relu_node, conv_node, clip_node, matmul_node], 'test_graph_3',
        [L, N], [Q])
    model = helper.make_model(graph)
    # Augmenting graph
    data_reader = None
    augmented_model_path = os.path.join(self.work_space,
                                        './augmented_test_model_3.onnx')
    augment = ONNXRTAugment(ONNXModel(model), data_reader,
                            ['Conv', 'MatMul'], augmented_model_path)
    augment.augment_nodes = ["ReduceMin", "ReduceMax"]
    augment.augment_graph()
    augmented_model = augment.augmented_model
    onnx.save(augmented_model, augmented_model_path)
    augmented_model_node_names = [
        node.name for node in augmented_model.graph.node
    ]
    augmented_model_outputs = [
        output.name for output in augmented_model.graph.output
    ]
    added_node_names = ['O_ReduceMin', 'O_ReduceMax', 'Q_ReduceMin', 'Q_ReduceMax', 'N_ReduceMin', \
        'N_ReduceMax', 'P_ReduceMin', 'P_ReduceMax', 'M_ReduceMin', 'M_ReduceMax']
    added_outputs = ['O_ReduceMin', 'O_ReduceMax', 'Q_ReduceMin', 'Q_ReduceMax', 'N_ReduceMin', \
        'N_ReduceMax', 'P_ReduceMin', 'P_ReduceMax', 'M_ReduceMin', 'M_ReduceMax']
    # Original 4 nodes + added ReduceMin/Max nodes * 8
    self.assertEqual(len(augmented_model_node_names), 14)
    # Original 1 graph output + added outputs * 8
    self.assertEqual(len(augmented_model_outputs), 11)
    for name in added_node_names:
        self.assertTrue(name in augmented_model_node_names)
    for output in added_outputs:
        self.assertTrue(output in augmented_model_outputs)
    print('Finished TEST_CONFIG_3')

    '''TEST_CONFIG_4'''
    # Attention
    # |
    # MatMul
    Attention_weight = helper.make_tensor_value_info(
        'Attention_weight', TensorProto.FLOAT, [13, 7])
    Attention_bias = helper.make_tensor_value_info('Attention_bias',
                                                   TensorProto.FLOAT,
                                                   [13, 7])
    Attention_mask = helper.make_tensor_value_info('Attention_mask',
                                                   TensorProto.INT32,
                                                   [13, 7])
    S = helper.make_tensor_value_info('S', TensorProto.FLOAT, [13, 7])
    T = helper.make_tensor_value_info('T', TensorProto.FLOAT, [13, 7])
    attention_node = onnx.helper.make_node(
        'Attention',
        ['Attention_weight', 'Attention_bias', 'Attention_mask'], ['R'],
        name='Attention')
    matmul_node = onnx.helper.make_node('MatMul', ['R', 'S'], ['T'],
                                        name='MatMul')
    graph = helper.make_graph(
        [attention_node, matmul_node], 'test_graph_4',
        [Attention_weight, Attention_bias, Attention_mask, S], [T])
    model = helper.make_model(graph)
    # Augmenting graph
    data_reader = None
    augmented_model_path = os.path.join(self.work_space,
                                        './augmented_test_model_4.onnx')
    augment = ONNXRTAugment(ONNXModel(model), data_reader,
                            ['Conv', 'MatMul', 'Attention'],
                            augmented_model_path)
    augment.augment_nodes = ["ReduceMin", "ReduceMax"]
    augment.augment_graph()
    augmented_model = augment.augmented_model
    onnx.save(augmented_model, augmented_model_path)
    augmented_model_node_names = [
        node.name for node in augmented_model.graph.node
    ]
    augmented_model_outputs = [
        output.name for output in augmented_model.graph.output
    ]
    added_node_names = ['Attention_bias_ReduceMin', 'Attention_bias_ReduceMax', 'Attention_weight_ReduceMin', \
        'Attention_weight_ReduceMax', 'S_ReduceMin', 'S_ReduceMax', 'R_ReduceMin', 'R_ReduceMax', 'T_ReduceMin', 'T_ReduceMax']
    added_outputs = ['Attention_bias_ReduceMin', 'Attention_bias_ReduceMax', 'Attention_weight_ReduceMin', \
        'Attention_weight_ReduceMax', 'S_ReduceMin', 'S_ReduceMax', 'R_ReduceMin', 'R_ReduceMax', 'T_ReduceMin', 'T_ReduceMax']
    # Original 2 nodes + added ReduceMin/Max nodes * 5
    self.assertEqual(len(augmented_model_node_names), 12)
    # Original 1 graph output + added outputs * 5
    self.assertEqual(len(augmented_model_outputs), 11)
    for name in added_node_names:
        self.assertTrue(name in augmented_model_node_names)
    for output in added_outputs:
        self.assertTrue(output in augmented_model_outputs)
    print('Finished TEST_CONFIG_4')

    # TEST_CONFIG_5: already-quantized attention model.
    # QAttention
    # |
    # QuantizeLinear
    Attention_weight = helper.make_tensor_value_info(
        'weight_quantized', TensorProto.INT8, [13, 7])
    weight_quantized = generate_input_initializer([13, 7], np.int8,
                                                  'weight_quantized')
    Attention_bias = helper.make_tensor_value_info('bias',
                                                   TensorProto.FLOAT,
                                                   [13, 7])
    bias = generate_input_initializer([13, 7], np.float32, 'bias')
    Input_scale = helper.make_tensor_value_info('input_scale',
                                                TensorProto.FLOAT, [1])
    input_scale = generate_input_initializer([1], np.float32,
                                             'input_scale')
    Weight_scale = helper.make_tensor_value_info('weight_scale',
                                                 TensorProto.FLOAT, [1])
    weight_scale = generate_input_initializer([1], np.float32,
                                              'weight_scale')
    Attention_mask = helper.make_tensor_value_info('mask',
                                                   TensorProto.INT32,
                                                   [13, 7])
    mask = generate_input_initializer([13, 7], np.int32, 'mask')
    Input_zo = helper.make_tensor_value_info('input_zero_point',
                                             TensorProto.INT8, [1])
    input_zero_point = generate_input_initializer([1], np.int8,
                                                  'input_zero_point')
    Weight_zo = helper.make_tensor_value_info('weight_zero_point',
                                              TensorProto.INT8, [1])
    weight_zero_point = generate_input_initializer([1], np.int8,
                                                   'weight_zero_point')
    Q_scale = helper.make_tensor_value_info('attn_output_scale',
                                            TensorProto.FLOAT, [1])
    attn_output_scale = generate_input_initializer([1], np.float32,
                                                   'attn_output_scale')
    Q_zo = helper.make_tensor_value_info('attn_output_zero_point',
                                         TensorProto.INT8, [1])
    attn_output_zero_point = generate_input_initializer(
        [1], np.int8, 'attn_output_zero_point')
    Output = helper.make_tensor_value_info('output', TensorProto.INT8,
                                           [13, 7])
    attention_node = onnx.helper.make_node('QAttention', [
        'weight_quantized', 'bias', 'input_scale', 'weight_scale', 'mask',
        'input_zero_point', 'weight_zero_point'
    ], ['attn_output'], name='attention_quant')
    qlinear_node = onnx.helper.make_node(
        'QuantizeLinear',
        ['attn_output', 'attn_output_scale', 'attn_output_zero_point'],
        ['attn_output_quantized'],
        name='attn_output_QuantizeLinear')
    graph = helper.make_graph(
        [attention_node, qlinear_node], 'test_graph_5', [
            Attention_weight, Attention_bias, Input_scale, Weight_scale,
            Attention_mask, Input_zo, Weight_zo, Q_scale, Q_zo
        ], [Output])
    graph.initializer.add().CopyFrom(weight_quantized)
    graph.initializer.add().CopyFrom(bias)
    graph.initializer.add().CopyFrom(input_scale)
    graph.initializer.add().CopyFrom(weight_scale)
    graph.initializer.add().CopyFrom(mask)
    graph.initializer.add().CopyFrom(input_zero_point)
    graph.initializer.add().CopyFrom(weight_zero_point)
    graph.initializer.add().CopyFrom(attn_output_scale)
    graph.initializer.add().CopyFrom(attn_output_zero_point)
    model = helper.make_model(graph)
    # Augmenting graph
    data_reader = None
    augmented_model_path = os.path.join(self.work_space,
                                        './augmented_test_model_5.onnx')
    augment = ONNXRTAugment(ONNXModel(model), data_reader, [],
                            augmented_model_path,
                            white_nodes=['attention'])
    augment.augment_nodes = ['DequantizeLinear']
    augment.already_quantized = True
    augment.augment_graph(activation_only=True, output_only=True)
    augmented_model = augment.augmented_model
    onnx.save(augmented_model, augmented_model_path)
    augmented_model_node_names = [
        node.name for node in augmented_model.graph.node
    ]
    augmented_model_outputs = [
        output.name for output in augmented_model.graph.output
    ]
    added_outputs = ['attn_output']
    # No nodes are added; 'attn_output' is simply exposed as a graph output.
    self.assertEqual(len(augmented_model_node_names), 2)
    self.assertEqual(len(augmented_model_outputs), 2)
    for output in added_outputs:
        self.assertTrue(output in augmented_model_outputs)
    print('Finished TEST_CONFIG_5')

    # TEST_CONFIG_6: already-quantized convolution model.
    # QuantizeLinear
    # |
    # QLinearConv
    # |
    # DequantizeLinear
    A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5])
    A_scale = helper.make_tensor_value_info('A_scale', TensorProto.FLOAT,
                                            [1])
    a_scale = generate_input_initializer([1], np.float32, 'A_scale')
    A_zo = helper.make_tensor_value_info('A_zero_point', TensorProto.INT8,
                                         [1])
    a_zero_point = generate_input_initializer([1], np.int8,
                                              'A_zero_point')
    B_scale = helper.make_tensor_value_info('B_scale', TensorProto.FLOAT,
                                            [1])
    b_scale = generate_input_initializer([1], np.float32, 'B_scale')
    B_zo = helper.make_tensor_value_info('B_zero_point', TensorProto.INT8,
                                         [1])
    b_zero_point = generate_input_initializer([1], np.int8,
                                              'B_zero_point')
    C = helper.make_tensor_value_info('C', TensorProto.INT8, [1, 1, 5, 5])
    c = generate_input_initializer([1, 1, 5, 5], np.int8, 'C')
    C_scale = helper.make_tensor_value_info('C_scale', TensorProto.FLOAT,
                                            [1])
    c_scale = generate_input_initializer([1], np.float32, 'C_scale')
    C_zo = helper.make_tensor_value_info('C_zero_point', TensorProto.INT8,
                                         [1])
    c_zero_point = generate_input_initializer([1], np.int8,
                                              'C_zero_point')
    E = helper.make_tensor_value_info('E', TensorProto.INT32, [1])
    e = generate_input_initializer([1], np.int32, 'E')
    D_scale = helper.make_tensor_value_info('D_scale', TensorProto.FLOAT,
                                            [1])
    d_scale = generate_input_initializer([1], np.float32, 'D_scale')
    D_zo = helper.make_tensor_value_info('D_zero_point', TensorProto.INT8,
                                         [1])
    d_zero_point = generate_input_initializer([1], np.int8,
                                              'D_zero_point')
    D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 5])
    quantize_node = onnx.helper.make_node('QuantizeLinear',
                                          ['A', 'A_scale', 'A_zero_point'],
                                          ['B'],
                                          name='A_QuantizeLinear')
    conv_node = onnx.helper.make_node('QLinearConv', [
        'B', 'B_scale', 'B_zero_point', 'C', 'C_scale', 'C_zero_point',
        'D_scale', 'D_zero_point', 'E'
    ], ['D_quantized'],
                                      name='conv_quant',
                                      kernel_shape=[3, 3],
                                      pads=[1, 1, 1, 1])
    dequantize_node = onnx.helper.make_node(
        'DequantizeLinear', ['D_quantized', 'D_scale', 'D_zero_point'],
        ['D'],
        name='D_DequantizeLinear')
    graph = helper.make_graph(
        [quantize_node, conv_node, dequantize_node], 'test_graph_5',
        [A, A_scale, A_zo, C, C_scale, C_zo, E, D_scale, D_zo], [D])
    graph.initializer.add().CopyFrom(a_scale)
    graph.initializer.add().CopyFrom(a_zero_point)
    graph.initializer.add().CopyFrom(b_scale)
    graph.initializer.add().CopyFrom(b_zero_point)
    graph.initializer.add().CopyFrom(c)
    graph.initializer.add().CopyFrom(c_scale)
    graph.initializer.add().CopyFrom(c_zero_point)
    graph.initializer.add().CopyFrom(e)
    graph.initializer.add().CopyFrom(d_scale)
    graph.initializer.add().CopyFrom(d_zero_point)
    model = helper.make_model(graph)
    # Augmenting graph
    data_reader = None
    augmented_model_path = os.path.join(self.work_space,
                                        './augmented_test_model_6.onnx')
    augment = ONNXRTAugment(ONNXModel(model), data_reader, [],
                            augmented_model_path,
                            white_nodes=['conv'])
    augment.augment_nodes = ["DequantizeLinear"]
    augment.already_quantized = True
    augment.augment_graph(activation_only=True, output_only=True)
    augmented_model = augment.augmented_model
    onnx.save(augmented_model, augmented_model_path)
    augmented_model_node_names = [
        node.name for node in augmented_model.graph.node
    ]
    augmented_model_outputs = [
        output.name for output in augmented_model.graph.output
    ]
    # A DequantizeLinear is inserted so the conv output is dumped as fp32.
    added_node_names = ['D_quantized_DequantizeLinear']
    added_outputs = ['D_quantized_output']
    self.assertEqual(len(augmented_model_node_names), 4)
    self.assertEqual(len(augmented_model_outputs), 2)
    for name in added_node_names:
        self.assertTrue(name in augmented_model_node_names)
    for output in added_outputs:
        self.assertTrue(output in augmented_model_outputs)
def setUp(self):
    """Build an fp32 test model (self.model) and a quantized one (self.q_model)."""
    # fp32 model topology:
    # Relu
    # |  \
    # Conv \
    # |     \
    # Relu   |
    # |     Conv
    # Conv  /
    #   \  /
    #    |
    #   Add
    input0 = helper.make_tensor_value_info('input0', TensorProto.FLOAT,
                                           [1, 3, 1, 3])
    output = helper.make_tensor_value_info('output', TensorProto.FLOAT,
                                           [1, 3, 1, 3])
    X1_weight = generate_input_initializer([3, 3, 1, 1], np.float32,
                                           'X1_weight')
    X1_bias = generate_input_initializer([3], np.float32, 'X1_bias')
    X3_weight = generate_input_initializer([3, 3, 1, 1], np.float32,
                                           'X3_weight')
    X3_bias = generate_input_initializer([3], np.float32, 'X3_bias')
    X5_weight = generate_input_initializer([3, 3, 1, 1], np.float32,
                                           'X5_weight')
    X5_bias = generate_input_initializer([3], np.float32, 'X5_bias')
    relu_node_1 = onnx.helper.make_node('Relu', ['input0'], ['X1'],
                                        name='Relu1')
    conv_node_1 = onnx.helper.make_node('Conv',
                                        ['X1', 'X1_weight', 'X1_bias'],
                                        ['X2'], name='Conv1')
    relu_node_2 = onnx.helper.make_node('Relu', ['X2'], ['X3'],
                                        name='Relu2')
    conv_node_2 = onnx.helper.make_node('Conv',
                                        ['X3', 'X3_weight', 'X3_bias'],
                                        ['X4'], name='Conv2')
    conv_node_3 = onnx.helper.make_node('Conv',
                                        ['X1', 'X5_weight', 'X5_bias'],
                                        ['X5'], name='Conv3')
    add_node = onnx.helper.make_node('Add', ['X4', 'X5'], ['output'],
                                     name='Add')
    graph = helper.make_graph([
        relu_node_1, conv_node_1, relu_node_2, conv_node_2, conv_node_3,
        add_node
    ], 'test_graph_6', [input0], [output])
    graph.initializer.add().CopyFrom(X1_weight)
    graph.initializer.add().CopyFrom(X1_bias)
    graph.initializer.add().CopyFrom(X3_weight)
    graph.initializer.add().CopyFrom(X3_bias)
    graph.initializer.add().CopyFrom(X5_weight)
    graph.initializer.add().CopyFrom(X5_bias)
    model = helper.make_model(graph)
    # Round-trip through disk so self.model is backed by a saved file.
    test_model_path = './test_model_6.onnx'
    onnx.save(model, test_model_path)
    model = onnx.load(test_model_path)
    self.model = ONNXModel(model)

    # Quantized model topology:
    # QuantizeLinear
    # |
    # QLinearConv
    # |
    # DequantizeLinear
    A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5])
    A_scale = helper.make_tensor_value_info('A_scale', TensorProto.FLOAT,
                                            [1])
    a_scale = generate_input_initializer([1], np.float32, 'A_scale')
    A_zo = helper.make_tensor_value_info('A_zero_point', TensorProto.INT8,
                                         [1])
    a_zero_point = generate_input_initializer([1], np.int8,
                                              'A_zero_point')
    B_scale = helper.make_tensor_value_info('B_scale', TensorProto.FLOAT,
                                            [1])
    b_scale = generate_input_initializer([1], np.float32, 'B_scale')
    B_zo = helper.make_tensor_value_info('B_zero_point', TensorProto.INT8,
                                         [1])
    b_zero_point = generate_input_initializer([1], np.int8,
                                              'B_zero_point')
    C = helper.make_tensor_value_info('C', TensorProto.INT8, [1, 1, 5, 5])
    c = generate_input_initializer([1, 1, 5, 5], np.int8, 'C')
    C_scale = helper.make_tensor_value_info('C_scale', TensorProto.FLOAT,
                                            [1])
    c_scale = generate_input_initializer([1], np.float32, 'C_scale')
    C_zo = helper.make_tensor_value_info('C_zero_point', TensorProto.INT8,
                                         [1])
    c_zero_point = generate_input_initializer([1], np.int8,
                                              'C_zero_point')
    E = helper.make_tensor_value_info('E', TensorProto.INT32, [1])
    e = generate_input_initializer([1], np.int32, 'E')
    D_scale = helper.make_tensor_value_info('D_scale', TensorProto.FLOAT,
                                            [1])
    d_scale = generate_input_initializer([1], np.float32, 'D_scale')
    D_zo = helper.make_tensor_value_info('D_zero_point', TensorProto.INT8,
                                         [1])
    d_zero_point = generate_input_initializer([1], np.int8,
                                              'D_zero_point')
    D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 5])
    quantize_node = onnx.helper.make_node('QuantizeLinear',
                                          ['A', 'A_scale', 'A_zero_point'],
                                          ['B_quantized'],
                                          name='A_QuantizeLinear')
    # NOTE(review): the conv consumes 'C_quantized' while the initializer
    # is named 'C' — looks intentional for the suffix-stripping tests, but
    # verify against the consuming test cases.
    conv_node = onnx.helper.make_node('QLinearConv', [
        'B_quantized', 'B_scale', 'B_zero_point', 'C_quantized', 'C_scale',
        'C_zero_point', 'D_scale', 'D_zero_point', 'E'
    ], ['D_quantized'],
                                      name='conv_quant',
                                      kernel_shape=[3, 3],
                                      pads=[1, 1, 1, 1])
    dequantize_node = onnx.helper.make_node(
        'DequantizeLinear', ['D_quantized', 'D_scale', 'D_zero_point'],
        ['D'],
        name='D_DequantizeLinear')
    graph = helper.make_graph(
        [quantize_node, conv_node, dequantize_node], 'test_graph_7',
        [A, A_scale, A_zo, C, C_scale, C_zo, E, D_scale, D_zo], [D])
    graph.initializer.add().CopyFrom(a_scale)
    graph.initializer.add().CopyFrom(a_zero_point)
    graph.initializer.add().CopyFrom(b_scale)
    graph.initializer.add().CopyFrom(b_zero_point)
    graph.initializer.add().CopyFrom(c)
    graph.initializer.add().CopyFrom(c_scale)
    graph.initializer.add().CopyFrom(c_zero_point)
    graph.initializer.add().CopyFrom(e)
    graph.initializer.add().CopyFrom(d_scale)
    graph.initializer.add().CopyFrom(d_zero_point)
    model = helper.make_model(graph)
    self.q_model = ONNXModel(model)
class TestOnnxModel(unittest.TestCase):
    """Unit tests for the ONNXModel graph-manipulation wrapper.

    ``setUp`` builds two models:

    * ``self.model``  -- a small FP32 graph (Relu/Conv diamond joined by Add),
      round-tripped through './test_model_6.onnx' so tests run against a
      loaded-from-disk model.
    * ``self.q_model`` -- an int8 graph (QuantizeLinear -> QLinearConv ->
      DequantizeLinear) used by the scale/zero-point lookup test.
    """

    def setUp(self):
        # FP32 test graph topology:
        # Relu
        #  |  \
        # Conv \
        #  |    \
        # Relu   |
        #  |    Conv
        # Conv  /
        #   \  /
        #    |
        #   Add
        input0 = helper.make_tensor_value_info('input0', TensorProto.FLOAT,
                                               [1, 3, 1, 3])
        output = helper.make_tensor_value_info('output', TensorProto.FLOAT,
                                               [1, 3, 1, 3])
        X1_weight = generate_input_initializer([3, 3, 1, 1], np.float32,
                                               'X1_weight')
        X1_bias = generate_input_initializer([3], np.float32, 'X1_bias')
        X3_weight = generate_input_initializer([3, 3, 1, 1], np.float32,
                                               'X3_weight')
        X3_bias = generate_input_initializer([3], np.float32, 'X3_bias')
        X5_weight = generate_input_initializer([3, 3, 1, 1], np.float32,
                                               'X5_weight')
        X5_bias = generate_input_initializer([3], np.float32, 'X5_bias')
        relu_node_1 = onnx.helper.make_node('Relu', ['input0'], ['X1'],
                                            name='Relu1')
        conv_node_1 = onnx.helper.make_node('Conv',
                                            ['X1', 'X1_weight', 'X1_bias'],
                                            ['X2'], name='Conv1')
        relu_node_2 = onnx.helper.make_node('Relu', ['X2'], ['X3'],
                                            name='Relu2')
        conv_node_2 = onnx.helper.make_node('Conv',
                                            ['X3', 'X3_weight', 'X3_bias'],
                                            ['X4'], name='Conv2')
        conv_node_3 = onnx.helper.make_node('Conv',
                                            ['X1', 'X5_weight', 'X5_bias'],
                                            ['X5'], name='Conv3')
        add_node = onnx.helper.make_node('Add', ['X4', 'X5'], ['output'],
                                         name='Add')
        graph = helper.make_graph([
            relu_node_1, conv_node_1, relu_node_2, conv_node_2, conv_node_3,
            add_node
        ], 'test_graph_6', [input0], [output])
        graph.initializer.add().CopyFrom(X1_weight)
        graph.initializer.add().CopyFrom(X1_bias)
        graph.initializer.add().CopyFrom(X3_weight)
        graph.initializer.add().CopyFrom(X3_bias)
        graph.initializer.add().CopyFrom(X5_weight)
        graph.initializer.add().CopyFrom(X5_bias)
        model = helper.make_model(graph)
        # Round-trip through disk so the tests exercise a loaded model.
        test_model_path = './test_model_6.onnx'
        onnx.save(model, test_model_path)
        model = onnx.load(test_model_path)
        self.model = ONNXModel(model)

        # Quantized test graph topology:
        # QuantizeLinear
        #       |
        #  QLinearConv
        #       |
        # DequantizeLinear
        A = helper.make_tensor_value_info('A', TensorProto.FLOAT,
                                          [1, 1, 5, 5])
        A_scale = helper.make_tensor_value_info('A_scale', TensorProto.FLOAT,
                                                [1])
        a_scale = generate_input_initializer([1], np.float32, 'A_scale')
        A_zo = helper.make_tensor_value_info('A_zero_point', TensorProto.INT8,
                                             [1])
        a_zero_point = generate_input_initializer([1], np.int8,
                                                  'A_zero_point')
        B_scale = helper.make_tensor_value_info('B_scale', TensorProto.FLOAT,
                                                [1])
        b_scale = generate_input_initializer([1], np.float32, 'B_scale')
        B_zo = helper.make_tensor_value_info('B_zero_point', TensorProto.INT8,
                                             [1])
        b_zero_point = generate_input_initializer([1], np.int8,
                                                  'B_zero_point')
        C = helper.make_tensor_value_info('C', TensorProto.INT8, [1, 1, 5, 5])
        c = generate_input_initializer([1, 1, 5, 5], np.int8, 'C')
        C_scale = helper.make_tensor_value_info('C_scale', TensorProto.FLOAT,
                                                [1])
        c_scale = generate_input_initializer([1], np.float32, 'C_scale')
        C_zo = helper.make_tensor_value_info('C_zero_point', TensorProto.INT8,
                                             [1])
        c_zero_point = generate_input_initializer([1], np.int8,
                                                  'C_zero_point')
        E = helper.make_tensor_value_info('E', TensorProto.INT32, [1])
        e = generate_input_initializer([1], np.int32, 'E')
        D_scale = helper.make_tensor_value_info('D_scale', TensorProto.FLOAT,
                                                [1])
        d_scale = generate_input_initializer([1], np.float32, 'D_scale')
        D_zo = helper.make_tensor_value_info('D_zero_point', TensorProto.INT8,
                                             [1])
        d_zero_point = generate_input_initializer([1], np.int8,
                                                  'D_zero_point')
        D = helper.make_tensor_value_info('D', TensorProto.FLOAT,
                                          [1, 1, 5, 5])
        quantize_node = onnx.helper.make_node('QuantizeLinear',
                                              ['A', 'A_scale',
                                               'A_zero_point'],
                                              ['B_quantized'],
                                              name='A_QuantizeLinear')
        conv_node = onnx.helper.make_node('QLinearConv', [
            'B_quantized', 'B_scale', 'B_zero_point', 'C_quantized',
            'C_scale', 'C_zero_point', 'D_scale', 'D_zero_point', 'E'
        ], ['D_quantized'],
                                          name='conv_quant',
                                          kernel_shape=[3, 3],
                                          pads=[1, 1, 1, 1])
        dequantize_node = onnx.helper.make_node(
            'DequantizeLinear', ['D_quantized', 'D_scale', 'D_zero_point'],
            ['D'], name='D_DequantizeLinear')
        graph = helper.make_graph(
            [quantize_node, conv_node, dequantize_node], 'test_graph_7',
            [A, A_scale, A_zo, C, C_scale, C_zo, E, D_scale, D_zo], [D])
        graph.initializer.add().CopyFrom(a_scale)
        graph.initializer.add().CopyFrom(a_zero_point)
        graph.initializer.add().CopyFrom(b_scale)
        graph.initializer.add().CopyFrom(b_zero_point)
        graph.initializer.add().CopyFrom(c)
        graph.initializer.add().CopyFrom(c_scale)
        graph.initializer.add().CopyFrom(c_zero_point)
        graph.initializer.add().CopyFrom(e)
        graph.initializer.add().CopyFrom(d_scale)
        graph.initializer.add().CopyFrom(d_zero_point)
        model = helper.make_model(graph)
        self.q_model = ONNXModel(model)

    def test_nodes(self):
        """All six nodes of the FP32 graph are exposed by nodes()."""
        self.assertEqual(len(self.model.nodes()), 6)
        nodes_name = [node.name for node in self.model.nodes()]
        nodes = ["Relu1", "Conv1", "Relu2", "Conv2", "Conv3", "Add"]
        for node in nodes:
            self.assertTrue(node in nodes_name)

    def test_initializer(self):
        """All six weight/bias initializers are exposed by initializer()."""
        self.assertEqual(len(self.model.initializer()), 6)
        inits_name = [init.name for init in self.model.initializer()]
        inits = [
            'X1_weight', 'X1_bias', 'X3_weight', 'X3_bias', 'X5_weight',
            'X5_bias'
        ]
        for init in inits:
            self.assertTrue(init in inits_name)

    def test_remove_node(self):
        """Removing the Add node leaves the other five in place."""
        for node in self.model.nodes():
            if node.op_type == "Add":
                self.model.remove_node(node)
        self.assertEqual(len(self.model.nodes()), 5)
        nodes_name = [node.name for node in self.model.nodes()]
        nodes = ["Relu1", "Conv1", "Relu2", "Conv2", "Conv3"]
        for node in nodes:
            self.assertTrue(node in nodes_name)

    def test_remove_nodes(self):
        """remove_nodes() deletes several nodes in one call."""
        nodes_to_remove = []
        for node in self.model.nodes():
            if node.name == "Conv3" or node.name == "Add":
                nodes_to_remove.append(node)
        self.model.remove_nodes(nodes_to_remove)
        self.assertEqual(len(self.model.nodes()), 4)
        nodes_name = [node.name for node in self.model.nodes()]
        nodes = ["Relu1", "Conv1", "Relu2", "Conv2"]
        for node in nodes:
            self.assertTrue(node in nodes_name)

    def test_add_node(self):
        """add_node() appends at the end of the node list."""
        # NOTE: 'keepdims' is not a Relu attribute; make_node accepts it
        # regardless and the assertion only inspects op_type.
        node_to_add = onnx.helper.make_node('Relu', ['output'], ['output1'],
                                            keepdims=0)
        self.model.add_node(node_to_add)
        last_node = self.model.nodes()[-1]
        self.assertEqual(last_node.op_type, 'Relu')

    def test_add_nodes(self):
        """add_nodes() appends several nodes preserving their order."""
        nodes_to_add = []
        for i in range(2):
            node_to_add = onnx.helper.make_node(
                'Relu', ["add_node{}_input".format(str(i))],
                ["add_node{}_output".format(str(i))], keepdims=0)
            nodes_to_add.append(node_to_add)
        self.model.add_nodes(nodes_to_add)
        self.assertEqual(self.model.nodes()[-1].input, ['add_node1_input'])
        self.assertEqual(self.model.nodes()[-2].input, ['add_node0_input'])
        self.assertEqual(self.model.nodes()[-1].output, ['add_node1_output'])
        self.assertEqual(self.model.nodes()[-2].output, ['add_node0_output'])

    def test_get_initializer(self):
        """get_initializer() finds every initializer by name."""
        inits = [
            'X1_weight', 'X1_bias', 'X3_weight', 'X3_bias', 'X5_weight',
            'X5_bias'
        ]
        for init in inits:
            self.assertIsNotNone(self.model.get_initializer(init))

    def test_remove_initializer(self):
        """Removing one initializer leaves the other five."""
        for init in self.model.initializer():
            if init.name == "X1_weight":
                self.model.remove_initializer(init)
        self.assertEqual(len(self.model.initializer()), 5)
        inits_name = [init.name for init in self.model.initializer()]
        inits = ['X1_bias', 'X3_weight', 'X3_bias', 'X5_weight', 'X5_bias']
        for init in inits:
            self.assertTrue(init in inits_name)

    def test_remove_initializers(self):
        """remove_initializers() deletes all three bias initializers."""
        init_to_remove = []
        for init in self.model.initializer():
            if "bias" in init.name:
                init_to_remove.append(init)
        self.model.remove_initializers(init_to_remove)
        self.assertEqual(len(self.model.initializer()), 3)
        inits_name = [init.name for init in self.model.initializer()]
        inits = ['X1_weight', 'X3_weight', 'X5_weight']
        for init in inits:
            self.assertTrue(init in inits_name)

    def test_input_name_to_nodes(self):
        """Every tensor consumed by some node appears in the mapping."""
        # 12 = input0 + 5 intermediate tensors (X1..X5) + 6 initializers.
        self.assertEqual(len(self.model.input_name_to_nodes), 12)
        ipts_name = [name for name in self.model.input_name_to_nodes]
        ipts = [
            'input0', 'X1', 'X2', 'X3', 'X3_weight', 'X3_bias', 'X5_weight',
            'X5_bias', 'X4', 'X5'
        ]
        for ipt in ipts:
            self.assertTrue(ipt in ipts_name)

    def test_output_name_to_node(self):
        """Every tensor produced by some node appears in the mapping."""
        self.assertEqual(len(self.model.output_name_to_node), 6)
        opts_name = [name for name in self.model.output_name_to_node]
        opts = ['X1', 'X2', 'X3', 'X4', 'X5', 'output']
        for opt in opts:
            self.assertTrue(opt in opts_name)

    def test_get_children(self):
        """Relu1 feeds both Conv1 and Conv3."""
        for node in self.model.nodes():
            if node.name == "Relu1":
                children = self.model.get_children(node)
                self.assertEqual(len(children), 2)
                children_name = [child.name for child in children]
                names = ["Conv1", "Conv3"]
                for name in names:
                    self.assertTrue(name in children_name)

    def test_get_parents(self):
        """Add is fed by both Conv2 and Conv3."""
        for node in self.model.nodes():
            if node.op_type == "Add":
                parents = self.model.get_parents(node)
                self.assertEqual(len(parents), 2)
                parents_name = [parent.name for parent in parents]
                names = ["Conv2", "Conv3"]
                for name in names:
                    self.assertTrue(name in parents_name)

    def test_get_parent(self):
        """get_parent() resolves each input index; out-of-range gives None."""
        node_to_get_parent = None
        for node in self.model.nodes():
            if node.op_type == "Add":
                node_to_get_parent = node
        # Fixed: use the captured Add node instead of relying on the loop
        # variable leaking out of the loop (which only worked because Add
        # happened to be the last node in the graph).
        parent = self.model.get_parent(node_to_get_parent, 0)
        self.assertEqual(parent.name, "Conv2")
        parent = self.model.get_parent(node_to_get_parent, 1)
        self.assertEqual(parent.name, "Conv3")
        # Add has only two inputs, so index 2 must yield None.
        parent = self.model.get_parent(node_to_get_parent, 2)
        self.assertIsNone(parent)

    def test_find_nodes_by_initializer(self):
        """X1_weight is consumed by exactly one node, Conv1."""
        for init in self.model.initializer():
            if init.name == "X1_weight":
                initializer = init
        nodes = self.model.find_nodes_by_initializer(self.model.graph(),
                                                     initializer)
        self.assertEqual(len(nodes), 1)
        self.assertEqual(nodes[0].name, "Conv1")

    def test_get_scale_zo(self):
        """get_scale_zo() runs for activation, weight and bias tensors."""
        # Smoke test only: verifies the lookups do not raise on the
        # quantized model's tensors.
        input_scale, input_zo = self.q_model.get_scale_zo('B_quantized')
        weight_scale, weight_zo = self.q_model.get_scale_zo('C_quantized')
        bias_scale, bias_zo = self.q_model.get_scale_zo('E')

    def test_save(self):
        """Saving with external data format succeeds."""
        self.model.save_model_to_file('./test_model_6.onnx',
                                      use_external_data_format=True)
def _replace_gemm_with_matmul(self, model):
    """Rewrite every eligible Gemm node as MatMul (+ optional Add).

    Only Gemm nodes with ``alpha == 1.0``, ``beta == 1.0`` and
    ``transA == 0`` are rewritten; any other Gemm is kept unchanged.
    When ``transB == 1``, the B operand is transposed: in place if B is
    an initializer, otherwise by inserting a Transpose node.  If the
    Gemm has a bias input, an Add node consumes the MatMul result.

    Args:
        model: an ``onnx.ModelProto`` or ``lpot`` ``ONNXModel``.

    Returns:
        The modified ``ONNXModel`` (the input model is mutated).
    """
    new_nodes = []
    from onnx import numpy_helper
    from lpot.model.onnx_model import ONNXModel
    if not isinstance(model, ONNXModel):
        model = ONNXModel(model)
    for node in model.nodes():
        if node.op_type == 'Gemm':
            # Gemm attribute defaults per the ONNX operator spec.
            alpha = 1.0
            beta = 1.0
            transA = 0
            transB = 0
            for attr in node.attribute:
                if attr.name == 'alpha':
                    alpha = onnx.helper.get_attribute_value(attr)
                elif attr.name == 'beta':
                    beta = onnx.helper.get_attribute_value(attr)
                elif attr.name == 'transA':
                    transA = onnx.helper.get_attribute_value(attr)
                elif attr.name == 'transB':
                    transB = onnx.helper.get_attribute_value(attr)
            # Only the plain y = A @ B (+ C) form maps onto MatMul/Add.
            if alpha == 1.0 and beta == 1.0 and transA == 0:
                inputB = node.input[1]
                if transB == 1:
                    B = model.get_initializer(node.input[1])
                    if B:
                        # assume B is not used by any other node:
                        # the initializer is replaced in place by its
                        # transpose under the same name.
                        B_array = numpy_helper.to_array(B)
                        B_trans = numpy_helper.from_array(B_array.T)
                        B_trans.name = B.name
                        model.remove_initializer(B)
                        model.add_initializer(B_trans)

                        #TBD this is for onnx model zoo, which are all in old IR version
                        # (IR < 4 requires initializers to also appear as
                        # graph inputs, so keep the declared shape in sync).
                        if model.model.ir_version < 4:
                            for input in model.model.graph.input:
                                if input.name == B_trans.name:
                                    for i, dim in enumerate(
                                            input.type.tensor_type.shape.
                                            dim):
                                        dim.dim_value = B_array.T.shape[i]
                    else:
                        # B is produced at runtime: transpose it with an
                        # explicit Transpose node feeding the MatMul.
                        inputB += '_Transposed'
                        transpose_node = onnx.helper.make_node(
                            'Transpose',
                            inputs=[node.input[1]],
                            outputs=[inputB],
                            name=node.name + '_Transpose')
                        new_nodes.append(transpose_node)

                # If a bias input exists, the MatMul writes to an
                # intermediate tensor that the Add below consumes;
                # otherwise it takes over the Gemm's output name directly.
                matmul_node = onnx.helper.make_node(
                    'MatMul',
                    inputs=[node.input[0], inputB],
                    outputs=[
                        node.output[0] +
                        ('_MatMul' if len(node.input) > 2 else '')
                    ],
                    name=node.name + '_MatMul')
                new_nodes.append(matmul_node)

                if len(node.input) > 2:
                    add_node = onnx.helper.make_node(
                        'Add',
                        inputs=[node.output[0] + '_MatMul', node.input[2]],
                        outputs=node.output,
                        name=node.name + '_Add')
                    new_nodes.append(add_node)

            # unsupported Gemm configuration: keep the node as-is
            else:
                new_nodes.append(node)

        # not GEMM: pass the node through unchanged
        else:
            new_nodes.append(node)

    # Rebuild the graph's node list from the rewritten sequence.
    model.graph().ClearField('node')
    model.graph().node.extend(new_nodes)
    return model