Esempio n. 1
0
    def test_dump_tensor(self):
        '''Exercise ONNXRTAugment.dump_tensor on the CV and NLP sessions.

        Three scenarios are covered: activations over two iterations,
        activations plus weights for two white-listed nodes, and an
        activation dump on the NLP model.
        '''
        # Scenario 1: activations only, two iterations, one white-listed node.
        cv_model, cv_loader = self.cv_session
        augment = ONNXRTAugment(
            ONNXModel(cv_model), cv_loader, [], self.augment_path,
            iterations=[0, 1], white_nodes=["conv"])
        dumped = augment.dump_tensor()
        for step in (0, 1):
            assert "conv" in dumped["activation"][step]
            assert "C" in dumped["activation"][step]["conv"]

        # Scenario 2: weights requested as well, two white-listed nodes.
        cv_model, cv_loader = self.cv_session
        augment = ONNXRTAugment(
            ONNXModel(cv_model), cv_loader, [], self.augment_path,
            iterations=[0], white_nodes=["conv", "relu"])
        dumped = augment.dump_tensor(weight=True)
        first_iter = dumped["activation"][0]
        assert "conv" in first_iter
        assert "relu" in first_iter
        assert "conv" in dumped["weight"]

        # Scenario 3: NLP model, activation of the "gather" node.
        nlp_model, nlp_loader = self.nlp_session
        augment = ONNXRTAugment(
            ONNXModel(nlp_model), nlp_loader, [], self.augment_path,
            iterations=[0], white_nodes=["gather"])
        dumped = augment.dump_tensor()
        assert "gather" in dumped["activation"][0]
Esempio n. 2
0
 def test_dump_calibration(self):
     '''Check that dump_calibration reports entries for tensors A, B, D and C.'''
     cv_model, cv_loader = self.cv_session
     augment = ONNXRTAugment(
         ONNXModel(cv_model), cv_loader, ["Conv", "Relu"],
         self.augment_path, iterations=[0])
     calib_params = augment.dump_calibration()
     # Every listed tensor name must be present in the calibration result.
     expected_names = ("A", "B", "D", "C")
     assert all(name in calib_params for name in expected_names)
Esempio n. 3
0
 def set_tensor(self, model, tensor_dict):
     '''Quantize the given tensors and write them into an int8 model.

     Args:
         model: an ONNXModel, or an object that ONNXModel(...) can wrap.
         tensor_dict: mapping of tensor name -> new tensor value. Names
             lacking the '_quantized' suffix get it appended.

     Returns:
         The (possibly newly wrapped) ONNXModel with updated initializers.

     Raises:
         AssertionError: if the graph has no QuantizeLinear node, if a
             tensor feeds a number of nodes other than one, or if the
             consuming node is missing from self.q_config.
     '''
     from onnx import numpy_helper
     from lpot.model.onnx_model import ONNXModel
     from lpot.adaptor.ox_utils.util import quantize_data_with_scale_zo
     from lpot.adaptor.ox_utils.util import quantize_data_per_channel
     if not isinstance(model, ONNXModel):
         model = ONNXModel(model)
     graph_op_types = [node.op_type for node in model.model.graph.node]
     assert "QuantizeLinear" in graph_op_types, \
         'adaptor.set_tensor only accept int8 model'
     consumers = model.input_name_to_nodes
     for raw_name, tensor_value in tensor_dict.items():
         if raw_name.endswith('_quantized'):
             tensor_name = raw_name
         else:
             tensor_name = raw_name + '_quantized'
         # Without both a scale and a zero point the tensor is handed to
         # the bias requantization path below.
         scale_tensor, zo_tensor = model.get_scale_zo(tensor_name)
         has_qparams = scale_tensor is not None and zo_tensor is not None
         if has_qparams:
             scale_value = numpy_helper.to_array(scale_tensor)
             zo_value = numpy_helper.to_array(zo_tensor)
         users = consumers[tensor_name]
         assert len(users) == 1, \
             'quantized filter weight should be input of only one node'
         node = users[0]  # TBD only for conv bias
         node_name = node.name.replace('_quant', '')
         assert node_name in self.q_config
         weight_cfg = self.q_config[node_name]['weight']
         q_type = weight_cfg['dtype']
         if not has_qparams:
             new_tensor_value = self._requantize_bias(
                 model, tensor_name, tensor_value)
         elif weight_cfg['granularity'] == 'per_tensor':
             new_tensor_value = quantize_data_with_scale_zo(
                 tensor_value, q_type, scale_value, zo_value)
         else:
             new_tensor_value = quantize_data_per_channel(
                 tensor_value, q_type, scale_value, zo_value)
         model.set_initializer(tensor_name, new_tensor_value)
     return model
Esempio n. 4
0
 def _get_quantize_params(self, model, data_loader, q_config, iterations):
     '''Run calibration through ONNXRTAugment and return its parameters.

     Args:
         model: an ONNXModel, or an object that ONNXModel(...) can wrap.
         data_loader: calibration data source handed to ONNXRTAugment.
         q_config: mapping whose entries equal to 'fp32' are black-listed
             and all others white-listed; it must also hold the key
             'calib_iteration' (which therefore lands in one of the lists).
         iterations: not used by this method; the iteration list is
             derived from q_config['calib_iteration'] instead.

     Returns:
         Whatever ONNXRTAugment.dump_calibration() returns.
     '''
     from lpot.adaptor.ox_utils.onnxrt_mid import ONNXRTAugment
     from lpot.model.onnx_model import ONNXModel
     if not isinstance(model, ONNXModel):
         model = ONNXModel(model)
     # Split the config entries in a single pass, keeping their order.
     black_nodes, white_nodes = [], []
     for node, setting in q_config.items():
         if setting == 'fp32':
             black_nodes.append(node)
         else:
             white_nodes.append(node)
     augmented_path = os.path.join(self.work_space, 'augmented_model.onnx')
     augment = ONNXRTAugment(
         model,
         data_loader,
         self.quantizable_op_types,
         augmented_path,
         black_nodes=black_nodes,
         white_nodes=white_nodes,
         iterations=list(range(q_config['calib_iteration'])))
     return augment.dump_calibration()
Esempio n. 5
0
 def inspect_tensor(self,
                    model,
                    data_loader,
                    op_list=None,
                    iteration_list=None,
                    inspect_type='activation',
                    save_to_disk=False):
     '''The function is used by tune strategy class for dumping tensor info.

     Args:
         model: an ONNXModel, or an object that ONNXModel(...) can wrap.
         data_loader: data source handed to ONNXRTAugment.
         op_list: node names passed to ONNXRTAugment as white_nodes;
             None (the default) means an empty list.
         iteration_list: iterations passed to ONNXRTAugment; None (the
             default) means an empty list.
         inspect_type: 'activation' dumps activations only, 'weight'
             dumps weights only, any other value dumps both.
         save_to_disk: when true, also store the result in
             '<work_space>/dumped_tensors.npz'.

     Returns:
         The mapping returned by ONNXRTAugment.dump_tensor().
     '''
     from lpot.adaptor.ox_utils.onnxrt_mid import ONNXRTAugment
     from lpot.model.onnx_model import ONNXModel
     # None sentinels avoid sharing one mutable default list across calls.
     op_list = [] if op_list is None else op_list
     iteration_list = [] if iteration_list is None else iteration_list
     if not isinstance(model, ONNXModel):
         model = ONNXModel(model)
     augment = ONNXRTAugment(
         model, data_loader, [],
         os.path.join(self.work_space, 'augment_for_inspect.onnx'),
         iterations=iteration_list,
         white_nodes=op_list)
     tensors = augment.dump_tensor(activation=(inspect_type != 'weight'),
                                   weight=(inspect_type != 'activation'))
     if save_to_disk:
         # np.savez takes the target file first and the data afterwards;
         # the positional payload is stored under the key 'arr_0'.
         np.savez(os.path.join(self.work_space, 'dumped_tensors.npz'),
                  tensors)
     return tensors
Esempio n. 6
0
 def dump_tensor(self, activation=True, weight=False):
     '''Collect intermediate tensors and group them by white-listed node.

     Args:
         activation: include the per-iteration activation maps in the result.
         weight: include the weight map in the result; also controls the
             activation_only flag passed to augment_graph.

     Returns:
         A dict that may contain "weight" (node name -> {tensor: value})
         and "activation" (one {node name: {tensor: value}} dict per
         iteration), depending on the two flags.
     '''
     graph_op_types = [node.op_type for node in self.model.graph.node]
     if "QuantizeLinear" in graph_op_types:
         # The model already carries quantization nodes.
         self.augment_nodes = ["DequantizeLinear"]
         self.already_quantized = True
     self.augment_graph(activation_only=not weight, output_only=True)
     _, per_iter_outputs = self.get_intermediate_outputs()

     # Regroup: tensor name -> list of values, one entry per iteration.
     collected = {}
     for iter_outputs in per_iter_outputs:
         for name in iter_outputs:
             collected.setdefault(name, []).append(iter_outputs[name])

     iters = len(per_iter_outputs)
     map_node_activation = [{} for _ in range(iters)]
     map_node_weight = {}
     self.white_nodes = [
         name.replace('_quant', '') for name in self.white_nodes
     ]
     wrapper = ONNXModel(self.augmented_model)
     producer_of = wrapper.output_name_to_node
     consumers_of = wrapper.input_name_to_nodes
     graph_output_names = [t.name for t in self.model.graph.output]
     initializer_names = [t.name for t in self.model.graph.initializer]

     for tensor_name, tensors in collected.items():
         if tensor_name.endswith(('_scale', '_zero_point')):
             continue  # don't dump scale and zero_point
         is_initializer = tensor_name in initializer_names
         if is_initializer:
             # Initializers belong to the white-listed nodes consuming them.
             nodes = [
                 consumer for consumer in consumers_of[tensor_name]
                 if consumer.name.replace('_quant', '') in self.white_nodes
             ]
         else:
             nodes = [producer_of[tensor_name]]
         dumped_key = tensor_name.replace('_quantized', '')
         for node in nodes:
             node_name = node.name.replace('_quant', '')
             if node_name not in self.white_nodes:
                 if tensor_name in graph_output_names:
                     continue
                 # Walk up through first parents until a white-listed
                 # node is reached.
                 while node_name not in self.white_nodes:
                     node = wrapper.get_parents(
                         node, output_name_to_node=producer_of)[0]
                     node_name = node.name.replace('_quant', '')
             # A weight entry is created for every visited node, even
             # when only an activation ends up being recorded.
             map_node_weight.setdefault(node_name, {})
             if is_initializer:
                 map_node_weight[node_name][dumped_key] = tensors[0]
             else:
                 for i in range(iters):
                     map_node_activation[i][node_name] = \
                         {dumped_key: tensors[i]}

     dumped_tensors_map = {}
     if weight:
         dumped_tensors_map["weight"] = map_node_weight
     if activation:
         dumped_tensors_map["activation"] = map_node_activation
     return dumped_tensors_map
Esempio n. 7
0
    def test_quant_param_calculation(self):
        '''TEST_CONFIG_6

        Build a small Relu/Conv/Add graph, run dump_calibration on it and
        compare each returned (zero point, scale) pair with values
        recomputed from the thresholds given by _map_calibration.
        '''

        #   Relu
        #    |      \
        #   Conv     \
        #    |        \
        #   Relu       |
        #    |       Conv
        #   Conv      /
        #      \     /
        #         |
        #        Add

        graph_input = helper.make_tensor_value_info(
            'input0', TensorProto.FLOAT, [1, 3, 1, 3])
        graph_output = helper.make_tensor_value_info(
            'output', TensorProto.FLOAT, [1, 3, 1, 3])

        # Weight and bias per Conv, generated in this fixed order.
        initializer_specs = (
            ([3, 3, 1, 1], 'X1_weight'),
            ([3], 'X1_bias'),
            ([3, 3, 1, 1], 'X3_weight'),
            ([3], 'X3_bias'),
            ([3, 3, 1, 1], 'X5_weight'),
            ([3], 'X5_bias'),
        )
        initializers = [
            generate_input_initializer(shape, np.float32, name)
            for shape, name in initializer_specs
        ]

        nodes = [
            onnx.helper.make_node('Relu', ['input0'], ['X1'], name='Relu1'),
            onnx.helper.make_node('Conv', ['X1', 'X1_weight', 'X1_bias'],
                                  ['X2'], name='Conv1'),
            onnx.helper.make_node('Relu', ['X2'], ['X3'], name='Relu2'),
            onnx.helper.make_node('Conv', ['X3', 'X3_weight', 'X3_bias'],
                                  ['X4'], name='Conv2'),
            onnx.helper.make_node('Conv', ['X1', 'X5_weight', 'X5_bias'],
                                  ['X5'], name='Conv3'),
            onnx.helper.make_node('Add', ['X4', 'X5'], ['output'],
                                  name='Add'),
        ]

        graph = helper.make_graph(nodes, 'test_graph_5', [graph_input],
                                  [graph_output])
        for initializer in initializers:
            graph.initializer.add().CopyFrom(initializer)

        model = helper.make_model(graph)
        data_reader = TestDataset()
        augmented_model_path = os.path.join(self.work_space,
                                            './augmented_test_model_5.onnx')
        augment = ONNXRTAugment(ONNXModel(model), data_reader,
                                ['Conv', 'MatMul'], augmented_model_path)

        #test calculation of quantization params
        #TO_DO: check rmin/rmax
        quantization_params_dict = augment.dump_calibration()
        node_output_names, output_dicts_list = \
            augment.get_intermediate_outputs()
        dict_for_quantization = augment._map_calibration(
            node_output_names, output_dicts_list)
        #check the size of the quantization dictionary
        self.assertEqual(len(quantization_params_dict), 11)

        #check the computation of zp and scale
        for key, value in quantization_params_dict.items():
            self.assertIsNotNone(value)
            self.assertEqual(len(value), 2)

            # The expected range always includes zero.
            thresholds = dict_for_quantization[key]
            rmin = min(thresholds[0], 0)
            rmax = max(thresholds[1], 0)
            if key == 'X2':  #next_node is Relu
                if rmin < 0:
                    rmin = 0

            raw_scale = (rmax - rmin) / 255 if rmin != rmax else 1
            scale_expected = np.float32(raw_scale)
            clamped_zp = max(0, min(255, (0 - rmin) / scale_expected))
            zp_expected = np.uint8(round(clamped_zp))
            zp_actual, scale_actual = value[0], value[1]

            self.assertEqual(zp_expected, zp_actual)
            self.assertEqual(scale_expected, scale_actual)

        print('Finished' + ' test calculation of quantization params.')
Esempio n. 8
0
    def test_augment_graph(self):
        ''' TEST_CONFIG_1'''

        #     Conv
        #      |
        #     Clip
        #      |
        #     MatMul

        A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5])
        B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 3, 3])
        E = helper.make_tensor_value_info('E', TensorProto.FLOAT, [1, 1, 5, 1])
        F = helper.make_tensor_value_info('F', TensorProto.FLOAT, [1, 1, 5, 1])
        conv_node = onnx.helper.make_node('Conv', ['A', 'B'], ['C'],
                                          name='Conv',
                                          kernel_shape=[3, 3],
                                          pads=[1, 1, 1, 1])
        clip_node = onnx.helper.make_node('Clip', ['C'], ['D'], name='Clip')
        matmul_node = onnx.helper.make_node('MatMul', ['D', 'E'], ['F'],
                                            name='MatMul')
        graph = helper.make_graph([conv_node, clip_node, matmul_node],
                                  'test_graph_1', [A, B, E], [F])
        model = helper.make_model(graph)

        # Augmenting graph
        data_reader = None
        augmented_model_path = os.path.join(self.work_space,
                                            './augmented_test_model_1.onnx')
        augment = ONNXRTAugment(ONNXModel(model), data_reader,
                                ['Conv', 'MatMul'], augmented_model_path)
        augment.augment_nodes = ["ReduceMin", "ReduceMax"]
        augment.augment_graph()
        augmented_model = augment.augmented_model
        onnx.save(augmented_model, augmented_model_path)

        # Checking if each added ReduceMin and ReduceMax node and its output exists
        augmented_model_node_names = [
            node.name for node in augmented_model.graph.node
        ]
        augmented_model_outputs = [
            output.name for output in augmented_model.graph.output
        ]
        added_node_names = ['A_ReduceMin', 'A_ReduceMax', 'B_ReduceMin', 'B_ReduceMax', 'C_ReduceMin', \
            'C_ReduceMax', 'D_ReduceMin', 'D_ReduceMax', 'F_ReduceMin', 'F_ReduceMax']
        added_outputs = ['A_ReduceMin', 'A_ReduceMax', 'B_ReduceMin', 'B_ReduceMax', 'C_ReduceMin', \
            'C_ReduceMax', 'D_ReduceMin', 'D_ReduceMax', 'F_ReduceMin', 'F_ReduceMax']
        # Original 3 nodes + added ReduceMin/Max nodes * 6 (exlude graph input/output)
        self.assertEqual(len(augmented_model_node_names), 15)
        # Original 1 graph output + added outputs * 6
        self.assertEqual(len(augmented_model_outputs), 13)
        for name in added_node_names:
            self.assertTrue(name in augmented_model_node_names)
        for output in added_outputs:
            self.assertTrue(output in augmented_model_outputs)

        print('Finished TEST_CONFIG_1')
        '''TEST_CONFIG_2'''

        #   Conv
        #    |
        #   Conv

        G = helper.make_tensor_value_info('G', TensorProto.FLOAT, [1, 1, 5, 5])
        H = helper.make_tensor_value_info('H', TensorProto.FLOAT, [1, 1, 3, 3])
        J = helper.make_tensor_value_info('J', TensorProto.FLOAT, [1, 1, 3, 3])
        K = helper.make_tensor_value_info('K', TensorProto.FLOAT, [1, 1, 5, 5])
        conv_node_1 = onnx.helper.make_node('Conv', ['G', 'H'], ['I'],
                                            name='Conv',
                                            kernel_shape=[3, 3],
                                            pads=[1, 1, 1, 1])
        conv_node_2 = onnx.helper.make_node('Conv', ['I', 'J'], ['K'],
                                            name='Conv',
                                            kernel_shape=[3, 3],
                                            pads=[1, 1, 1, 1])
        graph = helper.make_graph([conv_node_1, conv_node_2], 'test_graph_2',
                                  [G, H, J], [K])
        model = helper.make_model(graph)

        # Augmenting graph
        data_reader = None
        augmented_model_path = os.path.join(self.work_space,
                                            './augmented_test_model_2.onnx')
        augment = ONNXRTAugment(ONNXModel(model), data_reader,
                                ['Conv', 'MatMul'], augmented_model_path)
        augment.augment_nodes = ["ReduceMin", "ReduceMax"]
        augment.augment_graph()
        augmented_model = augment.augmented_model
        onnx.save(augmented_model, augmented_model_path)

        augmented_model_node_names = [
            node.name for node in augmented_model.graph.node
        ]
        augmented_model_outputs = [
            output.name for output in augmented_model.graph.output
        ]
        added_node_names = ['I_ReduceMin', 'I_ReduceMax', 'J_ReduceMin', 'J_ReduceMax', 'H_ReduceMin', 'H_ReduceMax', \
            'G_ReduceMin', 'G_ReduceMax', 'K_ReduceMin', 'K_ReduceMax']
        added_outputs = ['I_ReduceMin', 'I_ReduceMax', 'J_ReduceMin', 'J_ReduceMax', 'H_ReduceMin', 'H_ReduceMax',\
            'G_ReduceMin', 'G_ReduceMax', 'K_ReduceMin', 'K_ReduceMax']
        # Original 2 nodes + added ReduceMin/Max nodes * 4
        self.assertEqual(len(augmented_model_node_names), 12)
        # Original 1 graph output + added outputs * 4
        self.assertEqual(len(augmented_model_outputs), 11)
        for name in added_node_names:
            self.assertTrue(name in augmented_model_node_names)
        for output in added_outputs:
            self.assertTrue(output in augmented_model_outputs)

        print('Finished TEST_CONFIG_2')
        '''TEST_CONFIG_3'''

        #   Relu
        #    |
        #   Conv  \
        #    |     |
        #   Clip   |
        #    |    /
        #   MatMul

        L = helper.make_tensor_value_info('L', TensorProto.FLOAT, [1, 1, 5, 5])
        N = helper.make_tensor_value_info('N', TensorProto.FLOAT, [1, 1, 3, 3])
        Q = helper.make_tensor_value_info('Q', TensorProto.FLOAT, [1, 1, 5, 5])
        relu_node = onnx.helper.make_node('Relu', ['L'], ['M'], name='Relu')
        conv_node = onnx.helper.make_node('Conv', ['M', 'N'], ['O'],
                                          name='Conv',
                                          kernel_shape=[3, 3],
                                          pads=[1, 1, 1, 1])
        clip_node = onnx.helper.make_node('Clip', ['O'], ['P'], name='Clip')
        matmul_node = onnx.helper.make_node('MatMul', ['P', 'M'], ['Q'],
                                            name='MatMul')
        graph = helper.make_graph(
            [relu_node, conv_node, clip_node, matmul_node], 'test_graph_3',
            [L, N], [Q])
        model = helper.make_model(graph)

        # Augmenting graph
        data_reader = None
        augmented_model_path = os.path.join(self.work_space,
                                            './augmented_test_model_3.onnx')
        augment = ONNXRTAugment(ONNXModel(model), data_reader,
                                ['Conv', 'MatMul'], augmented_model_path)
        augment.augment_nodes = ["ReduceMin", "ReduceMax"]
        augment.augment_graph()
        augmented_model = augment.augmented_model
        onnx.save(augmented_model, augmented_model_path)

        augmented_model_node_names = [
            node.name for node in augmented_model.graph.node
        ]
        augmented_model_outputs = [
            output.name for output in augmented_model.graph.output
        ]
        added_node_names = ['O_ReduceMin', 'O_ReduceMax', 'Q_ReduceMin', 'Q_ReduceMax', 'N_ReduceMin', \
            'N_ReduceMax', 'P_ReduceMin', 'P_ReduceMax', 'M_ReduceMin', 'M_ReduceMax']
        added_outputs =  ['O_ReduceMin', 'O_ReduceMax', 'Q_ReduceMin', 'Q_ReduceMax', 'N_ReduceMin', \
            'N_ReduceMax', 'P_ReduceMin', 'P_ReduceMax', 'M_ReduceMin', 'M_ReduceMax']
        # Original 4 nodes + added ReduceMin/Max nodes * 8
        self.assertEqual(len(augmented_model_node_names), 14)
        # Original 1 graph output + added outputs * 8
        self.assertEqual(len(augmented_model_outputs), 11)
        for name in added_node_names:
            self.assertTrue(name in augmented_model_node_names)
        for output in added_outputs:
            self.assertTrue(output in augmented_model_outputs)

        print('Finished TEST_CONFIG_3')
        '''TEST_CONFIG_4'''

        #   Attention
        #    |
        #   MatMul

        Attention_weight = helper.make_tensor_value_info(
            'Attention_weight', TensorProto.FLOAT, [13, 7])
        Attention_bias = helper.make_tensor_value_info('Attention_bias',
                                                       TensorProto.FLOAT,
                                                       [13, 7])
        Attention_mask = helper.make_tensor_value_info('Attention_mask',
                                                       TensorProto.INT32,
                                                       [13, 7])
        S = helper.make_tensor_value_info('S', TensorProto.FLOAT, [13, 7])
        T = helper.make_tensor_value_info('T', TensorProto.FLOAT, [13, 7])
        attention_node = onnx.helper.make_node(
            'Attention',
            ['Attention_weight', 'Attention_bias', 'Attention_mask'], ['R'],
            name='Attention')
        matmul_node = onnx.helper.make_node('MatMul', ['R', 'S'], ['T'],
                                            name='MatMul')
        graph = helper.make_graph(
            [attention_node, matmul_node], 'test_graph_4',
            [Attention_weight, Attention_bias, Attention_mask, S], [T])
        model = helper.make_model(graph)

        # Augmenting graph
        data_reader = None
        augmented_model_path = os.path.join(self.work_space,
                                            './augmented_test_model_4.onnx')
        augment = ONNXRTAugment(ONNXModel(model), data_reader,
                                ['Conv', 'MatMul', 'Attention'],
                                augmented_model_path)
        augment.augment_nodes = ["ReduceMin", "ReduceMax"]
        augment.augment_graph()
        augmented_model = augment.augmented_model
        onnx.save(augmented_model, augmented_model_path)

        augmented_model_node_names = [
            node.name for node in augmented_model.graph.node
        ]
        augmented_model_outputs = [
            output.name for output in augmented_model.graph.output
        ]
        added_node_names = ['Attention_bias_ReduceMin', 'Attention_bias_ReduceMax', 'Attention_weight_ReduceMin', \
            'Attention_weight_ReduceMax', 'S_ReduceMin', 'S_ReduceMax', 'R_ReduceMin', 'R_ReduceMax', 'T_ReduceMin', 'T_ReduceMax']
        added_outputs = ['Attention_bias_ReduceMin', 'Attention_bias_ReduceMax', 'Attention_weight_ReduceMin', \
            'Attention_weight_ReduceMax', 'S_ReduceMin', 'S_ReduceMax', 'R_ReduceMin', 'R_ReduceMax', 'T_ReduceMin', 'T_ReduceMax']
        # Original 2 nodes + added ReduceMin/Max nodes * 5
        self.assertEqual(len(augmented_model_node_names), 12)
        # Original 1 graph output + added outputs * 5
        self.assertEqual(len(augmented_model_outputs), 11)
        for name in added_node_names:
            self.assertTrue(name in augmented_model_node_names)
        for output in added_outputs:
            self.assertTrue(output in augmented_model_outputs)

        print('Finished TEST_CONFIG_4')

        #    QAttention
        #        |
        #    QuantizeLinear

        Attention_weight = helper.make_tensor_value_info(
            'weight_quantized', TensorProto.INT8, [13, 7])
        weight_quantized = generate_input_initializer([13, 7], np.int8,
                                                      'weight_quantized')
        Attention_bias = helper.make_tensor_value_info('bias',
                                                       TensorProto.FLOAT,
                                                       [13, 7])
        bias = generate_input_initializer([13, 7], np.float32, 'bias')
        Input_scale = helper.make_tensor_value_info('input_scale',
                                                    TensorProto.FLOAT, [1])
        input_scale = generate_input_initializer([1], np.float32,
                                                 'input_scale')
        Weight_scale = helper.make_tensor_value_info('weight_scale',
                                                     TensorProto.FLOAT, [1])
        weight_scale = generate_input_initializer([1], np.float32,
                                                  'weight_scale')
        Attention_mask = helper.make_tensor_value_info('mask',
                                                       TensorProto.INT32,
                                                       [13, 7])
        mask = generate_input_initializer([13, 7], np.int32, 'mask')
        Input_zo = helper.make_tensor_value_info('input_zero_point',
                                                 TensorProto.INT8, [1])
        input_zero_point = generate_input_initializer([1], np.int8,
                                                      'input_zero_point')
        Weight_zo = helper.make_tensor_value_info('weight_zero_point',
                                                  TensorProto.INT8, [1])
        weight_zero_point = generate_input_initializer([1], np.int8,
                                                       'weight_zero_point')
        Q_scale = helper.make_tensor_value_info('attn_output_scale',
                                                TensorProto.FLOAT, [1])
        attn_output_scale = generate_input_initializer([1], np.float32,
                                                       'attn_output_scale')
        Q_zo = helper.make_tensor_value_info('attn_output_zero_point',
                                             TensorProto.INT8, [1])
        attn_output_zero_point = generate_input_initializer(
            [1], np.int8, 'attn_output_zero_point')
        Output = helper.make_tensor_value_info('output', TensorProto.INT8,
                                               [13, 7])
        attention_node = onnx.helper.make_node('QAttention', [
            'weight_quantized', 'bias', 'input_scale', 'weight_scale', 'mask',
            'input_zero_point', 'weight_zero_point'
        ], ['attn_output'],
                                               name='attention_quant')
        qlinear_node = onnx.helper.make_node(
            'QuantizeLinear',
            ['attn_output', 'attn_output_scale', 'attn_output_zero_point'],
            ['attn_output_quantized'],
            name='attn_output_QuantizeLinear')
        graph = helper.make_graph(
            [attention_node, qlinear_node], 'test_graph_5', [
                Attention_weight, Attention_bias, Input_scale, Weight_scale,
                Attention_mask, Input_zo, Weight_zo, Q_scale, Q_zo
            ], [Output])
        graph.initializer.add().CopyFrom(weight_quantized)
        graph.initializer.add().CopyFrom(bias)
        graph.initializer.add().CopyFrom(input_scale)
        graph.initializer.add().CopyFrom(weight_scale)
        graph.initializer.add().CopyFrom(mask)
        graph.initializer.add().CopyFrom(input_zero_point)
        graph.initializer.add().CopyFrom(weight_zero_point)
        graph.initializer.add().CopyFrom(attn_output_scale)
        graph.initializer.add().CopyFrom(attn_output_zero_point)
        model = helper.make_model(graph)

        # Augmenting graph
        data_reader = None
        augmented_model_path = os.path.join(self.work_space,
                                            './augmented_test_model_5.onnx')
        augment = ONNXRTAugment(ONNXModel(model),
                                data_reader, [],
                                augmented_model_path,
                                white_nodes=['attention'])
        augment.augment_nodes = ['DequantizeLinear']
        augment.already_quantized = True
        augment.augment_graph(activation_only=True, output_only=True)
        augmented_model = augment.augmented_model
        onnx.save(augmented_model, augmented_model_path)

        augmented_model_node_names = [
            node.name for node in augmented_model.graph.node
        ]
        augmented_model_outputs = [
            output.name for output in augmented_model.graph.output
        ]
        added_outputs = ['attn_output']
        self.assertEqual(len(augmented_model_node_names), 2)
        self.assertEqual(len(augmented_model_outputs), 2)
        for output in added_outputs:
            self.assertTrue(output in augmented_model_outputs)

        print('Finished TEST_CONFIG_5')

        #    QuantizeLinear
        #        |
        #    QLinearConv
        #        |
        #    DequantizeLinear
        A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5])
        A_scale = helper.make_tensor_value_info('A_scale', TensorProto.FLOAT,
                                                [1])
        a_scale = generate_input_initializer([1], np.float32, 'A_scale')
        A_zo = helper.make_tensor_value_info('A_zero_point', TensorProto.INT8,
                                             [1])
        a_zero_point = generate_input_initializer([1], np.int8, 'A_zero_point')
        B_scale = helper.make_tensor_value_info('B_scale', TensorProto.FLOAT,
                                                [1])
        b_scale = generate_input_initializer([1], np.float32, 'B_scale')
        B_zo = helper.make_tensor_value_info('B_zero_point', TensorProto.INT8,
                                             [1])
        b_zero_point = generate_input_initializer([1], np.int8, 'B_zero_point')
        C = helper.make_tensor_value_info('C', TensorProto.INT8, [1, 1, 5, 5])
        c = generate_input_initializer([1, 1, 5, 5], np.int8, 'C')
        C_scale = helper.make_tensor_value_info('C_scale', TensorProto.FLOAT,
                                                [1])
        c_scale = generate_input_initializer([1], np.float32, 'C_scale')
        C_zo = helper.make_tensor_value_info('C_zero_point', TensorProto.INT8,
                                             [1])
        c_zero_point = generate_input_initializer([1], np.int8, 'C_zero_point')
        E = helper.make_tensor_value_info('E', TensorProto.INT32, [1])
        e = generate_input_initializer([1], np.int32, 'E')
        D_scale = helper.make_tensor_value_info('D_scale', TensorProto.FLOAT,
                                                [1])
        d_scale = generate_input_initializer([1], np.float32, 'D_scale')
        D_zo = helper.make_tensor_value_info('D_zero_point', TensorProto.INT8,
                                             [1])
        d_zero_point = generate_input_initializer([1], np.int8, 'D_zero_point')
        D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 5])
        quantize_node = onnx.helper.make_node('QuantizeLinear',
                                              ['A', 'A_scale', 'A_zero_point'],
                                              ['B'],
                                              name='A_QuantizeLinear')
        conv_node = onnx.helper.make_node('QLinearConv', [
            'B', 'B_scale', 'B_zero_point', 'C', 'C_scale', 'C_zero_point',
            'D_scale', 'D_zero_point', 'E'
        ], ['D_quantized'],
                                          name='conv_quant',
                                          kernel_shape=[3, 3],
                                          pads=[1, 1, 1, 1])
        dequantize_node = onnx.helper.make_node(
            'DequantizeLinear', ['D_quantized', 'D_scale', 'D_zero_point'],
            ['D'],
            name='D_DequantizeLinear')
        graph = helper.make_graph(
            [quantize_node, conv_node, dequantize_node], 'test_graph_5',
            [A, A_scale, A_zo, C, C_scale, C_zo, E, D_scale, D_zo], [D])
        graph.initializer.add().CopyFrom(a_scale)
        graph.initializer.add().CopyFrom(a_zero_point)
        graph.initializer.add().CopyFrom(b_scale)
        graph.initializer.add().CopyFrom(b_zero_point)
        graph.initializer.add().CopyFrom(c)
        graph.initializer.add().CopyFrom(c_scale)
        graph.initializer.add().CopyFrom(c_zero_point)
        graph.initializer.add().CopyFrom(e)
        graph.initializer.add().CopyFrom(d_scale)
        graph.initializer.add().CopyFrom(d_zero_point)
        model = helper.make_model(graph)

        # Augmenting graph
        data_reader = None
        augmented_model_path = os.path.join(self.work_space,
                                            './augmented_test_model_6.onnx')
        augment = ONNXRTAugment(ONNXModel(model),
                                data_reader, [],
                                augmented_model_path,
                                white_nodes=['conv'])
        augment.augment_nodes = ["DequantizeLinear"]
        augment.already_quantized = True
        augment.augment_graph(activation_only=True, output_only=True)
        augmented_model = augment.augmented_model
        onnx.save(augmented_model, augmented_model_path)

        augmented_model_node_names = [
            node.name for node in augmented_model.graph.node
        ]
        augmented_model_outputs = [
            output.name for output in augmented_model.graph.output
        ]
        added_node_names = ['D_quantized_DequantizeLinear']
        added_outputs = ['D_quantized_output']
        self.assertEqual(len(augmented_model_node_names), 4)
        self.assertEqual(len(augmented_model_outputs), 2)
        for name in added_node_names:
            self.assertTrue(name in augmented_model_node_names)
        for output in added_outputs:
            self.assertTrue(output in augmented_model_outputs)
Esempio n. 9
0
    def setUp(self):
        """Build the two model fixtures used by the tests.

        Sets ``self.model`` to a float Relu/Conv/Add graph (after a
        save/load round trip through ``./test_model_6.onnx``) and
        ``self.q_model`` to a QuantizeLinear -> QLinearConv ->
        DequantizeLinear graph. Both are wrapped in ``ONNXModel``.
        """
        #   Relu
        #    |      \
        #   Conv     \
        #    |        \
        #   Relu       |
        #    |       Conv
        #   Conv      /
        #      \     /
        #         |
        #        Add

        input0 = helper.make_tensor_value_info('input0', TensorProto.FLOAT,
                                               [1, 3, 1, 3])
        output = helper.make_tensor_value_info('output', TensorProto.FLOAT,
                                               [1, 3, 1, 3])

        # Weight/bias pairs for Conv1, Conv2 and Conv3. The list order is the
        # order in which the initializers are appended to the graph.
        float_initializers = [
            generate_input_initializer([3, 3, 1, 1], np.float32, 'X1_weight'),
            generate_input_initializer([3], np.float32, 'X1_bias'),
            generate_input_initializer([3, 3, 1, 1], np.float32, 'X3_weight'),
            generate_input_initializer([3], np.float32, 'X3_bias'),
            generate_input_initializer([3, 3, 1, 1], np.float32, 'X5_weight'),
            generate_input_initializer([3], np.float32, 'X5_bias'),
        ]

        relu_node_1 = onnx.helper.make_node('Relu', ['input0'], ['X1'],
                                            name='Relu1')
        conv_node_1 = onnx.helper.make_node('Conv',
                                            ['X1', 'X1_weight', 'X1_bias'],
                                            ['X2'],
                                            name='Conv1')
        relu_node_2 = onnx.helper.make_node('Relu', ['X2'], ['X3'],
                                            name='Relu2')
        conv_node_2 = onnx.helper.make_node('Conv',
                                            ['X3', 'X3_weight', 'X3_bias'],
                                            ['X4'],
                                            name='Conv2')
        # Conv3 is the side branch: it consumes X1 (the output of Relu1).
        conv_node_3 = onnx.helper.make_node('Conv',
                                            ['X1', 'X5_weight', 'X5_bias'],
                                            ['X5'],
                                            name='Conv3')
        add_node = onnx.helper.make_node('Add', ['X4', 'X5'], ['output'],
                                         name='Add')

        graph = helper.make_graph([
            relu_node_1, conv_node_1, relu_node_2, conv_node_2, conv_node_3,
            add_node
        ], 'test_graph_6', [input0], [output])
        for tensor in float_initializers:
            graph.initializer.add().CopyFrom(tensor)

        # Round-trip the model through disk before wrapping it.
        model = helper.make_model(graph)
        test_model_path = './test_model_6.onnx'
        onnx.save(model, test_model_path)
        model = onnx.load(test_model_path)
        self.model = ONNXModel(model)

        #    QuantizeLinear
        #        |
        #    QLinearConv
        #        |
        #    DequantizeLinear
        A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5])
        A_scale = helper.make_tensor_value_info('A_scale', TensorProto.FLOAT,
                                                [1])
        A_zo = helper.make_tensor_value_info('A_zero_point', TensorProto.INT8,
                                             [1])
        C = helper.make_tensor_value_info('C', TensorProto.INT8, [1, 1, 5, 5])
        C_scale = helper.make_tensor_value_info('C_scale', TensorProto.FLOAT,
                                                [1])
        C_zo = helper.make_tensor_value_info('C_zero_point', TensorProto.INT8,
                                             [1])
        E = helper.make_tensor_value_info('E', TensorProto.INT32, [1])
        D_scale = helper.make_tensor_value_info('D_scale', TensorProto.FLOAT,
                                                [1])
        D_zo = helper.make_tensor_value_info('D_zero_point', TensorProto.INT8,
                                             [1])
        D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 5])

        # B_scale / B_zero_point exist only as initializers; they are not
        # declared as graph inputs.
        quant_initializers = [
            generate_input_initializer([1], np.float32, 'A_scale'),
            generate_input_initializer([1], np.int8, 'A_zero_point'),
            generate_input_initializer([1], np.float32, 'B_scale'),
            generate_input_initializer([1], np.int8, 'B_zero_point'),
            generate_input_initializer([1, 1, 5, 5], np.int8, 'C'),
            generate_input_initializer([1], np.float32, 'C_scale'),
            generate_input_initializer([1], np.int8, 'C_zero_point'),
            generate_input_initializer([1], np.int32, 'E'),
            generate_input_initializer([1], np.float32, 'D_scale'),
            generate_input_initializer([1], np.int8, 'D_zero_point'),
        ]

        quantize_node = onnx.helper.make_node('QuantizeLinear',
                                              ['A', 'A_scale', 'A_zero_point'],
                                              ['B_quantized'],
                                              name='A_QuantizeLinear')
        conv_node = onnx.helper.make_node('QLinearConv', [
            'B_quantized', 'B_scale', 'B_zero_point', 'C_quantized', 'C_scale',
            'C_zero_point', 'D_scale', 'D_zero_point', 'E'
        ], ['D_quantized'],
                                          name='conv_quant',
                                          kernel_shape=[3, 3],
                                          pads=[1, 1, 1, 1])
        dequantize_node = onnx.helper.make_node(
            'DequantizeLinear', ['D_quantized', 'D_scale', 'D_zero_point'],
            ['D'],
            name='D_DequantizeLinear')
        graph = helper.make_graph(
            [quantize_node, conv_node, dequantize_node], 'test_graph_7',
            [A, A_scale, A_zo, C, C_scale, C_zo, E, D_scale, D_zo], [D])
        for tensor in quant_initializers:
            graph.initializer.add().CopyFrom(tensor)
        model = helper.make_model(graph)
        self.q_model = ONNXModel(model)
Esempio n. 10
0
class TestOnnxModel(unittest.TestCase):
    """Unit tests for the ``ONNXModel`` wrapper.

    ``setUp`` provides two fixtures:

    * ``self.model``   -- a float Relu/Conv/Add graph with six nodes and six
      initializers, used by the node, initializer and topology tests.
    * ``self.q_model`` -- a QuantizeLinear -> QLinearConv -> DequantizeLinear
      graph, used by ``test_get_scale_zo``.
    """

    def setUp(self):
        """Rebuild both fixtures so every test starts from a pristine model."""
        self.model = ONNXModel(self._build_float_model())
        self.q_model = ONNXModel(self._build_quantized_model())

    @staticmethod
    def _build_float_model():
        """Build the float fixture and return it after a save/load round trip."""
        #   Relu
        #    |      \
        #   Conv     \
        #    |        \
        #   Relu       |
        #    |       Conv
        #   Conv      /
        #      \     /
        #         |
        #        Add

        input0 = helper.make_tensor_value_info('input0', TensorProto.FLOAT,
                                               [1, 3, 1, 3])
        output = helper.make_tensor_value_info('output', TensorProto.FLOAT,
                                               [1, 3, 1, 3])

        # Weight/bias pairs for Conv1, Conv2 and Conv3. The list order is the
        # order in which the initializers are appended to the graph.
        initializers = [
            generate_input_initializer([3, 3, 1, 1], np.float32, 'X1_weight'),
            generate_input_initializer([3], np.float32, 'X1_bias'),
            generate_input_initializer([3, 3, 1, 1], np.float32, 'X3_weight'),
            generate_input_initializer([3], np.float32, 'X3_bias'),
            generate_input_initializer([3, 3, 1, 1], np.float32, 'X5_weight'),
            generate_input_initializer([3], np.float32, 'X5_bias'),
        ]

        relu_node_1 = onnx.helper.make_node('Relu', ['input0'], ['X1'],
                                            name='Relu1')
        conv_node_1 = onnx.helper.make_node('Conv',
                                            ['X1', 'X1_weight', 'X1_bias'],
                                            ['X2'],
                                            name='Conv1')
        relu_node_2 = onnx.helper.make_node('Relu', ['X2'], ['X3'],
                                            name='Relu2')
        conv_node_2 = onnx.helper.make_node('Conv',
                                            ['X3', 'X3_weight', 'X3_bias'],
                                            ['X4'],
                                            name='Conv2')
        # Conv3 is the side branch: it consumes X1 (the output of Relu1).
        conv_node_3 = onnx.helper.make_node('Conv',
                                            ['X1', 'X5_weight', 'X5_bias'],
                                            ['X5'],
                                            name='Conv3')
        add_node = onnx.helper.make_node('Add', ['X4', 'X5'], ['output'],
                                         name='Add')

        graph = helper.make_graph([
            relu_node_1, conv_node_1, relu_node_2, conv_node_2, conv_node_3,
            add_node
        ], 'test_graph_6', [input0], [output])
        for tensor in initializers:
            graph.initializer.add().CopyFrom(tensor)

        # Round-trip the model through disk before handing it to ONNXModel.
        model = helper.make_model(graph)
        test_model_path = './test_model_6.onnx'
        onnx.save(model, test_model_path)
        return onnx.load(test_model_path)

    @staticmethod
    def _build_quantized_model():
        """Build the in-memory quantized fixture and return it."""
        #    QuantizeLinear
        #        |
        #    QLinearConv
        #        |
        #    DequantizeLinear
        A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5])
        A_scale = helper.make_tensor_value_info('A_scale', TensorProto.FLOAT,
                                                [1])
        A_zo = helper.make_tensor_value_info('A_zero_point', TensorProto.INT8,
                                             [1])
        C = helper.make_tensor_value_info('C', TensorProto.INT8, [1, 1, 5, 5])
        C_scale = helper.make_tensor_value_info('C_scale', TensorProto.FLOAT,
                                                [1])
        C_zo = helper.make_tensor_value_info('C_zero_point', TensorProto.INT8,
                                             [1])
        E = helper.make_tensor_value_info('E', TensorProto.INT32, [1])
        D_scale = helper.make_tensor_value_info('D_scale', TensorProto.FLOAT,
                                                [1])
        D_zo = helper.make_tensor_value_info('D_zero_point', TensorProto.INT8,
                                             [1])
        D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 5])

        # B_scale / B_zero_point exist only as initializers; they are not
        # declared as graph inputs.
        initializers = [
            generate_input_initializer([1], np.float32, 'A_scale'),
            generate_input_initializer([1], np.int8, 'A_zero_point'),
            generate_input_initializer([1], np.float32, 'B_scale'),
            generate_input_initializer([1], np.int8, 'B_zero_point'),
            generate_input_initializer([1, 1, 5, 5], np.int8, 'C'),
            generate_input_initializer([1], np.float32, 'C_scale'),
            generate_input_initializer([1], np.int8, 'C_zero_point'),
            generate_input_initializer([1], np.int32, 'E'),
            generate_input_initializer([1], np.float32, 'D_scale'),
            generate_input_initializer([1], np.int8, 'D_zero_point'),
        ]

        quantize_node = onnx.helper.make_node('QuantizeLinear',
                                              ['A', 'A_scale', 'A_zero_point'],
                                              ['B_quantized'],
                                              name='A_QuantizeLinear')
        conv_node = onnx.helper.make_node('QLinearConv', [
            'B_quantized', 'B_scale', 'B_zero_point', 'C_quantized', 'C_scale',
            'C_zero_point', 'D_scale', 'D_zero_point', 'E'
        ], ['D_quantized'],
                                          name='conv_quant',
                                          kernel_shape=[3, 3],
                                          pads=[1, 1, 1, 1])
        dequantize_node = onnx.helper.make_node(
            'DequantizeLinear', ['D_quantized', 'D_scale', 'D_zero_point'],
            ['D'],
            name='D_DequantizeLinear')
        graph = helper.make_graph(
            [quantize_node, conv_node, dequantize_node], 'test_graph_7',
            [A, A_scale, A_zo, C, C_scale, C_zo, E, D_scale, D_zo], [D])
        for tensor in initializers:
            graph.initializer.add().CopyFrom(tensor)
        return helper.make_model(graph)

    def _node_where(self, attr, value):
        """Return the first node of ``self.model`` whose *attr* equals *value*.

        Fails the running test when nothing matches, so a broken fixture
        shows up as an assertion failure rather than a ``NameError``.
        """
        for node in self.model.nodes():
            if getattr(node, attr) == value:
                return node
        self.fail("fixture has no node with {} == {!r}".format(attr, value))

    def test_nodes(self):
        """nodes() exposes all six nodes of the float fixture."""
        self.assertEqual(len(self.model.nodes()), 6)
        nodes_name = [node.name for node in self.model.nodes()]
        for name in ["Relu1", "Conv1", "Relu2", "Conv2", "Conv3", "Add"]:
            self.assertIn(name, nodes_name)

    def test_initializer(self):
        """initializer() exposes all six weight/bias tensors."""
        self.assertEqual(len(self.model.initializer()), 6)
        inits_name = [init.name for init in self.model.initializer()]
        expected = [
            'X1_weight', 'X1_bias', 'X3_weight', 'X3_bias', 'X5_weight',
            'X5_bias'
        ]
        for name in expected:
            self.assertIn(name, inits_name)

    def test_remove_node(self):
        """remove_node() drops exactly the node it is given."""
        # Iterate over a snapshot: removing from the live container while
        # looping over it can skip elements.
        for node in list(self.model.nodes()):
            if node.op_type == "Add":
                self.model.remove_node(node)
        self.assertEqual(len(self.model.nodes()), 5)
        nodes_name = [node.name for node in self.model.nodes()]
        for name in ["Relu1", "Conv1", "Relu2", "Conv2", "Conv3"]:
            self.assertIn(name, nodes_name)

    def test_remove_nodes(self):
        """remove_nodes() drops every node in the given list."""
        nodes_to_remove = [
            node for node in self.model.nodes()
            if node.name in ("Conv3", "Add")
        ]
        self.model.remove_nodes(nodes_to_remove)
        self.assertEqual(len(self.model.nodes()), 4)
        nodes_name = [node.name for node in self.model.nodes()]
        for name in ["Relu1", "Conv1", "Relu2", "Conv2"]:
            self.assertIn(name, nodes_name)

    def test_add_node(self):
        """add_node() appends the new node at the end of the node list."""
        node_to_add = onnx.helper.make_node('Relu', ['output'], ['output1'],
                                            keepdims=0)
        self.model.add_node(node_to_add)
        last_node = self.model.nodes()[-1]
        self.assertEqual(last_node.op_type, 'Relu')

    def test_add_nodes(self):
        """add_nodes() appends the new nodes in the order given."""
        nodes_to_add = [
            onnx.helper.make_node('Relu', ["add_node{}_input".format(i)],
                                  ["add_node{}_output".format(i)],
                                  keepdims=0) for i in range(2)
        ]
        self.model.add_nodes(nodes_to_add)
        self.assertEqual(self.model.nodes()[-1].input, ['add_node1_input'])
        self.assertEqual(self.model.nodes()[-2].input, ['add_node0_input'])
        self.assertEqual(self.model.nodes()[-1].output, ['add_node1_output'])
        self.assertEqual(self.model.nodes()[-2].output, ['add_node0_output'])

    def test_get_initializer(self):
        """get_initializer() finds every initializer by name."""
        names = [
            'X1_weight', 'X1_bias', 'X3_weight', 'X3_bias', 'X5_weight',
            'X5_bias'
        ]
        for name in names:
            self.assertIsNotNone(self.model.get_initializer(name))

    def test_remove_initializer(self):
        """remove_initializer() drops exactly the tensor it is given."""
        # Snapshot first; see test_remove_node for the reason.
        for init in list(self.model.initializer()):
            if init.name == "X1_weight":
                self.model.remove_initializer(init)
        self.assertEqual(len(self.model.initializer()), 5)
        inits_name = [init.name for init in self.model.initializer()]
        for name in ['X1_bias', 'X3_weight', 'X3_bias', 'X5_weight',
                     'X5_bias']:
            self.assertIn(name, inits_name)

    def test_remove_initializers(self):
        """remove_initializers() drops every tensor in the given list."""
        init_to_remove = [
            init for init in self.model.initializer() if "bias" in init.name
        ]
        self.model.remove_initializers(init_to_remove)
        self.assertEqual(len(self.model.initializer()), 3)
        inits_name = [init.name for init in self.model.initializer()]
        for name in ['X1_weight', 'X3_weight', 'X5_weight']:
            self.assertIn(name, inits_name)

    def test_input_name_to_nodes(self):
        """input_name_to_nodes has one key per distinct node input name."""
        self.assertEqual(len(self.model.input_name_to_nodes), 12)
        ipts_name = list(self.model.input_name_to_nodes)
        # All twelve distinct names consumed by the six nodes.
        expected = [
            'input0', 'X1', 'X1_weight', 'X1_bias', 'X2', 'X3', 'X3_weight',
            'X3_bias', 'X5_weight', 'X5_bias', 'X4', 'X5'
        ]
        for name in expected:
            self.assertIn(name, ipts_name)

    def test_output_name_to_node(self):
        """output_name_to_node has one key per node output name."""
        self.assertEqual(len(self.model.output_name_to_node), 6)
        opts_name = list(self.model.output_name_to_node)
        for name in ['X1', 'X2', 'X3', 'X4', 'X5', 'output']:
            self.assertIn(name, opts_name)

    def test_get_children(self):
        """Relu1 feeds both Conv1 and the side-branch Conv3."""
        children = self.model.get_children(self._node_where("name", "Relu1"))
        self.assertEqual(len(children), 2)
        children_name = [child.name for child in children]
        for name in ["Conv1", "Conv3"]:
            self.assertIn(name, children_name)

    def test_get_parents(self):
        """Add is fed by Conv2 and Conv3."""
        parents = self.model.get_parents(self._node_where("op_type", "Add"))
        self.assertEqual(len(parents), 2)
        parents_name = [parent.name for parent in parents]
        for name in ["Conv2", "Conv3"]:
            self.assertIn(name, parents_name)

    def test_get_parent(self):
        """get_parent() resolves by input index; None when out of range."""
        add_node = self._node_where("op_type", "Add")
        parent = self.model.get_parent(add_node, 0)
        self.assertEqual(parent.name, "Conv2")
        parent = self.model.get_parent(add_node, 1)
        self.assertEqual(parent.name, "Conv3")
        parent = self.model.get_parent(add_node, 2)
        self.assertIsNone(parent)

    def test_find_nodes_by_initializer(self):
        """X1_weight is consumed by Conv1 only."""
        initializer = next(
            (init for init in self.model.initializer()
             if init.name == "X1_weight"), None)
        self.assertIsNotNone(initializer)
        nodes = self.model.find_nodes_by_initializer(self.model.graph(),
                                                     initializer)
        self.assertEqual(len(nodes), 1)
        self.assertEqual(nodes[0].name, "Conv1")

    def test_get_scale_zo(self):
        """Smoke test: get_scale_zo() returns a pair for each tensor name."""
        # NOTE(review): the returned values are not asserted; the expected
        # scale/zero-point for each name should be confirmed and pinned.
        input_scale, input_zo = self.q_model.get_scale_zo('B_quantized')
        weight_scale, weight_zo = self.q_model.get_scale_zo('C_quantized')
        bias_scale, bias_zo = self.q_model.get_scale_zo('E')

    def test_save(self):
        """Smoke test: saving with external data format does not raise."""
        self.model.save_model_to_file('./test_model_6.onnx',
                                      use_external_data_format=True)
Esempio n. 11
0
    def _replace_gemm_with_matmul(self, model):
        """Rewrite eligible ``Gemm`` nodes as ``MatMul`` (plus ``Add`` for a bias).

        A ``Gemm`` node is rewritten only when ``alpha == 1.0``,
        ``beta == 1.0`` and ``transA == 0``; any other node, including an
        ineligible ``Gemm``, is kept as is. When ``transB == 1`` the second
        operand is transposed up front: in place if it is an initializer,
        otherwise through an inserted ``Transpose`` node.

        Args:
            model: an ``ONNXModel``, or an object accepted by the
                ``ONNXModel`` constructor.

        Returns:
            The ``ONNXModel`` whose graph node list has been replaced.
        """
        from onnx import numpy_helper
        from lpot.model.onnx_model import ONNXModel
        if not isinstance(model, ONNXModel):
            model = ONNXModel(model)

        new_nodes = []
        for node in model.nodes():
            if node.op_type != 'Gemm':
                new_nodes.append(node)
                continue

            # Attribute values used when the node does not set them.
            attrs = {'alpha': 1.0, 'beta': 1.0, 'transA': 0, 'transB': 0}
            for attr in node.attribute:
                if attr.name in attrs:
                    attrs[attr.name] = onnx.helper.get_attribute_value(attr)

            # Scaling and a transposed A are not expressible as a plain
            # MatMul (+ Add), so such nodes are left untouched.
            if not (attrs['alpha'] == 1.0 and attrs['beta'] == 1.0
                    and attrs['transA'] == 0):
                new_nodes.append(node)
                continue

            inputB = node.input[1]
            if attrs['transB'] == 1:
                B = model.get_initializer(node.input[1])
                if B is not None:
                    # assume B is not used by any other node
                    B_array = numpy_helper.to_array(B)
                    B_trans = numpy_helper.from_array(B_array.T)
                    B_trans.name = B.name
                    model.remove_initializer(B)
                    model.add_initializer(B_trans)

                    # TBD this is for onnx model zoo, which are all in old
                    # IR version. There the initializer is also listed as a
                    # graph input, so its declared shape must follow the
                    # transposed data.
                    if model.model.ir_version < 4:
                        for graph_input in model.model.graph.input:
                            if graph_input.name != B_trans.name:
                                continue
                            dims = graph_input.type.tensor_type.shape.dim
                            for i, dim in enumerate(dims):
                                dim.dim_value = B_array.T.shape[i]
                else:
                    # B is produced at runtime: transpose it in the graph.
                    inputB += '_Transposed'
                    transpose_node = onnx.helper.make_node(
                        'Transpose',
                        inputs=[node.input[1]],
                        outputs=[inputB],
                        name=node.name + '_Transpose')
                    new_nodes.append(transpose_node)

            # An optional input may be left out by passing an empty name;
            # treat that the same as no bias instead of emitting an Add that
            # reads a non-existent tensor.
            has_bias = len(node.input) > 2 and node.input[2] != ''

            matmul_output = node.output[0] + ('_MatMul' if has_bias else '')
            matmul_node = onnx.helper.make_node(
                'MatMul',
                inputs=[node.input[0], inputB],
                outputs=[matmul_output],
                name=node.name + '_MatMul')
            new_nodes.append(matmul_node)

            if has_bias:
                add_node = onnx.helper.make_node(
                    'Add',
                    inputs=[matmul_output, node.input[2]],
                    outputs=node.output,
                    name=node.name + '_Add')
                new_nodes.append(add_node)

        model.graph().ClearField('node')
        model.graph().node.extend(new_nodes)

        return model