Example #1
    def find_node_by_name(self, node_name, new_nodes_list, graph):
        '''
        Find a node by name, either in the graph or in the new set of nodes
        created during quantization. Return the node if found.
        '''
        graph_nodes_list = list(graph.node)  # shallow copy of the node list
        graph_nodes_list.extend(new_nodes_list)
        node = find_by_name(node_name, graph_nodes_list)
        return node
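
For reference, the find_by_name helper used throughout these examples can be sketched as a linear search over protobuf objects by their name field. This is an assumption inferred from its call sites here, not the library's verbatim implementation:

    def find_by_name(item_name, item_list):
        # Hypothetical sketch: return the first item whose `name` field
        # matches, or None if there is no match.
        matches = [item for item in item_list if item.name == item_name]
        return matches[0] if matches else None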
Example #2
    def quantize_weight_per_channel(self, weight_name, weight_qType, channel_axis):
        # Find if this input is already quantized
        if weight_name in self.quantized_value_map:
            quantized_value = self.quantized_value_map[weight_name]
            return (quantized_value.q_name, quantized_value.zp_name, quantized_value.scale_name)
        
        initializer = find_by_name(weight_name, self.model.initializer())
        if initializer is None:
            raise ValueError("{} is not an initializer".format(weight_name))

        weights = self.tensor_proto_to_array(initializer)
        channel_count = weights.shape[channel_axis]
        rmin_list = []
        rmax_list = []
        zero_point_list = []
        scale_list = []
        quantized_per_channel_data_list = []
        for i in range(channel_count):
            per_channel_data = weights.take(i, channel_axis)
            rmin, rmax, zero_point, scale, quantized_per_channel_data = quantize_data(
                per_channel_data.flatten().tolist(), _get_qrange_for_qType(weight_qType, 
                self.reduce_range), weight_qType)
            rmin_list.append(rmin)
            rmax_list.append(rmax)
            zero_point_list.append(zero_point)
            scale_list.append(scale)
            quantized_per_channel_data_list.append(quantized_per_channel_data)

        # combine the per-channel quantized data into a single tensor
        reshape_dims = list(weights.shape)  # copy the shape into a mutable list
        reshape_dims[channel_axis] = 1  # each slice covers one channel
        quantized_weights = np.asarray(quantized_per_channel_data_list[0]).reshape(reshape_dims)
        for i in range(1, len(quantized_per_channel_data_list)):
            channel_weights = np.asarray(quantized_per_channel_data_list[i]).reshape(reshape_dims)
            quantized_weights = np.concatenate((quantized_weights, channel_weights), channel_axis)

        weight = QuantizedInitializer(initializer.name, initializer, rmin_list, rmax_list, 
                                      zero_point_list, scale_list,
                                      weights,
                                      quantized_weights.flatten().tolist(), 
                                      channel_axis, weight_qType)

        # Make entry for this quantized weight
        assert (weight.name not in self.quantized_value_map)
        quantized_value = QuantizedValue(weight.name, weight.name + "_quantized", 
                                         weight.name + "_scale",
                                         weight.name + "_zero_point", 
                                         QuantizedValueType.Initializer, 
                                         None, weight_qType)
        self.quantized_value_map[weight.name] = quantized_value

        self._update_weight(weight)
        return (weight.name + "_quantized", weight.name + "_zero_point", weight.name + "_scale")
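
As a rough illustration of what quantize_data appears to return per channel, here is a minimal sketch assuming asymmetric linear quantization onto qrange integer steps; the real helper's rounding and range handling may differ:

    def quantize_data_sketch(data, qrange):
        # Hypothetical linear quantization mirroring the
        # (rmin, rmax, zero_point, scale, quantized) tuple used above.
        rmin = min(min(data), 0.0)  # include 0 so it stays exactly representable
        rmax = max(max(data), 0.0)
        scale = (rmax - rmin) / qrange if rmax != rmin else 1.0
        zero_point = round((0.0 - rmin) / scale)
        quantized = [int(round(v / scale)) + zero_point for v in data]
        return rmin, rmax, zero_point, scale, quantized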
Example #3
    def quantize(self):
        node = self.node
        assert (node.op_type == "MatMul")

        (quantized_input_names, zero_point_names, scale_names, nodes) = \
            self.quantizer.quantize_inputs(node, [0, 1])

        matmul_integer_output = node.output[0] + "_output_quantized"
        matmul_integer_name = node.name + "_quant" if node.name != "" else ""
        matmul_integer_node = onnx.helper.make_node(
            "MatMulInteger", quantized_input_names + zero_point_names,
            [matmul_integer_output], matmul_integer_name)
        nodes.append(matmul_integer_node)

        # Add a Cast to convert the MatMulInteger output to float.
        cast_op_output = matmul_integer_output + "_cast_output"
        cast_node = onnx.helper.make_node("Cast", [matmul_integer_output],
                                          [cast_op_output],
                                          matmul_integer_output + "_cast",
                                          to=onnx_proto.TensorProto.FLOAT)
        nodes.append(cast_node)

        # Add a Mul node to multiply the scales of the two inputs.
        assert (len(scale_names) == 2)
        scales_mul_op = matmul_integer_name + "_scales_mul" if matmul_integer_name != "" \
            else scale_names[0] + "_" + scale_names[1] + "_mul"

        scales_mul_node = find_by_name(scales_mul_op, self.quantizer.new_nodes)
        if scales_mul_node is None:
            scales_mul_node = get_mul_node(scale_names, scales_mul_op + ":0",
                                           scales_mul_op)
            nodes.append(scales_mul_node)

        scales_mul_op_output = scales_mul_node.output[0]

        # Add a Mul to multiply the scales_mul result with the MatMulInteger
        # output, making this node's output the same as the original MatMul's.
        output_scale_mul_op = ""
        if matmul_integer_name != "":
            output_scale_mul_op = matmul_integer_name + "_output_scale_mul"
        nodes.append(
            get_mul_node([cast_op_output, scales_mul_op_output],
                         node.output[0], output_scale_mul_op))
        self.quantizer.new_nodes += nodes
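
The get_mul_node helper used here (and in the Conv example below) appears to be a thin wrapper over onnx.helper.make_node; a minimal sketch consistent with its call sites:

    import onnx

    def get_mul_node(inputs, output, name):
        # Hypothetical sketch matching the call sites above: an elementwise
        # Mul node with the given inputs, a single output, and a node name.
        return onnx.helper.make_node("Mul", inputs, [output], name)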
Example #4
    def get_bias_add_nodes(self, nodes, node, last_output,
                           quantized_bias_name):
        '''
        Given a node, this function handles the bias add by
            adding a "Reshape" node on the bias and an "Add" node.
            parameter nodes: list that newly created nodes are appended to
            parameter node: current node (Conv)
            parameter last_output: output of the previous node (input to the bias add)
            parameter quantized_bias_name: name of the quantized bias initializer
            return: name of the Add node's output
        '''
        # Build the target shape tensor; the conv weight's rank determines its length
        weight = find_by_name(node.input[1], self.model.initializer())
        if weight is None:
            raise ValueError("Expected {} to be an initializer".format(
                node.input[1]))

        # Add a Reshape for correct broadcast
        reshape_input_data = quantized_bias_name
        reshape_input_shape = quantized_bias_name + "_reshape_shape"
        reshape_input = [reshape_input_data, reshape_input_shape]

        reshape_shape = np.ones((len(weight.dims)), dtype=np.int64)
        reshape_shape[1] = -1
        init_shape = onnx.helper.make_tensor(reshape_input_shape,
                                             onnx_proto.TensorProto.INT64,
                                             [len(weight.dims)], reshape_shape)
        self.model.add_initializer(init_shape)

        reshape_op_output = node.output[0] + "_reshape"
        reshape_node = onnx.helper.make_node("Reshape", reshape_input,
                                             [reshape_op_output],
                                             quantized_bias_name + "reshape")
        nodes.append(reshape_node)

        # Add an Add operation for bias
        bias_add_input = [last_output]
        bias_add_input.append(reshape_op_output)
        add_node_output = node.output[0] + "_bias_add"
        add_node = onnx.helper.make_node("Add", bias_add_input,
                                         [add_node_output],
                                         quantized_bias_name + "bias_add")
        nodes.append(add_node)
        return add_node_output
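
To see why the Reshape is needed, here is a small NumPy sketch of the broadcast with made-up shapes: a bias of shape (C,) reshaped to (1, C, 1, 1) adds one value per channel across an NCHW tensor:

    import numpy as np

    conv_out = np.zeros((1, 3, 4, 4), dtype=np.float32)  # hypothetical NCHW output
    bias = np.array([1.0, 2.0, 3.0], dtype=np.float32)   # one value per channel

    reshape_shape = np.ones(conv_out.ndim, dtype=np.int64)
    reshape_shape[1] = -1                                 # -> [1, -1, 1, 1]
    result = conv_out + bias.reshape(reshape_shape)       # broadcasts along C
    assert result.shape == conv_out.shape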
Example #5
    def quantize(self):
        node = self.node
        assert node.op_type in ["Conv", "FusedConv"]

        (quantized_input_names, zero_point_names, scale_names, nodes) = \
            self.quantizer.quantize_inputs(node, [0, 1])

        # quantize the bias if one is present
        quantized_bias_name = ""
        bias_present = False
        if len(node.input) == 3:
            quantized_bias_name = self.quantizer.quantize_bias(node, nodes)
            bias_present = True

        conv_integer_output = node.output[0] + "_output_quantized"
        conv_integer_name = node.name + "_quant" if node.name != "" else ""

        kwargs = {}
        for attribute in node.attribute:
            if attribute.name == 'activation' and attribute.s in [
                    b'Relu', b'Clip'
            ]:
                continue
            if attribute.name == 'activation_params':
                continue
            kwargs.update(attribute_to_kwarg(attribute))
        conv_integer_node = onnx.helper.make_node(
            "ConvInteger", quantized_input_names + zero_point_names,
            [conv_integer_output], conv_integer_name, **kwargs)
        nodes.append(conv_integer_node)

        # Add bias add nodes
        if bias_present:
            conv_integer_output = self.quantizer.get_bias_add_nodes(
                nodes, node, conv_integer_output, quantized_bias_name)

        # Add a Cast to convert the ConvInteger output to float.
        cast_op_output = conv_integer_output + "_cast_output"
        cast_node = onnx.helper.make_node("Cast", [conv_integer_output],
                                          [cast_op_output],
                                          conv_integer_output + "_cast",
                                          to=onnx_proto.TensorProto.FLOAT)
        nodes.append(cast_node)

        # Add a Mul node to multiply the scales of the two inputs.
        assert (len(scale_names) == 2)
        if conv_integer_name != "":
            scales_mul_op = conv_integer_name + "_scales_mul"
        else:
            scales_mul_op = scale_names[0] + "_" + scale_names[1] + "_mul"

        scales_mul_node = find_by_name(scales_mul_op, self.quantizer.new_nodes)
        if scales_mul_node is None:
            scales_mul_node = get_mul_node(scale_names, scales_mul_op + ":0",
                                           scales_mul_op)
            nodes.append(scales_mul_node)

        scales_mul_op_output = scales_mul_node.output[0]

        # Add a Mul to multiply the scales_mul result with the ConvInteger
        # output, making this node's output the same as the original Conv's.
        output_scale_mul_op = conv_integer_name + "_output_scale_mul" if \
                                                  conv_integer_name != "" else ""
        nodes.append(
            get_mul_node([cast_op_output, scales_mul_op_output],
                         node.output[0], output_scale_mul_op))

        self.quantizer.new_nodes += nodes
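
Numerically, the Cast and the two Mul nodes implement float_output ≈ int32_accumulator * input_scale * weight_scale; a small sketch with made-up scales:

    import numpy as np

    input_scale, weight_scale = 0.02, 0.005             # hypothetical scales
    int32_acc = np.array([1200, -800], dtype=np.int32)  # e.g. ConvInteger output
    float_out = int32_acc.astype(np.float32) * (input_scale * weight_scale)
    print(float_out)  # [ 0.12 -0.08]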
Example #6
    def add_initializer(self, tensor):
        if find_by_name(tensor.name, self.model.graph.initializer) is None:
            self.model.graph.initializer.extend([tensor])
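
A possible usage sketch, where wrapper stands in for whatever model wrapper exposes this method; the second call is a no-op because find_by_name already locates the tensor:

    import onnx

    shape_tensor = onnx.helper.make_tensor(
        "bias_reshape_shape", onnx.TensorProto.INT64, [4], [1, -1, 1, 1])
    wrapper.add_initializer(shape_tensor)  # added: name not present yet
    wrapper.add_initializer(shape_tensor)  # skipped: already registered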
Example #7
    def quantize_inputs(self,
                        node,
                        indices,
                        initializer_use_weight_qType=True):
        '''
        Given a node, this function quantizes the inputs as follows:
            - If input is an initializer, quantize the initializer data, replace old initializer
              with new initializer
            - Else, add QuantizeLinear nodes to perform quantization
            parameter node: node being quantized in NodeProto format.
            parameter indices: input indices to quantize.
            return: (List of quantized input names,
                     List of zero point names used for input quantization,
                     List of scale names used for input quantization,
                     List of new QuantizeLinear nodes created)
        '''

        scale_names = []
        zero_point_names = []
        quantized_input_names = []
        nodes = []

        for input_index in indices:
            node_input = node.input[input_index]

            # Find if this input is already quantized
            if node_input in self.quantized_value_map:
                quantized_value = self.quantized_value_map[node_input]
                scale_names.append(quantized_value.scale_name)
                zero_point_names.append(quantized_value.zp_name)
                quantized_input_names.append(quantized_value.q_name)
                continue

            # Quantize the input
            initializer = find_by_name(node_input, self.model.initializer())
            if initializer is not None:
                weight = self._get_quantized_weight(initializer,
                                                    self.config[node.name]['weight']['dtype'] if \
                                                    initializer_use_weight_qType else \
                                                    self.config[node.name]['activation']['dtype'])

                # Update graph
                self._update_weight(weight)

                quantized_input_names.append(weight.name + "_quantized")
                zero_point_names.append(weight.name + "_zero_point")
                scale_names.append(weight.name + "_scale")
            else:
                # Add QuantizeLinear node.
                qlinear_node = self.model.find_node_by_name(
                    node_input + "_QuantizeLinear", self.new_nodes,
                    self.model.graph())
                if qlinear_node is None:
                    quantize_input_nodes = self._get_quantize_input_nodes(
                        node, input_index,
                        self.config[node.name]['activation']['dtype'])
                    nodes.extend(quantize_input_nodes)
                    qlinear_node = quantize_input_nodes[-1]

                if qlinear_node.op_type == "QuantizeLinear":
                    quantized_input_names.extend(qlinear_node.output)
                    scale_names.append(qlinear_node.input[1])
                    zero_point_names.append(qlinear_node.input[2])
                else:
                    quantized_input_names.append(qlinear_node.output[0])
                    scale_names.append(qlinear_node.output[1])
                    zero_point_names.append(qlinear_node.output[2])

        return (quantized_input_names, zero_point_names, scale_names, nodes)
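
The branch at the end distinguishes two node shapes: the standard ONNX QuantizeLinear consumes its scale and zero point as inputs, whereas DynamicQuantizeLinear produces them as extra outputs. A sketch of both, with illustrative tensor names:

    import onnx

    # Static quantization: scale/zero-point are inputs 1 and 2.
    static_q = onnx.helper.make_node(
        "QuantizeLinear", ["x", "x_scale", "x_zero_point"], ["x_quantized"])

    # Dynamic quantization: scale/zero-point are outputs 1 and 2.
    dynamic_q = onnx.helper.make_node(
        "DynamicQuantizeLinear", ["x"], ["x_quantized", "x_scale", "x_zero_point"])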
Example #8
    def quantize_bias(self, node, new_node_list):
        '''
        Quantize the bias. Zero point == 0 and scale == input_scale * weight_scale.
        '''

        # get scale for weight
        weight_scale_name = self.quantized_value_map[node.input[1]].scale_name
        weight_initializer = find_by_name(weight_scale_name,
                                          self.model.initializer())
        weight_scale = self.tensor_proto_to_array(weight_initializer)

        # get bias
        bias_name = node.input[2]
        bias_initializer = find_by_name(bias_name, self.model.initializer())
        bias_data = self.tensor_proto_to_array(bias_initializer)
        quantized_bias_name = bias_name + "_quantized"

        # If the input scale is not provided, this input is dynamically
        # quantized and its scale is not pre-computed at this point,
        # so resort to dynamic quantization for the bias as well.
        if self.quantization_params is None or \
           (node.input[0] not in self.quantization_params and
                node.input[0] not in self.quantized_value_map):
            self._dynamic_quantize_bias(node.input[0], weight_scale_name,
                                        bias_name, quantized_bias_name,
                                        new_node_list)
        else:
            # get scale for input
            if node.input[0] in self.quantized_value_map:
                input_scale_name = self.quantized_value_map[
                    node.input[0]].scale_name
            elif node.input[0] in self.quantization_params:
                _, input_scale_name, _, _, _ = self._get_quantization_params(
                    node.input[0])
            else:
                raise ValueError(
                    "Expected {} to be in the quantized value map for "
                    "static quantization".format(node.input[0]))

            inputscale_initializer = find_by_name(input_scale_name,
                                                  self.model.initializer())
            input_scale = self.tensor_proto_to_array(inputscale_initializer)

            # calculate the scale for the bias
            bias_scale = input_scale * weight_scale

            # quantize bias
            quantized_data = (np.asarray(bias_data) /
                              bias_scale).round().astype(np.int32)

            # update bias initializer
            bias_np_data = np.asarray(quantized_data, dtype=np.int32).reshape(\
                           bias_initializer.dims)
            packed_bias_initializer = onnx.numpy_helper.from_array(
                bias_np_data, quantized_bias_name)
            self.model.initializer().extend([packed_bias_initializer])

            # log entries for this quantized bias value
            quantized_bias_entry = QuantizedInitializer(
                bias_name,
                bias_initializer, [0], [0], [0], [bias_scale],
                bias_data,
                quantized_data,
                qType=onnx_proto.TensorProto.INT32)
            self._quantized_weights.append(quantized_bias_entry)

            assert (bias_name not in self.quantized_value_map)
            quantized_value = QuantizedValue(bias_name, quantized_bias_name,
                                             "", "",
                                             QuantizedValueType.Initializer,
                                             None,
                                             onnx_proto.TensorProto.INT32)
            self.quantized_value_map[bias_name] = quantized_value

        return quantized_bias_name
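
A numeric sketch of the static branch: with the zero point fixed at 0 and bias_scale = input_scale * weight_scale (per channel when the weight scale is a vector), the bias is rounded into int32:

    import numpy as np

    input_scale = 0.1
    weight_scale = np.array([0.5, 0.25])  # hypothetical per-channel weight scales
    bias = np.array([1.0, -0.5])

    bias_scale = input_scale * weight_scale
    q_bias = (bias / bias_scale).round().astype(np.int32)
    print(q_bias)  # [ 20 -20]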
Example #9
    def is_valid_quantize_weight(self, weight_name):
        weight = find_by_name(weight_name, self.model.initializer())
        return weight is not None and weight.data_type == onnx_proto.TensorProto.FLOAT
Example #10
    def is_input_a_weight(self, input_name):
        initializer = find_by_name(input_name, self.model.initializer())
        return initializer is not None
Example #11
    def remove_fake_quantized_nodes(self):  # pragma: no cover
        '''
            Detect and remove QuantizeLinear/DequantizeLinear node pairs (fake
            quantized nodes inserted by quantization-aware training), then
            reconnect and update the surrounding nodes.
            !!! not supported now !!!
        '''
        nodes_to_remove = []
        initializers_to_remove = []

        for curr_node in self.model.nodes():
            if curr_node.op_type == 'QuantizeLinear':
                next_node, prev_node, succ_node = None, None, None
                for child_node in self.model.get_children(curr_node):
                    if child_node.op_type == 'DequantizeLinear':
                        next_node = child_node
                if next_node is None:
                    raise ValueError(
                        "Remove fake-quantized node pair Error: DequantizeLinear "
                        "node is not found for {}.".format(curr_node.name))

                prev_node = self.model.get_parent(curr_node, 0)
                if prev_node is None:
                    raise ValueError(
                        "Remove fake-quantized node pair Error: Parent node is "
                        "not found for {}.".format(curr_node.name))

                succ_nodes = self.model.get_children(next_node)
                if len(succ_nodes) == 0:
                    raise ValueError(
                        "Remove fake-quantized node pair Error: No successive "
                        "nodes found for {}.".format(next_node.name))

                # TODO: convert it to the specified input_type
                scale_tensor_name = curr_node.input[1]
                zp_tensor_name = curr_node.input[2]
                initializer_scale = find_by_name(scale_tensor_name,
                                                 self.model.initializer())
                initializer_zp = find_by_name(zp_tensor_name,
                                              self.model.initializer())
                zp_and_scale = [
                    onnx.numpy_helper.to_array(initializer_zp),
                    onnx.numpy_helper.to_array(initializer_scale)
                ]

                # connect the previous and successive node input and output
                for succ_node in succ_nodes:
                    succ_idx = get_elem_index(next_node.output[0],
                                              succ_node.input)
                    if succ_idx != -1:
                        succ_node.input[succ_idx] = curr_node.input[0]
                    else:
                        raise ValueError(
                            "Remove fake-quantized node pair Error: Connection failed. "
                            "No matched successive node input found for {}.".format(
                                next_node.name))

                param_name = curr_node.input[0]
                if self.quantization_params is None:
                    self.quantization_params = {}
                self.quantization_params[param_name] = zp_and_scale

                # remove fake-quantized nodes
                nodes_to_remove.extend([curr_node])
                nodes_to_remove.extend([next_node])

                # remove unused initializers in graph
                initializers_to_remove.extend([initializer_scale])
                initializers_to_remove.extend([initializer_zp])

        self.model.remove_nodes(nodes_to_remove)
        self.model.remove_initializers(initializers_to_remove)

        return self.model.model
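
The get_elem_index helper used above can be sketched as a plain positional lookup; this is an assumption based on its usage, not the library's actual code. The rewiring itself replaces prev -> QuantizeLinear -> DequantizeLinear -> succ with a direct prev -> succ edge while recording the extracted (zero_point, scale) pair:

    def get_elem_index(elem_name, elem_list):
        # Hypothetical sketch matching its usage above: the position of
        # elem_name in elem_list, or -1 if it is absent.
        for idx, name in enumerate(elem_list):
            if name == elem_name:
                return idx
        return -1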