Example 1
    def build(self, *gradient_names):
        """Adds a clip grad norm sub graph to the onnx model."""

        # get the model to manipulate
        onnx_model = accessor.global_accessor.model

        # add the necessary graph initializers
        add_node_eps_name = graph_utils.generate_random_graph_name("add_eps")
        onnx_model.graph.initializer.append(
            onnx.helper.make_tensor(add_node_eps_name, onnx.TensorProto.FLOAT,
                                    [1], [1e-6]))
        max_norm_name = graph_utils.generate_random_graph_name("max_norm")
        onnx_model.graph.initializer.append(
            onnx.helper.make_tensor(max_norm_name, onnx.TensorProto.FLOAT, [1],
                                    [self._max_norm]))

        # perform gradient clipping
        total_norm_name = self._reduce(*gradient_names)
        adjusted_total_norm_name = self._add(total_norm_name,
                                             add_node_eps_name)
        clip_coef_name = self._clip(
            self._div(max_norm_name, adjusted_total_norm_name))
        return [
            self._mul(grad_name, clip_coef_name)
            for grad_name in gradient_names
        ]
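
For reference, the subgraph assembled above computes the familiar clip-grad-norm formula. A minimal numpy sketch of the same math (the bounds used by the _clip helper are not visible in this snippet; capping the coefficient at 1.0 is an assumption):

    import numpy as np

    def clip_grad_norm_reference(gradients, max_norm, eps=1e-6):
        # total L2 norm across all gradients (the _reduce / ReduceAllL2 step)
        total_norm = np.sqrt(sum(np.sum(np.square(g)) for g in gradients))
        # scaling coefficient, assumed capped at 1.0 so small gradients pass through
        clip_coef = min(max_norm / (total_norm + eps), 1.0)
        return [g * clip_coef for g in gradients]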
Example 2
    def build(self, loss_input_name: str, target_name: typing.Optional[str] = "target"):
        """Adds a BCEWithLogitsLoss subgraph on top of an onnx model.

        Creates a block that measures the binary cross entropy with logits
        between loss_input_name and target_name. The block combines a Sigmoid
        layer followed by BCELoss.

        Args:
            loss_input_name: string input representing the loss input
            target_name: string input representing the target

        Returns:
            The name of the loss output.
        """

        # get the model to manipulate
        onnx_model = accessor.global_accessor.model

        # create the graph initializers for pos_weight, weight, and the constant-one operands for the Sub nodes
        pos_weight_name = graph_utils.generate_random_graph_name("bceloss.pos_weight")
        if self._pos_weight is not None:
            onnx_model.graph.initializer.append(onnx.numpy_helper.from_array(self._pos_weight, pos_weight_name))

        weight_name = graph_utils.generate_random_graph_name("bceloss.weight")
        if self._weight is not None:
            onnx_model.graph.initializer.append(onnx.numpy_helper.from_array(self._weight, weight_name))

        sub_ones_operand_name1 = graph_utils.generate_random_graph_name("bceloss.sub_ones")
        onnx_model.graph.initializer.append(
            onnx.helper.make_tensor(sub_ones_operand_name1, onnx.TensorProto.FLOAT, [1], [1.0])
        )
        sub_ones_operand_name2 = graph_utils.generate_random_graph_name("bceloss.sub_ones")
        onnx_model.graph.initializer.append(
            onnx.helper.make_tensor(sub_ones_operand_name2, onnx.TensorProto.FLOAT, [1], [1.0])
        )

        # create a new graph input. this is the target input needed to compare
        # the graph output against to calculate loss.
        target_input = copy.deepcopy(graph_utils.get_output_from_output_name(onnx_model, loss_input_name))
        target_input.name = target_name
        onnx_model.graph.input.append(target_input)

        # create the bceloss
        sigmoid_output = self._sigmoid(loss_input_name)
        add_operand1 = self._mul(self._log(sigmoid_output), target_name)
        if self._pos_weight is not None:
            add_operand1 = self._mul(add_operand1, pos_weight_name)

        add_operand2 = self._mul(
            self._log(self._sub(sub_ones_operand_name1, sigmoid_output)),
            self._sub(sub_ones_operand_name2, target_name),
        )

        loss_output = self._neg(self._add(add_operand1, add_operand2))

        if self._weight is not None:
            loss_output = self._mul(weight_name, loss_output)

        return self._reduce(loss_output)
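
The arithmetic assembled above mirrors the textbook BCE-with-logits formula. A hedged numpy reference (assuming the block's _reduce performs a mean reduction; this is an illustration, not the block's actual kernel):

    import numpy as np

    def bce_with_logits_reference(logits, target, weight=None, pos_weight=None):
        p = 1.0 / (1.0 + np.exp(-logits))            # the Sigmoid node
        term1 = target * np.log(p)                   # first Log/Mul pair
        if pos_weight is not None:
            term1 = term1 * pos_weight
        term2 = (1.0 - target) * np.log(1.0 - p)     # the Sub/Log/Mul branch
        loss = -(term1 + term2)                      # Neg(Add(...))
        if weight is not None:
            loss = weight * loss
        return loss.mean()                           # the final _reduce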
Example 3
    def build(self, scores_input_name: str, labels_name: str = "labels"):
        """Adds a CrossEntropyLoss subgraph on top of an onnx model.

        Creates a block that measures the softmax cross entropy between
        scores_input_name and labels_name.

        Args:
            scores_input_name: string input representing the scores
            labels_name: string input representing the labels

        Returns:
            The name of the loss output.
        """

        # get the model to manipulate
        onnx_model = accessor.global_accessor.model

        weight_name = graph_utils.generate_random_graph_name("celoss.weight")
        if self._weight is not None:
            onnx_model.graph.initializer.append(onnx.numpy_helper.from_array(self._weight, weight_name))

        # create a new graph input. this is the labels input needed to compare
        # the graph output against to calculate loss.
        labels_input = copy.deepcopy(graph_utils.get_output_from_output_name(onnx_model, scores_input_name))
        labels_input.name = labels_name
        labels_input.type.tensor_type.elem_type = onnx.TensorProto.INT32
        # if the predictions are (num_examples x num_classes),
        # the labels should be (num_examples,)
        del labels_input.type.tensor_type.shape.dim[1]
        onnx_model.graph.input.append(labels_input)

        # create a new graph node for the loss
        loss_node_input_names = [scores_input_name, labels_name]
        if self._weight is not None:
            loss_node_input_names.append(weight_name)
        loss_node_output_name = graph_utils.generate_random_graph_name("loss")
        loss_node_output_names = [
            loss_node_output_name,
            graph_utils.generate_random_graph_name("log_prob"),
        ]
        loss_node = onnx.helper.make_node(
            "SoftmaxCrossEntropyLoss",
            loss_node_input_names,
            loss_node_output_names,
            reduction=self._reduction,
            ignore_index=self._ignore_index,
            name=graph_utils.generate_random_graph_name("SoftmaxCrossEntropyLoss"),
        )
        onnx_model.graph.node.append(loss_node)

        return loss_node_output_name
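
The shape surgery on labels_input is the subtle step: when the scores are (num_examples x num_classes), SoftmaxCrossEntropyLoss expects integer class indices of shape (num_examples,), hence the deleted dimension and the INT32 element type. A numpy sketch of the un-weighted, mean-reduced loss the node computes (an illustration, not the runtime kernel):

    import numpy as np

    def softmax_cross_entropy_reference(scores, labels):
        # scores: (N, C) float logits; labels: (N,) integer class indices
        shifted = scores - scores.max(axis=1, keepdims=True)  # numerical stability
        log_prob = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
        return -log_prob[np.arange(len(labels)), labels].mean()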
Example 4
    def build(  # pylint: disable=too-many-arguments
        self,
        learning_rate_name: str,
        step_name: str,
        parameter_sequence_name: str,
        gradient_sequence_name: str,
        first_order_moment_sequence_name: str,
        second_order_moment_sequence_name: str,
    ):
        """Adds the AdamWOptimizer node to the model."""

        # get the model to manipulate
        onnx_model = accessor.global_accessor.model

        # define the node attributes
        node_attributes = {
            "alpha": self._betas[0],  # beta1
            "beta": self._betas[1],  # beta2
            "epsilon": self._eps,
            "weight_decay": self._weight_decay,
            "correct_bias": 1 if self._bias_correction else 0,
            "adam_mode": 1,  # adam mode (1 for hf/transformers AdamW)
        }

        # add the adamw node to the onnx model
        adamw_input_names = [
            learning_rate_name,  # learning rate
            step_name,  # training step
            parameter_sequence_name,  # param to be updated
            gradient_sequence_name,  # gradient of the param to be used for update
            first_order_moment_sequence_name,  # first order moment for this param
            second_order_moment_sequence_name,  # second order moment for this param
        ]
        adamw_output_name = graph_utils.generate_random_graph_name("adamw.updated_flag")
        adamw_output_names = [adamw_output_name]
        adamw_node = onnx.helper.make_node(
            "AdamWOptimizer",
            adamw_input_names,
            adamw_output_names,
            name=graph_utils.generate_random_graph_name("AdamWOptimizer"),
            domain="com.microsoft",
            **node_attributes,
        )
        onnx_model.graph.node.append(adamw_node)

        return adamw_output_name
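
The attribute names map onto the usual decoupled AdamW update. A reference sketch of one step for a single parameter, written from the textbook formulation with adam_mode=1 (hf/transformers style); the contrib kernel's exact semantics are not reproduced here:

    import numpy as np

    def adamw_reference_step(param, grad, m, v, lr, step, beta1, beta2,
                             eps, weight_decay, correct_bias=True):
        m = beta1 * m + (1.0 - beta1) * grad           # first order moment
        v = beta2 * v + (1.0 - beta2) * grad * grad    # second order moment
        step_size = lr
        if correct_bias:                               # correct_bias == 1
            step_size *= np.sqrt(1.0 - beta2**step) / (1.0 - beta1**step)
        param = param - step_size * m / (np.sqrt(v) + eps)
        param = param - lr * weight_decay * param      # decoupled weight decay
        return param, m, v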
Example 5
    def build(self):
        # create the graph initializer holding the configured value
        initializer_name = graph_utils.generate_random_graph_name("initializer")
        accessor.global_accessor.model.graph.initializer.append(
            onnx.helper.make_tensor(initializer_name, onnx.TensorProto.FLOAT, [1], [self._value])
        )
        return initializer_name
Example 6
    def build(self, *sequence_input_names):
        # get the model to manipulate
        onnx_model = accessor.global_accessor.model

        # create the SequenceConstruct node
        sc_node_input_names = list(sequence_input_names)
        sc_node_output_name = graph_utils.generate_random_graph_name("sequenceconstruct_output")
        sc_node_output_names = [sc_node_output_name]
        sc_node = onnx.helper.make_node(
            "SequenceConstruct",
            sc_node_input_names,
            sc_node_output_names,
            graph_utils.generate_random_graph_name("SequenceConstruct"),
        )
        onnx_model.graph.node.append(sc_node)

        return sc_node_output_name
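
Sequence values like the one produced here are what the AdamWOptimizer block in Example 4 consumes for its parameter, gradient, and moment inputs. A standalone sketch of the node this block emits (tensor names are hypothetical):

    import onnx

    node = onnx.helper.make_node(
        "SequenceConstruct",
        ["param_0", "param_1"],   # inputs must share an element type
        ["param_sequence"],       # a single ONNX sequence value
        name="SequenceConstruct_demo",
    )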
Example 7
    def build(self, input_name1, input_name2):
        # get the model to manipulate
        onnx_model = accessor.global_accessor.model

        # the op name must be set by a subclass before build is called
        if not self._op_name:
            raise RuntimeError("Unknown op name. Please override _op_name")

        # create the graph node for this binary op
        node_input_names = [input_name1, input_name2]
        node_output_name = graph_utils.generate_random_graph_name(f"{self._op_name.lower()}_output")
        node_output_names = [node_output_name]
        node = onnx.helper.make_node(
            self._op_name,
            node_input_names,
            node_output_names,
            name=graph_utils.generate_random_graph_name(self._op_name),
        )
        onnx_model.graph.node.append(node)

        return node_output_name
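
The RuntimeError hints at the intended usage: concrete blocks subclass this generic two-input block and set _op_name to any binary ONNX op. A hypothetical sketch (the base-class name is assumed, stubbed here so the snippet stands alone):

    class BinaryOp:  # hypothetical stand-in for the generic block above
        def __init__(self):
            self._op_name = ""

    class Sub(BinaryOp):
        def __init__(self):
            super().__init__()
            self._op_name = "Sub"  # Add, Mul, Div, etc. work the same way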
Example 8
    def build(self, *reduce_node_input_names):
        # get the model to manipulate
        onnx_model = accessor.global_accessor.model

        # create the ReduceAllL2 node
        reduce_node_input_names = list(reduce_node_input_names)
        reduce_node_output_name = graph_utils.generate_random_graph_name("reducealll2_output")
        reduce_node_output_names = [reduce_node_output_name]
        reduce_node = onnx.helper.make_node(
            "ReduceAllL2",
            reduce_node_input_names,
            reduce_node_output_names,
            graph_utils.generate_random_graph_name("ReduceAllL2"),
            domain="com.microsoft",
        )
        onnx_model.graph.node.append(reduce_node)
        # TODO: register shape inference with onnx
        onnx_model.graph.value_info.append(
            onnx.helper.make_tensor_value_info(reduce_node_output_name, onnx.TensorProto.FLOAT, [1])
        )

        return reduce_node_output_name
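
ReduceAllL2 is a com.microsoft contrib op, which is why the block registers the scalar output's value_info by hand instead of relying on standard ONNX shape inference (the TODO above). Judging by its use as the total gradient norm in Example 1, its effect can be sketched as:

    import numpy as np

    def reduce_all_l2_reference(*tensors):
        # a single scalar: the L2 norm over every element of every input
        return np.sqrt(sum(np.sum(np.square(t)) for t in tensors))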
Example 9
    def build(self, pow_input_name):
        # get the model to manipulate
        onnx_model = accessor.global_accessor.model

        # create the graph initializer for the exponent
        pow_node_exponent_name = graph_utils.generate_random_graph_name("pow_exponent")
        onnx_model.graph.initializer.append(
            onnx.helper.make_tensor(pow_node_exponent_name, onnx.TensorProto.FLOAT, [1], [self._exponent])
        )

        # create the graph node for pow
        pow_node_input_names = [pow_input_name, pow_node_exponent_name]
        pow_node_output_name = graph_utils.generate_random_graph_name("pow_output")
        pow_node_output_names = [pow_node_output_name]
        pow_node = onnx.helper.make_node(
            "Pow",
            pow_node_input_names,
            pow_node_output_names,
            name=graph_utils.generate_random_graph_name("Pow"),
        )
        onnx_model.graph.node.append(pow_node)

        return pow_node_output_name
Example 10
    def build(self, clip_input_name):
        # get the model to manipulate
        onnx_model = accessor.global_accessor.model

        # create the graph initializer for the clip min
        clip_node_min_name = ""
        if self._min is not None:
            clip_node_min_name = graph_utils.generate_random_graph_name("clip_min")
            onnx_model.graph.initializer.append(
                onnx.helper.make_tensor(clip_node_min_name, onnx.TensorProto.FLOAT, [1], [self._min])
            )

        # create the graph initializer for the clip max
        clip_node_max_name = ""
        if self._max is not None:
            clip_node_max_name = graph_utils.generate_random_graph_name("clip_max")
            onnx_model.graph.initializer.append(
                onnx.helper.make_tensor(clip_node_max_name, onnx.TensorProto.FLOAT, [1], [self._max])
            )

        # create the Clip node
        clip_node_input_names = [
            clip_input_name,
            clip_node_min_name,
            clip_node_max_name,
        ]
        clip_node_output_name = graph_utils.generate_random_graph_name("clip_output")
        clip_node_output_names = [clip_node_output_name]
        clip_node = onnx.helper.make_node(
            "Clip",
            clip_node_input_names,
            clip_node_output_names,
            graph_utils.generate_random_graph_name("Clip"),
        )
        onnx_model.graph.node.append(clip_node)

        return clip_node_output_name
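
Note the empty-string input names: in ONNX, "" marks an omitted optional input, so the same three-input Clip node covers min-only, max-only, and two-sided clipping. A numpy equivalent of what the node computes:

    import numpy as np

    def clip_reference(x, clip_min=None, clip_max=None):
        if clip_min is None and clip_max is None:
            return x  # Clip with both bounds omitted is the identity
        return np.clip(x, clip_min, clip_max)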