Example #1
        def loop_test(data):
            ox = data.ox
            shape = ox.shape(data)
            dim_0 = ox.gather([shape, ox.constant(value=0)], axis=0)
            dim_1 = ox.gather(
                [shape,
                 ox.constant(value=np.array([1], dtype=np.int64))],
                axis=0)
            zeros = ox.constant_of_shape(dim_1, value=0.0)
            is_true = ox.constant(value=True)

            @onnx_function(outputs=['c_o', 'total_o', 'scan_o'],
                           output_types=[_Ty.b,
                                         _Ty.F([None]),
                                         _Ty.F([None])],
                           input_types=[_Ty.I([1]), _Ty.b,
                                        _Ty.F([None])])
            def range_body(iter_n, cond, total):
                ox = iter_n.ox
                iter_scalar = ox.squeeze(iter_n, axes=[0])
                col = ox.gather([data, iter_scalar], axis=0)
                total = ox.add([total, col])
                return (is_true, total, total)

            final_total, scan_res = ox.loop(
                dim_0,
                is_true,
                range_body,
                inputs=[zeros],
                outputs=['final_total', 'scan_res'])
            return final_total, scan_res
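The ox.loop call above follows the ONNX Loop calling convention used throughout these examples; the annotation below spells that reading out (it restates the snippet, it does not add to it):

# Reading of the ox.loop call above:
#   ox.loop(trip_count, initial_condition, body, inputs=initial_carried, outputs=names)
#   - dim_0      : trip count (number of iterations to run)
#   - is_true    : initial loop condition
#   - range_body : body graph taking (iteration_num, condition, *carried) and
#                  returning (condition, *carried, *scan_outputs)
#   - inputs     : initial values of the loop-carried dependencies (here: zeros)
#   - returns    : the final carried values, then the scan outputs
#                  (here: final_total, scan_res)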
Example #2
def test_optional_inputs(runner):
    input_ids = np.array([1, 2]).astype(np.int32)
    test_model = OptionalInputs()
    exp0 = test_model(input_ids)
    exp1 = test_model(input_ids, np.array([1, 2]).astype(np.int32))
    oxml = keras2onnx.convert_keras(test_model)
    assert runner('opt_inputs_0', oxml, [input_ids], exp0)

    from onnxconverter_common.onnx_fx import GraphFunctionType as _Ty
    oxml1 = keras2onnx.convert_keras(test_model, initial_types=(_Ty.I32(['N']), _Ty.I32(['N'])))
    assert runner('opt_inputs_1', oxml1, [input_ids, np.array([1, 2]).astype(np.int32)], exp1)
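The initial_types argument above replaces the inferred input signatures with explicit GraphFunctionType entries. The sketch below collects the type shorthands that appear throughout these examples; the glosses are assumptions read off from how each form is used here, not taken from the library documentation.

from onnxconverter_common.onnx_fx import GraphFunctionType as _Ty

# Type shorthands as used in these examples (semantics assumed from usage):
_Ty.I32(['N'])         # int32 tensor with one symbolic dimension 'N'
_Ty.I([1])             # int64 tensor of shape [1] (e.g. a loop trip count)
_Ty.F([None, None])    # float32 tensor with two dynamic dimensions
_Ty.F(shape=['N', 2])  # float32 tensor with a named symbolic dimension
_Ty.b                  # boolean scalar
_Ty.f                  # float32 scalar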
Example #3
    def test_float16_with_loop(self):
        @onnx_function(outputs=['y1', 'y2'],
                       input_types=[_Ty.F([None, None])],
                       output_types=[_Ty.F([None]),
                                     _Ty.F([None, None])])
        def loop_test(data):
            ox = data.ox
            shape = ox.shape(data)
            dim_0 = ox.gather([shape, ox.constant(value=0)], axis=0)
            dim_1 = ox.gather(
                [shape,
                 ox.constant(value=np.array([1], dtype=np.int64))],
                axis=0)
            zeros = ox.constant_of_shape(dim_1, value=0.0)
            is_true = ox.constant(value=True)

            @onnx_function(outputs=['c_o', 'total_o', 'scan_o'],
                           output_types=[_Ty.b,
                                         _Ty.F([None]),
                                         _Ty.F([None])],
                           input_types=[_Ty.I([1]), _Ty.b,
                                        _Ty.F([None])])
            def range_body(iter_n, cond, total):
                ox = iter_n.ox
                iter_scalar = ox.squeeze(iter_n, axes=[0])
                col = ox.gather([data, iter_scalar], axis=0)
                total = ox.add([total, col])
                return (is_true, total, total)

            final_total, scan_res = ox.loop(
                dim_0,
                is_true,
                range_body,
                inputs=[zeros],
                outputs=['final_total', 'scan_res'])
            return final_total, scan_res

        m1 = np.array([[2, 3], [4, 5], [6, 7]], dtype=np.float32)
        expected_res = loop_test(m1)

        model = loop_test.to_model()
        f16model = convert_float_to_float16(copy.deepcopy(model))
        actual_res = _ort_inference(f16model, {'data': m1.astype(np.float16)})
        for expected, actual in zip(expected_res, actual_res):
            self.assertTrue(np.allclose(expected, actual))
            self.assertTrue(actual.dtype == np.float16)

        f16model2 = convert_float_to_float16(copy.deepcopy(model),
                                             keep_io_types=True)
        actual_res2 = _ort_inference(f16model2, {'data': m1})
        for expected, actual2 in zip(expected_res, actual_res2):
            self.assertTrue(np.allclose(expected, actual2))
            self.assertTrue(actual2.dtype == np.float32)
Example #4
    def test_float16(self):
        @onnx_function(outputs=['z'],
                       input_types=(_Ty.F([1, 1, 6, 1])),
                       output_types=[_Ty.f])
        def transpose_n_matmul(x):
            ox = x.ox  # type: OnnxOperatorBuilderX
            wm = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                           12]).astype(np.float32).reshape([2, 6])
            b = ox.constant(value=wm)
            a = ox.transpose(x, perm=[0, 1, 3, 2])
            c = ox.transpose(b, perm=[1, 0])
            return ox.matmul([a, c])

        m1 = np.array([[2, 3], [4, 5],
                       [6, 7]]).astype(np.float32).reshape([1, 1, 6, 1])
        expected = transpose_n_matmul(m1)
        model = transpose_n_matmul.to_model()
        f16model = convert_float_to_float16(copy.deepcopy(model))
        actual = _ort_inference(f16model, {'x': m1.astype(np.float16)})
        self.assertTrue(np.allclose(expected, actual))

        f16model2 = convert_float_to_float16(copy.deepcopy(model),
                                             keep_io_types=True)
        actual2 = _ort_inference(f16model2, {'x': m1})
        self.assertTrue(np.allclose(expected, actual2))
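The same conversion applies to a model loaded from disk rather than one built with onnx_fx. A minimal sketch, assuming a float32 ONNX model at the hypothetical path model.onnx:

import copy

import onnx
from onnxconverter_common import float16

model = onnx.load("model.onnx")  # hypothetical input path
# Convert all float32 tensors to float16 (graph inputs/outputs become float16 too).
f16_model = float16.convert_float_to_float16(copy.deepcopy(model))
# Alternatively, keep float32 graph inputs/outputs and insert casts internally.
f16_model_io32 = float16.convert_float_to_float16(copy.deepcopy(model),
                                                  keep_io_types=True)
onnx.save(f16_model_io32, "model_fp16.onnx")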
Example #5
        def loop_test(len):
            ox = len.ox
            s_len = ox.squeeze(len, axes=[0])
            is_true = ox.constant(value=True)

            @onnx_function(outputs=['c_o', 'i_o', 'j_o', 'all_i', 'all_j'],
                           output_types=[_Ty.b, _Ty.f, _Ty.f, _Ty.f, _Ty.f],
                           input_types=[_Ty.I([1]), _Ty.b, _Ty.F([1]), _Ty.F([1])])
            def range_body(iter_n, cond, i, j):
                return (is_true,
                        i + i.ox.constant(value=1.0), j + 2.0, i, j)

            one_c = ox.constant(value=-1.0)
            y1, y2, y3, y4 = ox.loop(s_len, is_true, range_body, inputs=[one_c, one_c],
                                     outputs=['y1_o', 'y2_o', 'y3_o', 'y4_o'])
            return y1, y2, y3, y4
Example #6
    def test_loop(self):
        @onnx_function(outputs=['y1', 'y2', 'y3', 'y4'],
                       input_types=[_Ty.I([1])],
                       output_types=[_Ty.F([None]), _Ty.F([None]), _Ty.F([None, 1]), _Ty.F([None, 1])])
        def loop_test(len):
            ox = len.ox
            s_len = ox.squeeze(len, axes=[0])
            is_true = ox.constant(value=True)

            @onnx_function(outputs=['c_o', 'i_o', 'j_o', 'all_i', 'all_j'],
                           output_types=[_Ty.b, _Ty.f, _Ty.f, _Ty.f, _Ty.f],
                           input_types=[_Ty.I([1]), _Ty.b, _Ty.F([1]), _Ty.F([1])])
            def range_body(iter_n, cond, i, j):
                return (is_true,
                        i + i.ox.constant(value=1.0), j + 2.0, i, j)

            one_c = ox.constant(value=-1.0)
            y1, y2, y3, y4 = ox.loop(s_len, is_true, range_body, inputs=[one_c, one_c],
                                     outputs=['y1_o', 'y2_o', 'y3_o', 'y4_o'])
            return y1, y2, y3, y4

        self.assertEqual(
            loop_test(np.array([16], dtype=np.int64))[2][4], 3.0)
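To see why the asserted value is 3.0: index [2] selects y3, the scan output that records i as it enters each iteration, and i starts at -1.0 and grows by 1.0 per iteration, so the value recorded at iteration 4 is -1 + 4 = 3.0.

# Worked trace of the y3 scan output (value of i entering each iteration):
#   iteration:    0     1     2     3     4    ...
#   i on entry:  -1.0   0.0   1.0   2.0   3.0  ...
# Hence loop_test(np.array([16], dtype=np.int64))[2][4] == 3.0.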
Example #7
    def test_matmul_opt(self):
        @onnx_function(outputs=['z'],
                       input_types=(_Ty.F([1, 1, 6, 1])),
                       output_types=[_Ty.f])
        def transpose_n_matmul(x):
            ox = x.ox  # type: OnnxOperatorBuilderX
            wm = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]).astype(np.float32).reshape([2, 6])
            b = ox.constant(value=wm)
            a = ox.transpose(x, perm=[0, 1, 3, 2])
            c = ox.transpose(b, perm=[1, 0])
            return ox.matmul([a, c])

        m1 = np.array([[2, 3], [4, 5], [6, 7]]).astype(np.float32).reshape([1, 1, 6, 1])
        expected = transpose_n_matmul(m1)
        opted = optimize_onnx_model(transpose_n_matmul.to_model())
        actual = _ort_inference(opted, {'x': m1})
        self.assertTrue(np.allclose(expected, actual), "The result mismatch")
Example #8
                       cast_batch, op_version=operator.target_opset, axes=[0])
    apply_cast(scope, cast_batch, operator.output_full_names[2], container, to=onnx_proto.TensorProto.INT32)

    apply_identity(scope, box_batch, operator.output_full_names[0], container)
    apply_identity(scope, score_batch, operator.output_full_names[1], container)


set_converter(YOLONMSLayer, convert_NMSLayer)

yolo_model_graph_tiny = None
evaluation_model_graph_tiny = None
nms_model_graph_tiny = None
num_classes = 20

@Graph.trace(
    input_types=[_Ty.F(shape=['N', 3, 'M1', 'M2']), _Ty.F(shape=['N', 2])],
    output_types=[_Ty.F(shape=[1, 'M1', 4]), _Ty.F(shape=[1, num_classes, 'M2']), _Ty.I32(shape=[1, 'M3', 3])],
    outputs=["yolonms_layer_1", "yolonms_layer_1_1", "yolonms_layer_1_2"])
def combine_model_tiny(input_1, image_shape):
    global yolo_model_graph_tiny
    global evaluation_model_graph_tiny
    global nms_model_graph_tiny
    output_1 = yolo_model_graph_tiny(input_1)
    input_2 = output_1 + (image_shape,)
    yolo_evaluation_layer_1, yolo_evaluation_layer_2 = evaluation_model_graph_tiny(*input_2)
    nms_layer_1_1, nms_layer_1_2, nms_layer_1_3 = nms_model_graph_tiny(yolo_evaluation_layer_1, yolo_evaluation_layer_2)
    return nms_layer_1_1, nms_layer_1_2, nms_layer_1_3


yolo_model_graph = None
evaluation_model_graph = None
Example #9
    def greedy_search(X, eos_id):
        """
        Args:
            X: sequence of input tokens, including EOS symbol, as integer indices into the input vocabulary
            eos_id: id of the EOS symbol in the output vocabulary
        """
        ox = X.ox
        data_0 = X
        data_0_shape = data_0.shape()
        data_0_mask = ox.constant_of_shape(data_0_shape, value=1.0)
        seq_len = data_0_shape[-1]
        data_0_index_range = ox.range(
            [ox.constant(value=0), seq_len,
             ox.constant(value=1)]).cast(to=ox.float)
        data_0_index_range = ox.unsqueeze(data_0_index_range, axes=[1, 2])
        max_len = seq_len * 3

        encoder_context_0 = encode_source(data_0=data_0,
                                          data_0_mask=data_0_mask,
                                          data_0_posrange=data_0_index_range)

        y_len_0 = ox.constant(value=0.0)
        logp, *out_decoder_states = decode_first(
            data_1_posrange=y_len_0,
            encoder_context_0=encoder_context_0,
            data_0_mask=data_0_mask)

        y_t = logp[0, 0, 0].argmax(
            axis=-1, keepdims=True)  # note: rank-1 tensor, not a scalar
        eos_token = eos_id + 0
        test_y_t = (y_t != eos_token)

        @Graph.trace(outputs=[
            'ty_t', 'y_t_o', *(f'ods_{i}' for i in range(decoder_state_dim)),
            'y_t_o2'
        ],
                     output_types=[_Ty.b, _Ty.i] +
                     [_Ty.f] * decoder_state_dim + [_Ty.i],
                     input_types=[_Ty.I([1]), _Ty.b, _Ty.i] +
                     [_Ty.f] * decoder_state_dim)
        def loop_body(
                iteration_count,
                condition,  # these are not actually used inside
                y_t,
                out_decoder_states_0,
                out_decoder_states_1,
                out_decoder_states_2,
                out_decoder_states_3,
                out_decoder_states_4,
                out_decoder_states_5,
                out_decoder_states_6,
                out_decoder_states_7,
                out_decoder_states_8,
                out_decoder_states_9,
                out_decoder_states_10,
                out_decoder_states_11):
            # @BUGBUG: Currently, we do not support variable number of arguments to the callable.
            # @TODO: We have the information from the type signature in Graph.trace(), so this should be possible.
            assert decoder_state_dim == 12, "Currently, decoder layers other than 6 require a manual code change"
            out_decoder_states = [
                out_decoder_states_0, out_decoder_states_1,
                out_decoder_states_2, out_decoder_states_3,
                out_decoder_states_4, out_decoder_states_5,
                out_decoder_states_6, out_decoder_states_7,
                out_decoder_states_8, out_decoder_states_9,
                out_decoder_states_10, out_decoder_states_11
            ]
            """
            Loop body follows the requirements of ONNX Loop:

            "The graph run each iteration.
            It has 2+N inputs: (iteration_num, condition, loop carried dependencies...).
            It has 1+N+K outputs: (condition, loop carried dependencies..., scan_outputs...).
            Each scan_output is created by concatenating the value of the specified output value at the end of each iteration of the loop.
            It is an error if the dimensions or data type of these scan_outputs change across loop iterations."

            Inputs:
                iteration_num (not used by our function)
                test_y_t: condition (not used as an input)
                y_t, *out_decoder_states: N=(decoder_state_dim+1) loop-carried dependencies

            Outputs:
                test_y_t: condition, return True if there is more to decode
                y_t, *out_decoder_states: N=(decoder_state_dim+1) loop-carried dependencies (same as in the Inputs section)
                y_t: K=1 outputs
            """
            pos = iteration_count + 1
            data_1_posrange = pos.cast(to=1).unsqueeze(axes=[0, 1, 2])
            logp, *out_decoder_states = decode_next(
                prev_word=y_t,
                data_1_posrange=data_1_posrange,
                encoder_context_0=encoder_context_0,
                data_0_mask=data_0_mask,
                **{
                    f"decoder_state_{i}": out_decoder_states[i]
                    for i in range(len(out_decoder_states))
                })
            y_t = logp[0, 0, 0].argmax(axis=-1, keepdims=True)
            test_y_t = (y_t != eos_token)
            return [test_y_t, y_t] + out_decoder_states + [y_t]

        # "Final N loop carried dependency values then K scan_outputs"
        ret_vals = ox.loop(
            max_len,
            test_y_t,
            loop_body,
            inputs=[y_t] + out_decoder_states,
            outputs=[
                'gy_t_o',
                *[f"gods_{i}" for i in range(len(out_decoder_states))],
                'greedy_out'
            ])
        y = ret_vals[-1]  # scan_output

        # we must prepend the very first token
        Y = ox.concat(
            [ox.unsqueeze(y_t), y], axis=0
        )  # note: y_t are rank-1 tensors, not scalars (ORT concat fails with scalars)
        return ox.squeeze(Y, axes=[1])
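The docstring inside loop_body quotes the ONNX Loop contract: the body graph takes 2+N inputs (iteration_num, condition, N loop-carried values) and produces 1+N+K outputs (condition, N carried values, K scan outputs). Below is a minimal hedged sketch of a conforming body with N=1 and K=1, written with the same Graph.trace decorator and type helpers used above; minimal_loop_body is a hypothetical name and is not part of the exported model.

@Graph.trace(outputs=['cond_o', 'carried_o', 'scan_o'],
             input_types=[_Ty.I([1]), _Ty.b, _Ty.f],   # iteration_num, condition, N=1 carried value
             output_types=[_Ty.b, _Ty.f, _Ty.f])       # condition, N=1 carried value, K=1 scan output
def minimal_loop_body(iteration_num, condition, carried):
    keep_going = carried.ox.constant(value=True)  # always continue; the trip count bounds the loop
    updated = carried + 1.0                       # updated loop-carried value
    return keep_going, updated, updated           # the scan output records the per-iteration value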
Example #10
def compose_model_components_with_greedy_search(partial_models: Dict[str,
                                                                     Graph],
                                                num_decoder_layers: int):
    """
    Create an ONNX model that implements greedy search over the exported Marian pieces.

    Args:
        partial_models: models returned from export_marian_model_components()
        num_decoder_layers: must be specified, since it cannot be inferred from the model files presently (e.g. 6)
    Returns:
        ONNX model that can be called as
        result_ids = greedy_search_fn(np.array(source_ids, dtype=np.int64), np.array([target_eos_id], dtype=np.int64))[0]
    """
    decoder_state_dim = num_decoder_layers * 2  # each decoder has two state variables

    # load our partial functions
    # ONNX graph inputs and outputs are named but not ordered. Therefore, we must define the parameter order here.
    def define_parameter_order(graph, inputs, outputs):
        tmppath = "/tmp/tmpmodel.onnx"
        graph.save(
            tmppath
        )  # unfortunately, Graph.load() cannot load from another Graph, so use a tmp file
        graph = Graph.load(tmppath, inputs=inputs, outputs=outputs)
        os.unlink(tmppath)
        return graph

    encode_source = define_parameter_order(
        partial_models["encode_source"],
        inputs=['data_0', 'data_0_mask',
                'data_0_posrange'],  # define the order of arguments
        outputs=['encoder_context_0'])
    decode_first = define_parameter_order(
        partial_models["decode_first"],
        inputs=['data_1_posrange', 'encoder_context_0', 'data_0_mask'],
        outputs=['first_logits'] +
        [f"first_decoder_state_{i}" for i in range(decoder_state_dim)])
    decode_next = define_parameter_order(
        partial_models["decode_next"],
        inputs=[
            'prev_word', 'data_1_posrange', 'encoder_context_0', 'data_0_mask'
        ] + [f"decoder_state_{i}" for i in range(decoder_state_dim)],
        outputs=['next_logits'] +
        [f"next_decoder_state_{i}" for i in range(decoder_state_dim)])

    # create an ONNX graph that implements full greedy search
    # The greedy search is implemented via the @onnx_fx.Graph.trace decorator, which allows us to
    # author the greedy search in Python, similar to @CNTK.Function and PyTorch trace-based jit.
    # The decorator executes greedy_search() below on a dummy input in order to generate an ONNX graph
    # via invoking operators from the onnx.fx library.
    # The partial functions exported from Marian are invoked (=inlined) by this.
    # The result is a full ONNX graph that implements greedy search using the Marian model.
    @Graph.trace(input_types=[_Ty.I(shape=['N']),
                              _Ty.I([1])],
                 output_types=[_Ty.I(shape=['T'])],
                 outputs="Y")
    def greedy_search(X, eos_id):
        """
        Args:
            X: sequence of input tokens, including EOS symbol, as integer indices into the input vocabulary
            eos_id: id of the EOS symbol in the output vocabulary
        """
        ox = X.ox
        data_0 = X
        data_0_shape = data_0.shape()
        data_0_mask = ox.constant_of_shape(data_0_shape, value=1.0)
        seq_len = data_0_shape[-1]
        data_0_index_range = ox.range(
            [ox.constant(value=0), seq_len,
             ox.constant(value=1)]).cast(to=ox.float)
        data_0_index_range = ox.unsqueeze(data_0_index_range, axes=[1, 2])
        max_len = seq_len * 3

        encoder_context_0 = encode_source(data_0=data_0,
                                          data_0_mask=data_0_mask,
                                          data_0_posrange=data_0_index_range)

        y_len_0 = ox.constant(value=0.0)
        logp, *out_decoder_states = decode_first(
            data_1_posrange=y_len_0,
            encoder_context_0=encoder_context_0,
            data_0_mask=data_0_mask)

        y_t = logp[0, 0, 0].argmax(
            axis=-1, keepdims=True)  # note: rank-1 tensor, not a scalar
        eos_token = eos_id + 0
        test_y_t = (y_t != eos_token)

        @Graph.trace(outputs=[
            'ty_t', 'y_t_o', *(f'ods_{i}' for i in range(decoder_state_dim)),
            'y_t_o2'
        ],
                     output_types=[_Ty.b, _Ty.i] +
                     [_Ty.f] * decoder_state_dim + [_Ty.i],
                     input_types=[_Ty.I([1]), _Ty.b, _Ty.i] +
                     [_Ty.f] * decoder_state_dim)
        def loop_body(
                iteration_count,
                condition,  # these are not actually used inside
                y_t,
                out_decoder_states_0,
                out_decoder_states_1,
                out_decoder_states_2,
                out_decoder_states_3,
                out_decoder_states_4,
                out_decoder_states_5,
                out_decoder_states_6,
                out_decoder_states_7,
                out_decoder_states_8,
                out_decoder_states_9,
                out_decoder_states_10,
                out_decoder_states_11):
            # @BUGBUG: Currently, we do not support variable number of arguments to the callable.
            # @TODO: We have the information from the type signature in Graph.trace(), so this should be possible.
            assert decoder_state_dim == 12, "Currently, decoder layers other than 6 require a manual code change"
            out_decoder_states = [
                out_decoder_states_0, out_decoder_states_1,
                out_decoder_states_2, out_decoder_states_3,
                out_decoder_states_4, out_decoder_states_5,
                out_decoder_states_6, out_decoder_states_7,
                out_decoder_states_8, out_decoder_states_9,
                out_decoder_states_10, out_decoder_states_11
            ]
            """
            Loop body follows the requirements of ONNX Loop:

            "The graph run each iteration.
            It has 2+N inputs: (iteration_num, condition, loop carried dependencies...).
            It has 1+N+K outputs: (condition, loop carried dependencies..., scan_outputs...).
            Each scan_output is created by concatenating the value of the specified output value at the end of each iteration of the loop.
            It is an error if the dimensions or data type of these scan_outputs change across loop iterations."

            Inputs:
                iteration_num (not used by our function)
                test_y_t: condition (not used as an input)
                y_t, *out_decoder_states: N=(decoder_state_dim+1) loop-carried dependencies

            Outputs:
                test_y_t: condition, return True if there is more to decode
                y_t, *out_decoder_states: N=(decoder_state_dim+1) loop-carried dependencies (same as in the Inputs section)
                y_t: K=1 outputs
            """
            pos = iteration_count + 1
            data_1_posrange = pos.cast(to=1).unsqueeze(axes=[0, 1, 2])
            logp, *out_decoder_states = decode_next(
                prev_word=y_t,
                data_1_posrange=data_1_posrange,
                encoder_context_0=encoder_context_0,
                data_0_mask=data_0_mask,
                **{
                    f"decoder_state_{i}": out_decoder_states[i]
                    for i in range(len(out_decoder_states))
                })
            y_t = logp[0, 0, 0].argmax(axis=-1, keepdims=True)
            test_y_t = (y_t != eos_token)
            return [test_y_t, y_t] + out_decoder_states + [y_t]

        # "Final N loop carried dependency values then K scan_outputs"
        ret_vals = ox.loop(
            max_len,
            test_y_t,
            loop_body,
            inputs=[y_t] + out_decoder_states,
            outputs=[
                'gy_t_o',
                *[f"gods_{i}" for i in range(len(out_decoder_states))],
                'greedy_out'
            ])
        y = ret_vals[-1]  # scan_output

        # we must prepend the very first token
        Y = ox.concat(
            [ox.unsqueeze(y_t), y], axis=0
        )  # note: y_t are rank-1 tensors, not scalars (ORT concat fails with scalars)
        return ox.squeeze(Y, axes=[1])

    greedy_search.to_model()  # this triggers the model tracing (which is lazy)
    # optimize the final model as well
    # @BUGBUG: This leads to a malformed or hanging model.
    #_optimize_graph_in_place(greedy_search)
    return greedy_search
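As the docstring above states, the composed graph can be invoked directly on numpy arrays. A hedged usage sketch, where partial_models, source_ids, and target_eos_id are placeholders standing in for real Marian exports and token ids:

# Hypothetical usage of the composed greedy-search graph (placeholder inputs).
greedy_search_fn = compose_model_components_with_greedy_search(partial_models,
                                                               num_decoder_layers=6)
result_ids = greedy_search_fn(np.array(source_ids, dtype=np.int64),
                              np.array([target_eos_id], dtype=np.int64))[0]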
Example #11
               to=onnx_proto.TensorProto.INT32)

    apply_identity(scope, box_batch, operator.output_full_names[0], container)
    apply_identity(scope, score_batch, operator.output_full_names[1],
                   container)


set_converter(YOLONMSLayer, convert_NMSLayer)

yolo_model_graph_tiny = None
evaluation_model_graph_tiny = None
nms_model_graph_tiny = None


@Graph.trace(
    input_types=[_Ty.F(shape=['N', 3, 'M1', 'M2']),
                 _Ty.F(shape=['N', 2])],
    output_types=[
        _Ty.F(shape=[1, 'M1', 4]),
        _Ty.F(shape=[1, 80, 'M2']),
        _Ty.I32(shape=[1, 'M3', 3])
    ],
    outputs=["yolonms_layer_1", "yolonms_layer_1_1", "yolonms_layer_1_2"])
def combine_model_tiny(input_1, image_shape):
    global yolo_model_graph_tiny
    global evaluation_model_graph_tiny
    global nms_model_graph_tiny
    output_1 = yolo_model_graph_tiny(input_1)
    input_2 = output_1 + (image_shape, )
    yolo_evaluation_layer_1, yolo_evaluation_layer_2 = evaluation_model_graph_tiny(
        *input_2)