def _create_lstm(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert LSTMs without access to the full model"
    assert pred_model is not None, "cannot convert LSTMs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    assert not attrs, "unsupported LSTM attributes: " + str(attrs.keys())

    input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

    input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for LSTM input failed")

    name = dummy_name()
    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    hidden_t_all, hidden_t_last, _, _, params = rnn_cell.LSTM(
        pred_mh,
        input_blob,
        sequence_lens,
        [initial_h, initial_c],
        input_size,
        hidden_size,
        name,
        forward_only=True,
        return_params=True
    )

    # input and recurrence biases are squashed together in onnx but not in caffe2
    Bi = name + "_bias_i2h"
    Br = name + "_bias_gates"
    init_net.Slice(B, Bi, starts=[0 * hidden_size], ends=[4 * hidden_size])
    init_net.Slice(B, Br, starts=[4 * hidden_size], ends=[8 * hidden_size])

    # caffe2 has a different order from onnx. We need to rearrange
    #  i o f c -> i f o c
    reforms = ((W,  params['input']['weights'],     [(0, input_size)]),
               (R,  params['recurrent']['weights'], [(0, hidden_size)]),
               (Bi, params['input']['biases'],      []),
               (Br, params['recurrent']['biases'],  []))
    for name_from, name_to, extra_dims in reforms:
        xi, xo, xf, xc = [name_from + suffix for suffix in ("_i", "_o", "_f", "_c")]
        for i, x in enumerate([xi, xo, xf, xc]):
            dim0 = i * hidden_size, (i + 1) * hidden_size
            starts, ends = zip(dim0, *extra_dims)
            init_net.Slice(name_from, x, starts=starts, ends=ends)
        init_net.Concat([xi, xf, xo, xc], [name_to, dummy_name()], axis=0)

    pred_mh.net = pred_mh.net.Clone(
        "dummy-clone-net",
        blob_remap={hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1]}
    )

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
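# A minimal NumPy sketch (toy shapes, illustrative only; `reorder_gates` is a
# hypothetical helper, not part of the converter) of what the Slice/Concat ops
# above compute: ONNX packs the input and recurrent biases into a single B
# tensor and orders the LSTM gates i, o, f, c, while Caffe2 expects separate
# bias blobs and the gate order i, f, o, c.
import numpy as np

hidden_size, input_size = 3, 2
W = np.random.randn(4 * hidden_size, input_size).astype(np.float32)  # ONNX gate order i, o, f, c
B = np.random.randn(8 * hidden_size).astype(np.float32)              # input and recurrent biases squashed together

# Split B into the input-to-hidden and recurrent halves (the two Slice ops on B).
Bi, Br = B[:4 * hidden_size], B[4 * hidden_size:]

# Reorder the per-gate blocks i, o, f, c -> i, f, o, c (the Slice/Concat loop).
def reorder_gates(x, hidden_size):
    i, o, f, c = [x[k * hidden_size:(k + 1) * hidden_size] for k in range(4)]
    return np.concatenate([i, f, o, c], axis=0)

W_caffe2, Bi_caffe2, Br_caffe2 = (reorder_gates(t, hidden_size) for t in (W, Bi, Br))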
def _create_rnn(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert RNNs without access to the full model"
    assert pred_model is not None, "cannot convert RNNs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    activation = attrs.pop('activations')[0]
    assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())

    input_blob, W, R, B, sequence_lens, initial_h = n.inputs

    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for RNN input failed")

    name = dummy_name()
    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    # input and recurrence biases are squashed together in onnx but not in caffe2
    Bi = name + "/i2h_b"
    Br = name + "/gates_t_b"
    init_net.Slice(B, Bi, starts=[0 * hidden_size], ends=[1 * hidden_size])
    init_net.Slice(B, Br, starts=[1 * hidden_size], ends=[2 * hidden_size])

    hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
        pred_mh,
        input_blob,
        sequence_lens,
        [initial_h],
        input_size,
        hidden_size,
        name,
        drop_states=True,
        forward_only=True,
        activation=activation
    )

    init_net.Copy(W, name + '/i2h_w')
    init_net.Copy(R, name + '/gates_t_w')

    pred_mh.net = pred_mh.net.Clone(
        "dummy-clone-net",
        blob_remap={hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1]}
    )

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
def _create_gru(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert GRUs without access to the full model"
    assert pred_model is not None, "cannot convert GRUs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    linear_before_reset = attrs.pop('linear_before_reset')
    assert not attrs, "unsupported GRU attributes: " + str(attrs.keys())

    input_blob, W, R, B, sequence_lens, initial_h = n.inputs

    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for GRU input failed")

    name = dummy_name()
    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    hidden_t_all, hidden_t_last = gru_cell.GRU(
        pred_mh,
        input_blob,
        sequence_lens,
        [initial_h],
        input_size,
        hidden_size,
        name,
        drop_states=True,
        forward_only=True,
        linear_before_reset=linear_before_reset
    )

    # input and recurrence biases are squashed together in onnx but not in caffe2
    Bi = name + "_bias_i2h"
    Br = name + "_bias_gates"
    init_net.Slice(B, Bi, starts=[0 * hidden_size], ends=[3 * hidden_size])
    init_net.Slice(B, Br, starts=[3 * hidden_size], ends=[6 * hidden_size])

    # caffe2 has a different order from onnx. We need to rearrange
    #  z r h -> r z h
    #
    # TODO implement support for return_params in gru_cell.GRU.
    # Until then, hardcode blob names.
    reforms = ((W,  'i2h_w',    True,  [(0, input_size)]),
               (R,  'gate_t_w', False, [(0, hidden_size)]),
               (Bi, 'i2h_b',    True,  []),
               (Br, 'gate_t_b', False, []))
    for name_from, name_to, do_concat, extra_dims in reforms:
        xz, xr, xh = ['%s/%s_%s' % (name, prefix, name_to)
                      for prefix in ('update', 'reset', 'output')]
        for i, x in enumerate([xz, xr, xh]):
            dim0 = i * hidden_size, (i + 1) * hidden_size
            starts, ends = zip(dim0, *extra_dims)
            init_net.Slice(name_from, x, starts=starts, ends=ends)
        if do_concat:
            init_net.Concat([xr, xz, xh],
                            ['%s/%s' % (name, name_to), dummy_name()],
                            axis=0)

    pred_mh.net = pred_mh.net.Clone(
        "dummy-clone-net",
        blob_remap={hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1]}
    )

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
def _create_gru(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert GRUs without access to the full model"
    assert pred_model is not None, "cannot convert GRUs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    linear_before_reset = attrs.pop('linear_before_reset', 0)
    direction = force_unicode(attrs.pop('direction', 'forward'))
    assert not attrs, "unsupported GRU attributes: " + str(attrs.keys())
    assert direction in ['forward', 'bidirectional'], "unsupported backwards GRU"

    input_blob, W, R, B, sequence_lens, initial_h = n.inputs

    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for GRU input failed")

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    def make_gru(direction_offset):
        name = dummy_name()

        # input and recurrence biases are squashed together in
        # onnx but not in caffe2
        bias_offset = 6 * direction_offset * hidden_size
        Bi = init_net.Slice(B, name + "_bias_i2h",
                            starts=[bias_offset + 0 * hidden_size],
                            ends=[bias_offset + 3 * hidden_size])
        Br = init_net.Slice(B, name + "_bias_gates",
                            starts=[bias_offset + 3 * hidden_size],
                            ends=[bias_offset + 6 * hidden_size])

        weight_offset = 3 * direction_offset * hidden_size
        W_ = init_net.Slice(W, name + '/i2h_w_pre',
                            starts=[weight_offset + 0 * hidden_size, 0],
                            ends=[weight_offset + 3 * hidden_size, -1])
        R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                            starts=[weight_offset + 0 * hidden_size, 0],
                            ends=[weight_offset + 3 * hidden_size, -1])

        # caffe2 has a different order from onnx. We need to rearrange
        #  z r h -> r z h
        reforms = ((W_, 'i2h_w',    True,  [(0, -1)]),
                   (R_, 'gate_t_w', False, [(0, -1)]),
                   (Bi, 'i2h_b',    True,  []),
                   (Br, 'gate_t_b', False, []))
        for name_from, name_to, do_concat, extra_dims in reforms:
            xz, xr, xh = ['%s/%s_%s' % (name, prefix, name_to)
                          for prefix in ('update', 'reset', 'output')]
            for i, x in enumerate([xz, xr, xh]):
                dim0 = i * hidden_size, (i + 1) * hidden_size
                starts, ends = zip(dim0, *extra_dims)
                init_net.Slice(name_from, x, starts=starts, ends=ends)
            if do_concat:
                init_net.Concat([xr, xz, xh],
                                ['%s/%s' % (name, name_to), dummy_name()],
                                axis=0)

        initial_h_sliced = name + '/initial_h'
        init_net.Slice(initial_h, initial_h_sliced,
                       starts=[direction_offset + 0, 0, 0],
                       ends=[direction_offset + 1, -1, -1])

        if direction_offset == 1:
            input = pred_mh.net.ReversePackedSegs(
                [input_blob, sequence_lens], name + "/input-reversed")
        else:
            input = input_blob

        hidden_t_all, hidden_t_last = gru_cell.GRU(
            pred_mh,
            input,
            sequence_lens,
            [initial_h_sliced],
            input_size,
            hidden_size,
            name,
            drop_states=True,
            forward_only=True,
            linear_before_reset=linear_before_reset
        )

        if direction_offset == 1:
            hidden_t_all = pred_mh.net.ReversePackedSegs(
                [hidden_t_all, sequence_lens], name + "/output-reversed")

        return hidden_t_all, hidden_t_last

    if direction == 'forward':
        hidden_t_all, hidden_t_last = make_gru(0)
        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1]}
        )
    elif direction == 'bidirectional':
        hidden_t_all_f, hidden_t_last_f = make_gru(0)
        hidden_t_all_b, hidden_t_last_b = make_gru(1)
        pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                           [n.outputs[0], dummy_name()], axis=2)
        pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                           [n.outputs[1], dummy_name()], axis=2)

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
def _create_rnn(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert RNNs without access to the full model"
    assert pred_model is not None, "cannot convert RNNs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    activation = force_unicode(attrs.pop('activations', ('tanh',))[0])
    direction = force_unicode(attrs.pop('direction', 'forward'))
    assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
    assert direction in ['forward', 'bidirectional'], "unsupported backwards RNN"

    input_blob, W, R, B, sequence_lens, initial_h = n.inputs

    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for RNN input failed")

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    def make_rnn(direction_offset):
        name = dummy_name()

        # input and recurrence biases are squashed together in
        # onnx but not in caffe2
        bias_offset = 2 * direction_offset * hidden_size
        init_net.Slice(B, name + "/i2h_b",
                       starts=[bias_offset + 0 * hidden_size],
                       ends=[bias_offset + 1 * hidden_size])
        init_net.Slice(B, name + "/gates_t_b",
                       starts=[bias_offset + 1 * hidden_size],
                       ends=[bias_offset + 2 * hidden_size])

        weight_offset = direction_offset * hidden_size
        init_net.Slice(W, name + '/i2h_w',
                       starts=[weight_offset + 0 * hidden_size, 0],
                       ends=[weight_offset + 1 * hidden_size, -1])
        init_net.Slice(R, name + '/gates_t_w',
                       starts=[weight_offset + 0 * hidden_size, 0],
                       ends=[weight_offset + 1 * hidden_size, -1])

        initial_h_sliced = name + '/initial_h'
        init_net.Slice(initial_h, initial_h_sliced,
                       starts=[direction_offset + 0, 0, 0],
                       ends=[direction_offset + 1, -1, -1])

        if direction_offset == 1:
            input = pred_mh.net.ReversePackedSegs(
                [input_blob, sequence_lens], name + "/input-reversed")
        else:
            input = input_blob

        hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
            pred_mh,
            input,
            sequence_lens,
            [initial_h_sliced],
            input_size,
            hidden_size,
            name,
            drop_states=True,
            forward_only=True,
            activation=activation
        )

        if direction_offset == 1:
            hidden_t_all = pred_mh.net.ReversePackedSegs(
                [hidden_t_all, sequence_lens], name + "/output-reversed")

        return hidden_t_all, hidden_t_last

    if direction == 'forward':
        hidden_t_all, hidden_t_last = make_rnn(0)
        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1]}
        )
    elif direction == 'bidirectional':
        hidden_t_all_f, hidden_t_last_f = make_rnn(0)
        hidden_t_all_b, hidden_t_last_b = make_rnn(1)
        pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                           [n.outputs[0], dummy_name()], axis=2)
        pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                           [n.outputs[1], dummy_name()], axis=2)

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
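# The backward direction above reuses the same forward-only cell: the input is
# time-reversed with ReversePackedSegs, run through the cell, and the per-step
# output is reversed back. A NumPy analogue of the reversal semantics assumed
# here (reverse each sequence's valid prefix along the time axis, leaving
# padding in place); `reverse_packed_segs` is an illustrative helper, not the
# Caffe2 operator itself.
import numpy as np

def reverse_packed_segs(data, lengths):
    # data: [T, N, D]; lengths: per-sequence valid length along T.
    out = data.copy()
    for b, l in enumerate(lengths):
        out[:l, b] = data[:l, b][::-1]
    return out

data = np.arange(8, dtype=np.float32).reshape(4, 2, 1)  # T=4, N=2, D=1
lengths = [4, 2]
reversed_data = reverse_packed_segs(data, lengths)  # padding rows of the short sequence stay put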
def _create_lstm(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert LSTMs without access to the full model"
    assert pred_model is not None, "cannot convert LSTMs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    direction = force_unicode(attrs.pop('direction', 'forward'))
    assert not attrs, "unsupported LSTM attributes: " + str(attrs.keys())
    assert direction in ['forward', 'bidirectional'], "unsupported backwards LSTM"

    input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for LSTM input failed")

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    def make_lstm(direction_offset):
        name = dummy_name()

        # input and recurrence biases are squashed together in
        # onnx but not in caffe2
        bias_offset = 8 * direction_offset * hidden_size
        Bi = init_net.Slice(B, name + "_bias_i2h",
                            starts=[bias_offset + 0 * hidden_size],
                            ends=[bias_offset + 4 * hidden_size])
        Br = init_net.Slice(B, name + "_bias_gates",
                            starts=[bias_offset + 4 * hidden_size],
                            ends=[bias_offset + 8 * hidden_size])

        weight_offset = 4 * direction_offset * hidden_size
        W_ = init_net.Slice(W, name + '/i2h_w_pre',
                            starts=[weight_offset + 0 * hidden_size, 0],
                            ends=[weight_offset + 4 * hidden_size, -1])
        R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                            starts=[weight_offset + 0 * hidden_size, 0],
                            ends=[weight_offset + 4 * hidden_size, -1])

        # caffe2 has a different order from onnx. We need to rearrange
        #  i o f c -> i f o c
        reforms = ((W_, 'i2h_w',     [(0, -1)]),
                   (R_, 'gates_t_w', [(0, -1)]),
                   (Bi, 'i2h_b',     []),
                   (Br, 'gates_t_b', []))
        for name_from, name_to, extra_dims in reforms:
            xi, xo, xf, xc = [name_from + suffix for suffix in ("_i", "_o", "_f", "_c")]
            for i, x in enumerate([xi, xo, xf, xc]):
                dim0 = i * hidden_size, (i + 1) * hidden_size
                starts, ends = zip(dim0, *extra_dims)
                init_net.Slice(name_from, x, starts=starts, ends=ends)
            init_net.Concat([xi, xf, xo, xc],
                            ['%s/%s' % (name, name_to), dummy_name()],
                            axis=0)

        initial_h_sliced = name + '/initial_h'
        init_net.Slice(initial_h, initial_h_sliced,
                       starts=[direction_offset + 0, 0, 0],
                       ends=[direction_offset + 1, -1, -1])
        initial_c_sliced = name + '/initial_c'
        init_net.Slice(initial_c, initial_c_sliced,
                       starts=[direction_offset + 0, 0, 0],
                       ends=[direction_offset + 1, -1, -1])

        if direction_offset == 1:
            input = pred_mh.net.ReversePackedSegs(
                [input_blob, sequence_lens], name + "/input-reversed")
        else:
            input = input_blob

        hidden_t_all, hidden_t_last, _, cell_last, params = rnn_cell.LSTM(
            pred_mh,
            input,
            sequence_lens,
            [initial_h_sliced, initial_c_sliced],
            input_size,
            hidden_size,
            name,
            drop_states=False,
            forward_only=True,
            return_params=True
        )

        if direction_offset == 1:
            hidden_t_all = pred_mh.net.ReversePackedSegs(
                [hidden_t_all, sequence_lens], name + "/output-reversed")

        return hidden_t_all, hidden_t_last, cell_last

    if direction == 'forward':
        hidden_t_all, hidden_t_last, cell_last = make_lstm(0)

        # in the forward case, storage is shared between the three
        # outputs. We need to decouple them so that the
        # VariableLengthSequencePadding only mutates n.outputs[0]
        pred_mh.net.Copy(hidden_t_last, n.outputs[1])
        pred_mh.net.Copy(cell_last, n.outputs[2])

        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={hidden_t_all: n.outputs[0]}
        )
    elif direction == 'bidirectional':
        hidden_t_all_f, hidden_t_last_f, cell_last_f = make_lstm(0)
        hidden_t_all_b, hidden_t_last_b, cell_last_b = make_lstm(1)
        pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                           [n.outputs[0], dummy_name()], axis=2)
        pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                           [n.outputs[1], dummy_name()], axis=0)
        pred_mh.net.Concat([cell_last_f, cell_last_b],
                           [n.outputs[2], dummy_name()], axis=0)

    if sequence_lens is not None:
        pred_mh.net.VariableLengthSequencePadding(
            [n.outputs[0], sequence_lens], [n.outputs[0]])

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
def _create_rnn_variant(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert RNNs without access to the full model"
    assert pred_model is not None, "cannot convert RNNs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    direction = force_unicode(attrs.pop('direction', 'forward'))

    if n.op_type == 'RNN':
        activation = force_unicode(attrs.pop('activations', ('tanh',))[0])
    elif n.op_type == 'GRU':
        linear_before_reset = attrs.pop('linear_before_reset', 0)

    assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
    assert direction in ['forward', 'bidirectional'], "unsupported backwards RNN/GRU/LSTM"

    if n.op_type in ['RNN', 'GRU']:
        input_blob, W, R, B, sequence_lens, initial_h = n.inputs
    elif n.op_type == 'LSTM':
        input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

    if sequence_lens == "":
        sequence_lens = None

    for x in itertools.chain(init_model.graph.input,
                             init_model.graph.value_info,
                             pred_model.graph.input,
                             pred_model.graph.value_info):
        if x.name == W:
            input_size = x.type.tensor_type.shape.dim[2].dim_value
            break
    else:
        raise RuntimeError("best-effort shape inference for RNN/GRU/LSTM failed")

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    init_net.Reshape(W, [W, cls.dummy_name()], shape=[1, -1, 0])
    init_net.Squeeze(W, W, dims=[0])
    init_net.Reshape(R, [R, cls.dummy_name()], shape=[1, -1, 0])
    init_net.Squeeze(R, R, dims=[0])
    init_net.Reshape(B, [B, cls.dummy_name()], shape=[1, -1])
    init_net.Squeeze(B, B, dims=[0])

    if n.op_type == 'RNN':
        def reform(*args):
            pass

        def make_cell(*args, **kwargs):
            return rnn_cell.BasicRNN(*args, activation=activation, **kwargs)

        def make_rnn(direction_offset):
            return cls._make_rnn_direction(
                input_blob, B, W, R, [(initial_h, '/initial_h')],
                sequence_lens, pred_mh, init_net, input_size, hidden_size,
                1, direction_offset, "/i2h_b", "/gates_t_b", "/i2h_w",
                "/gates_t_w", reform, make_cell, lambda x: x)

    elif n.op_type == 'GRU':
        def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
            # caffe2 has a different order from onnx. We need to rearrange
            #  z r h -> r z h
            reforms = ((W_, 'i2h_w',    True,  [(0, -1)]),
                       (R_, 'gate_t_w', False, [(0, -1)]),
                       (Bi, 'i2h_b',    True,  []),
                       (Br, 'gate_t_b', False, []))
            cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                    ['update', 'reset', 'output'], [1, 0, 2])

        def make_cell(*args, **kwargs):
            return gru_cell.GRU(*args, linear_before_reset=linear_before_reset, **kwargs)

        def make_rnn(direction_offset):
            return cls._make_rnn_direction(
                input_blob, B, W, R, [(initial_h, '/initial_h')],
                sequence_lens, pred_mh, init_net, input_size, hidden_size,
                3, direction_offset, "_bias_i2h", "_bias_gates",
                "/i2h_w_pre", "/gates_t_w_pre", reform, make_cell, lambda x: x)

    elif n.op_type == 'LSTM':
        def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
            # caffe2 has a different order from onnx. We need to rearrange
            #  i o f c -> i f o c
            reforms = ((W_, 'i2h_w',     True, [(0, -1)]),
                       (R_, 'gates_t_w', True, [(0, -1)]),
                       (Bi, 'i2h_b',     True, []),
                       (Br, 'gates_t_b', True, []))
            cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                    ['input', 'output', 'forget', 'cell'], [0, 2, 1, 3])

        def make_cell(*args, **kwargs):
            return rnn_cell.LSTM(*args, **kwargs)

        def make_rnn(direction_offset):
            return cls._make_rnn_direction(
                input_blob, B, W, R,
                [(initial_h, '/initial_h'), (initial_c, '/initial_c')],
                sequence_lens, pred_mh, init_net, input_size, hidden_size,
                4, direction_offset, "/i2h_b", "/gates_t_b", "/i2h_w",
                "/gates_t_w", reform, make_cell, lambda x: [x[0], x[1], x[3]])

    if direction == 'forward':
        outputs = make_rnn(0)

        # in the forward case, storage is shared between the
        # last outputs. We need to decouple them so that the
        # VariableLengthSequencePadding only mutates
        # n.outputs[0]
        for i in range(1, len(outputs)):
            pred_mh.net.Copy(outputs[i], n.outputs[i])

        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={outputs[0]: n.outputs[0]})
    elif direction == 'bidirectional':
        outputs_f = make_rnn(0)
        outputs_b = make_rnn(1)
        pred_mh.net.Concat([outputs_f[0], outputs_b[0]],
                           [n.outputs[0], cls.dummy_name()], axis=2)
        for i in range(1, len(n.outputs)):
            pred_mh.net.Concat([outputs_f[i], outputs_b[i]],
                               [n.outputs[i], cls.dummy_name()], axis=0)

    if sequence_lens is not None:
        pred_mh.net.VariableLengthSequencePadding(
            [n.outputs[0], sequence_lens], [n.outputs[0]])

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
def _create_rnn_variant(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert RNNs without access to the full model"
    assert pred_model is not None, "cannot convert RNNs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    direction = force_unicode(attrs.pop('direction', 'forward'))

    if n.op_type == 'RNN':
        activation = force_unicode(attrs.pop('activations', ('tanh',))[0])
    elif n.op_type == 'GRU':
        linear_before_reset = attrs.pop('linear_before_reset', 0)

    assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
    assert direction in ['forward', 'bidirectional'], "unsupported backwards RNN/GRU/LSTM"

    if n.op_type in ['RNN', 'GRU']:
        input_blob, W, R, B, sequence_lens, initial_h = n.inputs
    elif n.op_type == 'LSTM':
        input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

    if sequence_lens == "":
        sequence_lens = None

    for x in itertools.chain(init_model.graph.input,
                             init_model.graph.value_info,
                             pred_model.graph.input,
                             pred_model.graph.value_info):
        if x.name == W:
            input_size = x.type.tensor_type.shape.dim[1].dim_value
            break
    else:
        raise RuntimeError("best-effort shape inference for RNN/GRU/LSTM failed")

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    if n.op_type == 'RNN':
        def reform(*args):
            pass

        def make_cell(*args, **kwargs):
            return rnn_cell.BasicRNN(*args, activation=activation, **kwargs)

        def make_rnn(direction_offset):
            return cls._make_rnn_direction(
                input_blob, B, W, R, [(initial_h, '/initial_h')],
                sequence_lens, pred_mh, init_net, input_size, hidden_size,
                1, direction_offset, "/i2h_b", "/gates_t_b", "/i2h_w",
                "/gates_t_w", reform, make_cell, lambda x: x)

    elif n.op_type == 'GRU':
        def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
            # caffe2 has a different order from onnx. We need to rearrange
            #  z r h -> r z h
            reforms = ((W_, 'i2h_w',    True,  [(0, -1)]),
                       (R_, 'gate_t_w', False, [(0, -1)]),
                       (Bi, 'i2h_b',    True,  []),
                       (Br, 'gate_t_b', False, []))
            cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                    ['update', 'reset', 'output'], [1, 0, 2])

        def make_cell(*args, **kwargs):
            return gru_cell.GRU(*args, linear_before_reset=linear_before_reset, **kwargs)

        def make_rnn(direction_offset):
            return cls._make_rnn_direction(
                input_blob, B, W, R, [(initial_h, '/initial_h')],
                sequence_lens, pred_mh, init_net, input_size, hidden_size,
                3, direction_offset, "_bias_i2h", "_bias_gates",
                "/i2h_w_pre", "/gates_t_w_pre", reform, make_cell, lambda x: x)

    elif n.op_type == 'LSTM':
        def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
            # caffe2 has a different order from onnx. We need to rearrange
            #  i o f c -> i f o c
            reforms = ((W_, 'i2h_w',     True, [(0, -1)]),
                       (R_, 'gates_t_w', True, [(0, -1)]),
                       (Bi, 'i2h_b',     True, []),
                       (Br, 'gates_t_b', True, []))
            cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                    ['input', 'output', 'forget', 'cell'], [0, 2, 1, 3])

        def make_cell(*args, **kwargs):
            return rnn_cell.LSTM(*args, **kwargs)

        def make_rnn(direction_offset):
            return cls._make_rnn_direction(
                input_blob, B, W, R,
                [(initial_h, '/initial_h'), (initial_c, '/initial_c')],
                sequence_lens, pred_mh, init_net, input_size, hidden_size,
                4, direction_offset, "/i2h_b", "/gates_t_b", "/i2h_w",
                "/gates_t_w", reform, make_cell, lambda x: [x[0], x[1], x[3]])

    if direction == 'forward':
        outputs = make_rnn(0)

        # in the forward case, storage is shared between the
        # last outputs. We need to decouple them so that the
        # VariableLengthSequencePadding only mutates
        # n.outputs[0]
        for i in range(1, len(outputs)):
            pred_mh.net.Copy(outputs[i], n.outputs[i])

        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={outputs[0]: n.outputs[0]}
        )
    elif direction == 'bidirectional':
        outputs_f = make_rnn(0)
        outputs_b = make_rnn(1)
        pred_mh.net.Concat([outputs_f[0], outputs_b[0]],
                           [n.outputs[0], cls.dummy_name()], axis=2)
        for i in range(1, len(n.outputs)):
            pred_mh.net.Concat([outputs_f[i], outputs_b[i]],
                               [n.outputs[i], cls.dummy_name()], axis=0)

    if sequence_lens is not None:
        pred_mh.net.VariableLengthSequencePadding(
            [n.outputs[0], sequence_lens], [n.outputs[0]])

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
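# The axis choices in the bidirectional branch, as a NumPy sketch with toy
# shapes (assuming Caffe2's [T, N, H] layout for the per-step output and
# [1, N, H] for each direction's final state): per-step outputs of the two
# directions are concatenated on the feature axis, final states on the
# direction axis.
import numpy as np

T, N, H = 5, 2, 3
y_fwd, y_bwd = np.zeros((T, N, H)), np.ones((T, N, H))
h_fwd, h_bwd = np.zeros((1, N, H)), np.ones((1, N, H))

y = np.concatenate([y_fwd, y_bwd], axis=2)  # -> [T, N, 2*H], like n.outputs[0]
h = np.concatenate([h_fwd, h_bwd], axis=0)  # -> [2, N, H],   like n.outputs[1:]

assert y.shape == (T, N, 2 * H) and h.shape == (2, N, H)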