Example #1
    def test_convolution_sync(self, net_type, num_workers, do, engine):
        m = ModelHelper(name="test_model")
        n = 1
        d = 2
        depth = 3
        iters = 5
        h = 5
        w = 5
        workspace.ResetWorkspace()

        use_cudnn = (engine == 'CUDNN')

        np.random.seed(1701)
        # Build a binary tree of conv layers, summing at each node.
        for i in reversed(range(depth)):
            for j in range(2**i):
                bottom_1 = "{}_{}".format(i + 1, 2 * j)
                bottom_2 = "{}_{}".format(i + 1, 2 * j + 1)
                mid_1 = "{}_{}_m".format(i + 1, 2 * j)
                mid_2 = "{}_{}_m".format(i + 1, 2 * j + 1)
                top = "{}_{}".format(i, j)
                w1, b1, w2, b2 = np.random.randn(4).tolist()
                brew.conv(m,
                          bottom_1,
                          mid_1,
                          dim_in=d,
                          dim_out=d,
                          kernel=3,
                          weight_init=('ConstantFill', dict(value=w1)),
                          bias_init=('ConstantFill', dict(value=b1)),
                          cudnn_state=np.random.randint(0, 3),
                          stride=1,
                          pad=1,
                          deterministic=1,
                          use_cudnn=use_cudnn,
                          engine=engine)
                brew.conv(m,
                          bottom_2,
                          mid_2,
                          dim_in=d,
                          dim_out=d,
                          kernel=3,
                          stride=1,
                          pad=1,
                          weight_init=('ConstantFill', dict(value=w2)),
                          bias_init=('ConstantFill', dict(value=b2)),
                          deterministic=1,
                          cudnn_state=np.random.randint(0, 3),
                          use_cudnn=use_cudnn,
                          engine=engine)
                m.net.Sum([mid_1, mid_2], top)

        m.net.Flatten(["0_0"], ["0_0_flat"])
        # note: the blob is named "xent", but it holds a squared-L2 distance
        m.net.SquaredL2Distance(["0_0_flat", "label"], "xent")
        m.net.AveragedLoss("xent", "loss")
        input_to_grad = m.AddGradientOperators(["loss"])
        m.Proto().device_option.CopyFrom(do)
        m.param_init_net.Proto().device_option.CopyFrom(do)
        m.Proto().type = net_type
        m.Proto().num_workers = num_workers
        self.ws.run(m.param_init_net)

        def run():
            import numpy as np
            np.random.seed(1701)
            input_blobs = ["{}_{}".format(depth, j) for j in range(2**depth)]
            for input_blob in input_blobs:
                self.ws.create_blob(input_blob).feed(np.random.randn(
                    n, d, h, w).astype(np.float32),
                                                     device_option=do)
                self.ws.create_blob("label").feed(np.random.randn(
                    n, d * h * w).astype(np.float32),
                                                  device_option=do)
            self.ws.run(m.net)
            gradients = [
                self.ws.blobs[str(input_to_grad[input_blob])].fetch()
                for input_blob in input_blobs
            ]
            return gradients

        outputs = [run() for _ in range(iters)]
        for output in outputs[1:]:
            np.testing.assert_array_equal(outputs[0], output)
            np.testing.assert_allclose(np.sum(np.square(output)),
                                       1763719461732352.0,
                                       rtol=1e-5)
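
A minimal standalone sketch of the same repeated-run determinism check, using our own blob names and toy sizes (not part of the original test):

import numpy as np
from caffe2.python import brew, workspace
from caffe2.python.model_helper import ModelHelper

m = ModelHelper(name="determinism_sketch")
brew.conv(m, "data", "conv1", dim_in=2, dim_out=2, kernel=3, stride=1, pad=1)
workspace.FeedBlob("data", np.random.randn(1, 2, 5, 5).astype(np.float32))
workspace.RunNetOnce(m.param_init_net)
outs = []
for _ in range(3):
    workspace.RunNetOnce(m.net)
    outs.append(workspace.FetchBlob("conv1").copy())
for o in outs[1:]:
    np.testing.assert_array_equal(outs[0], o)  # every run must match the first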
Example #2
    def _create_gru(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert GRUs without access to the full model"
        assert pred_model is not None, "cannot convert GRUs without access to the full model"

        attrs = dict(n.attrs)  # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        linear_before_reset = attrs.pop('linear_before_reset')
        assert not attrs, "unsupported GRU attributes: " + str(attrs.keys())

        input_blob, W, R, B, sequence_lens, initial_h = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        input_size = cls._rnn_shape_inference(init_model, pred_model, n,
                                              input_blob, W)
        if input_size is None:
            raise RuntimeError(
                "best-effort shape inference for GRU input failed")

        name = dummy_name()
        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        hidden_t_all, hidden_t_last = gru_cell.GRU(
            pred_mh,
            input_blob,
            sequence_lens, [initial_h],
            input_size,
            hidden_size,
            name,
            drop_states=True,
            forward_only=True,
            linear_before_reset=linear_before_reset)

        # input and recurrence biases are squashed together in onnx but not in caffe2
        Bi = name + "_bias_i2h"
        Br = name + "_bias_gates"
        init_net.Slice(B, Bi, starts=[0 * hidden_size], ends=[3 * hidden_size])
        init_net.Slice(B, Br, starts=[3 * hidden_size], ends=[6 * hidden_size])

        # caffe2 has a different order from onnx. We need to rearrange
        #  z r h  -> r z h
        #
        # TODO implement support for return_params in gru_cell.GRU.
        # Until then, hardcode blob names.
        reforms = ((W, 'i2h_w', True, [(0, input_size)]),
                   (R, 'gate_t_w', False, [(0, hidden_size)]),
                   (Bi, 'i2h_b', True, []),
                   (Br, 'gate_t_b', False, []))
        for name_from, name_to, do_concat, extra_dims in reforms:
            xz, xr, xh = [
                '%s/%s_%s' % (name, prefix, name_to)
                for prefix in ('update', 'reset', 'output')
            ]
            for i, x in enumerate([xz, xr, xh]):
                dim0 = i * hidden_size, (i + 1) * hidden_size
                starts, ends = zip(dim0, *extra_dims)
                init_net.Slice(name_from, x, starts=starts, ends=ends)
            if do_concat:
                init_net.Concat([xr, xz, xh],
                                ['%s/%s' % (name, name_to),
                                 dummy_name()],
                                axis=0)

        pred_mh.net = pred_mh.net.Clone("dummy-clone-net",
                                        blob_remap={
                                            hidden_t_all: n.outputs[0],
                                            hidden_t_last: n.outputs[1]
                                        })

        return Caffe2Ops(list(pred_mh.Proto().op), list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
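
The bias split above is easiest to see in plain numpy. A sketch of the layout the two Slice calls assume, with a toy hidden_size of our own choosing:

import numpy as np

hidden_size = 4
B = np.arange(6 * hidden_size)  # ONNX packs [Wb_z Wb_r Wb_h | Rb_z Rb_r Rb_h]
Bi = B[0 * hidden_size:3 * hidden_size]  # input-to-hidden biases ("_bias_i2h")
Br = B[3 * hidden_size:6 * hidden_size]  # recurrent biases ("_bias_gates")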
Example #3
    def _create_rnn_variant(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert RNNs without access to the full model"
        assert pred_model is not None, "cannot convert RNNs without access to the full model"

        attrs = dict(n.attrs)  # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        direction = force_unicode(attrs.pop('direction', 'forward'))

        if n.op_type == 'RNN':
            activation = force_unicode(attrs.pop('activations', ('tanh', ))[0])
        elif n.op_type == 'GRU':
            linear_before_reset = attrs.pop('linear_before_reset', 0)

        assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
        assert direction in ['forward', 'bidirectional'], \
            "unsupported backwards RNN/GRU/LSTM"

        if n.op_type in ['RNN', 'GRU']:
            input_blob, W, R, B, sequence_lens, initial_h = n.inputs
        elif n.op_type == 'LSTM':
            input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        for x in itertools.chain(init_model.graph.input,
                                 init_model.graph.value_info,
                                 pred_model.graph.input,
                                 pred_model.graph.value_info):
            if x.name == W:
                input_size = x.type.tensor_type.shape.dim[2].dim_value
                break
        else:
            raise RuntimeError(
                "best-effort shape inference for RNN/GRU/LSTM failed")

        pred_mh = ModelHelper()
        init_net = core.Net("init-net")

        # flatten away the num_directions dimension so that W, R and B can
        # be sliced per direction with plain 2D/1D offsets
        init_net.Reshape(W, [W, cls.dummy_name()], shape=[1, -1, 0])
        init_net.Squeeze(W, W, dims=[0])
        init_net.Reshape(R, [R, cls.dummy_name()], shape=[1, -1, 0])
        init_net.Squeeze(R, R, dims=[0])
        init_net.Reshape(B, [B, cls.dummy_name()], shape=[1, -1])
        init_net.Squeeze(B, B, dims=[0])

        if n.op_type == 'RNN':

            def reform(*args):
                pass

            def make_cell(*args, **kwargs):
                return rnn_cell.BasicRNN(*args,
                                         activation=activation,
                                         **kwargs)

            def make_rnn(direction_offset):
                return cls._make_rnn_direction(
                    input_blob, B, W, R, [(initial_h, '/initial_h')],
                    sequence_lens, pred_mh, init_net, input_size, hidden_size,
                    1, direction_offset, "/i2h_b", "/gates_t_b", "/i2h_w",
                    "/gates_t_w", reform, make_cell, lambda x: x)

        elif n.op_type == 'GRU':

            def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
                # caffe2 has a different order from onnx. We need to rearrange
                #  z r h  -> r z h
                reforms = ((W_, 'i2h_w', True, [(0, -1)]),
                           (R_, 'gate_t_w', False, [(0, -1)]),
                           (Bi, 'i2h_b', True, []),
                           (Br, 'gate_t_b', False, []))
                cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                        ['update', 'reset', 'output'],
                                        [1, 0, 2])

            def make_cell(*args, **kwargs):
                return gru_cell.GRU(*args,
                                    linear_before_reset=linear_before_reset,
                                    **kwargs)

            def make_rnn(direction_offset):
                return cls._make_rnn_direction(
                    input_blob, B, W, R, [(initial_h, '/initial_h')],
                    sequence_lens, pred_mh, init_net, input_size, hidden_size,
                    3, direction_offset, "_bias_i2h", "_bias_gates",
                    "/i2h_w_pre", "/gates_t_w_pre", reform, make_cell,
                    lambda x: x)

        elif n.op_type == 'LSTM':

            def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
                # caffe2 has a different order from onnx. We need to rearrange
                #   i o f c -> i f o c
                reforms = ((W_, 'i2h_w', True, [(0, -1)]),
                           (R_, 'gates_t_w', True, [(0, -1)]),
                           (Bi, 'i2h_b', True, []),
                           (Br, 'gates_t_b', True, []))
                cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                        ['input', 'output', 'forget', 'cell'],
                                        [0, 2, 1, 3])

            def make_cell(*args, **kwargs):
                return rnn_cell.LSTM(*args, **kwargs)

            def make_rnn(direction_offset):
                return cls._make_rnn_direction(
                    input_blob, B, W, R, [(initial_h, '/initial_h'),
                                          (initial_c, '/initial_c')],
                    sequence_lens, pred_mh, init_net, input_size, hidden_size,
                    4, direction_offset, "/i2h_b", "/gates_t_b", "/i2h_w",
                    "/gates_t_w", reform, make_cell,
                    lambda x: [x[0], x[1], x[3]])

        if direction == 'forward':
            outputs = make_rnn(0)

            # in the forward case, storage is shared between the
            # last outputs. We need to decouple them so that the
            # VariableLengthSequencePadding only mutates
            # n.outputs[0]
            for i in range(1, len(outputs)):
                pred_mh.net.Copy(outputs[i], n.outputs[i])

            if sequence_lens is not None:
                pred_mh.net.VariableLengthSequencePadding(
                    [outputs[0], sequence_lens], [outputs[0]])
            pred_mh.net.ExpandDims([outputs[0]], [n.outputs[0]], dims=[1])
        elif direction == 'bidirectional':
            outputs_f = make_rnn(0)
            outputs_b = make_rnn(1)

            concatted_output, _ = pred_mh.net.Concat(
                [outputs_f[0], outputs_b[0]],
                [cls.dummy_name(), cls.dummy_name()],
                axis=2)
            if sequence_lens is not None:
                pred_mh.net.VariableLengthSequencePadding(
                    [concatted_output, sequence_lens], [concatted_output])
            reshaped_output, _ = pred_mh.net.Reshape(
                concatted_output,
                [cls.dummy_name(), cls.dummy_name()],
                shape=[0, 0, -1, 2])
            pred_mh.net.Transpose(reshaped_output,
                                  n.outputs[0],
                                  axes=[0, 3, 1, 2])
            for i in range(1, len(n.outputs)):
                pred_mh.net.Concat(
                    [outputs_f[i], outputs_b[i]],
                    [n.outputs[i], cls.dummy_name()],
                    axis=0)

        # We want to decide whether to put all of our weight-reshaping
        # operators in the init net or the predict net. We can put
        # them in the init net iff the inputs to those operators are
        # already available, either as graph initializers, or as the
        # output of other operators in the init net. The latter case
        # occurs, for example, when exporting from pytorch to onnx.
        # In most production use, we expect has_initializers to be
        # true.
        initializers = {i.name for i in init_model.graph.initializer}
        outputs = {
            output
            for node in init_model.graph.node for output in node.output
        }
        has_initializers = all(x in initializers or x in outputs
                               for x in (W, R, B))

        pred_ops = []
        init_ops = []
        (init_ops if has_initializers else pred_ops).extend(
            init_net.Proto().op)
        pred_ops.extend(pred_mh.Proto().op)

        return Caffe2Ops(pred_ops, init_ops,
                         list(pred_mh.Proto().external_input))
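
The z r h -> r z h rearrangement that the GRU reform performs (the permutation [1, 0, 2] passed to _rnn_reform_weights) can be sketched in numpy with assumed toy shapes:

import numpy as np

hidden_size = 4
W = np.random.randn(3 * hidden_size, 8)  # ONNX gate order: z, r, h
z, r, h = (W[i * hidden_size:(i + 1) * hidden_size] for i in range(3))
W_caffe2 = np.concatenate([r, z, h], axis=0)  # Caffe2 gate order: r, z, h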
Example #4
    def _create_gru(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert GRUs without access to the full model"
        assert pred_model is not None, "cannot convert GRUs without access to the full model"

        attrs = dict(n.attrs) # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        linear_before_reset = attrs.pop('linear_before_reset', 0)
        direction = force_unicode(attrs.pop('direction', 'forward'))
        assert not attrs, "unsupported GRU attributes: " + str(attrs.keys())
        assert direction in ['forward', 'bidirectional'], "unsupported backwards GRU"

        input_blob, W, R, B, sequence_lens, initial_h = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
        if input_size is None:
            raise RuntimeError("best-effort shape inference for GRU input failed")

        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        def make_gru(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 6 * direction_offset * hidden_size
            Bi = init_net.Slice(B, name + "_bias_i2h",
                                starts=[bias_offset + 0 * hidden_size],
                                ends  =[bias_offset + 3 * hidden_size])
            Br = init_net.Slice(B, name + "_bias_gates",
                                starts=[bias_offset + 3 * hidden_size],
                                ends  =[bias_offset + 6 * hidden_size])

            weight_offset = 3 * direction_offset * hidden_size
            W_ = init_net.Slice(W, name + '/i2h_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 3 * hidden_size,-1])
            R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 3 * hidden_size,-1])

            # caffe2 has a different order from onnx. We need to rearrange
            #  z r h  -> r z h
            reforms = ((W_, 'i2h_w',    True,  [(0,-1)]),
                       (R_, 'gate_t_w', False, [(0,-1)]),
                       (Bi, 'i2h_b',    True,  []),
                       (Br, 'gate_t_b', False, []))
            for name_from, name_to, do_concat, extra_dims in reforms:
                xz, xr, xh = ['%s/%s_%s' % (name, prefix, name_to) for prefix in ('update', 'reset', 'output')]
                for i, x in enumerate([xz, xr, xh]):
                    dim0 = i * hidden_size, (i+1) * hidden_size
                    starts, ends = zip(dim0, *extra_dims)
                    init_net.Slice(name_from, x, starts=starts, ends=ends)
                if do_concat:
                    init_net.Concat([xr, xz, xh], ['%s/%s' % (name, name_to), dummy_name()], axis=0)

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h, initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last = gru_cell.GRU(
                pred_mh,
                input,
                sequence_lens,
                [initial_h_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=True,
                forward_only=True,
                linear_before_reset=linear_before_reset
            )

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last

        if direction == 'forward':
            hidden_t_all, hidden_t_last = make_gru(0)
            pred_mh.net = pred_mh.net.Clone(
                "dummy-clone-net",
                blob_remap={ hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1] }
            )
        elif direction == 'bidirectional':
            hidden_t_all_f, hidden_t_last_f = make_gru(0)
            hidden_t_all_b, hidden_t_last_b = make_gru(1)
            pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                               [n.outputs[0], dummy_name()], axis=2)
            pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                               [n.outputs[1], dummy_name()], axis=2)

        return Caffe2Ops(list(pred_mh.Proto().op),
                         list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
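
In the bidirectional case both directions' parameters are packed along dim 0, and direction_offset selects the slab, which is what the bias_offset/weight_offset arithmetic above computes. A small numpy sketch with toy sizes of our own:

import numpy as np

hidden_size, input_size = 4, 8
W = np.random.randn(2 * 3 * hidden_size, input_size)  # forward slab, then backward slab
for direction_offset in (0, 1):
    weight_offset = 3 * direction_offset * hidden_size
    W_dir = W[weight_offset:weight_offset + 3 * hidden_size]  # this direction's z, r, h rows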
Example #5
    def _create_lstm(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert LSTMs without access to the full model"
        assert pred_model is not None, "cannot convert LSTMs without access to the full model"

        attrs = dict(n.attrs)  # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        assert not attrs, "unsupported LSTM attributes: " + str(attrs.keys())

        input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        input_size = cls._rnn_shape_inference(init_model, pred_model, n,
                                              input_blob, W)
        if input_size is None:
            raise RuntimeError(
                "best-effort shape inference for LSTM input failed")

        name = dummy_name()
        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        hidden_t_all, hidden_t_last, _, _, params = rnn_cell.LSTM(
            pred_mh,
            input_blob,
            sequence_lens, [initial_h, initial_c],
            input_size,
            hidden_size,
            name,
            drop_states=True,
            forward_only=True,
            return_params=True)

        # input and recurrence biases are squashed together in onnx but not in caffe2
        Bi = name + "_bias_i2h"
        Br = name + "_bias_gates"
        init_net.Slice(B, Bi, starts=[0 * hidden_size], ends=[4 * hidden_size])
        init_net.Slice(B, Br, starts=[4 * hidden_size], ends=[8 * hidden_size])

        # caffe2 has a different order from onnx. We need to rearrange
        #   i o f c -> i f o c
        reforms = ((W, params['input']['weights'], [(0, input_size)]),
                   (R, params['recurrent']['weights'], [(0, hidden_size)]),
                   (Bi, params['input']['biases'], []),
                   (Br, params['recurrent']['biases'], []))
        for name_from, name_to, extra_dims in reforms:
            xi, xo, xf, xc = [
                name_from + suffix for suffix in ("_i", "_o", "_f", "_c")
            ]
            for i, x in enumerate([xi, xo, xf, xc]):
                dim0 = i * hidden_size, (i + 1) * hidden_size
                starts, ends = zip(dim0, *extra_dims)
                init_net.Slice(name_from, x, starts=starts, ends=ends)
            init_net.Concat([xi, xf, xo, xc], [name_to, dummy_name()], axis=0)

        pred_mh.net = pred_mh.net.Clone("dummy-clone-net",
                                        blob_remap={
                                            hidden_t_all: n.outputs[0],
                                            hidden_t_last: n.outputs[1]
                                        })

        return Caffe2Ops(list(pred_mh.Proto().op), list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
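
The LSTM gate reordering i o f c -> i f o c is the analogue of the GRU case; sketched in numpy on the packed input bias, with a toy hidden_size:

import numpy as np

hidden_size = 4
Bi = np.random.randn(4 * hidden_size)  # ONNX gate order: i, o, f, c
gi, go, gf, gc = (Bi[k * hidden_size:(k + 1) * hidden_size] for k in range(4))
Bi_caffe2 = np.concatenate([gi, gf, go, gc])  # Caffe2 gate order: i, f, o, c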
Example #6
    def _create_rnn(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert RNNs without access to the full model"
        assert pred_model is not None, "cannot convert RNNs without access to the full model"

        attrs = dict(n.attrs) # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        activation = force_unicode(attrs.pop('activations', ('tanh',))[0])
        direction = force_unicode(attrs.pop('direction', 'forward'))
        assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
        assert direction in ['forward', 'bidirectional'], "unsupported backwards RNN"

        input_blob, W, R, B, sequence_lens, initial_h = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
        if input_size is None:
            raise RuntimeError("best-effort shape inference for RNN input failed")

        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        def make_rnn(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 2 * direction_offset * hidden_size
            init_net.Slice(B, name + "/i2h_b",
                           starts=[bias_offset + 0 * hidden_size],
                           ends  =[bias_offset + 1 * hidden_size])
            init_net.Slice(B, name + "/gates_t_b",
                           starts=[bias_offset + 1 * hidden_size],
                           ends  =[bias_offset + 2 * hidden_size])

            weight_offset = direction_offset * hidden_size
            init_net.Slice(W, name + '/i2h_w',
                           starts=[weight_offset + 0 * hidden_size, 0],
                           ends  =[weight_offset + 1 * hidden_size,-1])
            init_net.Slice(R, name + '/gates_t_w',
                           starts=[weight_offset + 0 * hidden_size, 0],
                           ends  =[weight_offset + 1 * hidden_size,-1])

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h, initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
                pred_mh,
                input,
                sequence_lens,
                [initial_h_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=True,
                forward_only=True,
                activation=activation
            )

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last

        if direction == 'forward':
            hidden_t_all, hidden_t_last = make_rnn(0)
            pred_mh.net = pred_mh.net.Clone(
                "dummy-clone-net",
                blob_remap={ hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1] }
            )
        elif direction == 'bidirectional':
            hidden_t_all_f, hidden_t_last_f = make_rnn(0)
            hidden_t_all_b, hidden_t_last_b = make_rnn(1)
            pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                               [n.outputs[0], dummy_name()], axis=2)
            pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                               [n.outputs[1], dummy_name()], axis=2)

        return Caffe2Ops(list(pred_mh.Proto().op),
                         list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
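
The ReversePackedSegs pairing used when direction_offset == 1 is the standard trick for emulating a backward RNN with a forward-only cell: reverse each sequence, run it forward, then reverse the outputs. A tiny sketch in which forward_cell is a hypothetical stand-in:

import numpy as np

def forward_cell(x):  # hypothetical stand-in for the forward-only RNN
    return np.cumsum(x, axis=0)

seq = np.arange(5.0)
backward_out = forward_cell(seq[::-1])[::-1]  # reverse, run forward, reverse back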
Example #7
    def _create_lstm(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert LSTMs without access to the full model"
        assert pred_model is not None, "cannot convert LSTMs without access to the full model"

        attrs = dict(n.attrs) # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        direction = force_unicode(attrs.pop('direction', 'forward'))
        assert not attrs, "unsupported LSTM attributes: " + str(attrs.keys())
        assert direction in ['forward', 'bidirectional'], "unsupported backwards LSTM"

        input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
        if input_size is None:
            raise RuntimeError("best-effort shape inference for LSTM input failed")

        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        def make_lstm(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 8 * direction_offset * hidden_size
            Bi = init_net.Slice(B, name + "_bias_i2h",
                                starts=[bias_offset + 0 * hidden_size],
                                ends  =[bias_offset + 4 * hidden_size])
            Br = init_net.Slice(B, name + "_bias_gates",
                                starts=[bias_offset + 4 * hidden_size],
                                ends  =[bias_offset + 8 * hidden_size])

            weight_offset = 4 * direction_offset * hidden_size
            W_ = init_net.Slice(W, name + '/i2h_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 4 * hidden_size,-1])
            R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 4 * hidden_size,-1])

            # caffe2 has a different order from onnx. We need to rearrange
            #   i o f c -> i f o c
            reforms = ((W_, 'i2h_w',     [(0, -1)]),
                       (R_, 'gates_t_w', [(0, -1)]),
                       (Bi, 'i2h_b'    , []),
                       (Br, 'gates_t_b', []))
            for name_from, name_to, extra_dims in reforms:
                xi, xo, xf, xc = [name_from + suffix for suffix in ("_i", "_o", "_f", "_c")]
                for i, x in enumerate([xi, xo, xf, xc]):
                    dim0 = i * hidden_size, (i+1) * hidden_size
                    starts, ends = zip(dim0, *extra_dims)
                    init_net.Slice(name_from, x, starts=starts, ends=ends)
                init_net.Concat([xi, xf, xo, xc], ['%s/%s' % (name, name_to), dummy_name()], axis=0)

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h, initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])
            initial_c_sliced = name + '/initial_c'
            init_net.Slice(initial_c, initial_c_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last, _, cell_last, params = rnn_cell.LSTM(
                pred_mh,
                input,
                sequence_lens,
                [initial_h_sliced, initial_c_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=False,
                forward_only=True,
                return_params=True
            )

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last, cell_last

        if direction == 'forward':
            hidden_t_all, hidden_t_last, cell_last = make_lstm(0)

            # in the forward case, storage is shared between the three
            # outputs. We need to decouple them so that the
            # VariableLengthSequencePadding only mutates n.outputs[0]
            pred_mh.net.Copy(hidden_t_last, n.outputs[1])
            pred_mh.net.Copy(cell_last, n.outputs[2])

            pred_mh.net = pred_mh.net.Clone(
                "dummy-clone-net",
                blob_remap={ hidden_t_all: n.outputs[0] }
            )
        elif direction == 'bidirectional':
            hidden_t_all_f, hidden_t_last_f, cell_last_f = make_lstm(0)
            hidden_t_all_b, hidden_t_last_b, cell_last_b = make_lstm(1)
            pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                               [n.outputs[0], dummy_name()], axis=2)
            pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                               [n.outputs[1], dummy_name()], axis=0)
            pred_mh.net.Concat([cell_last_f, cell_last_b],
                               [n.outputs[2], dummy_name()], axis=0)

        if sequence_lens is not None:
            pred_mh.net.VariableLengthSequencePadding(
                [n.outputs[0], sequence_lens], [n.outputs[0]])

        return Caffe2Ops(list(pred_mh.Proto().op),
                         list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
Example #8
def double_matmul():
    model = ModelHelper(name="r")
    fc0 = brew.fc(model, "data", "fc0", 10, 10)
    fc1 = brew.fc(model, fc0, "fc1", 10, 10)
    model.Proto().external_output[:] = [str(fc0), str(fc1)]
    return model, [(1, 10)]
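
A possible way to exercise double_matmul; the feeding and running below are our own additions, not shown in the source:

import numpy as np
from caffe2.python import workspace

model, input_shapes = double_matmul()
workspace.FeedBlob("data", np.random.rand(*input_shapes[0]).astype(np.float32))
workspace.RunNetOnce(model.param_init_net)
workspace.RunNetOnce(model.net)
print(workspace.FetchBlob("fc1").shape)  # (1, 10)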
Example #9
from caffe2.python import brew, core, scope, workspace
from caffe2.python.modeling.parameter_info import ParameterTags
from caffe2.python.model_helper import ModelHelper
from caffe2.python.cnn import CNNModelHelper

import unittest
import numpy as np

m, k, n = (1, 28 * 28, 10)  # [m][k] * [k][n] = [m][n]
x = np.random.rand(m, k).astype(np.float32) - 0.5  # x = m*k 2D tensor

workspace.ResetWorkspace()  # clear workspace
workspace.FeedBlob("x", x)  # feed x as a blob
model = ModelHelper(name="test_model")  # create model

model.Proto()  # inspect the model's protobuf before the operators are added
brew.fc(model, "x", "y", k, n)  # fully connected layer mapping k inputs to n outputs; y is an m*n 2D tensor
brew.softmax(model, "y", "z")
model.Validate()
model.Proto()  # inspect the protobuf again after the operators are added

workspace.RunNetOnce(
    model.param_init_net)  # randomly initialize y_w (weight, 2D array) and y_b (bias, 1D array)
workspace.Blobs()  # list the blobs now in the workspace
# workspace.FetchBlob("y_w")
# workspace.FetchBlob("y_b")

workspace.RunNetOnce(model.net)
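
Once the net has run, the softmax output can be read back; a brief follow-up sketch (the assertion is ours):

z = workspace.FetchBlob("z")  # softmax output, shape (m, n)
assert np.allclose(z.sum(axis=1), 1.0, atol=1e-5)  # each row is a probability distribution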
Example #10
    def _create_rnn_variant(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert RNNs without access to the full model"
        assert pred_model is not None, "cannot convert RNNs without access to the full model"

        attrs = dict(n.attrs) # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        direction = force_unicode(attrs.pop('direction', 'forward'))

        if n.op_type == 'RNN':
            activation = force_unicode(attrs.pop('activations', ('tanh',))[0])
        elif n.op_type == 'GRU':
            linear_before_reset = attrs.pop('linear_before_reset', 0)

        assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
        assert direction in ['forward', 'bidirectional'], "unsupported backwards RNN/GRU/LSTM"

        if n.op_type in ['RNN', 'GRU']:
            input_blob, W, R, B, sequence_lens, initial_h = n.inputs
        elif n.op_type == 'LSTM':
            input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        for x in itertools.chain(init_model.graph.input,
                                 init_model.graph.value_info,
                                 pred_model.graph.input,
                                 pred_model.graph.value_info):
            if x.name == W:
                input_size = x.type.tensor_type.shape.dim[1].dim_value
                break
        else:
            raise RuntimeError("best-effort shape inference for RNN/GRU/LSTM failed")

        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        if n.op_type == 'RNN':
            def reform(*args):
                pass

            def make_cell(*args, **kwargs):
                return rnn_cell.BasicRNN(*args, activation=activation, **kwargs)

            def make_rnn(direction_offset):
                return cls._make_rnn_direction(
                    input_blob, B, W, R, [(initial_h, '/initial_h')], sequence_lens,
                    pred_mh, init_net, input_size, hidden_size, 1, direction_offset,
                    "/i2h_b", "/gates_t_b", "/i2h_w", "/gates_t_w",
                    reform, make_cell, lambda x: x)

        elif n.op_type == 'GRU':
            def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
                # caffe2 has a different order from onnx. We need to rearrange
                #  z r h  -> r z h
                reforms = ((W_, 'i2h_w',    True,  [(0,-1)]),
                           (R_, 'gate_t_w', False, [(0,-1)]),
                           (Bi, 'i2h_b',    True,  []),
                           (Br, 'gate_t_b', False, []))
                cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                        ['update', 'reset', 'output'], [1, 0, 2])

            def make_cell(*args, **kwargs):
                return gru_cell.GRU(*args, linear_before_reset=linear_before_reset, **kwargs)

            def make_rnn(direction_offset):
                return cls._make_rnn_direction(
                    input_blob, B, W, R, [(initial_h, '/initial_h')], sequence_lens,
                    pred_mh, init_net, input_size, hidden_size, 3, direction_offset,
                    "_bias_i2h", "_bias_gates", "/i2h_w_pre", "/gates_t_w_pre",
                    reform, make_cell, lambda x: x)

        elif n.op_type == 'LSTM':
            def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
                # caffe2 has a different order from onnx. We need to rearrange
                #   i o f c -> i f o c
                reforms = ((W_, 'i2h_w',     True, [(0, -1)]),
                           (R_, 'gates_t_w', True, [(0, -1)]),
                           (Bi, 'i2h_b'    , True, []),
                           (Br, 'gates_t_b', True, []))
                cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                        ['input', 'output', 'forget', 'cell'], [0, 2, 1, 3])

            def make_cell(*args, **kwargs):
                return rnn_cell.LSTM(*args, **kwargs)

            def make_rnn(direction_offset):
                return cls._make_rnn_direction(
                    input_blob, B, W, R, [(initial_h, '/initial_h'), (initial_c, '/initial_c')], sequence_lens,
                    pred_mh, init_net, input_size, hidden_size, 4, direction_offset,
                    "/i2h_b", "/gates_t_b", "/i2h_w", "/gates_t_w",
                    reform, make_cell, lambda x: [x[0], x[1], x[3]])

        if direction == 'forward':
            outputs = make_rnn(0)

            # in the forward case, storage is shared between the
            # last outputs. We need to decouple them so that the
            # VariableLengthSequencePadding only mutates
            # n.outputs[0]
            for i in range(1, len(outputs)):
                pred_mh.net.Copy(outputs[i], n.outputs[i])

            pred_mh.net = pred_mh.net.Clone(
                "dummy-clone-net", blob_remap={ outputs[0]: n.outputs[0] }
            )
        elif direction == 'bidirectional':
            outputs_f = make_rnn(0)
            outputs_b = make_rnn(1)

            pred_mh.net.Concat([outputs_f[0], outputs_b[0]],
                               [n.outputs[0], cls.dummy_name()], axis=2)
            for i in range(1, len(n.outputs)):
                pred_mh.net.Concat([outputs_f[i], outputs_b[i]],
                                   [n.outputs[i], cls.dummy_name()], axis=0)

        if sequence_lens is not None:
            pred_mh.net.VariableLengthSequencePadding(
                [n.outputs[0], sequence_lens], [n.outputs[0]])

        return Caffe2Ops(list(pred_mh.Proto().op),
                         list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
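
The shape-inference loop above relies on Python's for/else: the else clause runs only when the loop finishes without hitting break, i.e. when no tensor named W was found. A minimal illustration:

for x in ["a", "b"]:
    if x == "W":
        break
else:
    raise RuntimeError("best-effort shape inference failed")  # no break fired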