Example #1
    def _create_gemm(cls, n):
        (A, B, C) = n.inputs
        (Y,) = n.outputs
        alpha = n.attrs.get('alpha', 1.)
        beta = n.attrs.get('beta', 1.)

        ops = []
        if alpha != 1:
            scaled_A = dummy_name()
            ops.append(core.CreateOperator('Scale', [A], [scaled_A], scale=alpha))
            A = scaled_A
        if beta != 1:
            scaled_C = dummy_name()
            ops.append(core.CreateOperator('Scale', [C], [scaled_C], scale=beta))
            C = scaled_C

        AB = dummy_name()
        ops.append(core.CreateOperator('MatMul',
                                       [A, B],
                                       [AB],
                                       trans_a=n.attrs.get('transA', 0),
                                       trans_b=n.attrs.get('transB', 0)))
        ops.append(core.CreateOperator('Add',
                                       [AB, C],
                                       [Y],
                                       broadcast=n.attrs.get('broadcast', 0)))

        return ops
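
ONNX Gemm computes Y = alpha * A * B + beta * C (with optional transposes), and the snippet above decomposes that into Scale, MatMul and a broadcast Add. A minimal NumPy sketch of the semantics being preserved (the helper name is mine, not part of the converter):

import numpy as np

def gemm_reference(A, B, C, alpha=1.0, beta=1.0, trans_a=0, trans_b=0):
    # Same result as the Scale -> MatMul -> (broadcast) Add chain above.
    A = A.T if trans_a else A
    B = B.T if trans_b else B
    return alpha * (A @ B) + beta * C

A, B, C = np.random.randn(2, 3), np.random.randn(3, 4), np.random.randn(4)
expected = 2.0 * (A @ B) + 0.5 * C
assert np.allclose(gemm_reference(A, B, C, alpha=2.0, beta=0.5), expected)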
Example #2
    def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version, include_initializers):
        device_option = get_device_option(Device(device))

        init_model = ModelProto()
        init_model.ParseFromString(cls.optimize_onnx(onnx_model.SerializeToString(), init=True))
        cls._inplace_rewrite(init_model.graph)

        predict_model = ModelProto()
        predict_model.ParseFromString(cls.optimize_onnx(onnx_model.SerializeToString(), predict=True))
        cls._inplace_rewrite(predict_model.graph)

        init_net = caffe2_pb2.NetDef()
        predict_net = caffe2_pb2.NetDef()

        init_net.name = onnx_model.graph.name + '_init'
        predict_net.name = onnx_model.graph.name + '_predict'

        if include_initializers:
            init_net.op.extend(cls._create_tensor_filling_op(tp) for tp in onnx_model.graph.initializer)

        dummy_name(cls._all_names_in_graph(init_model.graph) | cls._all_names_in_graph(predict_model.graph))

        for net, model in ( (init_net, init_model), (predict_net, predict_model) ):
            net.device_option.CopyFrom(device_option)
            for node in model.graph.node:
                net.op.extend(cls._onnx_node_to_caffe2_op(node, opset_version))
            net.external_output.extend(
                value_info.name for value_info in model.graph.output)
            net.external_input.extend(
                value_info.name for value_info in model.graph.input)

        return init_net, predict_net
Example #3
    def _create_channel_shuffle(cls, op_def, shapes):
        x, = op_def.input
        y, = op_def.output
        n, c, h, w = shapes[x]
        args = {arg.name: arg for arg in op_def.arg}
        g = args['group'].i
        assert c % g == 0

        nodes = []

        tmp1 = dummy_name()
        nodes.append(helper.make_node(
            'Reshape',
            inputs=[x],
            outputs=[tmp1],
            shape=[n, g, c // g, h, w],
        ))

        tmp2 = dummy_name()
        nodes.append(helper.make_node(
            'Transpose',
            inputs=[tmp1],
            outputs=[tmp2],
            perm=[0, 2, 1, 3, 4],
        ))

        nodes.append(helper.make_node(
            'Reshape',
            inputs=[tmp2],
            outputs=[y],
            shape=[n, c, h, w],
        ))
        return nodes
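
The three nodes implement the usual channel-shuffle trick: split the C channels into g groups, swap the group and per-group axes, then flatten back. A standalone NumPy equivalent (the function name is mine, for illustration only):

import numpy as np

def channel_shuffle(x, g):
    n, c, h, w = x.shape
    assert c % g == 0
    x = x.reshape(n, g, c // g, h, w)   # first Reshape
    x = x.transpose(0, 2, 1, 3, 4)      # Transpose with perm=[0, 2, 1, 3, 4]
    return x.reshape(n, c, h, w)        # second Reshape

x = np.arange(2 * 6 * 4 * 4, dtype=np.float32).reshape(2, 6, 4, 4)
assert channel_shuffle(x, 3).shape == x.shape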
Example #4
    def _create_gemm(cls, init_model, pred_model, n, opset_version):
        (A, B, C) = n.inputs
        (Y, ) = n.outputs
        alpha = n.attrs.get('alpha', 1.)
        beta = n.attrs.get('beta', 1.)

        ops = []
        if alpha != 1:
            scaled_A = dummy_name()
            ops.append(
                core.CreateOperator('Scale', [A], [scaled_A], scale=alpha))
            A = scaled_A
        if beta != 1:
            scaled_C = dummy_name()
            ops.append(
                core.CreateOperator('Scale', [C], [scaled_C], scale=beta))
            C = scaled_C

        trans_a = n.attrs.get('transA', 0)
        trans_b = n.attrs.get('transB', 0)
        broadcast = n.attrs.get('broadcast', 0)
        if not trans_a and trans_b and broadcast:
            ops.append(core.CreateOperator('FC', [A, B, C], [Y]))
        else:
            AB = dummy_name()
            ops.append(
                core.CreateOperator('MatMul', [A, B], [AB],
                                    trans_a=trans_a,
                                    trans_b=trans_b))
            ops.append(
                core.CreateOperator('Add', [AB, C], [Y], broadcast=broadcast))

        return ops
Example #5
    def _create_lstm(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert LSTMs without access to the full model"
        assert pred_model is not None, "cannot convert LSTMs without access to the full model"

        attrs = dict(n.attrs) # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        assert not attrs, "unsupported LSTM attributes: " + str(attrs.keys())

        input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

        input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
        if input_size is None:
            raise RuntimeError("best-effort shape inference for LSTM input failed")

        name = dummy_name()
        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        hidden_t_all, hidden_t_last, _, _, params = rnn_cell.LSTM(
            pred_mh,
            input_blob,
            sequence_lens,
            [initial_h, initial_c],
            input_size,
            hidden_size,
            name,
            forward_only=True,
            return_params=True
        )

        # input and recurrence biases are squashed together in onnx but not in caffe2
        Bi = name + "_bias_i2h"
        Br = name + "_bias_gates"
        init_net.Slice(B, Bi, starts=[0*hidden_size], ends=[4*hidden_size])
        init_net.Slice(B, Br, starts=[4*hidden_size], ends=[8*hidden_size])

        # caffe2 has a different order from onnx. We need to rearrange
        #   i o f c -> i f o c
        reforms = ((W,  params['input']    ['weights'], [(0, input_size)]),
                   (R,  params['recurrent']['weights'], [(0, hidden_size)]),
                   (Bi, params['input']    ['biases'],  []),
                   (Br, params['recurrent']['biases'],  []))
        for name_from, name_to, extra_dims in reforms:
            xi, xo, xf, xc = [name_from + suffix for suffix in ("_i", "_o", "_f", "_c")]
            for i, x in enumerate([xi, xo, xf, xc]):
                dim0 = i * hidden_size, (i+1) * hidden_size
                starts, ends = zip(dim0, *extra_dims)
                init_net.Slice(name_from, x, starts=starts, ends=ends)
            init_net.Concat([xi, xf, xo, xc], [name_to, dummy_name()], axis=0)

        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={ hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1] }
        )

        return Caffe2Ops(list(pred_mh.Proto().op),
                         list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
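
The "i o f c -> i f o c" comment is the heart of the weight rewrite: each reform slices the four stacked gate blocks out of the ONNX tensor and concatenates them back in Caffe2's gate order. Roughly, in NumPy (tiny illustrative shapes, all names are mine):

import numpy as np

hidden_size, input_size = 2, 3
# ONNX stacks the LSTM gate blocks along dim 0 in the order i, o, f, c.
W_onnx = np.random.randn(4 * hidden_size, input_size)

i, o, f, c = (W_onnx[k * hidden_size:(k + 1) * hidden_size] for k in range(4))
# Caffe2 expects i, f, o, c, hence the Concat([xi, xf, xo, xc]) above.
W_caffe2 = np.concatenate([i, f, o, c], axis=0)
assert W_caffe2.shape == W_onnx.shape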
Example #6
    def _create_gemm(cls, op_def, shapes):
        x, w, b = op_def.input
        args = {arg.name: arg for arg in op_def.arg}
        y, = op_def.output
        x_shape = list(shapes[x])

        nodes = []
        if 'axis' in args:
            axis = args['axis'].i
            outer = np.prod(x_shape[:axis]).astype(int)
            inner = np.prod(x_shape[axis:]).astype(int)
            reshaped_x = dummy_name()
            nodes.append(
                helper.make_node(
                    'Reshape',
                    inputs=[x],
                    outputs=[reshaped_x],
                    shape=[outer, inner],
                ))
            x = reshaped_x

        if 'axis_w' in args:
            axis_w = args['axis_w'].i
            w_shape = shapes[w]
            outer = np.prod(w_shape[:axis_w]).astype(int).item()
            inner = np.prod(w_shape[axis_w:]).astype(int).item()
            reshaped_w = dummy_name()
            nodes.append(
                helper.make_node(
                    'Reshape',
                    inputs=[w],
                    outputs=[reshaped_w],
                    shape=[outer, inner],
                ))
            w = reshaped_w

        gemm_y_output = dummy_name() if 'axis' in args else y
        nodes.append(
            helper.make_node(
                'Gemm',
                inputs=[x, w, b],
                outputs=[gemm_y_output],
                name=op_def.name,
                transB=1,
                broadcast=1,
            ))

        if 'axis' in args:
            axis = args['axis'].i
            nodes.append(
                helper.make_node(
                    'Reshape',
                    inputs=[gemm_y_output],
                    outputs=[y],
                    shape=x_shape[:axis] + [-1],
                ))

        return nodes
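
Caffe2's FC flattens its input to 2-D around `axis` (and the weight around `axis_w`) before the matrix multiply, which is why the exporter wraps the Gemm in Reshape nodes. The outer/inner split is just the product of the leading and trailing dimensions, for example:

import numpy as np

x_shape = [2, 3, 4, 5]
axis = 2
outer = int(np.prod(x_shape[:axis]))   # 2 * 3 = 6
inner = int(np.prod(x_shape[axis:]))   # 4 * 5 = 20

x = np.random.randn(*x_shape)
x2d = x.reshape(outer, inner)          # what the inserted Reshape produces
assert x2d.shape == (6, 20)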
Example #7
    def _create_slice(cls, op_def, shapes):
        node = cls._common_caffe2_op_to_onnx_node(op_def, shapes)
        attrs = {attr.name: attr for attr in node.attribute}

        nodes = []

        data = node.input[0]
        n_dims = len(shapes[data])
        if 'starts' in attrs:
            assert 'ends' in attrs
            assert len(node.input) == 1
            starts = dummy_name()
            ends = dummy_name()

            axes = dummy_name()
            nodes.append(
                helper.make_node('Constant',
                                 inputs=[],
                                 outputs=[starts],
                                 value=helper.make_tensor(
                                     name=axes,
                                     data_type=TensorProto.INT64,
                                     dims=(n_dims, ),
                                     vals=attrs.pop('starts').ints,
                                 )))
            nodes.append(
                helper.make_node('Constant',
                                 inputs=[],
                                 outputs=[ends],
                                 value=helper.make_tensor(
                                     name=axes,
                                     data_type=TensorProto.INT64,
                                     dims=(n_dims, ),
                                     vals=attrs.pop('ends').ints,
                                 )))
        else:
            assert len(node.input) == 3
            starts, ends = node.input[1:]

        axes = dummy_name()
        nodes.append(
            helper.make_node('Constant',
                             inputs=[],
                             outputs=[axes],
                             value=helper.make_tensor(
                                 name=axes,
                                 data_type=TensorProto.INT32,
                                 dims=(n_dims, ),
                                 vals=list(range(n_dims)),
                             )))
        node.input[:] = [data, axes, starts, ends]

        del node.attribute[:]
        node.attribute.extend(attrs.values())
        nodes.append(node)

        return nodes
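
Here, starts/ends that arrive as attributes are materialized as Constant tensors so they can be passed as inputs alongside an explicit axes tensor. What slicing with explicit starts/ends/axes means, in plain NumPy (the helper is illustrative, not part of the converter):

import numpy as np

def slice_reference(data, starts, ends, axes):
    # Apply (start, end) per listed axis; other axes are left untouched.
    index = [slice(None)] * data.ndim
    for axis, start, end in zip(axes, starts, ends):
        index[axis] = slice(start, end)
    return data[tuple(index)]

data = np.arange(24).reshape(2, 3, 4)
out = slice_reference(data, starts=[0, 1], ends=[2, 3], axes=[0, 2])
assert out.shape == (2, 3, 2)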
Example #8
    def onnx_graph_to_caffe2_net(cls,
                                 graph_def,
                                 device="CPU",
                                 opset_version=_known_opset_version):
        device_option = get_device_option(Device(device))
        cls._inplace_rewrite(graph_def)
        if graph_def.initializer:
            init_net = cls.onnx_initializer_to_caffe2_init_net(
                graph_def.initializer)
            initialized = {init.name for init in graph_def.initializer}
        else:
            init_net = caffe2_pb2.NetDef()
            initialized = set()

        dummy_name(cls._all_names_in_graph(graph_def) | initialized)

        predict_net = caffe2_pb2.NetDef()
        predict_net.name = graph_def.name
        for node in graph_def.node:
            predict_net.op.extend(
                cls._onnx_node_to_caffe2_op(node, opset_version))

        predict_net.external_input.extend(value_info.name
                                          for value_info in graph_def.input)
        predict_net.external_output.extend(value_info.name
                                           for value_info in graph_def.output)

        # The Caffe2 predictor requires that all input blobs (including
        # the real model inputs) be initialized in init_net
        for value_info in graph_def.input:
            if value_info.name in initialized:
                continue
            op_def = caffe2_pb2.OperatorDef()
            op_def.output.extend([value_info.name])
            op_def.type = 'GivenTensorFill'

            shape = list(d.dim_value
                         for d in value_info.type.tensor_type.shape.dim)
            # TODO: Putting this in the init net will make it run faster, but it
            # causes some tests to fail...
            # shape = (1,)

            shape_arg = op_def.arg.add()
            shape_arg.name = 'shape'
            shape_arg.ints.extend(shape)

            values_arg = op_def.arg.add()
            values_arg.name = 'values'
            values_arg.floats.extend(np.ones(shape).flatten().tolist())

            init_net.op.extend([op_def])

        # Set the device option for the init_net and predict_net.
        init_net.device_option.CopyFrom(device_option)
        predict_net.device_option.CopyFrom(device_option)

        return init_net, predict_net
Example #9
    def test_dummy_name(self):
        dummy_name([])
        names_1 = [dummy_name() for _ in range(3)]
        dummy_name([])
        names_2 = [dummy_name() for _ in range(3)]
        self.assertEqual(names_1, names_2)

        dummy_name(names_1)
        names_3 = [dummy_name() for _ in range(3)]
        self.assertFalse(set(names_1) & set(names_3))
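
The test encodes the two behaviours of dummy_name: called with a collection it reseeds the generator (the counter restarts and the given names are treated as taken), and called with no arguments it returns a fresh unused name. A toy re-implementation with those semantics (illustrative only; the real dummy_name may differ in details such as the generated prefix):

import itertools

class _DummyNameSketch(object):
    def __init__(self):
        self.reset([])

    def reset(self, used_names):
        self._used = set(used_names)
        self._counter = itertools.count()

    def __call__(self, used_names=None):
        if used_names is not None:      # dummy_name(collection): reseed
            self.reset(used_names)
            return
        while True:                     # dummy_name(): fresh, unused name
            candidate = 'DUMMY_%d' % next(self._counter)
            if candidate not in self._used:
                self._used.add(candidate)
                return candidate

dummy_name = _DummyNameSketch()
dummy_name([])
first = [dummy_name() for _ in range(3)]
dummy_name(first)
assert not set(first) & {dummy_name() for _ in range(3)}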
Example #10
    def _create_concat(cls, n, opset_version):
        # TODO: Caffe2 Concat has an extra output. It should be only
        # used when doing training, so we should change Caffe2 to allow
        # 1 output.
        op = cls._common_onnx_node_to_caffe2_op(n, opset_version)
        assert len(op.output) == 1
        op.output.append(dummy_name())
        return op
Example #11
    def onnx_graph_to_caffe2_net(cls, graph_def):
        cls._inplace_rewrite(graph_def)
        if graph_def.initializer:
            init_net = cls.onnx_initializer_to_caffe2_init_net(
                graph_def.initializer)
            initialized = {init.name for init in graph_def.initializer}
        else:
            init_net = caffe2_pb2.NetDef()
            initialized = set()

        dummy_name(cls._all_names_in_graph(graph_def) | initialized)

        predict_net = caffe2_pb2.NetDef()
        predict_net.name = graph_def.name
        for node in graph_def.node:
            predict_net.op.extend(cls._onnx_node_to_caffe2_op(node))

        predict_net.external_input.extend(value_info.name
                                          for value_info in graph_def.input)
        predict_net.external_output.extend(value_info.name
                                           for value_info in graph_def.output)

        # The Caffe2 predictor requires that all input blobs (including
        # the real model inputs) be initialized in init_net
        for value_info in graph_def.input:
            if value_info.name in initialized:
                continue
            op_def = caffe2_pb2.OperatorDef()
            op_def.output.extend([value_info.name])
            op_def.type = 'GivenTensorFill'

            shape = list(d.dim_value
                         for d in value_info.type.tensor_type.shape.dim)

            shape_arg = op_def.arg.add()
            shape_arg.name = 'shape'
            shape_arg.ints.extend(shape)

            values_arg = op_def.arg.add()
            values_arg.name = 'values'
            values_arg.floats.extend(np.ones(shape).flatten().tolist())

            init_net.op.extend([op_def])

        return init_net, predict_net
Example #12
    def _create_logsoftmax(cls, init_model, pred_model, n, opset_version):
        # NB: this implementation is not backward stable.
        (A, ) = n.inputs
        (Y, ) = n.outputs
        axis = n.attrs.get('axis', 1)
        ops = []
        softmax_A = dummy_name()
        ops.append(core.CreateOperator('Softmax', [A], [softmax_A], axis=axis))
        ops.append(core.CreateOperator('Log', [softmax_A], [Y]))
        return ops
Example #13
        def make_rnn(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 2 * direction_offset * hidden_size
            init_net.Slice(B,
                           name + "/i2h_b",
                           starts=[bias_offset + 0 * hidden_size],
                           ends=[bias_offset + 1 * hidden_size])
            init_net.Slice(B,
                           name + "/gates_t_b",
                           starts=[bias_offset + 1 * hidden_size],
                           ends=[bias_offset + 2 * hidden_size])

            weight_offset = direction_offset * hidden_size
            init_net.Slice(W,
                           name + '/i2h_w',
                           starts=[weight_offset + 0 * hidden_size, 0],
                           ends=[weight_offset + 1 * hidden_size, -1])
            init_net.Slice(R,
                           name + '/gates_t_w',
                           starts=[weight_offset + 0 * hidden_size, 0],
                           ends=[weight_offset + 1 * hidden_size, -1])

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h,
                           initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends=[direction_offset + 1, -1, -1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
                pred_mh,
                input,
                sequence_lens, [initial_h_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=True,
                forward_only=True,
                activation=activation)

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last
Example #14
    def _create_rnn(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert RNNs without access to the full model"
        assert pred_model is not None, "cannot convert RNNs without access to the full model"

        attrs = dict(n.attrs)  # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        activation = attrs.pop('activations')[0]
        assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())

        input_blob, W, R, B, sequence_lens, initial_h = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        input_size = cls._rnn_shape_inference(init_model, pred_model, n,
                                              input_blob, W)
        if input_size is None:
            raise RuntimeError(
                "best-effort shape inference for RNN input failed")

        name = dummy_name()
        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        # input and recurrence biases are squashed together in onnx but not in caffe2
        Bi = name + "/i2h_b"
        Br = name + "/gates_t_b"
        init_net.Slice(B, Bi, starts=[0 * hidden_size], ends=[1 * hidden_size])
        init_net.Slice(B, Br, starts=[1 * hidden_size], ends=[2 * hidden_size])

        hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(pred_mh,
                                                        input_blob,
                                                        sequence_lens,
                                                        [initial_h],
                                                        input_size,
                                                        hidden_size,
                                                        name,
                                                        drop_states=True,
                                                        forward_only=True,
                                                        activation=activation)

        init_net.Copy(W, name + '/i2h_w')
        init_net.Copy(R, name + '/gates_t_w')

        pred_mh.net = pred_mh.net.Clone("dummy-clone-net",
                                        blob_remap={
                                            hidden_t_all: n.outputs[0],
                                            hidden_t_last: n.outputs[1]
                                        })

        return Caffe2Ops(list(pred_mh.Proto().op), list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
Example #15
    def _create_optimized_rnn(cls, n, opset_version):
        # TODO: we cheat and rely on the fact that ONNX weight layout matches
        # CuDNN's. Properly we should extract the weight tensor and invoke
        # RecurrentParamSet exposed by C2

        # TODO: fix Caffe2 to accept initial_h and initial_c as optional inputs
        assert len(n.inputs) == 4, 'All inputs need to be specified for now'
        assert len(n.outputs) == 3, 'All outputs need to be specified for now'
        (w, x, in_h, in_c) = n.inputs
        (y, out_h, out_c) = n.outputs

        op = core.CreateOperator(
            'Recurrent',
            [x, in_h, in_c, w],
            [y, out_h, out_c, dummy_name(), dummy_name()],
            rnn_mode=n.attrs['cell_type'],
            bidirectional=n.attrs.get('directions', 1) - 1,
            hidden_size=n.attrs['hidden_size'],
            num_layers=n.attrs.get('num_layers', 1),
            input_mode='skip' if n.attrs.get('skip_input_transform', 0)
            else 'linear')
        return op
Example #16
    def _create_reshape(cls, n):
        c2_op = cls._common_onnx_node_to_caffe2_op(n)
        # Caffe2 has an extra output
        c2_op.output.append(dummy_name())
        return c2_op
Example #17
    def test_dummy_name(self):
        n1 = dummy_name()
        n2 = dummy_name()
        assert n1 != n2, "Got same names in different calls: {}".format(n1)
Example #18
        def make_gru(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 6 * direction_offset * hidden_size
            Bi = init_net.Slice(B, name + "_bias_i2h",
                                starts=[bias_offset + 0 * hidden_size],
                                ends  =[bias_offset + 3 * hidden_size])
            Br = init_net.Slice(B, name + "_bias_gates",
                                starts=[bias_offset + 3 * hidden_size],
                                ends  =[bias_offset + 6 * hidden_size])

            weight_offset = 3 * direction_offset * hidden_size
            W_ = init_net.Slice(W, name + '/i2h_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 3 * hidden_size,-1])
            R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 3 * hidden_size,-1])

            # caffe2 has a different order from onnx. We need to rearrange
            #  z r h  -> r z h
            reforms = ((W_, 'i2h_w',    True,  [(0,-1)]),
                       (R_, 'gate_t_w', False, [(0,-1)]),
                       (Bi, 'i2h_b',    True,  []),
                       (Br, 'gate_t_b', False, []))
            for name_from, name_to, do_concat, extra_dims in reforms:
                xz, xr, xh = ['%s/%s_%s' % (name, prefix, name_to) for prefix in ('update', 'reset', 'output')]
                for i, x in enumerate([xz, xr, xh]):
                    dim0 = i * hidden_size, (i+1) * hidden_size
                    starts, ends = zip(dim0, *extra_dims)
                    init_net.Slice(name_from, x, starts=starts, ends=ends)
                if do_concat:
                    init_net.Concat([xr, xz, xh], ['%s/%s' % (name, name_to), dummy_name()], axis=0)

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h, initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last = gru_cell.GRU(
                pred_mh,
                input,
                sequence_lens,
                [initial_h_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=True,
                forward_only=True,
                linear_before_reset=linear_before_reset
            )

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last
Example #19
    def _create_gru(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert GRUs without access to the full model"
        assert pred_model is not None, "cannot convert GRUs without access to the full model"

        attrs = dict(n.attrs) # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        linear_before_reset = attrs.pop('linear_before_reset', 0)
        direction = force_unicode(attrs.pop('direction', 'forward'))
        assert not attrs, "unsupported GRU attributes: " + str(attrs.keys())
        assert direction in ['forward', 'bidirectional'], "unsupported backwards GRU"

        input_blob, W, R, B, sequence_lens, initial_h = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
        if input_size is None:
            raise RuntimeError("best-effort shape inference for GRU input failed")

        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        def make_gru(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 6 * direction_offset * hidden_size
            Bi = init_net.Slice(B, name + "_bias_i2h",
                                starts=[bias_offset + 0 * hidden_size],
                                ends  =[bias_offset + 3 * hidden_size])
            Br = init_net.Slice(B, name + "_bias_gates",
                                starts=[bias_offset + 3 * hidden_size],
                                ends  =[bias_offset + 6 * hidden_size])

            weight_offset = 3 * direction_offset * hidden_size
            W_ = init_net.Slice(W, name + '/i2h_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 3 * hidden_size,-1])
            R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 3 * hidden_size,-1])

            # caffe2 has a different order from onnx. We need to rearrange
            #  z r h  -> r z h
            reforms = ((W_, 'i2h_w',    True,  [(0,-1)]),
                       (R_, 'gate_t_w', False, [(0,-1)]),
                       (Bi, 'i2h_b',    True,  []),
                       (Br, 'gate_t_b', False, []))
            for name_from, name_to, do_concat, extra_dims in reforms:
                xz, xr, xh = ['%s/%s_%s' % (name, prefix, name_to) for prefix in ('update', 'reset', 'output')]
                for i, x in enumerate([xz, xr, xh]):
                    dim0 = i * hidden_size, (i+1) * hidden_size
                    starts, ends = zip(dim0, *extra_dims)
                    init_net.Slice(name_from, x, starts=starts, ends=ends)
                if do_concat:
                    init_net.Concat([xr, xz, xh], ['%s/%s' % (name, name_to), dummy_name()], axis=0)

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h, initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last = gru_cell.GRU(
                pred_mh,
                input,
                sequence_lens,
                [initial_h_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=True,
                forward_only=True,
                linear_before_reset=linear_before_reset
            )

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last

        if direction == 'forward':
            hidden_t_all, hidden_t_last = make_gru(0)
            pred_mh.net = pred_mh.net.Clone(
                "dummy-clone-net",
                blob_remap={ hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1] }
            )
        elif direction == 'bidirectional':
            hidden_t_all_f, hidden_t_last_f = make_gru(0)
            hidden_t_all_b, hidden_t_last_b = make_gru(1)
            pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                               [n.outputs[0], dummy_name()], axis=2)
            pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                               [n.outputs[1], dummy_name()], axis=2)

        return Caffe2Ops(list(pred_mh.Proto().op),
                         list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
Example #20
        def make_lstm(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 8 * direction_offset * hidden_size
            Bi = init_net.Slice(B, name + "_bias_i2h",
                                starts=[bias_offset + 0 * hidden_size],
                                ends  =[bias_offset + 4 * hidden_size])
            Br = init_net.Slice(B, name + "_bias_gates",
                                starts=[bias_offset + 4 * hidden_size],
                                ends  =[bias_offset + 8 * hidden_size])

            weight_offset = 4 * direction_offset * hidden_size
            W_ = init_net.Slice(W, name + '/i2h_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 4 * hidden_size,-1])
            R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 4 * hidden_size,-1])

            # caffe2 has a different order from onnx. We need to rearrange
            #   i o f c -> i f o c
            reforms = ((W_, 'i2h_w',     [(0, -1)]),
                       (R_, 'gates_t_w', [(0, -1)]),
                       (Bi, 'i2h_b'    , []),
                       (Br, 'gates_t_b', []))
            for name_from, name_to, extra_dims in reforms:
                xi, xo, xf, xc = [name_from + suffix for suffix in ("_i", "_o", "_f", "_c")]
                for i, x in enumerate([xi, xo, xf, xc]):
                    dim0 = i * hidden_size, (i+1) * hidden_size
                    starts, ends = zip(dim0, *extra_dims)
                    init_net.Slice(name_from, x, starts=starts, ends=ends)
                init_net.Concat([xi, xf, xo, xc], ['%s/%s' % (name, name_to), dummy_name()], axis=0)

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h, initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])
            initial_c_sliced = name + '/initial_c'
            init_net.Slice(initial_c, initial_c_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last, _, _, params = rnn_cell.LSTM(
                pred_mh,
                input,
                sequence_lens,
                [initial_h_sliced, initial_c_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=True,
                forward_only=True,
                return_params=True
            )

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last
Example #21
    def _create_slice(cls, init_model, pred_model, n, opset_version):
        op = cls._common_onnx_node_to_caffe2_op(init_model, pred_model, n,
                                                opset_version)
        args = {arg.name: arg for arg in op.arg}
        starts_vals = np.array(args.pop('starts').ints,
                               dtype=np.int64).tolist()
        ends_vals = np.array(
            [i - 1 if i < 0 else i for i in args.pop('ends').ints],
            dtype=np.int64).tolist()
        if 'axes' in args:
            axes_vals = np.array(args.pop('axes').ints,
                                 dtype=np.int32).tolist()
        else:
            ndims = len(starts_vals)
            axes_vals = np.array(range(ndims), dtype=np.int32).tolist()

        data, = op.input
        ops = []

        shape_tensor = dummy_name()
        ops.append(core.CreateOperator('Shape', [data], [shape_tensor]))

        axes_tensor = dummy_name()
        ops.extend([
            core.CreateOperator(
                'GivenTensorIntFill',
                [],
                [axes_tensor],
                shape=[len(axes_vals)],
                values=axes_vals,
            ),
        ])

        starts_vals_tensor = dummy_name()
        starts_tensor = dummy_name()
        casted_starts_tensor = dummy_name()
        ops.extend([
            core.CreateOperator(
                'GivenTensorInt64Fill',
                [],
                [starts_vals_tensor],
                shape=[len(starts_vals)],
                values=starts_vals,
            ),
            core.CreateOperator(
                'ConstantFill',
                [shape_tensor],
                [starts_tensor],
                dtype=caffe2_pb2.TensorProto.INT64,
                value=0,
            ),
            core.CreateOperator(
                'ScatterAssign',
                [starts_tensor, axes_tensor, starts_vals_tensor],
                [starts_tensor],
            ),
            # Slice only accepts starts as int
            core.CreateOperator(
                'Cast',
                [starts_tensor],
                [casted_starts_tensor],
                to=caffe2_pb2.TensorProto.INT32,
            ),
        ])

        ends_vals_tensor = dummy_name()
        ends_tensor = dummy_name()
        casted_ends_tensor = dummy_name()
        ops.extend([
            core.CreateOperator(
                'GivenTensorInt64Fill',
                [],
                [ends_vals_tensor],
                shape=[len(ends_vals)],
                values=ends_vals,
            ),
            core.CreateOperator(
                'ConstantFill',
                [shape_tensor],
                [ends_tensor],
                dtype=caffe2_pb2.TensorProto.INT64,
                value=-1,
            ),
            core.CreateOperator(
                'ScatterAssign',
                [ends_tensor, axes_tensor, ends_vals_tensor],
                [ends_tensor],
            ),
            # Slice only accepts ends as int
            core.CreateOperator(
                'Cast',
                [ends_tensor],
                [casted_ends_tensor],
                to=caffe2_pb2.TensorProto.INT32,
            ),
        ])

        op.input[:] = [data, casted_starts_tensor, casted_ends_tensor]
        del op.arg[:]
        op.arg.extend(args.values())
        ops.append(op)

        return ops
Example #22
    def caffe2_net_to_onnx_graph(cls,
                                 predict_net,
                                 init_net=None,
                                 value_info=None):
        if value_info is None:
            value_info = {}
        if not isinstance(value_info, dict):
            raise ValueError('Please pass value_info as a '
                             'name -> (type, shape) dictionary')

        cls._ssa_rewrite(predict_net, init_net, value_info)

        if init_net:
            initializer = cls.caffe2_init_net_to_initializer(init_net)
            value_info.update({
                init.name: (init.data_type, init.dims)
                for init in initializer
            })
        else:
            initializer = []

        # Check whether we have got type shape info of all input
        missing = (set(list(predict_net.external_input)) -
                   set(value_info.keys()))
        if missing:
            raise RuntimeError(
                'Could not find value info of inputs: {}'.format(
                    ', '.join(missing)))

        inputs = {}
        for name in predict_net.external_input:
            elem_type, shape = value_info[name]
            inputs[name] = np.random.randn(*shape).astype(
                mapping.TENSOR_TYPE_TO_NP_TYPE[elem_type])

        ws, outputs = c2_native_run_net(init_net, predict_net, inputs)

        for name in predict_net.external_output:
            output = outputs[name]
            elem_type = mapping.NP_TYPE_TO_TENSOR_TYPE[output.dtype]
            shape = output.shape
            value_info[name] = (elem_type, shape)

        graph_def = GraphProto()
        graph_def.name = predict_net.name
        graph_def.initializer.extend(initializer)
        # This is a mapping from Caffe2 names to ONNX names
        graph_def.input.extend(
            make_tensor_value_info(name=name,
                                   elem_type=value_info[name][0],
                                   shape=value_info[name][1])
            for name in predict_net.external_input)

        dummy_name(
            cls._all_names_in_net(predict_net)
            | cls._all_names_in_net(init_net))

        for op in predict_net.op:
            shapes = {}
            for name in itertools.chain(op.input, op.output):
                blob = ws.FetchBlob(name)
                if hasattr(blob, 'shape'):
                    shapes[name] = blob.shape
            graph_def.node.extend(cls.caffe2_op_to_onnx_node(op,
                                                             shapes=shapes))

        all_output = set(
            sum((list(node.output) for node in graph_def.node),
                [init.name for init in graph_def.initializer]))
        redundant_output = set(vi.name for vi in graph_def.output) - all_output
        if redundant_output:
            logger.warning(
                'There are graph output not produced by any node or initializer: {}'
                '! Will drop them.'.format(', '.join(redundant_output)))
        graph_def.output.extend(
            make_tensor_value_info(name=name,
                                   elem_type=value_info[name][0],
                                   shape=value_info[name][1])
            for name in predict_net.external_output if name in all_output)

        cls._annotate_consumed(graph_def)
        checker.check_graph(graph_def)
        return graph_def
Example #23
    def _create_gru(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert GRUs without access to the full model"
        assert pred_model is not None, "cannot convert GRUs without access to the full model"

        attrs = dict(n.attrs)  # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        linear_before_reset = attrs.pop('linear_before_reset')
        assert not attrs, "unsupported GRU attributes: " + str(attrs.keys())

        input_blob, W, R, B, sequence_lens, initial_h = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        input_size = cls._rnn_shape_inference(init_model, pred_model, n,
                                              input_blob, W)
        if input_size is None:
            raise RuntimeError(
                "best-effort shape inference for GRU input failed")

        name = dummy_name()
        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        hidden_t_all, hidden_t_last = gru_cell.GRU(
            pred_mh,
            input_blob,
            sequence_lens, [initial_h],
            input_size,
            hidden_size,
            name,
            drop_states=True,
            forward_only=True,
            linear_before_reset=linear_before_reset)

        # input and recurrence biases are squashed together in onnx but not in caffe2
        Bi = name + "_bias_i2h"
        Br = name + "_bias_gates"
        init_net.Slice(B, Bi, starts=[0 * hidden_size], ends=[3 * hidden_size])
        init_net.Slice(B, Br, starts=[3 * hidden_size], ends=[6 * hidden_size])

        # caffe2 has a different order from onnx. We need to rearrange
        #  z r h  -> r z h
        #
        # TODO implement support for return_params in gru_cell.GRU.
        # Until then, hardcode blob names.
        reforms = ((W, 'i2h_w', True, [
            (0, input_size)
        ]), (R, 'gate_t_w', False, [(0, hidden_size)]),
                   (Bi, 'i2h_b', True, []), (Br, 'gate_t_b', False, []))
        for name_from, name_to, do_concat, extra_dims in reforms:
            xz, xr, xh = [
                '%s/%s_%s' % (name, prefix, name_to)
                for prefix in ('update', 'reset', 'output')
            ]
            for i, x in enumerate([xz, xr, xh]):
                dim0 = i * hidden_size, (i + 1) * hidden_size
                starts, ends = zip(dim0, *extra_dims)
                init_net.Slice(name_from, x, starts=starts, ends=ends)
            if do_concat:
                init_net.Concat([xr, xz, xh],
                                ['%s/%s' % (name, name_to),
                                 dummy_name()],
                                axis=0)

        pred_mh.net = pred_mh.net.Clone("dummy-clone-net",
                                        blob_remap={
                                            hidden_t_all: n.outputs[0],
                                            hidden_t_last: n.outputs[1]
                                        })

        return Caffe2Ops(list(pred_mh.Proto().op), list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
Example #24
    def _create_rnn(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert RNNs without access to the full model"
        assert pred_model is not None, "cannot convert RNNs without access to the full model"

        attrs = dict(n.attrs) # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        activation = force_unicode(attrs.pop('activations', ('tanh',))[0])
        direction = force_unicode(attrs.pop('direction', 'forward'))
        assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
        assert direction in ['forward', 'bidirectional'], "unsupported backwards RNN"

        input_blob, W, R, B, sequence_lens, initial_h = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
        if input_size is None:
            raise RuntimeError("best-effort shape inference for RNN input failed")

        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        def make_rnn(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 2 * direction_offset * hidden_size
            init_net.Slice(B, name + "/i2h_b",
                           starts=[bias_offset + 0 * hidden_size],
                           ends  =[bias_offset + 1 * hidden_size])
            init_net.Slice(B, name + "/gates_t_b",
                           starts=[bias_offset + 1 * hidden_size],
                           ends  =[bias_offset + 2 * hidden_size])

            weight_offset = direction_offset * hidden_size
            init_net.Slice(W, name + '/i2h_w',
                           starts=[weight_offset + 0 * hidden_size, 0],
                           ends  =[weight_offset + 1 * hidden_size,-1])
            init_net.Slice(R, name + '/gates_t_w',
                           starts=[weight_offset + 0 * hidden_size, 0],
                           ends  =[weight_offset + 1 * hidden_size,-1])

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h, initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
                pred_mh,
                input,
                sequence_lens,
                [initial_h_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=True,
                forward_only=True,
                activation=activation
            )

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last

        if direction == 'forward':
            hidden_t_all, hidden_t_last = make_rnn(0)
            pred_mh.net = pred_mh.net.Clone(
                "dummy-clone-net",
                blob_remap={ hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1] }
            )
        elif direction == 'bidirectional':
            hidden_t_all_f, hidden_t_last_f = make_rnn(0)
            hidden_t_all_b, hidden_t_last_b = make_rnn(1)
            pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                               [n.outputs[0], dummy_name()], axis=2)
            pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                               [n.outputs[1], dummy_name()], axis=2)

        return Caffe2Ops(list(pred_mh.Proto().op),
                         list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
Example #25
    def _create_reshape(cls, init_model, pred_model, n, opset_version):
        c2_op = cls._common_onnx_node_to_caffe2_op(init_model, pred_model, n,
                                                   opset_version)
        # Caffe2 has an extra output
        c2_op.output.append(dummy_name())
        return c2_op
Example #26
    def _create_slice(cls, n):
        op = cls._common_onnx_node_to_caffe2_op(n)
        data, axes, orig_starts, orig_ends = op.input
        ops = []

        data_shape = dummy_name()
        ops.append(core.CreateOperator(
            'Shape',
            [data],
            [data_shape]
        ))

        tmp_starts = dummy_name()
        starts = dummy_name()
        ops.extend([
            core.CreateOperator(
                'ConstantFill',
                [data_shape],
                [tmp_starts],
                dtype=caffe2_pb2.TensorProto.INT64,
                value=0,
            ),
            core.CreateOperator(
                'ScatterAssign',
                [tmp_starts, axes, orig_starts],
                [tmp_starts],
            ),
            # Slice only accepts starts as int
            core.CreateOperator(
                'Cast',
                [tmp_starts],
                [starts],
                to=caffe2_pb2.TensorProto.INT32,
            ),
        ])

        tmp_ends = dummy_name()
        ends = dummy_name()
        ops.extend([
            core.CreateOperator(
                'ConstantFill',
                [data_shape],
                [tmp_ends],
                dtype=caffe2_pb2.TensorProto.INT64,
                value=-1,
            ),
            core.CreateOperator(
                'ScatterAssign',
                [tmp_ends, axes, orig_ends],
                [tmp_ends],
            ),
            # Slice only accepts ends as int
            core.CreateOperator(
                'Cast',
                [tmp_ends],
                [ends],
                to=caffe2_pb2.TensorProto.INT32,
            ),
        ])

        op.input[:] = [data, starts, ends]
        ops.append(op)

        return ops