Example #1
    def _create_gemm(cls, init_model, pred_model, n, opset_version):
        (A, B, C) = n.inputs
        (Y, ) = n.outputs
        alpha = n.attrs.get('alpha', 1.)
        beta = n.attrs.get('beta', 1.)

        ops = []
        if alpha != 1:
            scaled_A = dummy_name()
            ops.append(
                core.CreateOperator('Scale', [A], [scaled_A], scale=alpha))
            A = scaled_A
        if beta != 1:
            scaled_C = dummy_name()
            ops.append(
                core.CreateOperator('Scale', [C], [scaled_C], scale=beta))
            C = scaled_C

        trans_a = n.attrs.get('transA', 0)
        trans_b = n.attrs.get('transB', 0)
        broadcast = n.attrs.get('broadcast', 0)
        if not trans_a and trans_b and broadcast:
            ops.append(core.CreateOperator('FC', [A, B, C], [Y]))
        else:
            AB = dummy_name()
            ops.append(
                core.CreateOperator('MatMul', [A, B], [AB],
                                    trans_a=trans_a,
                                    trans_b=trans_b))
            ops.append(
                core.CreateOperator('Add', [AB, C], [Y], broadcast=broadcast))

        return ops
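
Note on Example #1: this mirrors the ONNX Gemm definition Y = alpha * op(A) * op(B) + beta * C; the FC fast path applies when transA == 0, transB == 1 and the bias broadcasts. A rough NumPy sketch of that contract (the helper name below is illustrative, not part of the converter):

import numpy as np

def gemm_reference(A, B, C, alpha=1., beta=1., transA=0, transB=0):
    # ONNX Gemm contract: Y = alpha * op(A) @ op(B) + beta * C
    A = A.T if transA else A
    B = B.T if transB else B
    return alpha * (A @ B) + beta * C

# The FC fast path above corresponds to transA == 0, transB == 1,
# with a bias C that broadcasts over the rows of A @ B.T.
A = np.random.randn(2, 3)
B = np.random.randn(4, 3)   # stored transposed, as FC expects
C = np.random.randn(4)
assert gemm_reference(A, B, C, transB=1).shape == (2, 4)
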
Example #2
    def _create_gemm(cls, op_def, shapes):
        x, w, b = op_def.input
        args = {arg.name: arg for arg in op_def.arg}
        y, = op_def.output
        x_shape = list(shapes[x])

        nodes = []
        if 'axis' in args:
            axis = args['axis'].i
            outer = np.prod(x_shape[:axis]).astype(int)
            inner = np.prod(x_shape[axis:]).astype(int)
            reshaped_x = dummy_name()
            nodes.append(
                helper.make_node(
                    'Reshape',
                    inputs=[x],
                    outputs=[reshaped_x],
                    shape=[outer, inner],
                ))
            x = reshaped_x

        if 'axis_w' in args:
            axis_w = args['axis_w'].i
            w_shape = shapes[w]
            outer = np.prod(w_shape[:axis_w]).astype(int).item()
            inner = np.prod(w_shape[axis_w:]).astype(int).item()
            reshaped_w = dummy_name()
            nodes.append(
                helper.make_node(
                    'Reshape',
                    inputs=[w],
                    outputs=[reshaped_w],
                    shape=[outer, inner],
                ))
            w = reshaped_w

        gemm_y_output = dummy_name() if 'axis' in args else y
        nodes.append(
            helper.make_node(
                'Gemm',
                inputs=[x, w, b],
                outputs=[gemm_y_output],
                name=op_def.name,
                transB=1,
                broadcast=1,
            ))

        if 'axis' in args:
            axis = args['axis'].i
            nodes.append(
                helper.make_node(
                    'Reshape',
                    inputs=[gemm_y_output],
                    outputs=[y],
                    shape=x_shape[:axis] + [-1],
                ))

        return nodes
Example #3
    def _create_gemm(cls, op_def, shapes):
        x, w, b = op_def.input
        args = {arg.name: arg for arg in op_def.arg}
        y, = op_def.output
        x_shape = list(shapes[x])

        nodes = []
        const_tensors = []
        if 'axis' in args:
            axis = args['axis'].i
            outer = np.prod(x_shape[:axis]).astype(int)
            inner = np.prod(x_shape[axis:]).astype(int)
            reshaped_x = dummy_name()
            shape_tensor = cls._create_shape_tensor([outer, inner])
            const_tensors.append(shape_tensor)
            nodes.append(helper.make_node(
                'Reshape',
                inputs=[x, shape_tensor.name],
                outputs=[reshaped_x],
            ))
            x = reshaped_x

        if 'axis_w' in args:
            axis_w = args['axis_w'].i
            w_shape = shapes[w]
            outer = np.prod(w_shape[:axis_w]).astype(int).item()
            inner = np.prod(w_shape[axis_w:]).astype(int).item()
            reshaped_w = dummy_name()
            shape_tensor = cls._create_shape_tensor([outer, inner])
            const_tensors.append(shape_tensor)
            nodes.append(helper.make_node(
                'Reshape',
                inputs=[w, shape_tensor.name],
                outputs=[reshaped_w],
            ))
            w = reshaped_w

        gemm_y_output = dummy_name() if 'axis' in args else y
        nodes.append(helper.make_node(
            'Gemm',
            inputs=[x, w, b],
            outputs=[gemm_y_output],
            name=op_def.name,
            transB=1,
            broadcast=1,
        ))

        if 'axis' in args:
            axis = args['axis'].i
            shape_tensor = cls._create_shape_tensor(x_shape[:axis] + [-1])
            const_tensors.append(shape_tensor)
            nodes.append(helper.make_node(
                'Reshape',
                inputs=[gemm_y_output, shape_tensor.name],
                outputs=[y],
            ))

        return nodes, const_tensors
Example #4
    def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version,
                                  include_initializers):
        device_option = get_device_option(Device(device))

        init_model = ModelProto()
        init_model.ParseFromString(
            cls.optimize_onnx(onnx_model.SerializeToString(), init=True))

        pred_model = ModelProto()
        pred_model.ParseFromString(
            cls.optimize_onnx(onnx_model.SerializeToString(), predict=True))

        init_net = caffe2_pb2.NetDef()
        pred_net = caffe2_pb2.NetDef()

        init_net.name = onnx_model.graph.name + '_init'
        pred_net.name = onnx_model.graph.name + '_predict'

        if include_initializers:
            init_net.op.extend(
                cls._create_tensor_filling_op(tp)
                for tp in onnx_model.graph.initializer)

        dummy_name(
            cls._all_names_in_graph(init_model.graph)
            | cls._all_names_in_graph(pred_model.graph))

        success = True
        for net, model in ((init_net, init_model), (pred_net, pred_model)):
            net.device_option.CopyFrom(device_option)
            for node in model.graph.node:
                try:
                    c2ops = cls._onnx_node_to_caffe2_op(
                        init_model, pred_model, node, opset_version)
                except Exception as e:
                    success = False
                    print('ONNX FATAL:', e)
                    continue
                (init_net if include_initializers else net).op.extend(
                    c2ops.init_ops)
                net.op.extend(c2ops.ops)
                net.external_input.extend(c2ops.interface_blobs)
            net.external_output.extend(value_info.name
                                       for value_info in model.graph.output)
            net.external_input.extend(value_info.name
                                      for value_info in model.graph.input)

        if not success:
            raise RuntimeError('ONNX conversion failed')

        return init_net, pred_net
Example #5
    def test_dummy_name(self):
        dummy_name([])
        names_1 = [dummy_name() for _ in range(3)]
        dummy_name([])
        names_2 = [dummy_name() for _ in range(3)]
        self.assertEqual(names_1, names_2)

        dummy_name(names_1)
        names_3 = [dummy_name() for _ in range(3)]
        self.assertFalse(set(names_1) & set(names_3))
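
Note on Example #5: the test assumes dummy_name acts as a resettable, collision-avoiding name generator: calling it with an iterable reseeds the set of used names, calling it with no argument returns a fresh name. A minimal sketch of that contract (the class and name prefix below are illustrative, not the actual caffe2 helper):

class _DummyNameGenerator(object):
    def __init__(self):
        self.reset(set())

    def reset(self, used_names):
        self._used = set(used_names)
        self._counter = 0

    def new_name(self):
        # Skip over names that are already taken.
        while True:
            name = 'OC2_DUMMY_{}'.format(self._counter)
            self._counter += 1
            if name not in self._used:
                self._used.add(name)
                return name

_generator = _DummyNameGenerator()

def dummy_name(used_names=None):
    # dummy_name(iterable) reseeds the generator; dummy_name() yields a fresh name.
    if used_names is not None:
        _generator.reset(used_names)
        return None
    return _generator.new_name()
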
Example #6
    def _create_concat(cls, init_model, pred_model, n, opset_version):
        # TODO: Caffe2 Concat has an extra output. It should only be
        # used when doing training, so we should change Caffe2 to allow
        # 1 output.
        op = cls._common_onnx_node_to_caffe2_op(init_model, pred_model, n, opset_version)
        assert len(op.output) == 1
        op.output.append(dummy_name())
        return op
Example #7
    def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version,
                                  include_initializers):
        device_option = get_device_option(Device(device))

        init_model = ModelProto()
        init_model.ParseFromString(
            cls.optimize_onnx(onnx_model.SerializeToString(), init=True))
        cls._inplace_rewrite(init_model.graph)

        pred_model = ModelProto()
        pred_model.ParseFromString(
            cls.optimize_onnx(onnx_model.SerializeToString(), predict=True))
        cls._inplace_rewrite(pred_model.graph)

        init_net = caffe2_pb2.NetDef()
        pred_net = caffe2_pb2.NetDef()

        init_net.name = onnx_model.graph.name + '_init'
        pred_net.name = onnx_model.graph.name + '_predict'

        if include_initializers:
            init_net.op.extend(
                cls._create_tensor_filling_op(tp)
                for tp in onnx_model.graph.initializer)

        dummy_name(
            cls._all_names_in_graph(init_model.graph)
            | cls._all_names_in_graph(pred_model.graph))

        for net, model in ((init_net, init_model), (pred_net, pred_model)):
            net.device_option.CopyFrom(device_option)
            for node in model.graph.node:
                c2ops = cls._onnx_node_to_caffe2_op(init_model, pred_model,
                                                    node, opset_version)
                (init_net if include_initializers else net).op.extend(
                    c2ops.init_ops)
                net.op.extend(c2ops.ops)
                net.external_input.extend(c2ops.interface_blobs)
            net.external_output.extend(value_info.name
                                       for value_info in model.graph.output)
            net.external_input.extend(value_info.name
                                      for value_info in model.graph.input)

        return init_net, pred_net
Example #8
    def _create_logsoftmax(cls, init_model, pred_model, n, opset_version):
        # NB: this implementation is not backward stable.
        (A,) = n.inputs
        (Y,) = n.outputs
        axis = n.attrs.get('axis', 1)
        ops = []
        softmax_A = dummy_name()
        ops.append(core.CreateOperator('Softmax', [A], [softmax_A], axis=axis))
        ops.append(core.CreateOperator('Log', [softmax_A], [Y]))
        return ops
Example #9
        def make_rnn(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 2 * direction_offset * hidden_size
            init_net.Slice(B,
                           name + "/i2h_b",
                           starts=[bias_offset + 0 * hidden_size],
                           ends=[bias_offset + 1 * hidden_size])
            init_net.Slice(B,
                           name + "/gates_t_b",
                           starts=[bias_offset + 1 * hidden_size],
                           ends=[bias_offset + 2 * hidden_size])

            weight_offset = direction_offset * hidden_size
            init_net.Slice(W,
                           name + '/i2h_w',
                           starts=[weight_offset + 0 * hidden_size, 0],
                           ends=[weight_offset + 1 * hidden_size, -1])
            init_net.Slice(R,
                           name + '/gates_t_w',
                           starts=[weight_offset + 0 * hidden_size, 0],
                           ends=[weight_offset + 1 * hidden_size, -1])

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h,
                           initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends=[direction_offset + 1, -1, -1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
                pred_mh,
                input,
                sequence_lens, [initial_h_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=False,
                forward_only=True,
                activation=activation)

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last
Example #10
    def _create_channel_shuffle(cls, op_def, shapes):
        x, = op_def.input
        y, = op_def.output
        n, c, h, w = shapes[x]
        args = {arg.name: arg for arg in op_def.arg}
        g = args['group'].i
        assert c % g == 0

        nodes = []
        const_tensors = []

        tmp1 = dummy_name()
        shape_tensor = cls._create_shape_tensor([n, g, c // g, h, w])
        const_tensors.append(shape_tensor)
        nodes.append(
            helper.make_node(
                'Reshape',
                inputs=[x, shape_tensor.name],
                outputs=[tmp1],
            ))

        tmp2 = dummy_name()
        nodes.append(
            helper.make_node(
                'Transpose',
                inputs=[tmp1],
                outputs=[tmp2],
                perm=[0, 2, 1, 3, 4],
            ))

        shape_tensor = cls._create_shape_tensor([n, c, h, w])
        const_tensors.append(shape_tensor)
        nodes.append(
            helper.make_node(
                'Reshape',
                inputs=[tmp2, shape_tensor.name],
                outputs=[y],
            ))
        return nodes, const_tensors
Example #11
    def _create_channel_shuffle(cls, op_def, shapes):
        x, = op_def.input
        y, = op_def.output
        n, c, h, w = shapes[x]
        args = {arg.name: arg for arg in op_def.arg}
        g = args['group'].i
        assert c % g == 0

        nodes = []
        const_tensors = []

        tmp1 = dummy_name()
        shape_tensor = cls._create_shape_tensor([n, g, c // g, h, w])
        const_tensors.append(shape_tensor)
        nodes.append(helper.make_node(
            'Reshape',
            inputs=[x, shape_tensor.name],
            outputs=[tmp1],
        ))

        tmp2 = dummy_name()
        nodes.append(helper.make_node(
            'Transpose',
            inputs=[tmp1],
            outputs=[tmp2],
            perm=[0, 2, 1, 3, 4],
        ))

        shape_tensor = cls._create_shape_tensor([n, c, h, w])
        const_tensors.append(shape_tensor)
        nodes.append(helper.make_node(
            'Reshape',
            inputs=[tmp2, shape_tensor.name],
            outputs=[y],
        ))
        return nodes, const_tensors
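
Note on Examples #10 and #11: both emit the same three ONNX nodes, i.e. reshape to (n, g, c // g, h, w), swap the two channel axes, and reshape back. The equivalent NumPy operation, for reference:

import numpy as np

def channel_shuffle_reference(x, g):
    # x has shape (n, c, h, w); channels are regrouped across the g groups.
    n, c, h, w = x.shape
    assert c % g == 0
    return (x.reshape(n, g, c // g, h, w)
             .transpose(0, 2, 1, 3, 4)
             .reshape(n, c, h, w))

x = np.arange(1 * 6 * 2 * 2).reshape(1, 6, 2, 2)
assert channel_shuffle_reference(x, g=3).shape == x.shape
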
Example #12
        def make_lstm(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 8 * direction_offset * hidden_size
            Bi = init_net.Slice(B, name + "_bias_i2h",
                                starts=[bias_offset + 0 * hidden_size],
                                ends  =[bias_offset + 4 * hidden_size])
            Br = init_net.Slice(B, name + "_bias_gates",
                                starts=[bias_offset + 4 * hidden_size],
                                ends  =[bias_offset + 8 * hidden_size])

            weight_offset = 4 * direction_offset * hidden_size
            W_ = init_net.Slice(W, name + '/i2h_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 4 * hidden_size,-1])
            R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 4 * hidden_size,-1])

            # caffe2 has a different order from onnx. We need to rearrange
            #   i o f c -> i f o c
            reforms = ((W_, 'i2h_w',     [(0, -1)]),
                       (R_, 'gates_t_w', [(0, -1)]),
                       (Bi, 'i2h_b'    , []),
                       (Br, 'gates_t_b', []))
            for name_from, name_to, extra_dims in reforms:
                xi, xo, xf, xc = [name_from + suffix for suffix in ("_i", "_o", "_f", "_c")]
                for i, x in enumerate([xi, xo, xf, xc]):
                    dim0 = i * hidden_size, (i+1) * hidden_size
                    starts, ends = zip(dim0, *extra_dims)
                    init_net.Slice(name_from, x, starts=starts, ends=ends)
                init_net.Concat([xi, xf, xo, xc], ['%s/%s' % (name, name_to), dummy_name()], axis=0)

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h, initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])
            initial_c_sliced = name + '/initial_c'
            init_net.Slice(initial_c, initial_c_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last, _, _, params = rnn_cell.LSTM(
                pred_mh,
                input,
                sequence_lens,
                [initial_h_sliced, initial_c_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=True,
                forward_only=True,
                return_params=True
            )

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last
Example #13
    def caffe2_net_to_onnx_graph(cls,
                                 predict_net,
                                 init_net=None,
                                 value_info=None):
        if value_info is None:
            value_info = {}
        if not isinstance(value_info, dict):
            raise ValueError('Please pass value_info as a '
                             'name -> (type, shape) dictionary')

        cls._filter_fake_init(init_net, value_info)
        cls._ssa_rewrite(predict_net, init_net, value_info)

        if init_net:
            initializer = cls.caffe2_init_net_to_initializer(init_net)
            value_info.update({init.name: (init.data_type, init.dims)
                               for init in initializer})
        else:
            initializer = []

        # Check whether we have got type shape info of all input
        missing = (set(list(predict_net.external_input)) -
                   set(value_info.keys()))
        if missing:
            raise RuntimeError('Could not find value info of inputs: {}'.format(
                ', '.join(missing)))

        inputs = {}
        for name in predict_net.external_input:
            elem_type, shape = value_info[name]
            inputs[name] = np.random.randn(*shape).astype(
                mapping.TENSOR_TYPE_TO_NP_TYPE[elem_type])

        ws, outputs = c2_native_run_net(
            init_net,
            predict_net,
            inputs)

        for name in predict_net.external_output:
            output = outputs[name]
            elem_type = mapping.NP_TYPE_TO_TENSOR_TYPE[output.dtype]
            shape = output.shape
            value_info[name] = (elem_type, shape)

        graph_def = GraphProto()
        graph_def.name = predict_net.name
        graph_def.initializer.extend(initializer)
        # This is a mapping from Caffe2 names to ONNX names
        graph_def.input.extend(
            make_tensor_value_info(
                name=name,
                elem_type=value_info[name][0],
                shape=value_info[name][1])
            for name in predict_net.external_input)

        dummy_name(cls._all_names_in_net(predict_net) |
                   cls._all_names_in_net(init_net))

        for op in predict_net.op:
            shapes = {}
            for name in itertools.chain(op.input, op.output):
                blob = ws.FetchBlob(name)
                if hasattr(blob, 'shape'):
                    shapes[name] = blob.shape
            nodes, const_tensors = cls.caffe2_op_to_onnx_node(op, shapes=shapes)
            graph_def.node.extend(nodes)
            graph_def.initializer.extend(const_tensors)
            graph_def.input.extend([cls._extract_value_info(tensor) for tensor in const_tensors])

        all_output = set(sum((list(node.output) for node in graph_def.node),
                             [init.name for init in graph_def.initializer]))
        redundant_output = set(vi.name for vi in graph_def.output) - all_output
        if redundant_output:
            logger.warning(
                'There are graph outputs not produced by any node or initializer: {}'
                '! Will drop them.'.format(', '.join(redundant_output)))
        graph_def.output.extend(
            make_tensor_value_info(
                name=name,
                elem_type=value_info[name][0],
                shape=value_info[name][1])
            for name in predict_net.external_output
            if name in all_output)

        checker.check_graph(graph_def)
        return graph_def
Example #14
    def _create_rnn(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert RNNs without access to the full model"
        assert pred_model is not None, "cannot convert RNNs without access to the full model"

        attrs = dict(n.attrs) # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        activation = force_unicode(attrs.pop('activations', ('tanh',))[0])
        direction = force_unicode(attrs.pop('direction', 'forward'))
        assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
        assert direction in ['forward', 'bidirectional'], "unsupported backwards RNN"

        input_blob, W, R, B, sequence_lens, initial_h = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
        if input_size is None:
            raise RuntimeError("best-effort shape inference for RNN input failed")

        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        def make_rnn(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 2 * direction_offset * hidden_size
            init_net.Slice(B, name + "/i2h_b",
                           starts=[bias_offset + 0 * hidden_size],
                           ends  =[bias_offset + 1 * hidden_size])
            init_net.Slice(B, name + "/gates_t_b",
                           starts=[bias_offset + 1 * hidden_size],
                           ends  =[bias_offset + 2 * hidden_size])

            weight_offset = direction_offset * hidden_size
            init_net.Slice(W, name + '/i2h_w',
                           starts=[weight_offset + 0 * hidden_size, 0],
                           ends  =[weight_offset + 1 * hidden_size,-1])
            init_net.Slice(R, name + '/gates_t_w',
                           starts=[weight_offset + 0 * hidden_size, 0],
                           ends  =[weight_offset + 1 * hidden_size,-1])

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h, initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
                pred_mh,
                input,
                sequence_lens,
                [initial_h_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=True,
                forward_only=True,
                activation=activation
            )

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last

        if direction == 'forward':
            hidden_t_all, hidden_t_last = make_rnn(0)
            pred_mh.net = pred_mh.net.Clone(
                "dummy-clone-net",
                blob_remap={ hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1] }
            )
        elif direction == 'bidirectional':
            hidden_t_all_f, hidden_t_last_f = make_rnn(0)
            hidden_t_all_b, hidden_t_last_b = make_rnn(1)
            pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                               [n.outputs[0], dummy_name()], axis=2)
            pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                               [n.outputs[1], dummy_name()], axis=2)

        return Caffe2Ops(list(pred_mh.Proto().op),
                         list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
Example #15
    def _create_lstm(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert LSTMs without access to the full model"
        assert pred_model is not None, "cannot convert LSTMs without access to the full model"

        attrs = dict(n.attrs) # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        direction = force_unicode(attrs.pop('direction', 'forward'))
        assert not attrs, "unsupported LSTM attributes: " + str(attrs.keys())
        assert direction in ['forward', 'bidirectional'], "unsupported backwards LSTM"

        input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
        if input_size is None:
            raise RuntimeError("best-effort shape inference for LSTM input failed")

        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        def make_lstm(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 8 * direction_offset * hidden_size
            Bi = init_net.Slice(B, name + "_bias_i2h",
                                starts=[bias_offset + 0 * hidden_size],
                                ends  =[bias_offset + 4 * hidden_size])
            Br = init_net.Slice(B, name + "_bias_gates",
                                starts=[bias_offset + 4 * hidden_size],
                                ends  =[bias_offset + 8 * hidden_size])

            weight_offset = 4 * direction_offset * hidden_size
            W_ = init_net.Slice(W, name + '/i2h_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 4 * hidden_size,-1])
            R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 4 * hidden_size,-1])

            # caffe2 has a different order from onnx. We need to rearrange
            #   i o f c -> i f o c
            reforms = ((W_, 'i2h_w',     [(0, -1)]),
                       (R_, 'gates_t_w', [(0, -1)]),
                       (Bi, 'i2h_b'    , []),
                       (Br, 'gates_t_b', []))
            for name_from, name_to, extra_dims in reforms:
                xi, xo, xf, xc = [name_from + suffix for suffix in ("_i", "_o", "_f", "_c")]
                for i, x in enumerate([xi, xo, xf, xc]):
                    dim0 = i * hidden_size, (i+1) * hidden_size
                    starts, ends = zip(dim0, *extra_dims)
                    init_net.Slice(name_from, x, starts=starts, ends=ends)
                init_net.Concat([xi, xf, xo, xc], ['%s/%s' % (name, name_to), dummy_name()], axis=0)

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h, initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])
            initial_c_sliced = name + '/initial_c'
            init_net.Slice(initial_c, initial_c_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last, _, cell_last, params = rnn_cell.LSTM(
                pred_mh,
                input,
                sequence_lens,
                [initial_h_sliced, initial_c_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=False,
                forward_only=True,
                return_params=True
            )

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last, cell_last

        if direction == 'forward':
            hidden_t_all, hidden_t_last, cell_last = make_lstm(0)

            # in the forward case, storage is shared between the three
            # outputs. We need to decouple them so that the
            # VariableLengthSequencePadding only mutates n.outputs[0]
            pred_mh.net.Copy(hidden_t_last, n.outputs[1])
            pred_mh.net.Copy(cell_last, n.outputs[2])

            pred_mh.net = pred_mh.net.Clone(
                "dummy-clone-net",
                blob_remap={ hidden_t_all: n.outputs[0] }
            )
        elif direction == 'bidirectional':
            hidden_t_all_f, hidden_t_last_f, cell_last_f = make_lstm(0)
            hidden_t_all_b, hidden_t_last_b, cell_last_b = make_lstm(1)
            pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                               [n.outputs[0], dummy_name()], axis=2)
            pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                               [n.outputs[1], dummy_name()], axis=0)
            pred_mh.net.Concat([cell_last_f, cell_last_b],
                               [n.outputs[2], dummy_name()], axis=0)

        if sequence_lens is not None:
            pred_mh.net.VariableLengthSequencePadding(
                [n.outputs[0], sequence_lens], [n.outputs[0]])

        return Caffe2Ops(list(pred_mh.Proto().op),
                         list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
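
Note on Example #15: the reform loop splits the ONNX LSTM weight and bias blocks (gate order i, o, f, c) into hidden_size-sized chunks and re-concatenates them in Caffe2's i, f, o, c order. The same permutation in NumPy for a 1-D bias-like block (illustrative):

import numpy as np

def reorder_iofc_to_ifoc(block, hidden_size):
    # ONNX packs LSTM gates as i, o, f, c; Caffe2's LSTM expects i, f, o, c.
    i, o, f, c = (block[k * hidden_size:(k + 1) * hidden_size] for k in range(4))
    return np.concatenate([i, f, o, c], axis=0)

hidden_size = 2
block = np.arange(4 * hidden_size)
print(reorder_iofc_to_ifoc(block, hidden_size))  # [0 1 4 5 2 3 6 7]
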
Example #16
    def _create_shape_tensor(cls, shape):
        return make_tensor(name=dummy_name(),
                           data_type=TensorProto.INT64,
                           dims=[len(shape)],
                           vals=np.asarray(shape, dtype=np.int64).tobytes(),
                           raw=True)
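
Note on Example #16: the helper packs the target shape as a raw int64 tensor, which ONNX Reshape takes as its second input. A small round-trip check (the tensor name is illustrative; make_tensor and numpy_helper come from the onnx package):

import numpy as np
from onnx import TensorProto, numpy_helper
from onnx.helper import make_tensor

shape = [2, 3, 4]
t = make_tensor(name='shape_tensor',  # stands in for dummy_name()
                data_type=TensorProto.INT64,
                dims=[len(shape)],
                vals=np.asarray(shape, dtype=np.int64).tobytes(),
                raw=True)
assert numpy_helper.to_array(t).tolist() == shape
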
Example #17
    def _create_gru(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert GRUs without access to the full model"
        assert pred_model is not None, "cannot convert GRUs without access to the full model"

        attrs = dict(n.attrs) # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        linear_before_reset = attrs.pop('linear_before_reset', 0)
        direction = force_unicode(attrs.pop('direction', 'forward'))
        assert not attrs, "unsupported GRU attributes: " + str(attrs.keys())
        assert direction in ['forward', 'bidirectional'], "unsupported backwards GRU"

        input_blob, W, R, B, sequence_lens, initial_h = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
        if input_size is None:
            raise RuntimeError("best-effort shape inference for GRU input failed")

        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        def make_gru(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 6 * direction_offset * hidden_size
            Bi = init_net.Slice(B, name + "_bias_i2h",
                                starts=[bias_offset + 0 * hidden_size],
                                ends  =[bias_offset + 3 * hidden_size])
            Br = init_net.Slice(B, name + "_bias_gates",
                                starts=[bias_offset + 3 * hidden_size],
                                ends  =[bias_offset + 6 * hidden_size])

            weight_offset = 3 * direction_offset * hidden_size
            W_ = init_net.Slice(W, name + '/i2h_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 3 * hidden_size,-1])
            R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 3 * hidden_size,-1])

            # caffe2 has a different order from onnx. We need to rearrange
            #  z r h  -> r z h
            reforms = ((W_, 'i2h_w',    True,  [(0,-1)]),
                       (R_, 'gate_t_w', False, [(0,-1)]),
                       (Bi, 'i2h_b',    True,  []),
                       (Br, 'gate_t_b', False, []))
            for name_from, name_to, do_concat, extra_dims in reforms:
                xz, xr, xh = ['%s/%s_%s' % (name, prefix, name_to) for prefix in ('update', 'reset', 'output')]
                for i, x in enumerate([xz, xr, xh]):
                    dim0 = i * hidden_size, (i+1) * hidden_size
                    starts, ends = zip(dim0, *extra_dims)
                    init_net.Slice(name_from, x, starts=starts, ends=ends)
                if do_concat:
                    init_net.Concat([xr, xz, xh], ['%s/%s' % (name, name_to), dummy_name()], axis=0)

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h, initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last = gru_cell.GRU(
                pred_mh,
                input,
                sequence_lens,
                [initial_h_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=True,
                forward_only=True,
                linear_before_reset=linear_before_reset
            )

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last

        if direction == 'forward':
            hidden_t_all, hidden_t_last = make_gru(0)
            pred_mh.net = pred_mh.net.Clone(
                "dummy-clone-net",
                blob_remap={ hidden_t_all: n.outputs[0], hidden_t_last: n.outputs[1] }
            )
        elif direction == 'bidirectional':
            hidden_t_all_f, hidden_t_last_f = make_gru(0)
            hidden_t_all_b, hidden_t_last_b = make_gru(1)
            pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                               [n.outputs[0], dummy_name()], axis=2)
            pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                               [n.outputs[1], dummy_name()], axis=2)

        return Caffe2Ops(list(pred_mh.Proto().op),
                         list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
Example #18
    def test_dummy_name(self):
        dummy_name([])
        names_1 = [dummy_name() for _ in range(3)]
        dummy_name([])
        names_2 = [dummy_name() for _ in range(3)]
        self.assertEqual(names_1, names_2)

        dummy_name(names_1)
        names_3 = [dummy_name() for _ in range(3)]
        self.assertFalse(set(names_1) & set(names_3))

        dummy_name(set(names_1))
        names_4 = [dummy_name() for _ in range(3)]
        self.assertFalse(set(names_1) & set(names_4))
Example #19
    def _create_reshape(cls, init_model, pred_model, n, opset_version):
        c2_op = cls._common_onnx_node_to_caffe2_op(init_model, pred_model, n, opset_version)
        # Caffe2 has an extra output
        c2_op.output.append(dummy_name())
        return c2_op
Example #20
        def make_gru(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 6 * direction_offset * hidden_size
            Bi = init_net.Slice(B, name + "_bias_i2h",
                                starts=[bias_offset + 0 * hidden_size],
                                ends  =[bias_offset + 3 * hidden_size])
            Br = init_net.Slice(B, name + "_bias_gates",
                                starts=[bias_offset + 3 * hidden_size],
                                ends  =[bias_offset + 6 * hidden_size])

            weight_offset = 3 * direction_offset * hidden_size
            W_ = init_net.Slice(W, name + '/i2h_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 3 * hidden_size,-1])
            R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends  =[weight_offset + 3 * hidden_size,-1])

            # caffe2 has a different order from onnx. We need to rearrange
            #  z r h  -> r z h
            reforms = ((W_, 'i2h_w',    True,  [(0,-1)]),
                       (R_, 'gate_t_w', False, [(0,-1)]),
                       (Bi, 'i2h_b',    True,  []),
                       (Br, 'gate_t_b', False, []))
            for name_from, name_to, do_concat, extra_dims in reforms:
                xz, xr, xh = ['%s/%s_%s' % (name, prefix, name_to) for prefix in ('update', 'reset', 'output')]
                for i, x in enumerate([xz, xr, xh]):
                    dim0 = i * hidden_size, (i+1) * hidden_size
                    starts, ends = zip(dim0, *extra_dims)
                    init_net.Slice(name_from, x, starts=starts, ends=ends)
                if do_concat:
                    init_net.Concat([xr, xz, xh], ['%s/%s' % (name, name_to), dummy_name()], axis=0)

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h, initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends  =[direction_offset + 1,-1,-1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last = gru_cell.GRU(
                pred_mh,
                input,
                sequence_lens,
                [initial_h_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=True,
                forward_only=True,
                linear_before_reset=linear_before_reset
            )

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last
Example #21
    def test_dummy_name(self):
        n1 = dummy_name()
        n2 = dummy_name()
        assert n1 != n2, "Got same names in different calls: {}".format(n1)
Example #22
    def _create_slice(cls, init_model, pred_model, n, opset_version):
        op = cls._common_onnx_node_to_caffe2_op(init_model, pred_model, n, opset_version)
        args = {arg.name: arg for arg in op.arg}
        starts_vals = np.array(
            args.pop('starts').ints, dtype=np.int64).tolist()
        ends_vals = np.array(
            [i - 1 if i < 0 else i for i in args.pop('ends').ints],
            dtype=np.int64).tolist()
        if 'axes' in args:
            axes_vals = np.array(
                args.pop('axes').ints, dtype=np.int32).tolist()
        else:
            ndims = len(starts_vals)
            axes_vals = np.array(range(ndims), dtype=np.int32).tolist()

        data, = op.input
        ops = []

        shape_tensor = dummy_name()
        ops.append(core.CreateOperator(
            'Shape',
            [data],
            [shape_tensor]
        ))

        axes_tensor = dummy_name()
        ops.extend([
            core.CreateOperator(
                'GivenTensorIntFill',
                [],
                [axes_tensor],
                shape=[len(axes_vals)],
                values=axes_vals,
            ),
        ])

        starts_vals_tensor = dummy_name()
        starts_tensor = dummy_name()
        casted_starts_tensor = dummy_name()
        ops.extend([
            core.CreateOperator(
                'GivenTensorInt64Fill',
                [],
                [starts_vals_tensor],
                shape=[len(starts_vals)],
                values=starts_vals,
            ),
            core.CreateOperator(
                'ConstantFill',
                [shape_tensor],
                [starts_tensor],
                dtype=caffe2_pb2.TensorProto.INT64,
                value=0,
            ),
            core.CreateOperator(
                'ScatterAssign',
                [starts_tensor, axes_tensor, starts_vals_tensor],
                [starts_tensor],
            ),
            # Slice only accepts starts as int
            core.CreateOperator(
                'Cast',
                [starts_tensor],
                [casted_starts_tensor],
                to=caffe2_pb2.TensorProto.INT32,
            ),
        ])

        ends_vals_tensor = dummy_name()
        ends_tensor = dummy_name()
        casted_ends_tensor = dummy_name()
        ops.extend([
            core.CreateOperator(
                'GivenTensorInt64Fill',
                [],
                [ends_vals_tensor],
                shape=[len(ends_vals)],
                values=ends_vals,
            ),
            core.CreateOperator(
                'ConstantFill',
                [shape_tensor],
                [ends_tensor],
                dtype=caffe2_pb2.TensorProto.INT64,
                value=-1,
            ),
            core.CreateOperator(
                'ScatterAssign',
                [ends_tensor, axes_tensor, ends_vals_tensor],
                [ends_tensor],
            ),
            # Slice only accepts ends as int
            core.CreateOperator(
                'Cast',
                [ends_tensor],
                [casted_ends_tensor],
                to=caffe2_pb2.TensorProto.INT32,
            ),
        ])

        op.input[:] = [data, casted_starts_tensor, casted_ends_tensor]
        del op.arg[:]
        op.arg.extend(args.values())
        ops.append(op)

        return ops
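
Note on Example #22: the translation builds full-rank starts/ends vectors at runtime by filling a default vector shaped like the data's rank (0 for starts, -1 for ends) and scatter-assigning the per-axis ONNX values into the requested axes, then casting to int32 for Caffe2's Slice. Roughly, in NumPy terms (helper name is illustrative):

import numpy as np

def build_slice_bounds(rank, axes, starts_vals, ends_vals):
    # Mirrors the ConstantFill + ScatterAssign + Cast pattern above.
    starts = np.zeros(rank, dtype=np.int64)
    ends = np.full(rank, -1, dtype=np.int64)   # -1 means "up to the end" for Caffe2 Slice
    starts[axes] = starts_vals
    ends[axes] = ends_vals
    return starts.astype(np.int32), ends.astype(np.int32)

starts, ends = build_slice_bounds(4, axes=[1], starts_vals=[2], ends_vals=[5])
# starts -> [0, 2, 0, 0], ends -> [-1, 5, -1, -1]
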
Example #23
    def test_dummy_name(self):
        n1 = dummy_name()
        n2 = dummy_name()
        assert n1 != n2, "Got same names in different calls: {}".format(n1)
Example #24
    def caffe2_net_to_onnx_graph(cls,
                                 predict_net,
                                 init_net=None,
                                 value_info=None):
        if value_info is None:
            value_info = {}
        if not isinstance(value_info, dict):
            raise ValueError('Please pass value_info as a '
                             'name -> (type, shape) dictionary')

        cls._filter_fake_init(init_net, value_info)
        cls._ssa_rewrite(predict_net, init_net, value_info)

        if init_net:
            initializer = cls.caffe2_init_net_to_initializer(init_net)
            value_info.update({
                init.name: (init.data_type, init.dims)
                for init in initializer
            })
        else:
            initializer = []

        # Check whether we have got type shape info of all input
        missing = (set(list(predict_net.external_input)) -
                   set(value_info.keys()))
        if missing:
            raise RuntimeError(
                'Could not find value info of inputs: {}'.format(
                    ', '.join(missing)))

        inputs = {}
        for name in predict_net.external_input:
            elem_type, shape = value_info[name]
            inputs[name] = np.random.randn(*shape).astype(
                mapping.TENSOR_TYPE_TO_NP_TYPE[elem_type])

        ws, outputs = c2_native_run_net(init_net, predict_net, inputs)

        for name in predict_net.external_output:
            output = outputs[name]
            elem_type = mapping.NP_TYPE_TO_TENSOR_TYPE[output.dtype]
            shape = output.shape
            value_info[name] = (elem_type, shape)

        graph_def = GraphProto()
        graph_def.name = predict_net.name
        graph_def.initializer.extend(initializer)
        # This is a mapping from Caffe2 names to ONNX names
        graph_def.input.extend(
            make_tensor_value_info(name=name,
                                   elem_type=value_info[name][0],
                                   shape=value_info[name][1])
            for name in predict_net.external_input)

        dummy_name(
            cls._all_names_in_net(predict_net)
            | cls._all_names_in_net(init_net))

        for op in predict_net.op:
            shapes = {}
            for name in itertools.chain(op.input, op.output):
                blob = ws.FetchBlob(name)
                if hasattr(blob, 'shape'):
                    shapes[name] = blob.shape
            nodes, const_tensors = cls.caffe2_op_to_onnx_node(op,
                                                              shapes=shapes)
            graph_def.node.extend(nodes)
            graph_def.initializer.extend(const_tensors)
            graph_def.input.extend(
                [cls._extract_value_info(tensor) for tensor in const_tensors])

        all_output = set(
            sum((list(node.output) for node in graph_def.node),
                [init.name for init in graph_def.initializer]))
        redundant_output = set(vi.name for vi in graph_def.output) - all_output
        if redundant_output:
            logger.warning(
                'There are graph outputs not produced by any node or initializer: {}'
                '! Will drop them.'.format(', '.join(redundant_output)))
        graph_def.output.extend(
            make_tensor_value_info(name=name,
                                   elem_type=value_info[name][0],
                                   shape=value_info[name][1])
            for name in predict_net.external_output if name in all_output)

        checker.check_graph(graph_def)
        return graph_def
Example #25
    def _create_shape_tensor(cls, shape):
        return make_tensor(name=dummy_name(),
                           data_type=TensorProto.INT64,
                           dims=[len(shape)],
                           vals=np.asarray(shape, dtype=np.int64).tobytes(),
                           raw=True)