def _create_gemm(cls, init_model, pred_model, n, opset_version):
    (A, B, C) = n.inputs
    (Y,) = n.outputs
    alpha = n.attrs.get('alpha', 1.)
    beta = n.attrs.get('beta', 1.)

    ops = []
    if alpha != 1:
        scaled_A = dummy_name()
        ops.append(core.CreateOperator(
            'Scale', [A], [scaled_A], scale=alpha))
        A = scaled_A
    if beta != 1:
        scaled_C = dummy_name()
        ops.append(core.CreateOperator(
            'Scale', [C], [scaled_C], scale=beta))
        C = scaled_C

    trans_a = n.attrs.get('transA', 0)
    trans_b = n.attrs.get('transB', 0)
    broadcast = n.attrs.get('broadcast', 0)
    if not trans_a and trans_b and broadcast:
        ops.append(core.CreateOperator('FC', [A, B, C], [Y]))
    else:
        AB = dummy_name()
        ops.append(core.CreateOperator(
            'MatMul', [A, B], [AB], trans_a=trans_a, trans_b=trans_b))
        ops.append(core.CreateOperator(
            'Add', [AB, C], [Y], broadcast=broadcast))
    return ops
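# --- Illustrative sketch (not part of the original source) ---
# ONNX Gemm computes Y = alpha * op(A) * op(B) + beta * C; the converter
# above decomposes it into Scale + MatMul + Add, or a single FC when
# transA=0, transB=1, broadcast=1. A minimal numpy check of that
# decomposition, with made-up shapes and transB=1:
import numpy as np

A = np.random.randn(2, 3).astype(np.float32)
B = np.random.randn(4, 3).astype(np.float32)   # stored (N, K), used transposed
C = np.random.randn(4).astype(np.float32)
alpha, beta = 0.5, 2.0

gemm = alpha * A.dot(B.T) + beta * C              # Gemm semantics
decomposed = (alpha * A).dot(B.T) + (beta * C)    # Scale, MatMul, Add
assert np.allclose(gemm, decomposed)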
def _create_gemm(cls, op_def, shapes):
    x, w, b = op_def.input
    args = {arg.name: arg for arg in op_def.arg}
    y, = op_def.output
    x_shape = list(shapes[x])

    nodes = []
    if 'axis' in args:
        axis = args['axis'].i
        outer = np.prod(x_shape[:axis]).astype(int)
        inner = np.prod(x_shape[axis:]).astype(int)
        reshaped_x = dummy_name()
        nodes.append(helper.make_node(
            'Reshape',
            inputs=[x],
            outputs=[reshaped_x],
            shape=[outer, inner],
        ))
        x = reshaped_x

    if 'axis_w' in args:
        axis_w = args['axis_w'].i
        w_shape = shapes[w]
        outer = np.prod(w_shape[:axis_w]).astype(int).item()
        inner = np.prod(w_shape[axis_w:]).astype(int).item()
        reshaped_w = dummy_name()
        nodes.append(helper.make_node(
            'Reshape',
            inputs=[w],
            outputs=[reshaped_w],
            shape=[outer, inner],
        ))
        w = reshaped_w

    gemm_y_output = dummy_name() if 'axis' in args else y
    nodes.append(helper.make_node(
        'Gemm',
        inputs=[x, w, b],
        outputs=[gemm_y_output],
        name=op_def.name,
        transB=1,
        broadcast=1,
    ))

    if 'axis' in args:
        axis = args['axis'].i
        nodes.append(helper.make_node(
            'Reshape',
            inputs=[gemm_y_output],
            outputs=[y],
            shape=x_shape[:axis] + [-1],
        ))

    return nodes
def _create_gemm(cls, op_def, shapes):
    x, w, b = op_def.input
    args = {arg.name: arg for arg in op_def.arg}
    y, = op_def.output
    x_shape = list(shapes[x])

    nodes = []
    const_tensors = []
    if 'axis' in args:
        axis = args['axis'].i
        outer = np.prod(x_shape[:axis]).astype(int)
        inner = np.prod(x_shape[axis:]).astype(int)
        reshaped_x = dummy_name()
        shape_tensor = cls._create_shape_tensor([outer, inner])
        const_tensors.append(shape_tensor)
        nodes.append(helper.make_node(
            'Reshape',
            inputs=[x, shape_tensor.name],
            outputs=[reshaped_x],
        ))
        x = reshaped_x

    if 'axis_w' in args:
        axis_w = args['axis_w'].i
        w_shape = shapes[w]
        outer = np.prod(w_shape[:axis_w]).astype(int).item()
        inner = np.prod(w_shape[axis_w:]).astype(int).item()
        reshaped_w = dummy_name()
        shape_tensor = cls._create_shape_tensor([outer, inner])
        const_tensors.append(shape_tensor)
        nodes.append(helper.make_node(
            'Reshape',
            inputs=[w, shape_tensor.name],
            outputs=[reshaped_w],
        ))
        w = reshaped_w

    gemm_y_output = dummy_name() if 'axis' in args else y
    nodes.append(helper.make_node(
        'Gemm',
        inputs=[x, w, b],
        outputs=[gemm_y_output],
        name=op_def.name,
        transB=1,
        broadcast=1,
    ))

    if 'axis' in args:
        axis = args['axis'].i
        shape_tensor = cls._create_shape_tensor(x_shape[:axis] + [-1])
        const_tensors.append(shape_tensor)
        nodes.append(helper.make_node(
            'Reshape',
            inputs=[gemm_y_output, shape_tensor.name],
            outputs=[y],
        ))

    return nodes, const_tensors
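# --- Illustrative sketch (not part of the original source) ---
# The difference between the two _create_gemm variants above reflects a
# change in ONNX itself: Reshape took its target shape as an attribute in
# opset 1, but from opset 5 onwards it takes the shape as a second int64
# tensor input (hence the _create_shape_tensor helper). The two node
# forms, with illustrative blob names:
from onnx import helper

old_style = helper.make_node('Reshape', inputs=['x'], outputs=['y'],
                             shape=[2, -1])        # opset 1: attribute
new_style = helper.make_node('Reshape', inputs=['x', 'shape_t'],
                             outputs=['y'])        # opset >= 5: input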
def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version,
                              include_initializers):
    device_option = get_device_option(Device(device))

    init_model = ModelProto()
    init_model.ParseFromString(
        cls.optimize_onnx(onnx_model.SerializeToString(), init=True))

    pred_model = ModelProto()
    pred_model.ParseFromString(
        cls.optimize_onnx(onnx_model.SerializeToString(), predict=True))

    init_net = caffe2_pb2.NetDef()
    pred_net = caffe2_pb2.NetDef()

    init_net.name = onnx_model.graph.name + '_init'
    pred_net.name = onnx_model.graph.name + '_predict'

    if include_initializers:
        init_net.op.extend(
            cls._create_tensor_filling_op(tp)
            for tp in onnx_model.graph.initializer)

    dummy_name(cls._all_names_in_graph(init_model.graph) |
               cls._all_names_in_graph(pred_model.graph))

    success = True
    for net, model in ((init_net, init_model), (pred_net, pred_model)):
        net.device_option.CopyFrom(device_option)
        for node in model.graph.node:
            try:
                c2ops = cls._onnx_node_to_caffe2_op(
                    init_model, pred_model, node, opset_version)
            except Exception as e:
                success = False
                print('ONNX FATAL:', e)
                continue
            (init_net if include_initializers else net).op.extend(
                c2ops.init_ops)
            net.op.extend(c2ops.ops)
            net.external_input.extend(c2ops.interface_blobs)
        net.external_output.extend(
            value_info.name for value_info in model.graph.output)
        net.external_input.extend(
            value_info.name for value_info in model.graph.input)

    if not success:
        raise RuntimeError('ONNX conversion failed')

    return init_net, pred_net
def test_dummy_name(self):
    dummy_name([])
    names_1 = [dummy_name() for _ in range(3)]
    dummy_name([])
    names_2 = [dummy_name() for _ in range(3)]
    self.assertEqual(names_1, names_2)

    dummy_name(names_1)
    names_3 = [dummy_name() for _ in range(3)]
    self.assertFalse(set(names_1) & set(names_3))
def _create_concat(cls, init_model, pred_model, n, opset_version):
    # TODO: Caffe2 Concat has an extra output. It should only be used
    # during training, so we should change Caffe2 to allow 1 output.
    op = cls._common_onnx_node_to_caffe2_op(init_model, pred_model, n,
                                            opset_version)
    assert len(op.output) == 1
    op.output.append(dummy_name())
    return op
def _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version,
                              include_initializers):
    device_option = get_device_option(Device(device))

    init_model = ModelProto()
    init_model.ParseFromString(
        cls.optimize_onnx(onnx_model.SerializeToString(), init=True))
    cls._inplace_rewrite(init_model.graph)

    pred_model = ModelProto()
    pred_model.ParseFromString(
        cls.optimize_onnx(onnx_model.SerializeToString(), predict=True))
    cls._inplace_rewrite(pred_model.graph)

    init_net = caffe2_pb2.NetDef()
    pred_net = caffe2_pb2.NetDef()

    init_net.name = onnx_model.graph.name + '_init'
    pred_net.name = onnx_model.graph.name + '_predict'

    if include_initializers:
        init_net.op.extend(
            cls._create_tensor_filling_op(tp)
            for tp in onnx_model.graph.initializer)

    dummy_name(cls._all_names_in_graph(init_model.graph) |
               cls._all_names_in_graph(pred_model.graph))

    for net, model in ((init_net, init_model), (pred_net, pred_model)):
        net.device_option.CopyFrom(device_option)
        for node in model.graph.node:
            c2ops = cls._onnx_node_to_caffe2_op(init_model, pred_model,
                                                node, opset_version)
            (init_net if include_initializers else net).op.extend(
                c2ops.init_ops)
            net.op.extend(c2ops.ops)
            net.external_input.extend(c2ops.interface_blobs)
        net.external_output.extend(
            value_info.name for value_info in model.graph.output)
        net.external_input.extend(
            value_info.name for value_info in model.graph.input)

    return init_net, pred_net
def _create_logsoftmax(cls, init_model, pred_model, n, opset_version):
    # NB: this implementation is not backward stable.
    (A,) = n.inputs
    (Y,) = n.outputs
    axis = n.attrs.get('axis', 1)
    ops = []
    softmax_A = dummy_name()
    ops.append(core.CreateOperator('Softmax', [A], [softmax_A], axis=axis))
    ops.append(core.CreateOperator('Log', [softmax_A], [Y]))
    return ops
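# --- Illustrative sketch (not part of the original source) ---
# Composing Softmax with Log, as above, is numerically fragile: softmax
# can round small probabilities to 0, and log(0) is -inf. The usual fix
# is the shifted log-sum-exp form. A numpy comparison:
import numpy as np

x = np.array([-110., 0.], dtype=np.float32)

naive = np.log(np.exp(x) / np.exp(x).sum())       # [-inf, 0.]: underflow
shifted = x - x.max()
stable = shifted - np.log(np.exp(shifted).sum())  # [-110., 0.]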
def make_rnn(direction_offset):
    name = dummy_name()

    # input and recurrence biases are squashed together in
    # onnx but not in caffe2
    bias_offset = 2 * direction_offset * hidden_size
    init_net.Slice(B, name + "/i2h_b",
                   starts=[bias_offset + 0 * hidden_size],
                   ends=[bias_offset + 1 * hidden_size])
    init_net.Slice(B, name + "/gates_t_b",
                   starts=[bias_offset + 1 * hidden_size],
                   ends=[bias_offset + 2 * hidden_size])

    weight_offset = direction_offset * hidden_size
    init_net.Slice(W, name + '/i2h_w',
                   starts=[weight_offset + 0 * hidden_size, 0],
                   ends=[weight_offset + 1 * hidden_size, -1])
    init_net.Slice(R, name + '/gates_t_w',
                   starts=[weight_offset + 0 * hidden_size, 0],
                   ends=[weight_offset + 1 * hidden_size, -1])

    initial_h_sliced = name + '/initial_h'
    init_net.Slice(initial_h, initial_h_sliced,
                   starts=[direction_offset + 0, 0, 0],
                   ends=[direction_offset + 1, -1, -1])

    if direction_offset == 1:
        input = pred_mh.net.ReversePackedSegs(
            [input_blob, sequence_lens], name + "/input-reversed")
    else:
        input = input_blob

    hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
        pred_mh,
        input,
        sequence_lens,
        [initial_h_sliced],
        input_size,
        hidden_size,
        name,
        drop_states=False,
        forward_only=True,
        activation=activation)

    if direction_offset == 1:
        hidden_t_all = pred_mh.net.ReversePackedSegs(
            [hidden_t_all, sequence_lens], name + "/output-reversed")

    return hidden_t_all, hidden_t_last
def _create_channel_shuffle(cls, op_def, shapes):
    x, = op_def.input
    y, = op_def.output
    n, c, h, w = shapes[x]
    args = {arg.name: arg for arg in op_def.arg}
    g = args['group'].i
    assert c % g == 0

    nodes = []
    const_tensors = []

    tmp1 = dummy_name()
    shape_tensor = cls._create_shape_tensor([n, g, c // g, h, w])
    const_tensors.append(shape_tensor)
    nodes.append(helper.make_node(
        'Reshape',
        inputs=[x, shape_tensor.name],
        outputs=[tmp1],
    ))

    tmp2 = dummy_name()
    nodes.append(helper.make_node(
        'Transpose',
        inputs=[tmp1],
        outputs=[tmp2],
        perm=[0, 2, 1, 3, 4],
    ))

    shape_tensor = cls._create_shape_tensor([n, c, h, w])
    const_tensors.append(shape_tensor)
    nodes.append(helper.make_node(
        'Reshape',
        inputs=[tmp2, shape_tensor.name],
        outputs=[y],
    ))

    return nodes, const_tensors
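# --- Illustrative sketch (not part of the original source) ---
# The ChannelShuffle lowering above is the standard reshape -> transpose
# -> reshape trick. The same computation in numpy, with made-up sizes:
import numpy as np

n, c, h, w, g = 1, 6, 2, 2, 3
x = np.arange(n * c * h * w).reshape(n, c, h, w)

y = (x.reshape(n, g, c // g, h, w)     # split channels into g groups
      .transpose(0, 2, 1, 3, 4)        # swap group and channel axes
      .reshape(n, c, h, w))            # flatten back: channels interleaved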
def make_lstm(direction_offset):
    name = dummy_name()

    # input and recurrence biases are squashed together in
    # onnx but not in caffe2
    bias_offset = 8 * direction_offset * hidden_size
    Bi = init_net.Slice(B, name + "_bias_i2h",
                        starts=[bias_offset + 0 * hidden_size],
                        ends=[bias_offset + 4 * hidden_size])
    Br = init_net.Slice(B, name + "_bias_gates",
                        starts=[bias_offset + 4 * hidden_size],
                        ends=[bias_offset + 8 * hidden_size])

    weight_offset = 4 * direction_offset * hidden_size
    W_ = init_net.Slice(W, name + '/i2h_w_pre',
                        starts=[weight_offset + 0 * hidden_size, 0],
                        ends=[weight_offset + 4 * hidden_size, -1])
    R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                        starts=[weight_offset + 0 * hidden_size, 0],
                        ends=[weight_offset + 4 * hidden_size, -1])

    # caffe2 has a different order from onnx. We need to rearrange
    #   i o f c -> i f o c
    reforms = ((W_, 'i2h_w', [(0, -1)]),
               (R_, 'gates_t_w', [(0, -1)]),
               (Bi, 'i2h_b', []),
               (Br, 'gates_t_b', []))
    for name_from, name_to, extra_dims in reforms:
        xi, xo, xf, xc = [name_from + suffix
                          for suffix in ("_i", "_o", "_f", "_c")]
        for i, x in enumerate([xi, xo, xf, xc]):
            dim0 = i * hidden_size, (i + 1) * hidden_size
            starts, ends = zip(dim0, *extra_dims)
            init_net.Slice(name_from, x, starts=starts, ends=ends)
        init_net.Concat([xi, xf, xo, xc],
                        ['%s/%s' % (name, name_to), dummy_name()],
                        axis=0)

    initial_h_sliced = name + '/initial_h'
    init_net.Slice(initial_h, initial_h_sliced,
                   starts=[direction_offset + 0, 0, 0],
                   ends=[direction_offset + 1, -1, -1])
    initial_c_sliced = name + '/initial_c'
    init_net.Slice(initial_c, initial_c_sliced,
                   starts=[direction_offset + 0, 0, 0],
                   ends=[direction_offset + 1, -1, -1])

    if direction_offset == 1:
        input = pred_mh.net.ReversePackedSegs(
            [input_blob, sequence_lens], name + "/input-reversed")
    else:
        input = input_blob

    hidden_t_all, hidden_t_last, _, _, params = rnn_cell.LSTM(
        pred_mh,
        input,
        sequence_lens,
        [initial_h_sliced, initial_c_sliced],
        input_size,
        hidden_size,
        name,
        drop_states=True,
        forward_only=True,
        return_params=True)

    if direction_offset == 1:
        hidden_t_all = pred_mh.net.ReversePackedSegs(
            [hidden_t_all, sequence_lens], name + "/output-reversed")

    return hidden_t_all, hidden_t_last
def caffe2_net_to_onnx_graph(cls, predict_net, init_net=None, value_info=None):
    if value_info is None:
        value_info = {}
    if not isinstance(value_info, dict):
        raise ValueError('Please pass value_info as a '
                         'name -> (type, shape) dictionary')

    cls._filter_fake_init(init_net, value_info)
    cls._ssa_rewrite(predict_net, init_net, value_info)

    if init_net:
        initializer = cls.caffe2_init_net_to_initializer(init_net)
        value_info.update({init.name: (init.data_type, init.dims)
                           for init in initializer})
    else:
        initializer = []

    # Check whether we have got type shape info of all input
    missing = (set(list(predict_net.external_input)) -
               set(value_info.keys()))
    if missing:
        raise RuntimeError('Could not find value info of inputs: {}'.format(
            ', '.join(missing)))

    inputs = {}
    for name in predict_net.external_input:
        elem_type, shape = value_info[name]
        inputs[name] = np.random.randn(*shape).astype(
            mapping.TENSOR_TYPE_TO_NP_TYPE[elem_type])

    ws, outputs = c2_native_run_net(init_net, predict_net, inputs)

    for name in predict_net.external_output:
        output = outputs[name]
        elem_type = mapping.NP_TYPE_TO_TENSOR_TYPE[output.dtype]
        shape = output.shape
        value_info[name] = (elem_type, shape)

    graph_def = GraphProto()
    graph_def.name = predict_net.name
    graph_def.initializer.extend(initializer)

    # This is a mapping from Caffe2 names to ONNX names
    graph_def.input.extend(
        make_tensor_value_info(
            name=name,
            elem_type=value_info[name][0],
            shape=value_info[name][1])
        for name in predict_net.external_input)

    dummy_name(cls._all_names_in_net(predict_net) |
               cls._all_names_in_net(init_net))

    for op in predict_net.op:
        shapes = {}
        for name in itertools.chain(op.input, op.output):
            blob = ws.FetchBlob(name)
            if hasattr(blob, 'shape'):
                shapes[name] = blob.shape
        nodes, const_tensors = cls.caffe2_op_to_onnx_node(op, shapes=shapes)
        graph_def.node.extend(nodes)
        graph_def.initializer.extend(const_tensors)
        graph_def.input.extend([cls._extract_value_info(tensor)
                                for tensor in const_tensors])

    all_output = set(sum((list(node.output) for node in graph_def.node),
                         [init.name for init in graph_def.initializer]))
    redundant_output = set(vi.name for vi in graph_def.output) - all_output
    if redundant_output:
        logger.warning(
            'There are graph output not produced by any node or initializer: {}'
            '! Will drop them.'.format(', '.join(redundant_output)))
    graph_def.output.extend(
        make_tensor_value_info(
            name=name,
            elem_type=value_info[name][0],
            shape=value_info[name][1])
        for name in predict_net.external_output
        if name in all_output)

    checker.check_graph(graph_def)
    return graph_def
def _create_rnn(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert RNNs without access to the full model"
    assert pred_model is not None, "cannot convert RNNs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    activation = force_unicode(attrs.pop('activations', ('tanh',))[0])
    direction = force_unicode(attrs.pop('direction', 'forward'))
    assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
    assert direction in ['forward', 'bidirectional'], "unsupported backwards RNN"

    input_blob, W, R, B, sequence_lens, initial_h = n.inputs

    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n,
                                          input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for RNN input failed")

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    def make_rnn(direction_offset):
        name = dummy_name()

        # input and recurrence biases are squashed together in
        # onnx but not in caffe2
        bias_offset = 2 * direction_offset * hidden_size
        init_net.Slice(B, name + "/i2h_b",
                       starts=[bias_offset + 0 * hidden_size],
                       ends=[bias_offset + 1 * hidden_size])
        init_net.Slice(B, name + "/gates_t_b",
                       starts=[bias_offset + 1 * hidden_size],
                       ends=[bias_offset + 2 * hidden_size])

        weight_offset = direction_offset * hidden_size
        init_net.Slice(W, name + '/i2h_w',
                       starts=[weight_offset + 0 * hidden_size, 0],
                       ends=[weight_offset + 1 * hidden_size, -1])
        init_net.Slice(R, name + '/gates_t_w',
                       starts=[weight_offset + 0 * hidden_size, 0],
                       ends=[weight_offset + 1 * hidden_size, -1])

        initial_h_sliced = name + '/initial_h'
        init_net.Slice(initial_h, initial_h_sliced,
                       starts=[direction_offset + 0, 0, 0],
                       ends=[direction_offset + 1, -1, -1])

        if direction_offset == 1:
            input = pred_mh.net.ReversePackedSegs(
                [input_blob, sequence_lens], name + "/input-reversed")
        else:
            input = input_blob

        hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
            pred_mh,
            input,
            sequence_lens,
            [initial_h_sliced],
            input_size,
            hidden_size,
            name,
            drop_states=True,
            forward_only=True,
            activation=activation)

        if direction_offset == 1:
            hidden_t_all = pred_mh.net.ReversePackedSegs(
                [hidden_t_all, sequence_lens], name + "/output-reversed")

        return hidden_t_all, hidden_t_last

    if direction == 'forward':
        hidden_t_all, hidden_t_last = make_rnn(0)
        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={hidden_t_all: n.outputs[0],
                        hidden_t_last: n.outputs[1]})
    elif direction == 'bidirectional':
        hidden_t_all_f, hidden_t_last_f = make_rnn(0)
        hidden_t_all_b, hidden_t_last_b = make_rnn(1)
        pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                           [n.outputs[0], dummy_name()], axis=2)
        pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                           [n.outputs[1], dummy_name()], axis=2)

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
def _create_lstm(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert LSTMs without access to the full model"
    assert pred_model is not None, "cannot convert LSTMs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    direction = force_unicode(attrs.pop('direction', 'forward'))
    assert not attrs, "unsupported LSTM attributes: " + str(attrs.keys())
    assert direction in ['forward', 'bidirectional'], "unsupported backwards LSTM"

    input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n,
                                          input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for LSTM input failed")

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    def make_lstm(direction_offset):
        name = dummy_name()

        # input and recurrence biases are squashed together in
        # onnx but not in caffe2
        bias_offset = 8 * direction_offset * hidden_size
        Bi = init_net.Slice(B, name + "_bias_i2h",
                            starts=[bias_offset + 0 * hidden_size],
                            ends=[bias_offset + 4 * hidden_size])
        Br = init_net.Slice(B, name + "_bias_gates",
                            starts=[bias_offset + 4 * hidden_size],
                            ends=[bias_offset + 8 * hidden_size])

        weight_offset = 4 * direction_offset * hidden_size
        W_ = init_net.Slice(W, name + '/i2h_w_pre',
                            starts=[weight_offset + 0 * hidden_size, 0],
                            ends=[weight_offset + 4 * hidden_size, -1])
        R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                            starts=[weight_offset + 0 * hidden_size, 0],
                            ends=[weight_offset + 4 * hidden_size, -1])

        # caffe2 has a different order from onnx. We need to rearrange
        #   i o f c -> i f o c
        reforms = ((W_, 'i2h_w', [(0, -1)]),
                   (R_, 'gates_t_w', [(0, -1)]),
                   (Bi, 'i2h_b', []),
                   (Br, 'gates_t_b', []))
        for name_from, name_to, extra_dims in reforms:
            xi, xo, xf, xc = [name_from + suffix
                              for suffix in ("_i", "_o", "_f", "_c")]
            for i, x in enumerate([xi, xo, xf, xc]):
                dim0 = i * hidden_size, (i + 1) * hidden_size
                starts, ends = zip(dim0, *extra_dims)
                init_net.Slice(name_from, x, starts=starts, ends=ends)
            init_net.Concat([xi, xf, xo, xc],
                            ['%s/%s' % (name, name_to), dummy_name()],
                            axis=0)

        initial_h_sliced = name + '/initial_h'
        init_net.Slice(initial_h, initial_h_sliced,
                       starts=[direction_offset + 0, 0, 0],
                       ends=[direction_offset + 1, -1, -1])
        initial_c_sliced = name + '/initial_c'
        init_net.Slice(initial_c, initial_c_sliced,
                       starts=[direction_offset + 0, 0, 0],
                       ends=[direction_offset + 1, -1, -1])

        if direction_offset == 1:
            input = pred_mh.net.ReversePackedSegs(
                [input_blob, sequence_lens], name + "/input-reversed")
        else:
            input = input_blob

        hidden_t_all, hidden_t_last, _, cell_last, params = rnn_cell.LSTM(
            pred_mh,
            input,
            sequence_lens,
            [initial_h_sliced, initial_c_sliced],
            input_size,
            hidden_size,
            name,
            drop_states=False,
            forward_only=True,
            return_params=True)

        if direction_offset == 1:
            hidden_t_all = pred_mh.net.ReversePackedSegs(
                [hidden_t_all, sequence_lens], name + "/output-reversed")

        return hidden_t_all, hidden_t_last, cell_last

    if direction == 'forward':
        hidden_t_all, hidden_t_last, cell_last = make_lstm(0)

        # in the forward case, storage is shared between the three
        # outputs. We need to decouple them so that the
        # VariableLengthSequencePadding only mutates n.outputs[0]
        pred_mh.net.Copy(hidden_t_last, n.outputs[1])
        pred_mh.net.Copy(cell_last, n.outputs[2])

        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={hidden_t_all: n.outputs[0]})
    elif direction == 'bidirectional':
        hidden_t_all_f, hidden_t_last_f, cell_last_f = make_lstm(0)
        hidden_t_all_b, hidden_t_last_b, cell_last_b = make_lstm(1)
        pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                           [n.outputs[0], dummy_name()], axis=2)
        pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                           [n.outputs[1], dummy_name()], axis=0)
        pred_mh.net.Concat([cell_last_f, cell_last_b],
                           [n.outputs[2], dummy_name()], axis=0)

    if sequence_lens is not None:
        pred_mh.net.VariableLengthSequencePadding(
            [n.outputs[0], sequence_lens], [n.outputs[0]])

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
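# --- Illustrative sketch (not part of the original source) ---
# The Slice/Concat dance in make_lstm reorders the ONNX gate layout
# (i, o, f, c) into Caffe2's (i, f, o, c). The same reordering in numpy,
# with a made-up hidden_size:
import numpy as np

hidden_size = 2
W = np.arange(4 * hidden_size * 3).reshape(4 * hidden_size, 3)

i, o, f, c = (W[k * hidden_size:(k + 1) * hidden_size] for k in range(4))
W_caffe2 = np.concatenate([i, f, o, c], axis=0)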
def _create_shape_tensor(cls, shape):
    return make_tensor(name=dummy_name(),
                       data_type=TensorProto.INT64,
                       dims=[len(shape)],
                       vals=np.asarray(shape, dtype=np.int64).tobytes(),
                       raw=True)
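# --- Illustrative sketch (not part of the original source) ---
# What _create_shape_tensor produces: a 1-D INT64 constant holding the
# target shape, suitable as the second input of an opset-5 Reshape. The
# name is hard-coded here only because dummy_name() needs graph context:
import numpy as np
from onnx import TensorProto, numpy_helper
from onnx.helper import make_tensor

t = make_tensor(name='shape_0', data_type=TensorProto.INT64, dims=[4],
                vals=np.asarray([1, 3, 224, 224], dtype=np.int64).tobytes(),
                raw=True)
print(numpy_helper.to_array(t))  # [  1   3 224 224]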
def _create_gru(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert GRUs without access to the full model"
    assert pred_model is not None, "cannot convert GRUs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    linear_before_reset = attrs.pop('linear_before_reset', 0)
    direction = force_unicode(attrs.pop('direction', 'forward'))
    assert not attrs, "unsupported GRU attributes: " + str(attrs.keys())
    assert direction in ['forward', 'bidirectional'], "unsupported backwards GRU"

    input_blob, W, R, B, sequence_lens, initial_h = n.inputs

    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n,
                                          input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for GRU input failed")

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    def make_gru(direction_offset):
        name = dummy_name()

        # input and recurrence biases are squashed together in
        # onnx but not in caffe2
        bias_offset = 6 * direction_offset * hidden_size
        Bi = init_net.Slice(B, name + "_bias_i2h",
                            starts=[bias_offset + 0 * hidden_size],
                            ends=[bias_offset + 3 * hidden_size])
        Br = init_net.Slice(B, name + "_bias_gates",
                            starts=[bias_offset + 3 * hidden_size],
                            ends=[bias_offset + 6 * hidden_size])

        weight_offset = 3 * direction_offset * hidden_size
        W_ = init_net.Slice(W, name + '/i2h_w_pre',
                            starts=[weight_offset + 0 * hidden_size, 0],
                            ends=[weight_offset + 3 * hidden_size, -1])
        R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                            starts=[weight_offset + 0 * hidden_size, 0],
                            ends=[weight_offset + 3 * hidden_size, -1])

        # caffe2 has a different order from onnx. We need to rearrange
        #   z r h -> r z h
        reforms = ((W_, 'i2h_w', True, [(0, -1)]),
                   (R_, 'gate_t_w', False, [(0, -1)]),
                   (Bi, 'i2h_b', True, []),
                   (Br, 'gate_t_b', False, []))
        for name_from, name_to, do_concat, extra_dims in reforms:
            xz, xr, xh = ['%s/%s_%s' % (name, prefix, name_to)
                          for prefix in ('update', 'reset', 'output')]
            for i, x in enumerate([xz, xr, xh]):
                dim0 = i * hidden_size, (i + 1) * hidden_size
                starts, ends = zip(dim0, *extra_dims)
                init_net.Slice(name_from, x, starts=starts, ends=ends)
            if do_concat:
                init_net.Concat([xr, xz, xh],
                                ['%s/%s' % (name, name_to), dummy_name()],
                                axis=0)

        initial_h_sliced = name + '/initial_h'
        init_net.Slice(initial_h, initial_h_sliced,
                       starts=[direction_offset + 0, 0, 0],
                       ends=[direction_offset + 1, -1, -1])

        if direction_offset == 1:
            input = pred_mh.net.ReversePackedSegs(
                [input_blob, sequence_lens], name + "/input-reversed")
        else:
            input = input_blob

        hidden_t_all, hidden_t_last = gru_cell.GRU(
            pred_mh,
            input,
            sequence_lens,
            [initial_h_sliced],
            input_size,
            hidden_size,
            name,
            drop_states=True,
            forward_only=True,
            linear_before_reset=linear_before_reset)

        if direction_offset == 1:
            hidden_t_all = pred_mh.net.ReversePackedSegs(
                [hidden_t_all, sequence_lens], name + "/output-reversed")

        return hidden_t_all, hidden_t_last

    if direction == 'forward':
        hidden_t_all, hidden_t_last = make_gru(0)
        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={hidden_t_all: n.outputs[0],
                        hidden_t_last: n.outputs[1]})
    elif direction == 'bidirectional':
        hidden_t_all_f, hidden_t_last_f = make_gru(0)
        hidden_t_all_b, hidden_t_last_b = make_gru(1)
        pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                           [n.outputs[0], dummy_name()], axis=2)
        pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                           [n.outputs[1], dummy_name()], axis=2)

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
def test_dummy_name(self):
    dummy_name([])
    names_1 = [dummy_name() for _ in range(3)]
    dummy_name([])
    names_2 = [dummy_name() for _ in range(3)]
    self.assertEqual(names_1, names_2)

    dummy_name(names_1)
    names_3 = [dummy_name() for _ in range(3)]
    self.assertFalse(set(names_1) & set(names_3))

    dummy_name(set(names_1))
    names_4 = [dummy_name() for _ in range(3)]
    self.assertFalse(set(names_1) & set(names_4))
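# --- Illustrative sketch (not part of the original source) ---
# The contract these tests exercise: dummy_name(collection) resets the
# generator and reserves the given names; dummy_name() returns a fresh,
# unreserved name. A minimal stand-in implementation (the name format is
# an assumption, not the real one):
_used = set()
_counter = [0]

def dummy_name(used_names=None):
    if used_names is not None:                 # reset mode
        _used.clear()
        _used.update(used_names)
        _counter[0] = 0
        return None
    while True:                                # generation mode
        name = 'OC2_DUMMY_{}'.format(_counter[0])
        _counter[0] += 1
        if name not in _used:
            _used.add(name)
            return name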
def _create_reshape(cls, init_model, pred_model, n, opset_version):
    c2_op = cls._common_onnx_node_to_caffe2_op(init_model, pred_model, n,
                                               opset_version)
    # Caffe2 has an extra output
    c2_op.output.append(dummy_name())
    return c2_op
def test_dummy_name(self):
    n1 = dummy_name()
    n2 = dummy_name()
    assert n1 != n2, "Got same names in different calls: {}".format(n1)
def _create_slice(cls, init_model, pred_model, n, opset_version):
    op = cls._common_onnx_node_to_caffe2_op(init_model, pred_model, n,
                                            opset_version)
    args = {arg.name: arg for arg in op.arg}
    starts_vals = np.array(
        args.pop('starts').ints, dtype=np.int64).tolist()
    ends_vals = np.array(
        [i - 1 if i < 0 else i for i in args.pop('ends').ints],
        dtype=np.int64).tolist()
    if 'axes' in args:
        axes_vals = np.array(
            args.pop('axes').ints, dtype=np.int32).tolist()
    else:
        ndims = len(starts_vals)
        axes_vals = np.array(range(ndims), dtype=np.int32).tolist()

    data, = op.input
    ops = []

    shape_tensor = dummy_name()
    ops.append(core.CreateOperator(
        'Shape',
        [data],
        [shape_tensor],
    ))

    axes_tensor = dummy_name()
    ops.extend([
        core.CreateOperator(
            'GivenTensorIntFill',
            [],
            [axes_tensor],
            shape=[len(axes_vals)],
            values=axes_vals,
        ),
    ])

    starts_vals_tensor = dummy_name()
    starts_tensor = dummy_name()
    casted_starts_tensor = dummy_name()
    ops.extend([
        core.CreateOperator(
            'GivenTensorInt64Fill',
            [],
            [starts_vals_tensor],
            shape=[len(starts_vals)],
            values=starts_vals,
        ),
        core.CreateOperator(
            'ConstantFill',
            [shape_tensor],
            [starts_tensor],
            dtype=caffe2_pb2.TensorProto.INT64,
            value=0,
        ),
        core.CreateOperator(
            'ScatterAssign',
            [starts_tensor, axes_tensor, starts_vals_tensor],
            [starts_tensor],
        ),
        # Slice only accepts starts as int
        core.CreateOperator(
            'Cast',
            [starts_tensor],
            [casted_starts_tensor],
            to=caffe2_pb2.TensorProto.INT32,
        ),
    ])

    ends_vals_tensor = dummy_name()
    ends_tensor = dummy_name()
    casted_ends_tensor = dummy_name()
    ops.extend([
        core.CreateOperator(
            'GivenTensorInt64Fill',
            [],
            [ends_vals_tensor],
            shape=[len(ends_vals)],
            values=ends_vals,
        ),
        core.CreateOperator(
            'ConstantFill',
            [shape_tensor],
            [ends_tensor],
            dtype=caffe2_pb2.TensorProto.INT64,
            value=-1,
        ),
        core.CreateOperator(
            'ScatterAssign',
            [ends_tensor, axes_tensor, ends_vals_tensor],
            [ends_tensor],
        ),
        # Slice only accepts ends as int
        core.CreateOperator(
            'Cast',
            [ends_tensor],
            [casted_ends_tensor],
            to=caffe2_pb2.TensorProto.INT32,
        ),
    ])

    op.input[:] = [data, casted_starts_tensor, casted_ends_tensor]
    del op.arg[:]
    op.arg.extend(args.values())
    ops.append(op)

    return ops
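# --- Illustrative sketch (not part of the original source) ---
# Why ends are rewritten as `i - 1 if i < 0 else i` above: ONNX Slice
# uses numpy-style exclusive ends (end=-1 stops before the last element),
# while Caffe2's Slice, as I read its semantics, treats -1 as "through
# the last element". Shifting negative ends by one bridges the two
# conventions:
import numpy as np

x = np.arange(5)
onnx_end = -1
print(x[1:onnx_end])        # ONNX/numpy semantics -> [1 2 3]
caffe2_end = onnx_end - 1   # -2, which Caffe2 reads like numpy's -1
print(x[1:caffe2_end + 1])  # Caffe2 semantics emulated -> [1 2 3]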