def test_convolution_sync(self, net_type, num_workers, do, engine):
    """Check that running a deep conv net repeatedly is deterministic.

    Builds a binary tree of conv layers (depth 3) whose leaves are fed
    identical seeded-random inputs, runs forward+backward ``iters`` times,
    and asserts that the leaf gradients are bitwise identical across runs
    and that their squared sum matches a pinned reference value.

    NOTE(review): statement order matters throughout — np.random is
    consumed in a fixed sequence (conv weights, cudnn_state draws, input
    blobs), so reordering any of these calls would change the values.
    """
    m = ModelHelper(name="test_model")
    n = 1
    d = 2
    depth = 3
    iters = 5
    h = 5
    w = 5
    workspace.ResetWorkspace()
    use_cudnn = (engine == 'CUDNN')
    np.random.seed(1701)
    # Build a binary tree of conv layers, summing at each node.
    # Blob naming: "<level>_<index>"; level 0 is the root, level `depth`
    # holds the 2**depth input leaves.
    for i in reversed(range(depth)):
        for j in range(2**i):
            bottom_1 = "{}_{}".format(i + 1, 2 * j)
            bottom_2 = "{}_{}".format(i + 1, 2 * j + 1)
            mid_1 = "{}_{}_m".format(i + 1, 2 * j)
            mid_2 = "{}_{}_m".format(i + 1, 2 * j + 1)
            top = "{}_{}".format(i, j)
            w1, b1, w2, b2 = np.random.randn(4).tolist()
            # deterministic=1 forces reproducible (non-autotuned) conv
            # algorithms; cudnn_state is drawn randomly to exercise
            # different cuDNN handle slots.
            brew.conv(
                m, bottom_1, mid_1,
                dim_in=d, dim_out=d, kernel=3,
                weight_init=('ConstantFill', dict(value=w1)),
                bias_init=('ConstantFill', dict(value=b1)),
                cudnn_state=np.random.randint(0, 3),
                stride=1, pad=1,
                deterministic=1,
                use_cudnn=use_cudnn,
                engine=engine)
            brew.conv(
                m, bottom_2, mid_2,
                dim_in=d, dim_out=d, kernel=3,
                stride=1, pad=1,
                weight_init=('ConstantFill', dict(value=w2)),
                bias_init=('ConstantFill', dict(value=b2)),
                deterministic=1,
                cudnn_state=np.random.randint(0, 3),
                use_cudnn=use_cudnn,
                engine=engine)
            m.net.Sum([mid_1, mid_2], top)
    m.net.Flatten(["0_0"], ["0_0_flat"])
    m.net.SquaredL2Distance(["0_0_flat", "label"], "xent")
    m.net.AveragedLoss("xent", "loss")
    input_to_grad = m.AddGradientOperators(["loss"])
    m.Proto().device_option.CopyFrom(do)
    m.param_init_net.Proto().device_option.CopyFrom(do)
    m.Proto().type = net_type
    m.Proto().num_workers = num_workers
    self.ws.run(m.param_init_net)

    def run():
        # Re-seed on every call so each iteration feeds identical data;
        # the local numpy import shadows the module-level one (harmless).
        import numpy as np
        np.random.seed(1701)
        input_blobs = ["{}_{}".format(depth, j) for j in range(2**depth)]
        for input_blob in input_blobs:
            self.ws.create_blob(input_blob).feed(
                np.random.randn(n, d, h, w).astype(np.float32),
                device_option=do)
            # NOTE(review): "label" is re-fed on every loop iteration;
            # looks redundant but each feed advances the RNG, so the
            # final label values (and the pinned checksum below) depend
            # on it staying inside the loop — confirm before moving.
            self.ws.create_blob("label").feed(
                np.random.randn(n, d * h * w).astype(np.float32),
                device_option=do)
        self.ws.run(m.net)
        gradients = [
            self.ws.blobs[str(input_to_grad[input_blob])].fetch()
            for input_blob in input_blobs
        ]
        return gradients

    outputs = [run() for _ in range(iters)]
    for output in outputs[1:]:
        # Bitwise determinism across repeated runs...
        np.testing.assert_array_equal(outputs[0], output)
        # ...and agreement with a pinned reference checksum.
        np.testing.assert_allclose(
            np.sum(np.square(output)),
            1763719461732352.0,
            rtol=1e-5)
def _create_gru(cls, init_model, pred_model, n, opset_version):
    """Convert an ONNX GRU node `n` into Caffe2 operators (forward only).

    Returns a Caffe2Ops triple: (predict-net ops, init-net ops, external
    inputs). The init net reshuffles ONNX's packed weights/biases into the
    blob layout that gru_cell.GRU expects; the predict net is the GRU
    itself with its outputs remapped onto `n.outputs`.
    """
    assert init_model is not None, "cannot convert GRUs without access to the full model"
    assert pred_model is not None, "cannot convert GRUs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    # NOTE(review): no default here, unlike the other GRU converter in
    # this file — a GRU node lacking this attribute raises KeyError.
    linear_before_reset = attrs.pop('linear_before_reset')
    assert not attrs, "unsupported GRU attributes: " + str(attrs.keys())

    input_blob, W, R, B, sequence_lens, initial_h = n.inputs

    # ONNX encodes an absent optional input as the empty string.
    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n,
                                          input_blob, W)
    if input_size is None:
        raise RuntimeError(
            "best-effort shape inference for GRU input failed")

    name = dummy_name()

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    hidden_t_all, hidden_t_last = gru_cell.GRU(
        pred_mh,
        input_blob,
        sequence_lens,
        [initial_h],
        input_size,
        hidden_size,
        name,
        drop_states=True,
        forward_only=True,
        linear_before_reset=linear_before_reset)

    # input and recurrence biases are squashed together in onnx but not
    # in caffe2 — split B into the i2h and gate halves.
    Bi = name + "_bias_i2h"
    Br = name + "_bias_gates"
    init_net.Slice(B, Bi, starts=[0 * hidden_size], ends=[3 * hidden_size])
    init_net.Slice(B, Br, starts=[3 * hidden_size], ends=[6 * hidden_size])

    # caffe2 has a different gate order from onnx. We need to rearrange
    #   z r h  ->  r z h
    #
    # TODO implement support for return_params in gru_cell.GRU.
    # Until then, hardcode blob names.
    # Each entry: (source blob, target suffix, concat-after-slicing?,
    #              extra (start, end) pairs for trailing dims).
    reforms = ((W, 'i2h_w', True, [(0, input_size)]),
               (R, 'gate_t_w', False, [(0, hidden_size)]),
               (Bi, 'i2h_b', True, []),
               (Br, 'gate_t_b', False, []))
    for name_from, name_to, do_concat, extra_dims in reforms:
        xz, xr, xh = ['%s/%s_%s' % (name, prefix, name_to)
                      for prefix in ('update', 'reset', 'output')]
        for i, x in enumerate([xz, xr, xh]):
            # Per-gate slice along dim 0 (each gate spans hidden_size rows).
            dim0 = i * hidden_size, (i + 1) * hidden_size
            starts, ends = zip(dim0, *extra_dims)
            init_net.Slice(name_from, x, starts=starts, ends=ends)
        if do_concat:
            # Reassemble in caffe2's r z h order.
            init_net.Concat([xr, xz, xh],
                            ['%s/%s' % (name, name_to), dummy_name()],
                            axis=0)

    # Rename the GRU's internal output blobs to the ONNX node's outputs.
    pred_mh.net = pred_mh.net.Clone(
        "dummy-clone-net",
        blob_remap={hidden_t_all: n.outputs[0],
                    hidden_t_last: n.outputs[1]})
    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
def _create_rnn_variant(cls, init_model, pred_model, n, opset_version):
    """Convert an ONNX RNN/GRU/LSTM node `n` into Caffe2 operators.

    Dispatches on `n.op_type` to build the right cell via
    cls._make_rnn_direction, handles forward and bidirectional layouts,
    and decides whether the weight-reshaping ops belong in the init net
    or the predict net. Returns a Caffe2Ops triple.
    """
    assert init_model is not None, "cannot convert RNNs without access to the full model"
    assert pred_model is not None, "cannot convert RNNs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    direction = force_unicode(attrs.pop('direction', 'forward'))

    if n.op_type == 'RNN':
        activation = force_unicode(attrs.pop('activations', ('tanh', ))[0])
    elif n.op_type == 'GRU':
        linear_before_reset = attrs.pop('linear_before_reset', 0)

    assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
    assert direction in ['forward', 'bidirectional'
                         ], "unsupported backwards RNN/GRU/LSTM"

    if n.op_type in ['RNN', 'GRU']:
        input_blob, W, R, B, sequence_lens, initial_h = n.inputs
    elif n.op_type == 'LSTM':
        input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

    # ONNX encodes an absent optional input as the empty string.
    if sequence_lens == "":
        sequence_lens = None

    # Best-effort shape inference: find W's declared shape anywhere in
    # either graph. ONNX W is [num_directions, gates*hidden, input_size],
    # so input_size is dim[2].
    for x in itertools.chain(init_model.graph.input,
                             init_model.graph.value_info,
                             pred_model.graph.input,
                             pred_model.graph.value_info):
        if x.name == W:
            input_size = x.type.tensor_type.shape.dim[2].dim_value
            break
    else:
        raise RuntimeError(
            "best-effort shape inference for RNN/GRU/LSTM failed")

    pred_mh = ModelHelper()
    init_net = core.Net("init-net")

    # Flatten the leading num_directions dimension out of W, R and B so
    # the per-direction slicing below works on 2-D (resp. 1-D) blobs.
    init_net.Reshape(W, [W, cls.dummy_name()], shape=[1, -1, 0])
    init_net.Squeeze(W, W, dims=[0])
    init_net.Reshape(R, [R, cls.dummy_name()], shape=[1, -1, 0])
    init_net.Squeeze(R, R, dims=[0])
    init_net.Reshape(B, [B, cls.dummy_name()], shape=[1, -1])
    init_net.Squeeze(B, B, dims=[0])

    if n.op_type == 'RNN':
        def reform(*args):
            # Plain RNN weights need no gate reordering.
            pass

        def make_cell(*args, **kwargs):
            return rnn_cell.BasicRNN(*args, activation=activation, **kwargs)

        def make_rnn(direction_offset):
            return cls._make_rnn_direction(
                input_blob, B, W, R, [(initial_h, '/initial_h')],
                sequence_lens, pred_mh, init_net, input_size, hidden_size,
                1, direction_offset, "/i2h_b", "/gates_t_b", "/i2h_w",
                "/gates_t_w", reform, make_cell, lambda x: x)
    elif n.op_type == 'GRU':
        def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
            # caffe2 has a different gate order from onnx. We need to
            # rearrange  z r h  ->  r z h
            reforms = ((W_, 'i2h_w', True, [(0, -1)]),
                       (R_, 'gate_t_w', False, [(0, -1)]),
                       (Bi, 'i2h_b', True, []),
                       (Br, 'gate_t_b', False, []))
            cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                    ['update', 'reset', 'output'],
                                    [1, 0, 2])

        def make_cell(*args, **kwargs):
            return gru_cell.GRU(*args,
                                linear_before_reset=linear_before_reset,
                                **kwargs)

        def make_rnn(direction_offset):
            return cls._make_rnn_direction(
                input_blob, B, W, R, [(initial_h, '/initial_h')],
                sequence_lens, pred_mh, init_net, input_size, hidden_size,
                3, direction_offset, "_bias_i2h", "_bias_gates",
                "/i2h_w_pre", "/gates_t_w_pre", reform, make_cell,
                lambda x: x)
    elif n.op_type == 'LSTM':
        def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
            # caffe2 has a different gate order from onnx. We need to
            # rearrange  i o f c  ->  i f o c
            reforms = ((W_, 'i2h_w', True, [(0, -1)]),
                       (R_, 'gates_t_w', True, [(0, -1)]),
                       (Bi, 'i2h_b', True, []),
                       (Br, 'gates_t_b', True, []))
            cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                    ['input', 'output', 'forget', 'cell'],
                                    [0, 2, 1, 3])

        def make_cell(*args, **kwargs):
            return rnn_cell.LSTM(*args, **kwargs)

        def make_rnn(direction_offset):
            return cls._make_rnn_direction(
                input_blob, B, W, R,
                [(initial_h, '/initial_h'), (initial_c, '/initial_c')],
                sequence_lens, pred_mh, init_net, input_size, hidden_size,
                4, direction_offset, "/i2h_b", "/gates_t_b", "/i2h_w",
                "/gates_t_w", reform, make_cell,
                lambda x: [x[0], x[1], x[3]])

    if direction == 'forward':
        outputs = make_rnn(0)

        # in the forward case, storage is shared between the
        # last outputs. We need to decouple them so that the
        # VariableLengthSequencePadding only mutates
        # n.outputs[0]
        for i in range(1, len(outputs)):
            pred_mh.net.Copy(outputs[i], n.outputs[i])

        if sequence_lens is not None:
            pred_mh.net.VariableLengthSequencePadding(
                [outputs[0], sequence_lens], [outputs[0]])
        # Re-insert the num_directions axis expected by ONNX output Y.
        pred_mh.net.ExpandDims([outputs[0]], [n.outputs[0]], dims=[1])
    elif direction == 'bidirectional':
        outputs_f = make_rnn(0)
        outputs_b = make_rnn(1)

        # Interleave forward/backward along the hidden axis, then split
        # that axis back out into a num_directions dimension.
        concatted_output, _ = pred_mh.net.Concat(
            [outputs_f[0], outputs_b[0]],
            [cls.dummy_name(), cls.dummy_name()], axis=2)
        if sequence_lens is not None:
            pred_mh.net.VariableLengthSequencePadding(
                [concatted_output, sequence_lens], [concatted_output])
        reshaped_output, _ = pred_mh.net.Reshape(
            concatted_output, [cls.dummy_name(), cls.dummy_name()],
            shape=[0, 0, -1, 2])
        pred_mh.net.Transpose(reshaped_output, n.outputs[0],
                              axes=[0, 3, 1, 2])
        for i in range(1, len(n.outputs)):
            pred_mh.net.Concat([outputs_f[i], outputs_b[i]],
                               [n.outputs[i], cls.dummy_name()], axis=0)

    # We want to decide whether to put all of our weight-reshaping
    # operators in the init net or the predict net. We can put
    # them in the init net iff the inputs to those operators are
    # already available, either as graph initializers, or as the
    # output of other operators in the init net. The latter case
    # occurs, for example, when exporting from pytorch to onnx.
    # In most production use, we expect has_initializers to be
    # true.
    initializers = {i.name for i in init_model.graph.initializer}
    outputs = {
        output
        for node in init_model.graph.node for output in node.output
    }
    has_initializers = all(x in initializers or x in outputs
                           for x in (W, R, B))

    pred_ops = []
    init_ops = []
    (init_ops if has_initializers else pred_ops).extend(
        init_net.Proto().op)
    pred_ops.extend(pred_mh.Proto().op)

    return Caffe2Ops(pred_ops, init_ops,
                     list(pred_mh.Proto().external_input))
def _create_gru(cls, init_model, pred_model, n, opset_version):
    """Convert an ONNX GRU node `n` (forward or bidirectional) to Caffe2.

    Builds one gru_cell.GRU per direction via the nested `make_gru`,
    slicing the direction's weights/biases/initial state out of the
    packed ONNX blobs in the init net. Returns a Caffe2Ops triple.
    """
    assert init_model is not None, "cannot convert GRUs without access to the full model"
    assert pred_model is not None, "cannot convert GRUs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    linear_before_reset = attrs.pop('linear_before_reset', 0)
    direction = force_unicode(attrs.pop('direction', 'forward'))
    assert not attrs, "unsupported GRU attributes: " + str(attrs.keys())
    assert direction in ['forward', 'bidirectional'], "unsupported backwards GRU"

    input_blob, W, R, B, sequence_lens, initial_h = n.inputs

    # ONNX encodes an absent optional input as the empty string.
    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n,
                                          input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for GRU input failed")

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    def make_gru(direction_offset):
        # direction_offset: 0 = forward pass, 1 = backward pass.
        name = dummy_name()

        # input and recurrence biases are squashed together in
        # onnx but not in caffe2 — slice this direction's halves apart.
        bias_offset = 6 * direction_offset * hidden_size
        Bi = init_net.Slice(B, name + "_bias_i2h",
                            starts=[bias_offset + 0 * hidden_size],
                            ends=[bias_offset + 3 * hidden_size])
        Br = init_net.Slice(B, name + "_bias_gates",
                            starts=[bias_offset + 3 * hidden_size],
                            ends=[bias_offset + 6 * hidden_size])

        weight_offset = 3 * direction_offset * hidden_size
        W_ = init_net.Slice(W, name + '/i2h_w_pre',
                            starts=[weight_offset + 0 * hidden_size, 0],
                            ends=[weight_offset + 3 * hidden_size, -1])
        R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                            starts=[weight_offset + 0 * hidden_size, 0],
                            ends=[weight_offset + 3 * hidden_size, -1])

        # caffe2 has a different gate order from onnx. We need to
        # rearrange  z r h  ->  r z h
        reforms = ((W_, 'i2h_w', True, [(0, -1)]),
                   (R_, 'gate_t_w', False, [(0, -1)]),
                   (Bi, 'i2h_b', True, []),
                   (Br, 'gate_t_b', False, []))
        for name_from, name_to, do_concat, extra_dims in reforms:
            xz, xr, xh = ['%s/%s_%s' % (name, prefix, name_to)
                          for prefix in ('update', 'reset', 'output')]
            for i, x in enumerate([xz, xr, xh]):
                # Per-gate slice along dim 0.
                dim0 = i * hidden_size, (i + 1) * hidden_size
                starts, ends = zip(dim0, *extra_dims)
                init_net.Slice(name_from, x, starts=starts, ends=ends)
            if do_concat:
                init_net.Concat([xr, xz, xh],
                                ['%s/%s' % (name, name_to), dummy_name()],
                                axis=0)

        # Slice out this direction's initial hidden state.
        initial_h_sliced = name + '/initial_h'
        init_net.Slice(initial_h, initial_h_sliced,
                       starts=[direction_offset + 0, 0, 0],
                       ends=[direction_offset + 1, -1, -1])

        # The backward direction runs on a time-reversed copy of the
        # input and its outputs are reversed back afterwards.
        if direction_offset == 1:
            input = pred_mh.net.ReversePackedSegs(
                [input_blob, sequence_lens], name + "/input-reversed")
        else:
            input = input_blob

        hidden_t_all, hidden_t_last = gru_cell.GRU(
            pred_mh,
            input,
            sequence_lens,
            [initial_h_sliced],
            input_size,
            hidden_size,
            name,
            drop_states=True,
            forward_only=True,
            linear_before_reset=linear_before_reset)

        if direction_offset == 1:
            hidden_t_all = pred_mh.net.ReversePackedSegs(
                [hidden_t_all, sequence_lens], name + "/output-reversed")

        return hidden_t_all, hidden_t_last

    if direction == 'forward':
        hidden_t_all, hidden_t_last = make_gru(0)
        # Rename internal output blobs onto the ONNX node's outputs.
        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={hidden_t_all: n.outputs[0],
                        hidden_t_last: n.outputs[1]})
    elif direction == 'bidirectional':
        hidden_t_all_f, hidden_t_last_f = make_gru(0)
        hidden_t_all_b, hidden_t_last_b = make_gru(1)
        pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                           [n.outputs[0], dummy_name()], axis=2)
        # NOTE(review): the last-hidden concat uses axis=2 here, while
        # the analogous LSTM converter below uses axis=0 — confirm
        # against the expected ONNX Y_h layout.
        pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                           [n.outputs[1], dummy_name()], axis=2)

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
def _create_lstm(cls, init_model, pred_model, n, opset_version):
    """Convert an ONNX LSTM node `n` into Caffe2 operators (forward only).

    Uses rnn_cell.LSTM with return_params=True so the init net can slice
    ONNX's packed W/R/B directly into the cell's own parameter blobs.
    Returns a Caffe2Ops triple.
    """
    assert init_model is not None, "cannot convert LSTMs without access to the full model"
    assert pred_model is not None, "cannot convert LSTMs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    assert not attrs, "unsupported LSTM attributes: " + str(attrs.keys())

    input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

    # ONNX encodes an absent optional input as the empty string.
    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n,
                                          input_blob, W)
    if input_size is None:
        raise RuntimeError(
            "best-effort shape inference for LSTM input failed")

    name = dummy_name()

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    hidden_t_all, hidden_t_last, _, _, params = rnn_cell.LSTM(
        pred_mh,
        input_blob,
        sequence_lens,
        [initial_h, initial_c],
        input_size,
        hidden_size,
        name,
        drop_states=True,
        forward_only=True,
        return_params=True)

    # input and recurrence biases are squashed together in onnx but not
    # in caffe2 — split B into the i2h and gate halves (4 gates each).
    Bi = name + "_bias_i2h"
    Br = name + "_bias_gates"
    init_net.Slice(B, Bi, starts=[0 * hidden_size], ends=[4 * hidden_size])
    init_net.Slice(B, Br, starts=[4 * hidden_size], ends=[8 * hidden_size])

    # caffe2 has a different gate order from onnx. We need to rearrange
    #   i o f c  ->  i f o c
    # Each entry: (source blob, destination param blob, extra (start, end)
    # pairs for trailing dims).
    reforms = ((W, params['input']['weights'], [(0, input_size)]),
               (R, params['recurrent']['weights'], [(0, hidden_size)]),
               (Bi, params['input']['biases'], []),
               (Br, params['recurrent']['biases'], []))
    for name_from, name_to, extra_dims in reforms:
        xi, xo, xf, xc = [
            name_from + suffix for suffix in ("_i", "_o", "_f", "_c")
        ]
        for i, x in enumerate([xi, xo, xf, xc]):
            # Per-gate slice along dim 0 in ONNX's i o f c order.
            dim0 = i * hidden_size, (i + 1) * hidden_size
            starts, ends = zip(dim0, *extra_dims)
            init_net.Slice(name_from, x, starts=starts, ends=ends)
        # Reassemble in caffe2's i f o c order.
        init_net.Concat([xi, xf, xo, xc], [name_to, dummy_name()], axis=0)

    # Rename the LSTM's internal output blobs to the ONNX node's outputs.
    pred_mh.net = pred_mh.net.Clone(
        "dummy-clone-net",
        blob_remap={hidden_t_all: n.outputs[0],
                    hidden_t_last: n.outputs[1]})
    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
def _create_rnn(cls, init_model, pred_model, n, opset_version):
    """Convert an ONNX RNN node `n` (forward or bidirectional) to Caffe2.

    Builds one rnn_cell.BasicRNN per direction via the nested `make_rnn`;
    a plain RNN has a single gate, so no gate reordering is needed —
    only per-direction slicing of W, R, B and initial_h. Returns a
    Caffe2Ops triple.
    """
    assert init_model is not None, "cannot convert RNNs without access to the full model"
    assert pred_model is not None, "cannot convert RNNs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    activation = force_unicode(attrs.pop('activations', ('tanh',))[0])
    direction = force_unicode(attrs.pop('direction', 'forward'))
    assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
    assert direction in ['forward', 'bidirectional'], "unsupported backwards RNN"

    input_blob, W, R, B, sequence_lens, initial_h = n.inputs

    # ONNX encodes an absent optional input as the empty string.
    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n,
                                          input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for RNN input failed")

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    def make_rnn(direction_offset):
        # direction_offset: 0 = forward pass, 1 = backward pass.
        name = dummy_name()

        # input and recurrence biases are squashed together in
        # onnx but not in caffe2 — slice this direction's halves apart.
        bias_offset = 2 * direction_offset * hidden_size
        init_net.Slice(B, name + "/i2h_b",
                       starts=[bias_offset + 0 * hidden_size],
                       ends=[bias_offset + 1 * hidden_size])
        init_net.Slice(B, name + "/gates_t_b",
                       starts=[bias_offset + 1 * hidden_size],
                       ends=[bias_offset + 2 * hidden_size])

        weight_offset = direction_offset * hidden_size
        init_net.Slice(W, name + '/i2h_w',
                       starts=[weight_offset + 0 * hidden_size, 0],
                       ends=[weight_offset + 1 * hidden_size, -1])
        init_net.Slice(R, name + '/gates_t_w',
                       starts=[weight_offset + 0 * hidden_size, 0],
                       ends=[weight_offset + 1 * hidden_size, -1])

        # Slice out this direction's initial hidden state.
        initial_h_sliced = name + '/initial_h'
        init_net.Slice(initial_h, initial_h_sliced,
                       starts=[direction_offset + 0, 0, 0],
                       ends=[direction_offset + 1, -1, -1])

        # The backward direction runs on a time-reversed copy of the
        # input and its outputs are reversed back afterwards.
        if direction_offset == 1:
            input = pred_mh.net.ReversePackedSegs(
                [input_blob, sequence_lens], name + "/input-reversed")
        else:
            input = input_blob

        hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
            pred_mh,
            input,
            sequence_lens,
            [initial_h_sliced],
            input_size,
            hidden_size,
            name,
            drop_states=True,
            forward_only=True,
            activation=activation)

        if direction_offset == 1:
            hidden_t_all = pred_mh.net.ReversePackedSegs(
                [hidden_t_all, sequence_lens], name + "/output-reversed")

        return hidden_t_all, hidden_t_last

    if direction == 'forward':
        hidden_t_all, hidden_t_last = make_rnn(0)
        # Rename internal output blobs onto the ONNX node's outputs.
        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={hidden_t_all: n.outputs[0],
                        hidden_t_last: n.outputs[1]})
    elif direction == 'bidirectional':
        hidden_t_all_f, hidden_t_last_f = make_rnn(0)
        hidden_t_all_b, hidden_t_last_b = make_rnn(1)
        pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                           [n.outputs[0], dummy_name()], axis=2)
        pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                           [n.outputs[1], dummy_name()], axis=2)

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
def _create_lstm(cls, init_model, pred_model, n, opset_version):
    """Convert an ONNX LSTM node `n` (forward or bidirectional) to Caffe2.

    Builds one rnn_cell.LSTM per direction via the nested `make_lstm`,
    slicing that direction's weights, biases and initial states out of
    the packed ONNX blobs and reordering gates in the init net.
    Returns a Caffe2Ops triple.
    """
    assert init_model is not None, "cannot convert LSTMs without access to the full model"
    assert pred_model is not None, "cannot convert LSTMs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    direction = force_unicode(attrs.pop('direction', 'forward'))
    assert not attrs, "unsupported LSTM attributes: " + str(attrs.keys())
    assert direction in ['forward', 'bidirectional'], "unsupported backwards LSTM"

    input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

    # ONNX encodes an absent optional input as the empty string.
    if sequence_lens == "":
        sequence_lens = None

    input_size = cls._rnn_shape_inference(init_model, pred_model, n,
                                          input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for LSTM input failed")

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    def make_lstm(direction_offset):
        # direction_offset: 0 = forward pass, 1 = backward pass.
        name = dummy_name()

        # input and recurrence biases are squashed together in
        # onnx but not in caffe2 — slice this direction's halves apart
        # (4 gates each).
        bias_offset = 8 * direction_offset * hidden_size
        Bi = init_net.Slice(B, name + "_bias_i2h",
                            starts=[bias_offset + 0 * hidden_size],
                            ends=[bias_offset + 4 * hidden_size])
        Br = init_net.Slice(B, name + "_bias_gates",
                            starts=[bias_offset + 4 * hidden_size],
                            ends=[bias_offset + 8 * hidden_size])

        weight_offset = 4 * direction_offset * hidden_size
        W_ = init_net.Slice(W, name + '/i2h_w_pre',
                            starts=[weight_offset + 0 * hidden_size, 0],
                            ends=[weight_offset + 4 * hidden_size, -1])
        R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                            starts=[weight_offset + 0 * hidden_size, 0],
                            ends=[weight_offset + 4 * hidden_size, -1])

        # caffe2 has a different gate order from onnx. We need to
        # rearrange  i o f c  ->  i f o c
        reforms = ((W_, 'i2h_w', [(0, -1)]),
                   (R_, 'gates_t_w', [(0, -1)]),
                   (Bi, 'i2h_b', []),
                   (Br, 'gates_t_b', []))
        for name_from, name_to, extra_dims in reforms:
            xi, xo, xf, xc = [name_from + suffix
                              for suffix in ("_i", "_o", "_f", "_c")]
            for i, x in enumerate([xi, xo, xf, xc]):
                # Per-gate slice along dim 0 in ONNX's i o f c order.
                dim0 = i * hidden_size, (i + 1) * hidden_size
                starts, ends = zip(dim0, *extra_dims)
                init_net.Slice(name_from, x, starts=starts, ends=ends)
            # Reassemble in caffe2's i f o c order.
            init_net.Concat([xi, xf, xo, xc],
                            ['%s/%s' % (name, name_to), dummy_name()],
                            axis=0)

        # Slice out this direction's initial hidden and cell states.
        initial_h_sliced = name + '/initial_h'
        init_net.Slice(initial_h, initial_h_sliced,
                       starts=[direction_offset + 0, 0, 0],
                       ends=[direction_offset + 1, -1, -1])
        initial_c_sliced = name + '/initial_c'
        init_net.Slice(initial_c, initial_c_sliced,
                       starts=[direction_offset + 0, 0, 0],
                       ends=[direction_offset + 1, -1, -1])

        # The backward direction runs on a time-reversed copy of the
        # input and its outputs are reversed back afterwards.
        if direction_offset == 1:
            input = pred_mh.net.ReversePackedSegs(
                [input_blob, sequence_lens], name + "/input-reversed")
        else:
            input = input_blob

        # NOTE(review): `params` is unpacked but unused here; the cell's
        # parameter blobs are addressed by hardcoded names above instead.
        hidden_t_all, hidden_t_last, _, cell_last, params = rnn_cell.LSTM(
            pred_mh,
            input,
            sequence_lens,
            [initial_h_sliced, initial_c_sliced],
            input_size,
            hidden_size,
            name,
            drop_states=False,
            forward_only=True,
            return_params=True)

        if direction_offset == 1:
            hidden_t_all = pred_mh.net.ReversePackedSegs(
                [hidden_t_all, sequence_lens], name + "/output-reversed")

        return hidden_t_all, hidden_t_last, cell_last

    if direction == 'forward':
        hidden_t_all, hidden_t_last, cell_last = make_lstm(0)

        # in the forward case, storage is shared between the three
        # outputs. We need to decouple them so that the
        # VariableLengthSequencePadding only mutates n.outputs[0]
        pred_mh.net.Copy(hidden_t_last, n.outputs[1])
        pred_mh.net.Copy(cell_last, n.outputs[2])

        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={hidden_t_all: n.outputs[0]})
    elif direction == 'bidirectional':
        hidden_t_all_f, hidden_t_last_f, cell_last_f = make_lstm(0)
        hidden_t_all_b, hidden_t_last_b, cell_last_b = make_lstm(1)
        # Y concatenates along hidden (axis 2); Y_h/Y_c stack the two
        # directions along axis 0.
        pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                           [n.outputs[0], dummy_name()], axis=2)
        pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                           [n.outputs[1], dummy_name()], axis=0)
        pred_mh.net.Concat([cell_last_f, cell_last_b],
                           [n.outputs[2], dummy_name()], axis=0)

    # Zero out positions past each sequence's length in Y.
    if sequence_lens is not None:
        pred_mh.net.VariableLengthSequencePadding(
            [n.outputs[0], sequence_lens], [n.outputs[0]])

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
def double_matmul():
    """Build a tiny model of two chained 10x10 fully-connected layers.

    Returns:
        (model, input_shapes): the ModelHelper with both FC outputs
        marked as external outputs, and the expected input shape list
        for blob "data".
    """
    helper = ModelHelper(name="r")
    first_out = brew.fc(helper, "data", "fc0", 10, 10)
    second_out = brew.fc(helper, first_out, "fc1", 10, 10)
    # Expose both intermediate and final activations as outputs.
    helper.Proto().external_output[:] = [str(first_out), str(second_out)]
    return helper, [(1, 10)]
# Minimal brew/ModelHelper walkthrough: a single fully-connected layer
# plus softmax on random MNIST-sized input.
from caffe2.python import brew, core, scope, workspace
from caffe2.python.modeling.parameter_info import ParameterTags
from caffe2.python.model_helper import ModelHelper
from caffe2.python.cnn import CNNModelHelper
import unittest
import numpy as np

m, k, n = (1, 28 * 28, 10)  # [m][k] * [k][n] = [m][n]
x = np.random.rand(m, k).astype(np.float32) - 0.5  # x = m*k 2D tensor
workspace.ResetWorkspace()  # clear workspace
workspace.FeedBlob("x", x)  # feed x as a blob
model = ModelHelper(name="test_model")  # create model
# Model's protocol buffer before adding operators. NOTE: as a plain
# script this bare expression is evaluated and discarded — it only
# displays something in an interactive/notebook session.
model.Proto()
brew.fc(
    model, "x", "y", k, n
)  # fully connected NN, weight = k*n 2D tensor /// bias, y = m*n 2D tensor
brew.softmax(model, "y", "z")
model.Validate()
# Protocol buffer after adding operators (also discarded outside a REPL).
model.Proto()
workspace.RunNetOnce(
    model.param_init_net)  # init [y_w(weight), y_b(bias) (randomize)]
# weight is 2D array, bias is 1D array
workspace.Blobs()  # workspace's blob names (value discarded outside a REPL)
# workspace.FetchBlob("y_w")
# workspace.FetchBlob("y_b")
workspace.RunNetOnce(model.net)
def _create_rnn_variant(cls, init_model, pred_model, n, opset_version):
    """Convert an ONNX RNN/GRU/LSTM node `n` into Caffe2 operators.

    Older variant: unlike the sibling implementation in this file it
    performs no Reshape/Squeeze of W/R/B and always emits the weight
    reshaping into the init net. Returns a Caffe2Ops triple.
    """
    assert init_model is not None, "cannot convert RNNs without access to the full model"
    assert pred_model is not None, "cannot convert RNNs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    direction = force_unicode(attrs.pop('direction', 'forward'))

    if n.op_type == 'RNN':
        activation = force_unicode(attrs.pop('activations', ('tanh',))[0])
    elif n.op_type == 'GRU':
        linear_before_reset = attrs.pop('linear_before_reset', 0)

    assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
    assert direction in ['forward', 'bidirectional'], "unsupported backwards RNN/GRU/LSTM"

    if n.op_type in ['RNN', 'GRU']:
        input_blob, W, R, B, sequence_lens, initial_h = n.inputs
    elif n.op_type == 'LSTM':
        input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

    # ONNX encodes an absent optional input as the empty string.
    if sequence_lens == "":
        sequence_lens = None

    # NOTE(review): reads dim[1] of W — consistent with the rest of this
    # variant never squeezing a leading num_directions axis, i.e. it
    # assumes a 2-D W here; the newer variant reads dim[2] of a 3-D W.
    # Confirm against the producer before changing.
    for x in itertools.chain(init_model.graph.input,
                             init_model.graph.value_info,
                             pred_model.graph.input,
                             pred_model.graph.value_info):
        if x.name == W:
            input_size = x.type.tensor_type.shape.dim[1].dim_value
            break
    else:
        raise RuntimeError("best-effort shape inference for RNN/GRU/LSTM failed")

    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    if n.op_type == 'RNN':
        def reform(*args):
            # Plain RNN weights need no gate reordering.
            pass

        def make_cell(*args, **kwargs):
            return rnn_cell.BasicRNN(*args, activation=activation, **kwargs)

        def make_rnn(direction_offset):
            return cls._make_rnn_direction(
                input_blob, B, W, R, [(initial_h, '/initial_h')],
                sequence_lens, pred_mh, init_net, input_size, hidden_size,
                1, direction_offset, "/i2h_b", "/gates_t_b", "/i2h_w",
                "/gates_t_w", reform, make_cell, lambda x: x)
    elif n.op_type == 'GRU':
        def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
            # caffe2 has a different gate order from onnx. We need to
            # rearrange  z r h  ->  r z h
            reforms = ((W_, 'i2h_w', True, [(0, -1)]),
                       (R_, 'gate_t_w', False, [(0, -1)]),
                       (Bi, 'i2h_b', True, []),
                       (Br, 'gate_t_b', False, []))
            cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                    ['update', 'reset', 'output'],
                                    [1, 0, 2])

        def make_cell(*args, **kwargs):
            return gru_cell.GRU(*args,
                                linear_before_reset=linear_before_reset,
                                **kwargs)

        def make_rnn(direction_offset):
            return cls._make_rnn_direction(
                input_blob, B, W, R, [(initial_h, '/initial_h')],
                sequence_lens, pred_mh, init_net, input_size, hidden_size,
                3, direction_offset, "_bias_i2h", "_bias_gates",
                "/i2h_w_pre", "/gates_t_w_pre", reform, make_cell,
                lambda x: x)
    elif n.op_type == 'LSTM':
        def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
            # caffe2 has a different gate order from onnx. We need to
            # rearrange  i o f c  ->  i f o c
            reforms = ((W_, 'i2h_w', True, [(0, -1)]),
                       (R_, 'gates_t_w', True, [(0, -1)]),
                       (Bi, 'i2h_b', True, []),
                       (Br, 'gates_t_b', True, []))
            cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                    ['input', 'output', 'forget', 'cell'],
                                    [0, 2, 1, 3])

        def make_cell(*args, **kwargs):
            return rnn_cell.LSTM(*args, **kwargs)

        def make_rnn(direction_offset):
            return cls._make_rnn_direction(
                input_blob, B, W, R,
                [(initial_h, '/initial_h'), (initial_c, '/initial_c')],
                sequence_lens, pred_mh, init_net, input_size, hidden_size,
                4, direction_offset, "/i2h_b", "/gates_t_b", "/i2h_w",
                "/gates_t_w", reform, make_cell,
                lambda x: [x[0], x[1], x[3]])

    if direction == 'forward':
        outputs = make_rnn(0)

        # in the forward case, storage is shared between the
        # last outputs. We need to decouple them so that the
        # VariableLengthSequencePadding only mutates
        # n.outputs[0]
        for i in range(1, len(outputs)):
            pred_mh.net.Copy(outputs[i], n.outputs[i])

        # Rename the primary output blob onto the ONNX node's output.
        pred_mh.net = pred_mh.net.Clone(
            "dummy-clone-net",
            blob_remap={outputs[0]: n.outputs[0]})
    elif direction == 'bidirectional':
        outputs_f = make_rnn(0)
        outputs_b = make_rnn(1)
        # Y concatenates along hidden (axis 2); remaining state outputs
        # stack the two directions along axis 0.
        pred_mh.net.Concat([outputs_f[0], outputs_b[0]],
                           [n.outputs[0], cls.dummy_name()], axis=2)
        for i in range(1, len(n.outputs)):
            pred_mh.net.Concat([outputs_f[i], outputs_b[i]],
                               [n.outputs[i], cls.dummy_name()], axis=0)

    # Zero out positions past each sequence's length in Y.
    if sequence_lens is not None:
        pred_mh.net.VariableLengthSequencePadding(
            [n.outputs[0], sequence_lens], [n.outputs[0]])

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))