def make_rnn(direction_offset):
    """Emit the ops for one direction of a basic RNN.

    Slices the per-direction parts of ``B``, ``W``, ``R`` and ``initial_h``
    into ``init_net`` blobs under a fresh dummy name, optionally reverses the
    input with ``ReversePackedSegs``, and runs ``rnn_cell.BasicRNN`` on
    ``pred_mh``. All of those names come from the enclosing scope.

    Args:
        direction_offset: index of the direction to build. A value of ``1``
            triggers input/output reversal (presumably the backward
            direction -- confirm against the caller).

    Returns:
        Tuple ``(hidden_t_all, hidden_t_last)`` as returned by
        ``rnn_cell.BasicRNN``; ``hidden_t_all`` is reversed back when
        ``direction_offset == 1``.

    NOTE(review): ``sequence_lens`` is passed to ``ReversePackedSegs``
    unchanged; if the enclosing scope can set it to ``None`` this path
    should be checked.
    """
    scope = dummy_name()
    reverse = direction_offset == 1

    # input and recurrence biases are squashed together in
    # onnx but not in caffe2
    bias_begin = 2 * direction_offset * hidden_size
    bias_mid = bias_begin + 1 * hidden_size
    bias_end = bias_begin + 2 * hidden_size
    init_net.Slice(B, scope + "/i2h_b", starts=[bias_begin], ends=[bias_mid])
    init_net.Slice(B, scope + "/gates_t_b", starts=[bias_mid], ends=[bias_end])

    # W and R hold one (hidden_size x ...) row band per direction.
    row_begin = direction_offset * hidden_size
    row_end = row_begin + 1 * hidden_size
    for source, suffix in ((W, '/i2h_w'), (R, '/gates_t_w')):
        init_net.Slice(source, scope + suffix,
                       starts=[row_begin, 0],
                       ends=[row_end, -1])

    initial_h_sliced = scope + '/initial_h'
    init_net.Slice(initial_h, initial_h_sliced,
                   starts=[direction_offset, 0, 0],
                   ends=[direction_offset + 1, -1, -1])

    rnn_input = input_blob
    if reverse:
        rnn_input = pred_mh.net.ReversePackedSegs(
            [input_blob, sequence_lens], scope + "/input-reversed")

    hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
        pred_mh,
        rnn_input,
        sequence_lens,
        [initial_h_sliced],
        input_size,
        hidden_size,
        scope,
        drop_states=False,
        forward_only=True,
        activation=activation)

    if reverse:
        # Undo the input reversal so outputs line up with the original order.
        hidden_t_all = pred_mh.net.ReversePackedSegs(
            [hidden_t_all, sequence_lens], scope + "/output-reversed")

    return hidden_t_all, hidden_t_last
def _create_rnn(cls, init_model, pred_model, n, opset_version):
    """Translate a single-direction RNN node into Caffe2 operators.

    Args:
        init_model: initializer model; must not be ``None``.
        pred_model: prediction model; must not be ``None``.
        n: node providing ``attrs`` (``hidden_size``, ``activations``),
            six ``inputs`` and at least two ``outputs``.
        opset_version: accepted for interface compatibility; not read here.

    Returns:
        ``Caffe2Ops`` built from the prediction-net ops, the init-net ops
        and the prediction net's external inputs.

    Raises:
        AssertionError: if either model is ``None`` or the node carries
            attributes other than ``hidden_size`` / ``activations``.
        RuntimeError: if the input size cannot be inferred.
    """
    assert init_model is not None, "cannot convert RNNs without access to the full model"
    assert pred_model is not None, "cannot convert RNNs without access to the full model"

    # Work on a copy so popping consumed attributes never mutates the node.
    leftover = dict(n.attrs)
    hidden_size = leftover.pop('hidden_size')
    activation = leftover.pop('activations')[0]
    assert not leftover, "unsupported RNN attributes: " + str(leftover.keys())

    input_blob, W, R, B, sequence_lens, initial_h = n.inputs
    # An empty string marks an omitted optional input.
    sequence_lens = None if sequence_lens == "" else sequence_lens

    input_size = cls._rnn_shape_inference(
        init_model, pred_model, n, input_blob, W)
    if input_size is None:
        raise RuntimeError(
            "best-effort shape inference for RNN input failed")

    scope = dummy_name()
    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    # input and recurrence biases are squashed together in onnx but not in caffe2
    input_bias = scope + "/i2h_b"
    recurrent_bias = scope + "/gates_t_b"
    init_net.Slice(B, input_bias,
                   starts=[0 * hidden_size], ends=[1 * hidden_size])
    init_net.Slice(B, recurrent_bias,
                   starts=[1 * hidden_size], ends=[2 * hidden_size])

    hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
        pred_mh,
        input_blob,
        sequence_lens,
        [initial_h],
        input_size,
        hidden_size,
        scope,
        drop_states=True,
        forward_only=True,
        activation=activation)

    # Weights need no slicing for a single direction; copy them under the
    # blob names used for the cell created above.
    init_net.Copy(W, scope + '/i2h_w')
    init_net.Copy(R, scope + '/gates_t_w')

    # Rename the cell outputs to the names the node declares.
    output_names = {
        hidden_t_all: n.outputs[0],
        hidden_t_last: n.outputs[1],
    }
    pred_mh.net = pred_mh.net.Clone("dummy-clone-net", blob_remap=output_names)

    pred_proto = pred_mh.Proto()
    pred_ops = list(pred_proto.op)
    init_ops = list(init_net.Proto().op)
    external_inputs = list(pred_proto.external_input)
    return Caffe2Ops(pred_ops, init_ops, external_inputs)
def make_cell(*args, **kwargs):
    """Call ``rnn_cell.BasicRNN`` with ``activation`` filled in.

    Positional and keyword arguments are forwarded unchanged; ``activation``
    is a name defined outside this function.

    Returns:
        Whatever ``rnn_cell.BasicRNN`` returns.
    """
    build_rnn = rnn_cell.BasicRNN
    return build_rnn(*args, activation=activation, **kwargs)
def test_basic_rnn(self, seed, seq_length, batch_size, input_size,
                   hidden_size, drop_states, sequence_lengths, gc, dc):
    """Compare ``rnn_cell.BasicRNN`` (tanh) with ``basic_rnn_reference``.

    Random inputs and parameters are drawn from a NumPy generator seeded
    with ``seed``, fed into a temporary workspace under the ``basic_rnn``
    scope, and the network's full hidden-state output is compared to the
    reference with ``atol = rtol = 1e-4``.

    ``sequence_lengths`` selects whether the per-example lengths are passed
    to both the cell and the reference. ``dc`` is not used here.
    """
    np.random.seed(seed)

    def randn32(*shape):
        # Standard-normal float32 array of the given shape.
        return np.random.randn(*shape).astype(np.float32)

    # NOTE: the order of the draws below is fixed by the seed; do not reorder.
    seq_lengths_data = np.random.randint(
        1, seq_length + 1, size=(batch_size, )).astype(np.int32)
    input_blob_data = randn32(seq_length, batch_size, input_size)
    initial_h_data = randn32(batch_size, hidden_size)
    gates_t_w_data = randn32(hidden_size, hidden_size)
    gates_t_b_data = randn32(hidden_size)
    i2h_w_data = randn32(hidden_size, input_size)
    i2h_b_data = randn32(hidden_size)

    feeds = (
        ('input_blob', input_blob_data),
        ('seq_lengths', seq_lengths_data),
        ('initial_h', initial_h_data),
        ('basic_rnn/gates_t_w', gates_t_w_data),
        ('basic_rnn/gates_t_b', gates_t_b_data),
        ('basic_rnn/i2h_w', i2h_w_data),
        ('basic_rnn/i2h_b', i2h_b_data),
    )

    with core.DeviceScope(gc), hu.temp_workspace():
        for blob_name, blob_value in feeds:
            workspace.FeedBlob(blob_name, blob_value, device_option=gc)

        model = ModelHelper(name='model')
        lengths_blob = 'seq_lengths' if sequence_lengths else None
        hidden_t_all, _ = rnn_cell.BasicRNN(
            model,
            'input_blob',
            lengths_blob,
            ['initial_h'],
            input_size,
            hidden_size,
            "basic_rnn",
            activation='tanh',
            forward_only=True,
            drop_states=drop_states)

        workspace.RunNetOnce(model.net)
        actual = workspace.FetchBlob(hidden_t_all)

        expected = basic_rnn_reference(
            input_blob_data,
            initial_h_data,
            i2h_w_data,
            i2h_b_data,
            gates_t_w_data,
            gates_t_b_data,
            seq_lengths_data if sequence_lengths else None,
            drop_states=drop_states,
            use_sequence_lengths=sequence_lengths)

        np.testing.assert_allclose(actual, expected, atol=1e-4, rtol=1e-4)
def make_rnn(direction_offset):
    """Emit the ops for one direction of a basic RNN.

    Slices the per-direction parts of ``B``, ``W``, ``R`` and ``initial_h``
    into ``init_net`` blobs under a fresh dummy name, optionally reverses the
    input with ``ReversePackedSegs``, and runs ``rnn_cell.BasicRNN`` on
    ``pred_mh``. All of those names come from the enclosing scope.

    When reversal is needed and ``sequence_lens`` is ``None``, a lengths
    blob is built on the prediction net from the input's shape (the
    dimension-0 entry tiled by the dimension-1 entry, then flattened).

    Args:
        direction_offset: index of the direction to build. A value of ``1``
            triggers input/output reversal (presumably the backward
            direction -- confirm against the caller).

    Returns:
        Tuple ``(hidden_t_all, hidden_t_last)`` as returned by
        ``rnn_cell.BasicRNN``; ``hidden_t_all`` is reversed back when
        ``direction_offset == 1``.
    """
    scope = cls.dummy_name()
    reverse = direction_offset == 1

    # input and recurrence biases are squashed together in
    # onnx but not in caffe2
    bias_begin = 2 * direction_offset * hidden_size
    bias_mid = bias_begin + 1 * hidden_size
    bias_end = bias_begin + 2 * hidden_size
    init_net.Slice(B, scope + "/i2h_b", starts=[bias_begin], ends=[bias_mid])
    init_net.Slice(B, scope + "/gates_t_b", starts=[bias_mid], ends=[bias_end])

    # W and R hold one (hidden_size x ...) row band per direction.
    row_begin = direction_offset * hidden_size
    row_end = row_begin + 1 * hidden_size
    for source, suffix in ((W, '/i2h_w'), (R, '/gates_t_w')):
        init_net.Slice(source, scope + suffix,
                       starts=[row_begin, 0],
                       ends=[row_end, -1])

    initial_h_sliced = scope + '/initial_h'
    init_net.Slice(initial_h, initial_h_sliced,
                   starts=[direction_offset, 0, 0],
                   ends=[direction_offset + 1, -1, -1])

    if reverse:
        if sequence_lens is not None:
            reverse_lens = sequence_lens
        else:
            # No lengths supplied: derive a lengths blob from the input shape
            # so ReversePackedSegs still has a second input.
            shape_blob = pred_mh.net.Shape(input_blob, scope + '/input_shape')
            batch_size = pred_mh.net.Slice(
                shape_blob, scope + '/batch_size_slice', starts=[1], ends=[2])
            seq_len = pred_mh.net.Slice(
                shape_blob, scope + '/seq_len_slice', starts=[0], ends=[1])
            reverse_lens = pred_mh.net.Tile(
                [seq_len, batch_size], scope + '/dummy_sequence_lens', axis=0)
            # Flatten in place; the second output gets a throwaway name.
            pred_mh.net.Reshape(
                reverse_lens, [reverse_lens, cls.dummy_name()], shape=[-1])
        rnn_input = pred_mh.net.ReversePackedSegs(
            [input_blob, reverse_lens], scope + "/input-reversed")
    else:
        rnn_input = input_blob

    hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
        pred_mh,
        rnn_input,
        sequence_lens,
        [initial_h_sliced],
        input_size,
        hidden_size,
        scope,
        drop_states=False,
        forward_only=True,
        activation=activation)

    if reverse:
        # Undo the input reversal so outputs line up with the original order.
        hidden_t_all = pred_mh.net.ReversePackedSegs(
            [hidden_t_all, reverse_lens], scope + "/output-reversed")

    return hidden_t_all, hidden_t_last
def create_gru_unit(self, emb_ls, user_emb_ids, model, tag, seq_q, hid_q):
    """Add a two-layer recurrent stack over the selected embeddings.

    Despite the name, the layers built here are ``rnn_cell.BasicRNN`` cells
    with ``tanh`` activation. The selected embeddings are concatenated and
    reshaped to ``(len(user_emb_ids), -1, self.input_size)``, passed through
    ``rnn_0``, then an FC + softmax whose result is summed with the
    ``rnn_0`` output, and finally through ``rnn_1``. Parameters of both
    cells are random arrays fed straight into the workspace.

    Args:
        emb_ls: indexable collection of embedding blobs.
        user_emb_ids: indices into ``emb_ls`` selecting the RNN inputs.
        model: model whose ``net`` receives the concat/reshape/dequeue ops
            and on which both RNN cells are created.
        tag: 3-tuple; only its first element (the layer tag) is used.
        seq_q: if truthy, queue dequeued into the ``seq_lengths`` blob.
        hid_q: if truthy, queue dequeued into the ``initial_h`` blob.

    Returns:
        The second output of the ``rnn_1`` ``BasicRNN`` call.
    """
    tag_layer, _, _ = tag

    selected_embs = [emb_ls[emb_id] for emb_id in user_emb_ids]

    concat_name = tag_layer + ":::_rnn_inputs"
    concat_info_name = concat_name + "_info"
    rnn_inputs, _ = model.net.Concat(selected_embs,
                                     [concat_name, concat_info_name])

    reshaped_name = tag_layer + ":::rnn_shape"
    model.net.Reshape(
        rnn_inputs,
        [reshaped_name, "old_shape"],
        shape=(len(user_emb_ids), -1, self.input_size))

    def _draw_rnn_params(in_dim):
        # Draw order matters for reproducibility under a fixed NumPy seed:
        # recurrent weight, recurrent bias, input weight, input bias.
        hidden = self.args.hidden_size
        recurrent_w = np.random.randn(hidden, hidden).astype(np.float32)
        recurrent_b = np.random.randn(hidden).astype(np.float32)
        input_w = np.random.randn(hidden, in_dim).astype(np.float32)
        input_b = np.random.randn(hidden).astype(np.float32)
        return recurrent_w, recurrent_b, input_w, input_b

    # ---- first recurrent layer -------------------------------------------
    rec_w, rec_b, in_w, in_b = _draw_rnn_params(self.input_size)
    workspace.FeedBlob('rnn_0/gates_t_w', rec_w)
    workspace.FeedBlob('rnn_0/gates_t_b', rec_b)
    workspace.FeedBlob('rnn_0/i2h_w', in_w)
    workspace.FeedBlob('rnn_0/i2h_b', in_b)

    if seq_q:
        model.net.DequeueBlobs(seq_q, "seq_lengths")
    if hid_q:
        model.net.DequeueBlobs(hid_q, "initial_h")

    rnn_0_out, _ = rnn_cell.BasicRNN(
        model,
        reshaped_name,
        'seq_lengths',
        ['initial_h'],
        self.input_size,
        self.args.hidden_size,
        "rnn_0",
        activation="tanh",
        forward_only=True)

    # ---- FC + softmax, summed with the first layer's output --------------
    mixed = brew.fc(
        self.model,
        rnn_0_out,
        None,
        dim_in=self.args.hidden_size,
        dim_out=self.args.hidden_size,
        axis=2,
        engine=self.args.engine,
        max_num_tasks=self.args.fc_workers)
    mixed = brew.softmax(self.model, mixed, axis=2)
    mixed = brew.sum(self.model, rnn_0_out, mixed, axis=2)

    # ---- second recurrent layer ------------------------------------------
    # TODO: Need to make input_h_data an input to the overall model due to
    # batch-size
    rec_w, rec_b, in_w, in_b = _draw_rnn_params(self.args.hidden_size)
    workspace.FeedBlob('rnn_1/gates_t_w', rec_w)
    workspace.FeedBlob('rnn_1/gates_t_b', rec_b)
    workspace.FeedBlob('rnn_1/i2h_w', in_w)
    workspace.FeedBlob('rnn_1/i2h_b', in_b)

    _, rnn_1_out = rnn_cell.BasicRNN(
        model,
        mixed,
        'seq_lengths',
        ['initial_h'],
        self.args.hidden_size,
        self.args.hidden_size,
        "rnn_1",
        activation="tanh",
        forward_only=True)

    return rnn_1_out