Exemple #1
0
        def make_rnn(direction_offset):
            """Build the RNN for one direction and return its hidden-state blobs.

            Slices this direction's biases, weights and initial hidden state
            out of the packed blobs ``B``, ``W``, ``R`` and ``initial_h`` (all
            taken from the enclosing scope) into ``init_net``, then adds a
            ``rnn_cell.BasicRNN`` to ``pred_mh``.

            Args:
                direction_offset: Index of the direction whose parameter
                    slices are used. When it equals 1 the input is reversed
                    before the RNN and the per-step output is reversed back.

            Returns:
                The pair ``(hidden_t_all, hidden_t_last)`` produced by
                ``rnn_cell.BasicRNN`` (with ``hidden_t_all`` re-reversed when
                ``direction_offset`` is 1).
            """
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 2 * direction_offset * hidden_size
            init_net.Slice(B,
                           name + "/i2h_b",
                           starts=[bias_offset + 0 * hidden_size],
                           ends=[bias_offset + 1 * hidden_size])
            init_net.Slice(B,
                           name + "/gates_t_b",
                           starts=[bias_offset + 1 * hidden_size],
                           ends=[bias_offset + 2 * hidden_size])

            weight_offset = direction_offset * hidden_size
            init_net.Slice(W,
                           name + '/i2h_w',
                           starts=[weight_offset + 0 * hidden_size, 0],
                           ends=[weight_offset + 1 * hidden_size, -1])
            init_net.Slice(R,
                           name + '/gates_t_w',
                           starts=[weight_offset + 0 * hidden_size, 0],
                           ends=[weight_offset + 1 * hidden_size, -1])

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h,
                           initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends=[direction_offset + 1, -1, -1])

            reverse = direction_offset == 1

            if reverse:
                if sequence_lens is not None:
                    seq_lens_for_reverse = sequence_lens
                else:
                    # ReversePackedSegs needs a lengths blob. When the caller
                    # supplied none, synthesize one in which every batch
                    # entry spans the whole sequence.
                    # NOTE(review): assumes input_blob is laid out as
                    # (seq_len, batch, ...) - confirm against the caller.
                    input_shape = pred_mh.net.Shape(input_blob,
                                                    name + '/input_shape')
                    batch_size = pred_mh.net.Slice(input_shape,
                                                   name + '/batch_size_slice',
                                                   starts=[1],
                                                   ends=[2])
                    seq_len = pred_mh.net.Slice(input_shape,
                                                name + '/seq_len_slice',
                                                starts=[0],
                                                ends=[1])
                    dummy_sequence_lens = pred_mh.net.Tile(
                        [seq_len, batch_size],
                        name + '/dummy_sequence_lens',
                        axis=0)
                    # Flatten in place; the second output (old shape) is
                    # written to a throwaway blob.
                    pred_mh.net.Reshape(
                        dummy_sequence_lens,
                        [dummy_sequence_lens, dummy_name()],
                        shape=[-1])
                    seq_lens_for_reverse = dummy_sequence_lens

                rnn_input = pred_mh.net.ReversePackedSegs(
                    [input_blob, seq_lens_for_reverse],
                    name + "/input-reversed")
            else:
                rnn_input = input_blob

            hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
                pred_mh,
                rnn_input,
                sequence_lens, [initial_h_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=False,
                forward_only=True,
                activation=activation)

            if reverse:
                # Undo the input reversal so the per-step outputs line up
                # with the original time order.
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, seq_lens_for_reverse],
                    name + "/output-reversed")

            return hidden_t_all, hidden_t_last
Exemple #2
0
    def _create_rnn(cls, init_model, pred_model, n, opset_version):
        """Convert a single-direction RNN node ``n`` into Caffe2 operators.

        Reads ``hidden_size`` and the first entry of ``activations`` from the
        node attributes, splits the packed bias ``B`` into the two bias blobs
        used by ``rnn_cell.BasicRNN``, copies ``W`` and ``R`` under the names
        the cell looks up, and remaps the cell's outputs onto ``n.outputs``.

        Args:
            init_model: Init model; must not be None.
            pred_model: Predict model; must not be None.
            n: Node providing ``attrs``, ``inputs`` (six entries) and
                ``outputs`` (at least two entries).
            opset_version: Not used by this method.

        Returns:
            ``Caffe2Ops`` built from the predict-net ops, the init-net ops and
            the predict net's external inputs.

        Raises:
            AssertionError: If a model is missing or unsupported attributes
                remain after the known ones are consumed.
            RuntimeError: If input-size inference returns None.
        """
        for full_model in (init_model, pred_model):
            assert full_model is not None, "cannot convert RNNs without access to the full model"

        attrs = dict(n.attrs)  # private copy, so popping does not touch n
        hidden_size = attrs.pop('hidden_size')
        activation = attrs.pop('activations')[0]
        assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())

        input_blob, W, R, B, sequence_lens, initial_h = n.inputs
        # An empty name means the optional lengths input was omitted.
        sequence_lens = None if sequence_lens == "" else sequence_lens

        input_size = cls._rnn_shape_inference(init_model, pred_model, n,
                                              input_blob, W)
        if input_size is None:
            raise RuntimeError(
                "best-effort shape inference for RNN input failed")

        name = dummy_name()
        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        # input and recurrence biases are squashed together in onnx but not
        # in caffe2: first half is the input bias, second half the recurrent.
        init_net.Slice(B, name + "/i2h_b",
                       starts=[0], ends=[hidden_size])
        init_net.Slice(B, name + "/gates_t_b",
                       starts=[hidden_size], ends=[2 * hidden_size])

        # Weights are used as-is, just under the names the cell expects.
        init_net.Copy(W, name + '/i2h_w')
        init_net.Copy(R, name + '/gates_t_w')

        hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
            pred_mh,
            input_blob,
            sequence_lens,
            [initial_h],
            input_size,
            hidden_size,
            name,
            drop_states=True,
            forward_only=True,
            activation=activation)

        # Rename the cell's outputs to the blob names the node declares.
        output_remap = {
            hidden_t_all: n.outputs[0],
            hidden_t_last: n.outputs[1],
        }
        pred_mh.net = pred_mh.net.Clone("dummy-clone-net",
                                        blob_remap=output_remap)

        pred_proto = pred_mh.Proto()
        return Caffe2Ops(list(pred_proto.op),
                         list(init_net.Proto().op),
                         list(pred_proto.external_input))
Exemple #3
0
 def make_cell(*args, **kwargs):
     """Call ``rnn_cell.BasicRNN`` with a preset ``activation``.

     All positional and keyword arguments are forwarded unchanged;
     ``activation`` is a name defined outside this function.
     """
     return rnn_cell.BasicRNN(*args,
                              activation=activation,
                              **kwargs)
    def test_basic_rnn(self, seed, seq_length, batch_size, input_size,
                       hidden_size, drop_states, sequence_lengths, gc, dc):
        """Check ``rnn_cell.BasicRNN`` against ``basic_rnn_reference``.

        Random inputs and parameters are generated from ``seed``, fed into a
        temporary workspace under the ``basic_rnn`` scope, and the per-step
        hidden states of a forward-only tanh RNN are compared with the
        reference (atol = rtol = 1e-4). ``sequence_lengths`` toggles whether
        the lengths blob is passed to the cell and to the reference.
        """
        np.random.seed(seed)

        def rand_f32(*shape):
            # Standard-normal float32 tensor of the given shape.
            return np.random.randn(*shape).astype(np.float32)

        # The draw order below is kept fixed so a given seed yields the
        # same data.
        seq_lengths_data = np.random.randint(
            1, seq_length + 1, size=(batch_size, )).astype(np.int32)
        input_blob_data = rand_f32(seq_length, batch_size, input_size)
        initial_h_data = rand_f32(batch_size, hidden_size)
        gates_t_w_data = rand_f32(hidden_size, hidden_size)
        gates_t_b_data = rand_f32(hidden_size)
        i2h_w_data = rand_f32(hidden_size, input_size)
        i2h_b_data = rand_f32(hidden_size)

        feeds = (
            ('input_blob', input_blob_data),
            ('seq_lengths', seq_lengths_data),
            ('initial_h', initial_h_data),
            ('basic_rnn/gates_t_w', gates_t_w_data),
            ('basic_rnn/gates_t_b', gates_t_b_data),
            ('basic_rnn/i2h_w', i2h_w_data),
            ('basic_rnn/i2h_b', i2h_b_data),
        )
        lengths_blob = 'seq_lengths' if sequence_lengths else None

        with core.DeviceScope(gc):
            with hu.temp_workspace():
                for blob_name, blob_data in feeds:
                    workspace.FeedBlob(blob_name,
                                       blob_data,
                                       device_option=gc)

                model = ModelHelper(name='model')
                hidden_t_all, _ = rnn_cell.BasicRNN(
                    model,
                    'input_blob',
                    lengths_blob, ['initial_h'],
                    input_size,
                    hidden_size,
                    "basic_rnn",
                    activation='tanh',
                    forward_only=True,
                    drop_states=drop_states)

                workspace.RunNetOnce(model.net)

                result = workspace.FetchBlob(hidden_t_all)

        reference_lengths = seq_lengths_data if sequence_lengths else None
        reference = basic_rnn_reference(
            input_blob_data,
            initial_h_data,
            i2h_w_data,
            i2h_b_data,
            gates_t_w_data,
            gates_t_b_data,
            reference_lengths,
            drop_states=drop_states,
            use_sequence_lengths=sequence_lengths)

        np.testing.assert_allclose(result, reference, atol=1e-4, rtol=1e-4)
Exemple #5
0
        def make_rnn(direction_offset):
            """Build the RNN for one direction and return its hidden-state blobs.

            Slices this direction's biases, weights and initial hidden state
            out of the packed blobs ``B``, ``W``, ``R`` and ``initial_h`` (all
            from the enclosing scope) into ``init_net``, then adds a
            ``rnn_cell.BasicRNN`` to ``pred_mh``. When ``direction_offset`` is
            1 the input is reversed before the cell and the per-step output
            is reversed back afterwards.

            Returns:
                The pair ``(hidden_t_all, hidden_t_last)``.
            """
            name = cls.dummy_name()
            reverse = direction_offset == 1

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2: each direction owns two consecutive
            # hidden_size-long chunks of B.
            bias_begin = 2 * direction_offset * hidden_size
            bias_split = bias_begin + hidden_size
            bias_end = bias_split + hidden_size
            init_net.Slice(B, name + "/i2h_b",
                           starts=[bias_begin], ends=[bias_split])
            init_net.Slice(B, name + "/gates_t_b",
                           starts=[bias_split], ends=[bias_end])

            # W and R are cut along their first axis the same way.
            row_begin = direction_offset * hidden_size
            row_end = row_begin + hidden_size
            for packed, suffix in ((W, '/i2h_w'), (R, '/gates_t_w')):
                init_net.Slice(packed, name + suffix,
                               starts=[row_begin, 0],
                               ends=[row_end, -1])

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h, initial_h_sliced,
                           starts=[direction_offset, 0, 0],
                           ends=[direction_offset + 1, -1, -1])

            rnn_input = input_blob
            if reverse:
                if sequence_lens is None:
                    # No lengths given: build a lengths blob in which every
                    # batch entry spans the full sequence. Entry 0 of the
                    # input shape is taken as the sequence length and entry
                    # 1 as the batch size.
                    shape_blob = pred_mh.net.Shape(input_blob,
                                                   name + '/input_shape')
                    batch_blob = pred_mh.net.Slice(
                        shape_blob, name + '/batch_size_slice',
                        starts=[1], ends=[2])
                    steps_blob = pred_mh.net.Slice(
                        shape_blob, name + '/seq_len_slice',
                        starts=[0], ends=[1])
                    seq_lens_for_reverse = pred_mh.net.Tile(
                        [steps_blob, batch_blob],
                        name + '/dummy_sequence_lens',
                        axis=0)
                    # Flatten in place; the old-shape output goes to a
                    # throwaway blob.
                    pred_mh.net.Reshape(
                        seq_lens_for_reverse,
                        [seq_lens_for_reverse, cls.dummy_name()],
                        shape=[-1])
                else:
                    seq_lens_for_reverse = sequence_lens

                rnn_input = pred_mh.net.ReversePackedSegs(
                    [input_blob, seq_lens_for_reverse],
                    name + "/input-reversed")

            hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
                pred_mh,
                rnn_input,
                sequence_lens, [initial_h_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=False,
                forward_only=True,
                activation=activation)

            if not reverse:
                return hidden_t_all, hidden_t_last

            # Undo the input reversal on the per-step outputs.
            hidden_t_all = pred_mh.net.ReversePackedSegs(
                [hidden_t_all, seq_lens_for_reverse],
                name + "/output-reversed")
            return hidden_t_all, hidden_t_last
    def create_gru_unit(self, emb_ls, user_emb_ids, model, tag, seq_q, hid_q):
        """Add a two-layer recurrent block over the selected embeddings.

        The embeddings picked by ``user_emb_ids`` are concatenated and
        reshaped to ``(len(user_emb_ids), -1, self.input_size)``, run through
        ``rnn_0``, combined with a softmax-over-FC of that layer's output,
        and run through ``rnn_1``. Despite the method name, both layers are
        built with ``rnn_cell.BasicRNN`` (tanh, forward-only). Parameters for
        both layers are random and fed straight into the workspace.

        Args:
            emb_ls: Indexable collection of embedding blobs.
            user_emb_ids: Indices into ``emb_ls`` to feed to the RNN.
            model: Model whose net receives the concat/reshape/dequeue/RNN
                ops.
            tag: 3-tuple ``(tag_layer, tag_in, tag_out)``; only ``tag_layer``
                is used, as a blob-name prefix.
            seq_q: If truthy, queue dequeued into the ``seq_lengths`` blob.
            hid_q: If truthy, queue dequeued into the ``initial_h`` blob.

        Returns:
            The second output of the ``rnn_1`` ``BasicRNN`` call.
        """
        tag_layer, _tag_in, _tag_out = tag
        hidden = self.args.hidden_size

        def feed_random_params(scope, dim_in):
            # Draw and feed the four parameter blobs BasicRNN reads under
            # `scope`. All four are drawn before any is fed.
            recurrent_w = np.random.randn(hidden, hidden).astype(np.float32)
            recurrent_b = np.random.randn(hidden).astype(np.float32)
            input_w = np.random.randn(hidden, dim_in).astype(np.float32)
            input_b = np.random.randn(hidden).astype(np.float32)
            workspace.FeedBlob(scope + '/gates_t_w', recurrent_w)
            workspace.FeedBlob(scope + '/gates_t_b', recurrent_b)
            workspace.FeedBlob(scope + '/i2h_w', input_w)
            workspace.FeedBlob(scope + '/i2h_b', input_b)

        selected_embs = [emb_ls[emb_id] for emb_id in user_emb_ids]

        concat_name = tag_layer + ":::_rnn_inputs"
        rnn_inputs, _ = model.net.Concat(
            selected_embs, [concat_name, concat_name + "_info"])

        rnn_shape_name = tag_layer + ":::rnn_shape"
        model.net.Reshape(
            rnn_inputs, [rnn_shape_name, "old_shape"],
            shape=(len(user_emb_ids), -1, self.input_size))

        feed_random_params('rnn_0', self.input_size)

        if seq_q:
            model.net.DequeueBlobs(seq_q, "seq_lengths")
        if hid_q:
            model.net.DequeueBlobs(hid_q, "initial_h")

        rnn_0_out, _ = rnn_cell.BasicRNN(model,
                                         rnn_shape_name,
                                         'seq_lengths', ['initial_h'],
                                         self.input_size,
                                         hidden,
                                         "rnn_0",
                                         activation="tanh",
                                         forward_only=True)

        # NOTE(review): the ops below are added through self.model while
        # everything else uses the `model` argument - confirm callers always
        # pass self.model.
        fc_out = brew.fc(self.model,
                         rnn_0_out,
                         None,
                         dim_in=hidden,
                         dim_out=hidden,
                         axis=2,
                         engine=self.args.engine,
                         max_num_tasks=self.args.fc_workers)
        attention = brew.softmax(self.model, fc_out, axis=2)
        rnn_1_in = brew.sum(self.model, rnn_0_out, attention, axis=2)

        # TODO: Need to make input_h_data an input to the overall model due to
        # batch-size
        feed_random_params('rnn_1', hidden)

        _, rnn_1_out = rnn_cell.BasicRNN(model,
                                         rnn_1_in,
                                         'seq_lengths',
                                         ['initial_h'],
                                         hidden,
                                         hidden,
                                         "rnn_1",
                                         activation="tanh",
                                         forward_only=True)

        return rnn_1_out