Code example #1
    def test_crf_viterbi(self, num_tags, num_words):
        model = CNNModelHelper(name='external')
        predictions = np.random.randn(num_words, num_tags).astype(np.float32)
        transitions = np.random.uniform(low=-1,
                                        high=1,
                                        size=(num_tags + 2,
                                              num_tags + 2)).astype(np.float32)
        predictions_blob, transitions_blob = (model.net.AddExternalInputs(
            'predictions', 'crf_transitions'))
        workspace.FeedBlob(str(transitions_blob), transitions)
        workspace.FeedBlob(str(predictions_blob), predictions)
        crf_layer = crf.CRFWithLoss(model, num_tags, transitions_blob)

        updated_predictions = crf_update_predictions(model, crf_layer,
                                                     predictions_blob)
        ref_predictions = crf_layer.update_predictions(predictions_blob)

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)

        updated_predictions = workspace.FetchBlob(str(updated_predictions))
        ref_predictions = workspace.FetchBlob(str(ref_predictions))
        np.testing.assert_allclose(updated_predictions,
                                   ref_predictions,
                                   atol=1e-4,
                                   rtol=1e-4,
                                   err_msg='Mismatch in CRF predictions')
Code example #2
File: crf_test.py  Project: GeekLiB/caffe2-master
    def test_crf_gradient(self, num_tags, num_words):
        base_model = CNNModelHelper(name='base_model')
        transitions = np.random.randn(num_tags + 2,
                                      num_tags + 2).astype(np.float32)
        predictions = np.random.randn(num_words, 1,
                                      num_tags + 2).astype(np.float32)
        initial = np.random.randn(1, num_tags + 2).astype(np.float32)
        predictions_blob, transitions_blob, initial_blob = (
            base_model.net.AddExternalInputs('predictions_blob',
                                             'crf_transitions', 'initial_blob'))

        workspace.FeedBlob(str(predictions_blob), predictions)
        workspace.FeedBlob(str(transitions_blob), transitions)
        workspace.FeedBlob(str(initial_blob), initial)

        crf_layer = crf.CRFWithLoss(base_model, num_tags, transitions_blob)
        crf_layer.build_crf_net(predictions_blob, initial_blob,
                                transitions_blob)
        op = base_model.net._net.op[-1]
        workspace.RunNetOnce(base_model.param_init_net)
        gradients_to_check = (index
                              for (index, input_name) in enumerate(op.input)
                              if input_name != "crf_net/zero_segment_id")

        inputs = [workspace.FetchBlob(name) for name in op.input]
        for param in gradients_to_check:
            self.assertGradientChecks(
                device_option=hu.cpu_do,
                op=op,
                inputs=inputs,
                outputs_to_check=param,
                outputs_with_grads=[1],
                threshold=0.05,
                stepsize=0.001,
            )
Code example #3
    def lstm(self, create_lstm, t, n, d, ref, outputs_with_grads):
        model = CNNModelHelper(name='external')
        input_blob, seq_lengths, hidden_init, cell_init = (
            model.net.AddExternalInputs('input_blob', 'seq_lengths',
                                        'hidden_init', 'cell_init'))

        create_lstm(model,
                    input_blob,
                    seq_lengths, (hidden_init, cell_init),
                    d,
                    d,
                    scope="external/recurrent",
                    outputs_with_grads=outputs_with_grads)

        op = model.net._net.op[-1]

        workspace.RunNetOnce(model.param_init_net)
        input_blob = op.input[0]

        def generate_random_state(n, d):
            ndim = int(np.random.choice(3, 1)) + 1
            if ndim == 1:
                return np.random.randn(1, n, d).astype(np.float32)
            random_state = np.random.randn(n, d).astype(np.float32)
            if ndim == 3:
                random_state = random_state.reshape([1, n, d])
            return random_state

        workspace.FeedBlob(str(input_blob),
                           np.random.randn(t, n, d * 4).astype(np.float32))
        workspace.FeedBlob("hidden_init", generate_random_state(n, d))
        workspace.FeedBlob("cell_init", generate_random_state(n, d))
        workspace.FeedBlob(
            "seq_lengths",
            np.random.randint(1, t + 1, size=(n, )).astype(np.int32))
        inputs = [workspace.FetchBlob(name) for name in op.input]

        print(op.input)
        print(inputs)

        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            inputs,
            ref,
            outputs_to_check=range(4),
        )

        # Checking for input, gates_t_w and gates_t_b gradients
        for param in range(5):
            self.assertGradientChecks(
                device_option=hu.cpu_do,
                op=op,
                inputs=inputs,
                outputs_to_check=param,
                outputs_with_grads=outputs_with_grads,
                threshold=0.01,
                stepsize=0.005,
            )
Code example #4
    def test_cnn_model_helper_deprecated(self):
        X = np.random.rand(64, 32, 32, 3).astype(np.float32) - 0.5

        workspace.FeedBlob("x", X)
        # CNNModelHelper is going to be deprecated soon. This test is only
        # covering some CNNModelHelper logic
        model = CNNModelHelper(name="test_model", order='NHWC')
        self.assertEqual(model.arg_scope['order'], 'NHWC')
Code example #5
    def test_lstm_new(self, t, n, d):
        model = CNNModelHelper(name='external')

        def create_lstm(
                model, input_blob, seq_lengths, init, dim_in, dim_out, scope):
            recurrent.LSTM(
                model, input_blob, seq_lengths, init,
                dim_in, dim_out, scope="external/recurrent")

        self.lstm(model, create_lstm, t, n, d, lstm_reference,
                  gradients_to_check=[0, 1, 2, 3, 4],
                  outputs_to_check=[0, 1, 2, 3])
Code example #6
File: crf_test.py  Project: GeekLiB/caffe2-master
 def test_crf_with_loss_op(self, num_tags, num_words):
     model = CNNModelHelper(name='external')
     embeddings_dim = 200
     embeddings = np.random.randn(num_words,
                                  embeddings_dim).astype(np.float32)
     transitions = np.random.uniform(low=-1,
                                     high=1,
                                     size=(num_tags + 2,
                                           num_tags + 2)).astype(np.float32)
     labels = np.random.randint(num_tags, size=(num_words)).astype(np.int64)
     embeddings_blob, labels_blob, transitions_blob = (
         model.net.AddExternalInputs('embeddings_blob', 'labels_blob',
                                     'crf_transitions'))
     workspace.FeedBlob(str(embeddings_blob), embeddings)
     workspace.FeedBlob(str(labels_blob), labels)
     workspace.FeedBlob(str(transitions_blob), transitions)
     predictions_blob = model.FC(embeddings_blob, "fc_0", embeddings_dim,
                                 num_tags, ('UniformFill', {
                                     'min': -1.0
                                 }, {
                                     'max': 1.0
                                 }), ('UniformFill', {
                                     'min': -1.0
                                 }, {
                                     'max': 1.0
                                 }))
     crf_layer = crf.CRFWithLoss(model, num_tags, transitions_blob)
     crf_loss = crf_layer.crf_loss(predictions_blob, labels_blob)
     model.net.AddGradientOperators([crf_loss])
     workspace.RunNetOnce(model.param_init_net)
     workspace.RunNetOnce(model.net)
     loss = workspace.FetchBlob(str(crf_loss))
     predictions = workspace.FetchBlob(str(predictions_blob))
     np.testing.assert_allclose(
         loss,
         self._compute_loss_manual(predictions, num_tags, labels,
                                   transitions),
         atol=0.001,
         rtol=0.001,
         err_msg='CRF LOSS is not matching the reference')
Code example #7
    def test_lstm_old(self, t, n, d):
        model = CNNModelHelper(name='external')

        def create_lstm(
                model, input_blob, seq_lengths, init, dim_in, dim_out, scope):
            model.LSTM(
                input_blob, seq_lengths, init,
                dim_in, dim_out, scope="external/recurrent")

        # CNNModelHelper.LSTM returns only 3 outputs. But the operator itself
        # returns 5. We ignore the rest.
        self.lstm(model, create_lstm, t, n, d, old_lstm_reference,
                  gradients_to_check=[0, 2, 3, 4, 5],
                  outputs_to_check=[0, 3, 4])
Code example #8
    def test_milstm(self, t, n, d):
        for outputs_with_grads in [[0], [1], [0, 1, 2, 3]]:
            model = CNNModelHelper(name='external')

            def create_milstm(
                    model, input_blob, seq_lengths,
                    init, dim_in, dim_out, scope):
                recurrent.MILSTM(
                    model, input_blob, seq_lengths, init,
                    dim_in, dim_out, scope="external/recurrent",
                    outputs_with_grads=outputs_with_grads)

            self.lstm(model, create_milstm, t, n, d, milstm_reference,
                      gradients_to_check=[0, 1, 2, 3, 4],
                      outputs_to_check=[0, 1, 2, 3],
                      outputs_with_grads=outputs_with_grads)
Code example #9
 def apply_over_sequence(
     self,
     model,
     inputs,
     seq_lengths,
     initial_states,
     outputs_with_grads=None,
 ):
     preprocessed_inputs = self.prepare_input(model, inputs)
     step_model = CNNModelHelper(name=self.name, param_model=model)
     input_t, timestep = step_model.net.AddScopedExternalInputs(
         'input_t',
         'timestep',
     )
     states_prev = step_model.net.AddScopedExternalInputs(*[
         s + '_prev' for s in self.get_state_names()
     ])
     states = self._apply(
         model=step_model,
         input_t=input_t,
         seq_lengths=seq_lengths,
         states=states_prev,
         timestep=timestep,
     )
     if outputs_with_grads is None:
         outputs_with_grads = self.get_outputs_with_grads()
     # states_for_all_steps consists of the states gathered for all steps
     # combined with the final states. It looks like this:
     # (state_1_all, state_1_final, state_2_all, state_2_final, ...)
     states_for_all_steps = recurrent.recurrent_net(
         net=model.net,
         cell_net=step_model.net,
         inputs=[(input_t, preprocessed_inputs)],
         initial_cell_inputs=zip(states_prev, initial_states),
         links=dict(zip(states_prev, states)),
         timestep=timestep,
         scope=self.name,
         outputs_with_grads=outputs_with_grads,
         recompute_blobs_on_backward=self.recompute_blobs,
         forward_only=self.forward_only,
     )
     output = self._prepare_output_sequence(
         model,
         states_for_all_steps,
         outputs_with_grads,
     )
     return output, states_for_all_steps
Code example #10
File: rnn_cell_test.py  Project: zbxzc35/caffe2
def _prepare_lstm(t, n, d, create_lstm, outputs_with_grads, memory_optim,
                  forget_bias, forward_only, drop_states):
    print("Dims: ", t, n, d)

    model = CNNModelHelper(name='external')
    input_blob, seq_lengths, hidden_init, cell_init = (
        model.net.AddExternalInputs('input_blob', 'seq_lengths', 'hidden_init',
                                    'cell_init'))

    create_lstm(
        model,
        input_blob,
        seq_lengths,
        (hidden_init, cell_init),
        d,
        d,
        scope="external/recurrent",
        outputs_with_grads=outputs_with_grads,
        memory_optimization=memory_optim,
        forget_bias=forget_bias,
        forward_only=forward_only,
        drop_states=drop_states,
    )

    workspace.RunNetOnce(model.param_init_net)

    def generate_random_state(n, d):
        ndim = int(np.random.choice(3, 1)) + 1
        if ndim == 1:
            return np.random.randn(1, n, d).astype(np.float32)
        random_state = np.random.randn(n, d).astype(np.float32)
        if ndim == 3:
            random_state = random_state.reshape([1, n, d])
        return random_state

    workspace.FeedBlob("hidden_init", generate_random_state(n, d))
    workspace.FeedBlob("cell_init", generate_random_state(n, d))
    workspace.FeedBlob(
        "seq_lengths",
        np.random.randint(1, t + 1, size=(n, )).astype(np.int32))

    return model.net
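
The helper above wires up the net, feeds the recurrent state blobs, and simply returns model.net, leaving 'input_blob' unfed. A caller might finish the setup roughly as follows. This is a minimal sketch, assuming a create_lstm builder (for example rnn_cell.LSTM from the same project version) that accepts the keyword arguments _prepare_lstm forwards; the sizes are illustrative.

import numpy as np
from caffe2.python import workspace, rnn_cell

t, n, d = 5, 2, 4  # illustrative sizes, not taken from the original test
net = _prepare_lstm(t, n, d,
                    create_lstm=rnn_cell.LSTM,
                    outputs_with_grads=[0],
                    memory_optim=False,
                    forget_bias=0.0,
                    forward_only=False,
                    drop_states=False)
# 'input_blob' was declared as an external input but not fed by the helper,
# so it has to be provided before the returned net can run.
workspace.FeedBlob("input_blob", np.random.randn(t, n, d).astype(np.float32))
workspace.RunNetOnce(net)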
Code example #11
File: rnn_cell.py  Project: zlbing/caffe2
 def apply_over_sequence(
     self,
     model,
     inputs,
     seq_lengths,
     initial_states,
     outputs_with_grads=None,
 ):
     preprocessed_inputs = self.prepare_input(model, inputs)
     step_model = CNNModelHelper(name=self.name, param_model=model)
     input_t, timestep = step_model.net.AddScopedExternalInputs(
         'input_t',
         'timestep',
     )
     states_prev = step_model.net.AddScopedExternalInputs(
         *[s + '_prev' for s in self.get_state_names()])
     states = self._apply(
         model=step_model,
         input_t=input_t,
         seq_lengths=seq_lengths,
         states=states_prev,
         timestep=timestep,
     )
     return recurrent.recurrent_net(
         net=model.net,
         cell_net=step_model.net,
         inputs=[(input_t, preprocessed_inputs)],
         initial_cell_inputs=zip(states_prev, initial_states),
         links=dict(zip(states_prev, states)),
         timestep=timestep,
         scope=self.name,
         outputs_with_grads=(outputs_with_grads
                             if outputs_with_grads is not None else
                             self.get_outputs_with_grads()),
         recompute_blobs_on_backward=self.recompute_blobs,
         forward_only=self.forward_only,
     )
Code example #12
def MILSTM(model,
           input_blob,
           seq_lengths,
           initial_states,
           dim_in,
           dim_out,
           scope,
           outputs_with_grads=(0, ),
           memory_optimization=False,
           forget_bias=0.0):
    '''
    Adds MI flavor of standard LSTM recurrent network operator to a model.
    See https://arxiv.org/pdf/1606.06630.pdf

    model: CNNModelHelper object that new operators will be added to

    input_blob: the input sequence in a T x N x D format,
    where T is the sequence length, N the batch size and D the input dimension

    seq_lengths: blob containing sequence lengths which would be passed to
    LSTMUnit operator

    initial_states: a tuple of (hidden_input_blob, cell_input_blob)
    which are going to be inputs to the cell net on the first iteration

    dim_in: input dimension

    dim_out: output dimension

    outputs_with_grads : position indices of output blobs which will receive
    external error gradient during backpropagation

    memory_optimization: if enabled, the LSTM step is recomputed on the backward
                   pass so that forward activations do not need to be stored for
                   each timestep. Saves memory at the cost of extra computation.
    '''
    def s(name):
        # We have to manually scope due to our internal/external blob
        # relationships.
        return "{}/{}".format(str(scope), str(name))

    """ initial bulk fully-connected """
    input_blob = model.FC(input_blob,
                          s('i2h'),
                          dim_in=dim_in,
                          dim_out=4 * dim_out,
                          axis=2)
    """ the step net """
    step_model = CNNModelHelper(name='milstm_cell', param_model=model)
    input_t, timestep, cell_t_prev, hidden_t_prev = (
        step_model.net.AddScopedExternalInputs('input_t', 'timestep',
                                               'cell_t_prev', 'hidden_t_prev'))
    # hU^T
    # Shape: [1, batch_size, 4 * hidden_size]
    prev_t = step_model.FC(hidden_t_prev,
                           s('prev_t'),
                           dim_in=dim_out,
                           dim_out=4 * dim_out,
                           axis=2)
    # defining MI parameters
    alpha = step_model.param_init_net.ConstantFill([], [s('alpha')],
                                                   shape=[4 * dim_out],
                                                   value=1.0)
    beta1 = step_model.param_init_net.ConstantFill([], [s('beta1')],
                                                   shape=[4 * dim_out],
                                                   value=1.0)
    beta2 = step_model.param_init_net.ConstantFill([], [s('beta2')],
                                                   shape=[4 * dim_out],
                                                   value=1.0)
    b = step_model.param_init_net.ConstantFill([], [s('b')],
                                               shape=[4 * dim_out],
                                               value=0.0)
    model.params.extend([alpha, beta1, beta2, b])
    # alpha * (xW^T * hU^T)
    # Shape: [1, batch_size, 4 * hidden_size]
    alpha_tdash = step_model.net.Mul([prev_t, input_t], s('alpha_tdash'))
    # Shape: [batch_size, 4 * hidden_size]
    alpha_tdash_rs, _ = step_model.net.Reshape(
        alpha_tdash,
        [s('alpha_tdash_rs'), s('alpha_tdash_old_shape')],
        shape=[-1, 4 * dim_out],
    )
    alpha_t = step_model.net.Mul([alpha_tdash_rs, alpha],
                                 s('alpha_t'),
                                 broadcast=1,
                                 use_grad_hack=1)
    # beta1 * hU^T
    # Shape: [batch_size, 4 * hidden_size]
    prev_t_rs, _ = step_model.net.Reshape(
        prev_t,
        [s('prev_t_rs'), s('prev_t_old_shape')],
        shape=[-1, 4 * dim_out],
    )
    beta1_t = step_model.net.Mul([prev_t_rs, beta1],
                                 s('beta1_t'),
                                 broadcast=1,
                                 use_grad_hack=1)
    # beta2 * xW^T
    # Shape: [batch_size, 4 * hidden_size]
    input_t_rs, _ = step_model.net.Reshape(
        input_t,
        [s('input_t_rs'), s('input_t_old_shape')],
        shape=[-1, 4 * dim_out],
    )
    beta2_t = step_model.net.Mul([input_t_rs, beta2],
                                 s('beta2_t'),
                                 broadcast=1,
                                 use_grad_hack=1)
    # Add 'em all up
    gates_tdash = step_model.net.Sum([alpha_t, beta1_t, beta2_t],
                                     s('gates_tdash'))
    gates_t = step_model.net.Add([gates_tdash, b],
                                 s('gates_t'),
                                 broadcast=1,
                                 use_grad_hack=1)
    # Shape: [1, batch_size, 4 * hidden_size]
    gates_t_rs, _ = step_model.net.Reshape(
        gates_t,
        [s('gates_t_rs'), s('gates_t_old_shape')],
        shape=[1, -1, 4 * dim_out],
    )

    hidden_t, cell_t = step_model.net.LSTMUnit(
        [hidden_t_prev, cell_t_prev, gates_t_rs, seq_lengths, timestep],
        [s('hidden_t'), s('cell_t')],
        forget_bias=forget_bias,
    )
    step_model.net.AddExternalOutputs(cell_t, hidden_t)
    """ recurrent network """
    (hidden_input_blob, cell_input_blob) = initial_states
    output, last_output, all_states, last_state = recurrent_net(
        net=model.net,
        cell_net=step_model.net,
        inputs=[(input_t, input_blob)],
        initial_cell_inputs=[
            (hidden_t_prev, hidden_input_blob),
            (cell_t_prev, cell_input_blob),
        ],
        links={
            hidden_t_prev: hidden_t,
            cell_t_prev: cell_t,
        },
        timestep=timestep,
        scope=scope,
        outputs_with_grads=outputs_with_grads,
        recompute_blobs_on_backward=[gates_t] if memory_optimization else None)
    return output, last_output, all_states, last_state
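
A sketch of driving the MILSTM helper above directly, outside a test harness. The blob names, sizes, and zero initial states are illustrative; the feed shapes mirror the LSTM tests earlier in this listing.

import numpy as np
from caffe2.python import workspace
from caffe2.python.cnn import CNNModelHelper

T, N, D = 4, 2, 8  # illustrative sequence length, batch size and hidden size
model = CNNModelHelper(name='milstm_example')
input_blob, seq_lengths, hidden_init, cell_init = model.net.AddExternalInputs(
    'input_blob', 'seq_lengths', 'hidden_init', 'cell_init')

output, last_output, all_states, last_state = MILSTM(
    model, input_blob, seq_lengths, (hidden_init, cell_init),
    dim_in=D, dim_out=D, scope='example/milstm')

workspace.FeedBlob('input_blob', np.random.randn(T, N, D).astype(np.float32))
workspace.FeedBlob('seq_lengths', np.full((N,), T, dtype=np.int32))
workspace.FeedBlob('hidden_init', np.zeros((1, N, D), dtype=np.float32))
workspace.FeedBlob('cell_init', np.zeros((1, N, D), dtype=np.float32))
workspace.RunNetOnce(model.param_init_net)
workspace.RunNetOnce(model.net)
hidden_seq = workspace.FetchBlob(str(output))  # gathered hidden states, one per step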
Code example #13
def LSTMWithAttention(
    model,
    decoder_inputs,
    decoder_input_lengths,
    initial_decoder_hidden_state,
    initial_decoder_cell_state,
    initial_attention_weighted_encoder_context,
    encoder_output_dim,
    encoder_outputs,
    decoder_input_dim,
    decoder_state_dim,
    scope,
    attention_type=AttentionType.Regular,
    outputs_with_grads=(0, 4),
    weighted_encoder_outputs=None,
    lstm_memory_optimization=False,
    attention_memory_optimization=False,
    forget_bias=0.0,
):
    '''
    Adds a LSTM with attention mechanism to a model.

    The implementation is based on https://arxiv.org/abs/1409.0473, with
    a small difference in the order in which the new attention context and
    the new hidden state are computed, similarly to
    https://arxiv.org/abs/1508.04025.

    The model uses encoder-decoder naming conventions,
    where the decoder is the sequence the op is iterating over,
    while computing the attention context over the encoder.

    model: CNNModelHelper object that new operators will be added to

    decoder_inputs: the input sequence in a T x N x D format,
    where T is the sequence length, N the batch size and D the input dimension

    decoder_input_lengths: blob containing sequence lengths
    which would be passed to LSTMUnit operator

    initial_decoder_hidden_state: initial hidden state of LSTM

    initial_decoder_cell_state: initial cell state of LSTM

    initial_attention_weighted_encoder_context: initial attention context

    encoder_output_dim: dimension of encoder outputs

    encoder_outputs: the sequence, on which we compute the attention context
    at every iteration

    decoder_input_dim: input dimension (the last dimension of decoder_inputs)

    decoder_state_dim: size of hidden states of LSTM

    attention_type: One of: AttentionType.Regular, AttentionType.Recurrent.
    Determines which type of attention mechanism to use.

    outputs_with_grads : position indices of output blobs which will receive
    external error gradient during backpropagation

    weighted_encoder_outputs: encoder outputs to be used to compute attention
    weights. In the basic case it's just a linear transformation of the
    encoder outputs (that's the default, when weighted_encoder_outputs is None).
    However, it can be something more complicated, like a separate
    encoder network (for example, in the case of a convolutional encoder)

    lstm_memory_optimization: recompute LSTM activations on backward pass, so
                 we don't need to store their values in forward passes

    attention_memory_optimization: recompute attention for backward pass
    '''
    def s(name):
        # We have to manually scope due to our internal/external blob
        # relationships.
        return "{}/{}".format(str(scope), str(name))

    decoder_inputs = model.FC(
        decoder_inputs,
        s('i2h'),
        dim_in=decoder_input_dim,
        dim_out=4 * decoder_state_dim,
        axis=2,
    )
    # [batch_size, encoder_output_dim, encoder_length]
    encoder_outputs_transposed = model.Transpose(
        encoder_outputs,
        s('encoder_outputs_transposed'),
        axes=[1, 2, 0],
    )
    if weighted_encoder_outputs is None:
        weighted_encoder_outputs = model.FC(
            encoder_outputs,
            s('weighted_encoder_outputs'),
            dim_in=encoder_output_dim,
            dim_out=encoder_output_dim,
            axis=2,
        )
    step_model = CNNModelHelper(
        name='lstm_with_attention_cell',
        param_model=model,
    )
    (
        input_t,
        timestep,
        cell_t_prev,
        hidden_t_prev,
        attention_weighted_encoder_context_t_prev,
    ) = (step_model.net.AddScopedExternalInputs(
        'input_t',
        'timestep',
        'cell_t_prev',
        'hidden_t_prev',
        'attention_weighted_encoder_context_t_prev',
    ))
    step_model.net.AddExternalInputs(encoder_outputs_transposed,
                                     weighted_encoder_outputs)

    gates_concatenated_input_t, _ = step_model.net.Concat(
        [hidden_t_prev, attention_weighted_encoder_context_t_prev],
        [
            s('gates_concatenated_input_t'),
            s('_gates_concatenated_input_t_concat_dims'),
        ],
        axis=2,
    )
    gates_t = step_model.FC(
        gates_concatenated_input_t,
        s('gates_t'),
        dim_in=decoder_state_dim + encoder_output_dim,
        dim_out=4 * decoder_state_dim,
        axis=2,
    )
    step_model.net.Sum([gates_t, input_t], gates_t)

    hidden_t_intermediate, cell_t = step_model.net.LSTMUnit(
        [hidden_t_prev, cell_t_prev, gates_t, decoder_input_lengths, timestep],
        ['hidden_t_intermediate', s('cell_t')],
        forget_bias=forget_bias,
    )
    if attention_type == AttentionType.Recurrent:
        attention_weighted_encoder_context_t, _, attention_blobs = apply_recurrent_attention(
            model=step_model,
            encoder_output_dim=encoder_output_dim,
            encoder_outputs_transposed=encoder_outputs_transposed,
            weighted_encoder_outputs=weighted_encoder_outputs,
            decoder_hidden_state_t=hidden_t_intermediate,
            decoder_hidden_state_dim=decoder_state_dim,
            scope=scope,
            attention_weighted_encoder_context_t_prev=(
                attention_weighted_encoder_context_t_prev),
        )
    else:
        attention_weighted_encoder_context_t, _, attention_blobs = apply_regular_attention(
            model=step_model,
            encoder_output_dim=encoder_output_dim,
            encoder_outputs_transposed=encoder_outputs_transposed,
            weighted_encoder_outputs=weighted_encoder_outputs,
            decoder_hidden_state_t=hidden_t_intermediate,
            decoder_hidden_state_dim=decoder_state_dim,
            scope=scope,
        )
    hidden_t = step_model.Copy(hidden_t_intermediate, s('hidden_t'))
    step_model.net.AddExternalOutputs(
        cell_t,
        hidden_t,
        attention_weighted_encoder_context_t,
    )
    recompute_blobs = []
    if attention_memory_optimization:
        recompute_blobs.extend(attention_blobs)
    if lstm_memory_optimization:
        recompute_blobs.extend([gates_t])

    return recurrent_net(
        net=model.net,
        cell_net=step_model.net,
        inputs=[
            (input_t, decoder_inputs),
        ],
        initial_cell_inputs=[
            (hidden_t_prev, initial_decoder_hidden_state),
            (cell_t_prev, initial_decoder_cell_state),
            (
                attention_weighted_encoder_context_t_prev,
                initial_attention_weighted_encoder_context,
            ),
        ],
        links={
            hidden_t_prev:
            hidden_t,
            cell_t_prev:
            cell_t,
            attention_weighted_encoder_context_t_prev:
            (attention_weighted_encoder_context_t),
        },
        timestep=timestep,
        scope=scope,
        outputs_with_grads=outputs_with_grads,
        recompute_blobs_on_backward=recompute_blobs,
    )
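
A sketch of invoking LSTMWithAttention end to end. The blob names, toy dimensions, and random feeds are illustrative; the shapes follow the attention test later in this listing (Code example #20).

import numpy as np
from caffe2.python import workspace
from caffe2.python.cnn import CNNModelHelper

# Illustrative toy sizes, not taken from the original code.
enc_len, dec_len, batch = 5, 3, 2
enc_dim, dec_in_dim, dec_state_dim = 8, 8, 8

model = CNNModelHelper(name='attention_example')
(encoder_outputs, decoder_inputs, decoder_input_lengths,
 init_hidden, init_cell, init_context) = model.net.AddExternalInputs(
    'encoder_outputs', 'decoder_inputs', 'decoder_input_lengths',
    'init_hidden', 'init_cell', 'init_context')

LSTMWithAttention(
    model=model,
    decoder_inputs=decoder_inputs,
    decoder_input_lengths=decoder_input_lengths,
    initial_decoder_hidden_state=init_hidden,
    initial_decoder_cell_state=init_cell,
    initial_attention_weighted_encoder_context=init_context,
    encoder_output_dim=enc_dim,
    encoder_outputs=encoder_outputs,
    decoder_input_dim=dec_in_dim,
    decoder_state_dim=dec_state_dim,
    scope='example/attention',
)

workspace.FeedBlob('encoder_outputs',
                   np.random.randn(enc_len, batch, enc_dim).astype(np.float32))
workspace.FeedBlob('decoder_inputs',
                   np.random.randn(dec_len, batch, dec_in_dim).astype(np.float32))
workspace.FeedBlob('decoder_input_lengths',
                   np.full((batch,), dec_len, dtype=np.int32))
workspace.FeedBlob('init_hidden',
                   np.random.randn(1, batch, dec_state_dim).astype(np.float32))
workspace.FeedBlob('init_cell',
                   np.random.randn(1, batch, dec_state_dim).astype(np.float32))
workspace.FeedBlob('init_context',
                   np.random.randn(1, batch, enc_dim).astype(np.float32))
workspace.RunNetOnce(model.param_init_net)
workspace.RunNetOnce(model.net)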
Code example #14
def LSTM(model,
         input_blob,
         seq_lengths,
         initial_states,
         dim_in,
         dim_out,
         scope,
         outputs_with_grads=(0, ),
         return_params=False,
         memory_optimization=False,
         forget_bias=0.0):
    '''
    Adds a standard LSTM recurrent network operator to a model.

    model: CNNModelHelper object that new operators will be added to

    input_blob: the input sequence in a T x N x D format,
    where T is the sequence length, N the batch size and D the input dimension

    seq_lengths: blob containing sequence lengths which would be passed to
    LSTMUnit operator

    initial_states: a tuple of (hidden_input_blob, cell_input_blob)
    which are going to be inputs to the cell net on the first iteration

    dim_in: input dimension

    dim_out: output dimension

    outputs_with_grads : position indices of output blobs which will receive
    external error gradient during backpropagation

    return_params: if True, will return a dictionary of parameters of the LSTM

    memory_optimization: if enabled, the LSTM step is recomputed on the backward
                   pass so that forward activations do not need to be stored for
                   each timestep. Saves memory at the cost of extra computation.
    '''
    def s(name):
        # We have to manually scope due to our internal/external blob
        # relationships.
        return "{}/{}".format(str(scope), str(name))

    """ initial bulk fully-connected """
    input_blob = model.FC(input_blob,
                          s('i2h'),
                          dim_in=dim_in,
                          dim_out=4 * dim_out,
                          axis=2)
    """ the step net """
    step_model = CNNModelHelper(name='lstm_cell', param_model=model)
    input_t, timestep, cell_t_prev, hidden_t_prev = (
        step_model.net.AddScopedExternalInputs('input_t', 'timestep',
                                               'cell_t_prev', 'hidden_t_prev'))
    gates_t = step_model.FC(hidden_t_prev,
                            s('gates_t'),
                            dim_in=dim_out,
                            dim_out=4 * dim_out,
                            axis=2)
    step_model.net.Sum([gates_t, input_t], gates_t)
    hidden_t, cell_t = step_model.net.LSTMUnit(
        [hidden_t_prev, cell_t_prev, gates_t, seq_lengths, timestep],
        [s('hidden_t'), s('cell_t')],
        forget_bias=forget_bias,
    )
    step_model.net.AddExternalOutputs(cell_t, hidden_t)
    """ recurrent network """
    (hidden_input_blob, cell_input_blob) = initial_states
    output, last_output, all_states, last_state = recurrent_net(
        net=model.net,
        cell_net=step_model.net,
        inputs=[(input_t, input_blob)],
        initial_cell_inputs=[
            (hidden_t_prev, hidden_input_blob),
            (cell_t_prev, cell_input_blob),
        ],
        links={
            hidden_t_prev: hidden_t,
            cell_t_prev: cell_t,
        },
        timestep=timestep,
        scope=scope,
        outputs_with_grads=outputs_with_grads,
        recompute_blobs_on_backward=[gates_t] if memory_optimization else None)
    if return_params:
        params = {
            'input': {
                'weights': input_blob + "_w",
                'biases': input_blob + '_b'
            },
            'recurrent': {
                'weights': gates_t + "_w",
                'biases': gates_t + '_b'
            }
        }
        return output, last_output, all_states, last_state, params
    else:
        return output, last_output, all_states, last_state
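
A sketch of calling the LSTM helper above with return_params=True to recover the parameter blob names. The sizes and blob names are illustrative; the feeds mirror the tests earlier in this listing.

import numpy as np
from caffe2.python import workspace
from caffe2.python.cnn import CNNModelHelper

T, N, D = 6, 3, 10  # illustrative sequence length, batch size and hidden size
model = CNNModelHelper(name='lstm_example')
input_blob, seq_lengths, hidden_init, cell_init = model.net.AddExternalInputs(
    'input_blob', 'seq_lengths', 'hidden_init', 'cell_init')

output, last_hidden, all_states, last_state, params = LSTM(
    model, input_blob, seq_lengths, (hidden_init, cell_init),
    dim_in=D, dim_out=D, scope='example/lstm', return_params=True)

workspace.FeedBlob('input_blob', np.random.randn(T, N, D).astype(np.float32))
workspace.FeedBlob('seq_lengths', np.full((N,), T, dtype=np.int32))
workspace.FeedBlob('hidden_init', np.zeros((1, N, D), dtype=np.float32))
workspace.FeedBlob('cell_init', np.zeros((1, N, D), dtype=np.float32))
workspace.RunNetOnce(model.param_init_net)
workspace.RunNetOnce(model.net)

# params holds the weight/bias blob names of the input and recurrent
# fully-connected layers, e.g. 'example/lstm/i2h_w'.
i2h_weights = workspace.FetchBlob(str(params['input']['weights']))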
Code example #15
File: recurrent.py  Project: yalechang/caffe2
def LSTM(model,
         input_blob,
         seq_lengths,
         initial_states,
         dim_in,
         dim_out,
         scope,
         outputs_with_grads=(0, )):
    '''
    Adds a standard LSTM recurrent network operator to a model.

    model: CNNModelHelper object that new operators will be added to

    input_blob: the input sequence in a T x N x D format,
    where T is the sequence length, N the batch size and D the input dimension

    seq_lengths: blob containing sequence lengths which would be passed to
    LSTMUnit operator

    initial_states: a tuple of (hidden_input_blob, cell_input_blob)
    which are going to be inputs to the cell net on the first iteration

    dim_in: input dimension

    dim_out: output dimension

    outputs_with_grads : position indices of output blobs which will receive
    external error gradient during backpropagation
    '''
    def s(name):
        # We have to manually scope due to our internal/external blob
        # relationships.
        return "{}/{}".format(str(scope), str(name))

    """ initial bulk fully-connected """
    input_blob = model.FC(input_blob,
                          s('i2h'),
                          dim_in=dim_in,
                          dim_out=4 * dim_out,
                          axis=2)
    """ the step net """
    step_model = CNNModelHelper(name='lstm_cell', param_model=model)
    input_t, timestep, cell_t_prev, hidden_t_prev = (
        step_model.net.AddScopedExternalInputs('input_t', 'timestep',
                                               'cell_t_prev', 'hidden_t_prev'))
    gates_t = step_model.FC(hidden_t_prev,
                            s('gates_t'),
                            dim_in=dim_out,
                            dim_out=4 * dim_out,
                            axis=2)
    step_model.net.Sum([gates_t, input_t], gates_t)
    hidden_t, cell_t = step_model.net.LSTMUnit(
        [hidden_t_prev, cell_t_prev, gates_t, seq_lengths, timestep],
        [s('hidden_t'), s('cell_t')],
    )
    step_model.net.AddExternalOutputs(cell_t, hidden_t)
    """ recurrent network """
    (hidden_input_blob, cell_input_blob) = initial_states
    output, last_output, all_states, last_state = recurrent_net(
        net=model.net,
        cell_net=step_model.net,
        inputs=[(input_t, input_blob)],
        initial_cell_inputs=[
            (hidden_t_prev, hidden_input_blob),
            (cell_t_prev, cell_input_blob),
        ],
        links={
            hidden_t_prev: hidden_t,
            cell_t_prev: cell_t,
        },
        timestep=timestep,
        scope=scope,
        outputs_with_grads=outputs_with_grads,
    )
    return output, last_output, all_states, last_state
Code example #16
File: crf.py  Project: zlbing/caffe2
    def build_crf_net(self, input_blob, initial_state, transitions):
        '''
            Adds the crf_net recurrent operator to the model.

            model: CNNModelHelper object that new operators will be added to

            input_blob: the input sequence in a T x N x D format,
            where T is the sequence length, N the batch size and D the input dimension
            ##Only supports batch-size 1##

            seq_lengths: blob containing sequence lengths (unused)
            '''

        scope = 'crf_net'

        def s(name):
            ''
            # We have to manually scope due to our internal/external blob
            # relationships.
            return "{}/{}".format(str(scope), str(name))

        step_model = CNNModelHelper(name='crf_step', param_model=self.model)
        input_t, cell_t_prev, _ = (step_model.net.AddExternalInputs(
            'input_t', 'cell_t_prev', transitions))
        zero_segment_id = step_model.param_init_net.ConstantFill(
            [],
            [s('zero_segment_id')],
            value=0,
            shape=[self.num_classes_padded],
            dtype=core.DataType.INT32,
        )

        # A hack to bypass model cloning for test
        step_model.param_init_net.AddExternalOutput(zero_segment_id)
        """ the CRF step """
        # Do tile
        prev_transpose = step_model.Transpose(
            cell_t_prev,
            [s('prev_transpose')],
            axes=(0, 2, 1),
        )
        prev_tiled = step_model.net.Tile(
            prev_transpose,
            [s('prev_tiled')],
            tiles=self.num_classes_padded,
            axis=2,
        )
        input_t_tiled = step_model.net.Tile(
            input_t,
            [s('input_t_tiled')],
            tiles=self.num_classes_padded,
            axis=1,
        )
        input_with_prev = step_model.net.Add([prev_tiled, input_t_tiled],
                                             [s('input_with_prev')])
        all_with_transitions = step_model.net.Add(
            [input_with_prev, transitions],
            [s('prev_with_transitions')],
            broadcast=1,
            use_grad_hack=1,
        )
        all_with_transitions_reshaped, _ = step_model.net.Reshape(
            all_with_transitions, [
                s('all_with_transitions_reshaped'),
                s('all_with_transitions_orig')
            ],
            shape=(self.num_classes_padded, self.num_classes_padded))
        cell_t = step_model.net.SortedSegmentRangeLogSumExp(
            [all_with_transitions_reshaped, zero_segment_id],
            [s('cell_t')],
        )
        step_model.net.AddExternalOutputs(cell_t)
        """ recurrent network """
        cell_input_blob = initial_state
        out_all, out_last = recurrent.recurrent_net(net=self.model.net,
                                                    cell_net=step_model.net,
                                                    inputs=[(input_t,
                                                             input_blob)],
                                                    initial_cell_inputs=[
                                                        (cell_t_prev,
                                                         cell_input_blob),
                                                    ],
                                                    links={
                                                        cell_t_prev: cell_t,
                                                    },
                                                    scope=scope,
                                                    outputs_with_grads=(1, ))
        return out_last
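
Outside the test harness, build_crf_net can be exercised roughly as in Code example #2 of this listing. A minimal sketch; num_tags, num_words and the blob names are illustrative, and the feed shapes follow that test.

import numpy as np
from caffe2.python import workspace, crf
from caffe2.python.cnn import CNNModelHelper

num_tags, num_words = 4, 7  # illustrative sizes
model = CNNModelHelper(name='crf_example')
predictions_blob, transitions_blob, initial_blob = model.net.AddExternalInputs(
    'predictions', 'crf_transitions', 'initial')

workspace.FeedBlob('predictions',
                   np.random.randn(num_words, 1, num_tags + 2).astype(np.float32))
workspace.FeedBlob('crf_transitions',
                   np.random.randn(num_tags + 2, num_tags + 2).astype(np.float32))
workspace.FeedBlob('initial',
                   np.random.randn(1, num_tags + 2).astype(np.float32))

crf_layer = crf.CRFWithLoss(model, num_tags, transitions_blob)
out_last = crf_layer.build_crf_net(predictions_blob, initial_blob, transitions_blob)
workspace.RunNetOnce(model.param_init_net)
workspace.RunNetOnce(model.net)
final_state = workspace.FetchBlob(str(out_last))  # last state of the CRF forward recursion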
Code example #17
        with open(args.params, 'r') as f:
            init_def.ParseFromString(f.read())
            init_def.device_option.CopyFrom(device_opts)
            workspace.RunNetOnce(init_def)

        net_def = caffe2_pb2.NetDef()
        with open(net_path, 'r') as f:
            net_def.ParseFromString(f.read())
            net_def.device_option.CopyFrom(device_opts)
            for op in net_def.op:
                op.engine = 'CUDNN'
            workspace.CreateNet(net_def)
    elif args.network.startswith('resnet') or args.network == 'alexnet':
        if args.network.startswith('resnet'):
            model = CNNModelHelper(order='NCHW',
                                   name=args.network,
                                   use_cudnn=True,
                                   cudnn_exhaustive_search=True)
            num_layers = int(args.network[6:])
            softmax = create_resnet(model,
                                    'data',
                                    num_layers=num_layers,
                                    num_input_channels=3,
                                    num_labels=1000,
                                    label=None,
                                    no_bias=True,
                                    no_loss=True)
        elif args.network == 'alexnet':
            model = CNNModelHelper(
                order='NCHW',
                name=args.network,
                #  use_cudnn=True,
Code example #18
File: rnn_cell_test.py  Project: zbxzc35/caffe2
    def test_multi_lstm(
        self,
        input_length,
        dim_in,
        max_num_units,
        num_layers,
        batch_size,
        gc,
        dc,
    ):
        model = CNNModelHelper(name='external')
        with core.DeviceScope(gc):
            (
                input_sequence,
                seq_lengths,
            ) = model.net.AddExternalInputs(
                'input_sequence',
                'seq_lengths',
            )
            dim_out = [
                np.random.randint(1, max_num_units + 1)
                for _ in range(num_layers)
            ]
            h_all, h_last, c_all, c_last = rnn_cell.LSTM(
                model=model,
                input_blob=input_sequence,
                seq_lengths=seq_lengths,
                initial_states=None,
                dim_in=dim_in,
                dim_out=dim_out,
                scope='test',
                outputs_with_grads=(0, ),
                return_params=False,
                memory_optimization=False,
                forget_bias=0.0,
                forward_only=False,
                return_last_layer_only=True,
            )

        workspace.RunNetOnce(model.param_init_net)

        seq_lengths_val = np.random.randint(
            1,
            input_length + 1,
            size=(batch_size),
        ).astype(np.int32)
        input_sequence_val = np.random.randn(
            input_length,
            batch_size,
            dim_in,
        ).astype(np.float32)
        workspace.FeedBlob(seq_lengths, seq_lengths_val)
        workspace.FeedBlob(input_sequence, input_sequence_val)

        hidden_input_list = []
        cell_input_list = []
        i2h_w_list = []
        i2h_b_list = []
        gates_w_list = []
        gates_b_list = []

        for i in range(num_layers):
            hidden_input_list.append(
                workspace.FetchBlob(
                    'test/initial_hidden_state_{}'.format(i)), )
            cell_input_list.append(
                workspace.FetchBlob('test/initial_cell_state_{}'.format(i)), )
            i2h_w_list.append(
                workspace.FetchBlob('test/layer_{}/i2h_w'.format(i)), )
            i2h_b_list.append(
                workspace.FetchBlob('test/layer_{}/i2h_b'.format(i)), )
            gates_w_list.append(
                workspace.FetchBlob('test/layer_{}/gates_t_w'.format(i)), )
            gates_b_list.append(
                workspace.FetchBlob('test/layer_{}/gates_t_b'.format(i)), )

        workspace.RunNetOnce(model.net)
        h_all_calc = workspace.FetchBlob(h_all)
        h_last_calc = workspace.FetchBlob(h_last)
        c_all_calc = workspace.FetchBlob(c_all)
        c_last_calc = workspace.FetchBlob(c_last)

        h_all_ref, h_last_ref, c_all_ref, c_last_ref = multi_lstm_reference(
            input_sequence_val,
            hidden_input_list,
            cell_input_list,
            i2h_w_list,
            i2h_b_list,
            gates_w_list,
            gates_b_list,
            seq_lengths_val,
            forget_bias=0.0,
        )

        h_all_delta = np.abs(h_all_ref - h_all_calc).sum()
        h_last_delta = np.abs(h_last_ref - h_last_calc).sum()
        c_all_delta = np.abs(c_all_ref - c_all_calc).sum()
        c_last_delta = np.abs(c_last_ref - c_last_calc).sum()

        self.assertAlmostEqual(h_all_delta, 0.0, places=5)
        self.assertAlmostEqual(h_last_delta, 0.0, places=5)
        self.assertAlmostEqual(c_all_delta, 0.0, places=5)
        self.assertAlmostEqual(c_last_delta, 0.0, places=5)

        input_values = {
            'input_sequence': input_sequence_val,
            'seq_lengths': seq_lengths_val,
        }
        for param in model.GetParams():
            value = workspace.FetchBlob(param)
            input_values[str(param)] = value

        output_sum = model.net.SumElements(
            [h_all],
            'output_sum',
            average=True,
        )
        fake_loss = model.net.Tanh(output_sum, )
        for param in model.GetParams():
            gradient_checker.NetGradientChecker.Check(
                model.net,
                outputs_with_grad=[fake_loss],
                input_values=input_values,
                input_to_check=str(param),
                print_net=False,
                step_size=0.0001,
                threshold=0.05,
            )
Code example #19
File: conv_test.py  Project: GeekLiB/caffe2-master
    def test_convolution_sync(self, net_type, num_workers, do, engine):
        from caffe2.python.cnn import CNNModelHelper
        m = CNNModelHelper()
        n = 1
        d = 2
        depth = 3
        iters = 5
        h = 5
        w = 5
        workspace.ResetWorkspace()

        np.random.seed(1701)
        # Build a binary tree of conv layers, summing at each node.
        for i in reversed(range(depth)):
            for j in range(2**i):
                bottom_1 = "{}_{}".format(i + 1, 2 * j)
                bottom_2 = "{}_{}".format(i + 1, 2 * j + 1)
                mid_1 = "{}_{}_m".format(i + 1, 2 * j)
                mid_2 = "{}_{}_m".format(i + 1, 2 * j + 1)
                top = "{}_{}".format(i, j)
                w1, b1, w2, b2 = np.random.randn(4).tolist()
                m.Conv(bottom_1,
                       mid_1,
                       dim_in=d,
                       dim_out=d,
                       kernel=3,
                       weight_init=m.ConstantInit(w1),
                       bias_init=m.ConstantInit(b1),
                       cudnn_state=np.random.randint(0, 3),
                       stride=1,
                       pad=1,
                       deterministic=1,
                       engine=engine)
                m.Conv(bottom_2,
                       mid_2,
                       dim_in=d,
                       dim_out=d,
                       kernel=3,
                       stride=1,
                       pad=1,
                       weight_init=m.ConstantInit(w2),
                       bias_init=m.ConstantInit(b2),
                       deterministic=1,
                       cudnn_state=np.random.randint(0, 3),
                       engine=engine)
                m.net.Sum([mid_1, mid_2], top)

        m.net.Flatten(["0_0"], ["0_0_flat"])
        m.net.SquaredL2Distance(["0_0_flat", "label"], "xent")
        m.net.AveragedLoss("xent", "loss")
        input_to_grad = m.AddGradientOperators(["loss"])
        m.Proto().device_option.CopyFrom(do)
        m.param_init_net.Proto().device_option.CopyFrom(do)
        m.Proto().type = net_type
        m.Proto().num_workers = num_workers
        self.ws.run(m.param_init_net)

        def run():
            import numpy as np
            np.random.seed(1701)
            input_blobs = ["{}_{}".format(depth, j) for j in range(2**depth)]
            for input_blob in input_blobs:
                self.ws.create_blob(input_blob).feed(np.random.randn(
                    n, d, h, w).astype(np.float32),
                                                     device_option=do)
                self.ws.create_blob("label").feed(np.random.randn(
                    n, d * h * w).astype(np.float32),
                                                  device_option=do)
            self.ws.run(m.net)
            gradients = [
                self.ws.blobs[str(input_to_grad[input_blob])].fetch()
                for input_blob in input_blobs
            ]
            return gradients

        outputs = [run() for _ in range(iters)]
        for output in outputs[1:]:
            np.testing.assert_array_equal(outputs[0], output)
            np.testing.assert_allclose(np.sum(np.square(output)),
                                       1763719461732352.0,
                                       rtol=1e-5)
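
For reference, a minimal standalone sketch of the CNNModelHelper.Conv call pattern the test above exercises, reduced to a single layer; the blob names, sizes, and constant initializers are illustrative.

import numpy as np
from caffe2.python import workspace
from caffe2.python.cnn import CNNModelHelper

m = CNNModelHelper(name='conv_example', order='NCHW')
m.Conv('data', 'conv1', dim_in=2, dim_out=2, kernel=3, stride=1, pad=1,
       weight_init=m.ConstantInit(0.1), bias_init=m.ConstantInit(0.0))

workspace.FeedBlob('data', np.random.randn(1, 2, 5, 5).astype(np.float32))
workspace.RunNetOnce(m.param_init_net)
workspace.RunNetOnce(m.net)
out = workspace.FetchBlob('conv1')  # shape (1, 2, 5, 5) with kernel=3, pad=1, stride=1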
Code example #20
    def test_lstm_with_recurrent_attention(
        self,
        encoder_output_length,
        encoder_output_dim,
        decoder_input_length,
        decoder_state_dim,
        batch_size,
        gc,
        dc,
    ):
        with core.DeviceScope(gc):
            model = CNNModelHelper(name="external")
            (
                encoder_outputs,
                decoder_inputs,
                decoder_input_lengths,
                initial_decoder_hidden_state,
                initial_decoder_cell_state,
                initial_attention_weighted_encoder_context,
            ) = model.net.AddExternalInputs(
                "encoder_outputs",
                "decoder_inputs",
                "decoder_input_lengths",
                "initial_decoder_hidden_state",
                "initial_decoder_cell_state",
                "initial_attention_weighted_encoder_context",
            )
            recurrent.LSTMWithAttention(
                model=model,
                decoder_inputs=decoder_inputs,
                decoder_input_lengths=decoder_input_lengths,
                initial_decoder_hidden_state=initial_decoder_hidden_state,
                initial_decoder_cell_state=initial_decoder_cell_state,
                initial_attention_weighted_encoder_context=(
                    initial_attention_weighted_encoder_context),
                encoder_output_dim=encoder_output_dim,
                encoder_outputs=encoder_outputs,
                decoder_input_dim=decoder_state_dim,
                decoder_state_dim=decoder_state_dim,
                scope='external/LSTMWithAttention',
                attention_type=AttentionType.Recurrent)
            op = model.net._net.op[-1]
        workspace.RunNetOnce(model.param_init_net)

        # This is original decoder_inputs after linear layer
        decoder_input_blob = op.input[0]

        workspace.FeedBlob(
            decoder_input_blob,
            np.random.randn(
                decoder_input_length,
                batch_size,
                decoder_state_dim * 4,
            ).astype(np.float32))
        workspace.FeedBlob(
            "external/LSTMWithAttention/encoder_outputs_transposed",
            np.random.randn(
                batch_size,
                encoder_output_dim,
                encoder_output_length,
            ).astype(np.float32),
        )
        workspace.FeedBlob(
            "external/LSTMWithAttention/weighted_encoder_outputs",
            np.random.randn(
                encoder_output_length,
                batch_size,
                encoder_output_dim,
            ).astype(np.float32),
        )
        workspace.FeedBlob(
            decoder_input_lengths,
            np.random.randint(0, decoder_input_length + 1,
                              size=(batch_size, )).astype(np.int32))
        workspace.FeedBlob(
            initial_decoder_hidden_state,
            np.random.randn(1, batch_size,
                            decoder_state_dim).astype(np.float32))
        workspace.FeedBlob(
            initial_decoder_cell_state,
            np.random.randn(1, batch_size,
                            decoder_state_dim).astype(np.float32))
        workspace.FeedBlob(
            initial_attention_weighted_encoder_context,
            np.random.randn(1, batch_size,
                            encoder_output_dim).astype(np.float32))
        inputs = [workspace.FetchBlob(name) for name in op.input]

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=lstm_with_recurrent_attention_reference,
            grad_reference=None,
            output_to_grad=None,
            outputs_to_check=range(6),
        )
        gradients_to_check = [
            index for (index, input_name) in enumerate(op.input)
            if input_name != "decoder_input_lengths"
        ]
        for param in gradients_to_check:
            self.assertGradientChecks(
                device_option=gc,
                op=op,
                inputs=inputs,
                outputs_to_check=param,
                outputs_with_grads=[0, 4],
                threshold=0.01,
                stepsize=0.001,
            )
Code example #21
    def test_mul_rnn(self, T, n, d):
        model = CNNModelHelper(name='external')

        one_blob = model.param_init_net.ConstantFill(
            [], value=1.0, shape=[1, n, d])
        input_blob = model.net.AddExternalInput('input')

        step = ModelHelperBase(name='step', param_model=model)
        input_t, output_t_prev = step.net.AddExternalInput(
            'input_t', 'output_t_prev')
        output_t = step.net.Mul([input_t, output_t_prev])
        step.net.AddExternalOutput(output_t)

        recurrent.recurrent_net(
            net=model.net,
            cell_net=step.net,
            inputs=[(input_t, input_blob)],
            initial_cell_inputs=[(output_t_prev, one_blob)],
            links={output_t_prev: output_t},
            scope="test_mul_rnn",
        )

        workspace.FeedBlob(
            str(input_blob), np.random.randn(T, n, d).astype(np.float32))
        workspace.RunNetOnce(model.param_init_net)

        op = model.net._net.op[-1]

        def reference(input, initial_input):
            recurrent_input = initial_input
            result = np.zeros(shape=input.shape)

            for t_cur in range(T):
                recurrent_input = recurrent_input * input[t_cur]
                result[t_cur] = recurrent_input

            shape = list(input.shape)
            shape[0] = 1
            return (result, result[-1].reshape(shape))

        def grad_reference(output_grad, ref_output, inputs):
            input = inputs[0]
            output = ref_output[0]
            initial_input = inputs[1]
            input_grad = np.zeros(shape=input.shape)
            right_grad = 0

            for t_cur in range(T - 1, -1, -1):
                prev_output = output[t_cur - 1] if t_cur > 0 else initial_input
                input_grad[t_cur] = (output_grad[t_cur] +
                                     right_grad) * prev_output
                right_grad = input[t_cur] * (output_grad[t_cur] + right_grad)
            return (input_grad, right_grad.reshape([1, n, d]))

        self.assertReferenceChecks(
            device_option=hu.cpu_do,
            op=op,
            inputs=[
                workspace.FetchBlob(name)
                for name in [input_blob, one_blob]
            ],
            reference=reference,
            grad_reference=grad_reference,
            output_to_grad=op.output[0],
            outputs_to_check=[0, 1],
        )
Code example #22
    def test_stateful_convolution_forward_only(
        self,
        sequence_length,
        conv_window,
        batch_size,
        state_size,
    ):
        '''
        This unit test demonstrates another way of using RecurrentNetwork.

        Imagine that you want to compute a convolution over a sequence,
        but the sequence elements are not given to you from the beginning,
        so you have to loop over the sequence and compute the convolution
        for each element separately. This situation can occur during the
        inference/generation step of a neural network.

        First of all, you have to provide actual input via recurrent states,
        since the input of RecurrentNetwork should be known in advance.
        Here, we use `fake_inputs` as the input,
        and it's used by the op to extract batch size and sequence length.
        The actual input sequence is stored in the recurrent state
        `input_state`. At every step we generate a new element via input_state_t
        (in this example, input_state_t is generated at random, but
        in a real situation it can be created using convolution output
        from the previous step).

        A few important differences from the regular RecurrentNetwork use case:

        1. input_state_t_prev is not just the single previous element of the
        input_state sequence. It is the last conv_window elements including (!)
        the current one, input_state_t. We specify that using the `link_window`
        argument of RecurrentNetwork. We need that many elements to
        compute a single convolution step. Also, note that `link_window`
        specifies how many elements to link, starting at the
        `timestep` + `link_offset` position.

        2. The first few steps might require additional zero padding on the left,
        since not enough elements of the input_state sequence are available yet.
        So the initial state for input_state contains several elements
        (exactly as many pads as we need for the first step). Also, because of
        that, all offsetting over the input_state sequence is shifted
        by the length of initial_input_state: see the `link_offset` and
        `alias_offset` arguments of RecurrentNetwork.

        In this test, we assert that we get the same result
        if we apply the convolution over all elements simultaneously,
        since the whole input_state sequence has been generated by the end.
        '''
        model = CNNModelHelper(name='model')
        fake_inputs = model.param_init_net.UniformFill(
            [],
            'fake_inputs',
            min=-1.0,
            max=1.0,
            shape=[sequence_length, batch_size, state_size],
        )
        initial_input_state = model.param_init_net.ConstantFill(
            [],
            'initial_input_state',
            value=0.0,
            shape=[conv_window - 1, batch_size, state_size],
        )
        initial_output_state = model.param_init_net.ConstantFill(
            [],
            'initial_output_state',
            value=0.0,
            shape=[1, batch_size, state_size],
        )
        step_model = CNNModelHelper(name='step_model', param_model=model)
        (
            fake_input_t,
            timestep,
            input_state_t_prev,
        ) = step_model.net.AddExternalInputs(
            'fake_input_t',
            'timestep',
            'input_state_t_prev',
        )
        conv_filter = step_model.param_init_net.XavierFill(
            [],
            'conv_filter',
            shape=[state_size, 1, conv_window, state_size],
        )
        conv_bias = step_model.param_init_net.ConstantFill(
            [],
            'conv_bias',
            shape=[state_size],
            value=0.0,
        )
        step_model.params.extend([conv_filter, conv_bias])
        input_state_t = step_model.net.UniformFill(
            [],
            'input_state_t',
            min=-1.0,
            max=1.0,
            shape=[1, batch_size, state_size],
        )
        output_state_t = self._convolution_1d(
            model=step_model,
            inputs=input_state_t_prev,
            conv_window=conv_window,
            conv_filter=conv_filter,
            conv_bias=conv_bias,
            output_name='output_state_t',
            left_pad=False,
        )
        initial_recurrent_states = [initial_input_state, initial_output_state]
        all_inputs = ([fake_inputs] + step_model.params +
                      initial_recurrent_states)
        all_outputs = ['input_state_all', 'output_state_all']
        recurrent_states = ['input_state', 'output_state']
        input_state_all, output_state_all, _ = model.net.RecurrentNetwork(
            all_inputs,
            all_outputs + ['step_workspaces'],
            param=map(all_inputs.index, step_model.params),
            alias_src=recurrent_states,
            alias_dst=all_outputs,
            alias_offset=[conv_window - 1, 1],
            recurrent_states=recurrent_states,
            initial_recurrent_state_ids=map(
                all_inputs.index,
                initial_recurrent_states,
            ),
            link_internal=map(
                str,
                [input_state_t_prev, input_state_t, output_state_t],
            ),
            link_external=['input_state', 'input_state', 'output_state'],
            link_offset=[0, conv_window - 1, 1],
            link_window=[conv_window, 1, 1],
            backward_link_internal=[],
            backward_link_external=[],
            backward_link_offset=[],
            step_net=str(step_model.net.Proto()),
            backward_step_net='',
            timestep='timestep' if timestep is None else str(timestep),
            outputs_with_grads=[],
        )

        output_states_2 = self._convolution_1d(
            model=model,
            inputs=input_state_all,
            conv_window=conv_window,
            conv_filter=conv_filter,
            conv_bias=conv_bias,
            output_name='output_states_2',
            left_pad=True,
        )

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)

        np.testing.assert_almost_equal(
            workspace.FetchBlob(output_state_all),
            workspace.FetchBlob(output_states_2),
            decimal=3,
        )
Code example #23
File: rnn_cell_test.py  Project: mfalconi/caffe2
    def lstm_base(self, lstm_type, outputs_with_grads, memory_optim,
                  input_tensor, forget_bias, fwd_only, drop_states):
        print("LSTM test parameters: ", locals())
        create_lstm, ref = lstm_type
        t, n, d = input_tensor.shape
        assert d % 4 == 0
        d = d // 4
        print("Dims: ", t, n, d)
        ref = partial(ref, forget_bias=forget_bias, drop_states=drop_states)

        model = CNNModelHelper(name='external')
        input_blob, seq_lengths, hidden_init, cell_init = (
            model.net.AddExternalInputs(
                'input_blob', 'seq_lengths', 'hidden_init', 'cell_init'))

        create_lstm(
            model, input_blob, seq_lengths, (hidden_init, cell_init),
            d, d, scope="external/recurrent",
            outputs_with_grads=outputs_with_grads,
            memory_optimization=memory_optim,
            forget_bias=forget_bias,
            forward_only=fwd_only,
            drop_states=drop_states,
        )

        op = model.net._net.op[-1]

        workspace.RunNetOnce(model.param_init_net)
        input_blob = op.input[0]

        def generate_random_state(n, d):
            ndim = int(np.random.choice(3, 1)) + 1
            if ndim == 1:
                return np.random.randn(1, n, d).astype(np.float32)
            random_state = np.random.randn(n, d).astype(np.float32)
            if ndim == 3:
                random_state = random_state.reshape([1, n, d])
            return random_state

        workspace.FeedBlob(
            str(input_blob), np.random.randn(t, n, d * 4).astype(np.float32))
        workspace.FeedBlob("hidden_init", generate_random_state(n, d))
        workspace.FeedBlob("cell_init", generate_random_state(n, d))
        workspace.FeedBlob(
            "seq_lengths",
            np.random.randint(1, t + 1, size=(n,)).astype(np.int32)
        )
        inputs = [workspace.FetchBlob(name) for name in op.input]

        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            inputs,
            ref,
            outputs_to_check=range(4),
        )

        # Checking for input, gates_t_w and gates_t_b gradients
        if not fwd_only:
            for param in range(5):
                self.assertGradientChecks(
                    device_option=hu.cpu_do,
                    op=op,
                    inputs=inputs,
                    outputs_to_check=param,
                    outputs_with_grads=outputs_with_grads,
                    threshold=0.01,
                    stepsize=0.005,
                )