Beispiel #1
0
def test_recurrence_substituted():
    """Check that an intermediate layer of the step graph can be a recurrence input.

    The same one-step graph (input -> dense -> GRU) is wrapped in a recurrence
    twice: first the raw input is fed as a sequence, then the dense layer
    itself is substituted by an externally supplied sequence. In the second
    case the dense layer's bias must no longer be among the parameters.
    """
    # outer-graph placeholders
    seq_layer = InputLayer((None, None, 3), name='input sequence')
    seq_mid_layer = InputLayer((None, None, 5),
                               name='intermediate values sequence')
    init_layer = InputLayer((None, 10), name='gru zero tick')

    # one-step graph
    step_in = InputLayer((None, 3), name='input')
    dense = DenseLayer(step_in, 5, name='intermediate')
    gru_prev = InputLayer((None, 10), name='prev rnn')
    gru_cell = GRUCell(gru_prev, dense, name='rnn')

    def check_states_shape(fed_layer, states, n_features):
        """Compile the state sequence and run it on random data of 5 samples x 25 ticks."""
        fn = theano.function(
            [fed_layer.input_var, init_layer.input_var],
            get_output(states),
        )
        result = fn(np.random.randn(5, 25, n_features),
                    np.random.randn(5, 10))
        assert tuple(result.shape) == (5, 25, 10)

    # case 1: regular recurrence, the dense layer is computed from the input
    plain_rec = agentnet.Recurrence(
        input_sequences={step_in: seq_layer},
        state_variables={gru_cell: gru_prev},
        state_init={gru_cell: init_layer},  # explicit initial GRU state
        unroll_scan=False)

    assert dense.b in get_all_params(plain_rec)
    check_states_shape(seq_layer, plain_rec[gru_cell], 3)

    # case 2: the dense layer's values are supplied from outside
    substituted_rec = agentnet.Recurrence(
        input_sequences={dense: seq_mid_layer},
        state_variables={gru_cell: gru_prev},
        state_init={gru_cell: init_layer},  # explicit initial GRU state
        unroll_scan=False)

    assert dense.b not in get_all_params(substituted_rec)
    check_states_shape(seq_mid_layer, substituted_rec[gru_cell], 5)
Beispiel #2
0
def test_recurrence_mask():
    """Check that ``mask_input`` controls on which ticks the RNN state changes.

    A 4-sample, 5-tick batch is run through a linear RNN cell. The
    tick-to-tick differences of the returned states must be zero exactly
    where the assertions below expect the state to be held, and non-zero
    elsewhere.
    """
    np.random.seed(1337)

    sequence = InputLayer((None, None, 2), name='input sequence')
    mask = InputLayer((None, None), name="rnn mask [batch,tick]")

    # step
    inp = InputLayer((None, 2))
    prev_rnn = InputLayer((None, 3))
    # linear units with a large positive bias, so that every applied update
    # is expected to change the hidden state
    rnn = RNNCell(prev_rnn, inp, name='rnn',
                  nonlinearity=lasagne.nonlinearities.linear,
                  b=lasagne.init.Constant(100.0))

    rec = agentnet.Recurrence(input_sequences={inp: sequence},
                              state_variables={rnn: prev_rnn},
                              unroll_scan=False,
                              mask_input=mask)

    rnn_states = rec[rnn]
    run = theano.function([sequence.input_var, mask.input_var], get_output(rnn_states))

    seq = np.random.randn(4, 5, 2)
    # kept under a separate name so the `mask` layer above is not shadowed
    mask_values = np.zeros([4, 5])
    mask_values[:2, :3] = 1  # samples 0-1: ones on ticks 0..2
    mask_values[2:, 2:] = 1  # samples 2-3: ones on ticks 2..4
    out = run(seq, mask_values)

    assert tuple(out.shape) == (4, 5, 3)

    # diff_out[:, t] = out[:, t + 1] - out[:, t]; computed once and reused
    diff_out = np.diff(out, axis=1)
    assert np.all(diff_out[:2, 2:] == 0)  # samples 0-1: state frozen after tick 2
    assert np.all(diff_out[:2, :2] != 0)  # samples 0-1: state changes up to tick 2
    assert np.all(diff_out[2:, 1:] != 0)  # samples 2-3: state changes from tick 2 on
    assert np.all(diff_out[2:, :1] == 0)  # samples 2-3: state frozen before tick 2
Beispiel #3
0
def test_recurrence():
    """Minimalistic test: a single GRU cell run over an input sequence."""
    # outer-graph placeholders
    seq_layer = InputLayer((None, None, 3), name='input sequence')
    init_layer = InputLayer((None, 10), name='gru zero tick')

    # one-step graph
    step_in = InputLayer((None, 3))
    gru_prev = InputLayer((None, 10))
    gru_cell = GRUCell(gru_prev, step_in, name='rnn')

    recurrence = agentnet.Recurrence(
        input_sequences={step_in: seq_layer},
        state_variables={gru_cell: gru_prev},
        state_init={gru_cell: init_layer},  # explicit initial GRU state
        unroll_scan=False)

    # parameter collection is only exercised; its result is not inspected
    get_all_params(recurrence)

    state_seq = recurrence[gru_cell]
    predict = theano.function(
        [seq_layer.input_var, init_layer.input_var],
        get_output(state_seq),
    )

    # 5 samples, 25 ticks, 3 input features -> 10 GRU units per tick
    states = predict(np.random.randn(5, 25, 3), np.random.randn(5, 10))
    assert tuple(states.shape) == (5, 25, 10)
Beispiel #4
0
def test_attention_2d():
    """Attentive GRU that attends over the spatial positions of an image.

    The 24x24 image is flattened into a sequence of per-position channel
    vectors, attention is applied to that sequence, and the attention
    weights are reshaped back into image form and tracked over 10 ticks.
    """

    # inner (one-step) graph; the class is used purely as a namespace
    class step:
        image = InputLayer((None, 3, 24, 24),
                           name='placeholder for 24x24 image (to be attended)')
        prev_gru = InputLayer((None, 15), name='gru prev state (15 units)')

        # image dimensions without the batch axis
        channels, dim_a, dim_b = image.output_shape[1:]

        # move channels last, then flatten all positions into one sequence axis
        channels_last = dimshuffle(image, [0, 2, 3, 1])
        flat_positions = reshape(channels_last,
                                 (-1, dim_a * dim_b, channels))

        attn_layers = AttentionLayer(flat_positions, prev_gru, num_units=16)

        gru = GRUCell(prev_gru,
                      attn_layers['attn'],
                      name='rnn that reads enc_sequence with attention')

        # attention weights, reshaped back into image form
        attn_probs = reshape(attn_layers['probs'], (-1, dim_a, dim_b))

    # outer graph
    image_layer = InputLayer(
        (None, 3, 24, 24),
        name='24x24-pixel RGB image to be sent into step.image')

    recurrence = agentnet.Recurrence(
        input_nonsequences={step.image: image_layer},
        state_variables={step.gru: step.prev_gru},
        tracked_outputs=[step.attn_probs],
        unroll_scan=False,
        n_steps=10)

    # parameter collection is only exercised; its result is not inspected
    get_all_params(recurrence)

    state_seq, probs_layer = recurrence[step.gru, step.attn_probs]

    predict = theano.function(
        [image_layer.input_var],
        get_output([state_seq, probs_layer]),
        updates=recurrence.get_automatic_updates(),
        allow_input_downcast=True)

    # run on surrogate data
    gru_seq, probs_seq = predict(np.random.randn(5, 3, 24, 24))

    # hidden GRU states: 5 samples / 10 ticks / 15 units
    assert gru_seq.shape == (5, 10, 15)
    # attention maps: 5 samples / 10 ticks / 24 x 24 positions
    assert probs_seq.shape == (5, 10, 24, 24)
Beispiel #5
0
def test_recurrence_larger():
    """Larger recurrence: an RNN cell feeding an LSTM cell with dropout on its output.

    Three state variables are tracked (RNN state, LSTM output after dropout,
    LSTM cell); only the LSTM cell gets an explicit initial state.
    """
    # local import kept from the original test
    from collections import OrderedDict

    sequence = InputLayer((None, None, 3), name='input sequence')
    initial_cell = InputLayer((None, 20), name='lstm cell zero tick')

    # step
    inp = InputLayer((None, 3))
    prev_rnn = InputLayer((None, 10))
    rnn = RNNCell(prev_rnn, inp, name='rnn')

    prev_lstm_cell = InputLayer((None, 20))  # lstm cell
    prev_lstm_hid = InputLayer((None, 20))  # lstm output
    lstm_cell, lstm_hid = LSTMCell(prev_lstm_cell,
                                   prev_lstm_hid,
                                   input_or_inputs=rnn)

    # dropout on hid, but not on cell; just to check that it works
    lstm_hid = DropoutLayer(lstm_hid, p=0.5)

    # Built from a list of pairs: wrapping a dict literal would only inherit
    # whatever order the plain dict happens to have. The original comment
    # notes that a regular dict causes a warning here.
    state_variables = OrderedDict([
        (rnn, prev_rnn),
        (lstm_hid, prev_lstm_hid),
        (lstm_cell, prev_lstm_cell),
    ])

    rec = agentnet.Recurrence(
        input_sequences={inp: sequence},
        state_variables=state_variables,
        state_init={lstm_cell: initial_cell},  # other states default to zeros
        unroll_scan=False)

    # parameter collection is only exercised; its result is not inspected
    get_all_params(rec)

    rnn_states = rec[rnn]
    lstm_cell_states = rec[lstm_cell]
    lstm_hid_states = rec[lstm_hid]

    run = theano.function(
        [sequence.input_var, initial_cell.input_var],
        get_output([rnn_states, lstm_cell_states, lstm_hid_states]),
        # Randomness (the dropout above) inside the recurrence requires the
        # automatic updates to be passed on.
        # NOTE(review): the original comment ties this to unroll_scan, while
        # this test uses unroll_scan=False; confirm the exact condition.
        updates=rec.get_automatic_updates(),
    )

    out = run(np.random.randn(5, 25, 3), np.random.randn(5, 20))

    assert tuple(out[0].shape) == (5, 25, 10)  # rnn
    assert tuple(out[1].shape) == (5, 25, 20)  # lstm cell
    assert tuple(out[2].shape) == (5, 25, 20)  # lstm hid (aka output)
def test_out_batch1():
    """Minimalistic test of a recurrence without inputs, run with batch_size=1.

    See https://github.com/yandexdataschool/AgentNet/issues/79
    """
    # one-step graph: previous output -> embedding -> GRU -> softmax -> resolver
    prev_out = InputLayer((None,))
    prev_gru = InputLayer((None, 10))
    embedded = EmbeddingLayer(prev_out, 3, 3)
    gru = GRUCell(prev_gru, embedded, name='rnn')
    probs = DenseLayer(gru, 3, nonlinearity=lasagne.nonlinearities.softmax)
    out = EpsilonGreedyResolver(probs)

    n_samples = 1

    recurrence = agentnet.Recurrence(
        state_variables={gru: prev_gru, out: prev_out},
        unroll_scan=False,
        n_steps=5,
        batch_size=n_samples)

    generate = theano.function(
        [],
        get_output(recurrence[out]),
        updates=recurrence.get_automatic_updates())

    # one sample, five ticks
    produced = generate()
    assert tuple(produced.shape) == (1, 5)
Beispiel #7
0
def test_multihead_attention():
    """
    Minimalistic test that showcases an attentive RNN which reads some chunk
    of the input sequence on each tick and outputs nothing.

    It uses multihead dot-product [aka multiplicative] attention, first in
    its default mode and then with the 'hard_attention' recurrence flag.
    """

    # inner (one-step) graph; the class is used purely as a namespace
    class step:
        enc_activations = InputLayer(
            (None, None, 12),
            name='placeholder for encoder activations (to be attended)')
        prev_gru = InputLayer((None, 15), name='gru prev state (15 units)')

        # separate key sequence: 30 units per encoder tick
        attention_keys = DenseLayer(enc_activations,
                                    30,
                                    num_leading_axes=2,
                                    nonlinearity=None)

        attn_layers = multihead_attention(
            enc_activations,
            prev_gru,
            key_sequence=attention_keys,
            num_heads=3,
            use_dense_layer=True,
        )

        gru = GRUCell(prev_gru,
                      attn_layers['attn'],
                      name='rnn that reads enc_sequence with attention')

        # attended values and the weights from inside attention
        attn = attn_layers['attn']
        attn_probs = attn_layers['probs']

    # outer graph
    encoder_layer = InputLayer(
        (None, None, 12),
        name='encoder sequence (will be sent to enc_sequence)')

    recurrence = agentnet.Recurrence(
        input_nonsequences={step.enc_activations: encoder_layer},
        state_variables={step.gru: step.prev_gru},
        tracked_outputs=[step.attn_probs, step.attn],
        unroll_scan=False,
        n_steps=10)

    # parameter collection is only exercised; its result is not inspected
    get_all_params(recurrence)

    state_seq, heads_layer, probs_layer = recurrence[step.gru, step.attn,
                                                     step.attn_probs]

    soft_run = theano.function(
        [encoder_layer.input_var],
        get_output([state_seq, heads_layer, probs_layer]),
        updates=recurrence.get_automatic_updates(),
        allow_input_downcast=True)

    # run on surrogate data
    gru_seq, heads_seq, probs_seq = soft_run(np.random.randn(5, 25, 12))

    # hidden GRU states: 5 samples / 10 ticks / 15 units
    assert gru_seq.shape == (5, 10, 15)
    # attention weights: 5 samples / 10 ticks / 3 heads / 25 input seq length
    assert probs_seq.shape == (5, 10, 3, 25)
    # attended values: 5 samples / 10 ticks / 3 heads * 12
    # (presumably 12 = encoder feature size - TODO confirm)
    assert heads_seq.shape == (5, 10, 3 * 12)

    # hard attention
    hard_outputs = get_output([state_seq, probs_layer],
                              recurrence_flags={'hard_attention': True})

    hard_run = theano.function([encoder_layer.input_var],
                               hard_outputs,
                               updates=recurrence.get_automatic_updates(),
                               allow_input_downcast=True)

    # run on surrogate data
    _, hard_probs_seq = hard_run(np.random.randn(5, 25, 12))

    # attention weights: 5 samples / 10 ticks / 3 heads / 25 input seq length
    assert hard_probs_seq.shape == (5, 10, 3, 25)
    # exactly two distinct values, expected to be the 0's and 1's of one-hot rows
    assert len(np.unique(hard_probs_seq.ravel())) == 2