from collections import OrderedDict

import numpy as np
import theano
import lasagne
from lasagne.layers import (InputLayer, DenseLayer, DropoutLayer, EmbeddingLayer,
                            reshape, dimshuffle, get_all_params, get_output)

# the agentnet imports below assume the library's usual module layout;
# adjust the paths if they differ in your checkout
import agentnet
from agentnet.memory import GRUCell, RNNCell, LSTMCell
from agentnet.memory.attention import AttentionLayer, multihead_attention
from agentnet.resolver import EpsilonGreedyResolver


def test_recurrence_substituted():
    """Test whether it is possible to use intermediate layers as recurrence inputs."""
    sequence = InputLayer((None, None, 3), name='input sequence')
    sequence_intermediate = InputLayer((None, None, 5), name='intermediate values sequence')
    initial = InputLayer((None, 10), name='gru zero tick')

    # step
    inp = InputLayer((None, 3), name='input')
    intermediate = DenseLayer(inp, 5, name='intermediate')
    prev_gru = InputLayer((None, 10), name='prev rnn')
    gru = GRUCell(prev_gru, intermediate, name='rnn')

    # regular recurrence: provide raw inputs, intermediate is computed as usual
    rec = agentnet.Recurrence(input_sequences={inp: sequence},
                              state_variables={gru: prev_gru},
                              state_init={gru: initial},  # defaults to zeros
                              unroll_scan=False)

    weights = get_all_params(rec)
    assert intermediate.b in weights

    gru_states = rec[gru]

    run = theano.function([sequence.input_var, initial.input_var],
                          get_output(gru_states))

    assert tuple(run(np.random.randn(5, 25, 3), np.random.randn(5, 10)).shape) == (5, 25, 10)

    # recurrence with substituted intermediate values: the dense layer is
    # bypassed, so its parameters must not show up in the graph
    rec2 = agentnet.Recurrence(input_sequences={intermediate: sequence_intermediate},
                               state_variables={gru: prev_gru},
                               state_init={gru: initial},  # defaults to zeros
                               unroll_scan=False)

    weights2 = get_all_params(rec2)
    assert intermediate.b not in weights2

    gru_states2 = rec2[gru]

    run = theano.function([sequence_intermediate.input_var, initial.input_var],
                          get_output(gru_states2))

    assert tuple(run(np.random.randn(5, 25, 5), np.random.randn(5, 10)).shape) == (5, 25, 10)
def test_recurrence_mask():
    """Test mask_input: wherever mask == 0, the hidden state must stay frozen."""
    np.random.seed(1337)

    sequence = InputLayer((None, None, 2), name='input sequence')
    mask = InputLayer((None, None), name="rnn mask [batch, tick]")

    # step
    inp = InputLayer((None, 2))
    prev_rnn = InputLayer((None, 3))
    # init bias with a positive constant to make sure hiddens change on active ticks
    rnn = RNNCell(prev_rnn, inp, name='rnn',
                  nonlinearity=lasagne.nonlinearities.linear,
                  b=lasagne.init.Constant(100.0))

    rec = agentnet.Recurrence(input_sequences={inp: sequence},
                              state_variables={rnn: prev_rnn},
                              unroll_scan=False,
                              mask_input=mask)

    rnn_states = rec[rnn]

    run = theano.function([sequence.input_var, mask.input_var],
                          get_output(rnn_states))

    seq = np.random.randn(4, 5, 2)
    mask_values = np.zeros([4, 5])
    mask_values[:2, :3] = 1
    mask_values[2:, 2:] = 1

    out = run(seq, mask_values)
    assert tuple(out.shape) == (4, 5, 3)

    # states must change exactly on the masked-in ticks and stay constant otherwise
    # (see the numpy reference sketch below)
    diff_out = np.diff(out, axis=1)
    assert np.all(diff_out[:2, 2:] == 0)
    assert np.all(diff_out[:2, :2] != 0)
    assert np.all(diff_out[2:, 1:] != 0)
    assert np.all(diff_out[2:, :1] == 0)
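# A plain-numpy sketch (not AgentNet API; step_fn and h0 are made-up names) of
# the masking semantics the assertions above rely on: wherever mask[b, t] == 0
# the state is carried over unchanged, so np.diff along the time axis is zero
# exactly on the masked-out ticks.
def _masked_recurrence_reference(step_fn, seq, mask, h0):
    """Apply step_fn over time, freezing the state where mask == 0."""
    h, states = h0, []
    for t in range(seq.shape[1]):
        h_new = step_fn(seq[:, t], h)
        keep = mask[:, t, None]  # broadcast mask over state units
        h = keep * h_new + (1 - keep) * h
        states.append(h)
    return np.stack(states, axis=1)  # [batch, time, units]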
def test_recurrence():
    """Minimalistic test."""
    sequence = InputLayer((None, None, 3), name='input sequence')
    initial = InputLayer((None, 10), name='gru zero tick')

    # step
    inp = InputLayer((None, 3))
    prev_gru = InputLayer((None, 10))
    gru = GRUCell(prev_gru, inp, name='rnn')

    rec = agentnet.Recurrence(input_sequences={inp: sequence},
                              state_variables={gru: prev_gru},
                              state_init={gru: initial},  # defaults to zeros
                              unroll_scan=False)

    weights = get_all_params(rec)
    gru_states = rec[gru]

    run = theano.function([sequence.input_var, initial.input_var],
                          get_output(gru_states))

    assert tuple(run(np.random.randn(5, 25, 3), np.random.randn(5, 10)).shape) == (5, 25, 10)
def test_attention_2d():
    """
    Almost a copy-paste of the previous test, but this time attention is applied
    to an image instead of a 1d sequence.
    """
    # step inner graph
    class step:
        image = InputLayer((None, 3, 24, 24),
                           name='placeholder for 24x24 image (to be attended)')
        prev_gru = InputLayer((None, 15), name='gru prev state (15 units)')

        # get image dimensions
        n_channels, width, height = image.output_shape[1:]

        # flatten all image spots so they look like a 1d sequence
        image_chunks = reshape(dimshuffle(image, [0, 2, 3, 1]),
                               (-1, width * height, n_channels))

        attention = AttentionLayer(image_chunks, prev_gru, num_units=16)

        gru = GRUCell(prev_gru, attention['attn'],
                      name='rnn that reads enc_sequence with attention')

        # attention weights - reshape back into image
        attn_probs = reshape(attention['probs'], (-1, width, height))

    # outer graph
    input_image = InputLayer((None, 3, 24, 24),
                             name='24x24-pixel RGB image to be sent into step.image')

    rec = agentnet.Recurrence(input_nonsequences={step.image: input_image},
                              state_variables={step.gru: step.prev_gru},
                              tracked_outputs=[step.attn_probs],
                              unroll_scan=False,
                              n_steps=10)

    weights = get_all_params(rec)

    gru_states, attention_probs_seq = rec[step.gru, step.attn_probs]

    run = theano.function([input_image.input_var],
                          get_output([gru_states, attention_probs_seq]),
                          updates=rec.get_automatic_updates(),
                          allow_input_downcast=True)

    # run on surrogate data
    gru_seq, probs_seq = run(np.random.randn(5, 3, 24, 24))

    assert gru_seq.shape == (5, 10, 15)        # hidden GRU states: 5 samples / 10 ticks / 15 units
    assert probs_seq.shape == (5, 10, 24, 24)  # attention maps: 5 samples / 10 ticks / 24 width / 24 height
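# A plain-numpy sketch (not AgentNet API) of the flatten-to-sequence trick used
# in test_attention_2d: dimshuffle + reshape turn a [batch, channels, width,
# height] image into a [batch, width*height, channels] "sequence" of pixel
# vectors, so 1d attention can be reused on 2d input.
def _image_to_chunks_reference(images):
    batch, channels, width, height = images.shape
    chunks = images.transpose(0, 2, 3, 1).reshape(batch, width * height, channels)
    # pixel (i, j) lands at sequence position i * height + j
    assert np.allclose(chunks[0, 0], images[0, :, 0, 0])
    return chunks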
def test_recurrence_larger():
    """Larger recurrence."""
    sequence = InputLayer((None, None, 3), name='input sequence')
    initial_cell = InputLayer((None, 20), name='lstm cell zero tick')

    # step
    inp = InputLayer((None, 3))
    prev_rnn = InputLayer((None, 10))
    rnn = RNNCell(prev_rnn, inp, name='rnn')

    prev_lstm_cell = InputLayer((None, 20))  # lstm cell
    prev_lstm_hid = InputLayer((None, 20))   # lstm output
    lstm_cell, lstm_hid = LSTMCell(prev_lstm_cell, prev_lstm_hid,
                                   input_or_inputs=rnn)

    # dropout on hid, but not on cell - just to check that randomness works
    lstm_hid = DropoutLayer(lstm_hid, p=0.5)

    # a regular dict would also work, but it raises a warning about
    # non-deterministic order; build the OrderedDict from pairs so the
    # order really is fixed
    state_variables = OrderedDict([(rnn, prev_rnn),
                                   (lstm_hid, prev_lstm_hid),
                                   (lstm_cell, prev_lstm_cell)])

    rec = agentnet.Recurrence(input_sequences={inp: sequence},
                              state_variables=state_variables,
                              state_init={lstm_cell: initial_cell},  # defaults to zeros
                              unroll_scan=False)

    weights = get_all_params(rec)

    rnn_states = rec[rnn]
    lstm_cell_states = rec[lstm_cell]
    lstm_hid_states = rec[lstm_hid]

    run = theano.function(
        [sequence.input_var, initial_cell.input_var],
        get_output([rnn_states, lstm_cell_states, lstm_hid_states]),
        # if any randomness (here: dropout) is used inside the step,
        # one has to pass the recurrence's automatic updates
        updates=rec.get_automatic_updates())

    out = run(np.random.randn(5, 25, 3), np.random.randn(5, 20))

    assert tuple(out[0].shape) == (5, 25, 10)  # rnn
    assert tuple(out[1].shape) == (5, 25, 20)  # lstm cell
    assert tuple(out[2].shape) == (5, 25, 20)  # lstm hid (aka output)
def test_out_batch1():
    """Minimalistic test for batch_size=1,
    https://github.com/yandexdataschool/AgentNet/issues/79"""
    # step
    prev_out = InputLayer((None,))
    prev_gru = InputLayer((None, 10))
    gru = GRUCell(prev_gru, EmbeddingLayer(prev_out, 3, 3), name='rnn')
    probs = DenseLayer(gru, 3, nonlinearity=lasagne.nonlinearities.softmax)
    out = EpsilonGreedyResolver(probs)

    batch_size = 1
    rec = agentnet.Recurrence(state_variables={gru: prev_gru, out: prev_out},
                              unroll_scan=False,
                              n_steps=5,
                              batch_size=batch_size)

    run = theano.function([], get_output(rec[out]),
                          updates=rec.get_automatic_updates())

    assert tuple(run().shape) == (1, 5)
def test_multihead_attention():
    """
    Minimalistic test that showcases an attentive RNN that reads some chunk of
    the input sequence on each tick and outputs nothing. This time it uses
    multi-head dot-product attention (aka multiplicative attention) instead of
    the regular one.
    """
    # step inner graph
    class step:
        enc_activations = InputLayer(
            (None, None, 12),
            name='placeholder for encoder activations (to be attended)')
        prev_gru = InputLayer((None, 15), name='gru prev state (15 units)')

        keys_seq = DenseLayer(enc_activations, 30, num_leading_axes=2,
                              nonlinearity=None)

        attention = multihead_attention(enc_activations, prev_gru,
                                        key_sequence=keys_seq,
                                        num_heads=3,
                                        use_dense_layer=True)

        gru = GRUCell(prev_gru, attention['attn'],
                      name='rnn that reads enc_sequence with attention')

        # attended values and weights from inside the attention layer
        attn, attn_probs = attention['attn'], attention['probs']

    # outer graph
    encoder_activations = InputLayer(
        (None, None, 12),
        name='encoder sequence (will be sent to enc_sequence)')

    rec = agentnet.Recurrence(
        input_nonsequences={step.enc_activations: encoder_activations},
        state_variables={step.gru: step.prev_gru},
        tracked_outputs=[step.attn_probs, step.attn],
        unroll_scan=False,
        n_steps=10)

    weights = get_all_params(rec)

    gru_states, attn_heads_seq, attention_probs_seq = rec[step.gru, step.attn,
                                                          step.attn_probs]

    run = theano.function(
        [encoder_activations.input_var],
        get_output([gru_states, attn_heads_seq, attention_probs_seq]),
        updates=rec.get_automatic_updates(),
        allow_input_downcast=True)

    # run on surrogate data
    gru_seq, heads_seq, probs_seq = run(np.random.randn(5, 25, 12))

    assert gru_seq.shape == (5, 10, 15)        # hidden GRU states: 5 samples / 10 ticks / 15 units
    assert probs_seq.shape == (5, 10, 3, 25)   # attention weights: 5 samples / 10 ticks / 3 heads / 25 input positions
    assert heads_seq.shape == (5, 10, 3 * 12)  # attended values: 5 samples / 10 ticks / 3 heads * 12 units each

    # hard attention
    hard_outputs = get_output([gru_states, attention_probs_seq],
                              recurrence_flags={'hard_attention': True})

    hard_run = theano.function([encoder_activations.input_var],
                               hard_outputs,
                               updates=rec.get_automatic_updates(),
                               allow_input_downcast=True)

    # run on surrogate data
    _, hard_probs_seq = hard_run(np.random.randn(5, 25, 12))

    # check that hard attention probs are one-hot
    assert hard_probs_seq.shape == (5, 10, 3, 25)
    assert len(np.unique(hard_probs_seq.ravel())) == 2  # only 0's and 1's
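# A plain-numpy sketch of what the one-hot check above expects from hard
# attention (assumption: hard attention selects one position per head and tick,
# e.g. the argmax; the exact sampling scheme is up to the attention layer).
def _hard_attention_reference(probs):
    """probs: [batch, ticks, heads, seq_len] soft attention weights."""
    hard = np.zeros_like(probs)
    idx = probs.argmax(axis=-1)
    np.put_along_axis(hard, idx[..., None], 1.0, axis=-1)
    return hard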