def test_with_attention():
    inp_dim = 2
    inp_len = 10
    attended_dim = 3
    attended_len = 11
    batch_size = 4
    n_steps = 30

    transition = TestTransition(
        dim=inp_dim, attended_dim=attended_dim, activation=Identity())
    attention = SequenceContentAttention(
        transition.apply.states, match_dim=inp_dim, name="attention")
    att_trans = AttentionRecurrent(
        transition, attention, add_contexts=False)
    att_trans.weights_init = IsotropicGaussian(0.01)
    att_trans.biases_init = Constant(0)
    att_trans.initialize()

    attended = tensor.tensor3("attended")
    attended_mask = tensor.matrix("attended_mask")
    inputs = tensor.tensor3("inputs")
    inputs_mask = tensor.matrix("inputs_mask")
    states, glimpses, weights = att_trans.apply(
        inputs=inputs, mask=inputs_mask,
        attended=attended, attended_mask=attended_mask)
    assert states.ndim == 3
    assert glimpses.ndim == 3
    assert weights.ndim == 3

    input_vals = numpy.zeros((inp_len, batch_size, inp_dim),
                             dtype=floatX)
    input_mask_vals = numpy.ones((inp_len, batch_size),
                                 dtype=floatX)
    attended_vals = numpy.zeros((attended_len, batch_size, attended_dim),
                                dtype=floatX)
    attended_mask_vals = numpy.ones((attended_len, batch_size),
                                    dtype=floatX)

    func = theano.function([inputs, inputs_mask, attended, attended_mask],
                           [states, glimpses, weights])
    states_vals, glimpses_vals, weight_vals = func(
        input_vals, input_mask_vals, attended_vals, attended_mask_vals)
    assert states_vals.shape == input_vals.shape
    assert glimpses_vals.shape == (inp_len, batch_size, attended_dim)
    assert weight_vals.shape == (inp_len, batch_size, attended_len)

    # Test SequenceGenerator using AttentionRecurrent.
    generator = SequenceGenerator(
        LinearReadout(readout_dim=inp_dim, source_names=["states"],
                      emitter=TestEmitter(name="emitter"),
                      name="readout"),
        transition=transition,
        attention=attention,
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
        add_contexts=False, name="generator")

    outputs = tensor.tensor3('outputs')
    costs = generator.cost(outputs,
                           attended=attended, attended_mask=attended_mask)
    costs_vals = costs.eval({outputs: input_vals,
                             attended: attended_vals,
                             attended_mask: attended_mask_vals})
    assert costs_vals.shape == (inp_len, batch_size)

    results = (
        generator.generate(n_steps=n_steps, batch_size=attended.shape[1],
                           attended=attended, attended_mask=attended_mask))
    assert len(results) == 5
    states_vals, outputs_vals, glimpses_vals, weights_vals, costs_vals = (
        theano.function([attended, attended_mask], results)
        (attended_vals, attended_mask_vals))
    assert states_vals.shape == (n_steps, batch_size, inp_dim)
    assert states_vals.shape == outputs_vals.shape
    assert glimpses_vals.shape == (n_steps, batch_size, attended_dim)
    assert weights_vals.shape == (n_steps, batch_size, attended_len)
    assert costs_vals.shape == (n_steps, batch_size)
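
# Illustrative sketch (not a test, and not part of the library): how a
# per-step cost matrix like `costs_vals` above, shaped
# (inp_len, batch_size), would typically be reduced to a scalar training
# cost under a 0/1 mask. The helper name and toy arrays are hypothetical.
def _masked_mean_cost_sketch():
    import numpy
    step_costs = numpy.ones((10, 4), dtype='float32')
    mask = numpy.ones((10, 4), dtype='float32')
    # Zero out padded steps, sum per sequence, average over the batch.
    per_sequence = (step_costs * mask).sum(axis=0)
    assert numpy.isclose(per_sequence.mean(), 10.0)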
def test_attention_recurrent():
    rng = numpy.random.RandomState(1234)

    dim = 5
    batch_size = 4
    input_length = 20

    attended_dim = 10
    attended_length = 15

    wrapped = SimpleRecurrent(dim, Identity())
    attention = SequenceContentAttention(
        state_names=wrapped.apply.states,
        attended_dim=attended_dim, match_dim=attended_dim)
    recurrent = AttentionRecurrent(wrapped, attention, seed=1234)
    recurrent.weights_init = IsotropicGaussian(0.5)
    recurrent.biases_init = Constant(0)
    recurrent.initialize()

    attended = tensor.tensor3("attended")
    attended_mask = tensor.matrix("attended_mask")
    inputs = tensor.tensor3("inputs")
    inputs_mask = tensor.matrix("inputs_mask")
    outputs = recurrent.apply(
        inputs=inputs, mask=inputs_mask,
        attended=attended, attended_mask=attended_mask)
    states, glimpses, weights = outputs
    assert states.ndim == 3
    assert glimpses.ndim == 3
    assert weights.ndim == 3

    # For values.
    def rand(size):
        return rng.uniform(size=size).astype(theano.config.floatX)

    # For masks.
    def generate_mask(length, batch_size):
        mask = numpy.ones((length, batch_size),
                          dtype=theano.config.floatX)
        # To make it look like real data: each sequence gets a random
        # length, with zeros padding the tail.
        for i in range(batch_size):
            mask[1 + rng.randint(0, length - 1):, i] = 0.0
        return mask

    input_vals = rand((input_length, batch_size, dim))
    input_mask_vals = generate_mask(input_length, batch_size)
    attended_vals = rand((attended_length, batch_size, attended_dim))
    attended_mask_vals = generate_mask(attended_length, batch_size)

    func = theano.function([inputs, inputs_mask, attended, attended_mask],
                           [states, glimpses, weights])
    states_vals, glimpses_vals, weight_vals = func(
        input_vals, input_mask_vals, attended_vals, attended_mask_vals)
    assert states_vals.shape == (input_length, batch_size, dim)
    assert glimpses_vals.shape == (input_length, batch_size, attended_dim)
    assert (len(ComputationGraph(outputs).shared_variables) ==
            len(Selector(recurrent).get_params()))

    # Weights for padded (masked-out) attended positions must be zero.
    assert numpy.all(weight_vals * (1 - attended_mask_vals.T) == 0)
    # Weights for real (unpadded) attended positions must be non-zero.
    assert numpy.all(abs(weight_vals + (1 - attended_mask_vals.T)) > 1e-5)
    # Weights from different steps should be noticeably different.
    assert (abs(weight_vals[0] - weight_vals[1])).sum() > 1e-2
    # Once the input mask runs out the state freezes, so the weights for
    # all steps after the last real input position should stay the same.
    for i in range(batch_size):
        last = int(input_mask_vals[:, i].sum())
        for j in range(last, input_length):
            assert_allclose(weight_vals[last, i], weight_vals[j, i],
                            1e-5)
    # Total weight mass (weights sum to one per step) and frozen
    # reference sums.
    assert_allclose(weight_vals.sum(), input_length * batch_size, 1e-5)
    assert_allclose(states_vals.sum(), 113.429, rtol=1e-5)
    assert_allclose(glimpses_vals.sum(), 415.901, rtol=1e-5)
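
# Illustrative sketch (not a test): the mask convention produced by
# `generate_mask` above. Masks are (length, batch) arrays of ones for
# real positions followed by zeros for padding, so column sums recover
# the true sequence lengths. The helper name is hypothetical.
def _mask_convention_sketch():
    import numpy
    mask = numpy.array([[1, 1],
                        [1, 0],
                        [0, 0]], dtype='float32')
    lengths = mask.sum(axis=0).astype(int)
    assert list(lengths) == [2, 1]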
# Scratch notes: stepping AttentionRecurrent through a single step by
# hand. `transition`, `SimpleSequenceAttention`, `attended_tv`, and
# `generated_sequence_t` are assumed to be defined elsewhere.
# `attended_tr` has shape (seq_length, batch_size, features).
from theano import tensor, function
from blocks import initialization
from blocks.bricks.attention import AttentionRecurrent

attended = tensor.tensor3('attended')
ssa = SimpleSequenceAttention(['states'], [3], 3)
ar = AttentionRecurrent(transition=transition, attention=ssa)
ar.weights_init = initialization.Constant(0.)
ar.biases_init = initialization.Constant(1.)
ar.initialize()

inputs = tensor.tensor3('inputs')
# ar.apply(attended=attended_tv, n_steps=n_steps, batch_size=2)

# One manual step: initial states -> glimpses -> next states.
states, glimpses, step = ar.initial_states(1, attended=attended)
glimpses, step = ar.take_glimpses(
    attended=attended, states=states, glimpses=glimpses, step=step)
states = ar.compute_states(
    inputs=inputs, attended=attended,
    states=states, glimpses=glimpses, step=step)
distributed = ar.distribute.apply(inputs=inputs, glimpses=glimpses)
states = ar.compute_states(
    states=states, inputs=inputs[0], glimpses=glimpses,
    step=step, attended=attended)

batch_size = 2
features = 3
# input_tr = numpy.zeros((seq_length, batch_size, features)).astype('float32')
input_tr = generated_sequence_t[1:]
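
# Hedged continuation of the scratch code above (a sketch, not verified):
# compile the manually-built one-step graph into a callable and feed it
# the prepared arrays. Assumes `ar`, `states`, `glimpses`, `inputs`,
# `attended`, `input_tr`, and `attended_tr` from above are in scope and
# have compatible shapes.
step_fn = function([inputs, attended], [states, glimpses],
                   on_unused_input='ignore')
states_val, glimpses_val = step_fn(input_tr.astype('float32'),
                                   attended_tr.astype('float32'))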