def test_sequence_content_attention():
    """Check shapes and basic weight properties of ``take_glimpses``.

    All attended values and states are zeros, so the test does not pin
    specific glimpse values. It checks the ranks and shapes of the outputs
    and that the attention weights lie in [0, 1], sum to one per batch
    element, and are zero exactly where the mask is zero.
    """
    # NOTE(review): a second function with this same name follows in the
    # file and rebinds the name, so this copy is shadowed at import time.
    # Disclaimer: only check dimensions, not values
    rng = numpy.random.RandomState([2014, 12, 2])

    seq_len = 5
    batch_size = 6
    state_dim = 2
    attended_dim = 3
    match_dim = 4

    attention = SequenceContentAttention(
        state_names=["states"], state_dims=[state_dim],
        attended_dim=attended_dim, match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    attention.initialize()

    sequences = tensor.tensor3('sequences')
    states = tensor.matrix('states')
    mask = tensor.matrix('mask')
    glimpses, weights = attention.take_glimpses(
        sequences, attended_mask=mask, states=states)
    assert glimpses.ndim == 2
    assert weights.ndim == 2

    seq_values = numpy.zeros((seq_len, batch_size, attended_dim),
                             dtype=theano.config.floatX)
    states_values = numpy.zeros((batch_size, state_dim),
                                dtype=theano.config.floatX)
    mask_values = numpy.zeros((seq_len, batch_size),
                              dtype=theano.config.floatX)
    # Randomly generate a sensible mask: randint(1, seq_len) excludes the
    # upper bound, so each batch column gets between 1 and seq_len - 1
    # leading ones and is never fully masked out.
    for batch_idx in range(batch_size):
        mask_values[:rng.randint(1, seq_len), batch_idx] = 1

    compute = theano.function([sequences, states, mask], [glimpses, weights])
    glimpses_values, weight_values = compute(
        seq_values, states_values, mask_values)

    assert glimpses_values.shape == (batch_size, attended_dim)
    assert weight_values.shape == (batch_size, seq_len)
    assert numpy.all(weight_values >= 0)
    assert numpy.all(weight_values <= 1)
    # Normalised floating-point weights need not sum to exactly 1.0, so
    # compare with a tolerance instead of exact equality.
    assert numpy.allclose(weight_values.sum(axis=1), 1)
    assert numpy.all((weight_values.T == 0) == (mask_values == 0))
def test_sequence_content_attention():
    """Exercise ``SequenceContentAttention.take_glimpses`` on zero inputs.

    Only structural properties are verified: the symbolic outputs are
    matrices, the computed arrays have shapes ``(batch_size, attended_dim)``
    and ``(batch_size, seq_len)``, and the weights are within [0, 1],
    normalised per batch element, and zero exactly at masked positions.
    """
    # Disclaimer: only check dimensions, not values
    rng = numpy.random.RandomState([2014, 12, 2])

    seq_len = 5
    batch_size = 6
    state_dim = 2
    attended_dim = 3
    match_dim = 4

    attention = SequenceContentAttention(
        state_names=["states"], state_dims=[state_dim],
        attended_dim=attended_dim, match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    attention.initialize()

    sequences = tensor.tensor3('sequences')
    states = tensor.matrix('states')
    mask = tensor.matrix('mask')
    glimpses, weights = attention.take_glimpses(
        sequences, attended_mask=mask, states=states)
    assert glimpses.ndim == 2
    assert weights.ndim == 2

    float_type = theano.config.floatX
    seq_values = numpy.zeros(
        (seq_len, batch_size, attended_dim), dtype=float_type)
    states_values = numpy.zeros((batch_size, state_dim), dtype=float_type)
    mask_values = numpy.zeros((seq_len, batch_size), dtype=float_type)

    # randomly generate a sensible mask: every batch column keeps a random
    # non-empty prefix of time steps (length drawn from [1, seq_len)).
    for batch_idx in range(batch_size):
        mask_values[:rng.randint(1, seq_len), batch_idx] = 1

    glimpses_values, weight_values = theano.function(
        [sequences, states, mask], [glimpses, weights])(
            seq_values, states_values, mask_values)

    assert glimpses_values.shape == (batch_size, attended_dim)
    assert weight_values.shape == (batch_size, seq_len)
    assert numpy.all(weight_values >= 0)
    assert numpy.all(weight_values <= 1)
    # Tolerant comparison: a floating-point sum of normalised weights can
    # differ from 1.0 by rounding error, which exact equality would reject.
    assert numpy.allclose(weight_values.sum(axis=1), 1)
    assert numpy.all((weight_values.T == 0) == (mask_values == 0))
def test_compute_weights_with_zero_mask():
    """``compute_weights`` must return only finite values for a zero mask.

    Random energies are combined with a mask that is zero everywhere; the
    evaluated weights are then checked for NaN or infinite entries.
    """
    # NOTE(review): a second function with this same name follows in the
    # file and rebinds the name, so this copy is shadowed at import time.
    state_dim, attended_dim, match_dim = 2, 3, 4
    attended_length, batch_size = 5, 6
    shape = (attended_length, batch_size)

    attention = SequenceContentAttention(
        state_names=["states"],
        state_dims=[state_dim],
        attended_dim=attended_dim,
        match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0),
    )
    attention.initialize()

    random_energies = numpy.random.rand(*shape)
    energies = tensor.as_tensor_variable(random_energies)
    zero_mask = tensor.as_tensor_variable(numpy.zeros(shape))

    computed = attention.compute_weights(energies, zero_mask).eval()
    assert numpy.all(numpy.isfinite(computed))
def test_compute_weights_with_zero_mask():
    """Verify that a fully zero mask does not yield NaN/inf weights.

    Builds an initialised ``SequenceContentAttention`` brick, feeds
    ``compute_weights`` uniformly random energies together with an all-zero
    mask, and asserts every resulting weight is finite.
    """
    state_dim = 2
    attended_dim = 3
    match_dim = 4
    attended_length = 5
    batch_size = 6

    brick = SequenceContentAttention(
        state_names=["states"], state_dims=[state_dim],
        attended_dim=attended_dim, match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    brick.initialize()

    energy_values = numpy.random.rand(attended_length, batch_size)
    mask_values = numpy.zeros((attended_length, batch_size))

    weights_expr = brick.compute_weights(
        tensor.as_tensor_variable(energy_values),
        tensor.as_tensor_variable(mask_values))
    weights = weights_expr.eval()

    assert numpy.all(numpy.isfinite(weights))
def test_stable_attention_weights():
    """``compute_weights`` must stay finite for very large energies.

    Energies are drawn around 800 with a spread of 50 and combined with an
    all-ones mask; the evaluated weights must contain no NaN or infinity.
    """
    # NOTE(review): a second function with this same name follows in the
    # file and rebinds the name, so this copy is shadowed at import time.
    state_dim, attended_dim, match_dim = 2, 3, 4
    attended_length, batch_size = 5, 6
    shape = (attended_length, batch_size)

    attention = SequenceContentAttention(
        state_names=["states"],
        state_dims=[state_dim],
        attended_dim=attended_dim,
        match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0),
    )
    attention.initialize()

    # Random high energies with mu=800, sigma=50
    # (presumably large enough that a naive exp() would overflow)
    noise = numpy.random.randn(*shape)
    energies_val = (50. * noise + 800).astype(theano.config.floatX)
    energies = tensor.as_tensor_variable(energies_val)
    ones_mask = tensor.as_tensor_variable(numpy.ones(shape))

    computed = attention.compute_weights(energies, ones_mask).eval()
    assert numpy.all(numpy.isfinite(computed))
def test_stable_attention_weights():
    """Check numerical stability of the attention weights.

    Feeds ``compute_weights`` normally distributed energies centred on 800
    (standard deviation 50) with a mask of ones and asserts that every
    resulting weight is finite.
    """
    state_dim = 2
    attended_dim = 3
    match_dim = 4
    attended_length = 5
    batch_size = 6

    brick = SequenceContentAttention(
        state_names=["states"], state_dims=[state_dim],
        attended_dim=attended_dim, match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    brick.initialize()

    # Random high energies with mu=800, sigma=50
    standard_normal = numpy.random.randn(attended_length, batch_size)
    scaled = 50. * standard_normal + 800
    energies_val = scaled.astype(theano.config.floatX)
    mask_values = numpy.ones((attended_length, batch_size))

    weights_expr = brick.compute_weights(
        tensor.as_tensor_variable(energies_val),
        tensor.as_tensor_variable(mask_values))
    weights = weights_expr.eval()

    assert numpy.all(numpy.isfinite(weights))