Example #1
import numpy
import theano
from numpy.testing import assert_allclose
from theano import tensor

from blocks.bricks import Identity
from blocks.bricks.attention import (AttentionRecurrent,
                                     SequenceContentAttention)
from blocks.bricks.recurrent import SimpleRecurrent
from blocks.graph import ComputationGraph
from blocks.initialization import Constant, IsotropicGaussian
from blocks.select import Selector

floatX = theano.config.floatX


def test_attention_recurrent():
    rng = numpy.random.RandomState(1234)

    dim = 5
    batch_size = 4
    input_length = 20

    attended_dim = 10
    attended_length = 15

    wrapped = SimpleRecurrent(dim, Identity())
    attention = SequenceContentAttention(state_names=wrapped.apply.states,
                                         attended_dim=attended_dim,
                                         match_dim=attended_dim)
    recurrent = AttentionRecurrent(wrapped, attention, seed=1234)
    recurrent.weights_init = IsotropicGaussian(0.5)
    recurrent.biases_init = Constant(0)
    recurrent.initialize()

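    # Symbolic inputs: sequences are (time, batch, features),
    # masks are (time, batch).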
    attended = tensor.tensor3("attended")
    attended_mask = tensor.matrix("attended_mask")
    inputs = tensor.tensor3("inputs")
    inputs_mask = tensor.matrix("inputs_mask")
    outputs = recurrent.apply(inputs=inputs,
                              mask=inputs_mask,
                              attended=attended,
                              attended_mask=attended_mask)
    states, glimpses, weights = outputs
    assert states.ndim == 3
    assert glimpses.ndim == 3
    assert weights.ndim == 3

    # For values.
    def rand(size):
        return rng.uniform(size=size).astype(floatX)

    # For masks.
    def generate_mask(length, batch_size):
        mask = numpy.ones((length, batch_size), dtype=floatX)
        # To make it look like real data
        for i in range(batch_size):
            mask[1 + rng.randint(0, length - 1):, i] = 0.0
        return mask

    input_vals = rand((input_length, batch_size, dim))
    input_mask_vals = generate_mask(input_length, batch_size)
    attended_vals = rand((attended_length, batch_size, attended_dim))
    attended_mask_vals = generate_mask(attended_length, batch_size)

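    # Compile the attention recurrence and run it on the random data.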
    func = theano.function([inputs, inputs_mask, attended, attended_mask],
                           [states, glimpses, weights])
    states_vals, glimpses_vals, weight_vals = func(input_vals, input_mask_vals,
                                                   attended_vals,
                                                   attended_mask_vals)
    assert states_vals.shape == (input_length, batch_size, dim)
    assert glimpses_vals.shape == (input_length, batch_size, attended_dim)

    assert (len(ComputationGraph(outputs).shared_variables) == len(
        Selector(recurrent).get_params()))

    # weights at positions where attended_mask == 0 must be zero
    assert numpy.all(weight_vals * (1 - attended_mask_vals.T) == 0)
    # weights at positions where attended_mask == 1 must be non-zero
    assert numpy.all(abs(weight_vals + (1 - attended_mask_vals.T)) > 1e-5)
    # weights from different steps should be noticeably different
    assert (abs(weight_vals[0] - weight_vals[1])).sum() > 1e-2
    # weights at all steps past the end of the input sequence
    # (as given by the input mask) should be identical
    for i in range(batch_size):
        last = int(input_mask_vals[:, i].sum())
        for j in range(last, input_length):
            assert_allclose(weight_vals[last, i], weight_vals[j, i])

    # regression checks against frozen reference sums
    assert_allclose(weight_vals.sum(), input_length * batch_size, 1e-5)
    assert_allclose(states_vals.sum(), 113.429, rtol=1e-5)
    assert_allclose(glimpses_vals.sum(), 415.901, rtol=1e-5)
Example #2
def test_with_attention():
    inp_dim = 2
    inp_len = 10
    attended_dim = 3
    attended_len = 11
    batch_size = 4
    n_steps = 30

    transition = TestTransition(
        dim=inp_dim, attended_dim=attended_dim, activation=Identity())
    attention = SequenceContentAttention(
        transition.apply.states, match_dim=inp_dim, name="attention")
    att_trans = AttentionRecurrent(
        transition, attention, add_contexts=False)
    att_trans.weights_init = IsotropicGaussian(0.01)
    att_trans.biases_init = Constant(0)
    att_trans.initialize()

    attended = tensor.tensor3("attended")
    attended_mask = tensor.matrix("attended_mask")
    inputs = tensor.tensor3("inputs")
    inputs_mask = tensor.matrix("inputs_mask")
    states, glimpses, weights = att_trans.apply(
        inputs=inputs, mask=inputs_mask,
        attended=attended, attended_mask=attended_mask)
    assert states.ndim == 3
    assert glimpses.ndim == 3
    assert weights.ndim == 3

    input_vals = numpy.zeros((inp_len, batch_size, inp_dim),
                             dtype=floatX)
    input_mask_vals = numpy.ones((inp_len, batch_size),
                                 dtype=floatX)
    attended_vals = numpy.zeros((attended_len, batch_size, attended_dim),
                                dtype=floatX)
    attended_mask_vals = numpy.ones((attended_len, batch_size),
                                    dtype=floatX)

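    # Compile and run on all-zero inputs with full-length masks.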
    func = theano.function([inputs, inputs_mask, attended, attended_mask],
                           [states, glimpses, weights])
    states_vals, glimpses_vals, weight_vals = func(
        input_vals, input_mask_vals,
        attended_vals, attended_mask_vals)

    assert states_vals.shape == input_vals.shape
    assert glimpses_vals.shape == (inp_len, batch_size, attended_dim)
    assert weight_vals.shape == (inp_len, batch_size, attended_len)

    # Test SequenceGenerator built from the same transition and attention
    generator = SequenceGenerator(
        LinearReadout(readout_dim=inp_dim, source_names=["states"],
                      emitter=TestEmitter(name="emitter"),
                      name="readout"),
        transition=transition,
        attention=attention,
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
        add_contexts=False, name="generator")

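    # Evaluate the generator's per-step cost for given target outputs.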
    outputs = tensor.tensor3('outputs')
    costs = generator.cost(outputs, attended=attended,
                           attended_mask=attended_mask)
    costs_vals = costs.eval({outputs: input_vals,
                            attended: attended_vals,
                            attended_mask: attended_mask_vals})
    assert costs_vals.shape == (inp_len, batch_size)

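    # Sample n_steps of outputs conditioned on the attended sequence.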
    results = (
        generator.generate(n_steps=n_steps, batch_size=attended.shape[1],
                           attended=attended, attended_mask=attended_mask))
    assert len(results) == 5
    states_vals, outputs_vals, glimpses_vals, weights_vals, costs_vals = (
        theano.function([attended, attended_mask], results)
        (attended_vals, attended_mask_vals))
    assert states_vals.shape == (n_steps, batch_size, inp_dim)
    assert states_vals.shape == outputs_vals.shape
    assert glimpses_vals.shape == (n_steps, batch_size, attended_dim)
    assert weights_vals.shape == (n_steps, batch_size, attended_len)
    assert costs_vals.shape == (n_steps, batch_size)
Example #3
def test_attention_recurrent():
    rng = numpy.random.RandomState(1234)

    dim = 5
    batch_size = 4
    input_length = 20

    attended_dim = 10
    attended_length = 15

    wrapped = SimpleRecurrent(dim, Identity())
    attention = SequenceContentAttention(
        state_names=wrapped.apply.states,
        attended_dim=attended_dim, match_dim=attended_dim)
    recurrent = AttentionRecurrent(wrapped, attention, seed=1234)
    recurrent.weights_init = IsotropicGaussian(0.5)
    recurrent.biases_init = Constant(0)
    recurrent.initialize()

    attended = tensor.tensor3("attended")
    attended_mask = tensor.matrix("attended_mask")
    inputs = tensor.tensor3("inputs")
    inputs_mask = tensor.matrix("inputs_mask")
    outputs = recurrent.apply(
        inputs=inputs, mask=inputs_mask,
        attended=attended, attended_mask=attended_mask)
    states, glimpses, weights = outputs
    assert states.ndim == 3
    assert glimpses.ndim == 3
    assert weights.ndim == 3

    # For values.
    def rand(size):
        return rng.uniform(size=size).astype(theano.config.floatX)

    # For masks.
    def generate_mask(length, batch_size):
        mask = numpy.ones((length, batch_size), dtype=theano.config.floatX)
        # To make it look like real data
        for i in range(batch_size):
            mask[1 + rng.randint(0, length - 1):, i] = 0.0
        return mask

    input_vals = rand((input_length, batch_size, dim))
    input_mask_vals = generate_mask(input_length, batch_size)
    attended_vals = rand((attended_length, batch_size, attended_dim))
    attended_mask_vals = generate_mask(attended_length, batch_size)

    func = theano.function([inputs, inputs_mask, attended, attended_mask],
                           [states, glimpses, weights])
    states_vals, glimpses_vals, weight_vals = func(
        input_vals, input_mask_vals,
        attended_vals, attended_mask_vals)
    assert states_vals.shape == (input_length, batch_size, dim)
    assert glimpses_vals.shape == (input_length, batch_size, attended_dim)

    assert (len(ComputationGraph(outputs).shared_variables) ==
            len(Selector(recurrent).get_params()))

    # weights at positions where attended_mask == 0 must be zero
    assert numpy.all(weight_vals * (1 - attended_mask_vals.T) == 0)
    # weights at positions where attended_mask == 1 must be non-zero
    assert numpy.all(abs(weight_vals + (1 - attended_mask_vals.T)) > 1e-5)
    # weights from different steps should be noticeably different
    assert (abs(weight_vals[0] - weight_vals[1])).sum() > 1e-2
    # weights at all steps past the end of the input sequence
    # (as given by the input mask) should be identical
    for i in range(batch_size):
        last = int(input_mask_vals[:, i].sum())
        for j in range(last, input_length):
            assert_allclose(weight_vals[last, i], weight_vals[j, i], 1e-5)

    # regression checks against frozen reference sums
    assert_allclose(weight_vals.sum(), input_length * batch_size, 1e-5)
    assert_allclose(states_vals.sum(), 113.429, rtol=1e-5)
    assert_allclose(glimpses_vals.sum(), 415.901, rtol=1e-5)
Example #4
import numpy
from theano import tensor, function

from blocks import initialization
from blocks.bricks.attention import AttentionRecurrent

# `transition`, `SimpleSequenceAttention` and `seq_length` are assumed to be
# defined earlier in the original script; they are not part of this snippet.
batch_size = 2
features = 3

# Toy attended data: consecutive numbers reshaped to
# (seq_length, batch_size, features).
attended_tr = numpy.arange(
    batch_size * seq_length * features, dtype='float32')
attended_tr.shape = (seq_length, batch_size, features)

attended = tensor.tensor3('attended')
inputs = tensor.tensor3('inputs')
ssa = SimpleSequenceAttention(['states'], [3], 3)

ar = AttentionRecurrent(
    transition=transition,
    attention=ssa,
)
ar.weights_init = initialization.Constant(0.)
ar.biases_init = initialization.Constant(1.)
ar.initialize()

# Drive the attention recurrence by hand, one sub-application at a time:
# initial states -> glimpses -> distributed inputs -> new states.
# (Alternative: ar.apply(attended=attended_tv, n_steps=n_steps, batch_size=2))
states, glimpses, step = ar.initial_states(1, attended=attended)
glimpses, step = ar.take_glimpses(
    attended=attended, states=states, glimpses=glimpses, step=step)
states = ar.compute_states(
    inputs=inputs, attended=attended,
    states=states, glimpses=glimpses, step=step)
distributed = ar.distribute.apply(inputs=inputs, glimpses=glimpses)
# Recompute the states, this time feeding only the first input step.
states = ar.compute_states(
    states=states, inputs=inputs[0],
    glimpses=glimpses, step=step, attended=attended)

# input_tr = numpy.zeros((seq_length, batch_size, features)).astype('float32')