    def __init__(self,
                 readout,
                 transition,
                 attention=None,
                 fork_inputs=None,
                 add_contexts=True,
                 **kwargs):
        if not fork_inputs:
            fork_inputs = [
                name for name in transition.apply.sequences if name != 'mask'
            ]

        fork = Fork(fork_inputs)
        if attention:
            distribute = Distribute(fork_inputs,
                                    attention.take_glimpses.outputs[0])
            transition = AttentionRecurrent(transition,
                                            attention,
                                            distribute,
                                            add_contexts=add_contexts,
                                            name="att_trans")
        else:
            transition = FakeAttentionRecurrent(transition,
                                                name="with_fake_attention")
        super(SequenceGenerator, self).__init__(readout, transition, fork,
                                                **kwargs)
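
A minimal wiring sketch for a generator built with this constructor, assuming the standard Blocks bricks (GatedRecurrent, SequenceContentAttention, Readout, SoftmaxEmitter, LookupFeedback); the vocabulary and layer sizes below are illustrative only:

from blocks.bricks import Tanh
from blocks.bricks.recurrent import GatedRecurrent
from blocks.bricks.attention import SequenceContentAttention
from blocks.bricks.sequence_generators import (Readout, SoftmaxEmitter,
                                               LookupFeedback)
from blocks.initialization import IsotropicGaussian, Constant

vocab_size, embedding_dim, state_dim = 30000, 620, 1000

# Decoder transition and content-based attention over the encoder annotations.
transition = GatedRecurrent(activation=Tanh(), dim=state_dim)
attention = SequenceContentAttention(state_names=transition.apply.states,
                                     attended_dim=2 * state_dim,
                                     match_dim=state_dim)

# The readout merges the decoder state, the feedback embedding and the glimpse.
readout = Readout(readout_dim=vocab_size,
                  source_names=['states', 'feedback',
                                attention.take_glimpses.outputs[0]],
                  emitter=SoftmaxEmitter(),
                  feedback_brick=LookupFeedback(vocab_size, embedding_dim))

generator = SequenceGenerator(readout, transition, attention=attention,
                              weights_init=IsotropicGaussian(0.01),
                              biases_init=Constant(0))
generator.initialize()

The training cost is then computed as in the tests further below, e.g. generator.cost(targets, mask=targets_mask, attended=annotations, attended_mask=annotations_mask), where the four arguments are hypothetical target and annotation variables.
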
Example #2
    def __init__(self,
                 trg_space_idx,
                 readout,
                 transition,
                 attention=None,
                 transition_depth=1,
                 igru_depth=1,
                 trg_dgru_depth=1,
                 add_contexts=True,
                 **kwargs):
        self.trg_space_idx = trg_space_idx
        self.transition_depth = transition_depth
        self.igru_depth = igru_depth
        self.trg_dgru_depth = trg_dgru_depth
        self.igru_states_name = [
            'igru_states' + RECURRENTSTACK_SEPARATOR + str(i)
            for i in range(self.igru_depth)
        ]
        self.feedback_name = [
            'feedback' + RECURRENTSTACK_SEPARATOR + str(i)
            for i in range(self.trg_dgru_depth)
        ]

        normal_inputs = [
            name for name in transition.apply.sequences if 'mask' not in name
        ]
        kwargs.setdefault('fork', Fork(normal_inputs))
        transition = AttentionRecurrent(transition,
                                        attention,
                                        add_contexts=add_contexts,
                                        name="att_trans")
        super(SequenceGeneratorDCNMT, self).__init__(readout, transition,
                                                     **kwargs)
Example #3
 def __init__(self,
              base_encoder,
              state_dim=1000,
              self_attendable=False,
              **kwargs):
     """Constructor.
     
     Args:
         base_encoder (Brick): Low level encoder network which
                               produces annotations to attend to
         state_dim (int): Size of the recurrent layer.
         self_attendable (bool): If true, the annotator can attend
                                 to its own previous states. If 
                                 false it can only attend to base
                                 annotations
     """
     super(HierarchicalAnnotator, self).__init__(**kwargs)
     self.state_dim = state_dim * 2
     self.base_encoder = base_encoder
     self.self_attendable = self_attendable
     trans_core = GatedRecurrent(activation=Tanh(), dim=self.state_dim)
     if self_attendable:
         self.attention = SelfAttendableContentAttention(
             state_names=trans_core.apply.states,
             attended_dim=self.state_dim,
             match_dim=self.state_dim,
             num_steps=10,
             name="hier_attention")
     else:
         self.attention = SequenceContentAttention(
             state_names=trans_core.apply.states,
             attended_dim=self.state_dim,
             match_dim=self.state_dim,
             name="hier_attention")
     self.transition = AttentionRecurrent(trans_core,
                                          self.attention,
                                          name="hier_att_trans")
     self.children = [self.transition]
Example #4
 def __init__(self, readout, transition, attention=None,
              add_contexts=True, **kwargs):
     normal_inputs = [name for name in transition.apply.sequences
                      if 'mask' not in name]
     kwargs.setdefault('fork', Fork(normal_inputs))
     if attention:
         transition = AttentionRecurrent(
             transition, attention,
             add_contexts=add_contexts, name="att_trans")
     else:
         transition = FakeAttentionRecurrent(transition,
                                             name="with_fake_attention")
     super(SequenceGenerator, self).__init__(
         readout, transition, **kwargs)
Example #5
 def __init__(self,
              readout,
              transition,
              attention,
              add_contexts=True,
              **kwargs):
     normal_inputs = [
         name for name in transition.apply.sequences if 'mask' not in name
     ]
     kwargs.setdefault('fork', Fork(normal_inputs))
     transition = AttentionRecurrent(transition,
                                     attention,
                                     add_contexts=add_contexts,
                                     name="att_trans")
     super(InitialContextSequenceGenerator,
           self).__init__(readout, transition, **kwargs)
Example #6
 def __init__(self,
              trg_space_idx,
              readout,
              transition,
              attention=None,
              transition_layers=1,
              add_contexts=True,
              **kwargs):
     self.trg_space_idx = trg_space_idx
     self.transition_layers = transition_layers
     normal_inputs = [
         name for name in transition.apply.sequences if 'mask' not in name
     ]
     kwargs.setdefault('fork', Fork(normal_inputs))
     transition = AttentionRecurrent(transition,
                                     attention,
                                     add_contexts=add_contexts,
                                     name="att_trans")
     super(SequenceGeneratorDCNMT, self).__init__(readout, transition,
                                                  **kwargs)
Example #7
 def __init__(self,
              base_encoder, 
              state_dim=1000, 
              self_attendable=False, 
              **kwargs):
     """Constructor.
     
     Args:
         base_encoder (Brick): Low level encoder network which
                               produces annotations to attend to
         state_dim (int): Size of the recurrent layer.
         self_attendable (bool): If true, the annotator can attend
                                 to its own previous states. If 
                                 false it can only attend to base
                                 annotations
     """
     super(HierarchicalAnnotator, self).__init__(**kwargs)
     self.state_dim = state_dim * 2
     self.base_encoder = base_encoder
     self.self_attendable = self_attendable
     trans_core = GatedRecurrent(activation=Tanh(), dim=self.state_dim)
     if self_attendable:
         self.attention = SelfAttendableContentAttention(
                 state_names=trans_core.apply.states,
                 attended_dim=self.state_dim,
                 match_dim=self.state_dim,
                 num_steps=10,
                 name="hier_attention")
     else:
         self.attention = SequenceContentAttention(
                 state_names=trans_core.apply.states,
                 attended_dim=self.state_dim,
                 match_dim=self.state_dim,
                 name="hier_attention")
     self.transition = AttentionRecurrent(trans_core, 
                                          self.attention, 
                                          name="hier_att_trans")
     self.children = [self.transition]
Example #8
def test_attention_recurrent():
    rng = numpy.random.RandomState(1234)

    dim = 5
    batch_size = 4
    input_length = 20

    attended_dim = 10
    attended_length = 15

    wrapped = SimpleRecurrent(dim, Identity())
    attention = SequenceContentAttention(state_names=wrapped.apply.states,
                                         attended_dim=attended_dim,
                                         match_dim=attended_dim)
    recurrent = AttentionRecurrent(wrapped, attention, seed=1234)
    recurrent.weights_init = IsotropicGaussian(0.5)
    recurrent.biases_init = Constant(0)
    recurrent.initialize()

    attended = tensor.tensor3("attended")
    attended_mask = tensor.matrix("attended_mask")
    inputs = tensor.tensor3("inputs")
    inputs_mask = tensor.matrix("inputs_mask")
    outputs = recurrent.apply(inputs=inputs,
                              mask=inputs_mask,
                              attended=attended,
                              attended_mask=attended_mask)
    states, glimpses, weights = outputs
    assert states.ndim == 3
    assert glimpses.ndim == 3
    assert weights.ndim == 3

    # For values.
    def rand(size):
        return rng.uniform(size=size).astype(floatX)

    # For masks.
    def generate_mask(length, batch_size):
        mask = numpy.ones((length, batch_size), dtype=floatX)
        # To make it look like real data
        for i in range(batch_size):
            mask[1 + rng.randint(0, length - 1):, i] = 0.0
        return mask

    input_vals = rand((input_length, batch_size, dim))
    input_mask_vals = generate_mask(input_length, batch_size)
    attended_vals = rand((attended_length, batch_size, attended_dim))
    attended_mask_vals = generate_mask(attended_length, batch_size)

    func = theano.function([inputs, inputs_mask, attended, attended_mask],
                           [states, glimpses, weights])
    states_vals, glimpses_vals, weight_vals = func(input_vals, input_mask_vals,
                                                   attended_vals,
                                                   attended_mask_vals)
    assert states_vals.shape == (input_length, batch_size, dim)
    assert glimpses_vals.shape == (input_length, batch_size, attended_dim)

    assert (len(ComputationGraph(outputs).shared_variables) == len(
        Selector(recurrent).get_params()))

    # weights for masked positions must be zero
    assert numpy.all(weight_vals * (1 - attended_mask_vals.T) == 0)
    # weights for unmasked positions must be non-zero
    assert numpy.all(abs(weight_vals + (1 - attended_mask_vals.T)) > 1e-5)
    # weights from different steps should be noticeably different
    assert (abs(weight_vals[0] - weight_vals[1])).sum() > 1e-2
    # weights for all steps after the last unmasked input position should be the same
    for i in range(batch_size):
        last = int(input_mask_vals[:, i].sum())
        for j in range(last, input_length):
            assert_allclose(weight_vals[last, i], weight_vals[j, i])

    # regression checks against frozen sums
    assert_allclose(weight_vals.sum(), input_length * batch_size, 1e-5)
    assert_allclose(states_vals.sum(), 113.429, rtol=1e-5)
    assert_allclose(glimpses_vals.sum(), 415.901, rtol=1e-5)
Example #9
def test_attention_recurrent():
    rng = numpy.random.RandomState(1234)

    dim = 5
    batch_size = 4
    input_length = 20

    attended_dim = 10
    attended_length = 15

    wrapped = SimpleRecurrent(dim, Identity())
    attention = SequenceContentAttention(
        state_names=wrapped.apply.states,
        attended_dim=attended_dim, match_dim=attended_dim)
    recurrent = AttentionRecurrent(wrapped, attention, seed=1234)
    recurrent.weights_init = IsotropicGaussian(0.5)
    recurrent.biases_init = Constant(0)
    recurrent.initialize()

    attended = tensor.tensor3("attended")
    attended_mask = tensor.matrix("attended_mask")
    inputs = tensor.tensor3("inputs")
    inputs_mask = tensor.matrix("inputs_mask")
    outputs = recurrent.apply(
        inputs=inputs, mask=inputs_mask,
        attended=attended, attended_mask=attended_mask)
    states, glimpses, weights = outputs
    assert states.ndim == 3
    assert glimpses.ndim == 3
    assert weights.ndim == 3

    # For values.
    def rand(size):
        return rng.uniform(size=size).astype(theano.config.floatX)

    # For masks.
    def generate_mask(length, batch_size):
        mask = numpy.ones((length, batch_size), dtype=theano.config.floatX)
        # To make it look like real data
        for i in range(batch_size):
            mask[1 + rng.randint(0, length - 1):, i] = 0.0
        return mask

    input_vals = rand((input_length, batch_size, dim))
    input_mask_vals = generate_mask(input_length, batch_size)
    attended_vals = rand((attended_length, batch_size, attended_dim))
    attended_mask_vals = generate_mask(attended_length, batch_size)

    func = theano.function([inputs, inputs_mask, attended, attended_mask],
                           [states, glimpses, weights])
    states_vals, glimpses_vals, weight_vals = func(
        input_vals, input_mask_vals,
        attended_vals, attended_mask_vals)
    assert states_vals.shape == (input_length, batch_size, dim)
    assert glimpses_vals.shape == (input_length, batch_size, attended_dim)

    assert (len(ComputationGraph(outputs).shared_variables) ==
            len(Selector(recurrent).get_params()))

    # weights for masked positions must be zero
    assert numpy.all(weight_vals * (1 - attended_mask_vals.T) == 0)
    # weights for unmasked positions must be non-zero
    assert numpy.all(abs(weight_vals + (1 - attended_mask_vals.T)) > 1e-5)
    # weights from different steps should be noticeably different
    assert (abs(weight_vals[0] - weight_vals[1])).sum() > 1e-2
    # weights for all steps after the last unmasked input position should be the same
    for i in range(batch_size):
        last = int(input_mask_vals[:, i].sum())
        for j in range(last, input_length):
            assert_allclose(weight_vals[last, i], weight_vals[j, i], 1e-5)

    # regression checks against frozen sums
    assert_allclose(weight_vals.sum(), input_length * batch_size, 1e-5)
    assert_allclose(states_vals.sum(), 113.429, rtol=1e-5)
    assert_allclose(glimpses_vals.sum(), 415.901, rtol=1e-5)
Example #10
class HierarchicalAnnotator(Initializable):
    """This annotator creates higher level annotations by using a 
    network which is similar to the attentional decoder network to
    produce a sequence of new annotations.
    """
    def __init__(self,
                 base_encoder,
                 state_dim=1000,
                 self_attendable=False,
                 **kwargs):
        """Constructor.
        
        Args:
            base_encoder (Brick): Low level encoder network which
                                  produces annotations to attend to
            state_dim (int): Size of the recurrent layer.
            self_attendable (bool): If true, the annotator can attend
                                    to its own previous states. If 
                                    false it can only attend to base
                                    annotations
        """
        super(HierarchicalAnnotator, self).__init__(**kwargs)
        self.state_dim = state_dim * 2
        self.base_encoder = base_encoder
        self.self_attendable = self_attendable
        trans_core = GatedRecurrent(activation=Tanh(), dim=self.state_dim)
        if self_attendable:
            self.attention = SelfAttendableContentAttention(
                state_names=trans_core.apply.states,
                attended_dim=self.state_dim,
                match_dim=self.state_dim,
                num_steps=10,
                name="hier_attention")
        else:
            self.attention = SequenceContentAttention(
                state_names=trans_core.apply.states,
                attended_dim=self.state_dim,
                match_dim=self.state_dim,
                name="hier_attention")
        self.transition = AttentionRecurrent(trans_core,
                                             self.attention,
                                             name="hier_att_trans")
        self.children = [self.transition]

    def _push_allocation_config(self):
        """Sets the dimensions of rnn inputs. """
        self.rnn_inputs = {
            name: shared_floatx_zeros(self.transition.get_dim(name))
            for name in self.transition.apply.sequences if name != 'mask'
        }

    @application(inputs=['base_annotations', 'base_mask'],
                 outputs=['annotations', 'annotations_mask'])
    def apply(self, base_annotations, base_mask):
        ann_representation = self.transition.apply(**merge(
            self.rnn_inputs, {
                'mask': base_mask,
                'attended': base_annotations,
                'attended_mask': base_mask
            }))[0]
        return ann_representation, base_mask
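
A minimal usage sketch for this annotator, assuming base_encoder is an already constructed low-level encoder brick and that base_annotations and base_mask are the annotation tensor and mask it produces; the initialization schemes here are illustrative:

from blocks.initialization import IsotropicGaussian, Constant

annotator = HierarchicalAnnotator(base_encoder,
                                  state_dim=1000,
                                  self_attendable=False,
                                  weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0))
annotator.initialize()
annotations, annotations_mask = annotator.apply(base_annotations, base_mask)
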
Example #11
class HierarchicalAnnotator(Initializable):
    """This annotator creates higher level annotations by using a 
    network which is similar to the attentional decoder network to
    produce a sequence of new annotations.
    """
    
    def __init__(self,
                 base_encoder, 
                 state_dim=1000, 
                 self_attendable=False, 
                 **kwargs):
        """Constructor.
        
        Args:
            base_encoder (Brick): Low level encoder network which
                                  produces annotations to attend to
            state_dim (int): Size of the recurrent layer.
            self_attendable (bool): If true, the annotator can attend
                                    to its own previous states. If 
                                    false it can only attend to base
                                    annotations
        """
        super(HierarchicalAnnotator, self).__init__(**kwargs)
        self.state_dim = state_dim * 2
        self.base_encoder = base_encoder
        self.self_attendable = self_attendable
        trans_core = GatedRecurrent(activation=Tanh(), dim=self.state_dim)
        if self_attendable:
            self.attention = SelfAttendableContentAttention(
                    state_names=trans_core.apply.states,
                    attended_dim=self.state_dim,
                    match_dim=self.state_dim,
                    num_steps=10,
                    name="hier_attention")
        else:
            self.attention = SequenceContentAttention(
                    state_names=trans_core.apply.states,
                    attended_dim=self.state_dim,
                    match_dim=self.state_dim,
                    name="hier_attention")
        self.transition = AttentionRecurrent(trans_core, 
                                             self.attention, 
                                             name="hier_att_trans")
        self.children = [self.transition]

    def _push_allocation_config(self):
        """Sets the dimensions of rnn inputs. """
        self.rnn_inputs = {
            name: shared_floatx_zeros(self.transition.get_dim(name))
            for name in self.transition.apply.sequences if name != 'mask'
        }
    
    @application(inputs=['base_annotations', 'base_mask'],
                 outputs=['annotations', 'annotations_mask'])
    def apply(self, base_annotations, base_mask):
        ann_representation = self.transition.apply(
            **merge(self.rnn_inputs, {
                'mask': base_mask,
                'attended': base_annotations,
                'attended_mask': base_mask}))[0]
        return ann_representation, base_mask
Example #12
def test_with_attention():
    inp_dim = 2
    inp_len = 10
    attended_dim = 3
    attended_len = 11
    batch_size = 4
    n_steps = 30

    transition = TestTransition(
        dim=inp_dim, attended_dim=attended_dim, activation=Identity())
    attention = SequenceContentAttention(
        transition.apply.states, match_dim=inp_dim, name="attention")
    att_trans = AttentionRecurrent(
        transition, attention, add_contexts=False)
    att_trans.weights_init = IsotropicGaussian(0.01)
    att_trans.biases_init = Constant(0)
    att_trans.initialize()

    attended = tensor.tensor3("attended")
    attended_mask = tensor.matrix("attended_mask")
    inputs = tensor.tensor3("inputs")
    inputs_mask = tensor.matrix("inputs_mask")
    states, glimpses, weights = att_trans.apply(
        inputs=inputs, mask=inputs_mask,
        attended=attended, attended_mask=attended_mask)
    assert states.ndim == 3
    assert glimpses.ndim == 3
    assert weights.ndim == 3

    input_vals = numpy.zeros((inp_len, batch_size, inp_dim),
                             dtype=floatX)
    input_mask_vals = numpy.ones((inp_len, batch_size),
                                 dtype=floatX)
    attended_vals = numpy.zeros((attended_len, batch_size, attended_dim),
                                dtype=floatX)
    attended_mask_vals = numpy.ones((attended_len, batch_size),
                                    dtype=floatX)

    func = theano.function([inputs, inputs_mask, attended, attended_mask],
                           [states, glimpses, weights])
    states_vals, glimpses_vals, weight_vals = func(
        input_vals, input_mask_vals,
        attended_vals, attended_mask_vals)

    assert states_vals.shape == input_vals.shape
    assert glimpses_vals.shape == (inp_len, batch_size, attended_dim)
    assert weight_vals.shape == (inp_len, batch_size, attended_len)

    # Test SequenceGenerator using AttentionTransition
    generator = SequenceGenerator(
        LinearReadout(readout_dim=inp_dim, source_names=["states"],
                      emitter=TestEmitter(name="emitter"),
                      name="readout"),
        transition=transition,
        attention=attention,
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
        add_contexts=False, name="generator")

    outputs = tensor.tensor3('outputs')
    costs = generator.cost(outputs, attended=attended,
                           attended_mask=attended_mask)
    costs_vals = costs.eval({outputs: input_vals,
                            attended: attended_vals,
                            attended_mask: attended_mask_vals})
    assert costs_vals.shape == (inp_len, batch_size)

    results = (
        generator.generate(n_steps=n_steps, batch_size=attended.shape[1],
                           attended=attended, attended_mask=attended_mask))
    assert len(results) == 5
    states_vals, outputs_vals, glimpses_vals, weights_vals, costs_vals = (
        theano.function([attended, attended_mask], results)
        (attended_vals, attended_mask_vals))
    assert states_vals.shape == (n_steps, batch_size, inp_dim)
    assert states_vals.shape == outputs_vals.shape
    assert glimpses_vals.shape == (n_steps, batch_size, attended_dim)
    assert weights_vals.shape == (n_steps, batch_size, attended_len)
    assert costs_vals.shape == (n_steps, batch_size)
Example #13
import numpy

from theano import tensor, function

from blocks import initialization
from blocks.bricks.attention import AttentionRecurrent

# `transition`, `n_steps` and `SimpleSequenceAttention` are defined earlier in
# the original script and are assumed to be in scope here.
batch_size = 2
seq_length = n_steps
features = 3

# attended_tr: seq_length * batch_size * features
attended_tr = numpy.arange(
    batch_size * seq_length * features).astype('float32')
attended_tr.shape = (seq_length, batch_size, features)

attended = tensor.tensor3('attended')
ssa = SimpleSequenceAttention(['states'], [3], 3)

ar = AttentionRecurrent(
    transition=transition,
    attention=ssa,
)

ar.weights_init = initialization.Constant(0.)
ar.biases_init = initialization.Constant(1.)
ar.initialize()

inputs = tensor.tensor3('inputs')

# Manually unroll a single step: initial states -> glimpses -> distributed
# inputs -> updated states.
states, glimpses, step = ar.initial_states(1, attended=attended)
glimpses, step = ar.take_glimpses(attended=attended, states=states,
                                  glimpses=glimpses, step=step)
distributed = ar.distribute.apply(inputs=inputs, glimpses=glimpses)
states = ar.compute_states(states=states, inputs=inputs[0], glimpses=glimpses,
                           step=step, attended=attended)
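
For comparison with the single manual step above, a brief sketch of letting the brick drive the whole scan itself, assuming the same transition and ssa bricks and that, as in the manual calls, the recurrent outputs are the states, glimpses and step:

states_seq, glimpses_seq, step_seq = ar.apply(inputs=inputs, attended=attended)
scan_fn = function([inputs, attended], [states_seq, glimpses_seq, step_seq])
# scan_fn expects an inputs array of shape (seq_length, batch_size, transition
# input dim) together with attended_tr, and returns one value per time step.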