Example #1
    def __init__(self,
                 embedding_dim,
                 state_dim,
                 use_local_attention=False,
                 window_size=10,
                 **kwargs):
        super(SentenceEncoder, self).__init__(**kwargs)

        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.rnn = GRU(activation=Tanh(),
                       dim=state_dim,
                       attended_dim=embedding_dim)
        self.input_fork = Fork(
            [name for name in self.rnn.apply.sequences if name != 'mask'],
            prototype=Linear(),
            name='input_fork')
        self.energy_computer = SumMatchFunction_posTag(
            name="wordAtt_energy_comp")
        self.attention = SequenceContentAttention_withExInput(
            state_names=['states'],
            state_dims=[state_dim],
            attended_dim=embedding_dim,
            match_dim=state_dim,
            posTag_dim=self.state_dim,
            energy_computer=self.energy_computer,
            use_local_attention=use_local_attention,
            window_size=window_size,
            name="word_attention")

        self.children = [self.rnn, self.input_fork, self.attention]
Example #2
    def __init__(self, networks, dims, **kwargs):
        super(DropMultiLayerEncoder, self).__init__(**kwargs)
        self.dims = dims
        self.networks = networks
        self.use_bias = True

        self.hid_linear_trans_forw = [
            Fork([
                name for name in networks[i].prototype.apply.sequences if name
                not in ['mask', 'drops_states', 'drops_cells', 'drops_igates']
            ],
                 name='fork_forw_{}'.format(i),
                 prototype=Linear(),
                 **kwargs) for i in range(len(networks))
        ]

        self.hid_linear_trans_back = [
            Fork([
                name for name in networks[i].prototype.apply.sequences if name
                not in ['mask', 'drops_states', 'drops_cells', 'drops_igates']
            ],
                 name='fork_back_{}'.format(i),
                 prototype=Linear(),
                 **kwargs) for i in range(len(networks))
        ]

        self.out_linear_trans = Linear(name='out_linear', **kwargs)
        self.children = (networks + self.hid_linear_trans_forw +
                         self.hid_linear_trans_back + [self.out_linear_trans])
        self.num_layers = len(networks)
Example #3
    def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        self.lookup = LookupTable(name='embeddings')
        self.bidir = BidirectionalWMT15(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                             prototype=Linear(),
                             name='fwd_fork')
        self.back_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                              prototype=Linear(),
                              name='back_fork')

        self.children = [
            self.lookup, self.bidir, self.fwd_fork, self.back_fork
        ]
Example #4
 def __init__(self,
              vocab_size,
              embedding_dim,
              igru_state_dim,
              emitter=None,
              feedback_brick=None,
              merge=None,
              merge_prototype=None,
              post_merge=None,
              merged_dim=None,
              igru=None,
              **kwargs):
     self.igru = igru
     self.lookup = LookupTable(name='embeddings')
     self.vocab_size = vocab_size
     self.igru_state_dim = igru_state_dim
     self.gru_to_softmax = Linear(input_dim=igru_state_dim,
                                  output_dim=vocab_size)
     self.embedding_dim = embedding_dim
     self.gru_fork = Fork([
         name for name in self.igru.apply.sequences
         if name != 'mask' and name != 'input_states'
     ],
                          prototype=Linear(),
                          name='gru_fork')
     kwargs['children'] = [
         self.igru, self.lookup, self.gru_to_softmax, self.gru_fork
     ]
     super(Interpolator, self).__init__(emitter=emitter,
                                        feedback_brick=feedback_brick,
                                        merge=merge,
                                        merge_prototype=merge_prototype,
                                        post_merge=post_merge,
                                        merged_dim=merged_dim,
                                        **kwargs)
Example #5
	def __init__(self, dimen, vocab_size, **kwargs): #{
		# Forward keyword arguments to the Brick base class so that
		# allocation bookkeeping (e.g. "allocated") is set up properly
		super(MorphGen, self).__init__(**kwargs)

		# The encoder 
		encoder = Bidirectional(SimpleRecurrent(dim=dimen, activation=Tanh()))

		# The fork produces one linear input per sequence expected by the
		# encoder's recurrent transition (everything except the mask)
		fork = Fork([name for name in encoder.prototype.apply.sequences if name != 'mask'])
		fork.input_dim = dimen
		fork.output_dims = [encoder.prototype.get_dim(name) for name in fork.input_names]

		lookup = LookupTable(vocab_size, dimen)

		transition = SimpleRecurrent(dim=dimen, activation=Tanh(), name="transition")

		atten = SequenceContentAttention(state_names=transition.apply.states, attended_dim=2*dimen, match_dim=dimen, name="attention")

		readout = Readout(
			readout_dim=vocab_size,
			source_names=[transition.apply.states[0],
			atten.take_glimpses.outputs[0]],
			emitter=SoftmaxEmitter(name="emitter"),
			feedback_brick=LookupFeedback(vocab_size, dimen),
			name="readout");

		generator = SequenceGenerator(readout=readout, transition=transition, attention=atten, name="generator")
	
		self.lookup = lookup
		self.fork = fork
		self.encoder = encoder
		self.generator = generator
		self.children = [lookup, fork, encoder, generator]
Example #6
    def __init__(self, vocab_size, embedding_dim, dgru_state_dim, dgru_depth,
                 **kwargs):
        super(Decimator, self).__init__(**kwargs)

        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.dgru_state_dim = dgru_state_dim
        self.lookup = LookupTable(name='embeddings')
        self.dgru_depth = dgru_depth
        # representation
        self.dgru = RecurrentStack([
            DGRU(activation=Tanh(), dim=self.dgru_state_dim)
            for _ in range(dgru_depth)
        ],
                                   skip_connections=True)
        # importance of this representation
        self.bidir_w = Bidirectional(RecurrentWithFork(
            DGRU(activation=Tanh(), dim=self.dgru_state_dim // 2),
            self.embedding_dim,
            name='src_word_with_fork'),
                                     name='bidir_src_word_encoder')

        self.gru_fork = Fork(
            [name for name in self.dgru.apply.sequences if name != 'mask'],
            prototype=Linear(),
            name='gru_fork')
        # map to an energy scalar
        self.wl = Linear(input_dim=dgru_state_dim, output_dim=1)

        self.children = [
            self.lookup, self.dgru, self.gru_fork, self.bidir_w, self.wl
        ]
Example #7
class BidirectionalEncoder(Initializable):
    """Encoder of RNNsearch model."""
    def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        self.lookup = LookupTable(name='embeddings')
        self.bidir = NewBidirectional(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                             prototype=Linear(),
                             name='fwd_fork')
        self.back_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                              prototype=Linear(),
                              name='back_fork')

        self.children = [
            self.lookup, self.bidir, self.fwd_fork, self.back_fork
        ]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim

        self.fwd_fork.input_dim = self.embedding_dim
        self.fwd_fork.output_dims = [
            self.bidir.children[0].get_dim(name)
            for name in self.fwd_fork.output_names
        ]
        self.back_fork.input_dim = self.embedding_dim
        self.back_fork.output_dims = [
            self.bidir.children[1].get_dim(name)
            for name in self.back_fork.output_names
        ]

    @application(inputs=['source_sentence', 'source_sentence_mask'],
                 outputs=['representation'])
    def apply(self, source_sentence, source_sentence_mask):
        # Time as first dimension.
        source_sentence = source_sentence.T
        source_sentence_mask = source_sentence_mask.T

        embeddings = self.lookup.apply(source_sentence)

        representation = self.bidir.apply(
            # Conversion to embedding representation here.
            merge(self.fwd_fork.apply(embeddings, as_dict=True),
                  {'mask': source_sentence_mask}),
            merge(self.back_fork.apply(embeddings, as_dict=True),
                  {'mask': source_sentence_mask}))
        self.representation = representation
        return representation
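A hedged usage sketch for an encoder like this one (the weights_init/biases_init pattern follows the usual Blocks RNNsearch recipe; NewBidirectional is assumed to come from the surrounding project, and the dimensions are placeholders):

from blocks.initialization import Constant, IsotropicGaussian, Orthogonal

encoder = BidirectionalEncoder(vocab_size=30000, embedding_dim=620, state_dim=1000)
encoder.weights_init = IsotropicGaussian(0.01)
encoder.biases_init = Constant(0)
# Push the configuration down to the children, then override the recurrent
# transition with orthogonal weights before initializing.
encoder.push_initialization_config()
encoder.bidir.prototype.weights_init = Orthogonal()
encoder.initialize()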
Example #8
    def __init__(self, dimension, alphabet_size, **kwargs):
        super(WordReverser, self).__init__(**kwargs)
        encoder = Bidirectional(
            SimpleRecurrent(dim=dimension, activation=Tanh()))
        fork = Fork([name for name in encoder.prototype.apply.sequences
                    if name != 'mask'])
        fork.input_dim = dimension
        fork.output_dims = [dimension for name in fork.input_names]
        lookup = LookupTable(alphabet_size, dimension)
        transition = SimpleRecurrent(
            activation=Tanh(),
            dim=dimension, name="transition")
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            attended_dim=2 * dimension, match_dim=dimension, name="attention")
        readout = Readout(
            readout_dim=alphabet_size,
            source_names=[transition.apply.states[0],
                          attention.take_glimpses.outputs[0]],
            emitter=SoftmaxEmitter(name="emitter"),
            feedback_brick=LookupFeedback(alphabet_size, dimension),
            name="readout")
        generator = SequenceGenerator(
            readout=readout, transition=transition, attention=attention,
            name="generator")

        self.lookup = lookup
        self.fork = fork
        self.encoder = encoder
        self.generator = generator
        self.children = [lookup, fork, encoder, generator]
Example #9
    def __init__(self, dimension, alphabet_size, **kwargs):
        super(WordReverser, self).__init__(**kwargs)
        encoder = Bidirectional(
            SimpleRecurrent(dim=dimension, activation=Tanh()))
        fork = Fork([name for name in encoder.prototype.apply.sequences
                    if name != 'mask'])
        fork.input_dim = dimension
        fork.output_dims = [encoder.prototype.get_dim(name) for name in fork.input_names]
        lookup = LookupTable(alphabet_size, dimension)
        transition = SimpleRecurrent(
            activation=Tanh(),
            dim=dimension, name="transition")
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            attended_dim=2 * dimension, match_dim=dimension, name="attention")
        readout = Readout(
            readout_dim=alphabet_size,
            source_names=[transition.apply.states[0],
                          attention.take_glimpses.outputs[0]],
            emitter=SoftmaxEmitter(name="emitter"),
            feedback_brick=LookupFeedback(alphabet_size, dimension),
            name="readout")
        generator = SequenceGenerator(
            readout=readout, transition=transition, attention=attention,
            name="generator")

        self.lookup = lookup
        self.fork = fork
        self.encoder = encoder
        self.generator = generator
        self.children = [lookup, fork, encoder, generator]
Example #10
    def __init__(self, nvis, nhid, encoding_mlp, encoding_lstm, decoding_mlp,
                 decoding_lstm, T=1, **kwargs):
        super(DRAW, self).__init__(**kwargs)

        self.nvis = nvis
        self.nhid = nhid
        self.T = T

        self.encoding_mlp = encoding_mlp
        self.encoding_mlp.name = 'encoder_mlp'
        for i, child in enumerate(self.encoding_mlp.children):
            child.name = '{}_{}'.format(self.encoding_mlp.name, i)
        self.encoding_lstm = encoding_lstm
        self.encoding_lstm.name = 'encoder_lstm'
        self.encoding_parameter_mapping = Fork(
            output_names=['mu_phi', 'log_sigma_phi'], prototype=Linear())

        self.decoding_mlp = decoding_mlp
        self.decoding_mlp.name = 'decoder_mlp'
        for i, child in enumerate(self.decoding_mlp.children):
            child.name = '{}_{}'.format(self.decoding_mlp.name, i)
        self.decoding_lstm = decoding_lstm
        self.decoding_lstm.name = 'decoder_lstm'
        self.decoding_parameter_mapping = Linear(name='mu_theta')

        self.prior_mu = tensor.zeros((self.nhid,))
        self.prior_mu.name = 'prior_mu'
        self.prior_log_sigma = tensor.zeros((self.nhid,))
        self.prior_log_sigma.name = 'prior_log_sigma'

        self.children = [self.encoding_mlp, self.encoding_lstm,
                         self.encoding_parameter_mapping,
                         self.decoding_mlp, self.decoding_lstm,
                         self.decoding_parameter_mapping]
Example #11
def example2():
    """GRU"""
    x = tensor.tensor3('x')
    dim = 3

    fork = Fork(input_dim=dim, output_dims=[dim, dim * 2],
                name='fork', output_names=["linear", "gates"],
                weights_init=initialization.Identity(),
                biases_init=Constant(0))
    gru = GatedRecurrent(dim=dim,
                         weights_init=initialization.Identity(),
                         biases_init=Constant(0))

    fork.initialize()
    gru.initialize()

    linear, gate_inputs = fork.apply(x)
    h = gru.apply(linear, gate_inputs)

    f = theano.function([x], h)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX))) 

    doubler = Linear(input_dim=dim, output_dim=dim,
                     weights_init=initialization.Identity(2),
                     biases_init=initialization.Constant(0))
    doubler.initialize()

    lin, gate = fork.apply(doubler.apply(x))
    h_doubler = gru.apply(lin, gate)

    f = theano.function([x], h_doubler)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX))) 
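For reference, a standalone snippet like this one presumably sits below imports roughly like the following (the module paths assume the usual Blocks/Theano layout and are stated here as an assumption):

import numpy as np
import theano
from theano import tensor

from blocks import initialization
from blocks.bricks import Linear
from blocks.bricks.parallel import Fork
from blocks.bricks.recurrent import GatedRecurrent
from blocks.initialization import Constant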
Example #12
def gru_layer(dim, h, n):
    fork = Fork(output_names=['linear' + str(n), 'gates' + str(n)],
                name='fork' + str(n), input_dim=dim, output_dims=[dim, dim * 2])
    gru = GatedRecurrent(dim=dim, name='gru' + str(n))
    initialize([fork, gru])
    linear, gates = fork.apply(h)
    return gru.apply(linear, gates)
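A minimal usage sketch for this helper (the initialize helper is the one used above; embeddings stands for a hypothetical (time, batch, dim) input whose last dimension equals dim):

# Hypothetical: stack two GRU layers of width 256 on top of an embedding sequence.
h1 = gru_layer(dim=256, h=embeddings, n=1)
h2 = gru_layer(dim=256, h=h1, n=2)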
Example #13
    def __init__(self, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        # Dimension of the word embeddings taken as input
        self.embedding_dim = embedding_dim
        # Hidden state dimension
        self.state_dim = state_dim

        # The bidir GRU
        self.bidir = BidirectionalFromDict(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        # Forks to administer the inputs of GRU gates
        self.fwd_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                             prototype=Linear(),
                             name='fwd_fork')
        self.back_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                              prototype=Linear(),
                              name='back_fork')

        self.children = [self.bidir, self.fwd_fork, self.back_fork]
Example #14
def gru_layer(dim, h, n):
    fork = Fork(output_names=['linear' + str(n), 'gates' + str(n)],
                name='fork' + str(n), input_dim=dim, output_dims=[dim, dim * 2])
    gru = GatedRecurrent(dim=dim, name='gru' + str(n))
    initialize([fork, gru])
    linear, gates = fork.apply(h)
    return gru.apply(linear, gates)
Example #15
 def __init__(self, embedding_dim, state_dim, **kwargs):
     """Constructor. Note that this implementation only supports
     single layer architectures.
     
     Args:
         embedding_dim (int): Dimensionality of the word vectors
                              defined by the sparse feature map.
         state_dim (int): Size of the recurrent layer.
     """
     super(NoLookupEncoder, self).__init__(**kwargs)
     self.embedding_dim = embedding_dim
     self.state_dim = state_dim
     self.bidir = BidirectionalWMT15(
         GatedRecurrent(activation=Tanh(), dim=state_dim))
     self.fwd_fork = Fork([
         name
         for name in self.bidir.prototype.apply.sequences if name != 'mask'
     ],
                          prototype=Linear(),
                          name='fwd_fork')
     self.back_fork = Fork([
         name
         for name in self.bidir.prototype.apply.sequences if name != 'mask'
     ],
                           prototype=Linear(),
                           name='back_fork')
     self.children = [self.bidir, self.fwd_fork, self.back_fork]
Example #16
    def __init__(self, transition, input_dim, hidden_dim, rec_weights_init,
                 ff_weights_init, biases_init, **kwargs):
        super(RecurrentWithFork, self).__init__(**kwargs)
        self.rec_weights_init = rec_weights_init
        self.ff_weights_init = ff_weights_init
        self.biases_init = biases_init
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        self.transition = transition
        self.transition.dim = self.hidden_dim
        self.transition.weights_init = self.rec_weights_init
        self.transition.bias_init = self.biases_init

        self.fork = Fork([
            name for name in self.transition.apply.sequences if name != 'mask'
        ],
                         prototype=Linear())
        self.fork.input_dim = self.input_dim
        self.fork.output_dims = [
            self.transition.apply.brick.get_dim(name)
            for name in self.fork.output_names
        ]
        self.fork.weights_init = self.ff_weights_init
        self.fork.biases_init = self.biases_init

        self.children = [transition, self.fork]
Example #17
 def __init__(self, recurrent, input_dim, **kwargs):
     super(RecurrentWithFork, self).__init__(**kwargs)
     self.recurrent = recurrent
     self.input_dim = input_dim
     self.fork = Fork(
         [name for name in self.recurrent.sequences if name != 'mask'],
         prototype=Linear())
     self.children = [recurrent.brick, self.fork]
Example #18
    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 igru_state_dim,
                 igru_depth,
                 trg_dgru_depth,
                 emitter,
                 feedback_brick,
                 merge=None,
                 merge_prototype=None,
                 post_merge=None,
                 **kwargs):
        merged_dim = igru_state_dim
        if not merge:
            merge = Merge(input_names=kwargs['source_names'],
                          prototype=merge_prototype)
        if not post_merge:
            post_merge = Bias(dim=merged_dim)

        # for compatibility
        if igru_depth == 1:
            self.igru = IGRU(dim=igru_state_dim)
        else:
            self.igru = RecurrentStack(
                [IGRU(dim=igru_state_dim, name='igru')] + [
                    UpperIGRU(dim=igru_state_dim,
                              activation=Tanh(),
                              name='upper_igru' + str(i))
                    for i in range(1, igru_depth)
                ],
                skip_connections=True)
        self.embedding_dim = embedding_dim
        self.emitter = emitter
        self.feedback_brick = feedback_brick
        self.merge = merge
        self.post_merge = post_merge
        self.merged_dim = merged_dim
        self.igru_depth = igru_depth
        self.trg_dgru_depth = trg_dgru_depth
        self.lookup = LookupTable(name='embeddings')
        self.vocab_size = vocab_size
        self.igru_state_dim = igru_state_dim
        self.gru_to_softmax = Linear(input_dim=igru_state_dim,
                                     output_dim=vocab_size)
        self.gru_fork = Fork([
            name for name in self.igru.apply.sequences
            if name != 'mask' and name != 'input_states'
        ],
                             prototype=Linear(),
                             name='gru_fork')

        children = [
            self.emitter, self.feedback_brick, self.merge, self.post_merge,
            self.igru, self.lookup, self.gru_to_softmax, self.gru_fork
        ]
        kwargs.setdefault('children', []).extend(children)
        super(Interpolator, self).__init__(**kwargs)
Example #19
class BidirectionalEncoder(Initializable):
    """Encoder of RNNsearch model."""
    def __init__(self, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        # self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        # self.lookup = LookupTable(name='embeddings')
        self.bidir = BidirectionalWMT15(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                             prototype=Linear(),
                             name='fwd_fork')
        self.back_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                              prototype=Linear(),
                              name='back_fork')

        self.children = [self.bidir, self.fwd_fork, self.back_fork]

    def _push_allocation_config(self):
        # self.lookup.length = self.vocab_size
        # self.lookup.dim = self.embedding_dim

        self.fwd_fork.input_dim = self.embedding_dim
        self.fwd_fork.output_dims = [
            self.bidir.children[0].get_dim(name)
            for name in self.fwd_fork.output_names
        ]
        self.back_fork.input_dim = self.embedding_dim
        self.back_fork.output_dims = [
            self.bidir.children[1].get_dim(name)
            for name in self.back_fork.output_names
        ]

    @application(inputs=['image_embedding'], outputs=['representation'])
    def apply(self, image_embedding):
        # Time as first dimension
        image_embedding_mask = tensor.ones(image_embedding.shape[:2])
        # print image_embedding.type

        # embeddings = self.lookup.apply(source_sentence)

        representation = self.bidir.apply(
            merge(self.fwd_fork.apply(image_embedding, as_dict=True),
                  {'mask': image_embedding_mask}),
            merge(self.back_fork.apply(image_embedding, as_dict=True),
                  {'mask': image_embedding_mask}))
        return representation
Example #20
    def __init__(self, 
                 vocab_size, 
                 embedding_dim, 
                 n_layers, 
                 skip_connections, 
                 state_dim, 
                 **kwargs):
        """Sole constructor.
        
        Args:
            vocab_size (int): Source vocabulary size
            embedding_dim (int): Dimension of the embedding layer
            n_layers (int): Number of layers. Layers share the same
                            weight matrices.
            skip_connections (bool): Skip connections connect the
                                     source word embeddings directly 
                                     with deeper layers to propagate 
                                     the gradient more efficiently
            state_dim (int): Number of hidden units in the recurrent
                             layers.
        """
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.n_layers = n_layers
        self.state_dim = state_dim
        self.skip_connections = skip_connections

        self.lookup = LookupTable(name='embeddings')
        if self.n_layers >= 1:
            self.bidir = BidirectionalWMT15(
                GatedRecurrent(activation=Tanh(), dim=state_dim))
            self.fwd_fork = Fork(
                [name for name in self.bidir.prototype.apply.sequences
                 if name != 'mask'], prototype=Linear(), name='fwd_fork')
            self.back_fork = Fork(
                [name for name in self.bidir.prototype.apply.sequences
                 if name != 'mask'], prototype=Linear(), name='back_fork')
            self.children = [self.lookup, self.bidir,
                             self.fwd_fork, self.back_fork]
            if self.n_layers > 1: # Deep encoder
                self.mid_fwd_fork = Fork(
                    [name for name in self.bidir.prototype.apply.sequences
                     if name != 'mask'], prototype=Linear(), name='mid_fwd_fork')
                self.mid_back_fork = Fork(
                    [name for name in self.bidir.prototype.apply.sequences
                     if name != 'mask'], prototype=Linear(), name='mid_back_fork')
                self.children.append(self.mid_fwd_fork)
                self.children.append(self.mid_back_fork)
        elif self.n_layers == 0:
            self.embedding_dim = state_dim*2
            self.children = [self.lookup]
        else:
            logging.fatal("Number of encoder layers must be non-negative")
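A hedged construction sketch for this deep encoder (all dimensions and the name are placeholders):

encoder = BidirectionalEncoder(vocab_size=30000,
                               embedding_dim=620,
                               n_layers=2,
                               skip_connections=True,
                               state_dim=1000,
                               name='bidirectional_encoder')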
Example #21
def gru_layer(dim, h, n):
    fork = Fork(
        output_names=["linear" + str(n), "gates" + str(n)],
        name="fork" + str(n),
        input_dim=dim,
        output_dims=[dim, dim * 2],
    )
    gru = GatedRecurrent(dim=dim, name="gru" + str(n))
    initialize([fork, gru])
    linear, gates = fork.apply(h)
    return gru.apply(linear, gates)
Example #22
def gru_layer(dim, h, n, x_mask, first, **kwargs):
    fork = Fork(output_names=['linear' + str(n), 'gates' + str(n)],
                name='fork' + str(n),
                input_dim=dim,
                output_dims=[dim, dim * 2])
    gru = GatedRecurrent(dim=dim, name='gru' + str(n))
    initialize([fork, gru])
    linear, gates = fork.apply(h)
    if first:
        gruApply = gru.apply(linear, gates, mask=x_mask, **kwargs)
    else:
        gruApply = gru.apply(linear, gates, **kwargs)
    return gruApply
Example #23
class BidirectionalEncoder(Initializable):
    """ Bidirectional GRU encoder. """
    def __init__(self, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        # Dimension of the word embeddings taken as input
        self.embedding_dim = embedding_dim
        # Hidden state dimension
        self.state_dim = state_dim

        # The bidir GRU
        self.bidir = BidirectionalFromDict(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        # Forks to administer the inputs of GRU gates
        self.fwd_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                             prototype=Linear(),
                             name='fwd_fork')
        self.back_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                              prototype=Linear(),
                              name='back_fork')

        self.children = [self.bidir, self.fwd_fork, self.back_fork]

    def _push_allocation_config(self):
        self.fwd_fork.input_dim = self.embedding_dim
        self.fwd_fork.output_dims = [
            self.bidir.children[0].get_dim(name)
            for name in self.fwd_fork.output_names
        ]
        self.back_fork.input_dim = self.embedding_dim
        self.back_fork.output_dims = [
            self.bidir.children[1].get_dim(name)
            for name in self.back_fork.output_names
        ]

    @application(inputs=['source_sentence_tbf', 'source_sentence_mask_tb'],
                 outputs=['representation'])
    def apply(self, source_sentence_tbf, source_sentence_mask_tb=None):

        representation_tbf = self.bidir.apply(
            merge(self.fwd_fork.apply(source_sentence_tbf, as_dict=True),
                  {'mask': source_sentence_mask_tb}),
            merge(self.back_fork.apply(source_sentence_tbf, as_dict=True),
                  {'mask': source_sentence_mask_tb}))
        return representation_tbf
Example #24
    def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        self.lookup = LookupTable(name='embeddings')
        self.bidir = BidirectionalWMT15(GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork([name for name in self.bidir.prototype.apply.sequences
                          if name != 'mask'], prototype=Linear(), name='fwd_fork')
        self.back_fork = Fork([name for name in self.bidir.prototype.apply.sequences
                          if name != 'mask'], prototype=Linear(), name='back_fork')

        self.children = [self.lookup, self.bidir, self.fwd_fork, self.back_fork]
Example #25
class BidirectionalEncoder(Initializable):
    """Encoder of RNNsearch model."""

    def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        self.lookup = LookupTable(name='embeddings')
        self.bidir = BidirectionalWMT15(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'], prototype=Linear(), name='fwd_fork')
        self.back_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'], prototype=Linear(), name='back_fork')

        self.children = [self.lookup, self.bidir,
                         self.fwd_fork, self.back_fork]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim

        self.fwd_fork.input_dim = self.embedding_dim
        self.fwd_fork.output_dims = [self.bidir.children[0].get_dim(name)
                                     for name in self.fwd_fork.output_names]
        self.back_fork.input_dim = self.embedding_dim
        self.back_fork.output_dims = [self.bidir.children[1].get_dim(name)
                                      for name in self.back_fork.output_names]

    @application(inputs=['source_sentence', 'source_sentence_mask'],
                 outputs=['representation'])
    def apply(self, source_sentence, source_sentence_mask):
        # Time as first dimension
        source_sentence = source_sentence.T
        source_sentence_mask = source_sentence_mask.T

        embeddings = self.lookup.apply(source_sentence)

        representation = self.bidir.apply(
            merge(self.fwd_fork.apply(embeddings, as_dict=True),
                  {'mask': source_sentence_mask}),
            merge(self.back_fork.apply(embeddings, as_dict=True),
                  {'mask': source_sentence_mask})
        )
        return representation
Example #26
 def __init__(self, vocab_size, embedding_dim, n_layers, skip_connections,
              state_dim, **kwargs):
     """Sole constructor.
     
     Args:
         vocab_size (int): Source vocabulary size
         embedding_dim (int): Dimension of the embedding layer
         n_layers (int): Number of layers. Layers share the same
                         weight matrices.
         skip_connections (bool): Skip connections connect the
                                  source word embeddings directly 
                                  with deeper layers to propagate 
                                  the gradient more efficiently
         state_dim (int): Number of hidden units in the recurrent
                          layers.
     """
     super(DeepBidirectionalEncoder, self).__init__(**kwargs)
     self.vocab_size = vocab_size
     self.embedding_dim = embedding_dim
     self.n_layers = n_layers
     self.state_dim = state_dim
     self.skip_connections = skip_connections
     self.lookup = LookupTable(name='embeddings')
     self.bidirs = []
     self.fwd_forks = []
     self.back_forks = []
     for i in xrange(self.n_layers):
         bidir = BidirectionalWMT15(GatedRecurrent(activation=Tanh(),
                                                   dim=state_dim),
                                    name='bidir%d' % i)
         self.bidirs.append(bidir)
         self.fwd_forks.append(
             Fork([
                 name for name in bidir.prototype.apply.sequences
                 if name != 'mask'
             ],
                  prototype=Linear(),
                  name='fwd_fork%d' % i))
         self.back_forks.append(
             Fork([
                 name for name in bidir.prototype.apply.sequences
                 if name != 'mask'
             ],
                  prototype=Linear(),
                  name='back_fork%d' % i))
     self.children = [self.lookup] \
                     + self.bidirs \
                     + self.fwd_forks \
                     + self.back_forks
Example #27
    def __init__(self, transition, num_params, params_name, weights_init,
                 biases_init, **kwargs):
        super(AddParameters, self).__init__(**kwargs)
        update_instance(self, locals())

        self.input_names = [
            name for name in transition.apply.sequences if name != 'mask'
        ]
        self.state_name = transition.apply.states[0]
        assert len(transition.apply.states) == 1

        self.fork = Fork(self.input_names)
        # Could be also several init bricks, one for each of the states
        self.init = MLP([Identity()], name="init")
        self.children = [self.transition, self.fork, self.init]
Example #28
    def __init__(self,
                 readout,
                 transition,
                 attention=None,
                 fork_inputs=None,
                 add_contexts=True,
                 **kwargs):
        if not fork_inputs:
            fork_inputs = [
                name for name in transition.apply.sequences if name != 'mask'
            ]

        fork = Fork(fork_inputs)
        if attention:
            distribute = Distribute(fork_inputs,
                                    attention.take_glimpses.outputs[0])
            transition = AttentionRecurrent(transition,
                                            attention,
                                            distribute,
                                            add_contexts=add_contexts,
                                            name="att_trans")
        else:
            transition = FakeAttentionRecurrent(transition,
                                                name="with_fake_attention")
        super(SequenceGenerator, self).__init__(readout, transition, fork,
                                                **kwargs)
Example #29
class RecurrentWithFork(Initializable):

    @lazy(allocation=['input_dim'])
    def __init__(self, recurrent, input_dim, **kwargs):
        super(RecurrentWithFork, self).__init__(**kwargs)
        self.recurrent = recurrent
        self.input_dim = input_dim
        self.fork = Fork(
            [name for name in self.recurrent.sequences
             if name != 'mask'],
             prototype=Linear())
        self.children = [recurrent.brick, self.fork]

    def _push_allocation_config(self):
        self.fork.input_dim = self.input_dim
        self.fork.output_dims = [self.recurrent.brick.get_dim(name)
                                 for name in self.fork.output_names]

    @application(inputs=['input_', 'mask'])
    def apply(self, input_, mask=None, **kwargs):
        return self.recurrent(
            mask=mask, **dict_union(self.fork.apply(input_, as_dict=True),
                                    kwargs))

    @apply.property('outputs')
    def apply_outputs(self):
        return self.recurrent.states
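A hedged usage sketch: given how the constructor reads self.recurrent.sequences and recurrent.brick, the recurrent argument is expected to be a brick's bound apply method, so wrapping a GRU might look like this (the dimensions are placeholders):

from blocks.bricks import Tanh
from blocks.bricks.recurrent import GatedRecurrent

rnn = GatedRecurrent(dim=100, activation=Tanh())
# Fork the 50-dimensional inputs into the GRU's input sequences (except the mask).
layer = RecurrentWithFork(rnn.apply, input_dim=50, name='rnn_with_fork')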
Example #30
    def __init__(self, readout, transition, dim_dec, attention=None,
                 add_contexts=True, pointer_weight=0.5,
                 transition_with_att_class=None,
                 use_word_annotations=False, **kwargs):
        super(Generator, self).__init__(**kwargs)
        self.inputs = [name for name in transition.apply.sequences
                       if 'mask' not in name]
        self.dim_dec = dim_dec
        self.pointer_weight = pointer_weight
        fork = Fork(self.inputs)
        kwargs.setdefault('fork', fork)
        if attention:
            transition = transition_with_att_class(
                transition, attention,
                add_contexts=add_contexts, name="att_trans")
        else:
            transition = FakeAttentionRecurrent(transition,
                                                name="with_fake_attention")
        self.readout = readout
        self.transition = transition
        self.fork = fork
        self.children = [self.readout, self.fork, self.transition]

        self.use_word_annotations = use_word_annotations
        if use_word_annotations:
            self.word_annotation_preprocessor = Linear(
                name='input_attention_preprocessor', bias=False)
            self.children.append(self.word_annotation_preprocessor)
Example #31
class RecurrentWithFork(Initializable):
    @lazy(allocation=['input_dim'])
    def __init__(self, transition, input_dim, hidden_dim, rec_weights_init,
                 ff_weights_init, biases_init, **kwargs):
        super(RecurrentWithFork, self).__init__(**kwargs)
        self.rec_weights_init = rec_weights_init
        self.ff_weights_init = ff_weights_init
        self.biases_init = biases_init
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        self.transition = transition
        self.transition.dim = self.hidden_dim
        self.transition.weights_init = self.rec_weights_init
        self.transition.bias_init = self.biases_init

        self.fork = Fork([
            name for name in self.transition.apply.sequences if name != 'mask'
        ],
                         prototype=Linear())
        self.fork.input_dim = self.input_dim
        self.fork.output_dims = [
            self.transition.apply.brick.get_dim(name)
            for name in self.fork.output_names
        ]
        self.fork.weights_init = self.ff_weights_init
        self.fork.biases_init = self.biases_init

        self.children = [transition, self.fork]


#    def _push_allocation_config(self):#
#        #super(RecurrentWithFork, self)._push_allocation_config()
#        self.transition.dim=self.hidden_dim
#        self.fork.input_dim = self.input_dim
#        self.fork.output_dims = [self.transition.apply.brick.get_dim(name)
#                                 for name in self.fork.output_names]

#    def _push_initialization_config(self):
#        #super(RecurrentWithFork, self)._push_initialization_config()
#        self.fork.weights_init=self.ff_weights_init
#        self.fork.biases_init=self.biases_init
#        self.transition.weights_init=self.rec_weights_init
#        self.transition.bias_init=self.biases_init

    @application(inputs=['input_', 'mask'])
    def apply(self, input_, mask=None, **kwargs):
        states = self.transition.apply(mask=mask,
                                       **dict_union(
                                           self.fork.apply(input_,
                                                           as_dict=True),
                                           kwargs))
        # Blocks returns a list [states, cells] for an LSTM but just the states
        # (no list) for a GRU or plain RNN. We only want the LSTM's states;
        # the cells should not be visible from outside.
        return states[0] if isinstance(states, list) else states

    @apply.property('outputs')
    def apply_outputs(self):
        return self.transition.apply.states
Example #32
    def __init__(self,
                 readout,
                 transition,
                 attention=None,
                 fork_inputs=None,
                 **kwargs):
        if not fork_inputs:
            fork_inputs = [
                name for name in transition.apply.sequences if name != 'mask'
            ]

        fork = Fork(fork_inputs)
        if attention:
            mixer = Mixer(fork_inputs,
                          attention.take_look.outputs[0],
                          name="mixer")
            transition = AttentionTransition(transition,
                                             attention,
                                             mixer,
                                             name="att_trans")
        else:
            transition = FakeAttentionTransition(transition,
                                                 name="with_fake_attention")
        super(SequenceGenerator, self).__init__(readout, transition, fork,
                                                **kwargs)
Example #33
    def __init__(self, transition, input_dim, hidden_dim,
                 rec_weights_init, ff_weights_init, biases_init, **kwargs):
        super(RecurrentWithFork, self).__init__(**kwargs)
        self.rec_weights_init=rec_weights_init
        self.ff_weights_init=ff_weights_init
        self.biases_init=biases_init
        self.input_dim=input_dim
        self.hidden_dim=hidden_dim

        self.transition=transition
        self.transition.dim=self.hidden_dim
        self.transition.weights_init=self.rec_weights_init
        self.transition.bias_init=self.biases_init


        self.fork = Fork(
            [name for name in self.transition.apply.sequences if name != 'mask'],
             prototype=Linear())
        self.fork.input_dim = self.input_dim
        self.fork.output_dims = [self.transition.apply.brick.get_dim(name)
                                 for name in self.fork.output_names]
        self.fork.weights_init=self.ff_weights_init
        self.fork.biases_init=self.biases_init

        self.children = [transition, self.fork]
Example #34
    def __init__(self, inner_input_dim, outer_input_dim, inner_dim, **kwargs):
        self.inner_gru = GatedRecurrent(dim=inner_dim, name='inner_gru')

        self.inner_input_fork = Fork(
            output_names=[name for name in self.inner_gru.apply.sequences
                          if 'mask' not in name],
            input_dim=inner_input_dim, name='inner_input_fork')
        self.outer_input_fork = Fork(
            output_names=[name for name in self.inner_gru.apply.sequences
                          if 'mask' not in name],
            input_dim=outer_input_dim, name='inner_outer_fork')

        super(InnerRecurrent, self).__init__(**kwargs)

        self.children = [
            self.inner_gru, self.inner_input_fork, self.outer_input_fork]
Example #35
class RecurrentWithFork(Initializable):
    # Obtained from Dima's code. @rizar
    # https://github.com/rizar/attention-lvcsr/blob/master/lvsr/bricks/__init__.py
    @lazy(allocation=['input_dim'])
    def __init__(self, recurrent, input_dim, **kwargs):
        super(RecurrentWithFork, self).__init__(**kwargs)
        self.recurrent = recurrent
        self.input_dim = input_dim
        self.fork = Fork(
            [name for name in self.recurrent.sequences if name != 'mask'],
            prototype=Linear())
        self.children = [recurrent.brick, self.fork]

    def _push_allocation_config(self):
        self.fork.input_dim = self.input_dim
        self.fork.output_dims = [
            self.recurrent.brick.get_dim(name)
            for name in self.fork.output_names
        ]

    @application(inputs=['input_', 'mask'])
    def apply(self, input_, mask=None, **kwargs):
        return self.recurrent(mask=mask,
                              **dict_union(
                                  self.fork.apply(input_, as_dict=True),
                                  kwargs))

    @apply.property('outputs')
    def apply_outputs(self):
        return self.recurrent.states
Example #36
class RecurrentWithFork(Initializable):
    @lazy(allocation=['input_dim'])
    def __init__(self, proto, input_dim, **kwargs):
        super(RecurrentWithFork, self).__init__(**kwargs)
        self.recurrent = proto
        self.input_dim = input_dim
        self.fork = Fork([
            name for name in self.recurrent.apply.sequences if name != 'mask'
        ],
                         prototype=Linear())
        self.children = [self.recurrent, self.fork]

    def _push_allocation_config(self):
        self.fork.input_dim = self.input_dim
        self.fork.output_dims = [
            self.recurrent.get_dim(name) for name in self.fork.output_names
        ]

    @application(inputs=['input_', 'mask'])
    def apply(self, input_, mask=None, **kwargs):
        return self.recurrent.apply(mask=mask,
                                    **dict_union(
                                        self.fork.apply(input_, as_dict=True),
                                        kwargs))

    @apply.property('outputs')
    def apply_outputs(self):
        return self.recurrent.states
Example #37
    def __init__(self,
                 trg_space_idx,
                 readout,
                 transition,
                 attention=None,
                 transition_depth=1,
                 igru_depth=1,
                 trg_dgru_depth=1,
                 add_contexts=True,
                 **kwargs):
        self.trg_space_idx = trg_space_idx
        self.transition_depth = transition_depth
        self.igru_depth = igru_depth
        self.trg_dgru_depth = trg_dgru_depth
        self.igru_states_name = [
            'igru_states' + RECURRENTSTACK_SEPARATOR + str(i)
            for i in range(self.igru_depth)
        ]
        self.feedback_name = [
            'feedback' + RECURRENTSTACK_SEPARATOR + str(i)
            for i in range(self.trg_dgru_depth)
        ]

        normal_inputs = [
            name for name in transition.apply.sequences if 'mask' not in name
        ]
        kwargs.setdefault('fork', Fork(normal_inputs))
        transition = AttentionRecurrent(transition,
                                        attention,
                                        add_contexts=add_contexts,
                                        name="att_trans")
        super(SequenceGeneratorDCNMT, self).__init__(readout, transition,
                                                     **kwargs)
Example #38
    def __init__(self, path, nn_char_map, no_transition_cost=1e12, **kwargs):
        # Since we currently support only one type ('fst'), the type argument
        # is ignored.
        # if type_ != 'fst':
        #    raise ValueError("Supports only FSTs so far.")
        fst = FST(path)
        fst_char_map = dict(fst.fst.isyms.items())
        del fst_char_map['<eps>']
        if not len(fst_char_map) == len(nn_char_map):
            raise ValueError()
        remap_table = {
            nn_char_map[character]: fst_code
            for character, fst_code in fst_char_map.items()
        }
        transition = FSTTransition(fst, remap_table, no_transition_cost)

        # This SequenceGenerator will be used only in a very limited way.
        # That's why it is sufficient to equip it with a completely
        # fake readout.
        dummy_readout = Readout(source_names=['add'],
                                readout_dim=len(remap_table),
                                merge=Merge(input_names=['costs'],
                                            prototype=Identity()),
                                post_merge=Identity(),
                                emitter=SoftmaxEmitter())
        super(LanguageModel,
              self).__init__(transition=transition,
                             fork=Fork(output_names=[
                                 name for name in transition.apply.sequences
                                 if name != 'mask'
                             ],
                                       prototype=Identity()),
                             readout=dummy_readout,
                             **kwargs)
Example #39
    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 state_dim,
                 representation_dim,
                 theano_seed=None,
                 **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.representation_dim = representation_dim
        self.theano_seed = theano_seed

        # Initialize gru with special initial state.
        self.transition = GRUInitialState(attended_dim=state_dim,
                                          dim=state_dim,
                                          activation=Tanh(),
                                          name='decoder')

        # Initialize the attention mechanism.
        self.attention = SequenceContentAttention2(
            state_names=self.transition.apply.states,
            attended_dim=representation_dim,
            match_dim=state_dim,
            name="attention")

        readout = Readout(source_names=[
            'states', 'feedback', self.attention.take_glimpses.outputs[0]
        ],
                          readout_dim=self.vocab_size,
                          emitter=NewSoftmaxEmitter(initial_output=-1,
                                                    theano_seed=theano_seed),
                          feedback_brick=NewLookupFeedback(
                              vocab_size, embedding_dim),
                          post_merge=InitializableFeedforwardSequence([
                              Bias(dim=state_dim, name='maxout_bias').apply,
                              Maxout(num_pieces=2, name='maxout').apply,
                              Linear(input_dim=state_dim / 2,
                                     output_dim=embedding_dim,
                                     use_bias=False,
                                     name='softmax0').apply,
                              Linear(input_dim=embedding_dim,
                                     name='softmax1').apply
                          ]),
                          merged_dim=state_dim)

        # Build sequence generator accordingly.
        self.sequence_generator = SequenceGenerator(
            readout=readout,
            transition=self.transition,
            attention=self.attention,
            fork=Fork([
                name
                for name in self.transition.apply.sequences if name != 'mask'
            ],
                      prototype=Linear()),
            cost_type='categorical_cross_entropy')

        self.children = [self.sequence_generator]
Example #40
 def __init__(self, recurrent, input_dim, **kwargs):
     super(RecurrentWithFork, self).__init__(**kwargs)
     self.recurrent = recurrent
     self.input_dim = input_dim
     self.fork = Fork(
         [name for name in self.recurrent.sequences
          if name != 'mask'], prototype=Linear())
     self.children = [recurrent.brick, self.fork]
Example #41
class BidirectionalEncoder(Initializable):
    """ Bidirectional GRU encoder. """

    def __init__(self, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        # Dimension of the word embeddings taken as input
        self.embedding_dim = embedding_dim
        # Hidden state dimension
        self.state_dim = state_dim

        # The bidir GRU
        self.bidir = BidirectionalFromDict(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        # Forks to administer the inputs of GRU gates
        self.fwd_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'], prototype=Linear(), name='fwd_fork')
        self.back_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'], prototype=Linear(), name='back_fork')

        self.children = [self.bidir,
                         self.fwd_fork, self.back_fork]

    def _push_allocation_config(self):
        self.fwd_fork.input_dim = self.embedding_dim
        self.fwd_fork.output_dims = [self.bidir.children[0].get_dim(name)
                                     for name in self.fwd_fork.output_names]
        self.back_fork.input_dim = self.embedding_dim
        self.back_fork.output_dims = [self.bidir.children[1].get_dim(name)
                                      for name in self.back_fork.output_names]


    @application(inputs=['source_sentence_tbf', 'source_sentence_mask_tb'],
                 outputs=['representation'])
    def apply(self, source_sentence_tbf, source_sentence_mask_tb=None):

        representation_tbf = self.bidir.apply(
            merge(self.fwd_fork.apply(source_sentence_tbf, as_dict=True),
                  {'mask': source_sentence_mask_tb}),
            merge(self.back_fork.apply(source_sentence_tbf, as_dict=True),
                  {'mask': source_sentence_mask_tb})
        )
        return representation_tbf
Example #42
class InnerRecurrent(BaseRecurrent, Initializable):
    def __init__(self, inner_input_dim, outer_input_dim, inner_dim, **kwargs):
        self.inner_gru = GatedRecurrent(dim=inner_dim, name='inner_gru')

        self.inner_input_fork = Fork(
            output_names=[name for name in self.inner_gru.apply.sequences
                          if 'mask' not in name],
            input_dim=inner_input_dim, name='inner_input_fork')
        self.outer_input_fork = Fork(
            output_names=[name for name in self.inner_gru.apply.sequences
                          if 'mask' not in name],
            input_dim=outer_input_dim, name='inner_outer_fork')

        super(InnerRecurrent, self).__init__(**kwargs)

        self.children = [
            self.inner_gru, self.inner_input_fork, self.outer_input_fork]

    def _push_allocation_config(self):
        self.inner_input_fork.output_dims = self.inner_gru.get_dims(
            self.inner_input_fork.output_names)
        self.outer_input_fork.output_dims = self.inner_gru.get_dims(
            self.outer_input_fork.output_names)

    @recurrent(sequences=['inner_inputs'], states=['states'],
               contexts=['outer_inputs'], outputs=['states'])
    def apply(self, inner_inputs, states, outer_inputs):
        forked_inputs = self.inner_input_fork.apply(inner_inputs, as_dict=True)
        forked_states = self.outer_input_fork.apply(outer_inputs, as_dict=True)

        gru_inputs = {key: forked_inputs[key] + forked_states[key]
                      for key in forked_inputs.keys()}

        new_states = self.inner_gru.apply(
            iterate=False,
            **dict_union(gru_inputs, {'states': states}))
        return new_states  # updated hidden states for this step

    def get_dim(self, name):
        if name == 'states':
            return self.inner_gru.get_dim(name)
        else:
            raise AttributeError(name)
Example #43
class RecurrentWithFork(Initializable):

    @lazy(allocation=['input_dim'])
    def __init__(self, transition, input_dim, hidden_dim,
                 rec_weights_init, ff_weights_init, biases_init, **kwargs):
        super(RecurrentWithFork, self).__init__(**kwargs)
        self.rec_weights_init = rec_weights_init
        self.ff_weights_init = ff_weights_init
        self.biases_init = biases_init
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        self.transition = transition
        self.transition.dim = self.hidden_dim
        self.transition.weights_init = self.rec_weights_init
        self.transition.biases_init = self.biases_init


        self.fork = Fork(
            [name for name in self.transition.apply.sequences if name != 'mask'],
            prototype=Linear())
        self.fork.input_dim = self.input_dim
        self.fork.output_dims = [self.transition.apply.brick.get_dim(name)
                                 for name in self.fork.output_names]
        self.fork.weights_init = self.ff_weights_init
        self.fork.biases_init = self.biases_init

        self.children = [transition, self.fork]

#    def _push_allocation_config(self):#
#        #super(RecurrentWithFork, self)._push_allocation_config()
#        self.transition.dim=self.hidden_dim
#        self.fork.input_dim = self.input_dim
#        self.fork.output_dims = [self.transition.apply.brick.get_dim(name)
#                                 for name in self.fork.output_names]

#    def _push_initialization_config(self):
#        #super(RecurrentWithFork, self)._push_initialization_config()
#        self.fork.weights_init=self.ff_weights_init
#        self.fork.biases_init=self.biases_init
#        self.transition.weights_init=self.rec_weights_init
#        self.transition.bias_init=self.biases_init

    @application(inputs=['input_', 'mask'])
    def apply(self, input_, mask=None, **kwargs):
        states = self.transition.apply(
            mask=mask, **dict_union(self.fork.apply(input_, as_dict=True), kwargs))
        # Blocks returns a list [states, cells] for LSTM but just the states
        # (no list) for GRU or a plain RNN. We only want the LSTM's hidden
        # states; the cells should not be visible from outside.
        return states[0] if isinstance(states, list) else states

    @apply.property('outputs')
    def apply_outputs(self):
        return self.transition.apply.states
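
Why states[0] in the apply above: a quick check (assuming Blocks' recurrent bricks) showing that an LSTM application has two outputs, states and cells, while a GRU or plain RNN has only one, so only the LSTM case needs indexing.

from blocks.bricks import Tanh
from blocks.bricks.recurrent import LSTM, GatedRecurrent, SimpleRecurrent

print(LSTM(dim=4).apply.outputs)                                # expected: ['states', 'cells']
print(GatedRecurrent(dim=4, activation=Tanh()).apply.outputs)   # expected: ['states']
print(SimpleRecurrent(dim=4, activation=Tanh()).apply.outputs)  # expected: ['states']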
Example #44
    def __init__(self, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        # Dimension of the word embeddings taken as input
        self.embedding_dim = embedding_dim
        # Hidden state dimension
        self.state_dim = state_dim

        # The bidir GRU
        self.bidir = BidirectionalFromDict(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        # Forks to administer the inputs of GRU gates
        self.fwd_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'], prototype=Linear(), name='fwd_fork')
        self.back_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'], prototype=Linear(), name='back_fork')

        self.children = [self.bidir,
                         self.fwd_fork, self.back_fork]
Example #45
    def __init__(self, embedding_dim, state_dim, **kwargs):
        """Constructor. Note that this implementation only supports
        single layer architectures.

        Args:
            embedding_dim (int): Dimensionality of the word vectors
                                 defined by the sparse feature map.
            state_dim (int): Size of the recurrent layer.
        """
        super(NoLookupEncoder, self).__init__(**kwargs)
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.bidir = BidirectionalWMT15(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'], prototype=Linear(), name='fwd_fork')
        self.back_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'], prototype=Linear(), name='back_fork')
        self.children = [self.bidir,
                         self.fwd_fork, self.back_fork]
Example #46
def build_fork_lookup(vocab_size, args):
    x = tensor.lmatrix('features')
    virtual_dim = 6
    time_length = 5
    mini_batch_size = 2
    skip_connections = True
    layers = 3

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(virtual_dim)

    print(output_names)
    print(output_dims)
    lookup = LookupTable(length=vocab_size, dim=virtual_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names, input_dim=time_length,
                output_dims=output_dims,
                prototype=FeedforwardSequence(
                    [lookup.apply]))

    # fork.apply returns a list of 3D tensors, one per layer
    # (Batch X Time X embedding_dim)
    pre_rnn = fork.apply(x)
    fork.initialize()

    f = theano.function([x], pre_rnn)
    return f
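
Hypothetical usage of build_fork_lookup above (the argument values are made up): with layers = 3 and skip_connections hard-coded to True, the compiled function should return one lookup output per layer, each of shape (mini_batch_size, time_length, virtual_dim).

import numpy

f = build_fork_lookup(vocab_size=10, args=None)  # args is not used in the body above
x_val = numpy.arange(10, dtype='int64').reshape(2, 5)  # (mini_batch_size, time_length)
outputs = f(x_val)
print(len(outputs))      # expected: 3 ('inputs', 'inputs_1', 'inputs_2')
print(outputs[0].shape)  # expected: (2, 5, 6)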
Example #47
    def __init__(self, blockid, vocab_size, embedding_dim, state_dim, **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.blockid = blockid

        self.lookup = LookupTable(name='embeddings' + '_' + self.blockid)
        self.gru = GatedRecurrent(activation=Tanh(), dim=state_dim,
                                  name="GatedRNN" + self.blockid)
        self.fwd_fork = Fork(
            [name for name in self.gru.apply.sequences
             if name != 'mask'], prototype=Linear(), name='fwd_fork' + '_' + self.blockid)

        self.children = [self.lookup, self.gru, self.fwd_fork]
Example #48
    def __init__(self, vocab_size, embedding_dim, state_dim, reverse=True,
                 **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.reverse = reverse

        self.lookup = LookupTable(name='embeddings')
        self.transition = GatedRecurrent(Tanh(), name='encoder_transition')
        self.fork = Fork([name for name in self.transition.apply.sequences
                          if name != 'mask'], prototype=Linear())

        self.children = [self.lookup, self.transition, self.fork]
Example #49
    def __init__(self, transition, num_params, params_name,
                 weights_init, biases_init, **kwargs):
        super(AddParameters, self).__init__(**kwargs)
        update_instance(self, locals())

        self.input_names = [name for name in transition.apply.sequences
                            if name != 'mask']
        self.state_name = transition.apply.states[0]
        assert len(transition.apply.states) == 1

        self.fork = Fork(self.input_names)
        # Could also be several init bricks, one for each of the states
        self.init = MLP([Identity()], name="init")
        self.children = [self.transition, self.fork, self.init]
Example #50
    def __init__(self, config, output_dim=2, **kwargs):
        super(BidiRNN, self).__init__(**kwargs)
        self.config = config

        self.context_embedder = ContextEmbedder(config)
        
        act = config.rec_activation() if hasattr(config, 'rec_activation') else None
        self.rec = SegregatedBidirectional(LSTM(dim=config.hidden_state_dim, activation=act, name='recurrent'))

        self.fwd_fork = Fork([name for name in self.rec.prototype.apply.sequences if name!='mask'],
                             prototype=Linear(), name='fwd_fork')
        self.bkwd_fork = Fork([name for name in self.rec.prototype.apply.sequences if name!='mask'],
                              prototype=Linear(), name='bkwd_fork')

        rto_in = config.hidden_state_dim * 2 + sum(x[2] for x in config.dim_embeddings)
        self.rec_to_output = MLP(activations=[Rectifier() for _ in config.dim_hidden] + [Identity()], 
                                 dims=[rto_in] + config.dim_hidden + [output_dim])

        self.sequences = ['latitude', 'latitude_mask', 'longitude']
        self.inputs = self.sequences + self.context_embedder.inputs

        self.children = [ self.context_embedder, self.fwd_fork, self.bkwd_fork,
                          self.rec, self.rec_to_output ]
Example #51
    def __init__(self, config, output_dim, activation, **kwargs):
        super(RecurrentEncoder, self).__init__(**kwargs)

        self.config = config
        self.context_embedder = ContextEmbedder(config)

        self.rec = SegregatedBidirectional(LSTM(dim=config.rec_state_dim, name='encoder_recurrent'))

        self.fwd_fork = Fork([name for name in self.rec.prototype.apply.sequences if name!='mask'],
                             prototype=Linear(), name='fwd_fork')
        self.bkwd_fork = Fork([name for name in self.rec.prototype.apply.sequences if name!='mask'],
                              prototype=Linear(), name='bkwd_fork')

        rto_in = config.rec_state_dim * 2 + sum(x[2] for x in config.dim_embeddings)
        self.rec_to_output = MLP(
                    activations=[Rectifier() for _ in config.dim_hidden] + [activation],
                    dims=[rto_in] + config.dim_hidden + [output_dim],
                    name='encoder_rto')

        self.children = [self.context_embedder, self.rec, self.fwd_fork, self.bkwd_fork, self.rec_to_output]

        self.rec_inputs = ['latitude', 'longitude', 'latitude_mask']
        self.inputs = self.context_embedder.inputs + self.rec_inputs
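
Purely illustrative arithmetic (the embedding triples below are made up, not from the source config) showing how the MLP input width rto_in above is obtained: one state per LSTM direction plus the width of every context embedding.

rec_state_dim = 100
# hypothetical (name, vocabulary size, embedding dim) triples
dim_embeddings = [('week_of_year', 52, 10), ('day_of_week', 7, 10)]
rto_in = rec_state_dim * 2 + sum(x[2] for x in dim_embeddings)
assert rto_in == 220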
Example #52
class Feedback(Initializable):
    """Feedback.

    Attributes
    ----------
    output_names : list
    output_dims : dict

    """
    @lazy(allocation=['output_names', 'output_dims'])
    def __init__(self, output_names, output_dims,
                 embedding=None, input_dim=0,
                 **kwargs):
        super(Feedback, self).__init__(**kwargs)

        self.output_names = output_names
        self.output_dims = output_dims
        self.input_dim = input_dim

        self.embedding = embedding
        self.fork = Fork(self.output_names)

        self.apply.inputs = ['input']
        self.apply.outputs = output_names

        self.children = [self.embedding, self.fork]
        self.children = [child for child in self.children if child]

    def _push_allocation_config(self):
        if self.fork:
            self.fork.output_dims = self.output_dims
        else:
            self.embedding.output_dim, = self.output_dims
        if self.embedding:
            self.embedding.input_dim = self.input_dim
            self.fork.input_dim = self.embedding.output_dim
        else:
            self.fork.input_dim = self.input_dim

    @application
    def apply(self, symbols):
        embedded_symbols = symbols
        if self.embedding:
            embedded_symbols = self.embedding.apply(symbols)
        if self.fork:
            return self.fork.apply(embedded_symbols)
        return embedded_symbols
Example #53
    def __init__(self, output_names, output_dims,
                 embedding=None, input_dim=0,
                 **kwargs):
        super(Feedback, self).__init__(**kwargs)

        self.output_names = output_names
        self.output_dims = output_dims
        self.input_dim = input_dim

        self.embedding = embedding
        self.fork = Fork(self.output_names)

        self.apply.inputs = ['input']
        self.apply.outputs = output_names

        self.children = [self.embedding, self.fork]
        self.children = [child for child in self.children if child]
Example #54
class Encoder(Initializable):
    def __init__(self, vocab_size, embedding_dim, state_dim, reverse=True,
                 **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.reverse = reverse

        self.lookup = LookupTable(name='embeddings')
        self.transition = GatedRecurrent(Tanh(), name='encoder_transition')
        self.fork = Fork([name for name in self.transition.apply.sequences
                          if name != 'mask'], prototype=Linear())

        self.children = [self.lookup, self.transition, self.fork]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim
        self.transition.dim = self.state_dim
        self.fork.input_dim = self.embedding_dim
        self.fork.output_dims = [self.state_dim
                                 for _ in self.fork.output_names]

    @application(inputs=['source_sentence', 'source_sentence_mask'],
                 outputs=['representation'])
    def apply(self, source_sentence, source_sentence_mask):
        # Time as first dimension
        source_sentence = source_sentence.dimshuffle(1, 0)
        source_sentence_mask = source_sentence_mask.T
        if self.reverse:
            source_sentence = source_sentence[::-1]
            source_sentence_mask = source_sentence_mask[::-1]

        embeddings = self.lookup.apply(source_sentence)
        representation = self.transition.apply(**merge(
            self.fork.apply(embeddings, as_dict=True),
            {'mask': source_sentence_mask}
        ))
        return representation[-1]
Example #55
    def __init__(self, vocab_size, embedding_dim, state_dim,
                 representation_dim, **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.representation_dim = representation_dim

        readout = Readout(
            source_names=['states', 'feedback', 'readout_context'],
            readout_dim=self.vocab_size,
            emitter=SoftmaxEmitter(),
            feedback_brick=LookupFeedback(vocab_size, embedding_dim),
            post_merge=InitializableFeedforwardSequence(
                [Bias(dim=1000).apply,
                 Maxout(num_pieces=2).apply,
                 Linear(input_dim=state_dim / 2, output_dim=100,
                        use_bias=False).apply,
                 Linear(input_dim=100).apply]),
            merged_dim=1000)

        self.transition = GatedRecurrentWithContext(Tanh(), dim=state_dim,
                                                    name='decoder')
        # Readout will apply the linear transformation to 'readout_context'
        # with a Merge brick, so no need to fork it here
        self.fork = Fork([name for name in
                          self.transition.apply.contexts +
                          self.transition.apply.states
                          if name != 'readout_context'], prototype=Linear())
        self.tanh = Tanh()

        self.sequence_generator = SequenceGenerator(
            readout=readout, transition=self.transition,
            fork_inputs=[name for name in self.transition.apply.sequences
                         if name != 'mask'],
        )

        self.children = [self.fork, self.sequence_generator, self.tanh]
Example #56
class Encoder(Initializable):
    """Encoder of RNNsearch model."""

    def __init__(self, blockid, vocab_size, embedding_dim, state_dim, **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.blockid = blockid

        self.lookup = LookupTable(name='embeddings' + '_' + self.blockid)
        self.gru = GatedRecurrent(activation=Tanh(), dim=state_dim,
                                  name="GatedRNN" + self.blockid)
        self.fwd_fork = Fork(
            [name for name in self.gru.apply.sequences
             if name != 'mask'], prototype=Linear(), name='fwd_fork' + '_' + self.blockid)

        self.children = [self.lookup, self.gru, self.fwd_fork]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim

        self.fwd_fork.input_dim = self.embedding_dim
        self.fwd_fork.output_dims = [self.gru.get_dim(name)
                                     for name in self.fwd_fork.output_names]

    @application(inputs=['source_sentence', 'source_sentence_mask'],
                 outputs=['representation'])
    def apply(self, source_sentence, source_sentence_mask):
        # Time as first dimension
        source_sentence = source_sentence.T
        source_sentence_mask = source_sentence_mask.T

        embeddings = self.lookup.apply(source_sentence)
        grupara = merge(self.fwd_fork.apply(embeddings, as_dict=True),
                        {'mask': source_sentence_mask})
        representation = self.gru.apply(**grupara)
        return representation
def get_prernn(args):

    # time x batch
    x_mask = tensor.fmatrix('mask')

    # Compute the state dim
    if args.rnn_type == 'lstm':
        state_dim = 4 * args.state_dim
    else:
        state_dim = args.state_dim

    # Prepare the arguments for the fork
    output_names = []
    output_dims = []
    for d in range(args.layers):
        if d > 0:
            suffix = RECURRENTSTACK_SEPARATOR + str(d)
        else:
            suffix = ''
        if d == 0 or args.skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(state_dim)

    # Prepare the brick to be forked (LookupTable or Linear)
    # Check whether the dataset provides indices (in the case of a
    # fixed vocabulary, x is a 2D tensor) or raw values (x is a 3D
    # tensor)
    if has_indices(args.dataset):
        features = args.mini_batch_size
        x = tensor.lmatrix('features')
        vocab_size = get_output_size(args.dataset)
        lookup = LookupTable(length=vocab_size, dim=state_dim)
        lookup.weights_init = initialization.IsotropicGaussian(0.1)
        lookup.biases_init = initialization.Constant(0)
        forked = FeedforwardSequence([lookup.apply])
        if not has_mask(args.dataset):
            x_mask = tensor.ones_like(x, dtype=floatX)

    else:
        x = tensor.tensor3('features', dtype=floatX)
        if args.used_inputs is not None:
            x = tensor.set_subtensor(x[args.used_inputs:, :, :],
                                     tensor.zeros_like(x[args.used_inputs:,
                                                         :, :],
                                                       dtype=floatX))
        features = get_output_size(args.dataset)
        forked = Linear(input_dim=features, output_dim=state_dim)
        forked.weights_init = initialization.IsotropicGaussian(0.1)
        forked.biases_init = initialization.Constant(0)

        if not has_mask(args.dataset):
            x_mask = tensor.ones_like(x[:, :, 0], dtype=floatX)

    # Define the fork
    fork = Fork(output_names=output_names, input_dim=features,
                output_dims=output_dims,
                prototype=forked)
    fork.initialize()

    # Apply the fork
    prernn = fork.apply(x)

    # Give a name to the input of each layer
    if args.skip_connections:
        for t in range(len(prernn)):
            prernn[t].name = "pre_rnn_" + str(t)
    else:
        prernn.name = "pre_rnn"

    return prernn, x_mask
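
A small sketch of the naming scheme built by the loop above (assuming RECURRENTSTACK_SEPARATOR comes from blocks.bricks.recurrent, as in Blocks): with skip connections, the fork produces one named input per layer of the RecurrentStack, and layer 0 keeps the unsuffixed name.

from blocks.bricks.recurrent import RECURRENTSTACK_SEPARATOR

layers, skip_connections, state_dim = 3, True, 50
output_names, output_dims = [], []
for d in range(layers):
    suffix = RECURRENTSTACK_SEPARATOR + str(d) if d > 0 else ''
    if d == 0 or skip_connections:
        output_names.append('inputs' + suffix)
        output_dims.append(state_dim)
# With a '#' separator this is expected to give
# ['inputs', 'inputs#1', 'inputs#2'] and [50, 50, 50].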
def build_model_vanilla(vocab_size, args, dtype=floatX):
    logger.info('Building model ...')

    # Parameters for the model
    context = args.context
    state_dim = args.state_dim
    layers = args.layers
    skip_connections = args.skip_connections

    # Symbolic variables
    # In both cases: Time X Batch
    x = tensor.lmatrix('features')
    y = tensor.lmatrix('targets')

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(state_dim)

    lookup = LookupTable(length=vocab_size, dim=state_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names, input_dim=args.mini_batch_size,
                output_dims=output_dims,
                prototype=FeedforwardSequence(
                    [lookup.apply]))

    transitions = [SimpleRecurrent(dim=state_dim, activation=Tanh())
                   for _ in range(layers)]

    rnn = RecurrentStack(transitions, skip_connections=skip_connections)

    # If skip_connections: dim = layers * state_dim
    # else: dim = state_dim
    output_layer = Linear(
        input_dim=skip_connections * layers *
        state_dim + (1 - skip_connections) * state_dim,
        output_dim=vocab_size, name="output_layer")

    # With skip connections fork.apply returns a list of 3D tensors, one per
    # layer (Time X Batch X embedding_dim); otherwise a single tensor
    pre_rnn = fork.apply(x)

    # Give a name to the input of each layer
    if skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t].name = "pre_rnn_" + str(t)
    else:
        pre_rnn.name = "pre_rnn"

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    init_states = {}
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if skip_connections:
            kwargs['inputs' + suffix] = pre_rnn[d]
        elif d == 0:
            kwargs['inputs'] = pre_rnn
        init_states[d] = theano.shared(
            numpy.zeros((args.mini_batch_size, state_dim)).astype(floatX),
            name='state0_%d' % d)
        kwargs['states' + suffix] = init_states[d]

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    # We have
    # h = [state, state_1, state_2 ...] if layers > 1
    # h = state if layers == 1

    # If we have skip connections, concatenate all the states
    # Else only consider the state of the highest layer
    last_states = {}
    if layers > 1:
        # Save all the last states
        for d in range(layers):
            last_states[d] = h[d][-1, :, :]
        if skip_connections:
            h = tensor.concatenate(h, axis=2)
        else:
            h = h[-1]
    else:
        last_states[0] = h[-1, :, :]
    h.name = "hidden_state"

    # The updates of the hidden states
    updates = []
    for d in range(layers):
        updates.append((init_states[d], last_states[d]))

    presoft = output_layer.apply(h[context:, :, :])
    # Define the cost
    # Compute the probability distribution
    time, batch, feat = presoft.shape
    presoft.name = 'presoft'

    cross_entropy = Softmax().categorical_cross_entropy(
        y[context:, :].flatten(),
        presoft.reshape((batch * time, feat)))
    cross_entropy = cross_entropy / tensor.log(2)
    cross_entropy.name = "cross_entropy"

    # TODO: add regularisation for the cost
    # the log(1) term (== 0) only makes cost a distinct graph variable
    # from cross_entropy for monitoring
    cost = cross_entropy + tensor.log(1)
    cost.name = "regularized_cost"

    # Initialize the model
    logger.info('Initializing...')

    fork.initialize()

    rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()

    return cost, cross_entropy, updates
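
Hypothetical consumer of the returned triple (the compilation below is an assumption, not part of the source): passing updates to theano.function is what carries each layer's final hidden state into the next minibatch, i.e. the stateful truncated-BPTT behaviour that the init_states/last_states bookkeeping above prepares.

import theano
from blocks.graph import ComputationGraph

cost, cross_entropy, updates = build_model_vanilla(vocab_size, args)
cg = ComputationGraph(cost)
# Look the graph inputs up by name; cg.inputs gives no ordering guarantee.
named = {var.name: var for var in cg.inputs}
step_fn = theano.function([named['features'], named['targets']],
                          cross_entropy, updates=updates)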