Ejemplo n.º 1
0
    def __init__(self, blockid, vocab_size, embedding_dim, state_dim,
                 representation_dim, theano_seed=None, **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.representation_dim = representation_dim
        self.theano_seed = theano_seed
        self.blockid = blockid

        # Initialize gru with special initial state
        self.transition = GRUInitialState(
            attended_dim=state_dim, dim=state_dim,
            activation=Tanh(), name='decoder' + '_' + self.blockid)

        # Initialize the attention mechanism
        
        self.attention = SequenceContentAttention(
            state_names=self.transition.apply.states,
            attended_dim=representation_dim,
            match_dim=state_dim, name="attention" + '_' + self.blockid)

        # Initialize the readout, note that SoftmaxEmitter emits -1 for
        # initial outputs which is used by LookupFeedBackWMT15
        readout = Readout(
            source_names=['states', 'feedback',
                          self.attention.take_glimpses.outputs[0]],
            readout_dim=self.vocab_size,
            emitter=SoftmaxEmitter(initial_output=-1, theano_seed=theano_seed, name = 'emitter' +  '_' + self.blockid ),
            feedback_brick=LookupFeedbackWMT15(vocab_size, embedding_dim, name = 'lookup' + '_' + self.blockid),
            post_merge=InitializableFeedforwardSequence(
                [Bias(dim=state_dim, name='maxout_bias').apply,
                 Maxout(num_pieces=2, name='maxout').apply,
                 Linear(input_dim=state_dim / 2, output_dim=embedding_dim,
                        use_bias=False, name='softmax0').apply,
                 Linear(input_dim=embedding_dim, name='softmax1').apply], name = 'post_merge' +  '_' + self.blockid),
            merged_dim=state_dim,
            hstates_dim = state_dim,
            name = 'readout' + '_' + self.blockid)

        # Build sequence generator accordingly
        self.sequence_generator = SequenceGenerator(
            readout=readout,
            transition=self.transition,
            attention=self.attention,
            fork=Fork([name for name in self.transition.apply.sequences
                       if name != 'mask'], prototype=Linear(), name = 'fork' + '_' + self.blockid),
            name = 'sequence_generator' + '_' + self.blockid
        )

        self.children = [self.sequence_generator]
Ejemplo n.º 2
0
class Decoder(Initializable):
    """Decoder of RNNsearch model."""

    def __init__(self, blockid, vocab_size, embedding_dim, state_dim,
                 representation_dim, theano_seed=None, **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.representation_dim = representation_dim
        self.theano_seed = theano_seed
        self.blockid = blockid

        # Initialize gru with special initial state
        self.transition = GRUInitialState(
            attended_dim=state_dim, dim=state_dim,
            activation=Tanh(), name='decoder' + '_' + self.blockid)

        # Initialize the attention mechanism
        
        self.attention = SequenceContentAttention(
            state_names=self.transition.apply.states,
            attended_dim=representation_dim,
            match_dim=state_dim, name="attention" + '_' + self.blockid)

        # Initialize the readout, note that SoftmaxEmitter emits -1 for
        # initial outputs which is used by LookupFeedBackWMT15
        readout = Readout(
            source_names=['states', 'feedback',
                          self.attention.take_glimpses.outputs[0]],
            readout_dim=self.vocab_size,
            emitter=SoftmaxEmitter(initial_output=-1, theano_seed=theano_seed, name = 'emitter' +  '_' + self.blockid ),
            feedback_brick=LookupFeedbackWMT15(vocab_size, embedding_dim, name = 'lookup' + '_' + self.blockid),
            post_merge=InitializableFeedforwardSequence(
                [Bias(dim=state_dim, name='maxout_bias').apply,
                 Maxout(num_pieces=2, name='maxout').apply,
                 Linear(input_dim=state_dim / 2, output_dim=embedding_dim,
                        use_bias=False, name='softmax0').apply,
                 Linear(input_dim=embedding_dim, name='softmax1').apply], name = 'post_merge' +  '_' + self.blockid),
            merged_dim=state_dim,
            hstates_dim = state_dim,
            name = 'readout' + '_' + self.blockid)

        # Build sequence generator accordingly
        self.sequence_generator = SequenceGenerator(
            readout=readout,
            transition=self.transition,
            attention=self.attention,
            fork=Fork([name for name in self.transition.apply.sequences
                       if name != 'mask'], prototype=Linear(), name = 'fork' + '_' + self.blockid),
            name = 'sequence_generator' + '_' + self.blockid
        )

        self.children = [self.sequence_generator]

    @application(inputs=['representation', 'source_sentence_mask',
                         'target_sentence_mask', 'target_sentence'],
                 outputs=['cost', 'hstates'])
    def cost(self,  representation, source_sentence_mask,
             target_sentence, target_sentence_mask ):

        source_sentence_mask = source_sentence_mask.T
        target_sentence = target_sentence.T
        target_sentence_mask = target_sentence_mask.T

        # Get the cost matrix
        cost, hiddenstates = self.sequence_generator.cost_matrix(**{
            'mask': target_sentence_mask,
            'outputs': target_sentence,
            'attended': representation,
            'attended_mask': source_sentence_mask}
        )

        return (cost * target_sentence_mask).sum() / \
            target_sentence_mask.shape[1], hiddenstates

    @application
    def generate(self, source_sentence, representation, **kwargs):
        return self.sequence_generator.generate(
            n_steps=2 * source_sentence.shape[1],
            batch_size = source_sentence.shape[0],
            attended = representation,
            attended_mask=tensor.ones((source_sentence.shape[0], representation.shape[0])).T,
            **kwargs)