class TestBidirectionalStack(unittest.TestCase):
    def setUp(self):
        prototype = SimpleRecurrent(dim=3, activation=Tanh())
        self.layers = [
            Bidirectional(weights_init=Orthogonal(), prototype=prototype)
            for _ in range(3)
        ]
        self.stack = RecurrentStack(self.layers)
        for fork in self.stack.forks:
            fork.weights_init = Identity(1)
            fork.biases_init = Constant(0)
        self.stack.initialize()

        self.x_val = 0.1 * numpy.asarray(
            list(itertools.permutations(range(4))), dtype=theano.config.floatX)
        self.x_val = (numpy.ones(
            (24, 4, 3), dtype=theano.config.floatX) * self.x_val[..., None])
        self.mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        self.mask_val[12:24, 3] = 0

    def test_steps(self):
        x = tensor.tensor3('x')
        mask = tensor.matrix('mask')

        calc_stack_layers = [
            theano.function([x, mask],
                            self.stack.apply(x, mask=mask)[i])
            for i in range(len(self.layers))
        ]
        stack_layers = [
            f(self.x_val, self.mask_val) for f in calc_stack_layers
        ]

        h_val = self.x_val
        for stack_layer_value, bidir_net in zip(stack_layers, self.layers):
            calc = theano.function([x, mask], bidir_net.apply(x, mask=mask))
            simple_layer_value = calc(h_val, self.mask_val)
            assert_allclose(stack_layer_value, simple_layer_value, rtol=1e-04)
            h_val = simple_layer_value[..., :3]

    def test_dims(self):
        self.assertEqual(self.stack.get_dim("inputs"), 3)
        for i in range(len(self.layers)):
            state_name = self.stack.suffix("states", i)
            self.assertEqual(self.stack.get_dim(state_name), 6)
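
The test above is shown without its imports. A hedged preamble that would make it self-contained (standard Blocks and Theano modules; the exact paths depend on the installed Blocks version) looks roughly like this:

import itertools
import unittest

import numpy
import theano
from numpy.testing import assert_allclose
from theano import tensor

from blocks.bricks import Tanh
from blocks.bricks.recurrent import (Bidirectional, RecurrentStack,
                                     SimpleRecurrent)
from blocks.initialization import Constant, Identity, Orthogonal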
Example #3
class Decimator(Initializable):
    """Source word encoder, mapping a charater-level word to a vector.
        This encoder is able to learn the morphology.
        For compatibility with previous version, we call it Decimator.
    """
    def __init__(self, vocab_size, embedding_dim, dgru_state_dim, dgru_depth,
                 **kwargs):
        super(Decimator, self).__init__(**kwargs)

        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.dgru_state_dim = dgru_state_dim
        self.lookup = LookupTable(name='embeddings')
        self.dgru_depth = dgru_depth
        # representation
        self.dgru = RecurrentStack(
            [DGRU(activation=Tanh(), dim=self.dgru_state_dim)
             for _ in range(dgru_depth)],
            skip_connections=True)
        # importance of this representation
        self.bidir_w = Bidirectional(
            RecurrentWithFork(
                DGRU(activation=Tanh(), dim=self.dgru_state_dim // 2),
                self.embedding_dim,
                name='src_word_with_fork'),
            name='bidir_src_word_encoder')

        self.gru_fork = Fork(
            [name for name in self.dgru.apply.sequences if name != 'mask'],
            prototype=Linear(),
            name='gru_fork')
        # map to an energy scalar
        self.wl = Linear(input_dim=dgru_state_dim, output_dim=1)

        self.children = [
            self.lookup, self.dgru, self.gru_fork, self.bidir_w, self.wl
        ]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim

        self.gru_fork.input_dim = self.embedding_dim
        self.gru_fork.output_dims = [
            self.dgru.get_dim(name) for name in self.gru_fork.output_names
        ]

    @application(inputs=['char_seq', 'sample_matrix', 'char_aux'],
                 outputs=['representation', 'weight'])
    def apply(self, char_seq, sample_matrix, char_aux):
        # Time as first dimension
        embeddings = self.lookup.apply(char_seq)
        gru_out = self.dgru.apply(**merge(
            self.gru_fork.apply(embeddings, as_dict=True), {'mask': char_aux}))
        wgru_out = tensor.exp(
            self.wl.apply(self.bidir_w.apply(embeddings, char_aux)))

        if self.dgru_depth > 1:
            gru_out = gru_out[-1]

        gru_out = tensor.addbroadcast(wgru_out, 2) * gru_out
        sampled_representation = tensor.tanh(
            tensor.batched_dot(sample_matrix, gru_out.dimshuffle([1, 0, 2])))
        return sampled_representation.dimshuffle([1, 0, 2]), wgru_out

    def get_dim(self, name):
        if name == 'output':
            return self.dgru_state_dim
        return super(Decimator, self).get_dim(name)
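
The pooling performed in Decimator.apply can be read as a small numpy sketch. The shapes below are illustrative and the one-hot rows of sample_matrix are a hypothetical construction (the real matrix is built elsewhere in the project); the sketch only mirrors the exp-weighting, batched_dot and dimshuffle steps of the code above.

import numpy as np

time, batch, dim, n_words = 7, 2, 5, 3
gru_out = np.random.rand(time, batch, dim)          # char-level states, time-major
wgru_out = np.exp(np.random.rand(time, batch, 1))   # exp(energy) per char position
sample_matrix = np.zeros((batch, n_words, time))    # hypothetical one-hot rows
sample_matrix[:, 0, 2] = 1
sample_matrix[:, 1, 4] = 1
sample_matrix[:, 2, 6] = 1

weighted = wgru_out * gru_out                        # broadcast over the state dim
# batched_dot: contract the time axis separately for every batch element
pooled = np.einsum('bwt,btd->bwd', sample_matrix, weighted.transpose(1, 0, 2))
representation = np.tanh(pooled).transpose(1, 0, 2)  # back to (n_words, batch, dim)
print(representation.shape)                          # (3, 2, 5)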
Example #4
class Interpolator(AbstractReadout):
    """Readout char by char."""
    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 igru_state_dim,
                 igru_depth,
                 trg_dgru_depth,
                 emitter,
                 feedback_brick,
                 merge=None,
                 merge_prototype=None,
                 post_merge=None,
                 **kwargs):
        merged_dim = igru_state_dim
        if not merge:
            merge = Merge(input_names=kwargs['source_names'],
                          prototype=merge_prototype)
        if not post_merge:
            post_merge = Bias(dim=merged_dim)

        # for compatibility with the single-layer case
        if igru_depth == 1:
            self.igru = IGRU(dim=igru_state_dim)
        else:
            self.igru = RecurrentStack(
                [IGRU(dim=igru_state_dim, name='igru')] + [
                    UpperIGRU(dim=igru_state_dim,
                              activation=Tanh(),
                              name='upper_igru' + str(i))
                    for i in range(1, igru_depth)
                ],
                skip_connections=True)
        self.embedding_dim = embedding_dim
        self.emitter = emitter
        self.feedback_brick = feedback_brick
        self.merge = merge
        self.post_merge = post_merge
        self.merged_dim = merged_dim
        self.igru_depth = igru_depth
        self.trg_dgru_depth = trg_dgru_depth
        self.lookup = LookupTable(name='embeddings')
        self.vocab_size = vocab_size
        self.igru_state_dim = igru_state_dim
        self.gru_to_softmax = Linear(input_dim=igru_state_dim,
                                     output_dim=vocab_size)
        self.gru_fork = Fork(
            [name for name in self.igru.apply.sequences
             if name not in ('mask', 'input_states')],
            prototype=Linear(),
            name='gru_fork')

        children = [
            self.emitter, self.feedback_brick, self.merge, self.post_merge,
            self.igru, self.lookup, self.gru_to_softmax, self.gru_fork
        ]
        kwargs.setdefault('children', []).extend(children)
        super(Interpolator, self).__init__(**kwargs)

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim
        self.emitter.readout_dim = self.get_dim('readouts')
        self.merge.input_names = self.source_names
        self.merge.input_dims = self.source_dims
        self.merge.output_dim = self.merged_dim
        self.post_merge.input_dim = self.merged_dim
        self.post_merge.output_dim = self.igru_state_dim
        self.gru_fork.input_dim = self.embedding_dim
        self.gru_fork.output_dims = [
            self.igru.get_dim(name) for name in self.gru_fork.output_names
        ]

    @application
    def initial_igru_outputs(self, batch_size):
        return self.igru.initial_states(batch_size)

    @application
    def emit(self, readouts):
        return self.emitter.emit(readouts)

    @application
    def cost(self, readouts, outputs):
        return self.emitter.cost(readouts, outputs)

    @application
    def initial_outputs(self, batch_size):
        return self.emitter.initial_outputs(batch_size)

    @application(outputs=['feedback'])
    def feedback(self, outputs):
        return self.feedback_brick.feedback(outputs)

    @application(outputs=['feedback'])
    def feedback_apply(self, target_char_seq, target_sample_matrix,
                       target_char_aux):
        return self.feedback_brick.apply(target_char_seq, target_sample_matrix,
                                         target_char_aux)

    @application
    def single_feedback(self,
                        target_single_char,
                        batch_size,
                        mask=None,
                        states=None):
        return self.feedback_brick.single_emit(target_single_char, batch_size,
                                               mask, states)

    @single_feedback.property('outputs')
    def single_feedback_outputs(self):
        return [
            'single_feedback' + RECURRENTSTACK_SEPARATOR + str(i)
            for i in range(self.trg_dgru_depth)
        ]

    @application(outputs=['gru_out', 'readout_chars'])
    def single_readout_gru(self, target_prev_char, target_prev_char_aux,
                           input_states, states):
        embeddings = self.lookup.apply(target_prev_char)
        states_dict = {'states': states[0]}
        if self.igru_depth > 1:
            for i in range(1, self.igru_depth):
                states_dict['states' + RECURRENTSTACK_SEPARATOR +
                            str(i)] = states[i]
        gru_out = self.igru.apply(**merge(
            self.gru_fork.apply(embeddings, as_dict=True), states_dict, {
                'mask': target_prev_char_aux,
                'input_states': input_states,
                'iterate': False
            }))
        if self.igru_depth > 1:
            readout_chars = self.gru_to_softmax.apply(gru_out[-1])
        else:
            readout_chars = self.gru_to_softmax.apply(gru_out)
        return gru_out, readout_chars

    @application
    def readout(self, **kwargs):
        merged = self.merge.apply(
            **{name: kwargs[name]
               for name in self.merge.input_names})
        merged = self.post_merge.apply(merged)
        return merged

    @application(outputs=['readout_chars'])
    def readout_gru(self, target_prev_char_seq, target_prev_char_aux,
                    input_states):
        embeddings = self.lookup.apply(target_prev_char_seq)
        gru_out = self.igru.apply(
            **merge(self.gru_fork.apply(embeddings, as_dict=True), {
                'mask': target_prev_char_aux,
                'input_states': input_states
            }))
        if self.igru_depth > 1:
            gru_out = gru_out[-1]
        readout_chars = self.gru_to_softmax.apply(gru_out)
        return readout_chars

    def get_dim(self, name):
        if name == 'outputs':
            return self.emitter.get_dim(name)
        elif name == 'feedback':
            return self.feedback_brick.get_dim(name)
        elif name == 'readouts':
            return self.readout_dim
        return super(AbstractReadout, self).get_dim(name)
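
single_readout_gru above (like single_emit in the following examples) relies on the iterate=False calling convention of Blocks recurrent bricks: the transition is applied for a single time step, with the previous states passed in explicitly. A self-contained sketch of that pattern with a stock SimpleRecurrent (not the custom IGRU/DGRU bricks) is:

import numpy
import theano
from theano import tensor
from blocks.bricks import Tanh
from blocks.bricks.recurrent import SimpleRecurrent
from blocks.initialization import Orthogonal

rnn = SimpleRecurrent(dim=4, activation=Tanh(), weights_init=Orthogonal())
rnn.initialize()

inp = tensor.matrix('inp')     # one time step: (batch, dim)
prev = tensor.matrix('prev')   # previous hidden state: (batch, dim)
step = theano.function([inp, prev],
                       rnn.apply(inputs=inp, states=prev, iterate=False))

h0 = numpy.zeros((2, 4), dtype=theano.config.floatX)
x0 = numpy.ones((2, 4), dtype=theano.config.floatX)
h1 = step(x0, h0)              # a single recurrent step, shape (2, 4)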
Example #5
class TargetWordEncoder(Initializable):
    """Word encoder in target side use a single RNN to map a charater-level word to a vector"""
    def __init__(self, vocab_size, embedding_dim, dgru_state_dim, dgru_depth,
                 **kwargs):
        super(TargetWordEncoder, self).__init__(**kwargs)

        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.dgru_state_dim = dgru_state_dim
        self.lookup = LookupTable(name='embeddings')
        self.dgru_depth = dgru_depth
        self.dgru = RecurrentStack(
            [DGRU(activation=Tanh(), dim=self.dgru_state_dim)
             for _ in range(dgru_depth)],
            skip_connections=True)

        self.gru_fork = Fork(
            [name for name in self.dgru.apply.sequences if name != 'mask'],
            prototype=Linear(),
            name='gru_fork')

        self.children = [self.lookup, self.dgru, self.gru_fork]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim

        self.gru_fork.input_dim = self.embedding_dim
        self.gru_fork.output_dims = [
            self.dgru.get_dim(name) for name in self.gru_fork.output_names
        ]

    @application(inputs=['char_seq', 'sample_matrix', 'char_aux'],
                 outputs=['representation'])
    def apply(self, char_seq, sample_matrix, char_aux):
        # Time as first dimension
        embeddings = self.lookup.apply(char_seq)
        gru_out = self.dgru.apply(**merge(
            self.gru_fork.apply(embeddings, as_dict=True), {'mask': char_aux}))
        if self.dgru_depth > 1:
            gru_out = gru_out[-1]
        sampled_representation = tensor.batched_dot(
            sample_matrix, gru_out.dimshuffle([1, 0, 2]))
        return sampled_representation.dimshuffle([1, 0, 2])

    @application(inputs=['target_single_char'])
    def single_emit(self, target_single_char, batch_size, mask, states=None):
        # Time as first dimension
        # only one batch
        embeddings = self.lookup.apply(target_single_char)
        if states is None:
            states = self.dgru.initial_states(batch_size)
        states_dict = {'states': states[0]}
        for i in range(1, self.dgru_depth):
            states_dict['states' + RECURRENTSTACK_SEPARATOR +
                        str(i)] = states[i]
        gru_out = self.dgru.apply(**merge(
            self.gru_fork.apply(embeddings, as_dict=True), states_dict, {
                'mask': mask,
                'iterate': False
            }))
        return gru_out

    @single_emit.property('outputs')
    def single_emit_outputs(self):
        return [
            'gru_out' + RECURRENTSTACK_SEPARATOR + str(i)
            for i in range(self.dgru_depth)
        ]

    def get_dim(self, name):
        if name in ['output', 'feedback']:
            return self.dgru_state_dim
        return super(TargetWordEncoder, self).get_dim(name)
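
single_emit and single_emit_outputs address every layer of the RecurrentStack by a suffixed name. Assuming RECURRENTSTACK_SEPARATOR is '#', as in the Blocks releases these examples appear to target, the naming for a depth-3 stack works out as sketched below (illustration only):

from blocks.bricks.recurrent import RECURRENTSTACK_SEPARATOR

dgru_depth = 3
state_names = ['states' if level == 0
               else 'states' + RECURRENTSTACK_SEPARATOR + str(level)
               for level in range(dgru_depth)]
print(state_names)  # ['states', 'states#1', 'states#2'] with the default '#'
# single_emit fills exactly these keyword arguments from the list returned by
# self.dgru.initial_states(batch_size).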
Example #6
class Decimator(Initializable):
    """Char encoder, mapping a char-level word to a vector"""
    def __init__(self, vocab_size, embedding_dim, dgru_state_dim, dgru_layers,
                 **kwargs):
        super(Decimator, self).__init__(**kwargs)

        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.dgru_state_dim = dgru_state_dim
        self.lookup = LookupTable(name='embeddings')
        self.dgru_layers = dgru_layers
        self.dgru = RecurrentStack(
            [DGRU(activation=Tanh(), dim=self.dgru_state_dim)
             for _ in range(dgru_layers)],
            skip_connections=True)

        self.gru_fork = Fork(
            [name for name in self.dgru.apply.sequences if name != 'mask'],
            prototype=Linear(),
            name='gru_fork')

        self.children = [self.lookup, self.dgru, self.gru_fork]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim

        self.gru_fork.input_dim = self.embedding_dim
        self.gru_fork.output_dims = [
            self.dgru.get_dim(name) for name in self.gru_fork.output_names
        ]

    @application(inputs=['char_seq', 'sample_matrix', 'char_aux'],
                 outputs=['representation'])
    def apply(self, char_seq, sample_matrix, char_aux):
        # Time as first dimension
        embeddings = self.lookup.apply(char_seq)
        gru_out = self.dgru.apply(**merge(
            self.gru_fork.apply(embeddings, as_dict=True), {'mask': char_aux}))
        if self.dgru_layers > 1:
            gru_out = gru_out[-1]
        sampled_representation = tensor.batched_dot(
            sample_matrix, gru_out.dimshuffle([1, 0, 2]))
        return sampled_representation.dimshuffle([1, 0, 2])

    @application(inputs=['target_single_char'], outputs=['gru_out'])
    def single_emit(self, target_single_char, batch_size, mask, states=None):
        # Time as first dimension
        # only one batch
        embeddings = self.lookup.apply(target_single_char)
        if states is None:
            states = self.dgru.initial_states(batch_size)
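        # Note: unlike TargetWordEncoder.single_emit above, the value returned
        # by initial_states is passed under the single 'states' key; this
        # appears to assume a one-layer stack (a deeper stack would need the
        # per-layer 'states#i' names built there).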
        gru_out = self.dgru.apply(
            **merge(self.gru_fork.apply(embeddings, as_dict=True), {
                'states': states,
                'mask': mask,
                'iterate': False
            }))
        return gru_out

    def get_dim(self, name):
        if name in ['output', 'feedback']:
            return self.dgru_state_dim
        return super(Decimator, self).get_dim(name)