class TargetWordEncoder(Initializable):
    """Word encoder on the target side: a single RNN stack that maps a
    character-level word to a vector."""

    def __init__(self, vocab_size, embedding_dim, dgru_state_dim, dgru_depth,
                 **kwargs):
        super(TargetWordEncoder, self).__init__(**kwargs)

        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.dgru_state_dim = dgru_state_dim
        self.dgru_depth = dgru_depth
        self.lookup = LookupTable(name='embeddings')
        self.dgru = RecurrentStack(
            [DGRU(activation=Tanh(), dim=self.dgru_state_dim)
             for _ in range(dgru_depth)],
            skip_connections=True)
        self.gru_fork = Fork(
            [name for name in self.dgru.apply.sequences if name != 'mask'],
            prototype=Linear(), name='gru_fork')

        self.children = [self.lookup, self.dgru, self.gru_fork]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim

        self.gru_fork.input_dim = self.embedding_dim
        self.gru_fork.output_dims = [self.dgru.get_dim(name)
                                     for name in self.gru_fork.output_names]

    @application(inputs=['char_seq', 'sample_matrix', 'char_aux'],
                 outputs=['representation'])
    def apply(self, char_seq, sample_matrix, char_aux):
        # Time is the first dimension.
        embeddings = self.lookup.apply(char_seq)
        gru_out = self.dgru.apply(
            **merge(self.gru_fork.apply(embeddings, as_dict=True),
                    {'mask': char_aux}))
        if self.dgru_depth > 1:
            # RecurrentStack returns the states of every layer; keep the top.
            gru_out = gru_out[-1]
        sampled_representation = tensor.batched_dot(
            sample_matrix, gru_out.dimshuffle([1, 0, 2]))
        return sampled_representation.dimshuffle([1, 0, 2])

    @application(inputs=['target_single_char'])
    def single_emit(self, target_single_char, batch_size, mask, states=None):
        # Process a single character (one time step) for the whole batch.
        embeddings = self.lookup.apply(target_single_char)
        if states is None:
            states = self.dgru.initial_states(batch_size)
        states_dict = {'states': states[0]}
        for i in range(1, self.dgru_depth):
            states_dict['states' + RECURRENTSTACK_SEPARATOR + str(i)] = \
                states[i]
        gru_out = self.dgru.apply(
            **merge(self.gru_fork.apply(embeddings, as_dict=True),
                    states_dict,
                    {'mask': mask, 'iterate': False}))
        return gru_out

    @single_emit.property('outputs')
    def single_emit_outputs(self):
        return ['gru_out' + RECURRENTSTACK_SEPARATOR + str(i)
                for i in range(self.dgru_depth)]

    def get_dim(self, name):
        if name in ['output', 'feedback']:
            return self.dgru_state_dim
        return super(TargetWordEncoder, self).get_dim(name)
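# Usage sketch (illustrative only, not part of the model): the dimensions
# below are assumptions, not the repo's defaults. It shows how the encoder
# turns a (time, batch) matrix of character ids into per-word vectors.
def _demo_target_word_encoder():
    """Build a small TargetWordEncoder and wire symbolic inputs through
    ``apply``. All hyper-parameters here are illustrative assumptions."""
    from blocks.initialization import Constant, IsotropicGaussian

    encoder = TargetWordEncoder(vocab_size=120, embedding_dim=64,
                                dgru_state_dim=256, dgru_depth=2,
                                weights_init=IsotropicGaussian(0.01),
                                biases_init=Constant(0))
    encoder.initialize()

    char_seq = tensor.lmatrix('char_seq')            # (time, batch) char ids
    sample_matrix = tensor.tensor3('sample_matrix')  # (batch, words, time)
    char_aux = tensor.matrix('char_aux')             # (time, batch) 0/1 mask
    # representation: (words, batch, dgru_state_dim)
    return encoder.apply(char_seq, sample_matrix, char_aux)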
class Interpolator(AbstractReadout):
    """Readout char by char."""

    def __init__(self, vocab_size, embedding_dim, igru_state_dim, igru_depth,
                 trg_dgru_depth, emitter, feedback_brick, merge=None,
                 merge_prototype=None, post_merge=None, **kwargs):
        merged_dim = igru_state_dim
        if not merge:
            merge = Merge(input_names=kwargs['source_names'],
                          prototype=merge_prototype)
        if not post_merge:
            post_merge = Bias(dim=merged_dim)
        # For compatibility, use a plain IGRU rather than a one-layer
        # RecurrentStack when the depth is one.
        if igru_depth == 1:
            self.igru = IGRU(dim=igru_state_dim)
        else:
            self.igru = RecurrentStack(
                [IGRU(dim=igru_state_dim, name='igru')] +
                [UpperIGRU(dim=igru_state_dim, activation=Tanh(),
                           name='upper_igru' + str(i))
                 for i in range(1, igru_depth)],
                skip_connections=True)
        self.embedding_dim = embedding_dim
        self.emitter = emitter
        self.feedback_brick = feedback_brick
        self.merge = merge
        self.post_merge = post_merge
        self.merged_dim = merged_dim
        self.igru_depth = igru_depth
        self.trg_dgru_depth = trg_dgru_depth
        self.lookup = LookupTable(name='embeddings')
        self.vocab_size = vocab_size
        self.igru_state_dim = igru_state_dim
        self.gru_to_softmax = Linear(input_dim=igru_state_dim,
                                     output_dim=vocab_size)
        self.gru_fork = Fork(
            [name for name in self.igru.apply.sequences
             if name != 'mask' and name != 'input_states'],
            prototype=Linear(), name='gru_fork')

        children = [self.emitter, self.feedback_brick, self.merge,
                    self.post_merge, self.igru, self.lookup,
                    self.gru_to_softmax, self.gru_fork]
        kwargs.setdefault('children', []).extend(children)
        super(Interpolator, self).__init__(**kwargs)

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim
        self.emitter.readout_dim = self.get_dim('readouts')
        self.merge.input_names = self.source_names
        self.merge.input_dims = self.source_dims
        self.merge.output_dim = self.merged_dim
        self.post_merge.input_dim = self.merged_dim
        self.post_merge.output_dim = self.igru_state_dim
        self.gru_fork.input_dim = self.embedding_dim
        self.gru_fork.output_dims = [self.igru.get_dim(name)
                                     for name in self.gru_fork.output_names]

    @application
    def initial_igru_outputs(self, batch_size):
        return self.igru.initial_states(batch_size)

    @application
    def emit(self, readouts):
        return self.emitter.emit(readouts)

    @application
    def cost(self, readouts, outputs):
        return self.emitter.cost(readouts, outputs)

    @application
    def initial_outputs(self, batch_size):
        return self.emitter.initial_outputs(batch_size)

    @application(outputs=['feedback'])
    def feedback(self, outputs):
        return self.feedback_brick.feedback(outputs)

    @application(outputs=['feedback'])
    def feedback_apply(self, target_char_seq, target_sample_matrix,
                       target_char_aux):
        return self.feedback_brick.apply(target_char_seq,
                                         target_sample_matrix,
                                         target_char_aux)

    @application
    def single_feedback(self, target_single_char, batch_size, mask=None,
                        states=None):
        return self.feedback_brick.single_emit(target_single_char,
                                               batch_size, mask, states)

    @single_feedback.property('outputs')
    def single_feedback_outputs(self):
        return ['single_feedback' + RECURRENTSTACK_SEPARATOR + str(i)
                for i in range(self.trg_dgru_depth)]

    @application(outputs=['gru_out', 'readout_chars'])
    def single_readout_gru(self, target_prev_char, target_prev_char_aux,
                           input_states, states):
        # Process a single character step, threading the IGRU states.
        embeddings = self.lookup.apply(target_prev_char)
        states_dict = {'states': states[0]}
        if self.igru_depth > 1:
            for i in range(1, self.igru_depth):
                states_dict['states' + RECURRENTSTACK_SEPARATOR + str(i)] = \
                    states[i]
        gru_out = self.igru.apply(
            **merge(self.gru_fork.apply(embeddings, as_dict=True),
                    states_dict,
                    {'mask': target_prev_char_aux,
                     'input_states': input_states,
                     'iterate': False}))
        if self.igru_depth > 1:
            readout_chars = self.gru_to_softmax.apply(gru_out[-1])
        else:
            readout_chars = self.gru_to_softmax.apply(gru_out)
        return gru_out, readout_chars

    @application
    def readout(self, **kwargs):
        merged = self.merge.apply(**{name: kwargs[name]
                                     for name in self.merge.input_names})
        merged = self.post_merge.apply(merged)
        return merged

    @application(outputs=['readout_chars'])
    def readout_gru(self, target_prev_char_seq, target_prev_char_aux,
                    input_states):
        embeddings = self.lookup.apply(target_prev_char_seq)
        gru_out = self.igru.apply(
            **merge(self.gru_fork.apply(embeddings, as_dict=True),
                    {'mask': target_prev_char_aux,
                     'input_states': input_states}))
        if self.igru_depth > 1:
            gru_out = gru_out[-1]
        readout_chars = self.gru_to_softmax.apply(gru_out)
        return readout_chars

    def get_dim(self, name):
        if name == 'outputs':
            return self.emitter.get_dim(name)
        elif name == 'feedback':
            return self.feedback_brick.get_dim(name)
        elif name == 'readouts':
            return self.readout_dim
        return super(AbstractReadout, self).get_dim(name)
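# Wiring sketch (illustrative assumptions throughout): Interpolator is a
# Readout-style brick, so it is normally constructed with an emitter and a
# feedback brick and handed to a sequence generator. The source names,
# dimensions, and emitter configuration below are guesses for illustration,
# not the repo's actual configuration.
def _demo_interpolator():
    """Construct an Interpolator with a softmax emitter and a
    TargetWordEncoder as the feedback brick (all settings assumed)."""
    from blocks.bricks.sequence_generators import SoftmaxEmitter

    feedback = TargetWordEncoder(vocab_size=120, embedding_dim=64,
                                 dgru_state_dim=512, dgru_depth=2)
    return Interpolator(vocab_size=120, embedding_dim=64,
                        igru_state_dim=512, igru_depth=2, trg_dgru_depth=2,
                        emitter=SoftmaxEmitter(initial_output=-1),
                        feedback_brick=feedback,
                        source_names=['states', 'weighted_averages'],
                        readout_dim=120)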
class Decimator(Initializable):
    """Char encoder on the source side: maps a character-level word to a
    vector."""

    def __init__(self, vocab_size, embedding_dim, dgru_state_dim, dgru_layers,
                 **kwargs):
        super(Decimator, self).__init__(**kwargs)

        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.dgru_state_dim = dgru_state_dim
        self.dgru_layers = dgru_layers
        self.lookup = LookupTable(name='embeddings')
        self.dgru = RecurrentStack(
            [DGRU(activation=Tanh(), dim=self.dgru_state_dim)
             for _ in range(dgru_layers)],
            skip_connections=True)
        self.gru_fork = Fork(
            [name for name in self.dgru.apply.sequences if name != 'mask'],
            prototype=Linear(), name='gru_fork')

        self.children = [self.lookup, self.dgru, self.gru_fork]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim

        self.gru_fork.input_dim = self.embedding_dim
        self.gru_fork.output_dims = [self.dgru.get_dim(name)
                                     for name in self.gru_fork.output_names]

    @application(inputs=['char_seq', 'sample_matrix', 'char_aux'],
                 outputs=['representation'])
    def apply(self, char_seq, sample_matrix, char_aux):
        # Time is the first dimension.
        embeddings = self.lookup.apply(char_seq)
        gru_out = self.dgru.apply(
            **merge(self.gru_fork.apply(embeddings, as_dict=True),
                    {'mask': char_aux}))
        if self.dgru_layers > 1:
            # RecurrentStack returns the states of every layer; keep the top.
            gru_out = gru_out[-1]
        sampled_representation = tensor.batched_dot(
            sample_matrix, gru_out.dimshuffle([1, 0, 2]))
        return sampled_representation.dimshuffle([1, 0, 2])

    @application(inputs=['target_single_char'], outputs=['gru_out'])
    def single_emit(self, target_single_char, batch_size, mask, states=None):
        # Process a single character (one time step) for the whole batch.
        embeddings = self.lookup.apply(target_single_char)
        if states is None:
            states = self.dgru.initial_states(batch_size)
        gru_out = self.dgru.apply(
            **merge(self.gru_fork.apply(embeddings, as_dict=True),
                    {'states': states, 'mask': mask, 'iterate': False}))
        return gru_out

    def get_dim(self, name):
        if name in ['output', 'feedback']:
            return self.dgru_state_dim
        return super(Decimator, self).get_dim(name)
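# End-to-end sketch (all shapes and sizes are illustrative assumptions):
# compile the Decimator's ``apply`` into a Theano function and run it on
# dummy data to see the shape of the resulting word representations.
def _demo_decimator():
    """Encode a toy batch of source words; every value here is assumed."""
    import numpy
    import theano
    from blocks.initialization import Constant, IsotropicGaussian

    decimator = Decimator(vocab_size=120, embedding_dim=64,
                          dgru_state_dim=256, dgru_layers=1,
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
    decimator.initialize()

    char_seq = tensor.lmatrix('char_seq')            # (time, batch)
    sample_matrix = tensor.tensor3('sample_matrix')  # (batch, words, time)
    char_aux = tensor.matrix('char_aux')             # (time, batch) mask
    rep_fn = theano.function([char_seq, sample_matrix, char_aux],
                             decimator.apply(char_seq, sample_matrix,
                                             char_aux))

    chars = numpy.zeros((5, 2), dtype='int64')       # 5 steps, batch of 2
    sample = numpy.zeros((2, 3, 5), dtype=theano.config.floatX)
    aux = numpy.ones((5, 2), dtype=theano.config.floatX)
    return rep_fn(chars, sample, aux)                # (3, 2, 256)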