def __init__(self, embedding_dim, state_dim, use_local_attention=False,
             window_size=10, **kwargs):
    super(SentenceEncoder, self).__init__(**kwargs)
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim
    self.rnn = GRU(activation=Tanh(), dim=state_dim,
                   attended_dim=embedding_dim)
    self.input_fork = Fork(
        [name for name in self.rnn.apply.sequences if name != 'mask'],
        prototype=Linear(), name='input_fork')
    self.energy_computer = SumMatchFunction_posTag(
        name="wordAtt_energy_comp")
    self.attention = SequenceContentAttention_withExInput(
        state_names=['states'], state_dims=[state_dim],
        attended_dim=embedding_dim, match_dim=state_dim,
        posTag_dim=self.state_dim,
        energy_computer=self.energy_computer,
        use_local_attention=use_local_attention,
        window_size=window_size, name="word_attention")
    self.children = [self.rnn, self.input_fork, self.attention]
def __init__(self, networks, dims, **kwargs):
    super(DropMultiLayerEncoder, self).__init__(**kwargs)
    self.dims = dims
    self.networks = networks
    self.use_bias = True
    self.hid_linear_trans_forw = [
        Fork([name for name in networks[i].prototype.apply.sequences
              if name not in ['mask', 'drops_states', 'drops_cells',
                              'drops_igates']],
             name='fork_forw_{}'.format(i), prototype=Linear(), **kwargs)
        for i in range(len(networks))]
    self.hid_linear_trans_back = [
        Fork([name for name in networks[i].prototype.apply.sequences
              if name not in ['mask', 'drops_states', 'drops_cells',
                              'drops_igates']],
             name='fork_back_{}'.format(i), prototype=Linear(), **kwargs)
        for i in range(len(networks))]
    self.out_linear_trans = Linear(name='out_linear', **kwargs)
    self.children = (networks + self.hid_linear_trans_forw +
                     self.hid_linear_trans_back + [self.out_linear_trans])
    self.num_layers = len(networks)
def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
    super(BidirectionalEncoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim

    self.lookup = LookupTable(name='embeddings')
    self.bidir = BidirectionalWMT15(
        GatedRecurrent(activation=Tanh(), dim=state_dim))
    self.fwd_fork = Fork(
        [name for name in self.bidir.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='fwd_fork')
    self.back_fork = Fork(
        [name for name in self.bidir.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='back_fork')

    self.children = [self.lookup, self.bidir,
                     self.fwd_fork, self.back_fork]
def __init__(self, vocab_size, embedding_dim, igru_state_dim, emitter=None,
             feedback_brick=None, merge=None, merge_prototype=None,
             post_merge=None, merged_dim=None, igru=None, **kwargs):
    self.igru = igru
    self.lookup = LookupTable(name='embeddings')
    self.vocab_size = vocab_size
    self.igru_state_dim = igru_state_dim
    self.gru_to_softmax = Linear(input_dim=igru_state_dim,
                                 output_dim=vocab_size)
    self.embedding_dim = embedding_dim
    self.gru_fork = Fork(
        [name for name in self.igru.apply.sequences
         if name != 'mask' and name != 'input_states'],
        prototype=Linear(), name='gru_fork')

    kwargs['children'] = [self.igru, self.lookup,
                          self.gru_to_softmax, self.gru_fork]
    super(Interpolator, self).__init__(emitter=emitter,
                                       feedback_brick=feedback_brick,
                                       merge=merge,
                                       merge_prototype=merge_prototype,
                                       post_merge=post_merge,
                                       merged_dim=merged_dim, **kwargs)
def __init__(self, dimen, vocab_size):
    # Call the base constructor; otherwise "allocated" is never set.
    super(MorphGen, self).__init__(self)

    # The encoder
    encoder = Bidirectional(SimpleRecurrent(dim=dimen, activation=Tanh()))

    # The fork feeds the encoder transition: one linear input per
    # sequence of the prototype, excluding the mask.
    fork = Fork([name for name in encoder.prototype.apply.sequences
                 if name != 'mask'])
    fork.input_dim = dimen
    fork.output_dims = [encoder.prototype.get_dim(name)
                        for name in fork.input_names]

    lookup = LookupTable(vocab_size, dimen)
    transition = SimpleRecurrent(dim=dimen, activation=Tanh(),
                                 name="transition")
    atten = SequenceContentAttention(state_names=transition.apply.states,
                                     attended_dim=2 * dimen,
                                     match_dim=dimen, name="attention")
    readout = Readout(
        readout_dim=vocab_size,
        source_names=[transition.apply.states[0],
                      atten.take_glimpses.outputs[0]],
        emitter=SoftmaxEmitter(name="emitter"),
        feedback_brick=LookupFeedback(vocab_size, dimen),
        name="readout")
    generator = SequenceGenerator(readout=readout, transition=transition,
                                  attention=atten, name="generator")

    self.lookup = lookup
    self.fork = fork
    self.encoder = encoder
    self.generator = generator
    self.children = [lookup, fork, encoder, generator]
def __init__(self, vocab_size, embedding_dim, dgru_state_dim, dgru_depth,
             **kwargs):
    super(Decimator, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.dgru_state_dim = dgru_state_dim
    self.dgru_depth = dgru_depth
    self.lookup = LookupTable(name='embeddings')
    # representation
    self.dgru = RecurrentStack(
        [DGRU(activation=Tanh(), dim=self.dgru_state_dim)
         for _ in range(dgru_depth)],
        skip_connections=True)
    # importance of this representation
    self.bidir_w = Bidirectional(
        RecurrentWithFork(DGRU(activation=Tanh(),
                               dim=self.dgru_state_dim // 2),
                          self.embedding_dim, name='src_word_with_fork'),
        name='bidir_src_word_encoder')
    self.gru_fork = Fork(
        [name for name in self.dgru.apply.sequences if name != 'mask'],
        prototype=Linear(), name='gru_fork')
    # map to an energy scalar
    self.wl = Linear(input_dim=dgru_state_dim, output_dim=1)

    self.children = [self.lookup, self.dgru, self.gru_fork,
                     self.bidir_w, self.wl]
class BidirectionalEncoder(Initializable):
    """Encoder of RNNsearch model."""

    def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        self.lookup = LookupTable(name='embeddings')
        self.bidir = NewBidirectional(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'],
            prototype=Linear(), name='fwd_fork')
        self.back_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'],
            prototype=Linear(), name='back_fork')

        self.children = [self.lookup, self.bidir,
                         self.fwd_fork, self.back_fork]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim

        self.fwd_fork.input_dim = self.embedding_dim
        self.fwd_fork.output_dims = [
            self.bidir.children[0].get_dim(name)
            for name in self.fwd_fork.output_names]
        self.back_fork.input_dim = self.embedding_dim
        self.back_fork.output_dims = [
            self.bidir.children[1].get_dim(name)
            for name in self.back_fork.output_names]

    @application(inputs=['source_sentence', 'source_sentence_mask'],
                 outputs=['representation'])
    def apply(self, source_sentence, source_sentence_mask):
        # Time as first dimension.
        source_sentence = source_sentence.T
        source_sentence_mask = source_sentence_mask.T

        embeddings = self.lookup.apply(source_sentence)
        representation = self.bidir.apply(
            # Conversion to embedding representation here.
            merge(self.fwd_fork.apply(embeddings, as_dict=True),
                  {'mask': source_sentence_mask}),
            merge(self.back_fork.apply(embeddings, as_dict=True),
                  {'mask': source_sentence_mask}))
        self.representation = representation
        return representation
def __init__(self, dimension, alphabet_size, **kwargs):
    super(WordReverser, self).__init__(**kwargs)
    encoder = Bidirectional(
        SimpleRecurrent(dim=dimension, activation=Tanh()))
    fork = Fork([name for name in encoder.prototype.apply.sequences
                 if name != 'mask'])
    fork.input_dim = dimension
    fork.output_dims = [dimension for name in fork.input_names]
    lookup = LookupTable(alphabet_size, dimension)
    transition = SimpleRecurrent(
        activation=Tanh(), dim=dimension, name="transition")
    attention = SequenceContentAttention(
        state_names=transition.apply.states,
        attended_dim=2 * dimension, match_dim=dimension, name="attention")
    readout = Readout(
        readout_dim=alphabet_size,
        source_names=[transition.apply.states[0],
                      attention.take_glimpses.outputs[0]],
        emitter=SoftmaxEmitter(name="emitter"),
        feedback_brick=LookupFeedback(alphabet_size, dimension),
        name="readout")
    generator = SequenceGenerator(
        readout=readout, transition=transition, attention=attention,
        name="generator")
    self.lookup = lookup
    self.fork = fork
    self.encoder = encoder
    self.generator = generator
    self.children = [lookup, fork, encoder, generator]
def __init__(self, dimension, alphabet_size, **kwargs):
    super(WordReverser, self).__init__(**kwargs)
    encoder = Bidirectional(
        SimpleRecurrent(dim=dimension, activation=Tanh()))
    fork = Fork([name for name in encoder.prototype.apply.sequences
                 if name != 'mask'])
    fork.input_dim = dimension
    fork.output_dims = [encoder.prototype.get_dim(name)
                        for name in fork.input_names]
    lookup = LookupTable(alphabet_size, dimension)
    transition = SimpleRecurrent(
        activation=Tanh(), dim=dimension, name="transition")
    attention = SequenceContentAttention(
        state_names=transition.apply.states,
        attended_dim=2 * dimension, match_dim=dimension, name="attention")
    readout = Readout(
        readout_dim=alphabet_size,
        source_names=[transition.apply.states[0],
                      attention.take_glimpses.outputs[0]],
        emitter=SoftmaxEmitter(name="emitter"),
        feedback_brick=LookupFeedback(alphabet_size, dimension),
        name="readout")
    generator = SequenceGenerator(
        readout=readout, transition=transition, attention=attention,
        name="generator")
    self.lookup = lookup
    self.fork = fork
    self.encoder = encoder
    self.generator = generator
    self.children = [lookup, fork, encoder, generator]
def __init__(self, nvis, nhid, encoding_mlp, encoding_lstm, decoding_mlp,
             decoding_lstm, T=1, **kwargs):
    super(DRAW, self).__init__(**kwargs)
    self.nvis = nvis
    self.nhid = nhid
    self.T = T

    self.encoding_mlp = encoding_mlp
    self.encoding_mlp.name = 'encoder_mlp'
    for i, child in enumerate(self.encoding_mlp.children):
        child.name = '{}_{}'.format(self.encoding_mlp.name, i)
    self.encoding_lstm = encoding_lstm
    self.encoding_lstm.name = 'encoder_lstm'
    self.encoding_parameter_mapping = Fork(
        output_names=['mu_phi', 'log_sigma_phi'], prototype=Linear())

    self.decoding_mlp = decoding_mlp
    self.decoding_mlp.name = 'decoder_mlp'
    for i, child in enumerate(self.decoding_mlp.children):
        child.name = '{}_{}'.format(self.decoding_mlp.name, i)
    self.decoding_lstm = decoding_lstm
    self.decoding_lstm.name = 'decoder_lstm'
    self.decoding_parameter_mapping = Linear(name='mu_theta')

    self.prior_mu = tensor.zeros((self.nhid,))
    self.prior_mu.name = 'prior_mu'
    self.prior_log_sigma = tensor.zeros((self.nhid,))
    self.prior_log_sigma.name = 'prior_log_sigma'

    self.children = [self.encoding_mlp, self.encoding_lstm,
                     self.encoding_parameter_mapping,
                     self.decoding_mlp, self.decoding_lstm,
                     self.decoding_parameter_mapping]
def example2():
    """GRU"""
    x = tensor.tensor3('x')
    dim = 3

    fork = Fork(input_dim=dim, output_dims=[dim, dim * 2], name='fork',
                output_names=["linear", "gates"],
                weights_init=initialization.Identity(),
                biases_init=Constant(0))
    gru = GatedRecurrent(dim=dim,
                         weights_init=initialization.Identity(),
                         biases_init=Constant(0))
    fork.initialize()
    gru.initialize()

    linear, gate_inputs = fork.apply(x)
    h = gru.apply(linear, gate_inputs)

    f = theano.function([x], h)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))

    doubler = Linear(input_dim=dim, output_dim=dim,
                     weights_init=initialization.Identity(2),
                     biases_init=initialization.Constant(0))
    doubler.initialize()

    lin, gate = fork.apply(doubler.apply(x))
    h_doubler = gru.apply(lin, gate)

    f = theano.function([x], h_doubler)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
def gru_layer(dim, h, n):
    fork = Fork(output_names=['linear' + str(n), 'gates' + str(n)],
                name='fork' + str(n), input_dim=dim,
                output_dims=[dim, dim * 2])
    gru = GatedRecurrent(dim=dim, name='gru' + str(n))
    initialize([fork, gru])
    linear, gates = fork.apply(h)
    return gru.apply(linear, gates)
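# A minimal sketch of the `initialize` helper that gru_layer above relies
# on but does not define (hypothetical; the concrete initializers here are
# an assumption, not taken from the original snippet). It pushes simple
# weight/bias initializers onto each brick and allocates its parameters.
from blocks import initialization


def initialize(bricks):
    for brick in bricks:
        brick.weights_init = initialization.IsotropicGaussian(0.01)
        brick.biases_init = initialization.Constant(0)
        brick.initialize()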
def __init__(self, embedding_dim, state_dim, **kwargs):
    super(BidirectionalEncoder, self).__init__(**kwargs)
    # Dimension of the word embeddings taken as input
    self.embedding_dim = embedding_dim
    # Hidden state dimension
    self.state_dim = state_dim

    # The bidir GRU
    self.bidir = BidirectionalFromDict(
        GatedRecurrent(activation=Tanh(), dim=state_dim))
    # Forks to administer the inputs of GRU gates
    self.fwd_fork = Fork(
        [name for name in self.bidir.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='fwd_fork')
    self.back_fork = Fork(
        [name for name in self.bidir.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='back_fork')

    self.children = [self.bidir, self.fwd_fork, self.back_fork]
def __init__(self, embedding_dim, state_dim, **kwargs):
    """Constructor. Note that this implementation only supports
    single layer architectures.

    Args:
        embedding_dim (int): Dimensionality of the word vectors
                             defined by the sparse feature map.
        state_dim (int): Size of the recurrent layer.
    """
    super(NoLookupEncoder, self).__init__(**kwargs)
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim
    self.bidir = BidirectionalWMT15(
        GatedRecurrent(activation=Tanh(), dim=state_dim))
    self.fwd_fork = Fork(
        [name for name in self.bidir.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='fwd_fork')
    self.back_fork = Fork(
        [name for name in self.bidir.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='back_fork')
    self.children = [self.bidir, self.fwd_fork, self.back_fork]
def __init__(self, transition, input_dim, hidden_dim, rec_weights_init,
             ff_weights_init, biases_init, **kwargs):
    super(RecurrentWithFork, self).__init__(**kwargs)
    self.rec_weights_init = rec_weights_init
    self.ff_weights_init = ff_weights_init
    self.biases_init = biases_init
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim

    self.transition = transition
    self.transition.dim = self.hidden_dim
    self.transition.weights_init = self.rec_weights_init
    self.transition.bias_init = self.biases_init

    self.fork = Fork(
        [name for name in self.transition.apply.sequences
         if name != 'mask'],
        prototype=Linear())
    self.fork.input_dim = self.input_dim
    self.fork.output_dims = [self.transition.apply.brick.get_dim(name)
                             for name in self.fork.output_names]
    self.fork.weights_init = self.ff_weights_init
    self.fork.biases_init = self.biases_init

    self.children = [transition, self.fork]
def __init__(self, recurrent, input_dim, **kwargs):
    super(RecurrentWithFork, self).__init__(**kwargs)
    self.recurrent = recurrent
    self.input_dim = input_dim
    self.fork = Fork(
        [name for name in self.recurrent.sequences if name != 'mask'],
        prototype=Linear())
    self.children = [recurrent.brick, self.fork]
def __init__(self, vocab_size, embedding_dim, igru_state_dim, igru_depth,
             trg_dgru_depth, emitter, feedback_brick, merge=None,
             merge_prototype=None, post_merge=None, **kwargs):
    merged_dim = igru_state_dim
    if not merge:
        merge = Merge(input_names=kwargs['source_names'],
                      prototype=merge_prototype)
    if not post_merge:
        post_merge = Bias(dim=merged_dim)

    # For compatibility with a single-layer IGRU.
    if igru_depth == 1:
        self.igru = IGRU(dim=igru_state_dim)
    else:
        self.igru = RecurrentStack(
            [IGRU(dim=igru_state_dim, name='igru')] +
            [UpperIGRU(dim=igru_state_dim, activation=Tanh(),
                       name='upper_igru' + str(i))
             for i in range(1, igru_depth)],
            skip_connections=True)

    self.embedding_dim = embedding_dim
    self.emitter = emitter
    self.feedback_brick = feedback_brick
    self.merge = merge
    self.post_merge = post_merge
    self.merged_dim = merged_dim
    self.igru_depth = igru_depth
    self.trg_dgru_depth = trg_dgru_depth
    self.lookup = LookupTable(name='embeddings')
    self.vocab_size = vocab_size
    self.igru_state_dim = igru_state_dim
    self.gru_to_softmax = Linear(input_dim=igru_state_dim,
                                 output_dim=vocab_size)
    self.gru_fork = Fork(
        [name for name in self.igru.apply.sequences
         if name != 'mask' and name != 'input_states'],
        prototype=Linear(), name='gru_fork')

    children = [self.emitter, self.feedback_brick, self.merge,
                self.post_merge, self.igru, self.lookup,
                self.gru_to_softmax, self.gru_fork]
    kwargs.setdefault('children', []).extend(children)
    super(Interpolator, self).__init__(**kwargs)
class BidirectionalEncoder(Initializable):
    """Encoder of RNNsearch model."""

    def __init__(self, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        # self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        # self.lookup = LookupTable(name='embeddings')
        self.bidir = BidirectionalWMT15(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'],
            prototype=Linear(), name='fwd_fork')
        self.back_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'],
            prototype=Linear(), name='back_fork')

        self.children = [self.bidir, self.fwd_fork, self.back_fork]

    def _push_allocation_config(self):
        # self.lookup.length = self.vocab_size
        # self.lookup.dim = self.embedding_dim

        self.fwd_fork.input_dim = self.embedding_dim
        self.fwd_fork.output_dims = [
            self.bidir.children[0].get_dim(name)
            for name in self.fwd_fork.output_names]
        self.back_fork.input_dim = self.embedding_dim
        self.back_fork.output_dims = [
            self.bidir.children[1].get_dim(name)
            for name in self.back_fork.output_names]

    @application(inputs=['image_embedding'], outputs=['representation'])
    def apply(self, image_embedding):
        # Time as first dimension
        image_embedding_mask = tensor.ones(image_embedding.shape[:2])
        # print image_embedding.type
        # embeddings = self.lookup.apply(source_sentence)

        representation = self.bidir.apply(
            merge(self.fwd_fork.apply(image_embedding, as_dict=True),
                  {'mask': image_embedding_mask}),
            merge(self.back_fork.apply(image_embedding, as_dict=True),
                  {'mask': image_embedding_mask}))
        return representation
def __init__(self, vocab_size, embedding_dim, n_layers, skip_connections,
             state_dim, **kwargs):
    """Sole constructor.

    Args:
        vocab_size (int): Source vocabulary size
        embedding_dim (int): Dimension of the embedding layer
        n_layers (int): Number of layers. Layers share the same weight
                        matrices.
        skip_connections (bool): Skip connections connect the source
                                 word embeddings directly with deeper
                                 layers to propagate the gradient more
                                 efficiently
        state_dim (int): Number of hidden units in the recurrent layers.
    """
    super(BidirectionalEncoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.n_layers = n_layers
    self.state_dim = state_dim
    self.skip_connections = skip_connections

    self.lookup = LookupTable(name='embeddings')
    if self.n_layers >= 1:
        self.bidir = BidirectionalWMT15(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'],
            prototype=Linear(), name='fwd_fork')
        self.back_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'],
            prototype=Linear(), name='back_fork')
        self.children = [self.lookup, self.bidir,
                         self.fwd_fork, self.back_fork]
        if self.n_layers > 1:  # Deep encoder
            self.mid_fwd_fork = Fork(
                [name for name in self.bidir.prototype.apply.sequences
                 if name != 'mask'],
                prototype=Linear(), name='mid_fwd_fork')
            self.mid_back_fork = Fork(
                [name for name in self.bidir.prototype.apply.sequences
                 if name != 'mask'],
                prototype=Linear(), name='mid_back_fork')
            self.children.append(self.mid_fwd_fork)
            self.children.append(self.mid_back_fork)
    elif self.n_layers == 0:
        self.embedding_dim = state_dim * 2
        self.children = [self.lookup]
    else:
        logging.fatal("Number of encoder layers must be non-negative")
def gru_layer(dim, h, n):
    fork = Fork(
        output_names=["linear" + str(n), "gates" + str(n)],
        name="fork" + str(n),
        input_dim=dim,
        output_dims=[dim, dim * 2],
    )
    gru = GatedRecurrent(dim=dim, name="gru" + str(n))
    initialize([fork, gru])
    linear, gates = fork.apply(h)
    return gru.apply(linear, gates)
def gru_layer(dim, h, n, x_mask, first, **kwargs):
    fork = Fork(output_names=['linear' + str(n), 'gates' + str(n)],
                name='fork' + str(n), input_dim=dim,
                output_dims=[dim, dim * 2])
    gru = GatedRecurrent(dim=dim, name='gru' + str(n))
    initialize([fork, gru])
    linear, gates = fork.apply(h)
    if first:
        gruApply = gru.apply(linear, gates, mask=x_mask, **kwargs)
    else:
        gruApply = gru.apply(linear, gates, **kwargs)
    return gruApply
class BidirectionalEncoder(Initializable):
    """Bidirectional GRU encoder."""

    def __init__(self, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        # Dimension of the word embeddings taken as input
        self.embedding_dim = embedding_dim
        # Hidden state dimension
        self.state_dim = state_dim

        # The bidir GRU
        self.bidir = BidirectionalFromDict(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        # Forks to administer the inputs of GRU gates
        self.fwd_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'],
            prototype=Linear(), name='fwd_fork')
        self.back_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'],
            prototype=Linear(), name='back_fork')

        self.children = [self.bidir, self.fwd_fork, self.back_fork]

    def _push_allocation_config(self):
        self.fwd_fork.input_dim = self.embedding_dim
        self.fwd_fork.output_dims = [
            self.bidir.children[0].get_dim(name)
            for name in self.fwd_fork.output_names]
        self.back_fork.input_dim = self.embedding_dim
        self.back_fork.output_dims = [
            self.bidir.children[1].get_dim(name)
            for name in self.back_fork.output_names]

    @application(inputs=['source_sentence_tbf', 'source_sentence_mask_tb'],
                 outputs=['representation'])
    def apply(self, source_sentence_tbf, source_sentence_mask_tb=None):
        representation_tbf = self.bidir.apply(
            merge(self.fwd_fork.apply(source_sentence_tbf, as_dict=True),
                  {'mask': source_sentence_mask_tb}),
            merge(self.back_fork.apply(source_sentence_tbf, as_dict=True),
                  {'mask': source_sentence_mask_tb}))
        return representation_tbf
def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
    super(BidirectionalEncoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim

    self.lookup = LookupTable(name='embeddings')
    self.bidir = BidirectionalWMT15(
        GatedRecurrent(activation=Tanh(), dim=state_dim))
    self.fwd_fork = Fork(
        [name for name in self.bidir.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='fwd_fork')
    self.back_fork = Fork(
        [name for name in self.bidir.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='back_fork')

    self.children = [self.lookup, self.bidir,
                     self.fwd_fork, self.back_fork]
class BidirectionalEncoder(Initializable):
    """Encoder of RNNsearch model."""

    def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        self.lookup = LookupTable(name='embeddings')
        self.bidir = BidirectionalWMT15(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'],
            prototype=Linear(), name='fwd_fork')
        self.back_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'],
            prototype=Linear(), name='back_fork')

        self.children = [self.lookup, self.bidir,
                         self.fwd_fork, self.back_fork]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim

        self.fwd_fork.input_dim = self.embedding_dim
        self.fwd_fork.output_dims = [
            self.bidir.children[0].get_dim(name)
            for name in self.fwd_fork.output_names]
        self.back_fork.input_dim = self.embedding_dim
        self.back_fork.output_dims = [
            self.bidir.children[1].get_dim(name)
            for name in self.back_fork.output_names]

    @application(inputs=['source_sentence', 'source_sentence_mask'],
                 outputs=['representation'])
    def apply(self, source_sentence, source_sentence_mask):
        # Time as first dimension
        source_sentence = source_sentence.T
        source_sentence_mask = source_sentence_mask.T

        embeddings = self.lookup.apply(source_sentence)
        representation = self.bidir.apply(
            merge(self.fwd_fork.apply(embeddings, as_dict=True),
                  {'mask': source_sentence_mask}),
            merge(self.back_fork.apply(embeddings, as_dict=True),
                  {'mask': source_sentence_mask}))
        return representation
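# Hedged usage sketch for the BidirectionalEncoder above: construct it,
# push simple initializers, and apply it to a batch of token indices.
# The vocabulary size and dimensions below are illustrative assumptions,
# not values taken from the original code.
from theano import tensor
from blocks.initialization import Constant, IsotropicGaussian

encoder = BidirectionalEncoder(vocab_size=30000, embedding_dim=620,
                               state_dim=1000)
encoder.weights_init = IsotropicGaussian(0.01)
encoder.biases_init = Constant(0)
encoder.initialize()

source = tensor.lmatrix('source_sentence')            # (batch, time)
source_mask = tensor.matrix('source_sentence_mask')   # (batch, time)
# The returned representation has shape (time, batch, 2 * state_dim).
representation = encoder.apply(source, source_mask)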
def __init__(self, vocab_size, embedding_dim, n_layers, skip_connections,
             state_dim, **kwargs):
    """Sole constructor.

    Args:
        vocab_size (int): Source vocabulary size
        embedding_dim (int): Dimension of the embedding layer
        n_layers (int): Number of layers. Layers share the same weight
                        matrices.
        skip_connections (bool): Skip connections connect the source
                                 word embeddings directly with deeper
                                 layers to propagate the gradient more
                                 efficiently
        state_dim (int): Number of hidden units in the recurrent layers.
    """
    super(DeepBidirectionalEncoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.n_layers = n_layers
    self.state_dim = state_dim
    self.skip_connections = skip_connections

    self.lookup = LookupTable(name='embeddings')
    self.bidirs = []
    self.fwd_forks = []
    self.back_forks = []
    for i in xrange(self.n_layers):
        bidir = BidirectionalWMT15(
            GatedRecurrent(activation=Tanh(), dim=state_dim),
            name='bidir%d' % i)
        self.bidirs.append(bidir)
        self.fwd_forks.append(Fork(
            [name for name in bidir.prototype.apply.sequences
             if name != 'mask'],
            prototype=Linear(), name='fwd_fork%d' % i))
        self.back_forks.append(Fork(
            [name for name in bidir.prototype.apply.sequences
             if name != 'mask'],
            prototype=Linear(), name='back_fork%d' % i))
    self.children = [self.lookup] \
        + self.bidirs \
        + self.fwd_forks \
        + self.back_forks
def __init__(self, transition, num_params, params_name,
             weights_init, biases_init, **kwargs):
    super(AddParameters, self).__init__(**kwargs)
    update_instance(self, locals())

    self.input_names = [name for name in transition.apply.sequences
                        if name != 'mask']
    self.state_name = transition.apply.states[0]
    assert len(transition.apply.states) == 1

    self.fork = Fork(self.input_names)
    # Could be also several init bricks, one for each of the states
    self.init = MLP([Identity()], name="init")
    self.children = [self.transition, self.fork, self.init]
def __init__(self, readout, transition, attention=None, fork_inputs=None,
             add_contexts=True, **kwargs):
    if not fork_inputs:
        fork_inputs = [name for name in transition.apply.sequences
                       if name != 'mask']

    fork = Fork(fork_inputs)
    if attention:
        distribute = Distribute(fork_inputs,
                                attention.take_glimpses.outputs[0])
        transition = AttentionRecurrent(
            transition, attention, distribute,
            add_contexts=add_contexts, name="att_trans")
    else:
        transition = FakeAttentionRecurrent(transition,
                                            name="with_fake_attention")
    super(SequenceGenerator, self).__init__(readout, transition, fork,
                                            **kwargs)
class RecurrentWithFork(Initializable):

    @lazy(allocation=['input_dim'])
    def __init__(self, recurrent, input_dim, **kwargs):
        super(RecurrentWithFork, self).__init__(**kwargs)
        self.recurrent = recurrent
        self.input_dim = input_dim
        self.fork = Fork(
            [name for name in self.recurrent.sequences if name != 'mask'],
            prototype=Linear())
        self.children = [recurrent.brick, self.fork]

    def _push_allocation_config(self):
        self.fork.input_dim = self.input_dim
        self.fork.output_dims = [self.recurrent.brick.get_dim(name)
                                 for name in self.fork.output_names]

    @application(inputs=['input_', 'mask'])
    def apply(self, input_, mask=None, **kwargs):
        return self.recurrent(
            mask=mask,
            **dict_union(self.fork.apply(input_, as_dict=True), kwargs))

    @apply.property('outputs')
    def apply_outputs(self):
        return self.recurrent.states
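# Hedged usage sketch for the RecurrentWithFork variant above: it expects
# a recurrent *application* (hence `recurrent.sequences` and
# `recurrent.brick`), so it is typically wrapped around e.g. GRU.apply.
# The dimensions and names below are illustrative assumptions.
from blocks.bricks import Tanh
from blocks.bricks.recurrent import GatedRecurrent

gru = GatedRecurrent(activation=Tanh(), dim=100)
layer = RecurrentWithFork(gru.apply, input_dim=50, name='encoder_layer')
# Once the enclosing model pushes weights_init/biases_init and calls
# initialize(), layer.apply(input_, mask=mask) forks `input_` into the
# GRU's 'inputs' and 'gate_inputs' sequences and runs the recurrence.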
def __init__(self, readout, transition, dim_dec, attention=None,
             add_contexts=True, pointer_weight=0.5,
             transition_with_att_class=None, use_word_annotations=False,
             **kwargs):
    super(Generator, self).__init__(**kwargs)
    self.inputs = [name for name in transition.apply.sequences
                   if 'mask' not in name]
    self.dim_dec = dim_dec
    self.pointer_weight = pointer_weight
    fork = Fork(self.inputs)
    kwargs.setdefault('fork', fork)
    if attention:
        transition = transition_with_att_class(
            transition, attention, add_contexts=add_contexts,
            name="att_trans")
    else:
        transition = FakeAttentionRecurrent(transition,
                                            name="with_fake_attention")
    self.readout = readout
    self.transition = transition
    self.fork = fork
    self.children = [self.readout, self.fork, self.transition]
    self.use_word_annotations = use_word_annotations
    if use_word_annotations:
        self.word_annotation_preprocessor = Linear(
            name='input_attention_preprocessor', bias=False)
        self.children.append(self.word_annotation_preprocessor)
class RecurrentWithFork(Initializable):

    @lazy(allocation=['input_dim'])
    def __init__(self, transition, input_dim, hidden_dim, rec_weights_init,
                 ff_weights_init, biases_init, **kwargs):
        super(RecurrentWithFork, self).__init__(**kwargs)
        self.rec_weights_init = rec_weights_init
        self.ff_weights_init = ff_weights_init
        self.biases_init = biases_init
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        self.transition = transition
        self.transition.dim = self.hidden_dim
        self.transition.weights_init = self.rec_weights_init
        self.transition.bias_init = self.biases_init

        self.fork = Fork(
            [name for name in self.transition.apply.sequences
             if name != 'mask'],
            prototype=Linear())
        self.fork.input_dim = self.input_dim
        self.fork.output_dims = [self.transition.apply.brick.get_dim(name)
                                 for name in self.fork.output_names]
        self.fork.weights_init = self.ff_weights_init
        self.fork.biases_init = self.biases_init

        self.children = [transition, self.fork]

    # def _push_allocation_config(self):
    #     # super(RecurrentWithFork, self)._push_allocation_config()
    #     self.transition.dim = self.hidden_dim
    #     self.fork.input_dim = self.input_dim
    #     self.fork.output_dims = [self.transition.apply.brick.get_dim(name)
    #                              for name in self.fork.output_names]

    # def _push_initialization_config(self):
    #     # super(RecurrentWithFork, self)._push_initialization_config()
    #     self.fork.weights_init = self.ff_weights_init
    #     self.fork.biases_init = self.biases_init
    #     self.transition.weights_init = self.rec_weights_init
    #     self.transition.bias_init = self.biases_init

    @application(inputs=['input_', 'mask'])
    def apply(self, input_, mask=None, **kwargs):
        states = self.transition.apply(
            mask=mask,
            **dict_union(self.fork.apply(input_, as_dict=True), kwargs))
        # Blocks returns a list [states, cells] for an LSTM but just
        # states (no list) for a GRU or plain RNN. We only want the
        # LSTM's states; cells should not be visible from outside.
        return states[0] if isinstance(states, list) else states

    @apply.property('outputs')
    def apply_outputs(self):
        return self.transition.apply.states
def __init__(self, readout, transition, attention=None, fork_inputs=None,
             **kwargs):
    if not fork_inputs:
        fork_inputs = [name for name in transition.apply.sequences
                       if name != 'mask']

    fork = Fork(fork_inputs)
    if attention:
        mixer = Mixer(fork_inputs, attention.take_look.outputs[0],
                      name="mixer")
        transition = AttentionTransition(transition, attention, mixer,
                                         name="att_trans")
    else:
        transition = FakeAttentionTransition(transition,
                                             name="with_fake_attention")
    super(SequenceGenerator, self).__init__(readout, transition, fork,
                                            **kwargs)
def __init__(self, transition, input_dim, hidden_dim, rec_weights_init,
             ff_weights_init, biases_init, **kwargs):
    super(RecurrentWithFork, self).__init__(**kwargs)
    self.rec_weights_init = rec_weights_init
    self.ff_weights_init = ff_weights_init
    self.biases_init = biases_init
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim

    self.transition = transition
    self.transition.dim = self.hidden_dim
    self.transition.weights_init = self.rec_weights_init
    self.transition.bias_init = self.biases_init

    self.fork = Fork(
        [name for name in self.transition.apply.sequences
         if name != 'mask'],
        prototype=Linear())
    self.fork.input_dim = self.input_dim
    self.fork.output_dims = [self.transition.apply.brick.get_dim(name)
                             for name in self.fork.output_names]
    self.fork.weights_init = self.ff_weights_init
    self.fork.biases_init = self.biases_init

    self.children = [transition, self.fork]
def __init__(self, inner_input_dim, outer_input_dim, inner_dim, **kwargs):
    self.inner_gru = GatedRecurrent(dim=inner_dim, name='inner_gru')
    self.inner_input_fork = Fork(
        output_names=[name for name in self.inner_gru.apply.sequences
                      if 'mask' not in name],
        input_dim=inner_input_dim, name='inner_input_fork')
    self.outer_input_fork = Fork(
        output_names=[name for name in self.inner_gru.apply.sequences
                      if 'mask' not in name],
        input_dim=outer_input_dim, name='inner_outer_fork')

    super(InnerRecurrent, self).__init__(**kwargs)

    self.children = [
        self.inner_gru, self.inner_input_fork, self.outer_input_fork]
class RecurrentWithFork(Initializable):
    # Obtained from Dima's code. @rizar
    # https://github.com/rizar/attention-lvcsr/blob/master/lvsr/bricks/__init__.py

    @lazy(allocation=['input_dim'])
    def __init__(self, recurrent, input_dim, **kwargs):
        super(RecurrentWithFork, self).__init__(**kwargs)
        self.recurrent = recurrent
        self.input_dim = input_dim
        self.fork = Fork(
            [name for name in self.recurrent.sequences if name != 'mask'],
            prototype=Linear())
        self.children = [recurrent.brick, self.fork]

    def _push_allocation_config(self):
        self.fork.input_dim = self.input_dim
        self.fork.output_dims = [self.recurrent.brick.get_dim(name)
                                 for name in self.fork.output_names]

    @application(inputs=['input_', 'mask'])
    def apply(self, input_, mask=None, **kwargs):
        return self.recurrent(
            mask=mask,
            **dict_union(self.fork.apply(input_, as_dict=True), kwargs))

    @apply.property('outputs')
    def apply_outputs(self):
        return self.recurrent.states
class RecurrentWithFork(Initializable):

    @lazy(allocation=['input_dim'])
    def __init__(self, proto, input_dim, **kwargs):
        super(RecurrentWithFork, self).__init__(**kwargs)
        self.recurrent = proto
        self.input_dim = input_dim
        self.fork = Fork(
            [name for name in self.recurrent.apply.sequences
             if name != 'mask'],
            prototype=Linear())
        self.children = [self.recurrent, self.fork]

    def _push_allocation_config(self):
        self.fork.input_dim = self.input_dim
        self.fork.output_dims = [self.recurrent.get_dim(name)
                                 for name in self.fork.output_names]

    @application(inputs=['input_', 'mask'])
    def apply(self, input_, mask=None, **kwargs):
        return self.recurrent.apply(
            mask=mask,
            **dict_union(self.fork.apply(input_, as_dict=True), kwargs))

    @apply.property('outputs')
    def apply_outputs(self):
        return self.recurrent.states
def __init__(self, trg_space_idx, readout, transition, attention=None,
             transition_depth=1, igru_depth=1, trg_dgru_depth=1,
             add_contexts=True, **kwargs):
    self.trg_space_idx = trg_space_idx
    self.transition_depth = transition_depth
    self.igru_depth = igru_depth
    self.trg_dgru_depth = trg_dgru_depth
    self.igru_states_name = [
        'igru_states' + RECURRENTSTACK_SEPARATOR + str(i)
        for i in range(self.igru_depth)]
    self.feedback_name = [
        'feedback' + RECURRENTSTACK_SEPARATOR + str(i)
        for i in range(self.trg_dgru_depth)]

    normal_inputs = [name for name in transition.apply.sequences
                     if 'mask' not in name]
    kwargs.setdefault('fork', Fork(normal_inputs))
    transition = AttentionRecurrent(transition, attention,
                                    add_contexts=add_contexts,
                                    name="att_trans")
    super(SequenceGeneratorDCNMT, self).__init__(readout, transition,
                                                 **kwargs)
def __init__(self, path, nn_char_map, no_transition_cost=1e12, **kwargs):
    # Since only FSTs are supported so far, the type argument is ignored.
    # if type_ != 'fst':
    #     raise ValueError("Supports only FST's so far.")
    fst = FST(path)
    fst_char_map = dict(fst.fst.isyms.items())
    del fst_char_map['<eps>']
    if not len(fst_char_map) == len(nn_char_map):
        raise ValueError()
    remap_table = {nn_char_map[character]: fst_code
                   for character, fst_code in fst_char_map.items()}
    transition = FSTTransition(fst, remap_table, no_transition_cost)

    # This SequenceGenerator will be used only in a very limited way.
    # That's why it is sufficient to equip it with a completely
    # fake readout.
    dummy_readout = Readout(
        source_names=['add'], readout_dim=len(remap_table),
        merge=Merge(input_names=['costs'], prototype=Identity()),
        post_merge=Identity(),
        emitter=SoftmaxEmitter())
    super(LanguageModel, self).__init__(
        transition=transition,
        fork=Fork(output_names=[name for name in transition.apply.sequences
                                if name != 'mask'],
                  prototype=Identity()),
        readout=dummy_readout, **kwargs)
def __init__(self, vocab_size, embedding_dim, state_dim, representation_dim,
             theano_seed=None, **kwargs):
    super(Decoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim
    self.representation_dim = representation_dim
    self.theano_seed = theano_seed

    # Initialize gru with special initial state.
    self.transition = GRUInitialState(attended_dim=state_dim,
                                      dim=state_dim,
                                      activation=Tanh(),
                                      name='decoder')

    # Initialize the attention mechanism.
    self.attention = SequenceContentAttention2(
        state_names=self.transition.apply.states,
        attended_dim=representation_dim,
        match_dim=state_dim, name="attention")

    readout = Readout(
        source_names=['states', 'feedback',
                      self.attention.take_glimpses.outputs[0]],
        readout_dim=self.vocab_size,
        emitter=NewSoftmaxEmitter(initial_output=-1,
                                  theano_seed=theano_seed),
        feedback_brick=NewLookupFeedback(vocab_size, embedding_dim),
        post_merge=InitializableFeedforwardSequence([
            Bias(dim=state_dim, name='maxout_bias').apply,
            Maxout(num_pieces=2, name='maxout').apply,
            Linear(input_dim=state_dim / 2, output_dim=embedding_dim,
                   use_bias=False, name='softmax0').apply,
            Linear(input_dim=embedding_dim, name='softmax1').apply]),
        merged_dim=state_dim)

    # Build sequence generator accordingly.
    self.sequence_generator = SequenceGenerator(
        readout=readout,
        transition=self.transition,
        attention=self.attention,
        fork=Fork([name for name in self.transition.apply.sequences
                   if name != 'mask'],
                  prototype=Linear()),
        cost_type='categorical_cross_entropy')

    self.children = [self.sequence_generator]
class BidirectionalEncoder(Initializable):
    """Bidirectional GRU encoder."""

    def __init__(self, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        # Dimension of the word embeddings taken as input
        self.embedding_dim = embedding_dim
        # Hidden state dimension
        self.state_dim = state_dim

        # The bidir GRU
        self.bidir = BidirectionalFromDict(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        # Forks to administer the inputs of GRU gates
        self.fwd_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'],
            prototype=Linear(), name='fwd_fork')
        self.back_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'],
            prototype=Linear(), name='back_fork')

        self.children = [self.bidir, self.fwd_fork, self.back_fork]

    def _push_allocation_config(self):
        self.fwd_fork.input_dim = self.embedding_dim
        self.fwd_fork.output_dims = [
            self.bidir.children[0].get_dim(name)
            for name in self.fwd_fork.output_names]
        self.back_fork.input_dim = self.embedding_dim
        self.back_fork.output_dims = [
            self.bidir.children[1].get_dim(name)
            for name in self.back_fork.output_names]

    @application(inputs=['source_sentence_tbf', 'source_sentence_mask_tb'],
                 outputs=['representation'])
    def apply(self, source_sentence_tbf, source_sentence_mask_tb=None):
        representation_tbf = self.bidir.apply(
            merge(self.fwd_fork.apply(source_sentence_tbf, as_dict=True),
                  {'mask': source_sentence_mask_tb}),
            merge(self.back_fork.apply(source_sentence_tbf, as_dict=True),
                  {'mask': source_sentence_mask_tb}))
        return representation_tbf
class InnerRecurrent(BaseRecurrent, Initializable):
    def __init__(self, inner_input_dim, outer_input_dim, inner_dim,
                 **kwargs):
        self.inner_gru = GatedRecurrent(dim=inner_dim, name='inner_gru')
        self.inner_input_fork = Fork(
            output_names=[name for name in self.inner_gru.apply.sequences
                          if 'mask' not in name],
            input_dim=inner_input_dim, name='inner_input_fork')
        self.outer_input_fork = Fork(
            output_names=[name for name in self.inner_gru.apply.sequences
                          if 'mask' not in name],
            input_dim=outer_input_dim, name='inner_outer_fork')

        super(InnerRecurrent, self).__init__(**kwargs)

        self.children = [
            self.inner_gru, self.inner_input_fork, self.outer_input_fork]

    def _push_allocation_config(self):
        self.inner_input_fork.output_dims = self.inner_gru.get_dims(
            self.inner_input_fork.output_names)
        self.outer_input_fork.output_dims = self.inner_gru.get_dims(
            self.outer_input_fork.output_names)

    @recurrent(sequences=['inner_inputs'], states=['states'],
               contexts=['outer_inputs'], outputs=['states'])
    def apply(self, inner_inputs, states, outer_inputs):
        forked_inputs = self.inner_input_fork.apply(inner_inputs,
                                                    as_dict=True)
        forked_states = self.outer_input_fork.apply(outer_inputs,
                                                    as_dict=True)

        gru_inputs = {key: forked_inputs[key] + forked_states[key]
                      for key in forked_inputs.keys()}

        new_states = self.inner_gru.apply(
            iterate=False,
            **dict_union(gru_inputs, {'states': states}))
        return new_states  # mean according to the time axis

    def get_dim(self, name):
        if name == 'states':
            return self.inner_gru.get_dim(name)
        else:
            raise AttributeError(name)
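# Hedged usage sketch for InnerRecurrent above: the outer input acts as a
# per-sequence context that is forked and added to the forked inner inputs
# at every step. All dimensions and variable names are illustrative
# assumptions, not taken from the original code.
from theano import tensor
from blocks.initialization import Constant, IsotropicGaussian

inner = InnerRecurrent(inner_input_dim=32, outer_input_dim=16, inner_dim=64)
inner.weights_init = IsotropicGaussian(0.02)
inner.biases_init = Constant(0)
inner.initialize()

inner_seq = tensor.tensor3('inner_inputs')   # (time, batch, 32)
outer_ctx = tensor.matrix('outer_inputs')    # (batch, 16)
states = inner.apply(inner_inputs=inner_seq, outer_inputs=outer_ctx)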
class RecurrentWithFork(Initializable):

    @lazy(allocation=['input_dim'])
    def __init__(self, transition, input_dim, hidden_dim, rec_weights_init,
                 ff_weights_init, biases_init, **kwargs):
        super(RecurrentWithFork, self).__init__(**kwargs)
        self.rec_weights_init = rec_weights_init
        self.ff_weights_init = ff_weights_init
        self.biases_init = biases_init
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        self.transition = transition
        self.transition.dim = self.hidden_dim
        self.transition.weights_init = self.rec_weights_init
        self.transition.bias_init = self.biases_init

        self.fork = Fork(
            [name for name in self.transition.apply.sequences
             if name != 'mask'],
            prototype=Linear())
        self.fork.input_dim = self.input_dim
        self.fork.output_dims = [self.transition.apply.brick.get_dim(name)
                                 for name in self.fork.output_names]
        self.fork.weights_init = self.ff_weights_init
        self.fork.biases_init = self.biases_init

        self.children = [transition, self.fork]

    # def _push_allocation_config(self):
    #     # super(RecurrentWithFork, self)._push_allocation_config()
    #     self.transition.dim = self.hidden_dim
    #     self.fork.input_dim = self.input_dim
    #     self.fork.output_dims = [self.transition.apply.brick.get_dim(name)
    #                              for name in self.fork.output_names]

    # def _push_initialization_config(self):
    #     # super(RecurrentWithFork, self)._push_initialization_config()
    #     self.fork.weights_init = self.ff_weights_init
    #     self.fork.biases_init = self.biases_init
    #     self.transition.weights_init = self.rec_weights_init
    #     self.transition.bias_init = self.biases_init

    @application(inputs=['input_', 'mask'])
    def apply(self, input_, mask=None, **kwargs):
        states = self.transition.apply(
            mask=mask,
            **dict_union(self.fork.apply(input_, as_dict=True), kwargs))
        # Blocks returns a list [states, cells] for an LSTM but just
        # states (no list) for a GRU or plain RNN. We only want the
        # LSTM's states; cells should not be visible from outside.
        return states[0] if isinstance(states, list) else states

    @apply.property('outputs')
    def apply_outputs(self):
        return self.transition.apply.states
def __init__(self, embedding_dim, state_dim, **kwargs):
    super(BidirectionalEncoder, self).__init__(**kwargs)
    # Dimension of the word embeddings taken as input
    self.embedding_dim = embedding_dim
    # Hidden state dimension
    self.state_dim = state_dim

    # The bidir GRU
    self.bidir = BidirectionalFromDict(
        GatedRecurrent(activation=Tanh(), dim=state_dim))
    # Forks to administer the inputs of GRU gates
    self.fwd_fork = Fork(
        [name for name in self.bidir.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='fwd_fork')
    self.back_fork = Fork(
        [name for name in self.bidir.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='back_fork')

    self.children = [self.bidir, self.fwd_fork, self.back_fork]
def __init__(self, embedding_dim, state_dim, **kwargs):
    """Constructor. Note that this implementation only supports
    single layer architectures.

    Args:
        embedding_dim (int): Dimensionality of the word vectors
                             defined by the sparse feature map.
        state_dim (int): Size of the recurrent layer.
    """
    super(NoLookupEncoder, self).__init__(**kwargs)
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim
    self.bidir = BidirectionalWMT15(
        GatedRecurrent(activation=Tanh(), dim=state_dim))
    self.fwd_fork = Fork(
        [name for name in self.bidir.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='fwd_fork')
    self.back_fork = Fork(
        [name for name in self.bidir.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='back_fork')
    self.children = [self.bidir, self.fwd_fork, self.back_fork]
def build_fork_lookup(vocab_size, args):
    x = tensor.lmatrix('features')
    virtual_dim = 6
    time_length = 5
    mini_batch_size = 2
    skip_connections = True
    layers = 3

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(virtual_dim)

    print(output_names)
    print(output_dims)

    lookup = LookupTable(length=vocab_size, dim=virtual_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names, input_dim=time_length,
                output_dims=output_dims,
                prototype=FeedforwardSequence([lookup.apply]))

    # Return a list of 3D Tensors, one for each layer
    # (Batch X Time X embedding_dim)
    pre_rnn = fork.apply(x)
    fork.initialize()

    f = theano.function([x], pre_rnn)
    return f
def __init__(self, blockid, vocab_size, embedding_dim, state_dim, **kwargs):
    super(Encoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim
    self.blockid = blockid

    self.lookup = LookupTable(name='embeddings' + '_' + self.blockid)
    self.gru = GatedRecurrent(activation=Tanh(), dim=state_dim,
                              name="GatedRNN" + self.blockid)
    self.fwd_fork = Fork(
        [name for name in self.gru.apply.sequences if name != 'mask'],
        prototype=Linear(), name='fwd_fork' + '_' + self.blockid)

    self.children = [self.lookup, self.gru, self.fwd_fork]
def __init__(self, vocab_size, embedding_dim, state_dim, reverse=True,
             **kwargs):
    super(Encoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim
    self.reverse = reverse

    self.lookup = LookupTable(name='embeddings')
    self.transition = GatedRecurrent(Tanh(), name='encoder_transition')
    self.fork = Fork(
        [name for name in self.transition.apply.sequences
         if name != 'mask'],
        prototype=Linear())

    self.children = [self.lookup, self.transition, self.fork]
def __init__(self, transition, num_params, params_name,
             weights_init, biases_init, **kwargs):
    super(AddParameters, self).__init__(**kwargs)
    update_instance(self, locals())

    self.input_names = [name for name in transition.apply.sequences
                        if name != 'mask']
    self.state_name = transition.apply.states[0]
    assert len(transition.apply.states) == 1

    self.fork = Fork(self.input_names)
    # Could be also several init bricks, one for each of the states
    self.init = MLP([Identity()], name="init")
    self.children = [self.transition, self.fork, self.init]
def __init__(self, config, output_dim=2, **kwargs):
    super(BidiRNN, self).__init__(**kwargs)
    self.config = config

    self.context_embedder = ContextEmbedder(config)

    act = (config.rec_activation()
           if hasattr(config, 'rec_activation') else None)
    self.rec = SegregatedBidirectional(
        LSTM(dim=config.hidden_state_dim, activation=act,
             name='recurrent'))

    self.fwd_fork = Fork(
        [name for name in self.rec.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='fwd_fork')
    self.bkwd_fork = Fork(
        [name for name in self.rec.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='bkwd_fork')

    rto_in = (config.hidden_state_dim * 2 +
              sum(x[2] for x in config.dim_embeddings))
    self.rec_to_output = MLP(
        activations=[Rectifier() for _ in config.dim_hidden] + [Identity()],
        dims=[rto_in] + config.dim_hidden + [output_dim])

    self.sequences = ['latitude', 'latitude_mask', 'longitude']
    self.inputs = self.sequences + self.context_embedder.inputs
    self.children = [self.context_embedder, self.fwd_fork, self.bkwd_fork,
                     self.rec, self.rec_to_output]
def __init__(self, config, output_dim, activation, **kwargs):
    super(RecurrentEncoder, self).__init__(**kwargs)
    self.config = config

    self.context_embedder = ContextEmbedder(config)
    self.rec = SegregatedBidirectional(
        LSTM(dim=config.rec_state_dim, name='encoder_recurrent'))
    self.fwd_fork = Fork(
        [name for name in self.rec.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='fwd_fork')
    self.bkwd_fork = Fork(
        [name for name in self.rec.prototype.apply.sequences
         if name != 'mask'],
        prototype=Linear(), name='bkwd_fork')

    rto_in = (config.rec_state_dim * 2 +
              sum(x[2] for x in config.dim_embeddings))
    self.rec_to_output = MLP(
        activations=[Rectifier() for _ in config.dim_hidden] + [activation],
        dims=[rto_in] + config.dim_hidden + [output_dim],
        name='encoder_rto')

    self.children = [self.context_embedder, self.rec, self.fwd_fork,
                     self.bkwd_fork, self.rec_to_output]

    self.rec_inputs = ['latitude', 'longitude', 'latitude_mask']
    self.inputs = self.context_embedder.inputs + self.rec_inputs
class Feedback(Initializable):
    """Feedback.

    Attributes
    ----------
    output_names : list
    output_dims : dict

    """
    @lazy(allocation=['output_names', 'output_dims'])
    def __init__(self, output_names, output_dims,
                 embedding=None, input_dim=0, **kwargs):
        super(Feedback, self).__init__(**kwargs)

        self.output_names = output_names
        self.output_dims = output_dims
        self.input_dim = input_dim

        self.embedding = embedding
        self.fork = Fork(self.output_names)

        self.apply.inputs = ['input']
        self.apply.outputs = output_names

        self.children = [self.embedding, self.fork]
        self.children = [child for child in self.children if child]

    def _push_allocation_config(self):
        if self.fork:
            self.fork.output_dims = self.output_dims
        else:
            self.embedding.output_dim, = self.output_dims
        if self.embedding:
            self.embedding.input_dim = self.input_dim
            self.fork.input_dim = self.embedding.output_dim
        else:
            self.fork.input_dim = self.input_dim

    @application
    def apply(self, symbols):
        embedded_symbols = symbols
        if self.embedding:
            embedded_symbols = self.embedding.apply(symbols)
        if self.fork:
            return self.fork.apply(embedded_symbols)
        return embedded_symbols
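# Hedged usage sketch for the Feedback brick above, used here without an
# embedding so the fork is applied directly to the input. The output
# names and dimensions are illustrative assumptions.
from theano import tensor
from blocks.initialization import Constant, IsotropicGaussian

feedback = Feedback(output_names=['inputs', 'gate_inputs'],
                    output_dims=[100, 300], input_dim=100)
feedback.weights_init = IsotropicGaussian(0.01)
feedback.biases_init = Constant(0)
feedback.initialize()

symbols = tensor.matrix('symbols')              # (batch, input_dim)
inputs, gate_inputs = feedback.apply(symbols)   # one output per name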
def __init__(self, output_names, output_dims,
             embedding=None, input_dim=0, **kwargs):
    super(Feedback, self).__init__(**kwargs)

    self.output_names = output_names
    self.output_dims = output_dims
    self.input_dim = input_dim

    self.embedding = embedding
    self.fork = Fork(self.output_names)

    self.apply.inputs = ['input']
    self.apply.outputs = output_names

    self.children = [self.embedding, self.fork]
    self.children = [child for child in self.children if child]
class Encoder(Initializable):
    def __init__(self, vocab_size, embedding_dim, state_dim, reverse=True,
                 **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.reverse = reverse

        self.lookup = LookupTable(name='embeddings')
        self.transition = GatedRecurrent(Tanh(), name='encoder_transition')
        self.fork = Fork(
            [name for name in self.transition.apply.sequences
             if name != 'mask'],
            prototype=Linear())

        self.children = [self.lookup, self.transition, self.fork]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim
        self.transition.dim = self.state_dim
        self.fork.input_dim = self.embedding_dim
        self.fork.output_dims = [self.state_dim
                                 for _ in self.fork.output_names]

    @application(inputs=['source_sentence', 'source_sentence_mask'],
                 outputs=['representation'])
    def apply(self, source_sentence, source_sentence_mask):
        # Time as first dimension
        source_sentence = source_sentence.dimshuffle(1, 0)
        source_sentence_mask = source_sentence_mask.T
        if self.reverse:
            source_sentence = source_sentence[::-1]
            source_sentence_mask = source_sentence_mask[::-1]

        embeddings = self.lookup.apply(source_sentence)
        representation = self.transition.apply(**merge(
            self.fork.apply(embeddings, as_dict=True),
            {'mask': source_sentence_mask}))
        return representation[-1]
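# Hedged usage sketch for the Encoder above: it returns only the final
# hidden state (representation[-1]), i.e. a fixed-size sentence vector.
# The vocabulary size and dimensions below are illustrative assumptions.
from theano import tensor
from blocks.initialization import Constant, IsotropicGaussian

encoder = Encoder(vocab_size=10000, embedding_dim=300, state_dim=500)
encoder.weights_init = IsotropicGaussian(0.05)
encoder.biases_init = Constant(0)
encoder.initialize()

src = tensor.lmatrix('source_sentence')            # (batch, time)
src_mask = tensor.matrix('source_sentence_mask')   # (batch, time)
sentence_vector = encoder.apply(src, src_mask)     # (batch, state_dim)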
def __init__(self, vocab_size, embedding_dim, state_dim, representation_dim,
             **kwargs):
    super(Decoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim
    self.representation_dim = representation_dim

    readout = Readout(
        source_names=['states', 'feedback', 'readout_context'],
        readout_dim=self.vocab_size,
        emitter=SoftmaxEmitter(),
        feedback_brick=LookupFeedback(vocab_size, embedding_dim),
        post_merge=InitializableFeedforwardSequence(
            [Bias(dim=1000).apply,
             Maxout(num_pieces=2).apply,
             Linear(input_dim=state_dim / 2, output_dim=100,
                    use_bias=False).apply,
             Linear(input_dim=100).apply]),
        merged_dim=1000)

    self.transition = GatedRecurrentWithContext(Tanh(), dim=state_dim,
                                                name='decoder')
    # Readout will apply the linear transformation to 'readout_context'
    # with a Merge brick, so no need to fork it here
    self.fork = Fork(
        [name for name in self.transition.apply.contexts +
         self.transition.apply.states if name != 'readout_context'],
        prototype=Linear())
    self.tanh = Tanh()

    self.sequence_generator = SequenceGenerator(
        readout=readout, transition=self.transition,
        fork_inputs=[name for name in self.transition.apply.sequences
                     if name != 'mask'],
    )

    self.children = [self.fork, self.sequence_generator, self.tanh]
class Encoder(Initializable):
    """Encoder of RNNsearch model."""

    def __init__(self, blockid, vocab_size, embedding_dim, state_dim,
                 **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.blockid = blockid

        self.lookup = LookupTable(name='embeddings' + '_' + self.blockid)
        self.gru = GatedRecurrent(activation=Tanh(), dim=state_dim,
                                  name="GatedRNN" + self.blockid)
        self.fwd_fork = Fork(
            [name for name in self.gru.apply.sequences if name != 'mask'],
            prototype=Linear(), name='fwd_fork' + '_' + self.blockid)

        self.children = [self.lookup, self.gru, self.fwd_fork]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim

        self.fwd_fork.input_dim = self.embedding_dim
        self.fwd_fork.output_dims = [
            self.gru.get_dim(name)
            for name in self.fwd_fork.output_names]

    @application(inputs=['source_sentence', 'source_sentence_mask'],
                 outputs=['representation'])
    def apply(self, source_sentence, source_sentence_mask):
        # Time as first dimension
        source_sentence = source_sentence.T
        source_sentence_mask = source_sentence_mask.T

        embeddings = self.lookup.apply(source_sentence)
        grupara = merge(self.fwd_fork.apply(embeddings, as_dict=True),
                        {'mask': source_sentence_mask})
        representation = self.gru.apply(**grupara)
        return representation
def get_prernn(args):
    # time x batch
    x_mask = tensor.fmatrix('mask')

    # Compute the state dim
    if args.rnn_type == 'lstm':
        state_dim = 4 * args.state_dim
    else:
        state_dim = args.state_dim

    # Prepare the arguments for the fork
    output_names = []
    output_dims = []
    for d in range(args.layers):
        if d > 0:
            suffix = RECURRENTSTACK_SEPARATOR + str(d)
        else:
            suffix = ''
        if d == 0 or args.skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(state_dim)

    # Prepare the brick to be forked (LookupTable or Linear).
    # Check if the dataset provides indices (in the case of a
    # fixed vocabulary, x is a 2D tensor) or if it gives raw values
    # (x is a 3D tensor).
    if has_indices(args.dataset):
        features = args.mini_batch_size
        x = tensor.lmatrix('features')
        vocab_size = get_output_size(args.dataset)
        lookup = LookupTable(length=vocab_size, dim=state_dim)
        lookup.weights_init = initialization.IsotropicGaussian(0.1)
        lookup.biases_init = initialization.Constant(0)
        forked = FeedforwardSequence([lookup.apply])
        if not has_mask(args.dataset):
            x_mask = tensor.ones_like(x, dtype=floatX)
    else:
        x = tensor.tensor3('features', dtype=floatX)
        if args.used_inputs is not None:
            x = tensor.set_subtensor(
                x[args.used_inputs:, :, :],
                tensor.zeros_like(x[args.used_inputs:, :, :],
                                  dtype=floatX))
        features = get_output_size(args.dataset)
        forked = Linear(input_dim=features, output_dim=state_dim)
        forked.weights_init = initialization.IsotropicGaussian(0.1)
        forked.biases_init = initialization.Constant(0)
        if not has_mask(args.dataset):
            x_mask = tensor.ones_like(x[:, :, 0], dtype=floatX)

    # Define the fork
    fork = Fork(output_names=output_names, input_dim=features,
                output_dims=output_dims, prototype=forked)
    fork.initialize()

    # Apply the fork
    prernn = fork.apply(x)

    # Give a name to the input of each layer
    if args.skip_connections:
        for t in range(len(prernn)):
            prernn[t].name = "pre_rnn_" + str(t)
    else:
        prernn.name = "pre_rnn"

    return prernn, x_mask
def build_model_vanilla(vocab_size, args, dtype=floatX):
    logger.info('Building model ...')

    # Parameters for the model
    context = args.context
    state_dim = args.state_dim
    layers = args.layers
    skip_connections = args.skip_connections

    # Symbolic variables
    # In both cases: Time X Batch
    x = tensor.lmatrix('features')
    y = tensor.lmatrix('targets')

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(state_dim)

    lookup = LookupTable(length=vocab_size, dim=state_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names, input_dim=args.mini_batch_size,
                output_dims=output_dims,
                prototype=FeedforwardSequence([lookup.apply]))

    transitions = [SimpleRecurrent(dim=state_dim, activation=Tanh())
                   for _ in range(layers)]

    rnn = RecurrentStack(transitions, skip_connections=skip_connections)

    # If skip_connections: dim = layers * state_dim
    # else: dim = state_dim
    output_layer = Linear(
        input_dim=skip_connections * layers * state_dim +
        (1 - skip_connections) * state_dim,
        output_dim=vocab_size, name="output_layer")

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn = fork.apply(x)

    # Give a name to the input of each layer
    if skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t].name = "pre_rnn_" + str(t)
    else:
        pre_rnn.name = "pre_rnn"

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    init_states = {}
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if skip_connections:
            kwargs['inputs' + suffix] = pre_rnn[d]
        elif d == 0:
            kwargs['inputs'] = pre_rnn
        init_states[d] = theano.shared(
            numpy.zeros((args.mini_batch_size, state_dim)).astype(floatX),
            name='state0_%d' % d)
        kwargs['states' + suffix] = init_states[d]

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    # We have
    # h = [state, state_1, state_2 ...] if layers > 1
    # h = state if layers == 1

    # If we have skip connections, concatenate all the states
    # Else only consider the state of the highest layer
    last_states = {}
    if layers > 1:
        # Save all the last states
        for d in range(layers):
            last_states[d] = h[d][-1, :, :]
        if skip_connections:
            h = tensor.concatenate(h, axis=2)
        else:
            h = h[-1]
    else:
        last_states[0] = h[-1, :, :]
    h.name = "hidden_state"

    # The updates of the hidden states
    updates = []
    for d in range(layers):
        updates.append((init_states[d], last_states[d]))

    presoft = output_layer.apply(h[context:, :, :])

    # Define the cost
    # Compute the probability distribution
    time, batch, feat = presoft.shape
    presoft.name = 'presoft'

    cross_entropy = Softmax().categorical_cross_entropy(
        y[context:, :].flatten(), presoft.reshape((batch * time, feat)))
    cross_entropy = cross_entropy / tensor.log(2)
    cross_entropy.name = "cross_entropy"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = cross_entropy + tensor.log(1)
    cost.name = "regularized_cost"

    # Initialize the model
    logger.info('Initializing...')

    fork.initialize()

    rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()

    return cost, cross_entropy, updates