def mctn_level2_model(input, output_dim, output_length,
                      batch_input_shape=None, batch_size=None,
                      input_shape=None, input_length=None, input_dim=None,
                      hidden_dim=None, depth=1, bidirectional=True,
                      unroll=False, stateful=False, dropout=0.0):
    """Build the level-2 MCTN encoder/decoder on an existing tensor.

    Level 2 MCTN is used for translation from the joint embedding of two
    modalities to the third one. Due to the lack of ground truth, no cycle
    phase happens.

    Arguments:
        input: tensor the encoder is applied to (the name shadows the
            builtin but is kept because it is part of the public signature).
        output_dim: feature size produced by the decoder.
        output_length: number of timesteps produced by the decoder.
        batch_input_shape: full ``(batch, length, dim)`` shape; takes
            precedence over the other shape arguments.
        batch_size: batch dimension prepended to ``input_shape`` or to
            ``(input_length, input_dim)``.
        input_shape: ``(length, dim)`` shape without the batch dimension.
        input_length: sequence length used together with ``input_dim``;
            a falsy value means "unknown" (``None``).
        input_dim: feature size of the input.
        hidden_dim: width of the LSTM cells; defaults to ``output_dim``.
        depth: int, or pair ``(encoder_depth, decoder_depth)``.
        bidirectional: wrap the encoder in ``Bidirectional`` (sum merge).
        unroll, stateful: forwarded to the ``RecurrentSequential`` layers.
        dropout: rate of the ``Dropout`` layers placed between cells.

    Returns:
        Tuple ``(inputs, encoded, decoded)`` where ``inputs`` is ``[input]``.

    Raises:
        TypeError: if none of ``batch_input_shape``, ``input_shape`` or
            ``input_dim`` is given.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + tuple(input_shape)
    elif input_dim:
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        # The original used a bare ``raise`` here, which fails with
        # "RuntimeError: No active exception to reraise"; the sibling
        # builders raise TypeError, so do the same.
        raise TypeError(
            'mctn_level2_model requires one of batch_input_shape, '
            'input_shape or input_dim')

    if hidden_dim is None:
        hidden_dim = output_dim

    # encoder phase
    encoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim,
                         batch_input_shape=(shape[0], shape[2])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    encoded = encoder(input)

    # decoder phase
    decoder = RecurrentSequential(decode=True, output_length=output_length,
                                  unroll=unroll, stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    # The attention cell is always the first decoder cell; deeper decoders
    # stack hidden-width LSTM decoder cells and finish with an output cell.
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMDecoderCell(output_dim=hidden_dim,
                                        hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMDecoderCell(output_dim=output_dim,
                                    hidden_dim=hidden_dim))

    inputs = [input]
    decoded = decoder(encoded)
    return inputs, encoded, decoded
def mctn_model(output_dim, output_length, batch_input_shape=None,
               batch_size=None, input_shape=None, input_length=None,
               input_dim=None, hidden_dim=None, depth=1, bidirectional=True,
               unroll=False, stateful=False, dropout=0.0, is_cycled=True):
    """Build the MCTN graph (by default with the cycle-consistency branch).

    A new ``Input`` is created from the resolved shape, encoded by a stack
    of LSTM cells and decoded by an attention decoder. When ``is_cycled`` is
    true, the decoded tensor is passed through the *same* encoder and
    decoder again to produce the cycled output.

    Arguments:
        output_dim: feature size produced by the decoder.
        output_length: number of timesteps produced by the decoder.
        batch_input_shape: full ``(batch, length, dim)`` shape; takes
            precedence over the other shape arguments.
        batch_size: batch dimension prepended to ``input_shape`` or to
            ``(input_length, input_dim)``.
        input_shape: ``(length, dim)`` shape without the batch dimension.
        input_length: sequence length used together with ``input_dim``;
            a falsy value means "unknown" (``None``).
        input_dim: feature size of the input.
        hidden_dim: width of the LSTM cells; defaults to ``output_dim``.
        depth: int, or pair ``(encoder_depth, decoder_depth)``.
        bidirectional: wrap the encoder in ``Bidirectional`` (sum merge).
        unroll, stateful: forwarded to the ``RecurrentSequential`` layers.
        dropout: rate of the ``Dropout`` layers placed between cells.
        is_cycled: whether to build the cycle phase.

    Returns:
        Tuple ``(inputs, encoded, decoded, cycled_decoded)``;
        ``cycled_decoded`` is ``None`` when ``is_cycled`` is false.

    Raises:
        TypeError: if none of ``batch_input_shape``, ``input_shape`` or
            ``input_dim`` is given.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + tuple(input_shape)
    elif input_dim:
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'mctn_model requires one of batch_input_shape, input_shape '
            'or input_dim')

    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # encoder phase
    encoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim,
                         batch_input_shape=(shape[0], shape[2])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    encoded = encoder(_input)

    # decoder phase
    decoder = RecurrentSequential(decode=True, output_length=output_length,
                                  unroll=unroll, stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    # The attention cell is always the first decoder cell; deeper decoders
    # stack hidden-width LSTM decoder cells and finish with an output cell.
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMDecoderCell(output_dim=hidden_dim,
                                        hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMDecoderCell(output_dim=output_dim,
                                    hidden_dim=hidden_dim))

    inputs = [_input]
    decoded = decoder(encoded)

    # cycle phase: the decoded tensor goes back through the same layers.
    # NOTE(review): this presumably requires the decoded tensor to be
    # shape-compatible with the encoder input - confirm with callers.
    cycled_decoded = None
    if is_cycled:
        cycled_encoded = encoder(decoded)
        cycled_decoded = decoder(cycled_encoded)

    return inputs, encoded, decoded, cycled_decoded
def paired_trimodal_model(output_dim, output_length, batch_input_shape=None,
                          batch_size=None, input_shape=None,
                          input_length=None, input_dim=None, hidden_dim=None,
                          depth=1, bidirectional=True, unroll=False,
                          stateful=False, dropout=0.0):
    """Build a chained two-stage translation graph without a cycle phase.

    One modality is translated into two other modalities; the model has
    encoders and decoders chained as
    ``input -> encoder -> decoder -> encoder_2 -> decoder_2``.

    Arguments:
        output_dim: feature size produced by the first decoder (and the
            input width declared for ``encoder_2``).
        output_length: number of timesteps produced by the first decoder.
        batch_input_shape: full ``(batch, length, dim)`` shape; takes
            precedence over the other shape arguments.
        batch_size: batch dimension prepended to ``input_shape`` or to
            ``(input_length, input_dim)``.
        input_shape: ``(length, dim)`` shape without the batch dimension.
        input_length: sequence length of the input; also the output length
            of ``decoder_2``.
        input_dim: feature size of the input; also the output width of
            ``decoder_2``.
        hidden_dim: width of the LSTM cells; defaults to ``output_dim``.
        depth: int, or pair ``(encoder_depth, decoder_depth)``.
        bidirectional: wrap both encoders in ``Bidirectional`` (sum merge).
        unroll, stateful: forwarded to the ``RecurrentSequential`` layers.
        dropout: rate of the ``Dropout`` layers placed between cells.

    Returns:
        Tuple ``(inputs, encoded_one, encoded_two, decoded_one,
        decoded_two)``.

    Raises:
        TypeError: if none of ``batch_input_shape``, ``input_shape`` or
            ``input_dim`` is given.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + tuple(input_shape)
    elif input_dim:
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'paired_trimodal_model requires one of batch_input_shape, '
            'input_shape or input_dim')

    if hidden_dim is None:
        hidden_dim = output_dim

    # decoder_2 emits sequences with the input's length and width. Callers
    # that describe the input through batch_input_shape/input_shape leave
    # input_length/input_dim as None, so fall back to the resolved shape
    # (explicitly passed values still win).
    source_length = input_length if input_length is not None else shape[1]
    source_dim = input_dim if input_dim is not None else shape[2]

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # encoder phase
    encoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim,
                         batch_input_shape=(shape[0], shape[2])))

    # second encoder consumes the first decoder's output (width output_dim)
    encoder_2 = RecurrentSequential(unroll=unroll, stateful=stateful,
                                    return_sequences=True)
    encoder_2.add(LSTMCell(hidden_dim,
                           batch_input_shape=(shape[0], output_dim)))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))
        encoder_2.add(Dropout(dropout))
        encoder_2.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

        encoder_2 = Bidirectional(encoder_2, merge_mode='sum')
        # NOTE(review): encoder_2 is built with the *input* shape although
        # it is applied to decoded_one - kept as in the original; confirm
        # this is intended when input and output dims differ.
        encoder_2.forward_layer.build(shape)
        encoder_2.backward_layer.build(shape)
        # patch
        encoder_2.layer = encoder_2.forward_layer

    encoded_one = encoder(_input)

    # decoder phase
    decoder = RecurrentSequential(decode=True, output_length=output_length,
                                  unroll=unroll, stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))

    decoder_2 = RecurrentSequential(decode=True, output_length=source_length,
                                    unroll=unroll, stateful=stateful)
    decoder_2.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))

    # Each decoder starts with an attention cell; deeper decoders stack
    # hidden-width LSTM decoder cells and finish with an output-width cell.
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMDecoderCell(output_dim=hidden_dim,
                                        hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMDecoderCell(output_dim=output_dim,
                                    hidden_dim=hidden_dim))

    decoder_2.add(
        AttentionDecoderCell(output_dim=source_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        for _ in range(depth[1] - 2):
            decoder_2.add(Dropout(dropout))
            decoder_2.add(LSTMDecoderCell(output_dim=hidden_dim,
                                          hidden_dim=hidden_dim))
        decoder_2.add(Dropout(dropout))
        decoder_2.add(LSTMDecoderCell(output_dim=source_dim,
                                      hidden_dim=hidden_dim))

    inputs = [_input]
    decoded_one = decoder(encoded_one)
    encoded_two = encoder_2(decoded_one)
    decoded_two = decoder_2(encoded_two)

    return inputs, encoded_one, encoded_two, decoded_one, decoded_two
def AttentionSeq2Seq(output_dim, output_length, hidden_dim=None, depth=1,
                     bidirectional=True, dropout=0., **kwargs):
    """Build an attention Seq2seq model based on [3].

    There is a soft alignment between the input and output sequence
    elements. A bidirectional encoder is used by default. There is no
    hidden state transfer in this model.

    The math:

        Encoder:
        X = Input Sequence of length m.
        H = Bidirection_LSTM(X); Note that here the LSTM has
        return_sequences = True, so H is a sequence of vectors of length m.

        Decoder:
        y(i) = LSTM(s(i-1), y(i-1), v(i)); Where s is the hidden state of
        the LSTM (h and c) and v (called the context vector) is a weighted
        sum over H:

        v(i) = sigma(j = 0 to m-1) alpha(i, j) * H(j)

        The weight alpha[i, j] for each hj is computed as follows:
        energy = a(s(i-1), H(j))
        alpha = softmax(energy)
        Where a is a feed forward network.

    Arguments:
        output_dim: feature size of the final decoder.
        output_length: number of timesteps produced by each decoder.
        hidden_dim: width of the internal cells; a falsy value defaults to
            ``output_dim``.
        depth: int, or pair ``(encoder_depth, decoder_depth)``.
        bidirectional: wrap the encoder in ``Bidirectional`` (sum merge).
        dropout: rate of the ``Dropout`` layers placed between layers.
        **kwargs: must contain one of ``batch_input_shape``,
            ``input_shape`` or ``input_dim`` (optionally with
            ``input_length``); may contain ``unroll`` and ``stateful``.
            Whatever remains is forwarded to every encoder ``LSTMCell``.

    Returns:
        A ``Model`` mapping the created input to the decoded sequence.

    Raises:
        TypeError: if no input shape information is supplied (the original
            failed later with an ``UnboundLocalError`` on ``shape``).
    """
    if isinstance(depth, int):
        depth = [depth, depth]

    # Only the keys of the branch actually taken are consumed; the rest
    # stay in kwargs and are forwarded to the cells, as before.
    if 'batch_input_shape' in kwargs:
        shape = kwargs.pop('batch_input_shape')
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs.pop('input_shape'))
    elif 'input_dim' in kwargs:
        shape = (None, kwargs.pop('input_length', None),
                 kwargs.pop('input_dim'))
    else:
        raise TypeError(
            'AttentionSeq2Seq requires one of batch_input_shape, '
            'input_shape or input_dim')

    unroll = kwargs.pop('unroll', False)
    stateful = kwargs.pop('stateful', False)

    if not hidden_dim:
        hidden_dim = output_dim

    encoder = RecurrentContainer(unroll=unroll, stateful=stateful,
                                 return_sequences=True,
                                 input_length=shape[1])
    encoder.add(
        LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2]),
                 **kwargs))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim, **kwargs))

    _input = Input(batch_shape=shape)

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')

    encoded = encoder(_input)

    # Intermediate attention decoders keep the hidden width; only the last
    # one projects to output_dim.
    decoded = encoded
    for _ in range(1, depth[1]):
        decoder = AttentionDecoderCell(
            output_dim=hidden_dim, hidden_dim=hidden_dim,
            batch_input_shape=(shape[0], shape[1], hidden_dim)).get_layer(
                decode=True, output_length=output_length, unroll=unroll,
                stateful=stateful)
        decoded = Dropout(dropout)(decoded)
        decoded = decoder(decoded)

    # After at least one intermediate decoder the sequence length seen by
    # the final decoder is output_length rather than the input length.
    final_input_length = output_length if depth[1] > 1 else shape[1]
    decoder = AttentionDecoderCell(
        output_dim=output_dim, hidden_dim=hidden_dim,
        batch_input_shape=(shape[0], final_input_length,
                           hidden_dim)).get_layer(
                               decode=True, output_length=output_length,
                               unroll=unroll, stateful=stateful)
    decoded = Dropout(dropout)(decoded)
    decoded = decoder(decoded)

    model = Model(_input, decoded)
    return model
def Seq2Seq(output_dim, output_length, hidden_dim=None, depth=1,
            broadcast_state=True, inner_broadcast_state=True, peek=False,
            dropout=0., **kwargs):
    """Build a Seq2seq model based on [1] and [2].

    This model has the ability to transfer the encoder hidden state to the
    decoder's hidden state (specified by the broadcast_state argument).
    Also, in deep models (depth > 1), the hidden state is propagated
    throughout the LSTM stack (specified by the inner_broadcast_state
    argument). You can switch between the [1] based model and the [2] based
    model using the peek argument (peek = True for [2], peek = False for
    [1]). When peek = True, the decoder gets a 'peek' at the context vector
    at every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the
        LSTM (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        Where s is the hidden state of the LSTM (h and c), and C is the
        context vector from the encoder.

    Arguments:
        output_dim : Required output dimension.
        hidden_dim : The dimension of the internal representations of the
            model; a falsy value defaults to ``output_dim``.
        output_length : Length of the required output sequence.
        depth : Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3
            LSTMs on the decoding side. You can also specify depth as a
            tuple. For example, if depth = (4, 5), 4 LSTMs will be added to
            the encoding side and 5 LSTMs will be added to the decoding
            side.
        broadcast_state : Specifies whether the hidden state from the
            encoder should be transferred to the decoder.
        inner_broadcast_state : Specifies whether hidden states should be
            propagated throughout the LSTM stack in deep models.
        peek : Specifies if the decoder should be able to peek at the
            context vector at every timestep.
        dropout : Dropout probability in between layers.
        **kwargs : must contain one of ``batch_input_shape``,
            ``input_shape`` or ``input_dim`` (optionally with
            ``input_length``); may contain ``unroll`` and ``stateful``.
            Whatever remains is forwarded to every cell.

    Returns:
        A ``Model`` with ``encoder`` and ``decoder`` attributes attached.

    Raises:
        TypeError: if no input shape information is supplied (the original
            failed later with an ``UnboundLocalError`` on ``shape``).
    """
    if isinstance(depth, int):
        depth = [depth, depth]

    # Only the keys of the branch actually taken are consumed; the rest
    # stay in kwargs and are forwarded to the cells, as before.
    if 'batch_input_shape' in kwargs:
        shape = kwargs.pop('batch_input_shape')
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs.pop('input_shape'))
    elif 'input_dim' in kwargs:
        shape = (None, kwargs.pop('input_length', None),
                 kwargs.pop('input_dim'))
    else:
        raise TypeError(
            'Seq2Seq requires one of batch_input_shape, input_shape '
            'or input_dim')

    unroll = kwargs.pop('unroll', False)
    stateful = kwargs.pop('stateful', False)

    if not hidden_dim:
        hidden_dim = output_dim

    encoder = RecurrentContainer(readout=True,
                                 state_sync=inner_broadcast_state,
                                 input_length=shape[1], unroll=unroll,
                                 stateful=stateful)
    for _ in range(depth[0]):
        encoder.add(
            LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim),
                     **kwargs))
        encoder.add(Dropout(dropout))

    # dense1 projects the raw input to hidden_dim before the encoder;
    # dense2 projects the encoder output to output_dim for the decoder.
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense2 = Dense(output_dim)

    decoder = RecurrentContainer(
        readout='add' if peek else 'readout_only',
        state_sync=inner_broadcast_state, output_length=output_length,
        unroll=unroll, stateful=stateful, decode=True,
        input_length=shape[1])
    for _ in range(depth[1]):
        decoder.add(
            Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim),
                            **kwargs))

    _input = Input(batch_shape=shape)
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        # Hand the encoder's state outputs to the decoder's first states.
        decoder.model.layers[1].states[:2] = encoder.state_outputs[-3:-1]
    encoded_seq = dense2(encoded_seq)
    decoder.initial_readout = encoded_seq
    decoded_seq = decoder(encoded_seq)

    model = Model(_input, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
def SimpleSeq2Seq(output_dim, output_length, hidden_dim=None, depth=1,
                  dropout=0., **kwargs):
    """Build a simple model for sequence to sequence learning.

    The encoder encodes the input sequence to a vector (called the context
    vector). The decoder decodes the context vector into a sequence of
    vectors. There is no one-to-one relation between the input and output
    sequence elements. The input sequence and output sequence may differ in
    length.

    Arguments:
        output_dim : Required output dimension (width of the last decoder
            cell).
        hidden_dim : The dimension of the internal representations of the
            model; a falsy value defaults to ``output_dim``.
        output_length : Length of the required output sequence.
        depth : Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3
            LSTMs on the decoding side. You can also specify depth as a
            tuple. For example, if depth = (4, 5), 4 LSTMs will be added to
            the encoding side and 5 LSTMs will be added to the decoding
            side.
        dropout : Dropout probability in between layers.
        **kwargs : must contain one of ``batch_input_shape``,
            ``input_shape`` or ``input_dim`` (optionally with
            ``input_length``); may contain ``unroll`` and ``stateful``.
            Whatever remains is forwarded to every ``LSTMCell``.

    Returns:
        A ``Sequential`` model made of the encoder followed by the decoder.

    Raises:
        TypeError: if no input shape information is supplied (the original
            failed later with an ``UnboundLocalError`` on ``shape``).
    """
    if isinstance(depth, int):
        depth = [depth, depth]

    # Only the keys of the branch actually taken are consumed; the rest
    # stay in kwargs and are forwarded to the cells, as before.
    if 'batch_input_shape' in kwargs:
        shape = kwargs.pop('batch_input_shape')
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs.pop('input_shape'))
    elif 'input_dim' in kwargs:
        shape = (None, kwargs.pop('input_length', None),
                 kwargs.pop('input_dim'))
    else:
        raise TypeError(
            'SimpleSeq2Seq requires one of batch_input_shape, input_shape '
            'or input_dim')

    unroll = kwargs.pop('unroll', False)
    stateful = kwargs.pop('stateful', False)

    if not hidden_dim:
        hidden_dim = output_dim

    encoder = RecurrentContainer(unroll=unroll, stateful=stateful,
                                 input_length=shape[1])
    encoder.add(
        LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2]),
                 **kwargs))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim, **kwargs))

    decoder = RecurrentContainer(unroll=unroll, stateful=stateful,
                                 decode=True, output_length=output_length,
                                 input_length=shape[1])
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))
    # Same number of cells as before (at least one), but the last cell now
    # has width output_dim. Previously every decoder cell used hidden_dim,
    # so output_dim was ignored whenever hidden_dim was given explicitly.
    cell_count = max(depth[1], 1)
    widths = [hidden_dim] * (cell_count - 1) + [output_dim]
    decoder.add(LSTMCell(widths[0], **kwargs))
    for width in widths[1:]:
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(width, **kwargs))

    model = Sequential()
    model.add(encoder)
    model.add(decoder)
    return model
def __init__(self, config):
    """Validate ``config`` and create the embedding, layers and optimizer.

    ``config`` is stored on the instance and must pass ``self.check()``
    (asserted). The keys read here are exactly those named in
    ``self.check_list``. The Keras models themselves are not built here;
    ``model``, ``EncoderModel`` and the two ``DecoderModel_*`` attributes
    start as ``None``.
    """
    self.model = None
    self.check_list = {
        'text_maxlen', 'sentence_maxnum', 'sentence_maxlen', 'hidden_size',
        'delimiter', 'pad_word', 'unk_word', 'start_sent', 'end_sent',
        'vocab_size', 'embed_size', "embed_path", 'embed_trainable',
        'learning_rate'
    }
    self.config = config
    assert self.check(), 'parametre check failed'

    cfg = self.config
    vocab_size = cfg['vocab_size']
    embed_size = cfg['embed_size']
    max_sent_num = cfg['sentence_maxnum']
    max_sent_len = cfg['sentence_maxlen']

    self.size = cfg['hidden_size']

    # Embedding table: vectors read from disk, with zero vectors for the
    # pad/unk tokens, merged into a uniformly initialised matrix.
    pretrained = read_embedding(filename=cfg['embed_path'])
    self._PAD_ = cfg['pad_word']
    self._UNK_ = cfg['unk_word']
    self._START_ = cfg['start_sent']
    self._END_ = cfg['end_sent']
    pretrained[self._PAD_] = np.zeros((embed_size, ), dtype=np.float32)
    pretrained[self._UNK_] = np.zeros((embed_size, ), dtype=np.float32)
    random_init = np.random.uniform(-0.2, 0.2, [vocab_size, embed_size])
    base_matrix = np.float32(random_init)
    weights = convert_embed_2_numpy(pretrained, embed=base_matrix)
    self.Emb = Embedding(vocab_size,
                         embed_size,
                         weights=[weights],
                         trainable=cfg['embed_trainable'])

    # The two split layers differ only in ``cut_head`` and their name.
    split_options = dict(delimiter=cfg['delimiter'],
                         output_sentence_len=max_sent_len,
                         output_sentence_num=max_sent_num,
                         pad_word=cfg['pad_word'])
    self.Splitlayer_keephead = SplitLayer(cut_head=False,
                                          name='Split_Layer_keep_head',
                                          **split_options)
    self.Splitlayer_cuthead = SplitLayer(cut_head=True,
                                         name='Split_Layer_cut_head',
                                         **split_options)

    self.Sentence_reshape1D = Reshape((max_sent_num * max_sent_len, ),
                                      name='Sentence_reshape1D')
    self.Sentence_reshape2D = Reshape(
        (max_sent_num, max_sent_len, embed_size, ),
        name='Sentence_reshape2D')

    # Recurrent encoders and decoder cells, all ``self.size`` units wide.
    self.Encoder_word = CuDNNLSTM(units=self.size, name='Encoder_word')
    self.Encoder_sent = CuDNNLSTM(units=self.size,
                                  name='Encoder_sent',
                                  return_state=True)
    self.Decoder_word_cell = LSTMCell(units=self.size,
                                      name='Decoder_word_cell')
    self.Decoder_sent_cell = LSTMCell(units=self.size,
                                      name='Decoder_sent_cell')

    self.AttentionMapper = Linear(output_size=self.size,
                                  bias=True,
                                  bias_start=0.0,
                                  activation='tanh')
    # shape : [attention_vec_size]
    self.Join = Dense(units=1, use_bias=False, name='Join')
    self.Exp = Lambda(lambda x: K.exp(x), name='Exp')
    self.Calcprob = Dense(units=vocab_size,
                          activation='softmax',
                          name='Calcprob')
    self.ArgMax = Lambda(lambda x: K.argmax(x, axis=-1), dtype='int32')
    self.Printer = Lambda(lambda x: K.tf.Print(x, [x]))
    self.Identical = Lambda(lambda x: x, name='Identical')

    # Populated later, outside this constructor.
    self.EncoderModel = None
    self.DecoderModel_onesent = None
    self.DecoderModel_onestep = None
    self._mask = None
    self._targets = None

    self.optim = optimizers.SGD(config['learning_rate'])
def Seq2Seq(output_dim, output_length, batch_input_shape=None,
            input_shape=None, batch_size=None, input_dim=None,
            input_length=None, hidden_dim=None, depth=1,
            broadcast_state=True, unroll=False, stateful=False,
            inner_broadcast_state=True, teacher_force=False, peek=False,
            dropout=0.):
    """Build a Seq2seq model based on [1] and [2].

    This model has the ability to transfer the encoder hidden state to the
    decoder's hidden state (specified by the broadcast_state argument).
    Also, in deep models (depth > 1), the hidden state is propagated
    throughout the LSTM stack (specified by the inner_broadcast_state
    argument). You can switch between the [1] based model and the [2] based
    model using the peek argument (peek = True for [2], peek = False for
    [1]). When peek = True, the decoder gets a 'peek' at the context vector
    at every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the
        LSTM (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        Where s is the hidden state of the LSTM (h and c), and C is the
        context vector from the encoder.

    Arguments:
        output_dim : Required output dimension.
        output_length : Length of the required output sequence.
        batch_input_shape : full ``(batch, length, dim)`` shape; takes
            precedence over the other shape arguments.
        input_shape : ``(length, dim)`` shape without the batch dimension.
        batch_size : batch dimension prepended to ``input_shape`` or to
            ``(input_length, input_dim)``.
        input_dim : feature size of the input.
        input_length : sequence length used together with ``input_dim``;
            a falsy value means "unknown" (``None``).
        hidden_dim : The dimension of the internal representations of the
            model; defaults to ``output_dim``.
        depth : Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3
            LSTMs on the decoding side. You can also specify depth as a
            tuple. For example, if depth = (4, 5), 4 LSTMs will be added to
            the encoding side and 5 LSTMs will be added to the decoding
            side.
        broadcast_state : Specifies whether the hidden state from the
            encoder should be transferred to the decoder.
        unroll, stateful : forwarded to both ``RecurrentSequential`` layers.
        inner_broadcast_state : Specifies whether hidden states should be
            propagated throughout the LSTM stack in deep models.
        teacher_force : when true, a second ``Input`` of shape
            ``(batch, output_length, output_dim)`` is added to the model
            and passed to the decoder as ``ground_truth``.
        peek : Specifies if the decoder should be able to peek at the
            context vector at every timestep.
        dropout : Dropout probability in between layers.

    Returns:
        A ``Model`` with ``encoder`` and ``decoder`` attributes attached.

    Raises:
        TypeError: if none of ``batch_input_shape``, ``input_shape`` or
            ``input_dim`` is given.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + tuple(input_shape)
    elif input_dim:
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'Seq2Seq requires one of batch_input_shape, input_shape '
            'or input_dim')

    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll, stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim,
                             batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    # dense1 projects the raw input to hidden_dim before the encoder;
    # dense2 projects the encoder output to output_dim for the decoder.
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentSequential(
        readout='add' if peek else 'readout_only',
        state_sync=inner_broadcast_state, decode=True,
        output_length=output_length, unroll=unroll, stateful=stateful,
        teacher_force=teacher_force)
    for _ in range(depth[1]):
        decoder.add(
            Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        # With return_states enabled the encoder returns a list: the
        # output first, and the states handed to the decoder last.
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)

    inputs = [_input]
    ground_truth = None
    if teacher_force:
        ground_truth = Input(
            batch_shape=(shape[0], output_length, output_dim))
        ground_truth._keras_history[0].supports_masking = True
        inputs.append(ground_truth)

    decoded_seq = decoder(encoded_seq,
                          ground_truth=ground_truth,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
def AttentionSeq2Seq(output_dim, output_length, batch_input_shape=None,
                     batch_size=None, input_shape=None, input_length=None,
                     input_dim=None, hidden_dim=None, depth=1,
                     bidirectional=True, unroll=False, stateful=False,
                     dropout=0.0,):
    """Build an attention Seq2seq model based on [3].

    There is a soft alignment between the input and output sequence
    elements. A bidirectional encoder is used by default. There is no
    hidden state transfer in this model.

    The math:

        Encoder:
        X = Input Sequence of length m.
        H = Bidirection_LSTM(X); Note that here the LSTM has
        return_sequences = True, so H is a sequence of vectors of length m.

        Decoder:
        y(i) = LSTM(s(i-1), y(i-1), v(i)); Where s is the hidden state of
        the LSTM (h and c) and v (called the context vector) is a weighted
        sum over H:

        v(i) = sigma(j = 0 to m-1) alpha(i, j) * H(j)

        The weight alpha[i, j] for each hj is computed as follows:
        energy = a(s(i-1), H(j))
        alpha = softmax(energy)
        Where a is a feed forward network.

    Arguments:
        output_dim: feature size produced by the decoder.
        output_length: number of timesteps produced by the decoder.
        batch_input_shape: full ``(batch, length, dim)`` shape; takes
            precedence over the other shape arguments.
        batch_size: batch dimension prepended to ``input_shape`` or to
            ``(input_length, input_dim)``.
        input_shape: ``(length, dim)`` shape without the batch dimension.
        input_length: sequence length used together with ``input_dim``;
            a falsy value means "unknown" (``None``).
        input_dim: feature size of the input.
        hidden_dim: width of the LSTM cells; defaults to ``output_dim``.
        depth: int, or pair ``(encoder_depth, decoder_depth)``.
        bidirectional: wrap the encoder in ``Bidirectional`` (sum merge).
        unroll, stateful: forwarded to the ``RecurrentSequential`` layers.
        dropout: rate of the ``Dropout`` layers placed between cells.

    Returns:
        A ``Model`` mapping the created input to the decoded sequence.

    Raises:
        TypeError: if none of ``batch_input_shape``, ``input_shape`` or
            ``input_dim`` is given.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + tuple(input_shape)
    elif input_dim:
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'AttentionSeq2Seq requires one of batch_input_shape, '
            'input_shape or input_dim')

    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim,
                         batch_input_shape=(shape[0], shape[2])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    encoded = encoder(_input)

    decoder = RecurrentSequential(decode=True, output_length=output_length,
                                  unroll=unroll, stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    # The attention cell is always the first decoder cell; deeper decoders
    # stack hidden-width LSTM decoder cells and finish with an output cell.
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMDecoderCell(output_dim=hidden_dim,
                                        hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMDecoderCell(output_dim=output_dim,
                                    hidden_dim=hidden_dim))

    inputs = [_input]
    decoded = decoder(encoded)
    model = Model(inputs, decoded)
    return model
def SimpleSeq2Seq(output_dim, output_length, hidden_dim=None,
                  input_shape=None, batch_size=None, batch_input_shape=None,
                  input_dim=None, input_length=None, depth=1, dropout=0.0,
                  unroll=False, stateful=False):
    """Build a simple model for sequence to sequence learning.

    The encoder encodes the input sequence to a vector (called the context
    vector). The decoder decodes the context vector into a sequence of
    vectors. There is no one-to-one relation between the input and output
    sequence elements. The input sequence and output sequence may differ in
    length.

    Arguments:
        output_dim : Required output dimension.
        output_length : Length of the required output sequence.
        hidden_dim : The dimension of the internal representations of the
            model; defaults to ``output_dim``.
        input_shape : ``(length, dim)`` shape without the batch dimension.
        batch_size : batch dimension prepended to ``input_shape`` or to
            ``(input_length, input_dim)``.
        batch_input_shape : full input shape including the batch dimension;
            takes precedence over the other shape arguments.
        input_dim : feature size of the input.
        input_length : sequence length used together with ``input_dim``;
            a falsy value means "unknown" (``None``).
        depth : Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3
            LSTMs on the decoding side. You can also specify depth as a
            tuple. For example, if depth = (4, 5), 4 LSTMs will be added to
            the encoding side and 5 LSTMs will be added to the decoding
            side.
        dropout : Dropout probability in between layers.
        unroll, stateful : forwarded to both ``RecurrentSequential`` layers.

    Returns:
        A ``Model`` mapping the created input to the decoder output.

    Raises:
        TypeError: if none of ``batch_input_shape``, ``input_shape`` or
            ``input_dim`` is given.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + tuple(input_shape)
    elif input_dim:
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'SimpleSeq2Seq requires one of batch_input_shape, input_shape '
            'or input_dim')

    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(unroll=unroll, stateful=stateful)
    encoder.add(LSTMCell(hidden_dim,
                         batch_input_shape=(shape[0], shape[-1])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    decoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                  decode=True, output_length=output_length)
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))
    if depth[1] == 1:
        # A single decoder cell must already emit output_dim features.
        decoder.add(LSTMCell(output_dim))
    else:
        decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim))

    _input = Input(batch_shape=shape)
    context = encoder(_input)
    output = decoder(context)
    return Model(_input, output)