def AttentionSeq2Seq(output_dim, output_length, hidden_dim=None, depth=1,
                     bidirectional=True, dropout=0., **kwargs):
    '''
    Attention Seq2seq model based on [3].

    Here, there is a soft alignment between the input and output sequence
    elements. A bidirectional encoder is used by default. There is no hidden
    state transfer in this model.

    The math:

        Encoder:
        X = Input Sequence of length m.
        H = Bidirection_LSTM(X); Note that here the LSTM has
        return_sequences = True, so H is a sequence of vectors of length m.

        Decoder:
        y(i) = LSTM(s(i-1), y(i-1), v(i)); Where s is the hidden state of the
        LSTM (h and c) and v (called the context vector) is a weighted sum
        over H:

        v(i) = sigma(j = 0 to m-1) alpha(i, j) * H(j)

        The weight alpha[i, j] for each hj is computed as follows:
        energy = a(s(i-1), H(j))
        alpha = softmax(energy)
        Where a is a feed forward network.

    Arguments:

    output_dim : Output dimension of the last decoder cell.
    output_length : Length of the output sequence (given to the decoder
        container).
    hidden_dim : Dimension of the internal cells. Falls back to output_dim
        when falsy.
    depth : An int (same depth for encoder and decoder) or a pair
        (encoder_depth, decoder_depth).
    bidirectional : When true, the encoder is wrapped in
        Bidirectional(..., merge_mode='sum').
    dropout : Rate given to the Dropout layers placed between cells.
    **kwargs : Must describe the input shape through one of
        batch_input_shape, input_shape, or input_dim (optionally together
        with input_length). 'unroll' and 'stateful' are consumed here; every
        remaining keyword is forwarded to the encoder LSTMCell constructors.

    Returns:
        The assembled Model mapping the input tensor to the decoded sequence.

    Raises:
        ValueError: if no input shape keyword is supplied.
    '''
    if isinstance(depth, int):
        depth = [depth, depth]

    # Resolve the full (batch, time, features) shape. Only the keyword that
    # is actually used is removed from kwargs; the rest is forwarded as-is.
    if 'batch_input_shape' in kwargs:
        shape = kwargs.pop('batch_input_shape')
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs.pop('input_shape'))
    elif 'input_dim' in kwargs:
        shape = (None, kwargs.pop('input_length', None), kwargs.pop('input_dim'))
    else:
        # Previously this fell through and crashed later with an unbound
        # local `shape`; fail early with an actionable message instead.
        raise ValueError('An input shape is required: pass batch_input_shape, '
                         'input_shape or input_dim (optionally with input_length).')

    unroll = kwargs.pop('unroll', False)
    stateful = kwargs.pop('stateful', False)
    if not hidden_dim:
        hidden_dim = output_dim

    encoder = RecurrentContainer(unroll=unroll, stateful=stateful,
                                 return_sequences=True, input_length=shape[1])
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2]), **kwargs))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim, **kwargs))

    source = Input(batch_shape=shape)
    # Flag the layer that produced the input tensor as mask-aware.
    source._keras_history[0].supports_masking = True
    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
    encoded = encoder(source)

    decoder = RecurrentContainer(decode=True, output_length=output_length,
                                 unroll=unroll, stateful=stateful,
                                 input_length=shape[1])
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    if depth[1] == 1:
        decoder.add(AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        # Attention cell first, then (depth - 2) hidden decoder cells, then a
        # final decoder cell projecting to output_dim.
        decoder.add(AttentionDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    # NOTE: teacher forcing is not wired up in this variant; the model has a
    # single input.
    inputs = [source]
    decoded = decoder(encoded)
    model = Model(inputs, decoded)
    return model
def SimpleED(input_length, output_length, hidden_dim, input_dim, batch_size, dropout):
    """
    Build a single-layer GRU encoder/decoder as a Sequential model.

    See: https://github.com/farizrahman4u/seq2seq/blob/master/seq2seq/models.py

    :param input_length: input_length given to both recurrent containers
    :param output_length: output_length given to the decoder container
    :param hidden_dim: size given to each GRUCell; also the feature size the
        decoder declares for its input
    :param input_dim: feature size declared for the encoder cell input
    :param batch_size: batch size used in every batch_input_shape
    :param dropout: rate of the Dropout placed before the decoder GRUCell
    :return: Sequential model containing the encoder followed by the decoder
    """
    enc = RecurrentContainer(input_length=input_length)
    enc_cell = GRUCell(hidden_dim, batch_input_shape=(batch_size, input_dim))
    enc.add(enc_cell)
    # A second Dropout + GRUCell encoder stage is intentionally left disabled.

    dec = RecurrentContainer(decode=True,
                             output_length=output_length,
                             input_length=input_length)
    dec_input_shape = (batch_size, hidden_dim)
    dec.add(Dropout(dropout, batch_input_shape=dec_input_shape))
    dec.add(GRUCell(hidden_dim))
    # A second Dropout + GRUCell decoder stage is intentionally left disabled.

    model = Sequential()
    for stage in (enc, dec):
        model.add(stage)
    return model
def __build_seq2seq_decoder__(self):
    """Append a decoding recurrentshop container (readout='add') to self.decoder.

    Reads self.enc_layer_output, self.dec_layer_output and self.sequence_len.
    The last LSTM cell always uses the final encoder size as its output size;
    a warning is printed when that differs from the last requested decoder
    size.
    """
    context_dim = self.enc_layer_output[-1]
    layer_dims = self.dec_layer_output

    # Using recurrentshop's decoder container
    stack = RecurrentContainer(return_sequences=True,
                               readout='add',
                               output_length=self.sequence_len,
                               input_shape=(context_dim, ),
                               decode=True)

    if len(layer_dims) > 1:
        stack.add(LSTMCell(output_dim=layer_dims[0], input_dim=context_dim))
        for width in layer_dims[1:-1]:
            stack.add(LSTMCell(output_dim=width))
        # Final cell is forced to the encoder size.
        stack.add(LSTMCell(output_dim=context_dim))
    else:
        stack.add(LSTMCell(input_dim=context_dim, output_dim=context_dim))

    if context_dim != layer_dims[-1]:
        message = 'WARNING: Overriding final decoder output to %s for readout compatibility'
        print(message % context_dim)

    self.decoder.add(stack)
def __build_readout_decoder__(self):
    """Append a RepeatVector and a readout recurrentshop container to self.decoder.

    Reads self.enc_layer_output, self.dec_layer_output and self.sequence_len.
    The last LSTM cell always uses the final encoder size as its output size;
    a warning is printed when that differs from the last requested decoder
    size.
    """
    context_dim = self.enc_layer_output[-1]
    layer_dims = self.dec_layer_output

    # Repeat the final vector for answer input
    repeater = RepeatVector(self.sequence_len, input_shape=(context_dim, ))
    self.decoder.add(repeater)

    # Using recurrentshop's container with readout
    stack = RecurrentContainer(readout=True,
                               return_sequences=True,
                               output_length=self.sequence_len)

    if len(layer_dims) > 1:
        stack.add(LSTMCell(output_dim=layer_dims[0], input_dim=context_dim))
        for width in layer_dims[1:-1]:
            stack.add(LSTMCell(output_dim=width))
        # Final cell is forced to the encoder size.
        stack.add(LSTMCell(output_dim=context_dim))
    else:
        stack.add(LSTMCell(input_dim=context_dim, output_dim=context_dim))

    if context_dim != layer_dims[-1]:
        message = 'WARNING: Overriding final decoder output to %s for readout compatibility'
        print(message % context_dim)

    self.decoder.add(stack)
def Seq2Seq(output_dim, output_length, hidden_dim=None, depth=1,
            broadcast_state=True, inner_broadcast_state=True, peek=False,
            dropout=0., **kwargs):
    '''
    Seq2seq model based on [1] and [2].

    This model has the ability to transfer the encoder hidden state to the
    decoder's hidden state (specified by the broadcast_state argument). Also,
    in deep models (depth > 1), the hidden state is propagated throughout the
    LSTM stack (specified by the inner_broadcast_state argument). You can
    switch between the [1] based model and the [2] based model using the peek
    argument (peek = True for [2], peek = False for [1]). When peek = True,
    the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM
        (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        Where s is the hidden state of the LSTM (h and c), and C is the
        context vector from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
        Falls back to output_dim when falsy.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
        there will be 3 LSTMs on the encoding side and 3 LSTMs on the
        decoding side. You can also specify depth as a tuple. For example,
        if depth = (4, 5), 4 LSTMs will be added to the encoding side and
        5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder
        should be transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be
        propagated throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context
        vector at every timestep.
    dropout : Dropout probability in between layers.
    **kwargs : Must describe the input shape through one of
        batch_input_shape, input_shape, or input_dim (optionally together
        with input_length). 'unroll' and 'stateful' are consumed here; every
        remaining keyword is forwarded to the cell constructors.

    Returns:
        The assembled Model; the encoder and decoder containers are attached
        as model.encoder and model.decoder.

    Raises:
        ValueError: if no input shape keyword is supplied.
    '''
    if isinstance(depth, int):
        depth = [depth, depth]

    # Resolve the full (batch, time, features) shape. Only the keyword that
    # is actually used is removed from kwargs.
    if 'batch_input_shape' in kwargs:
        shape = kwargs.pop('batch_input_shape')
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs.pop('input_shape'))
    elif 'input_dim' in kwargs:
        shape = (None, kwargs.pop('input_length', None), kwargs.pop('input_dim'))
    else:
        # Previously this fell through and crashed later with an unbound
        # local `shape`; fail early with an actionable message instead.
        raise ValueError('An input shape is required: pass batch_input_shape, '
                         'input_shape or input_dim (optionally with input_length).')

    unroll = kwargs.pop('unroll', False)
    stateful = kwargs.pop('stateful', False)
    if not hidden_dim:
        hidden_dim = output_dim

    encoder = RecurrentContainer(readout=True, state_sync=inner_broadcast_state,
                                 input_length=shape[1], unroll=unroll,
                                 stateful=stateful)
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim), **kwargs))
        encoder.add(Dropout(dropout))

    # dense1 lifts the raw features to hidden_dim before the encoder;
    # dense2 projects the encoder output to output_dim for the decoder.
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense2 = Dense(output_dim)

    decoder = RecurrentContainer(readout='add' if peek else 'readout_only',
                                 state_sync=inner_broadcast_state,
                                 output_length=output_length, unroll=unroll,
                                 stateful=stateful, decode=True)
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim), **kwargs))

    source = Input(batch_shape=shape)
    encoded_seq = dense1(source)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        # Seed the first two decoder states from the encoder's state outputs.
        decoder.model.layers[1].states[:2] = encoder.state_outputs[-3:-1]
    encoded_seq = dense2(encoded_seq)
    decoder.initial_readout = encoded_seq
    decoded_seq = decoder(encoded_seq)

    model = Model(source, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
def AttentionSeq2Seq(output_dim, output_length, hidden_dim=None, depth=1,
                     bidirectional=True, dropout=0., **kwargs):
    '''
    Attention Seq2seq model based on [3].

    Here, there is a soft alignment between the input and output sequence
    elements. A bidirectional encoder is used by default. There is no hidden
    state transfer in this model.

    The math:

        Encoder:
        X = Input Sequence of length m.
        H = Bidirection_LSTM(X); Note that here the LSTM has
        return_sequences = True, so H is a sequence of vectors of length m.

        Decoder:
        y(i) = LSTM(s(i-1), y(i-1), v(i)); Where s is the hidden state of the
        LSTM (h and c) and v (called the context vector) is a weighted sum
        over H:

        v(i) = sigma(j = 0 to m-1) alpha(i, j) * H(j)

        The weight alpha[i, j] for each hj is computed as follows:
        energy = a(s(i-1), H(j))
        alpha = softmax(energy)
        Where a is a feed forward network.

    Arguments:

    output_dim : Output dimension of the last attention decoder layer.
    output_length : Length of the output sequence produced by each decoder
        layer.
    hidden_dim : Dimension of the internal cells. Falls back to output_dim
        when falsy.
    depth : An int (same depth for encoder and decoder) or a pair
        (encoder_depth, decoder_depth).
    bidirectional : When true, the encoder is wrapped in
        Bidirectional(..., merge_mode='sum').
    dropout : Rate given to the Dropout layers.
    **kwargs : Must describe the input shape through one of
        batch_input_shape, input_shape, or input_dim (optionally together
        with input_length). 'unroll' and 'stateful' are consumed here; every
        remaining keyword is forwarded to the encoder LSTMCell constructors.

    Returns:
        The assembled Model mapping the input tensor to the decoded sequence.

    Raises:
        ValueError: if no input shape keyword is supplied.
    '''
    if isinstance(depth, int):
        depth = [depth, depth]

    # Resolve the full (batch, time, features) shape. Only the keyword that
    # is actually used is removed from kwargs.
    if 'batch_input_shape' in kwargs:
        shape = kwargs.pop('batch_input_shape')
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs.pop('input_shape'))
    elif 'input_dim' in kwargs:
        shape = (None, kwargs.pop('input_length', None), kwargs.pop('input_dim'))
    else:
        # Previously this fell through and crashed later with an unbound
        # local `shape`; fail early with an actionable message instead.
        raise ValueError('An input shape is required: pass batch_input_shape, '
                         'input_shape or input_dim (optionally with input_length).')

    unroll = kwargs.pop('unroll', False)
    stateful = kwargs.pop('stateful', False)
    if not hidden_dim:
        hidden_dim = output_dim

    encoder = RecurrentContainer(unroll=unroll, stateful=stateful,
                                 return_sequences=True, input_length=shape[1])
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2]), **kwargs))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim, **kwargs))

    source = Input(batch_shape=shape)
    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
    encoded = encoder(source)

    # Intermediate attention decoders keep hidden_dim features.
    decoded = encoded
    for _ in range(1, depth[1]):
        decoder = AttentionDecoderCell(
            output_dim=hidden_dim, hidden_dim=hidden_dim,
            batch_input_shape=(shape[0], shape[1], hidden_dim)
        ).get_layer(decode=True, output_length=output_length,
                    unroll=unroll, stateful=stateful)
        decoded = Dropout(dropout)(decoded)
        decoded = decoder(decoded)

    # The final decoder projects to output_dim. Its declared input length is
    # output_length when an intermediate decoder precedes it, otherwise the
    # encoder's sequence length.
    decoder = AttentionDecoderCell(
        output_dim=output_dim, hidden_dim=hidden_dim,
        batch_input_shape=(shape[0],
                           output_length if depth[1] > 1 else shape[1],
                           hidden_dim)
    ).get_layer(decode=True, output_length=output_length,
                unroll=unroll, stateful=stateful)
    decoded = Dropout(dropout)(decoded)
    decoded = decoder(decoded)

    model = Model(source, decoded)
    return model
def SimpleSeq2Seq(output_dim, output_length, hidden_dim=None, depth=1,
                  dropout=0., **kwargs):
    '''
    Simple model for sequence to sequence learning.

    The encoder encodes the input sequence to a vector (called the context
    vector). The decoder decodes the context vector into a sequence of
    vectors. There is no one-to-one relation between the input and output
    sequence elements. The input sequence and output sequence may differ in
    length.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
        Falls back to output_dim when falsy.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
        there will be 3 LSTMs on the encoding side and 3 LSTMs on the
        decoding side. You can also specify depth as a tuple. For example,
        if depth = (4, 5), 4 LSTMs will be added to the encoding side and
        5 LSTMs will be added to the decoding side.
    dropout : Dropout probability in between layers.
    **kwargs : Must describe the input shape through one of
        batch_input_shape, input_shape, or input_dim (optionally together
        with input_length). 'unroll' and 'stateful' are consumed here; every
        remaining keyword is forwarded to the LSTMCell constructors.

    Returns:
        A Sequential model containing the encoder followed by the decoder.

    Raises:
        ValueError: if no input shape keyword is supplied.
    '''
    if isinstance(depth, int):
        depth = [depth, depth]

    # Resolve the full (batch, time, features) shape. Only the keyword that
    # is actually used is removed from kwargs.
    if 'batch_input_shape' in kwargs:
        shape = kwargs.pop('batch_input_shape')
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs.pop('input_shape'))
    elif 'input_dim' in kwargs:
        shape = (None, kwargs.pop('input_length', None), kwargs.pop('input_dim'))
    else:
        # Previously this fell through and crashed later with an unbound
        # local `shape`; fail early with an actionable message instead.
        raise ValueError('An input shape is required: pass batch_input_shape, '
                         'input_shape or input_dim (optionally with input_length).')

    unroll = kwargs.pop('unroll', False)
    stateful = kwargs.pop('stateful', False)
    if not hidden_dim:
        hidden_dim = output_dim

    encoder = RecurrentContainer(unroll=unroll, stateful=stateful,
                                 input_length=shape[1])
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2]), **kwargs))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim, **kwargs))

    decoder = RecurrentContainer(unroll=unroll, stateful=stateful, decode=True,
                                 output_length=output_length)
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))
    # The last decoder cell must emit output_dim features. Earlier code used
    # hidden_dim for every cell, so the model only honoured output_dim when
    # hidden_dim happened to equal it.
    if depth[1] == 1:
        decoder.add(LSTMCell(output_dim, **kwargs))
    else:
        decoder.add(LSTMCell(hidden_dim, **kwargs))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim, **kwargs))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim, **kwargs))

    model = Sequential()
    model.add(encoder)
    model.add(decoder)
    return model
def Seq2Seq(output_dim, output_length, hidden_dim=None, depth=1,
            broadcast_state=True, inner_broadcast_state=True,
            teacher_force=False, peek=False, dropout=0., **kwargs):
    '''
    Seq2seq model based on [1] and [2].

    This model has the ability to transfer the encoder hidden state to the
    decoder's hidden state (specified by the broadcast_state argument). Also,
    in deep models (depth > 1), the hidden state is propagated throughout the
    LSTM stack (specified by the inner_broadcast_state argument). You can
    switch between the [1] based model and the [2] based model using the peek
    argument (peek = True for [2], peek = False for [1]). When peek = True,
    the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM
        (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        Where s is the hidden state of the LSTM (h and c), and C is the
        context vector from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
        Falls back to output_dim when falsy.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
        there will be 3 LSTMs on the encoding side and 3 LSTMs on the
        decoding side. You can also specify depth as a tuple. For example,
        if depth = (4, 5), 4 LSTMs will be added to the encoding side and
        5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder
        should be transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be
        propagated throughout the LSTM stack in deep models.
    teacher_force : When true, a second model input of shape
        (batch, output_length, output_dim) is created and handed to the
        decoder as 'ground_truth'.
    peek : Specifies if the decoder should be able to peek at the context
        vector at every timestep.
    dropout : Dropout probability in between layers.
    **kwargs : Must describe the input shape through one of
        batch_input_shape, input_shape, or input_dim (optionally together
        with input_length). 'unroll' and 'stateful' are consumed here; every
        remaining keyword is forwarded to the cell constructors.

    Returns:
        The assembled Model; the encoder and decoder containers are attached
        as model.encoder and model.decoder.

    Raises:
        ValueError: if no input shape keyword is supplied.
    '''
    if isinstance(depth, int):
        depth = [depth, depth]

    # Resolve the full (batch, time, features) shape. Only the keyword that
    # is actually used is removed from kwargs.
    if 'batch_input_shape' in kwargs:
        shape = kwargs.pop('batch_input_shape')
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs.pop('input_shape'))
    elif 'input_dim' in kwargs:
        shape = (None, kwargs.pop('input_length', None), kwargs.pop('input_dim'))
    else:
        # Previously this fell through and crashed later with an unbound
        # local `shape`; fail early with an actionable message instead.
        raise ValueError('An input shape is required: pass batch_input_shape, '
                         'input_shape or input_dim (optionally with input_length).')

    unroll = kwargs.pop('unroll', False)
    stateful = kwargs.pop('stateful', False)
    if not hidden_dim:
        hidden_dim = output_dim

    encoder = RecurrentContainer(readout=True, state_sync=inner_broadcast_state,
                                 input_length=shape[1], unroll=unroll,
                                 stateful=stateful, return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim), **kwargs))
        encoder.add(Dropout(dropout))

    # dense1 lifts the raw features to hidden_dim before the encoder;
    # dense2 projects the encoder output to output_dim for the decoder.
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentContainer(readout='add' if peek else 'readout_only',
                                 state_sync=inner_broadcast_state,
                                 output_length=output_length, unroll=unroll,
                                 stateful=stateful, decode=True,
                                 input_length=shape[1])
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim), **kwargs))

    source = Input(batch_shape=shape)
    # Flag the layer that produced the input tensor as mask-aware.
    source._keras_history[0].supports_masking = True
    encoded_seq = dense1(source)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        # With return_states enabled the encoder result is indexable: the
        # first item is the output, the last two are handed on as states.
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = [None] * 2
    encoded_seq = dense2(encoded_seq)

    inputs = [source]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]

    decoded_seq = decoder({'input': encoded_seq,
                           'ground_truth': inputs[1] if teacher_force else None,
                           'initial_readout': encoded_seq,
                           'states': states})

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
def SimpleSeq2Seq(output_dim, output_length, hidden_dim=None, depth=1,
                  dropout=0., **kwargs):
    '''
    Simple model for sequence to sequence learning.

    The encoder encodes the input sequence to a vector (called the context
    vector). The decoder decodes the context vector into a sequence of
    vectors. There is no one-to-one relation between the input and output
    sequence elements. The input sequence and output sequence may differ in
    length.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
        Falls back to output_dim when falsy.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
        there will be 3 LSTMs on the encoding side and 3 LSTMs on the
        decoding side. You can also specify depth as a tuple. For example,
        if depth = (4, 5), 4 LSTMs will be added to the encoding side and
        5 LSTMs will be added to the decoding side.
    dropout : Dropout probability in between layers.
    **kwargs : Must describe the input shape through one of
        batch_input_shape, input_shape, or input_dim (optionally together
        with input_length). 'unroll' and 'stateful' are consumed here; every
        remaining keyword is forwarded to the LSTMCell constructors.

    Returns:
        A Sequential model containing the encoder followed by the decoder.

    Raises:
        ValueError: if no input shape keyword is supplied.
    '''
    if isinstance(depth, int):
        depth = [depth, depth]

    # Resolve the full (batch, time, features) shape. Only the keyword that
    # is actually used is removed from kwargs.
    if 'batch_input_shape' in kwargs:
        shape = kwargs.pop('batch_input_shape')
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs.pop('input_shape'))
    elif 'input_dim' in kwargs:
        shape = (None, kwargs.pop('input_length', None), kwargs.pop('input_dim'))
    else:
        # Previously this fell through and crashed later with an unbound
        # local `shape`; fail early with an actionable message instead.
        raise ValueError('An input shape is required: pass batch_input_shape, '
                         'input_shape or input_dim (optionally with input_length).')

    unroll = kwargs.pop('unroll', False)
    stateful = kwargs.pop('stateful', False)
    if not hidden_dim:
        hidden_dim = output_dim

    encoder = RecurrentContainer(unroll=unroll, stateful=stateful,
                                 input_length=shape[1])
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2]), **kwargs))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim, **kwargs))

    decoder = RecurrentContainer(unroll=unroll, stateful=stateful, decode=True,
                                 output_length=output_length,
                                 input_length=shape[1])
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))
    if depth[1] == 1:
        decoder.add(LSTMCell(output_dim, **kwargs))
    else:
        # First cell, (depth - 2) hidden cells, then a final cell projecting
        # to output_dim.
        decoder.add(LSTMCell(hidden_dim, **kwargs))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim, **kwargs))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim, **kwargs))

    model = Sequential()
    model.add(encoder)
    model.add(decoder)
    return model
def create_model(cate_num=10, local_num=10, id_num=20,
                 hidden_dim=128, seq_len=20, word_num=10000,
                 inner_broadcast_state=True, broadcast_state=True,
                 encoder_dim=64, dropout=0.5, depth=1, output_length=20,
                 peek=False, teacher_force=True, batch_size=20,
                 **kwargs):
    '''
    Build a decoder-only model conditioned on three categorical ids.

    Three single-id inputs ("cate", "local", "id") are each embedded,
    flattened and passed through Dense(64) + LeakyReLU. The three results are
    concatenated, projected to word_num features, and used both as the input
    and as the initial readout / first state of a recurrentshop decoder whose
    last cell has a softmax activation.

    Arguments:

    cate_num, local_num, id_num : Vocabulary sizes (Embedding input_dim) of
        the three id inputs.
    hidden_dim : hidden_dim of each LSTMDecoderCell; also passed as
        input_length to the decoder container.
    word_num : Output dimension of the projection and of every decoder cell.
    inner_broadcast_state : Passed as state_sync to the decoder container.
    dropout : Rate of the Dropout applied to the decoder input.
    depth : An int or a pair; only depth[1] (number of decoder cells) is
        used.
    output_length : Length of the decoded sequence.
    seq_len, broadcast_state, encoder_dim, peek, teacher_force, batch_size :
        Accepted but not used by this function.
    **kwargs : Must describe the input shape through one of
        batch_input_shape, input_shape, or input_dim (optionally together
        with input_length); only its batch dimension is used. 'unroll' and
        'stateful' are consumed here; every remaining keyword is forwarded to
        the LSTMDecoderCell constructors.

    Returns:
        A Model taking [id_input, local_input, cate_input] and returning the
        decoded sequence.

    Raises:
        ValueError: if no input shape keyword is supplied.
    '''
    if isinstance(depth, int):
        depth = [depth, depth]

    if 'batch_input_shape' in kwargs:
        shape = kwargs.pop('batch_input_shape')
    elif 'input_shape' in kwargs:
        shape = (None, ) + tuple(kwargs.pop('input_shape'))
    elif 'input_dim' in kwargs:
        # NOTE(review): unlike the sibling builders, 'input_dim' is left in
        # kwargs here and therefore reaches the decoder cells. Kept as-is;
        # confirm whether that is intended.
        shape = (None, kwargs.pop('input_length', None), kwargs['input_dim'])
    else:
        # Previously this fell through and crashed later with an unbound
        # local `shape`; fail early with an actionable message instead.
        raise ValueError('An input shape is required: pass batch_input_shape, '
                         'input_shape or input_dim (optionally with input_length).')

    unroll = kwargs.pop('unroll', False)
    stateful = kwargs.pop('stateful', False)

    hid_rep_dim = 64
    output_dim = word_num

    def withMask(layer):
        # Flag the layer that produced this tensor as mask-aware.
        layer._keras_history[0].supports_masking = True

    cate_input = Input(batch_shape=(shape[0], 1), name="cate")
    withMask(cate_input)
    local_input = Input(batch_shape=(shape[0], 1), name='local')
    withMask(local_input)
    id_input = Input(batch_shape=(shape[0], 1), name='id')
    withMask(id_input)

    def flatEmb(tensor, vocab_size, emb_dim, length=1):
        # Embed an id tensor and flatten the result.
        emb = Embedding(input_dim=vocab_size, output_dim=emb_dim,
                        input_length=length)(tensor)
        return Flatten()(emb)

    cate_rep = flatEmb(cate_input, cate_num, hid_rep_dim)
    local_rep = flatEmb(local_input, local_num, hid_rep_dim)
    id_rep = flatEmb(id_input, id_num, hid_rep_dim)
    # (Two stray bare names, `words_rep` and `em`, used to sit in this
    # function and raised NameError on every call; they were removed.)

    fc1 = LeakyReLU()(Dense(64)(cate_rep))
    fc2 = LeakyReLU()(Dense(64)(local_rep))
    fc3 = LeakyReLU()(Dense(64)(id_rep))

    params = merge([fc1, fc2, fc3], mode='concat', concat_axis=1)
    params = Dense(output_dim)(params)
    decoder_input = LeakyReLU()(params)
    decoder_input = BatchNormalization()(decoder_input)
    decoder_input = Dropout(dropout)(decoder_input)

    decoder = RecurrentContainer(readout='readout_only',
                                 state_sync=inner_broadcast_state,
                                 output_length=output_length, unroll=unroll,
                                 stateful=stateful, decode=True,
                                 input_length=hidden_dim,
                                 return_sequences=True)
    for _ in range(1, depth[1]):
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim),
                                    **kwargs))
    # The last cell is given a softmax activation.
    out_lstm = LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                               batch_input_shape=(shape[0], output_dim),
                               **kwargs)
    out_lstm.activation = activations.get('softmax')
    decoder.add(out_lstm)

    states = [decoder_input, None]
    inputs = [id_input, local_input, cate_input]
    # Teacher forcing (an extra ground-truth input) is not wired up here.

    decoded_seq = decoder({
        'input': decoder_input,
        'states': states,
        'initial_readout': decoder_input
    })

    model = Model(inputs, decoded_seq)
    return model
def SimpleSeq2Seq(output_dim, output_length, latent_dim, batch_size,
                  epsilon_std, lookup_matrix=None, hidden_dim=None, depth=1,
                  dropout=0., **kwargs):
    '''
    Simple sequence to sequence model with a sampled latent bottleneck.

    The encoder encodes the embedded input sequence to a vector (called the
    context vector). Two Dense layers map it to z_mean and z_log_var, a
    latent vector z is sampled from them, and the decoder decodes a Dense
    projection of z into a sequence of vectors. There is no one-to-one
    relation between the input and output sequence elements.

    Arguments:

    output_dim : Required output dimension.
    output_length : Length of the required output sequence; also the length
        of both integer input sequences.
    latent_dim : Size of z_mean, z_log_var and the sampled z.
    batch_size : Batch size used for the shape of the sampling noise.
    epsilon_std : Standard deviation of the sampling noise.
    lookup_matrix : 2-D embedding weight matrix (rows = vocabulary size,
        columns = embedding size). Required despite the None default.
    hidden_dim : The dimension of the internal representations of the model.
        Falls back to output_dim when falsy.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
        there will be 3 LSTMs on the encoding side and 3 LSTMs on the
        decoding side. You can also specify depth as a tuple. For example,
        if depth = (4, 5), 4 LSTMs will be added to the encoding side and
        5 LSTMs will be added to the decoding side.
    dropout : Dropout probability in between layers.
    **kwargs : Must describe the encoder input shape through one of
        batch_input_shape, input_shape, or input_dim (optionally together
        with input_length). 'unroll' and 'stateful' are consumed here; every
        remaining keyword is forwarded to the LSTMCell constructors.

    Returns:
        A tuple (optimizer, loss, x, y): the Adam minimize op, the summed
        mean-squared-error loss between the decoder output and the embedded
        target, and the two input placeholders.

    Raises:
        ValueError: if no input shape keyword is supplied or lookup_matrix
            is None.
    '''
    if isinstance(depth, int):
        depth = [depth, depth]

    # Resolve the full (batch, time, features) shape. Only the keyword that
    # is actually used is removed from kwargs.
    if 'batch_input_shape' in kwargs:
        shape = kwargs.pop('batch_input_shape')
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs.pop('input_shape'))
    elif 'input_dim' in kwargs:
        shape = (None, kwargs.pop('input_length', None), kwargs.pop('input_dim'))
    else:
        # Previously this fell through and crashed later with an unbound
        # local `shape`; fail early with an actionable message instead.
        raise ValueError('An input shape is required: pass batch_input_shape, '
                         'input_shape or input_dim (optionally with input_length).')

    unroll = kwargs.pop('unroll', False)
    stateful = kwargs.pop('stateful', False)
    if not hidden_dim:
        hidden_dim = output_dim

    if lookup_matrix is None:
        # The default is only a placeholder; the matrix is dereferenced
        # unconditionally below.
        raise ValueError('lookup_matrix is required to build the embedding layer.')

    embedder = Embedding(input_dim=lookup_matrix.shape[0],
                         output_dim=lookup_matrix.shape[1],
                         input_length=output_length,
                         weights=[lookup_matrix])

    encoder = RecurrentContainer(unroll=unroll, stateful=stateful,
                                 input_length=shape[1])
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2]), **kwargs))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim, **kwargs))

    decoder = RecurrentContainer(unroll=unroll, stateful=stateful, decode=True,
                                 output_length=output_length,
                                 input_length=shape[1])
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))
    if depth[1] == 1:
        decoder.add(LSTMCell(output_dim, **kwargs))
    else:
        decoder.add(LSTMCell(hidden_dim, **kwargs))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim, **kwargs))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim, **kwargs))

    x = Input(batch_shape=(None, output_length))
    embedded_x = embedder(x)
    h_encoded = encoder(embedded_x)

    def sampling(args):
        # Reparameterised draw: mean + exp(log_var / 2) * noise.
        z_mean, z_log_var = args
        epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0.,
                                  std=epsilon_std)
        return z_mean + K.exp(z_log_var / 2) * epsilon

    z_mean = Dense(latent_dim)(h_encoded)
    z_log_var = Dense(latent_dim)(h_encoded)
    z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
    # Debug output; the call form prints the same text on Python 2 and 3.
    print(z)
    h_0 = Dense(hidden_dim, activation='relu')(z)
    print(h_0)
    model_out = decoder(h_0)
    print(model_out)

    # The target sequence shares the embedding layer with the input.
    y = Input(batch_shape=(None, output_length))
    embedded_y = embedder(y)
    print(embedded_y)

    loss = tf.reduce_sum(mean_squared_error(model_out, embedded_y))
    adam = tf.train.AdamOptimizer()
    optimizer = adam.minimize(loss)
    return optimizer, loss, x, y
def Seq2Seq(output_dim, output_length, lookup_matrix, hidden_dim=None,
            depth=1, broadcast_state=True, inner_broadcast_state=True,
            teacher_force=False, peek=False, dropout=0., **kwargs):
    '''
    Seq2seq model based on [1] and [2], fed through an embedding layer.

    This model has the ability to transfer the encoder hidden state to the
    decoder's hidden state (specified by the broadcast_state argument). Also,
    in deep models (depth > 1), the hidden state is propagated throughout the
    LSTM stack (specified by the inner_broadcast_state argument). You can
    switch between the [1] based model and the [2] based model using the peek
    argument (peek = True for [2], peek = False for [1]). When peek = True,
    the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM
        (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        Where s is the hidden state of the LSTM (h and c), and C is the
        context vector from the encoder.

    Arguments:

    output_dim : Required output dimension.
    output_length : Length of the required output sequence.
    lookup_matrix : 2-D embedding weight matrix (rows = vocabulary size,
        columns = embedding size) used to initialise the Embedding layer.
    hidden_dim : The dimension of the internal representations of the model.
        Falls back to output_dim when falsy.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
        there will be 3 LSTMs on the encoding side and 3 LSTMs on the
        decoding side. You can also specify depth as a tuple. For example,
        if depth = (4, 5), 4 LSTMs will be added to the encoding side and
        5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder
        should be transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be
        propagated throughout the LSTM stack in deep models.
    teacher_force : When true, a second model input of shape
        (batch, output_length, output_dim) is created and handed to the
        decoder as 'ground_truth'.
    peek : Specifies if the decoder should be able to peek at the context
        vector at every timestep.
    dropout : Dropout probability in between layers.
    **kwargs : Must describe the input shape through one of
        batch_input_shape, input_shape, or input_dim (optionally together
        with input_length); only its first two entries are used for the
        integer input. 'unroll' and 'stateful' are consumed here; every
        remaining keyword is forwarded to the cell constructors.

    Returns:
        The assembled Model; the encoder and decoder containers are attached
        as model.encoder and model.decoder.

    Raises:
        ValueError: if no input shape keyword is supplied.
    '''
    if isinstance(depth, int):
        depth = [depth, depth]

    # Resolve the full (batch, time, features) shape. Only the keyword that
    # is actually used is removed from kwargs.
    if 'batch_input_shape' in kwargs:
        shape = kwargs.pop('batch_input_shape')
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs.pop('input_shape'))
    elif 'input_dim' in kwargs:
        shape = (None, kwargs.pop('input_length', None), kwargs.pop('input_dim'))
    else:
        # Previously this fell through and crashed later with an unbound
        # local `shape`; fail early with an actionable message instead.
        raise ValueError('An input shape is required: pass batch_input_shape, '
                         'input_shape or input_dim (optionally with input_length).')

    unroll = kwargs.pop('unroll', False)
    stateful = kwargs.pop('stateful', False)
    if not hidden_dim:
        hidden_dim = output_dim

    encoder = RecurrentContainer(readout=True, state_sync=inner_broadcast_state,
                                 input_length=shape[1], unroll=unroll,
                                 stateful=stateful, return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim), **kwargs))
        encoder.add(Dropout(dropout))

    # dense1 lifts the embedded features to hidden_dim before the encoder;
    # dense2 projects the encoder output to output_dim for the decoder.
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentContainer(readout='add' if peek else 'readout_only',
                                 state_sync=inner_broadcast_state,
                                 output_length=output_length, unroll=unroll,
                                 stateful=stateful, decode=True,
                                 input_length=shape[1])
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim), **kwargs))

    # The model input is a 2-D (batch, time) tensor fed to the embedding.
    source = Input(batch_shape=(shape[0], shape[1]))
    # Debug output; the call form prints the same text on Python 2 and 3.
    print(source.shape)
    embedded_input = Embedding(input_dim=lookup_matrix.shape[0],
                               output_dim=lookup_matrix.shape[1],
                               weights=[lookup_matrix])(source)
    print(embedded_input.shape)
    # Flag the layer that produced the input tensor as mask-aware.
    source._keras_history[0].supports_masking = True

    encoded_seq = dense1(embedded_input)
    encoded_seq = encoder(encoded_seq)
    print(encoded_seq)
    if broadcast_state:
        # With return_states enabled the encoder result is indexable: the
        # first item is the output, the last two are handed on as states.
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = [None] * 2
    encoded_seq = dense2(encoded_seq)

    inputs = [source]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]

    decoded_seq = decoder({'input': encoded_seq,
                           'ground_truth': inputs[1] if teacher_force else None,
                           'initial_readout': encoded_seq,
                           'states': states})

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    print("==========Input=========")
    print(model.input)
    print("==========Input=========")
    print(model.output)
    return model
EMBEDDING_DIM, weights=[embedding_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=True) print('Embedding matrix completed.') # -------------- DNN goes after here --------------------- cinput = Input(shape=(context_maxlen,), dtype='int32') cembed = embedding_layer(cinput) clstm1 = Bidirectional(LSTM(100, return_sequences=True))(cembed) qinput = Input(shape=(question_maxlen,), dtype='int32') qembed = embedding_layer(qinput) qlstm1 = Bidirectional(LSTM(100, return_sequences=True))(qembed) cdecoder = RecurrentContainer(decode=True, output_length=context_maxlen, input_length=context_maxlen) cdecoder.add(AttentionDecoderCell(output_dim=100, hidden_dim=100)) clstm2 = cdecoder(clstm1) ch1 = Attention(qlstm1)(clstm1) clstm2 = Bidirectional(LSTM(100, return_sequences=True))(ch1) qh1 = Attention(clstm2)(qlstm1) qlstm2 = Bidirectional(LSTM(100, return_sequences=True))(qh1) ch2 = Attention(qlstm2)(clstm2) qh2 = Attention(ch2)(qlstm2) h = Merge([ch2, qh2], mode='concat') hlstm = Bidirectional(LSTM(100))(h) output1 = Dense(context_maxlen, activation='softmax')(hlstm) hmerge = Merge([hlstm, output1], mode='concat')