def test_advanced_cells():
    """Smoke-test a RecurrentSequential holding a single RHNCell.

    Builds the model, compiles it, then runs one fit and one predict on
    random data to check that the graph is usable end to end.
    """
    recurrent = RecurrentSequential()
    recurrent.add(RHNCell(10, recurrence_depth=2, input_dim=5))

    seq_in = Input((7, 5))
    seq_out = recurrent(seq_in)

    net = Model(seq_in, seq_out)
    net.compile(loss='mse', optimizer='sgd')

    # x: (12, 7, 5) random inputs, y: (12, 10) random targets.
    x_train = np.random.random((12, 7, 5))
    y_train = np.random.random((12, 10))
    net.fit(x_train, y_train)
    net.predict(np.random.random((12, 7, 5)))
def test_decode_unroll():
    """Smoke-test an unrolled decoding stack of three LSTM cells.

    The RecurrentSequential is created with ``decode=True`` and
    ``output_length=7``; the model is fitted on (12, 5) inputs against
    (12, 7, 10) targets and then used for prediction.
    """
    vec_in = Input((5, ))

    decoder = RecurrentSequential(decode=True, output_length=7, unroll=True)
    decoder.add(LSTMCell(10, input_dim=5))
    for _ in range(2):
        decoder.add(LSTMCell(10))

    seq_out = decoder(vec_in)
    net = Model(vec_in, seq_out)
    net.compile(loss='mse', optimizer='sgd')

    x_train = np.random.random((12, 5))
    y_train = np.random.random((12, 7, 10))
    net.fit(x_train, y_train)
    net.predict(np.random.random((12, 5)))
def test_readout_state_sync():
    """Smoke-test ``readout=True`` combined with ``state_sync=True``.

    Three LSTM cells of width 5 are stacked; the model is compiled, fitted
    and run for prediction on random data.
    """
    seq_in = Input((7, 5))

    stack = RecurrentSequential(readout=True, state_sync=True)
    stack.add(LSTMCell(5, input_dim=5))
    for _ in range(2):
        stack.add(LSTMCell(5))

    out = stack(seq_in)
    net = Model(seq_in, out)
    net.compile(loss='mse', optimizer='sgd')

    x_train = np.random.random((12, 7, 5))
    y_train = np.random.random((12, 5))
    net.fit(x_train, y_train)
    net.predict(np.random.random((12, 7, 5)))
def test_state_sync_unroll():
    """Smoke-test ``state_sync=True`` together with ``unroll=True``.

    Uses a stack of three LSTM cells of width 10 and runs one fit and one
    predict on random data.
    """
    stack = RecurrentSequential(state_sync=True, unroll=True)
    stack.add(LSTMCell(10, input_dim=5))
    for _ in range(2):
        stack.add(LSTMCell(10))

    seq_in = Input((7, 5))
    out = stack(seq_in)

    net = Model(seq_in, out)
    net.compile(loss='mse', optimizer='sgd')

    x_train = np.random.random((12, 7, 5))
    y_train = np.random.random((12, 10))
    net.fit(x_train, y_train)
    net.predict(np.random.random((12, 7, 5)))
def test_rnn_cell_from_model():
    """Smoke-test wrapping a hand-built Keras model as an RNN cell.

    The step model maps ``[x, h_tm1]`` to ``[h, h]`` where
    ``h = tanh(Dense(x) + Dense_no_bias(h_tm1))``; it is wrapped with
    RNNCellFromModel, placed in a RecurrentSequential, and trained/predicted
    on small arrays.
    """
    step_x = Input((5, ))
    prev_h = Input((10, ))
    pre_activation = add(
        [Dense(10)(step_x), Dense(10, use_bias=False)(prev_h)])
    new_h = Activation('tanh')(pre_activation)
    step_model = Model([step_x, prev_h], [new_h, new_h])

    cell = RNNCellFromModel(step_model)
    recurrent = RecurrentSequential()
    recurrent.add(cell)

    seq_in = Input((7, 5))
    out = recurrent(seq_in)
    net = Model(seq_in, out)
    net.compile(loss='mse', optimizer='sgd')

    x_train = np.random.random((32, 7, 5))
    y_train = np.random.random((32, 10))
    net.fit(x_train, y_train)
    # Prediction deliberately uses an all-zero batch, as in the original.
    net.predict(np.zeros((32, 7, 5)))
def test_state_initializer():
    """Smoke-test a single string ``state_initializer`` on a mixed cell stack.

    The stack is LSTMCell(7) -> SimpleRNNCell(8) -> GRUCell(10) with
    ``state_initializer='random_normal'``.
    """
    stack = RecurrentSequential(state_initializer='random_normal')
    stack.add(LSTMCell(7, input_dim=5))
    stack.add(SimpleRNNCell(8))
    stack.add(GRUCell(10))

    seq_in = Input((7, 5))
    out = stack(seq_in)

    net = Model(seq_in, out)
    net.compile(loss='mse', optimizer='sgd')

    x_train = np.random.random((12, 7, 5))
    y_train = np.random.random((12, 10))
    net.fit(x_train, y_train)
    net.predict(np.random.random((12, 7, 5)))
def test_readout_unroll():
    """Smoke-test ``readout=True`` together with ``unroll=True``.

    The stack is LSTMCell(7) -> SimpleRNNCell(8) -> GRUCell(5); the last
    cell's width matches the 5-wide input used in this test.
    """
    seq_in = Input((7, 5))

    stack = RecurrentSequential(readout=True, unroll=True)
    stack.add(LSTMCell(7, input_dim=5))
    stack.add(SimpleRNNCell(8))
    stack.add(GRUCell(5))

    out = stack(seq_in)
    net = Model(seq_in, out)
    net.compile(loss='mse', optimizer='sgd')

    x_train = np.random.random((12, 7, 5))
    y_train = np.random.random((12, 5))
    net.fit(x_train, y_train)
    net.predict(np.random.random((12, 7, 5)))
def test_state_initializer_as_list():
    """Smoke-test passing ``state_initializer`` as a list of initializer names.

    The first cell is declared with ``batch_input_shape=(12, 5)`` rather than
    ``input_dim``; the rest of the flow is build, compile, fit, predict.
    """
    initializers = ['random_normal', 'glorot_uniform']
    stack = RecurrentSequential(state_initializer=initializers)
    stack.add(LSTMCell(7, batch_input_shape=(12, 5)))
    stack.add(SimpleRNNCell(8))
    stack.add(GRUCell(10))

    seq_in = Input((7, 5))
    out = stack(seq_in)

    net = Model(seq_in, out)
    net.compile(loss='mse', optimizer='sgd')

    x_train = np.random.random((12, 7, 5))
    y_train = np.random.random((12, 10))
    net.fit(x_train, y_train)
    net.predict(np.random.random((12, 7, 5)))
def test_serialisation():
    """Smoke-test a ``get_config`` / ``from_config`` round trip.

    A three-cell RecurrentSequential is serialised to a config, rebuilt from
    it, and the rebuilt layer (not the original) is used to build, train and
    run a model.
    """
    original = RecurrentSequential()
    original.add(LSTMCell(output_dim=7, input_dim=5))
    original.add(SimpleRNNCell(8))
    original.add(GRUCell(10))

    config = original.get_config()
    restored = RecurrentSequential.from_config(config)

    seq_in = Input((7, 5))
    out = restored(seq_in)

    net = Model(seq_in, out)
    net.compile(loss='mse', optimizer='sgd')

    x_train = np.random.random((12, 7, 5))
    y_train = np.random.random((12, 10))
    net.fit(x_train, y_train)
    net.predict(np.random.random((12, 7, 5)))
def test_teacher_force():
    """Smoke-test ``teacher_force=True`` with a ground-truth input tensor.

    The layer is called with ``ground_truth=...`` and the resulting model
    takes two inputs (the sequence and the ground truth).
    """
    seq_in = Input((7, 5))

    stack = RecurrentSequential(readout=True, teacher_force=True)
    stack.add(LSTMCell(7, input_dim=5))
    stack.add(SimpleRNNCell(8))
    stack.add(GRUCell(5))

    truth_in = Input((7, 5))
    out = stack(seq_in, ground_truth=truth_in)

    net = Model([seq_in, truth_in], out)
    net.compile(loss='mse', optimizer='sgd')

    x_train = [np.random.random((12, 7, 5)), np.random.random((12, 7, 5))]
    y_train = np.random.random((12, 5))
    net.fit(x_train, y_train)

    # The same random array is fed to both inputs for prediction.
    shared = np.random.random((12, 7, 5))
    net.predict([shared] * 2)
def encoder_init(input,
                 postcshape,
                 hidden_dim,
                 depth,
                 dropout=0,
                 seq2seq=True,
                 bidirectional=True,
                 unroll=False,
                 stateful=False,
                 Encoder=None,
                 global_name="",
                 return_model=False):
    """Build a stacked LSTM encoder and apply it to ``input``.

    Arguments:
        input: tensor the encoder is applied to.
        postcshape: full batch shape handed to the ``build`` calls; its
            tail ``postcshape[1:]`` is used as the first cell's
            ``batch_input_shape`` and as the ``Input`` shape when
            ``return_model`` is set.
        hidden_dim: default width for any layer not listed in ``Encoder``.
        depth: indexable; ``depth[0]`` is the number of encoder LSTM layers.
        dropout: rate of the Dropout inserted between stacked cells.
        seq2seq: unused by this function.
        bidirectional: wrap the stack in ``Bidirectional(merge_mode='sum')``.
        unroll, stateful: forwarded to ``RecurrentSequential``.
        Encoder: optional sequence of per-layer widths; if shorter than
            ``depth[0]`` it is padded with ``hidden_dim``.
        global_name: prefix for the layer name.
        return_model: if true, wrap the encoder in its own ``Model`` before
            applying it to ``input``.

    Returns:
        The encoder output tensor for ``input``.
    """
    n_layers = depth[0]
    # `is None` (not `== None`) so array-like width lists are not compared
    # element-wise.
    if Encoder is None:
        Encoder = [hidden_dim] * n_layers
    elif len(Encoder) < n_layers:
        # list(...) lets callers pass a tuple of widths as well.
        Encoder = list(Encoder) + [hidden_dim] * (n_layers - len(Encoder))

    encoder = RecurrentSequential(
        unroll=unroll,
        stateful=stateful,
        # return_states=True, return_all_states=True: AllStateTransfer
        # needs modification in the tensorflow backend
        return_sequences=True,
        name=global_name + 'encoder')
    encoder.add(LSTMCell(Encoder[0], batch_input_shape=postcshape[1:]))
    for k in range(1, n_layers):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(Encoder[k]))

    if bidirectional:
        encoder = Bidirectional(encoder,
                                merge_mode='sum',
                                name=global_name + 'encoder')
        encoder.forward_layer.build(postcshape)
        encoder.backward_layer.build(postcshape)
        # patch
        encoder.layer = encoder.forward_layer

    if return_model:
        enc_input = Input(shape=postcshape[1:], name='encoder_input')
        encoded_out = encoder(enc_input)
        encoder_model = Model(inputs=[enc_input], outputs=[encoded_out])
        return encoder_model(input)
    return encoder(input)
def Seq2SeqtableQA(row_maxlen,
                   question_maxlen,
                   answer_maxlen,
                   len_dic,
                   hidden_dim,
                   batch_size,
                   depth=(1, 1),
                   dropout=0.0,
                   unroll=False,
                   stateful=False):
    '''Table/question to answer sequence model.

    Based on SimpleSeq2Seq from
    https://github.com/farizrahman4u/seq2seq/blob/master/seq2seq/models.py

    Two LSTM stacks of ``depth[0]`` layers encode the table input and the
    question input; the two encodings are combined with ``dot`` over axis 1
    and the result is decoded by a ``depth[1]``-layer LSTM stack that emits
    ``answer_maxlen`` steps with a last cell of width ``len_dic``.

    Returns:
        A ``Model`` with inputs ``[table, question]`` and the decoded
        answer as output.
    '''

    def _stacked_encoder(seq_len):
        # One encoder stack; the first cell keeps the original
        # batch_input_shape=(seq_len, len_dic) declaration.
        stack = RecurrentSequential(unroll=unroll, stateful=stateful)
        stack.add(LSTMCell(hidden_dim, batch_input_shape=(seq_len, len_dic)))
        for _ in range(1, depth[0]):
            stack.add(Dropout(dropout))
            stack.add(LSTMCell(hidden_dim))
        return stack

    # Input placeholders.
    table = Input((row_maxlen, len_dic))
    question = Input((question_maxlen, len_dic))

    # Encode the table, then the question (same order as layer creation
    # in the original code).
    table_encoded = _stacked_encoder(row_maxlen)(table)
    question_encoded = _stacked_encoder(question_maxlen)(question)

    # Match table and question.
    match = dot([table_encoded, question_encoded], axes=(1, 1))

    # Answer decoder.
    answer_decoder = RecurrentSequential(unroll=unroll,
                                         stateful=stateful,
                                         decode=True,
                                         output_length=answer_maxlen)
    answer_decoder.add(Dropout(dropout, input_shape=(batch_size, hidden_dim)))
    if depth[1] != 1:
        answer_decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            answer_decoder.add(Dropout(dropout))
            answer_decoder.add(LSTMCell(hidden_dim))
        answer_decoder.add(Dropout(dropout))
    # In both the shallow and the deep case the last cell has width len_dic.
    answer_decoder.add(LSTMCell(len_dic))

    answer_decoded = answer_decoder(match)
    return Model(inputs=[table, question], outputs=answer_decoded)
def decoder_init(input,
                 shape,
                 input_dim,
                 hidden_dim,
                 output_dim,
                 output_length,
                 depth,
                 dropout=0,
                 bidirectional=True,
                 unroll=False,
                 stateful=False,
                 Decoder=None,
                 AttentionCell=AltAttentionDecoderCell,
                 global_name="",
                 return_model=False):
    """Build an attention decoder stack and apply it to ``input``.

    Arguments:
        input: tensor the decoder is applied to.
        shape: indexable; ``shape[0]`` and ``shape[1]`` are used with
            ``input_dim`` as the first layer's ``batch_input_shape``.
        input_dim: last dimension of the decoder's input.
        hidden_dim: default width for layers not listed in ``Decoder``.
        output_dim: output width of the final ``LSTMDecoderCell``.
        output_length: number of decoding steps.
        depth: indexable; ``depth[1]`` is the number of decoder layers.
        dropout: rate of the Dropout layers.
        bidirectional, global_name, return_model: unused by this function.
        unroll, stateful: forwarded to ``RecurrentSequential``.
        Decoder: optional sequence of per-layer widths, padded with
            ``hidden_dim`` up to ``depth[1]`` entries.
        AttentionCell: object added as the first recurrent layer; it is
            passed to ``decoder.add`` as given.

    Returns:
        The decoder output tensor for ``input``.
    """
    n_layers = depth[1]
    # `is None` (not `== None`) so array-like width lists are not compared
    # element-wise.
    if Decoder is None:
        Decoder = [hidden_dim] * n_layers
    elif len(Decoder) < n_layers:
        # list(...) lets callers pass a tuple of widths as well.
        Decoder = list(Decoder) + [hidden_dim] * (n_layers - len(Decoder))

    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  name='decoder')
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], input_dim)))

    # The attention cell comes first regardless of depth.
    decoder.add(AttentionCell)
    if n_layers != 1:
        for k in range(n_layers - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=Decoder[k + 1],
                                hidden_dim=Decoder[k]))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=Decoder[-1]))
    return decoder(input)
def AttentionSeq2Seq(
        output_dim,
        output_length,
        batch_input_shape=None,
        batch_size=None,
        input_shape=None,
        input_length=None,
        input_dim=None,
        hidden_dim=None,
        depth=1,
        bidirectional=True,
        unroll=False,
        stateful=False,
        dropout=0.0,
):
    '''Attention sequence-to-sequence model.

    References:
        [1] Sequence to Sequence Learning with Neural Networks
        [2] Learning Phrase Representations using RNN Encoder-Decoder for
            Statistical Machine Translation
        [3] Neural Machine Translation by Jointly Learning to Align and
            Translate
        [4] A Neural Conversational Model

    This is an attention Seq2seq model based on [3]. There is a soft
    alignment between the input and output sequence elements. A
    bidirectional encoder is used by default. There is no hidden state
    transfer in this model.

    The math:

        Encoder:
        X = Input sequence of length m.
        H = Bidirectional_LSTM(X); the LSTM has return_sequences = True,
        so H is a sequence of vectors of length m.

        Decoder:
        y(i) = LSTM(s(i-1), y(i-1), v(i)); where s is the hidden state of
        the LSTM (h and c) and v (the context vector) is a weighted sum
        over H:

        v(i) = sigma(j = 0 to m-1) alpha(i, j) * H(j)

        The weight alpha(i, j) for each H(j) is computed as follows:
        energy = a(s(i-1), H(j))
        alpha = softmax(energy)
        where a is a feed forward network.

    The input shape is taken from the first of ``batch_input_shape``,
    ``input_shape`` (prefixed with ``batch_size``) or ``input_dim``
    (with ``input_length`` or ``None`` as the time dimension) that is set.
    ``depth`` may be an int or an ``(encoder_depth, decoder_depth)`` pair.
    ``hidden_dim`` defaults to ``output_dim``.

    Returns:
        A ``Model`` mapping the input sequence to the decoded sequence.

    Raises:
        TypeError: if none of the shape arguments is provided.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        raise TypeError('Could not determine the input shape: specify '
                        'batch_input_shape, input_shape or input_dim.')

    if hidden_dim is None:
        hidden_dim = output_dim

    # shape: [batch, max_encoder_length, input_dim]
    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # 1. Define the encoder.
    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    # shape[0]: batch, shape[2]: input_dim
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))
    # depth is the number of stacked LSTM layers.
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    # 2. Encode.
    # _input: [batch, max_encoder_length, input_dim]
    # encoded: [batch, max_encoder_length, hidden]
    encoded = encoder(_input)

    # 3. Define the decoder.
    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    # attention
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        # NOTE(review): a second attention cell is stacked here for deep
        # decoders; this is preserved from the original code - confirm it
        # is intentional.
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim,
                                 hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    # 4. Decode.
    decoded = decoder(encoded)

    inputs = [_input]
    model = Model(inputs=inputs, outputs=decoded)
    return model
def Pointer(output_dim, output_length, batch_input_shape=None,
            batch_size=None, input_shape=None, input_length=None,
            input_dim=None, hidden_dim=None, depth=1,
            bidirectional=True, unroll=False, stateful=False, dropout=0.0,):
    '''Pointer-style model: an LSTM encoder followed by a PointerDecoderCell.

    This is an attention Seq2seq model based on "Neural Machine Translation
    by Jointly Learning to Align and Translate". There is a soft alignment
    between the input and output sequence elements. A bidirectional encoder
    is used by default. There is no hidden state transfer in this model.

    The math:

        Encoder:
        X = Input sequence of length m.
        H = Bidirectional_LSTM(X); the LSTM has return_sequences = True,
        so H is a sequence of vectors of length m.

        Decoder:
        y(i) = LSTM(s(i-1), y(i-1), v(i)); where s is the hidden state of
        the LSTM (h and c) and v (the context vector) is a weighted sum
        over H:

        v(i) = sigma(j = 0 to m-1) alpha(i, j) * H(j)

        The weight alpha(i, j) for each H(j) is computed as follows:
        energy = a(s(i-1), H(j))
        alpha = softmax(energy)
        where a is a feed forward network.

    The input shape is taken from the first of ``batch_input_shape``,
    ``input_shape`` (prefixed with ``batch_size``) or ``input_dim`` that is
    set. Only ``depth[0]`` (encoder depth) is used; the decoder is a single
    ``PointerDecoderCell``. The encoder is always built with
    ``stateful=False``; ``stateful`` only affects the decoder.

    Returns:
        A ``Model`` mapping the input sequence to the decoder output.

    Raises:
        TypeError: if none of the shape arguments is provided.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        raise TypeError('Could not determine the input shape: specify '
                        'batch_input_shape, input_shape or input_dim.')

    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoder = RecurrentSequential(unroll=unroll, stateful=False,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    encoded = encoder(_input)

    decoder = RecurrentSequential(decode=True, output_length=output_length,
                                  unroll=unroll, stateful=stateful,
                                  return_sequences=True)
    decoder.add(
        PointerDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                           batch_input_shape=(shape[0], shape[1], hidden_dim)))

    # Per the original author's note the softmax is applied inside the
    # cell, so no extra output layer is added here.
    inputs = [_input]
    decoded = decoder(encoded)
    model = Model(inputs, decoded)
    return model
def _make_model(self):
    """Constructs ContraVAE model.

    Builds three Keras models that share layers and stores them on the
    instance:

    * ``self.vae``: inputs ``[sequence, score, score2]`` -> decoded sequence,
      compiled with the combined reconstruction + KL loss.
    * ``self.encoder``: inputs ``[sequence, score]`` -> ``z_mean``.
    * ``self.generator``: inputs ``[z, score2]`` -> decoded sequence.

    Returns:
        Nothing.
    """
    logger.info("Making model...")
    cfg = self.config

    # Load embedding in Embedding layer
    embedding_matrix = self._load_embedding()
    embedding_layer = Embedding(self.num_words + 1,
                                cfg.embedding_dim,
                                weights=[embedding_matrix],
                                input_length=cfg.max_sequence_length,
                                trainable=False)

    # Q(z|X,c) -- encoder
    # Embedded sequence input
    sequence_inputs = Input(batch_shape=(cfg.batch_size,
                                         cfg.max_sequence_length),
                            dtype='int32')
    embedded_sequence_inputs = embedding_layer(sequence_inputs)

    # Merge with score inputs: the score is repeated along the sequence
    # and concatenated to the embeddings on axis 2.
    score_inputs = Input(batch_shape=(cfg.batch_size, 1))
    score_inputs_repeated = RepeatVector(
        cfg.max_sequence_length)(score_inputs)
    last_layer = concatenate(
        [embedded_sequence_inputs, score_inputs_repeated], axis=2)

    # LSTM layers: all but the last return sequences.
    for dim in cfg.encoder_lstm_dims[:-1]:
        last_layer = LSTM(dim, return_sequences=True)(last_layer)
    last_layer = LSTM(cfg.encoder_lstm_dims[-1],
                      return_sequences=False)(last_layer)

    # Mean and log-sigma of z
    z_mean = Dense(cfg.latent_dim, activation='tanh')(last_layer)
    z_log_sigma = Dense(cfg.latent_dim, activation='tanh')(last_layer)

    # Sample z ~ Q(z|X,c)
    def sampling(args):
        z_mean, z_log_sigma = args
        # K.random_normal yields a random tensor op; the previous
        # K.random_normal_variable created a variable whose noise values
        # were drawn only once at initialisation.
        epsilon = K.random_normal(shape=(cfg.batch_size, cfg.latent_dim),
                                  mean=0.,
                                  stddev=1.)
        return z_mean + K.exp(z_log_sigma / 2.) * epsilon

    z = Lambda(sampling)([z_mean, z_log_sigma])

    # Second score inputs - at training time this is simply equal to
    # score_inputs; at sampling time this could vary.
    score_inputs2 = Input(batch_shape=(cfg.batch_size, 1))
    z_c = concatenate([z, score_inputs2], axis=1)

    # P(X|z,c) -- decoder.
    rnn = RecurrentSequential(decode=True,
                              output_length=cfg.max_sequence_length)
    rnn.add(LSTMCell(cfg.decoder_lstm_dims[0],
                     input_dim=cfg.latent_dim + 1))
    for dim in cfg.decoder_lstm_dims[1:]:
        rnn.add(LSTMCell(dim))
    decoder_out = TimeDistributed(
        Dense(self.num_words + 1, activation='tanh'))

    # Decoder output
    h_decoded = rnn(z_c)
    x_decoded = decoder_out(h_decoded)

    # Construct models
    # VAE
    vae = Model([sequence_inputs, score_inputs, score_inputs2], x_decoded)
    # Encoder
    encoder = Model([sequence_inputs, score_inputs], z_mean)
    # Generator: reuses the decoder RNN and output layer on an external z.
    generator_z_inputs = Input(batch_shape=(cfg.batch_size, cfg.latent_dim))
    generator_z_c = concatenate([generator_z_inputs, score_inputs2], axis=1)
    generator_h_decoded = rnn(generator_z_c)
    generator_x_decoded = decoder_out(generator_h_decoded)
    generator = Model([generator_z_inputs, score_inputs2],
                      generator_x_decoded)

    # Define loss function
    kl_weight = cfg.kl_weight

    def recon_loss(y_true, y_pred):
        """E[log P(X|z,y)]."""
        recon = K.mean(K.sparse_categorical_crossentropy(output=y_pred,
                                                         target=y_true,
                                                         from_logits=True),
                       axis=1)
        return recon

    def kl_loss(y_true, y_pred):
        """D_KL(Q(z|X,y) || P(z|X)); calculate in closed form as both
        dist. are Gaussian.
        """
        kl = 0.5 * K.mean(
            K.exp(z_log_sigma) + K.square(z_mean) - 1. - z_log_sigma,
            axis=1)
        kl = kl * kl_weight
        return kl

    def vae_loss(y_true, y_pred):
        """Calculate loss = reconstruction loss + KL loss."""
        recon = recon_loss(y_true, y_pred)
        kl = kl_loss(y_true, y_pred)
        return recon + kl

    # Compile model
    vae.compile(loss=vae_loss,
                optimizer=cfg.optimizer,
                metrics=[recon_loss, kl_loss])

    self.vae = vae
    self.encoder = encoder
    self.generator = generator
    logger.info("Done making model.")
def AttentionSeq2Seq(
        output_dim,
        output_length,
        batch_input_shape=None,
        batch_size=None,
        input_shape=None,
        input_length=None,
        is_embedding=True,
        embedding_dim=None,
        n_tokens=1000,
        input_dim=None,
        hidden_dim=None,
        depth=1,
        bidirectional=False,
        unroll=False,
        stateful=False,
        dropout=0.0,
):
    '''Attention sequence-to-sequence model with an optional embedding front.

    This is an attention Seq2seq model based on "Neural Machine Translation
    by Jointly Learning to Align and Translate". There is a soft alignment
    between the input and output sequence elements. There is no hidden
    state transfer in this model. The encoder is unidirectional unless
    ``bidirectional=True`` is passed.

    The math:

        Encoder:
        X = Input sequence of length m.
        H = LSTM(X); the LSTM has return_sequences = True, so H is a
        sequence of vectors of length m.

        Decoder:
        y(i) = LSTM(s(i-1), y(i-1), v(i)); where s is the hidden state of
        the LSTM (h and c) and v (the context vector) is a weighted sum
        over H:

        v(i) = sigma(j = 0 to m-1) alpha(i, j) * H(j)

        The weight alpha(i, j) for each H(j) is computed as follows:
        energy = a(s(i-1), H(j))
        alpha = softmax(energy)
        where a is a feed forward network.

    Input handling:
        * ``is_embedding`` true: the model input is
          ``Input(batch_shape=shape)``, where ``shape`` comes from
          ``batch_input_shape``, ``input_shape`` or ``input_dim``.
        * ``is_embedding`` false: the model input is an int32 tensor of
          length ``input_length`` that is passed through an ``Embedding``
          of ``n_tokens`` entries and width ``embedding_dim`` (defaults to
          ``hidden_dim``).

    The decoder output is followed by a time-distributed softmax ``Dense``
    of width ``n_tokens``.

    Returns:
        A ``Model`` from the input tensor to the per-step softmax output.

    Raises:
        TypeError: if the input shape cannot be determined from the
            arguments.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)

    shape = None
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    elif input_length:
        # Only the token-id path can work from input_length alone.
        if is_embedding or not n_tokens > 0:
            raise TypeError('input_length alone is only sufficient when '
                            'is_embedding is False and n_tokens > 0.')

    if hidden_dim is None:
        hidden_dim = output_dim

    if is_embedding:
        if shape is None:
            # Previously this fell through to an unbound `shape`.
            raise TypeError('Could not determine the input shape: specify '
                            'batch_input_shape, input_shape or input_dim.')
        _input = Input(batch_shape=shape)
        _input._keras_history[0].supports_masking = True
    else:
        i = Input(shape=(input_length, ), name='sentence_input', dtype='int32')
        i._keras_history[0].supports_masking = True
        if embedding_dim is None:
            embedding_dim = hidden_dim
        _input = Embedding(input_dim=n_tokens,
                           output_dim=embedding_dim,
                           input_length=input_length)(i)
        # The recurrent layers see the embedded sequence.
        shape = (batch_size, ) + (input_length, ) + (embedding_dim, )

    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    if depth[1] == 1:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        decoder.add(
            AttentionDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    x = encoder(_input)
    decoder_outputs = decoder(x)
    output = TimeDistributed(Dense(n_tokens,
                                   activation='softmax'))(decoder_outputs)

    if is_embedding:
        return Model(_input, output)
    return Model(i, output)
def Seq2Seq(output_dim,
            output_length,
            hidden_dim=None,
            batch_input_shape=None,
            input_shape=None,
            batch_size=None,
            input_dim=None,
            input_length=None,
            is_embedding=True,
            embedding_dim=None,
            n_tokens=None,
            depth=1,
            broadcast_state=True,
            unroll=False,
            stateful=False,
            inner_broadcast_state=False,
            teacher_force=False,
            peek=False,
            dropout=0.):
    '''Seq2seq model based on [1] and [2], with an optional embedding front.

    [1] Sequence to Sequence Learning with Neural Networks
    [2] Learning Phrase Representations using RNN Encoder-Decoder for
        Statistical Machine Translation

    This model can transfer the encoder hidden state to the decoder's
    hidden state (``broadcast_state``). You can switch between the [1]
    based model and the [2] based model using ``peek`` (peek = True for
    [2], peek = False for [1]). When peek = True, the decoder gets a
    'peek' at the context vector at every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); the context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); where s is the hidden state of the
        LSTM (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); the context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        where s is the hidden state of the LSTM (h and c), and C is the
        context vector from the encoder.

    Arguments:
        output_dim : Required output dimension of the decoder stack.
        hidden_dim : The dimension of the internal representations of the
            model; defaults to ``output_dim``.
        output_length : Length of the required output sequence.
        is_embedding : If true, the model input is
            ``Input(batch_shape=shape)``; otherwise the input is an int32
            tensor of length ``input_length`` fed through an ``Embedding``
            (``n_tokens`` entries, width ``embedding_dim``,
            ``mask_zero=True``).
        n_tokens : Vocabulary size; also the width of the final
            time-distributed softmax layer.
        depth : Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3
            LSTMs on the decoding side. You can also specify depth as a
            tuple. For example, if depth = (4, 5), 4 LSTMs will be added
            to the encoding side and 5 LSTMs will be added to the decoding
            side.
        broadcast_state : Specifies whether the hidden state from the
            encoder should be transferred to the decoder.
        inner_broadcast_state : Accepted but not used by this
            implementation.
        teacher_force : Accepted but not used by this implementation.
        peek : Specifies if the decoder should be able to peek at the
            context vector at every timestep.
        dropout : Dropout probability in between layers.

    Returns:
        A ``Model`` from the input tensor to the per-step softmax output.

    Raises:
        TypeError: if the input shape cannot be determined from the
            arguments.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)

    shape = None
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    elif input_length:
        # Only the token-id path can work from input_length alone.
        if is_embedding or not n_tokens > 0:
            raise TypeError('input_length alone is only sufficient when '
                            'is_embedding is False and n_tokens > 0.')

    if hidden_dim is None:
        hidden_dim = output_dim

    if is_embedding:
        if shape is None:
            # Previously this fell through to an unbound `shape`.
            raise TypeError('Could not determine the input shape: specify '
                            'batch_input_shape, input_shape or input_dim.')
        _input = Input(batch_shape=shape)
    else:
        i = Input(shape=(input_length, ), name='sentence_input', dtype='int32')
        if embedding_dim is None:
            embedding_dim = hidden_dim
        _input = Embedding(input_dim=n_tokens,
                           output_dim=embedding_dim,
                           mask_zero=True,
                           input_length=input_length)(i)
        # The recurrent layers see the embedded sequence.
        shape = (batch_size, ) + (input_length, ) + (embedding_dim, )

    encoder = RecurrentSequential(readout=True,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))
    if depth[1] == 1:
        decoder.add(LSTMCell(output_dim))
    else:
        decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim))

    x = encoder(_input)
    if broadcast_state:
        # With return_states=True the encoder returns [output, *states];
        # the last two entries are handed to the decoder as initial state.
        assert type(x) is list
        states = x[-2:]
        x = x[0]
    else:
        states = None

    decoder_outputs = decoder(x, initial_state=states, initial_readout=x)
    output = TimeDistributed(Dense(n_tokens,
                                   activation='softmax'))(decoder_outputs)

    if is_embedding:
        return Model(_input, output)
    return Model(i, output)
def paired_trimodal_model(output_dim,
                          output_length,
                          batch_input_shape=None,
                          batch_size=None,
                          input_shape=None,
                          input_length=None,
                          input_dim=None,
                          hidden_dim=None,
                          depth=1,
                          bidirectional=True,
                          unroll=False,
                          stateful=False,
                          dropout=0.0):
    """
    One modal translates into two other modalities, no cycle involved.

    The graph is ``input -> encoder -> decoder -> encoder_2 -> decoder_2``.
    ``decoder`` emits ``output_length`` steps of width ``output_dim``;
    ``decoder_2`` emits ``input_length`` steps of width ``input_dim``.

    The input shape is taken from the first of ``batch_input_shape``,
    ``input_shape`` (prefixed with ``batch_size``) or ``input_dim`` that is
    set. ``depth`` may be an int or an ``(encoder, decoder)`` pair and
    ``hidden_dim`` defaults to ``output_dim``.

    Returns:
        Tuple ``(inputs, encoded_one, encoded_two, decoded_one,
        decoded_two)`` where ``inputs`` is a one-element list holding the
        input tensor.

    Raises:
        TypeError: if none of the shape arguments is provided.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        raise TypeError('Could not determine the input shape: specify '
                        'batch_input_shape, input_shape or input_dim.')

    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # encoder phase: the first encoder reads the input modality, the
    # second reads sequences of width output_dim.
    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

    encoder_2 = RecurrentSequential(unroll=unroll,
                                    stateful=stateful,
                                    return_sequences=True)
    encoder_2.add(
        LSTMCell(hidden_dim, batch_input_shape=(shape[0], output_dim)))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))
        encoder_2.add(Dropout(dropout))
        encoder_2.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

        encoder_2 = Bidirectional(encoder_2, merge_mode='sum')
        encoder_2.forward_layer.build(shape)
        encoder_2.backward_layer.build(shape)
        # patch
        encoder_2.layer = encoder_2.forward_layer

    encoded_one = encoder(_input)

    # decoder phase
    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))

    decoder_2 = RecurrentSequential(decode=True,
                                    output_length=input_length,
                                    unroll=unroll,
                                    stateful=stateful)
    decoder_2.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))

    # First decoder: attention cell, then (for deep decoders) LSTM cells.
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    # Second decoder: same layout, but emitting input_dim-wide vectors.
    decoder_2.add(
        AttentionDecoderCell(output_dim=input_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        for _ in range(depth[1] - 2):
            decoder_2.add(Dropout(dropout))
            decoder_2.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder_2.add(Dropout(dropout))
        decoder_2.add(
            LSTMDecoderCell(output_dim=input_dim, hidden_dim=hidden_dim))

    inputs = [_input]
    decoded_one = decoder(encoded_one)
    encoded_two = encoder_2(decoded_one)
    decoded_two = decoder_2(encoded_two)

    return inputs, encoded_one, encoded_two, decoded_one, decoded_two
def Seq2Seq(output_dim,
            output_length,
            batch_input_shape=None,
            input_shape=None,
            batch_size=None,
            input_dim=None,
            input_length=None,
            hidden_dim=None,
            depth=1,
            broadcast_state=True,
            unroll=False,
            stateful=False,
            inner_broadcast_state=True,
            teacher_force=False,
            peek=False,
            dropout=0.):
    """Build an LSTM encoder/decoder sequence-to-sequence model.

    The input is projected to ``hidden_dim`` by a time-distributed Dense
    layer, encoded by ``depth[0]`` LSTM cells, projected to ``output_dim``
    by a Dense layer and decoded for ``output_length`` steps by
    ``depth[1]`` ``LSTMDecoderCell`` layers.

    Arguments:
        output_dim: width of the decoded vectors.
        output_length: number of decoding steps.
        batch_input_shape, input_shape, batch_size, input_dim,
        input_length: alternative ways of giving the input shape; the
            first of ``batch_input_shape``, ``input_shape``, ``input_dim``
            that is set wins.
        hidden_dim: internal width; defaults to ``output_dim``.
        depth: int, or ``(encoder_depth, decoder_depth)`` pair.
        broadcast_state: pass the encoder's returned states to the decoder
            as its initial state.
        inner_broadcast_state: forwarded as ``state_sync`` to both stacks.
        teacher_force: add a second model input holding the ground-truth
            sequence and pass it to the decoder.
        peek: use ``readout='add'`` instead of ``'readout_only'`` in the
            decoder.
        dropout: rate of the Dropout layers.

    Returns:
        A ``Model`` with ``encoder`` and ``decoder`` attributes attached.

    Raises:
        TypeError: if none of the shape arguments is provided.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        raise TypeError('Could not determine the input shape: specify '
                        'batch_input_shape, input_shape or input_dim.')

    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(
            LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  teacher_force=teacher_force)
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim,
                            hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        # With return_states=True the encoder returns [output, *states];
        # the last two entries become the decoder's initial state.
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)

    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]

    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
def mctn_level2_model(input,
                      output_dim,
                      output_length,
                      batch_input_shape=None,
                      batch_size=None,
                      input_shape=None,
                      input_length=None,
                      input_dim=None,
                      hidden_dim=None,
                      depth=1,
                      bidirectional=True,
                      unroll=False,
                      stateful=False,
                      dropout=0.0):
    """
    Level 2 MCTN used for translation between the joint embedding of 2
    modalities to the third one. Due to the lack of ground truth, no cycle
    phase happens.

    Unlike the sibling builders, the input tensor is supplied by the caller
    (``input``) instead of being created here; the shape arguments only
    configure the layers.

    The input shape is taken from the first of ``batch_input_shape``,
    ``input_shape`` (prefixed with ``batch_size``) or ``input_dim`` that is
    set. ``depth`` may be an int or an ``(encoder, decoder)`` pair and
    ``hidden_dim`` defaults to ``output_dim``.

    Returns:
        Tuple ``(inputs, encoded, decoded)`` where ``inputs`` is a
        one-element list holding ``input``.

    Raises:
        TypeError: if none of the shape arguments is provided.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # A bare `raise` here has no active exception to re-raise and
        # would surface as RuntimeError; raise an explicit TypeError.
        raise TypeError('Could not determine the input shape: specify '
                        'batch_input_shape, input_shape or input_dim.')

    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    encoded = encoder(input)

    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    # Attention cell first; deep decoders append LSTM decoder cells.
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    inputs = [input]
    decoded = decoder(encoded)
    return inputs, encoded, decoded
def mctn_model(output_dim,
               output_length,
               batch_input_shape=None,
               batch_size=None,
               input_shape=None,
               input_length=None,
               input_dim=None,
               hidden_dim=None,
               depth=1,
               bidirectional=True,
               unroll=False,
               stateful=False,
               dropout=0,
               is_cycled=True):
    """
    MCTN Model (by default with Cycle Consistency Loss).

    Arguments:
        output_dim : Output dimension of the decoder cells; also the size the
            single decoded step is reshaped to.
        output_length : Accepted for interface compatibility but not used:
            the decoder is built with output_length=1 so that its output can
            be reshaped to (output_dim, ).
        batch_input_shape : Full input shape including the batch axis. Takes
            precedence over the other shape arguments.
        input_shape : Input shape without the batch axis; `batch_size` is
            prepended. Used when `batch_input_shape` is not given.
        input_dim, input_length : Used when neither of the above is given;
            the shape becomes (batch_size, input_length or None, input_dim).
        hidden_dim : Dimension of the encoder cells and of the intermediate
            decoder cells. Defaults to `output_dim`.
        depth : int, or tuple (encoder depth, decoder depth).
        bidirectional : Wrap the encoder in Bidirectional(merge_mode='sum').
        unroll, stateful : Forwarded to both RecurrentSequential containers.
        dropout : Rate of the Dropout layers placed between cells.
        is_cycled : When True, the decoder output is fed through the same
            encoder and decoder a second time (the cycle phase).

    Returns:
        Tuple (inputs, encoded, decoded, cycled_decoded). `inputs` is a list
        holding the created Input tensor, `decoded` is the decoder output
        reshaped to (output_dim, ), and `cycled_decoded` is None when
        `is_cycled` is False.

    Raises:
        TypeError: if none of batch_input_shape, input_shape or input_dim is
            provided.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        # A falsy input_length leaves the time axis unspecified.
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'Input shape is not specified: provide batch_input_shape, '
            'input_shape, or input_dim (optionally with input_length and '
            'batch_size).')

    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # encoder phase
    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    encoded = encoder(_input)

    # decoder phase
    decoder = RecurrentSequential(
        decode=True,
        output_length=1,  # deliberately not `output_length`, see docstring
        unroll=unroll,
        stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        # Any decoder depth other than 1 appends depth[1] - 2 hidden cells
        # and one final cell that projects back to output_dim.
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    inputs = [_input]
    decoded_0 = decoder(encoded)
    decoded = Reshape((output_dim, ))(decoded_0)

    # cycle phase: reuse the same encoder/decoder on the un-reshaped output
    cycled_decoded = None
    if is_cycled:
        cycled_encoded = encoder(decoded_0)
        cycled_decoded = decoder(cycled_encoded)

    return inputs, encoded, decoded, cycled_decoded
def AttentionSeq2Seq(
        output_dim,
        output_length,
        batch_input_shape=None,
        batch_size=None,
        input_shape=None,
        input_length=None,
        input_dim=None,
        hidden_dim=None,
        depth=1,
        bidirectional=True,
        unroll=False,
        stateful=False,
        dropout=0.0,
):
    '''
    This is an attention Seq2seq model based on [3].
    Here, there is a soft alignment between the input and output sequence
    elements. A bidirectional encoder is used by default. There is no hidden
    state transfer in this model.

    The math:

        Encoder:
        X = Input Sequence of length m.
        H = Bidirectional_LSTM(X); Note that here the LSTM has
        return_sequences = True, so H is a sequence of vectors of length m.

        Decoder:
        y(i) = LSTM(s(i-1), y(i-1), v(i)); Where s is the hidden state of the
        LSTM (h and c) and v (called the context vector) is a weighted sum
        over H:

        v(i) = sigma(j = 0 to m-1) alpha(i, j) * H(j)

        The weight alpha[i, j] for each H(j) is computed as follows:
        energy = a(s(i-1), H(j))
        alpha = softmax(energy)
        Where a is a feed forward network.

    Arguments:
        output_dim : Output dimension of the decoder cells.
        output_length : Passed to the decoder as its `output_length`.
        batch_input_shape : Full input shape including the batch axis. Takes
            precedence over the other shape arguments.
        input_shape : Input shape without the batch axis; `batch_size` is
            prepended. Used when `batch_input_shape` is not given.
        input_dim, input_length : Used when neither of the above is given;
            the shape becomes (batch_size, input_length or None, input_dim).
        hidden_dim : Dimension of the encoder cells and of the intermediate
            decoder cells. Defaults to `output_dim`.
        depth : int, or tuple (encoder depth, decoder depth).
        bidirectional : Wrap the encoder in Bidirectional(merge_mode='sum').
        unroll, stateful : Forwarded to both RecurrentSequential containers.
        dropout : Rate of the Dropout layers placed between cells.

    Returns:
        Tuple (encoder, decoder). This variant does not apply the layers to
        the input or build a Model; the caller wires them together.

    Raises:
        TypeError: if none of batch_input_shape, input_shape or input_dim is
            provided.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        # A falsy input_length leaves the time axis unspecified.
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'Input shape is not specified: provide batch_input_shape, '
            'input_shape, or input_dim (optionally with input_length and '
            'batch_size).')

    if hidden_dim is None:
        hidden_dim = output_dim

    # NOTE(review): the Input tensor is created but never fed to the encoder
    # in this variant. It is kept so the function's Keras-side effects stay
    # exactly as before.
    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        # Any decoder depth other than 1 appends depth[1] - 2 hidden cells
        # and one final cell that projects back to output_dim.
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    return encoder, decoder
def seq2seq_model(x_train_1, x_train_2):
    '''
    Build and compile a two-output model: a one-step decoded vector plus a
    3-way softmax classification, both computed from a self-attention
    encoding of the input.

    NOTE(review): this function reads `dropout`, `hidden_dim` and `depth`
    (indexed as depth[1]) without defining them; presumably they are
    module-level globals - verify before calling.

    Arguments:
        x_train_1 : Array whose shape[1] and shape[2] give the input sequence
            length and feature size.
        x_train_2 : Array whose shape[2] gives the decoder output dimension.

    Returns:
        A compiled Model with input `S_inputs` and outputs
        [decoded, regr_outputs], trained with 'mse' (weight 1) and
        'categorical_crossentropy' (weight 10).
    '''
    # encoder
    S_inputs = Input(shape=(x_train_1.shape[1], x_train_1.shape[2]))
    # Original author's note: adding a Position_Embedding in front of the
    # attention layer slightly improves accuracy (currently not applied).
    encoded = Attention(32, 32)([S_inputs, S_inputs, S_inputs])

    # decoder: emits a single step, reshaped to a flat vector below
    decoder = RecurrentSequential(
        decode=True,
        output_length=1,  # x_train_2.shape[1]
        unroll=False,
        stateful=False)
    decoder.add(
        Dropout(dropout,
                batch_input_shape=(None, x_train_1.shape[1], hidden_dim)))
    decoder.add(
        AttentionDecoderCell(output_dim=x_train_2.shape[2],
                             hidden_dim=hidden_dim))
    if depth[1] != 1:
        # Any decoder depth other than 1 appends depth[1] - 2 hidden cells
        # and one final cell that projects back to the output dimension.
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=x_train_2.shape[2],
                            hidden_dim=hidden_dim))

    # regression model (the head is a 3-unit softmax)
    x = Attention(8, 16)([encoded, encoded, encoded])
    x = GlobalAveragePooling1D()(x)
    x = Dropout(dropout)(x)
    regr_outputs = Dense(3, activation='softmax')(x)

    decoded = decoder(encoded)
    decoded = Reshape((x_train_2.shape[2], ))(decoded)

    model = Model(inputs=S_inputs, outputs=[decoded, regr_outputs])
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    model.summary()

    # try using different optimizers and different optimizer configs
    model.compile(loss=['mse', 'categorical_crossentropy'],
                  loss_weights=[1, 10],
                  optimizer='adam',
                  metrics=['categorical_accuracy'])
    return model
def AttentionSeqtoSeq(output_dim,
                      output_length,
                      batch_input_shape=None,
                      batch_size=None,
                      input_shape=None,
                      input_length=None,
                      input_dim=None,
                      hidden_dim=None,
                      depth=1,
                      bidirectional=True,
                      unroll=False,
                      stateful=False,
                      dropout=0.0,
                      ):
    '''
    Attention seq2seq model whose decoder ends in two Dense layers, the last
    one with a softmax activation.

    Arguments:
        output_dim : Output dimension of the decoder cells and of the final
            softmax Dense layer.
        output_length : Passed to the decoder as its `output_length`.
        batch_input_shape : Full input shape including the batch axis. Takes
            precedence over the other shape arguments.
        input_shape : Input shape without the batch axis; `batch_size` is
            prepended. Used when `batch_input_shape` is not given.
        input_dim, input_length : Used when neither of the above is given;
            the shape becomes (batch_size, input_length or None, input_dim).
        hidden_dim : Dimension of the encoder cells and of the intermediate
            decoder cells. Defaults to `output_dim`.
        depth : int, or tuple (encoder depth, decoder depth).
        bidirectional : Wrap the encoder in Bidirectional(merge_mode='sum').
        unroll, stateful : Forwarded to both RecurrentSequential containers.
        dropout : Rate of the single Dropout layer at the decoder input. In
            this variant no Dropout is placed between stacked cells.

    Returns:
        A Model mapping the created Input to the decoder output.

    Raises:
        TypeError: if none of batch_input_shape, input_shape or input_dim is
            provided.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        # A falsy input_length leaves the time axis unspecified.
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'Input shape is not specified: provide batch_input_shape, '
            'input_shape, or input_dim (optionally with input_length and '
            'batch_size).')

    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))
    for _ in range(1, depth[0]):
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    encoded = encoder(_input)

    decoder = RecurrentSequential(decode=True, output_length=output_length,
                                  unroll=unroll, stateful=stateful)
    decoder.add(Dropout(dropout,
                        batch_input_shape=(shape[0], shape[1], hidden_dim)))
    decoder.add(AttentionDecoderCell(output_dim=output_dim,
                                     hidden_dim=hidden_dim))
    if depth[1] != 1:
        # Any decoder depth other than 1 appends depth[1] - 2 hidden cells
        # and one final cell that projects back to output_dim.
        for _ in range(depth[1] - 2):
            decoder.add(LSTMDecoderCell(output_dim=hidden_dim,
                                        hidden_dim=hidden_dim))
        decoder.add(LSTMDecoderCell(output_dim=output_dim,
                                    hidden_dim=hidden_dim))

    # Newly added (per the original author's note): a Dense head on top of
    # the recurrent cells, ending in a softmax over output_dim.
    decoder.add(Dense(output_dim*2))
    decoder.add(Dense(output_dim, activation="softmax"))

    inputs = [_input]
    decoded = decoder(encoded)
    model = Model(inputs, decoded)
    return model
def SimpleSeq2Seq(output_dim,
                  output_length,
                  hidden_dim=None,
                  input_shape=None,
                  batch_size=None,
                  batch_input_shape=None,
                  input_dim=None,
                  input_length=None,
                  is_embedding=True,
                  embedding_dim=None,
                  n_tokens=None,
                  depth=1,
                  dropout=0.0,
                  unroll=False,
                  stateful=False):
    '''
    Simple model for sequence to sequence learning.
    The encoder encodes the input sequence to a vector (called the context
    vector). The decoder decodes the context vector into a sequence of
    vectors. There is no one-to-one relation between the input and output
    sequence elements. The input sequence and output sequence may differ in
    length.

    The decoder output is passed through a time-distributed Dense layer with
    `n_tokens` units and a softmax activation.

    Arguments:
        output_dim : Required output dimension of the last decoder cell.
        output_length : Length of the required output sequence.
        hidden_dim : The dimension of the internal representations of the
            model. Defaults to `output_dim`.
        batch_input_shape, input_shape, batch_size, input_dim, input_length :
            Describe the input shape when `is_embedding` is True. Precedence
            is batch_input_shape, then input_shape (with batch_size
            prepended), then input_dim (with input_length, or None for the
            time axis).
        is_embedding : When True the model input is a tensor of the resolved
            shape. When False the model input is an int32 tensor of shape
            (input_length, ) named 'sentence_input' that is passed through
            an Embedding layer (mask_zero=True) first.
        embedding_dim : Output size of the Embedding layer; defaults to
            `hidden_dim`. Only used when `is_embedding` is False.
        n_tokens : Number of units of the output softmax layer, and the
            Embedding input_dim when `is_embedding` is False.
        depth : Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3 LSTMs
            on the decoding side. You can also specify depth as a tuple. For
            example, if depth = (4, 5), 4 LSTMs will be added to the encoding
            side and 5 LSTMs will be added to the decoding side.
        dropout : Dropout probability in between layers.
        unroll, stateful : Forwarded to both RecurrentSequential containers.

    Returns:
        A Model from the input tensor (the raw token input when
        `is_embedding` is False) to the softmax output.

    Raises:
        TypeError: if `is_embedding` is True and no input shape can be
            resolved, or if `is_embedding` is False and `n_tokens` is not a
            positive number.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)

    shape = None
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        # A falsy input_length leaves the time axis unspecified.
        shape = (batch_size, input_length if input_length else None,
                 input_dim)

    if hidden_dim is None:
        hidden_dim = output_dim

    if is_embedding:
        # Validate explicitly: previously an unresolved shape could reach
        # Input() as an unbound local.
        if shape is None:
            raise TypeError(
                'Input shape is not specified: provide batch_input_shape, '
                'input_shape, or input_dim (optionally with input_length '
                'and batch_size).')
        _input = Input(batch_shape=shape)
    else:
        # Validate explicitly: comparing the default n_tokens=None with 0
        # fails with an unrelated message.
        if n_tokens is None or n_tokens <= 0:
            raise TypeError(
                'n_tokens must be a positive integer when is_embedding is '
                'False.')
        i = Input(shape=(input_length, ), name='sentence_input',
                  dtype='int32')
        if embedding_dim is None:
            embedding_dim = hidden_dim
        _input = Embedding(input_dim=n_tokens,
                           output_dim=embedding_dim,
                           mask_zero=True,
                           input_length=input_length)(i)
        # The encoder consumes the embedded sequence, so its shape replaces
        # any shape resolved above.
        shape = (batch_size, ) + (input_length, ) + (embedding_dim, )

    encoder = RecurrentSequential(unroll=unroll, stateful=stateful)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    decoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  decode=True,
                                  output_length=output_length)
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))
    if depth[1] == 1:
        decoder.add(LSTMCell(output_dim))
    else:
        decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim))

    x = encoder(_input)
    decoder_outputs = decoder(x)
    output = TimeDistributed(Dense(n_tokens,
                                   activation='softmax'))(decoder_outputs)

    if is_embedding:
        return Model(_input, output)
    return Model(i, output)
def Seq2Seq(output_dim, output_length, batch_input_shape=None,
            input_shape=None, batch_size=None, input_dim=None,
            input_length=None, hidden_dim=None, depth=1,
            broadcast_state=True, unroll=False, stateful=False,
            inner_broadcast_state=True, teacher_force=False, peek=False,
            dropout=0.):
    '''
    Seq2seq model based on [1] and [2], extended with a scalar output head.

    This model has the ability to transfer the encoder hidden state to the
    decoder's hidden state (specified by the broadcast_state argument). Also,
    in deep models (depth > 1), the hidden state is propagated throughout the
    LSTM stack (specified by the inner_broadcast_state argument). You can
    switch between the [1] based model and the [2] based model using the peek
    argument (peek = True for [2], peek = False for [1]). When peek = True,
    the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM
        (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        Where s is the hidden state of the LSTM (h and c), and C is the
        context vector from the encoder.

    Arguments:
        output_dim : Required output dimension.
        hidden_dim : The dimension of the internal representations of the
            model. Defaults to `output_dim`.
        output_length : Length of the required output sequence.
        batch_input_shape, input_shape, batch_size, input_dim, input_length :
            Describe the input shape. Precedence is batch_input_shape, then
            input_shape (a tuple, with batch_size prepended), then input_dim
            (with input_length, or None for the time axis).
        depth : Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3 LSTMs
            on the decoding side. You can also specify depth as a tuple. For
            example, if depth = (4, 5), 4 LSTMs will be added to the encoding
            side and 5 LSTMs will be added to the decoding side.
        broadcast_state : Specifies whether the hidden state from the encoder
            should be transferred to the decoder.
        inner_broadcast_state : Specifies whether hidden states should be
            propagated throughout the LSTM stack in deep models.
        teacher_force : When True, a second Input of batch shape
            (shape[0], output_length, output_dim) is added to the model
            inputs and passed to the decoder as `ground_truth`.
        peek : Specifies if the decoder should be able to peek at the context
            vector at every timestep.
        dropout : Dropout probability in between layers.
        unroll, stateful : Forwarded to both RecurrentSequential containers.

    Returns:
        A Model with outputs [decoded_seq, decoded_vec], where decoded_vec is
        a Dense(1, activation='tanh') applied to the flattened decoded_seq.

    Raises:
        TypeError: if none of batch_input_shape, input_shape or input_dim is
            provided.
    '''
    # An int depth gives the encoder and the decoder the same layer count.
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        # The batch input shape is used directly as the model input shape.
        shape = batch_input_shape
    elif input_shape:
        # Without batch_input_shape the batch size is prepended to
        # input_shape, e.g. batch size 32 and input (768,) give (32, 768).
        # input_shape must be a tuple.
        shape = (batch_size,) + input_shape
    elif input_dim:
        # The usual generic shape: (batch size, input length, input dim).
        # A falsy input_length leaves the time axis unspecified.
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'Input shape is not specified: provide batch_input_shape, '
            'input_shape, or input_dim (optionally with input_length and '
            'batch_size).')

    if hidden_dim is None:
        hidden_dim = output_dim

    # Original author's notes on the RecurrentSequential options
    # (translated; not verified against the library here):
    #   readout    - whether the output is additionally processed; options
    #                include add (True), multiply, average, maximum, ...
    #   state_sync - whether state is propagated between the stacked cells.
    #   stateful   - Keras feature: cell state is carried across batches;
    #                training then needs shuffle=False in fit().
    #   unroll     - Keras feature: unroll the recurrence; can be faster but
    #                uses more memory.
    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll, stateful=stateful,
                                  return_states=broadcast_state)
    # depth[0] sets the encoder depth.
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim,
                             batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    # dense1 applies the same Dense layer to every timestep of the input,
    # projecting it to hidden_dim before the encoder.
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    # dense2 projects the encoder output to output_dim before it is handed
    # to the decoder.
    dense2 = Dense(output_dim)

    # Original author's note (translated): with teacher forcing the decoder
    # is fed the ground-truth previous item instead of its own previous
    # output.
    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll, stateful=stateful,
                                  teacher_force=teacher_force)
    # depth[1] sets the decoder depth.
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout,
                            batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim,
                                    hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    # Project the input to hidden_dim, then encode it.
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)

    # Option handling: state broadcasting and teacher forcing.
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)

    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length,
                                          output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]

    # Decode; initial_state carries the encoder states when broadcast_state
    # is set, otherwise None.
    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq,
                          initial_state=states)

    # Original author's notes (translated): the plain model maps the input to
    # the decoded sequence, and the encoder can be taken out on its own. The
    # model maps real-time news sequences to stock-price fluctuation
    # sequences; to also train on news from market-closed periods, the
    # encoder/decoder weights are shared and a Flatten + Dense head treats
    # the decoded sequence as a fluctuation encoding and outputs a scalar.
    seq2seq_model = Model(inputs, decoded_seq)
    # NOTE(review): encoder/decoder are attached to seq2seq_model, which is
    # not the model returned below - confirm whether the returned model
    # should expose them as well.
    seq2seq_model.encoder = encoder
    seq2seq_model.decoder = decoder

    decoded_vec = Flatten()(decoded_seq)
    decoded_vec = Dense(1, activation='tanh')(decoded_vec)
    # The final model has two outputs: the decoded sequence and the scalar.
    seq2vec_model = Model(inputs, [decoded_seq, decoded_vec])
    return seq2vec_model
def Seq2Seq(output_dim,
            output_length,
            batch_input_shape=None,
            input_shape=None,
            batch_size=None,
            input_dim=None,
            input_length=None,
            hidden_dim=None,
            depth=1,
            broadcast_state=True,
            unroll=False,
            stateful=False,
            inner_broadcast_state=True,
            teacher_force=False,
            peek=False,
            dropout=0.):
    '''
    Seq2seq model based on [1] and [2].

    This model has the ability to transfer the encoder hidden state to the
    decoder's hidden state (specified by the broadcast_state argument). Also,
    in deep models (depth > 1), the hidden state is propagated throughout the
    LSTM stack (specified by the inner_broadcast_state argument). You can
    switch between the [1] based model and the [2] based model using the peek
    argument (peek = True for [2], peek = False for [1]). When peek = True,
    the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM
        (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        Where s is the hidden state of the LSTM (h and c), and C is the
        context vector from the encoder.

    Arguments:
        output_dim : Required output dimension.
        hidden_dim : The dimension of the internal representations of the
            model. Defaults to `output_dim`.
        output_length : Length of the required output sequence.
        batch_input_shape, input_shape, batch_size, input_dim, input_length :
            Describe the input shape. Precedence is batch_input_shape, then
            input_shape (with batch_size prepended), then input_dim (with
            input_length, or None for the time axis).
        depth : Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3 LSTMs
            on the decoding side. You can also specify depth as a tuple. For
            example, if depth = (4, 5), 4 LSTMs will be added to the encoding
            side and 5 LSTMs will be added to the decoding side.
        broadcast_state : Specifies whether the hidden state from the encoder
            should be transferred to the decoder.
        inner_broadcast_state : Specifies whether hidden states should be
            propagated throughout the LSTM stack in deep models.
        teacher_force : When True, a second Input of batch shape
            (shape[0], output_length, output_dim) is added to the model
            inputs and passed to the decoder as `ground_truth`.
        peek : Specifies if the decoder should be able to peek at the context
            vector at every timestep.
        dropout : Dropout probability in between layers.
        unroll, stateful : Forwarded to both RecurrentSequential containers.

    Returns:
        A Model from the input(s) to the decoded sequence, with the encoder
        and decoder attached as `model.encoder` and `model.decoder`.

    Raises:
        TypeError: if none of batch_input_shape, input_shape or input_dim is
            provided.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        # A falsy input_length leaves the time axis unspecified.
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'Input shape is not specified: provide batch_input_shape, '
            'input_shape, or input_dim (optionally with input_length and '
            'batch_size).')

    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(
            LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    # dense1 projects every input timestep to hidden_dim; dense2 projects the
    # encoder output to output_dim for the decoder.
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  teacher_force=teacher_force)
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim,
                            hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)

    if broadcast_state:
        # With return_states set, the encoder output is expected to be a
        # list: the sequence output first, the states last.
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)

    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]

    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
def Seq2Seq(output_dim, output_length, batch_input_shape=None,
            input_shape=None, batch_size=None, input_dim=None,
            input_length=None, hidden_dim=None, depth=1,
            broadcast_state=True, unroll=False, stateful=False,
            inner_broadcast_state=True, teacher_force=False, peek=False,
            dropout=0.):
    '''
    Seq2seq model based on [1] and [2].

    This model has the ability to transfer the encoder hidden state to the
    decoder's hidden state (specified by the broadcast_state argument). Also,
    in deep models (depth > 1), the hidden state is propagated throughout the
    LSTM stack (specified by the inner_broadcast_state argument). You can
    switch between the [1] based model and the [2] based model using the peek
    argument (peek = True for [2], peek = False for [1]). When peek = True,
    the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM
        (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        Where s is the hidden state of the LSTM (h and c), and C is the
        context vector from the encoder.

    Arguments:
        output_dim : Required output dimension.
        hidden_dim : The dimension of the internal representations of the
            model. Defaults to `output_dim`.
        output_length : Length of the required output sequence.
        batch_input_shape, input_shape, batch_size, input_dim, input_length :
            Describe the input shape. Precedence is batch_input_shape, then
            input_shape (with batch_size prepended), then input_dim (with
            input_length, or None for the time axis).
        depth : Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3 LSTMs
            on the decoding side. You can also specify depth as a tuple. For
            example, if depth = (4, 5), 4 LSTMs will be added to the encoding
            side and 5 LSTMs will be added to the decoding side.
        broadcast_state : Specifies whether the hidden state from the encoder
            should be transferred to the decoder.
        inner_broadcast_state : Specifies whether hidden states should be
            propagated throughout the LSTM stack in deep models.
        teacher_force : When True, a second Input of batch shape
            (shape[0], output_length, output_dim) is added to the model
            inputs and passed to the decoder as `ground_truth`.
        peek : Specifies if the decoder should be able to peek at the context
            vector at every timestep.
        dropout : Dropout probability in between layers.
        unroll, stateful : Forwarded to both RecurrentSequential containers.

    Returns:
        A Model from the input(s) to the decoded sequence, with the encoder
        and decoder attached as `model.encoder` and `model.decoder`.

    Raises:
        TypeError: if none of batch_input_shape, input_shape or input_dim is
            provided.
    '''
    # Resolve the full input shape (batch_size, timesteps, data_dim).
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        # A falsy input_length leaves the time axis unspecified.
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'Input shape is not specified: provide batch_input_shape, '
            'input_shape, or input_dim (optionally with input_length and '
            'batch_size).')

    if hidden_dim is None:
        hidden_dim = output_dim

    # Recurrent layer options: https://keras.io/layers/recurrent/
    # return_states asks for the last state in addition to the output.
    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll, stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim,
                             batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    # TimeDistributed: https://keras.io/layers/wrappers/
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    # Readout lets you feed the output of your RNN from the previous time
    # step back to the current time step.
    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll, stateful=stateful,
                                  teacher_force=teacher_force)
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout,
                            batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim,
                                    hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)

    if broadcast_state:
        # With return_states set, the encoder output is expected to be a
        # list: the sequence output first, the states last.
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)

    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length,
                                          output_dim))
        # Was `Trueoutput_dim`, an undefined name that raised NameError on
        # every teacher_force=True call.
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]

    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
def SimpleSeq2Seq(output_dim,
                  output_length,
                  hidden_dim=None,
                  input_shape=None,
                  batch_size=None,
                  batch_input_shape=None,
                  input_dim=None,
                  input_length=None,
                  depth=1,
                  dropout=0.0,
                  unroll=False,
                  stateful=False):
    '''
    Simple model for sequence to sequence learning.
    The encoder encodes the input sequence to a vector (called the context
    vector). The decoder decodes the context vector into a sequence of
    vectors. There is no one-to-one relation between the input and output
    sequence elements. The input sequence and output sequence may differ in
    length.

    Arguments:
        output_dim : Required output dimension.
        hidden_dim : The dimension of the internal representations of the
            model. Defaults to `output_dim`.
        output_length : Length of the required output sequence.
        batch_input_shape, input_shape, batch_size, input_dim, input_length :
            Describe the input shape. Precedence is batch_input_shape, then
            input_shape (with batch_size prepended), then input_dim (with
            input_length, or None for the time axis).
        depth : Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3 LSTMs
            on the decoding side. You can also specify depth as a tuple. For
            example, if depth = (4, 5), 4 LSTMs will be added to the encoding
            side and 5 LSTMs will be added to the decoding side.
        dropout : Dropout probability in between layers.
        unroll, stateful : Forwarded to both RecurrentSequential containers.

    Returns:
        Tuple (encoder, decoder). This variant does not create an Input or a
        Model; the caller wires the two layers together.

    Raises:
        TypeError: if none of batch_input_shape, input_shape or input_dim is
            provided.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        # A falsy input_length leaves the time axis unspecified.
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'Input shape is not specified: provide batch_input_shape, '
            'input_shape, or input_dim (optionally with input_length and '
            'batch_size).')

    if hidden_dim is None:
        hidden_dim = output_dim

    # Only the batch axis and the feature axis of the shape are used here.
    encoder = RecurrentSequential(unroll=unroll, stateful=stateful)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    decoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  decode=True,
                                  output_length=output_length)
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))
    if depth[1] == 1:
        decoder.add(LSTMCell(output_dim))
    else:
        decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim))

    return encoder, decoder
def SimpleSeq2Seq(output_dim, output_length, hidden_dim=None,
                  input_shape=None, batch_size=None, batch_input_shape=None,
                  input_dim=None, input_length=None, depth=1, dropout=0.0,
                  unroll=False, stateful=False):
    '''
    Simple model for sequence to sequence learning.
    The encoder encodes the input sequence to a vector (called the context
    vector). The decoder decodes the context vector into a sequence of
    vectors. There is no one-to-one relation between the input and output
    sequence elements. The input sequence and output sequence may differ in
    length.

    Arguments:
        output_dim : Required output dimension.
        hidden_dim : The dimension of the internal representations of the
            model. Defaults to `output_dim`.
        output_length : Length of the required output sequence.
        batch_input_shape, input_shape, batch_size, input_dim, input_length :
            Describe the input shape. Precedence is batch_input_shape, then
            input_shape (with batch_size prepended), then input_dim (with
            input_length, or None for the time axis).
        depth : Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3 LSTMs
            on the decoding side. You can also specify depth as a tuple. For
            example, if depth = (4, 5), 4 LSTMs will be added to the encoding
            side and 5 LSTMs will be added to the decoding side.
        dropout : Dropout probability in between layers.
        unroll, stateful : Forwarded to both RecurrentSequential containers.

    Returns:
        A Model from the created Input to the decoder output.

    Raises:
        TypeError: if none of batch_input_shape, input_shape or input_dim is
            provided.
    '''
    # Build the batch shape tuple (batch_size, input_length, input_dim).
    # TODO: Add more information about the statefulness of the LSTM and the
    # input shape it expects.
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        # A falsy input_length leaves the time axis unspecified.
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'Input shape is not specified: provide batch_input_shape, '
            'input_shape, or input_dim (optionally with input_length and '
            'batch_size).')

    if hidden_dim is None:
        hidden_dim = output_dim

    # hidden_dim plays the role of `units` in the Keras LSTM arguments
    # (https://keras.io/layers/recurrent/#lstm): the dimensionality of the
    # cell output. Only the batch and feature axes of `shape` are used here.
    encoder = RecurrentSequential(unroll=unroll, stateful=stateful)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    decoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                  decode=True, output_length=output_length)
    # The decoder input size equals the encoder output size (hidden_dim), so
    # the two stacks line up.
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))
    if depth[1] == 1:
        decoder.add(LSTMCell(output_dim))
    else:
        decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim))

    # Input tensor with the resolved batch shape. See the note on
    # statefulness in RNNs at https://keras.io/layers/recurrent/
    _input = Input(batch_shape=shape)
    x = encoder(_input)
    output = decoder(x)

    # The Model includes every layer needed to compute `output` from
    # `_input`.
    return Model(_input, output)