def build_models(pad_length=config.padding,
                 n_chars=input_vocab.size(),
                 n_labels=output_vocab.size(),
                 embedding_learnable=False,
                 encoder_units=32,
                 decoder_units=32,
                 trainable=True,
                 return_probabilities=False):
    """Build the model."""
    input_ = Input(shape=(pad_length,), dtype='float32')
    input_embed = Embedding(n_chars, n_chars,
                            input_length=pad_length,
                            trainable=embedding_learnable,
                            weights=[np.eye(n_chars)],
                            name='OneHot')(input_)

    rnn_encoded = Bidirectional(CuDNNLSTM(encoder_units, return_sequences=True),
                                name='bidirectional_1',
                                merge_mode='concat',
                                trainable=trainable)(input_embed)

    y_prob = AttentionDecoder(decoder_units,
                              name='attention_decoder_prob',
                              output_dim=n_labels,
                              return_probabilities=True,
                              trainable=False)(rnn_encoded)

    y_pred = AttentionDecoder(decoder_units,
                              name='attention_decoder_1',
                              output_dim=n_labels,
                              return_probabilities=return_probabilities,
                              trainable=trainable)(rnn_encoded)

    model = Model(inputs=input_, outputs=y_pred)
    model.summary()
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    prob_model = Model(inputs=input_, outputs=y_prob)
    return model, prob_model
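# Hedged usage sketch (not from the source above): build_models returns the training
# model plus prob_model, whose AttentionDecoder is configured with
# return_probabilities=True, so prob_model.predict yields the attention weights of
# each output step over the input steps. The placeholder input array and the
# matplotlib plotting are assumptions for illustration; note the probability decoder
# is a separate, non-trainable layer, so its weights must be set or loaded to match
# the trained decoder before the map is meaningful.
import numpy as np
import matplotlib.pyplot as plt

model, prob_model = build_models()
# ... train `model`, then transfer/load weights for prob_model as needed ...
sample = np.zeros((1, config.padding), dtype='float32')    # placeholder encoded input
attention_map = prob_model.predict(sample)[0]              # (output steps, input steps)
plt.imshow(attention_map, cmap='viridis')
plt.xlabel('input position')
plt.ylabel('output position')
plt.show()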
def _getEncoderDecoderModel():
    model = Sequential()
    model.add(LSTM(32, input_shape=(max_len, num_chars), return_sequences=True))
    model.add(AttentionDecoder(32, num_chars))
    model.add(Dense(num_chars, activation="softmax"))
    return model
def __init__(self, vocab_size, wordvec_size, hidden_size):
    args = vocab_size, wordvec_size, hidden_size
    self.encoder = AttentionEncoder(*args)
    self.decoder = AttentionDecoder(*args)
    self.softmax = TimeSoftmaxWithLoss()
    self.params = self.encoder.params + self.decoder.params
    self.grads = self.encoder.grads + self.decoder.grads
def create_network(nb_attention_cells=1):
    model = Sequential()
    model.add(AttentionDecoder(nb_attention_cells, n_features,
                               input_shape=(width, height)))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
    return model
def define_model(src_vocab, tar_vocab, src_timesteps, tar_timesteps, n_units):
    model = Sequential()
    model.add(Embedding(src_vocab, n_units, input_length=src_timesteps, mask_zero=True))
    model.add(LSTM(n_units))
    model.add(RepeatVector(tar_timesteps))
    model.add(AttentionDecoder(n_units, n_features))
    return model
def one_hot_encode(sequence, n_unique):
    encoding = list()
    for value in sequence:
        vector = [0 for _ in range(n_unique)]
        vector[value] = 1
        encoding.append(vector)
    return array(encoding)

# decode a one hot encoded string
def one_hot_decode(encoded_seq):
    return [argmax(vector) for vector in encoded_seq]

# prepare data for the LSTM
def get_pair(n_in, n_out, cardinality):
    # generate random sequence
    sequence_in = generate_sequence(n_in, cardinality)
    sequence_out = sequence_in[:n_out] + [0 for _ in range(n_in - n_out)]
    # one hot encode
    X = one_hot_encode(sequence_in, cardinality)
    y = one_hot_encode(sequence_out, cardinality)
    # reshape as 3D
    X = X.reshape((1, X.shape[0], X.shape[1]))
    y = y.reshape((1, y.shape[0], y.shape[1]))
    return X, y

# configure problem
n_features = 50
n_timesteps_in = 5
n_timesteps_out = 2

# define model
model = Sequential()
model.add(LSTM(150, input_shape=(n_timesteps_in, n_features), return_sequences=True))
model.add(AttentionDecoder(150, n_features))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

# train LSTM
for epoch in range(5000):
    # generate new random sequence
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    # fit model for one epoch on this sequence
    model.fit(X, y, epochs=1, verbose=2)

# evaluate LSTM
total, correct = 100, 0
for _ in range(total):
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    yhat = model.predict(X, verbose=0)
    if array_equal(one_hot_decode(y[0]), one_hot_decode(yhat[0])):
        correct += 1
print('Accuracy: %.2f%%' % (float(correct) / float(total) * 100.0))

# spot check some examples
for _ in range(10):
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    yhat = model.predict(X, verbose=0)
    print('Expected:', one_hot_decode(y[0]), 'Predicted', one_hot_decode(yhat[0]))
def attention_model(n_timesteps_in, n_features):
    model = Sequential()
    model.add(LSTM(150, input_shape=(n_timesteps_in, n_features), return_sequences=True))
    model.add(AttentionDecoder(150, n_features))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
def attention(lstm_cells, n_timesteps_in, n_features):
    model = Sequential(name="AttentionLSTM")
    model.add(LSTM(lstm_cells, input_shape=(n_timesteps_in, n_features), return_sequences=True))
    model.add(AttentionDecoder(lstm_cells, n_features))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
    print(model.summary())
    return model
def create_model(X_vocab_len, X_max_len, y_vocab_len, y_max_len,
                 hidden_size, num_layers, embedding_matrix):

    def smart_merge(vectors, **kwargs):
        return vectors[0] if len(vectors) == 1 else merge(vectors, **kwargs)

    root_word_in = Input(shape=(X_max_len,), dtype='int32')
    tag_word_in = Input(shape=(y_max_len,), dtype='int32')
    # print root_word_in

    # this embedding encodes input sequence into a sequence of
    # dense X_vocab_len-dimensional vectors.
    emb_layer = Embedding(X_vocab_len, EMBEDDING_DIM,
                          weights=[embedding_matrix],
                          input_length=X_max_len,
                          mask_zero=True,
                          trainable=True)  # DEFINITION of layer

    hindi_word_embedding = emb_layer(root_word_in)  # POSITION of layer
    # root_word_embedding = Embedding(dim_embedding, 64,
    #                                 input_length=dim_root_word_in,
    #                                 W_constraint=maxnorm(2))(root_word_in)

    '''
    # A lstm will transform the vector sequence into a single vector,
    # containing information about the entire sequence
    LtoR_LSTM = LSTM(512, return_sequences=False)
    LtoR_LSTM_vector = LtoR_LSTM(hindi_word_embedding)
    RtoL_LSTM = LSTM(512, return_sequences=False, go_backwards=True)
    RtoL_LSTM_vector = RtoL_LSTM(hindi_word_embedding)
    BidireLSTM_vector = [LtoR_LSTM_vector]
    BidireLSTM_vector.append(RtoL_LSTM_vector)
    BidireLSTM_vector = smart_merge(BidireLSTM_vector, mode='concat')
    '''

    BidireLSTM_vector1 = Bidirectional(
        LSTM(HIDDEN_DIM, return_sequences=True, dropout=0,
             kernel_regularizer=regularizers.l2(0.1)))(hindi_word_embedding)
    # BidireLSTM_vector2 = LSTM(512, return_sequences=True, dropout=0.5)(BidireLSTM_vector1)
    Attention = AttentionDecoder(HIDDEN_DIM, X_vocab_len)(BidireLSTM_vector1)

    all_inputs = [root_word_in]
    model = Model(inputs=all_inputs, outputs=Attention)
    adam = Adam()
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

    return model
def attention_model(self):
    model = Sequential()
    model.add(LSTM(150, input_shape=(self.n_timesteps_in, self.n_features), return_sequences=True))
    model.add(AttentionDecoder(150, self.n_features))
    nadam = Nadam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
    model.compile(loss=cos_distance, optimizer=nadam, metrics=['acc'])
    return model
def seq2seq_model(n_symbols, embedding_weights, x_train, y_train, x_test, y_test,
                  idx_to_word, new_model=False):
    if (not new_model) and os.path.exists(seq2seq_file):
        logging.info("Reading seq2seq_model from {}".format(seq2seq_file))
        en_de_model = load_model(seq2seq_file,
                                 custom_objects={'AttentionDecoder': AttentionDecoder})
    else:
        logging.info("Building new seq2seq_model...")
        inputs = Input(shape=(maxlen,))
        out = Embedding(input_dim=n_symbols,
                        output_dim=w2v_dim,
                        input_length=maxlen,
                        mask_zero=True,
                        weights=[embedding_weights],
                        trainable=True,
                        name="Embedding_1")(inputs)
        out = Bidirectional(LSTM(c_dim, return_sequences=True), merge_mode='sum')(out)
        out = AttentionDecoder(decode_dim, n_symbols)(out)
        out = Dense(n_symbols, activation="relu", name="Dense_1")(out)
        # en_de_model.add(RepeatVector(maxlen))
        # en_de_model.add(TimeDistributed(Dense(maxlen, activation="linear")))
        out = Activation('softmax', name="Activation_1")(out)
        en_de_model = Model(inputs=inputs, outputs=out)

        layer = en_de_model.get_layer(name="Embedding_1")
        print(layer.input_shape, " ", layer.output_shape)

        logging.info('Compiling...')
        time_start = time.time()
        en_de_model.compile(loss='categorical_crossentropy', optimizer='adam',
                            metrics=['accuracy'])
        time_end = time.time()
        logging.info('Cost time of compilation is: %f second!' % (time_end - time_start))

    logging.info('Start training...')
    for iter_num in range(loop):
        en_de_model.fit(x_train, y_train, batch_size=seq2seq_batch_size,
                        epochs=seq2seq_epochs, verbose=2)
        out_predicts = en_de_model.predict(x_test)
        for i_idx, out_predict in enumerate(out_predicts):
            predict_sequence = []
            ground_truth = []
            for predict_vector in out_predict:
                next_index = np.argmax(predict_vector)
                if next_index != 0:
                    next_token = idx_to_word[next_index]
                    predict_sequence.append(next_token)
                next_index = np.where(y_test[i_idx] == True)
                if next_index[0].shape[0] != 0:
                    next_token = idx_to_word[next_index[0][0]]
                    ground_truth.append(next_token)
            print('Target output:', ' '.join(ground_truth))
            print('Predict output:', ' '.join(predict_sequence))
        logging.info('Current iter_num is: %d' % iter_num)
        en_de_model.save(seq2seq_file)
        en_de_model.save_weights(seq2seq_weights_file)
    return en_de_model
def build_attention_model(n_timesteps_in, n_features):
    model = Sequential()
    model.add(LSTM(150, input_shape=(n_timesteps_in, n_features), return_sequences=True))
    model.add(AttentionDecoder(150, n_features))
    start = time.time()
    # model.compile(optimizer="rmsprop", loss=root_mean_squared_error)
    # model.compile(optimizer='Adam', loss=root_mean_squared_error)
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.summary()
    print('> Compilation Time : ', time.time() - start)
    return model
def get_seq2seq_attention(INPUT_LEN, dim):
    hidden_size = 128
    model = Sequential()
    model.add(LSTM(hidden_size, input_shape=(INPUT_LEN, dim), return_sequences=True))
    model.add(LSTM(hidden_size, input_shape=(INPUT_LEN, dim), return_sequences=True))
    model.add(LSTM(hidden_size, input_shape=(INPUT_LEN, dim), return_sequences=True))
    model.add(AttentionDecoder(hidden_size, dim))
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
    return model
def define_model_gru_gru(vocab, timesteps, n_units):
    model = Sequential()
    model.add(Embedding(vocab, n_units, input_length=timesteps, mask_zero=True))
    model.add(GRU(n_units, return_sequences=False, dropout=0.5, recurrent_dropout=0.5))
    model.add(RepeatVector(timesteps))
    model.add(GRU(n_units, return_sequences=True, dropout=0.5, recurrent_dropout=0.5))
    model.add(BatchNormalization())
    model.add(AttentionDecoder(n_units, vocab))
    return model
def define_model(src_vocab, tar_vocab, src_timesteps, tar_timesteps, n_units):
    model = Sequential()
    model.add(Embedding(src_vocab, n_units, input_length=src_timesteps, mask_zero=True))
    model.add(LSTM(n_units, activation='softsign', dropout=0.0, recurrent_dropout=0.0))
    model.add(RepeatVector(tar_timesteps))
    model.add(AttentionDecoder(n_units, tar_vocab))
    # model.add(LSTM(n_units, activation='softsign', dropout=0.0, recurrent_dropout=0.0, return_sequences=True))
    # model.add(TimeDistributed(Dense(tar_vocab, activation='softmax')))
    return model
def define_model(vocab, timesteps, n_units, encoder, decoder, attention):
    model = Sequential()
    model.add(Embedding(vocab, n_units, input_length=timesteps, mask_zero=True))
    # model.add(Embedding(vocab, n_units, weights=[embedding_vectors], input_length=timesteps, trainable=False))
    if encoder == "LSTM":
        model.add(LSTM(n_units, return_sequences=False, dropout=0.5, recurrent_dropout=0.5))
    elif encoder == "GRU":
        model.add(GRU(n_units, return_sequences=False, dropout=0.5, recurrent_dropout=0.5))
    model.add(RepeatVector(timesteps))
    if decoder == "LSTM":
        model.add(LSTM(n_units, return_sequences=True, dropout=0.5, recurrent_dropout=0.5))
    elif decoder == "GRU":
        model.add(GRU(n_units, return_sequences=True, dropout=0.5, recurrent_dropout=0.5))
    model.add(BatchNormalization())
    if attention == "ATTNDECODER":
        model.add(AttentionDecoder(n_units, vocab))
    else:
        model.add(TimeDistributed(Dense(vocab, activation='softmax',
                                        # kernel_regularizer=regularizers.l2(0.01),
                                        # activity_regularizer=regularizers.l2(0.01)
                                        )))
    return model
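# Hedged usage sketch (assumption, not part of the snippet above): one way define_model
# might be called. The vocabulary size, timestep count, and unit count are illustrative
# values, not taken from the source.
model = define_model(vocab=5000, timesteps=20, n_units=256,
                     encoder="LSTM", decoder="GRU", attention="ATTNDECODER")
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()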
def create_model(X_vocab_len, X_max_len, y_vocab_len, y_max_len, hidden_size, num_layers):

    def smart_merge(vectors, **kwargs):
        return vectors[0] if len(vectors) == 1 else merge(vectors, **kwargs)

    root_word_in = Input(shape=(X_max_len,), dtype='int32')

    emb_layer = Embedding(X_vocab_len, EMBEDDING_DIM,
                          input_length=X_max_len,
                          mask_zero=True)
    hindi_word_embedding = emb_layer(root_word_in)  # POSITION of layer

    BidireLSTM_vector = Bidirectional(
        LSTM(40, dropout=0, return_sequences=True))(hindi_word_embedding)

    '''
    att = AttentionWithContext()(BidireLSTM_vector)
    # print(att.shape)
    RepLayer = RepeatVector(y_max_len)
    RepVec = RepLayer(att)
    Emb_plus_repeat = [hindi_word_embedding]
    Emb_plus_repeat.append(RepVec)
    Emb_plus_repeat = smart_merge(Emb_plus_repeat, mode='concat')

    for _ in range(num_layers):
        LtoR_LSTM = Bidirectional(LSTM(40, dropout=dropout, return_sequences=True))
        temp = LtoR_LSTM(Emb_plus_repeat)

    # for each time step in the input, we intend to output |y_vocab_len| time steps
    time_dist_layer = TimeDistributed(Dense(y_vocab_len))(temp)
    outputs = Activation('softmax')(time_dist_layer)
    '''

    outputs = AttentionDecoder(HIDDEN_DIM, X_vocab_len)(BidireLSTM_vector)
    all_inputs = [root_word_in]
    model = Model(inputs=all_inputs, outputs=outputs)
    opt = Adam()  # note: this Adam instance is unused; compilation below uses 'rmsprop'
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

    return model
def __init__(self, vocab_size, embed_size, hidden_size, XP):
    """
    Initial setting.
    :param vocab_size:
    :param embed_size:
    :param hidden_size:
    :return:
    """
    super(AttentionDialogue, self).__init__(
        emb=SrcEmbed(vocab_size, embed_size),
        forward_encode=AttentionEncoder(embed_size, hidden_size),
        back_encdode=AttentionEncoder(embed_size, hidden_size),
        attention=Attention(hidden_size),
        dec=AttentionDecoder(vocab_size, embed_size, hidden_size),
    )
    self.vocab_size = vocab_size
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.XP = XP
def simpleNMT(pad_length=100,
              n_chars=105,
              n_labels=6,
              embedding_learnable=False,
              encoder_units=256,
              decoder_units=256,
              trainable=True,
              return_probabilities=False):
    """
    Builds a Neural Machine Translator that has alignment attention.

    :param pad_length: the size of the input sequence
    :param n_chars: the number of characters in the vocabulary
    :param n_labels: the number of possible labelings for each character
    :param embedding_learnable: decides if the one hot embedding should be refinable.
    :return: keras.models.Model that can be compiled and fit'ed

    *** REFERENCES ***
    Bahdanau, Dzmitry, Kyunghyun Cho, and Yoshua Bengio.
    "Neural Machine Translation by Jointly Learning to Align and Translate."
    """
    input_ = Input(shape=(pad_length,), dtype='float32')
    input_embed = Embedding(n_chars, n_chars,
                            input_length=pad_length,
                            trainable=embedding_learnable,
                            weights=[np.eye(n_chars)],
                            name='OneHot')(input_)

    rnn_encoded = Bidirectional(LSTM(encoder_units, return_sequences=True),
                                name='bidirectional_1',
                                merge_mode='concat',
                                trainable=trainable)(input_embed)

    y_hat = AttentionDecoder(decoder_units,
                             name='attention_decoder_1',
                             output_dim=n_labels,
                             return_probabilities=return_probabilities,
                             trainable=trainable)(rnn_encoded)

    model = Model(inputs=input_, outputs=y_hat)
    return model
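# Hedged usage sketch (assumption): compiling and fitting simpleNMT. The arrays
# x_train (shape (n, 100), integer-encoded characters) and y_train (shape (n, 100, 6),
# one-hot labels) are placeholder names, not from the source.
nmt = simpleNMT(pad_length=100, n_chars=105, n_labels=6)
nmt.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# nmt.fit(x_train, y_train, batch_size=64, epochs=10, validation_split=0.1)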
def define_model_rnn(vocab, timesteps, n_units, encoder, decoder, attention):
    model = Sequential()
    model.add(Embedding(vocab, n_units, input_length=timesteps, mask_zero=True))
    # model.add(Embedding(vocab, n_units, weights=[embedding_vectors], input_length=timesteps, trainable=False))
    model.add(SimpleRNN(n_units, return_sequences=False))
    model.add(RepeatVector(timesteps))
    model.add(SimpleRNN(n_units, return_sequences=True))
    # model.add(BatchNormalization())
    if attention == "ATTNDECODER":
        model.add(AttentionDecoder(n_units, vocab))
    else:
        model.add(TimeDistributed(Dense(vocab, activation='tanh',
                                        # kernel_regularizer=regularizers.l2(0.01),
                                        # activity_regularizer=regularizers.l2(0.01)
                                        )))
    return model
def create_UniLSTMwithAttention(X_vocab_len, X_max_len, y_vocab_len, y_max_len,
                                hidden_size, num_layers, return_probabilities=False):
    model = Sequential()
    model.add(Embedding(X_vocab_len, 300, input_length=X_max_len))
    model.add(Bidirectional(LSTM(hidden_size, return_sequences=True, recurrent_dropout=0.2)))
    model.add(AttentionDecoder(hidden_size,
                               name='decoder',
                               output_dim=y_vocab_len,
                               return_probabilities=return_probabilities,
                               trainable=True))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
    return model
for _ in range(20):
    x, y = generate_pair(n_timestep_in, n_timestep_out, n_feature)
    pred = model.predict(x)
    print("expected:", onehot_decoder(y[0]), "predicted:", onehot_decoder(pred[0]))

# https://github.com/datalogue/keras-attention
from attention_decoder import AttentionDecoder

model_att = Sequential()
model_att.add(LSTM(150, input_shape=(n_timestep_in, n_feature), return_sequences=True))
model_att.add(LSTM(150, return_sequences=True))
model_att.add(LSTM(150, return_sequences=True))
model_att.add(AttentionDecoder(150, n_feature))
print(model_att.summary())
model_att.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

for epoch in range(500):
    x, y = generate_pair(n_timestep_in, n_timestep_out, n_feature)
    model_att.fit(x, y, epochs=1, verbose=1)

correct = 0
for _ in range(epochs):
    x, y = generate_pair(n_timestep_in, n_timestep_out, n_feature)
    pred = model_att.predict(x)
    if array_equal(onehot_decoder(y[0]), onehot_decoder(pred[0])):
        correct += 1
def main():
    parser = argparse.ArgumentParser()
    parser.register('type', 'bool', str2bool)

    parser.add_argument('--emb_dim', type=int, default=300, help='Embeddings dimension')
    parser.add_argument('--hidden_size', type=int, default=512, help='Hidden size')
    parser.add_argument('--batch_size', type=int, default=32, help='Batch size')
    parser.add_argument('--n_epochs', type=int, default=100, help='Num epochs')
    parser.add_argument('--optimizer', type=str, default='adam', help='Optimizer')
    parser.add_argument('--seq_length', type=int, default=100, help='Maximum sequence length')
    parser.add_argument('--input_data', type=str, default='data/input.pkl', help='Input data')
    parser.add_argument('--model_fname', type=str, default='models/autoencoder.h5', help='Model filename')
    parser.add_argument('--seed', type=int, default=1337, help='Random seed')
    args = parser.parse_args()
    print('Model args: ', args)
    np.random.seed(args.seed)

    print("Starting...")
    print("Now building the autoencoder")
    n_features = args.emb_dim
    n_timesteps_in = args.seq_length
    n_timesteps_out = args.seq_length
    print((n_timesteps_in, n_features))

    model = Sequential()
    model.add(LSTM(args.hidden_size, input_shape=(n_timesteps_in, n_features), return_sequences=True))
    model.add(AttentionDecoder(args.hidden_size, n_features))
    model.compile(loss='mse', optimizer='adam')
    print(model.summary())

    print("Now loading data...")
    sequences = pickle.load(open(args.input_data, 'rb'))
    print('Found %s sequences.' % len(sequences))

    print("Now training the model...")
    checkpoint = ModelCheckpoint(filepath=args.model_fname, save_best_only=True)
    model.fit(sequences, sequences, epochs=args.n_epochs, verbose=2,
              validation_split=0.2, callbacks=[checkpoint])

    # xtest = sequences
    # ytest = model.predict(xtest)
    # cosims = np.zeros((xtest.shape[0]))

    for seq in sequences:
        seq = seq.reshape((1, seq.shape[0], seq.shape[1]))
        y = model.predict(seq)
        y = y.reshape((1, seq.shape[2], seq.shape[1]))
num_encoder_tokens = 3
num_decoder_tokens = 80

encoder_inputs = Input(shape=(2000, num_encoder_tokens))
encoder, forward_h, forward_c, backward_h, backward_c = Bidirectional(
    CuDNNLSTM(500, return_sequences=True, return_state=True))(encoder_inputs)
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])

decoder_inputs = Input(shape=(70, num_decoder_tokens))
decoder = CuDNNLSTM(1000, return_sequences=True)(decoder_inputs,
                                                 initial_state=[state_h, state_c])
att = AttentionDecoder(70, 80)(decoder)

model = Model([encoder_inputs, decoder_inputs], att)

# Run training
adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None,
                       decay=0.0, amsgrad=False)
# compile with the Adam instance configured above
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
print(model.summary())
import numpy as np

INPUT_VOCABE_SIZE = 50
# in this example the input sequence is the same as the output sequence
OUTPUT_VOCABE_SIZE = 50
INPUT_EMBEDDING_DIM = 10
OUTPUT_EMBEDDING_DIM = 10

model = Sequential()
model.add(Embedding(INPUT_VOCABE_SIZE, INPUT_EMBEDDING_DIM))
model.add(Bidirectional(LSTM(150, return_sequences=True)))
model.add(AttentionDecoder(150, OUTPUT_VOCABE_SIZE,
                           embedding_dim=OUTPUT_EMBEDDING_DIM))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()

n = 10000
t = 10
x = np.random.randint(0, INPUT_VOCABE_SIZE, size=(n, t))
# reshape is needed for computing the sparse_categorical_crossentropy loss,
# which expects labels_true to have shape (batch, time, 1) rather than (batch, time)
y = np.expand_dims(x, -1)

model.fit(x, y, epochs=10)
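# Hedged follow-up sketch (assumption): decoding predictions from the model above.
# The decoder emits a distribution over OUTPUT_VOCABE_SIZE at each timestep, so the
# predicted integer sequence is recovered with an argmax over the last axis.
probs = model.predict(x[:1])          # shape (1, t, OUTPUT_VOCABE_SIZE)
decoded = np.argmax(probs, axis=-1)   # shape (1, t)
print('input :', x[0])
print('output:', decoded[0])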
    # reshape as 3D
    X = X.reshape((1, X.shape[0], X.shape[1]))
    y = y.reshape((1, y.shape[0], y.shape[1]))
    return X, y

# configure problem
n_features = 50
n_timesteps_in = 5
n_timesteps_out = 2

# define model
model = Sequential()
model.add(LSTM(150, input_shape=(n_timesteps_in, n_features), return_sequences=True))
model.add(AttentionDecoder(150, n_features))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

# train LSTM
for epoch in range(5000):
    # generate new random sequence
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    # fit model for one epoch on this sequence
    model.fit(X, y, epochs=1, verbose=2)

# evaluate LSTM
total, correct = 100, 0
for _ in range(total):
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    yhat = model.predict(X, verbose=0)
    if array_equal(one_hot_decode(y[0]), one_hot_decode(yhat[0])):
        correct += 1
y_val = np.expand_dims(x_val, axis=-1)

# building model
inputs = Input(shape=(None,), dtype='int64')
outp_true = Input(shape=(None,), dtype='int64')

embedded = Embedding(n_labels, n_labels,
                     weights=[np.eye(n_labels)],
                     trainable=False)(inputs)
pos_emb = PositionEmbedding(max_time=1000, n_waves=20, d_model=40)(embedded)
nnet = concatenate([embedded, pos_emb], axis=-1)

attention_decoder = AttentionDecoder(40, n_labels,
                                     embedding_dim=5,
                                     is_monotonic=False,
                                     normalize_energy=False)
# use teacher forcing
output = attention_decoder([nnet, outp_true])
# (alternative) without teacher forcing
# output = attention_decoder(nnet)
# or
# output = attention_decoder([nnet, outp_true], use_teacher_forcing=False)
# The last variant is useful for generating outputs with a number of timesteps
# different from the input (without it, the output sequence has the same length as
# the input sequence). To produce outputs of a different shape at inference time,
# one could pass outp_true = np.zeros((batch_size, outp_time)).

model = Model(inputs=[inputs, outp_true], outputs=[output])
model.compile(loss='sparse_categorical_crossentropy', optimizer='adadelta',
              metrics=['accuracy'])
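# Hedged inference sketch (assumption), following the alternative described in the
# comments above: with a decoder built via
# attention_decoder([nnet, outp_true], use_teacher_forcing=False), the second input
# only fixes the output length, so a zero array can be fed at prediction time.
# out_len and the reuse of x_val here are illustrative choices, not from the source.
out_len = 30
dummy_truth = np.zeros((x_val.shape[0], out_len), dtype='int64')
preds = model.predict([x_val, dummy_truth])       # (batch, out_len, n_labels)
decoded = np.argmax(preds, axis=-1)               # (batch, out_len)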
def embedding_model_lstm(self,
                         words,
                         embedding_weights_a=None,
                         embedding_weights_b=None,
                         trainable=False,
                         skip_embed=False,
                         return_sequences_b=False):

    lstm_unit_a = units
    lstm_unit_b = units * 2
    embed_unit = int(hparams['embed_size'])
    x_shape = (tokens_per_sentence,)

    decoder_dim = units * 2  # (tokens_per_sentence, units * 2)
    if hparams['dense_activation'] is None or hparams['dense_activation'] == 'none':
        decoder_dim = embed_unit

    valid_word_a = Input(shape=x_shape)
    # valid_word_b = Input(shape=x_shape)

    embeddings_a = Embedding(words, embed_unit,
                             weights=[embedding_weights_a],
                             input_length=tokens_per_sentence,
                             trainable=trainable)
    embed_a = embeddings_a(valid_word_a)

    ### encoder for training ###
    lstm_a = Bidirectional(
        LSTM(units=lstm_unit_a,
             return_sequences=True,
             dropout=0.5
             # return_state=True,
             # recurrent_dropout=0.2,
             # input_shape=(None,)
             ),
        merge_mode='concat',
        trainable=True)
    recurrent_a = lstm_a(embed_a)

    #############
    # conv1d_b = Conv1D(tokens_per_sentence, lstm_unit_b)(recurrent_a)

    lstm_b = AttentionDecoder(units=lstm_unit_b,
                              output_dim=decoder_dim,
                              kernel_constraint=min_max_norm(),
                              dropout=0.5
                              # return_sequences=return_sequences_b,
                              # return_state=True
                              )
    recurrent_b = lstm_b(recurrent_a)

    if hparams['dense_activation'] is not None and hparams['dense_activation'] != 'none':
        dense_b = Dense(embed_unit,
                        input_shape=(tokens_per_sentence,),
                        activation=hparams['dense_activation']  # softmax, tanh, or relu
                        # name='dense_layer_b',
                        )
        decoder_b = dense_b(recurrent_b)  # recurrent_b
        dropout_b = Dropout(0.5)(decoder_b)
        model = Model([valid_word_a], dropout_b)  # decoder_b
    else:
        model = Model([valid_word_a], recurrent_b)

    ### boilerplate ###
    adam = optimizers.Adam(lr=learning_rate)
    # loss: try 'categorical_crossentropy', 'mse', 'binary_crossentropy'
    # optimizer: try 'rmsprop'
    model.compile(optimizer=adam, loss='mse', metrics=['acc'])

    return model, None, None  # , None, model_inference
    EarlyStopping(monitor='val_loss', patience=2),
    ModelCheckpoint(filepath=model_weights, monitor='val_loss', save_best_only=True)
]

# define model
model = Sequential()
model.add(LSTM(256, input_shape=(MAXLEN, len(chars)), return_sequences=True))
model.add(LSTM(256, return_sequences=True))
model.add(Bidirectional(LSTM(100, return_sequences=True), input_shape=(256, 100)))
# model.add(LSTM(50, return_sequences=True))
model.add(AttentionDecoder(100, len(chars)))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()

# train LSTM
for epoch in range(500):
    # generate new random sequence
    # X, y = get_pair(n_timesteps_in, n_timesteps_out, vocab_size)
    # fit model for one epoch on this sequence
    model.fit(X_train, y_train, epochs=1, verbose=2)
    model.save_weights(model_weights)

# score, acc = model.evaluate(X_val, y_val, batch_size=BATCH_SIZE)
# print(score)
def get_pair(X_train, Y_train):
    X = embedding_encode(X_train, embedding)
    y = embedding_encode(Y_train, embedding)
    X = X.reshape((1, X.shape[0], X.shape[1]))
    y = y.reshape((1, y.shape[0], y.shape[1]))
    return X, y

vocab_size = len(word_to_id)
embed_size = 128

# define model
model = Sequential()
# model.add(Embedding(vocab_size, embed_size, weights=[embedding], input_length=maxlen, trainable=False))
model.add(Bidirectional(LSTM(150, return_sequences=True)))
model.add(AttentionDecoder(150, vocab_size))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

for epoch in range(5000):
    i = randint(0, len(row) - 1)
    # generate new random sequence
    X, y = get_pair(X_train[i], Y_train[i])
    # fit model for one epoch on this sequence
    model.fit(X, y, epochs=1, verbose=2)

def one_hot_decode(encoded_seq):
    return [argmax(vector) for vector in encoded_seq]