Example #1
def build_models(pad_length=config.padding, n_chars=input_vocab.size(), n_labels=output_vocab.size(),
                 embedding_learnable=False, encoder_units=32, decoder_units=32, trainable=True, return_probabilities=False):
    """Build the model"""
    input_ = Input(shape=(pad_length,), dtype='float32')
    input_embed = Embedding(n_chars, n_chars,
                            input_length=pad_length,
                            trainable=embedding_learnable,
                            weights=[np.eye(n_chars)],
                            name='OneHot')(input_)

    rnn_encoded = Bidirectional(CuDNNLSTM(encoder_units, return_sequences=True),
                                name='bidirectional_1',
                                merge_mode='concat',
                                trainable=trainable)(input_embed)

    y_prob = AttentionDecoder(decoder_units,
                              name='attention_decoder_prob',
                              output_dim=n_labels,
                              return_probabilities=True,
                              trainable=False)(rnn_encoded)

    y_pred = AttentionDecoder(decoder_units,
                              name='attention_decoder_1',
                              output_dim=n_labels,
                              return_probabilities=return_probabilities,
                              trainable=trainable)(rnn_encoded)

    model = Model(inputs=input_, outputs=y_pred)
    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    prob_model = Model(inputs=input_, outputs=y_prob)
    return model, prob_model
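A minimal usage sketch for the pair returned above (x_train and y_train are assumed placeholder arrays of padded character ids and one-hot labels):

# train the compiled prediction model, then query the probability model,
# which shares the embedding and encoder, for attention probabilities
model, prob_model = build_models()
model.fit(x_train, y_train, batch_size=64, epochs=10)
attention_probabilities = prob_model.predict(x_train[:1])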
Example #2
def _getEncoderDecoderModel():
    model = Sequential()
    model.add(LSTM(32, input_shape=(max_len, num_chars), return_sequences=True))
    model.add(AttentionDecoder(32, num_chars))
    model.add(Dense(num_chars, activation="softmax"))

    return model
Example #3
 def __init__(self, vocab_size, wordvec_size, hidden_size):
     args = vocab_size, wordvec_size, hidden_size
     self.encoder = AttentionEncoder(*args)
     self.decoder = AttentionDecoder(*args)
     self.softmax = TimeSoftmaxWithLoss()
     self.params = self.encoder.params + self.decoder.params
     self.grads = self.encoder.grads + self.decoder.grads
Example #4
def create_network(nb_attention_cells=1):
    model = Sequential()
    model.add(
        AttentionDecoder(nb_attention_cells,
                         n_features,
                         input_shape=(width, height)))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])
    return model
Example #5
def define_model(src_vocab, tar_vocab, src_timesteps, tar_timesteps, n_units):
    model = Sequential()
    model.add(
        Embedding(src_vocab,
                  n_units,
                  input_length=src_timesteps,
                  mask_zero=True))
    model.add(LSTM(n_units))
    model.add(RepeatVector(tar_timesteps))
    model.add(AttentionDecoder(n_units, tar_vocab))
    return model
Example #6
def one_hot_encode(sequence, n_unique):
	encoding = list()
	for value in sequence:
		vector = [0 for _ in range(n_unique)]
		vector[value] = 1
		encoding.append(vector)
	return array(encoding)
 
# decode a one hot encoded string
def one_hot_decode(encoded_seq):
	return [argmax(vector) for vector in encoded_seq]
 
# prepare data for the LSTM
def get_pair(n_in, n_out, cardinality):
	# generate random sequence
	sequence_in = generate_sequence(n_in, cardinality)
	sequence_out = sequence_in[:n_out] + [0 for _ in range(n_in-n_out)]
	# one hot encode
	X = one_hot_encode(sequence_in, cardinality)
	y = one_hot_encode(sequence_out, cardinality)
	# reshape as 3D
	X = X.reshape((1, X.shape[0], X.shape[1]))
	y = y.reshape((1, y.shape[0], y.shape[1]))
	return X,y
 
# configure problem
n_features = 50
n_timesteps_in = 5
n_timesteps_out = 2
 
# define model
model = Sequential()
model.add(LSTM(150, input_shape=(n_timesteps_in, n_features), return_sequences=True))
model.add(AttentionDecoder(150, n_features))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
# train LSTM
for epoch in range(5000):
	# generate new random sequence
	X,y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
	# fit model for one epoch on this sequence
	model.fit(X, y, epochs=1, verbose=2)
# evaluate LSTM
total, correct = 100, 0
for _ in range(total):
	X,y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
	yhat = model.predict(X, verbose=0)
	if array_equal(one_hot_decode(y[0]), one_hot_decode(yhat[0])):
		correct += 1
print('Accuracy: %.2f%%' % (float(correct)/float(total)*100.0))
# spot check some examples
for _ in range(10):
	X,y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
	yhat = model.predict(X, verbose=0)
	print('Expected:', one_hot_decode(y[0]), 'Predicted', one_hot_decode(yhat[0]))
Example #7
def attention_model(n_timesteps_in, n_features):
    model = Sequential()
    model.add(
        LSTM(150,
             input_shape=(n_timesteps_in, n_features),
             return_sequences=True))
    model.add(AttentionDecoder(150, n_features))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
Example #8
def attention(lstm_cells, n_timesteps_in, n_features):
    model = Sequential(name="AttentionLSTM")
    model.add(
        LSTM(lstm_cells,
             input_shape=(n_timesteps_in, n_features),
             return_sequences=True))
    model.add(AttentionDecoder(lstm_cells, n_features))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])
    print(model.summary())
    return model
Example #9
def create_model(X_vocab_len, X_max_len, y_vocab_len, y_max_len, hidden_size,
                 num_layers, embedding_matrix):
    def smart_merge(vectors, **kwargs):
        return vectors[0] if len(vectors) == 1 else merge(vectors, **kwargs)

    root_word_in = Input(shape=(X_max_len, ), dtype='int32')
    tag_word_in = Input(shape=(y_max_len, ), dtype='int32')
    # print root_word_in

    # this embedding encodes input sequence into a sequence of
    # dense X_vocab_len-dimensional vectors.
    emb_layer = Embedding(X_vocab_len,
                          EMBEDDING_DIM,
                          weights=[embedding_matrix],
                          input_length=X_max_len,
                          mask_zero=True,
                          trainable=True)  # DEFINITION of layer

    hindi_word_embedding = emb_layer(root_word_in)  # POSITION of layer
    #root_word_embedding = Embedding(dim_embedding, 64,
    #					   input_length=dim_root_word_in,
    #					   W_constraint=maxnorm(2))(root_word_in)
    '''
	# A lstm will transform the vector sequence into a single vector,
	# containing information about the entire sequence
	LtoR_LSTM = LSTM(512, return_sequences=False)
	LtoR_LSTM_vector = LtoR_LSTM(hindi_word_embedding)

	RtoL_LSTM = LSTM(512, return_sequences=False, go_backwards=True)
	RtoL_LSTM_vector = RtoL_LSTM(hindi_word_embedding)

	BidireLSTM_vector = [LtoR_LSTM_vector]
	BidireLSTM_vector.append(RtoL_LSTM_vector)
	BidireLSTM_vector= smart_merge(BidireLSTM_vector, mode='concat')
	'''
    BidireLSTM_vector1 = Bidirectional(
        LSTM(HIDDEN_DIM,
             return_sequences=True,
             dropout=0,
             kernel_regularizer=regularizers.l2(0.1)))(hindi_word_embedding)
    #BidireLSTM_vector2 = LSTM(512, return_sequences=True, dropout=0.5)(BidireLSTM_vector1)
    Attention = AttentionDecoder(HIDDEN_DIM, X_vocab_len)(BidireLSTM_vector1)

    all_inputs = [root_word_in]
    model = Model(inputs=all_inputs, outputs=Attention)
    adam = Adam()
    model.compile(optimizer=adam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model
Example #10
 def attention_model(self):
     model = Sequential()
     model.add(
         LSTM(150,
              input_shape=(self.n_timesteps_in, self.n_features),
              return_sequences=True))
     model.add(AttentionDecoder(150, self.n_features))
     nadam = Nadam(lr=0.001,
                   beta_1=0.9,
                   beta_2=0.999,
                   epsilon=None,
                   schedule_decay=0.004)
     model.compile(loss=cos_distance, optimizer=nadam, metrics=['acc'])
     return model
Example #11
def seq2seq_model(n_symbols, embedding_weights, x_train, y_train, x_test, y_test, idx_to_word, new_model = False):
	if (not new_model) and os.path.exists(seq2seq_file):
		logging.info("Reading seq2seq_model from {}".format(seq2seq_file))
		en_de_model = load_model(seq2seq_file, custom_objects={'AttentionDecoder':AttentionDecoder})
	else:
		logging.info("Building new seq2seq_model...")
		inputs = Input(shape=(maxlen,))  
		out = Embedding(input_dim=n_symbols,   
							output_dim=w2v_dim,   
							input_length=maxlen,
							mask_zero = True,
							weights = [embedding_weights],
							trainable = True, name="Embedding_1")(inputs)   
		out = Bidirectional(LSTM(c_dim,return_sequences = True), merge_mode = 'sum')(out)
		out = AttentionDecoder(decode_dim, n_symbols)(out)
		out = Dense(n_symbols, activation="relu", name="Dense_1")(out)
		#en_de_model.add(RepeatVector(maxlen))
		#en_de_model.add(TimeDistributed(Dense(maxlen, activation="linear")))   
		out = Activation('softmax', name = "Activation_1")(out)
		en_de_model = Model(inputs = inputs, outputs=out)
		layer = en_de_model.get_layer(name = "Embedding_1")
		print(layer.input_shape,"  ", layer.output_shape)
		logging.info('Compiling...')   
		time_start = time.time()   
		en_de_model.compile(loss='categorical_crossentropy', optimizer='adam',  metrics=['accuracy'])   
		time_end = time.time()   
		logging.info('Compilation took %f seconds' % (time_end - time_start))
	
	logging.info('Start training...')
	for iter_num in range(loop):   
		en_de_model.fit(x_train, y_train, batch_size=seq2seq_batch_size, epochs = seq2seq_epochs, verbose = 2) 
		out_predicts = en_de_model.predict(x_test)   
		for i_idx, out_predict in enumerate(out_predicts):   
			predict_sequence = []   
			ground_truth = []
			for predict_vector in out_predict:   
				next_index = np.argmax(predict_vector)   
				if next_index != 0:
					next_token = idx_to_word[next_index]  
					predict_sequence.append(next_token)
				next_index = np.where(y_test[i_idx] == True)
				if next_index[0].shape[0] != 0:
					next_token = idx_to_word[next_index[0][0]]
					ground_truth.append(next_token)
			print('Target output:', ' '.join(ground_truth))
			print('Predict output:', ' '.join(predict_sequence))
		logging.info('Current iter_num is:%d' % iter_num)  
		en_de_model.save(seq2seq_file)
		en_de_model.save_weights(seq2seq_weights_file) 
	return en_de_model
Example #12
def build_attention_model(n_timesteps_in, n_features):
    model = Sequential()
    model.add(
        LSTM(150,
             input_shape=(n_timesteps_in, n_features),
             return_sequences=True))
    model.add(AttentionDecoder(150, n_features))

    start = time.time()
    # model.compile(optimizer = "rmsprop", loss = root_mean_squared_error)
    # model.compile(optimizer='Adam', loss=root_mean_squared_error)
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.summary()
    print('> Compilation Time : ', time.time() - start)
    return model
Example #13
def get_seq2seq_attention(INPUT_LEN, dim):
    hidden_size = 128
    model = Sequential()
    model.add(
        LSTM(hidden_size, input_shape=(INPUT_LEN, dim), return_sequences=True))
    model.add(
        LSTM(hidden_size, input_shape=(INPUT_LEN, dim), return_sequences=True))
    model.add(
        LSTM(hidden_size, input_shape=(INPUT_LEN, dim), return_sequences=True))
    model.add(AttentionDecoder(hidden_size, dim))

    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
Example #14
def define_model_gru_gru(vocab, timesteps, n_units):
    model = Sequential()
    model.add(Embedding(vocab, n_units, input_length=timesteps,
                        mask_zero=True))
    model.add(
        GRU(n_units,
            return_sequences=False,
            dropout=0.5,
            recurrent_dropout=0.5))
    model.add(RepeatVector(timesteps))
    model.add(
        GRU(n_units, return_sequences=True, dropout=0.5,
            recurrent_dropout=0.5))
    model.add(BatchNormalization())
    model.add(AttentionDecoder(n_units, vocab))
    return model
Example #15
def define_model(src_vocab, tar_vocab, src_timesteps, tar_timesteps, n_units):
    model = Sequential()
    model.add(
        Embedding(src_vocab,
                  n_units,
                  input_length=src_timesteps,
                  mask_zero=True))
    model.add(
        LSTM(n_units,
             activation='softsign',
             dropout=0.0,
             recurrent_dropout=0.0))
    model.add(RepeatVector(tar_timesteps))
    model.add(AttentionDecoder(n_units, tar_vocab))
    #model.add(LSTM(n_units, activation='softsign', dropout=0.0, recurrent_dropout=0.0, return_sequences=True))
    #model.add(TimeDistributed(Dense(tar_vocab, activation='softmax')))
    return model
Example #16
def define_model(vocab, timesteps, n_units, encoder, decoder, attention):
    model = Sequential()
    model.add(Embedding(vocab, n_units, input_length=timesteps,
                        mask_zero=True))
    #model.add(Embedding(vocab, n_units, weights=[embedding_vectors], input_length=timesteps, trainable=False))
    if (encoder == "LSTM"):
        model.add(
            LSTM(n_units,
                 return_sequences=False,
                 dropout=0.5,
                 recurrent_dropout=0.5))
    elif (encoder == "GRU"):
        model.add(
            GRU(n_units,
                return_sequences=False,
                dropout=0.5,
                recurrent_dropout=0.5))

    model.add(RepeatVector(timesteps))
    if (decoder == "LSTM"):
        model.add(
            LSTM(n_units,
                 return_sequences=True,
                 dropout=0.5,
                 recurrent_dropout=0.5))
    elif (decoder == "GRU"):
        model.add(
            GRU(n_units,
                return_sequences=True,
                dropout=0.5,
                recurrent_dropout=0.5))

    model.add(BatchNormalization())
    if (attention == "ATTNDECODER"):
        model.add(AttentionDecoder(n_units, vocab))
    else:
        model.add(
            TimeDistributed(
                Dense(
                    vocab,
                    activation='softmax',
                    #kernel_regularizer=regularizers.l2(0.01),
                    #activity_regularizer=regularizers.l2(0.01)
                )))
    return model
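A minimal usage sketch for the factory above: the string arguments select the encoder, the decoder, and whether an AttentionDecoder or a plain softmax head is used (vocab, timesteps, X and y are assumed placeholders):

# build, compile, and train one configuration (sketch)
model = define_model(vocab, timesteps, 256, encoder="GRU", decoder="LSTM", attention="ATTNDECODER")
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.fit(X, y, epochs=10, batch_size=64)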
Example #17
def create_model(X_vocab_len, X_max_len, y_vocab_len, y_max_len, hidden_size,
                 num_layers):
    def smart_merge(vectors, **kwargs):
        return vectors[0] if len(vectors) == 1 else merge(vectors, **kwargs)

    root_word_in = Input(shape=(X_max_len, ), dtype='int32')

    emb_layer = Embedding(X_vocab_len,
                          EMBEDDING_DIM,
                          input_length=X_max_len,
                          mask_zero=True)

    hindi_word_embedding = emb_layer(root_word_in)  # POSITION of layer

    BidireLSTM_vector = Bidirectional(
        LSTM(40, dropout=0, return_sequences=True))(hindi_word_embedding)
    '''
	att = AttentionWithContext()(BidireLSTM_vector)
	#print(att.shape)
	RepLayer= RepeatVector(y_max_len)
	RepVec= RepLayer(att)
	Emb_plus_repeat=[hindi_word_embedding]
	Emb_plus_repeat.append(RepVec)
	Emb_plus_repeat = smart_merge(Emb_plus_repeat, mode='concat')
	
	
	for _ in range(num_layers):
		LtoR_LSTM = Bidirectional(LSTM(40, dropout=dropout, return_sequences=True))
		temp = LtoR_LSTM(Emb_plus_repeat)
	
	# for each time step in the input, we intend to output |y_vocab_len| time steps
	time_dist_layer = TimeDistributed(Dense(y_vocab_len))(temp)
	outputs = Activation('softmax')(time_dist_layer)
	'''
    outputs = AttentionDecoder(HIDDEN_DIM, X_vocab_len)(BidireLSTM_vector)

    all_inputs = [root_word_in]
    model = Model(inputs=all_inputs, outputs=outputs)
    opt = Adam()
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model
Example #18
 def __init__(self, vocab_size, embed_size, hidden_size, XP):
     """
     initial setting
     :param vocab_size:
     :param embed_size:
     :param hidden_size:
     :return:
     """
     super(AttentionDialogue, self).__init__(
         emb=SrcEmbed(vocab_size, embed_size),
         forward_encode=AttentionEncoder(embed_size, hidden_size),
         back_encdode=AttentionEncoder(embed_size, hidden_size),
         attention=Attention(hidden_size),
         dec=AttentionDecoder(vocab_size, embed_size, hidden_size),
     )
     self.vocab_size = vocab_size
     self.embed_size = embed_size
     self.hidden_size = hidden_size
     self.XP = XP
Example #19
def simpleNMT(pad_length=100,
              n_chars=105,
              n_labels=6,
              embedding_learnable=False,
              encoder_units=256,
              decoder_units=256,
              trainable=True,
              return_probabilities=False):
    """
    Builds a Neural Machine Translator that has alignment attention
    :param pad_length: the size of the input sequence
    :param n_chars: the number of characters in the vocabulary
    :param n_labels: the number of possible labelings for each character
    :param embedding_learnable: decides if the one hot embedding should be refinable.
    :return: keras.models.Model that can be compiled and fit'ed
    *** REFERENCES ***
    Bahdanau, Dzmitry, Kyunghyun Cho, and Yoshua Bengio.
    "Neural Machine Translation by Jointly Learning to Align and Translate"
    """
    input_ = Input(shape=(pad_length, ), dtype='float32')
    input_embed = Embedding(n_chars,
                            n_chars,
                            input_length=pad_length,
                            trainable=embedding_learnable,
                            weights=[np.eye(n_chars)],
                            name='OneHot')(input_)

    rnn_encoded = Bidirectional(LSTM(encoder_units, return_sequences=True),
                                name='bidirectional_1',
                                merge_mode='concat',
                                trainable=trainable)(input_embed)

    y_hat = AttentionDecoder(decoder_units,
                             name='attention_decoder_1',
                             output_dim=n_labels,
                             return_probabilities=return_probabilities,
                             trainable=trainable)(rnn_encoded)

    model = Model(inputs=input_, outputs=y_hat)

    return model
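simpleNMT returns an uncompiled model, so the caller compiles and fits it; a minimal sketch, assuming placeholder arrays x (padded character ids) and y (one-hot labels):

# compile and train the returned model (sketch)
model = simpleNMT(pad_length=100, n_chars=105, n_labels=6)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x, y, batch_size=32, epochs=5, validation_split=0.1)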
Example #20
def define_model_rnn(vocab, timesteps, n_units, encoder, decoder, attention):
    model = Sequential()
    model.add(Embedding(vocab, n_units, input_length=timesteps,
                        mask_zero=True))
    #model.add(Embedding(vocab, n_units, weights=[embedding_vectors], input_length=timesteps, trainable=False))
    model.add(SimpleRNN(n_units, return_sequences=False))
    model.add(RepeatVector(timesteps))
    model.add(SimpleRNN(n_units, return_sequences=True))
    #model.add(BatchNormalization())
    if (attention == "ATTNDECODER"):
        model.add(AttentionDecoder(n_units, vocab))
    else:
        model.add(
            TimeDistributed(
                Dense(
                    vocab,
                    activation='tanh',
                    #kernel_regularizer=regularizers.l2(0.01),
                    #activity_regularizer=regularizers.l2(0.01)
                )))
    return model
Example #21
def create_UniLSTMwithAttention(X_vocab_len,
                                X_max_len,
                                y_vocab_len,
                                y_max_len,
                                hidden_size,
                                num_layers,
                                return_probabilities=False):
    model = Sequential()
    model.add(Embedding(X_vocab_len, 300, input_length=X_max_len))
    model.add(
        Bidirectional(
            LSTM(hidden_size, return_sequences=True, recurrent_dropout=0.2)))
    model.add(
        AttentionDecoder(hidden_size,
                         name='decoder',
                         output_dim=y_vocab_len,
                         return_probabilities=return_probabilities,
                         trainable=True))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])
    return model
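A sketch of how the return_probabilities flag can be used to inspect attention after training: build a second, identical model with the flag set, copy the trained weights into it, and predict (X and y are assumed placeholders):

# train the prediction model, then mirror its weights into a probability model
model = create_UniLSTMwithAttention(X_vocab_len, X_max_len, y_vocab_len, y_max_len, 128, 1)
model.fit(X, y, epochs=10, batch_size=64)
prob_model = create_UniLSTMwithAttention(X_vocab_len, X_max_len, y_vocab_len, y_max_len, 128, 1,
                                         return_probabilities=True)
prob_model.set_weights(model.get_weights())
attention_matrix = prob_model.predict(X[:1])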
Example #22
for _ in range(20):
    x, y = generate_pair(n_timestep_in, n_timestep_out, n_feature)
    pred = model.predict(x)
    print("expected:", onehot_decoder(y[0]), "predicted:",
          onehot_decoder(pred[0]))

#https://github.com/datalogue/keras-attention
from attention_decoder import AttentionDecoder

model_att = Sequential()

model_att.add(
    LSTM(150, input_shape=(n_timestep_in, n_feature), return_sequences=True))
model_att.add(LSTM(150, return_sequences=True))
model_att.add(LSTM(150, return_sequences=True))
model_att.add(AttentionDecoder(150, n_feature))

print(model_att.summary())
model_att.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['acc'])
for epoch in range(500):
    x, y = generate_pair(n_timestep_in, n_timestep_out, n_feature)
    model_att.fit(x, y, epochs=1, verbose=1)

correct = 0

for _ in range(epochs):
    x, y = generate_pair(n_timestep_in, n_timestep_out, n_feature)
    pred = model_att.predict(x)
    if array_equal(onehot_decoder(y[0]), onehot_decoder(pred[0])):
        correct += 1
Example #23
def main():

    parser = argparse.ArgumentParser()
    parser.register('type', 'bool', str2bool)
    parser.add_argument('--emb_dim',
                        type=int,
                        default=300,
                        help='Embeddings dimension')
    parser.add_argument('--hidden_size',
                        type=int,
                        default=512,
                        help='Hidden size')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='Batch size')
    parser.add_argument('--n_epochs', type=int, default=100, help='Num epochs')
    parser.add_argument('--optimizer',
                        type=str,
                        default='adam',
                        help='Optimizer')
    parser.add_argument('--seq_length',
                        type=int,
                        default=100,
                        help='Maximum sequence length')
    parser.add_argument('--input_data',
                        type=str,
                        default='data/input.pkl',
                        help='Input data')
    parser.add_argument('--model_fname',
                        type=str,
                        default='models/autoencoder.h5',
                        help='Model filename')
    parser.add_argument('--seed', type=int, default=1337, help='Random seed')
    args = parser.parse_args()
    print('Model args: ', args)

    np.random.seed(args.seed)

    print("Starting...")

    print("Now building the autoencoder")
    n_features = args.emb_dim
    n_timesteps_in = args.seq_length
    n_timesteps_out = args.seq_length

    print((n_timesteps_in, n_features))

    model = Sequential()
    model.add(
        LSTM(args.hidden_size,
             input_shape=(n_timesteps_in, n_features),
             return_sequences=True))
    model.add(AttentionDecoder(args.hidden_size, n_features))
    model.compile(loss='mse', optimizer='adam')

    print(model.summary())

    print("Now loading data...")

    sequences = pickle.load(open(args.input_data, 'rb'))
    print('Found %s sequences.' % len(sequences))

    print("Now training the model...")

    checkpoint = ModelCheckpoint(filepath=args.model_fname,
                                 save_best_only=True)
    model.fit(sequences,
              sequences,
              epochs=args.n_epochs,
              verbose=2,
              validation_split=0.2,
              callbacks=[checkpoint])

    # xtest = sequences
    # ytest = model.predict(xtest)

    # cosims = np.zeros((xtest.shape[0]))

    for seq in sequences:
        seq = seq.reshape((1, seq.shape[0], seq.shape[1]))
        y = model.predict(seq)
        y = y.reshape((1, seq.shape[2], seq.shape[1]))
Example #24
num_encoder_tokens = 3
num_decoder_tokens = 80

encoder_inputs = Input(shape=(2000, num_encoder_tokens))
encoder, forward_h, forward_c, backward_h, backward_c = Bidirectional(
    CuDNNLSTM(500, return_sequences=True, return_state=True))(encoder_inputs)
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])

decoder_inputs = Input(shape=(70, num_decoder_tokens))
decoder = CuDNNLSTM(1000,
                    return_sequences=True)(decoder_inputs,
                                           initial_state=[state_h, state_c])

att = AttentionDecoder(70, 80)(decoder)

model = Model([encoder_inputs, decoder_inputs], att)

# Run training
adam = optimizers.Adam(lr=0.0001,
                       beta_1=0.9,
                       beta_2=0.999,
                       epsilon=None,
                       decay=0.0,
                       amsgrad=False)
model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

print(model.summary())
Example #25
import numpy as np

INPUT_VOCABE_SIZE = 50
# in this example the input sequence is the same as the output sequence
OUTPUT_VOCABE_SIZE = 50

INPUT_EMBEDDING_DIM = 10
OUTPUT_EMBEDDING_DIM = 10

model = Sequential()
model.add(Embedding(INPUT_VOCABE_SIZE, INPUT_EMBEDDING_DIM))
model.add(Bidirectional(LSTM(150, return_sequences=True)))
model.add(
    AttentionDecoder(150,
                     OUTPUT_VOCABE_SIZE,
                     embedding_dim=OUTPUT_EMBEDDING_DIM))
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['acc'])
model.summary()

n = 10000
t = 10
x = np.random.randint(0, INPUT_VOCABE_SIZE, size=(n, t))
# reshape is needed for computing the sparse_categorical_crossentropy loss,
# which expects the true labels to have shape (batch, time, 1) rather than (batch, time)
y = np.expand_dims(x, -1)

model.fit(x, y, epochs=10)
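Since the model was trained against sparse integer labels, predictions can be decoded back to token ids with an argmax over the last axis; a short sketch:

# decode a few predictions back to integer token ids
probabilities = model.predict(x[:5])
predicted_ids = probabilities.argmax(axis=-1)
print(predicted_ids[0], 'vs expected', x[0])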
Example #26
    # reshape as 3D
    X = X.reshape((1, X.shape[0], X.shape[1]))
    y = y.reshape((1, y.shape[0], y.shape[1]))
    return X, y


# configure problem
n_features = 50
n_timesteps_in = 5
n_timesteps_out = 2

# define model
model = Sequential()
model.add(
    LSTM(150, input_shape=(n_timesteps_in, n_features), return_sequences=True))
model.add(AttentionDecoder(150, n_features))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['acc'])
# train LSTM
for epoch in range(5000):
    # generate new random sequence
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    # fit model for one epoch on this sequence
    model.fit(X, y, epochs=1, verbose=2)
# evaluate LSTM
total, correct = 100, 0
for _ in range(total):
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    yhat = model.predict(X, verbose=0)
    if array_equal(one_hot_decode(y[0]), one_hot_decode(yhat[0])):
        correct += 1
Example #27
y_val = np.expand_dims(x_val, axis=-1)

# building model
inputs = Input(shape=(None, ), dtype='int64')
outp_true = Input(shape=(None, ), dtype='int64')
embedded = Embedding(n_labels,
                     n_labels,
                     weights=[np.eye(n_labels)],
                     trainable=False)(inputs)

pos_emb = PositionEmbedding(max_time=1000, n_waves=20, d_model=40)(embedded)
nnet = concatenate([embedded, pos_emb], axis=-1)

attention_decoder = AttentionDecoder(40,
                                     n_labels,
                                     embedding_dim=5,
                                     is_monotonic=False,
                                     normalize_energy=False)
# use teacher forcing
output = attention_decoder([nnet, outp_true])
# (alternative) without teacher forcing
# output = attention_decoder(nnet)
# or
# output = attention_decoder([nnet, outp_true], use_teacher_forcing=False)
# the last variant is useful for generating outputs whose number of timesteps differs from the input
# (otherwise the output sequence has the same length as the input sequence),
# so to produce outputs of a different length at inference time one can pass outp_true=np.zeros((batch_size, outp_time))
model = Model(inputs=[inputs, outp_true], outputs=[output])
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adadelta',
              metrics=['accuracy'])
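A training/inference sketch for the teacher-forced setup above (x_ids and y_ids are assumed integer arrays of shape (batch, time)): the true outputs are fed as the second input during training, and a zero placeholder of the desired output length can take their place at inference, as the comments above suggest:

# teacher forcing during training, zero placeholder at inference (sketch)
model.fit([x_ids, y_ids], np.expand_dims(y_ids, -1), batch_size=32, epochs=5)
out_time = 20  # desired output length at inference (assumption)
preds = model.predict([x_ids[:1], np.zeros((1, out_time))])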
Example #28
    def embedding_model_lstm(self,
                             words,
                             embedding_weights_a=None,
                             embedding_weights_b=None,
                             trainable=False,
                             skip_embed=False,
                             return_sequences_b=False):

        lstm_unit_a = units
        lstm_unit_b = units * 2
        embed_unit = int(hparams['embed_size'])

        x_shape = (tokens_per_sentence, )
        decoder_dim = units * 2  # (tokens_per_sentence, units *2)

        if hparams['dense_activation'] is None or hparams[
                'dense_activation'] == 'none':
            decoder_dim = embed_unit

        valid_word_a = Input(shape=x_shape)
        #valid_word_b = Input(shape=x_shape)

        embeddings_a = Embedding(words,
                                 embed_unit,
                                 weights=[embedding_weights_a],
                                 input_length=tokens_per_sentence,
                                 trainable=trainable)

        embed_a = embeddings_a(valid_word_a)

        ### encoder for training ###
        lstm_a = Bidirectional(
            LSTM(
                units=lstm_unit_a,
                return_sequences=True,
                dropout=0.5
                #return_state=True,
                #recurrent_dropout=0.2,
                #input_shape=(None,)
            ),
            merge_mode='concat',
            trainable=True)

        recurrent_a = lstm_a(embed_a)

        #############
        #conv1d_b = Conv1D(tokens_per_sentence,lstm_unit_b)(recurrent_a)

        lstm_b = AttentionDecoder(
            units=lstm_unit_b,
            output_dim=decoder_dim,
            kernel_constraint=min_max_norm(),
            dropout=0.5
            #return_sequences=return_sequences_b,
            #return_state=True
        )

        recurrent_b = lstm_b(recurrent_a)

        if hparams['dense_activation'] is not None and hparams[
                'dense_activation'] != 'none':
            dense_b = Dense(
                embed_unit,
                input_shape=(tokens_per_sentence, ),
                activation=hparams['dense_activation']  #softmax, tanh, or relu
                #name='dense_layer_b',
            )

            decoder_b = dense_b(recurrent_b)  # recurrent_b

            dropout_b = Dropout(0.5)(decoder_b)

            model = Model([valid_word_a], dropout_b)  # decoder_b

        else:
            model = Model([valid_word_a], recurrent_b)

        ### boilerplate ###

        adam = optimizers.Adam(lr=learning_rate)

        # loss try 'categorical_crossentropy', 'mse', 'binary_crossentropy'
        # optimizer try 'rmsprop'
        model.compile(optimizer=adam, loss='mse', metrics=['acc'])

        return model, None, None  #, None, model_inference
Example #29
        EarlyStopping(monitor='val_loss', patience=2),
        ModelCheckpoint(filepath=model_weights,
                        monitor='val_loss',
                        save_best_only=True)
    ]

    # define model
    model = Sequential()
    model.add(
        LSTM(256, input_shape=(MAXLEN, len(chars)), return_sequences=True))
    model.add(LSTM(256, return_sequences=True))
    model.add(
        Bidirectional(LSTM(100, return_sequences=True),
                      input_shape=(256, 100)))
    #model.add(LSTM(50, return_sequences=True))
    model.add(AttentionDecoder(100, len(chars)))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])
    model.summary()

    # train LSTM
    for epoch in range(500):
        # generate new random sequence
        #	X,y = get_pair(n_timesteps_in, n_timesteps_out, vocab_size)
        # fit model for one epoch on this sequence
        model.fit(X_train, y_train, epochs=1, verbose=2)
        model.save_weights(model_weights)
        #score, acc = model.evaluate(X_val, y_val, batch_size=BATCH_SIZE)
        #print(score)
def get_pair(X_train, Y_train):
    X = embedding_encode(X_train, embedding)
    y = embedding_encode(Y_train, embedding)
    X = X.reshape((1, X.shape[0], X.shape[1]))
    y = y.reshape((1, y.shape[0], y.shape[1]))
    return X, y


vocab_size = len(word_to_id)
embed_size = 128

# define model
model = Sequential()
#model.add(Embedding(vocab_size, embed_size, weights=[embedding], input_length=maxlen, trainable=False))
model.add(Bidirectional(LSTM(150, return_sequences=True)))
model.add(AttentionDecoder(150, vocab_size))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['acc'])

for epoch in range(5000):
    i = randint(0, len(row) - 1)
    # generate new random sequence
    X, y = get_pair(X_train[i], Y_train[i])
    # fit model for one epoch on this sequence
    model.fit(X, y, epochs=1, verbose=2)


def one_hot_decode(encoded_seq):
    return [argmax(vector) for vector in encoded_seq]