def create_model(encode_seqs, decode_seqs, src_vocab_size, emb_dim, is_train=True, reuse=False):
    """Build an embedding -> single-layer LSTM Seq2Seq -> dense-logits network.

    Parameters
    ----------
    encode_seqs, decode_seqs : int tensors of token ids fed to encoder/decoder.
    src_vocab_size : size of the shared vocabulary (embedding rows and output units).
    emb_dim : embedding size; also used as the LSTM hidden size.
    is_train : when True, apply 0.5 dropout inside the Seq2Seq cells.
    reuse : whether to reuse variables in the "model" scope.

    Returns
    -------
    (net_out, net_rnn) : the output DenseLayer (vocabulary logits, flattened
    to 2-D because ``return_seq_2d=True``) and the underlying Seq2Seq layer.
    """
    with tf.variable_scope("model", reuse=reuse):
        # For a chatbot the encoder and decoder can share one embedding;
        # for translation you would normally use two separate embeddings.
        with tf.variable_scope("embedding") as emb_scope:
            enc_embed = EmbeddingInputlayer(
                inputs=encode_seqs,
                vocabulary_size=src_vocab_size,
                embedding_size=emb_dim,
                name='seq_embedding')
            # Second layer reuses the same embedding matrix (same name/scope).
            emb_scope.reuse_variables()
            dec_embed = EmbeddingInputlayer(
                inputs=decode_seqs,
                vocabulary_size=src_vocab_size,
                embedding_size=emb_dim,
                name='seq_embedding')

        net_rnn = Seq2Seq(
            enc_embed, dec_embed,
            cell_fn=tf.nn.rnn_cell.LSTMCell,
            n_hidden=emb_dim,
            initializer=tf.random_uniform_initializer(-0.1, 0.1),
            encode_sequence_length=retrieve_seq_length_op2(encode_seqs),
            decode_sequence_length=retrieve_seq_length_op2(decode_seqs),
            initial_state_encode=None,
            dropout=0.5 if is_train else None,
            n_layer=1,
            return_seq_2d=True,
            name='seq2seq')

        net_out = DenseLayer(net_rnn, n_units=src_vocab_size, act=tf.identity, name='output')
    return net_out, net_rnn
def model(encode_seqs, decode_seqs, is_train=True, reuse=False):
    """Build the chatbot graph: shared embedding -> 3-layer LSTM Seq2Seq
    -> dense logits over the vocabulary.

    Relies on the module-level globals ``xvocab_size`` and
    ``embedding_dimension`` — TODO confirm these are defined before the
    first call.

    Parameters
    ----------
    encode_seqs, decode_seqs : int tensors of token ids for encoder/decoder.
    is_train : when True, apply 0.5 dropout inside the Seq2Seq cells.
    reuse : whether to reuse variables in the "model" scope.

    Returns
    -------
    (net_out, net_rnn) : output DenseLayer (2-D logits) and the Seq2Seq layer.

    Fix: the original accepted ``is_train`` but never used it; dropout is now
    applied during training (0.5), matching the other builders in this file.
    """
    with tf.variable_scope("model", reuse=reuse):
        with tf.variable_scope("embedding") as vs:
            net_encode = EmbeddingInputlayer(
                inputs=encode_seqs,
                vocabulary_size=xvocab_size,
                embedding_size=embedding_dimension,
                name='seq_embedding')
            # Decoder shares the encoder's embedding matrix.
            vs.reuse_variables()
            net_decode = EmbeddingInputlayer(
                inputs=decode_seqs,
                vocabulary_size=xvocab_size,
                embedding_size=embedding_dimension,
                name='seq_embedding')
        net_rnn = Seq2Seq(
            net_encode, net_decode,
            cell_fn=tf.contrib.rnn.BasicLSTMCell,
            n_hidden=embedding_dimension,
            initializer=tf.random_uniform_initializer(-0.1, 0.1),
            encode_sequence_length=retrieve_seq_length_op2(encode_seqs),
            decode_sequence_length=retrieve_seq_length_op2(decode_seqs),
            initial_state_encode=None,
            # Was missing: is_train had no effect without this argument.
            dropout=(0.5 if is_train else None),
            n_layer=3,
            return_seq_2d=True,
            name='seq2seq')
        net_out = DenseLayer(net_rnn, n_units=xvocab_size, act=tf.identity, name='output')
    return net_out, net_rnn
def create_model_pretrained(encode_seqs, decode_seqs, src_vocab_size, emb_dim, hidden_size, pretrainedModelName, is_train=True, reuse=False):
    """Build a Seq2Seq network whose shared embedding is initialized from a
    pretrained GloVe model loaded via ``loadGloveModel``.

    Parameters
    ----------
    encode_seqs, decode_seqs : int tensors of token ids for encoder/decoder.
    src_vocab_size : number of output units of the final DenseLayer.
    emb_dim : NOTE(review): unused — the embedding size comes from the loaded
        GloVe model instead; confirm whether this parameter can be retired.
    hidden_size : LSTM hidden size.
    pretrainedModelName : path/name passed to ``loadGloveModel``
        (e.g. 'glove.6B.100d.txt').
    is_train : when True, apply 0.5 dropout inside the Seq2Seq cells.
    reuse : whether to reuse variables in the "model" scope.

    Returns
    -------
    (net_out, net_rnn) : output DenseLayer (2-D logits) and the Seq2Seq layer.
    """
    with tf.variable_scope("model", reuse=reuse):
        # A chatbot can share one embedding between encoder and decoder;
        # translation would normally use two separate embeddings.
        word2idx, weights, vocab_size, embedding_dim = loadGloveModel(
            pretrainedModelName)
        with tf.variable_scope("embedding") as emb_scope:
            glove_init = tf.constant_initializer(weights)
            enc_embed = EmbeddingInputlayer(
                inputs=encode_seqs,
                vocabulary_size=vocab_size,
                embedding_size=embedding_dim,
                E_init=glove_init,
                name='seq_embedding')
            # Decoder reuses the same (GloVe-initialized) embedding matrix.
            emb_scope.reuse_variables()
            dec_embed = EmbeddingInputlayer(
                inputs=decode_seqs,
                vocabulary_size=vocab_size,
                embedding_size=embedding_dim,
                E_init=glove_init,
                name='seq_embedding')
        net_rnn = Seq2Seq(
            enc_embed, dec_embed,
            cell_fn=tf.nn.rnn_cell.LSTMCell,
            n_hidden=hidden_size,
            initializer=tf.random_uniform_initializer(-0.1, 0.1),
            encode_sequence_length=retrieve_seq_length_op2(encode_seqs),
            decode_sequence_length=retrieve_seq_length_op2(decode_seqs),
            initial_state_encode=None,
            dropout=0.5 if is_train else None,
            n_layer=3,
            return_seq_2d=True,
            name='seq2seq')
        # NOTE(review): output is sized by src_vocab_size while the embedding
        # uses the GloVe vocab_size — confirm these are meant to differ.
        net_out = DenseLayer(net_rnn, n_units=src_vocab_size, act=tf.identity, name='output')
    return net_out, net_rnn
def _model(encode_seqs, decode_seqs, hypes, metadata, mode):
    """Build the hyperparameter-driven Seq2Seq graph for one estimator mode.

    Parameters
    ----------
    encode_seqs, decode_seqs : int tensors of token ids for encoder/decoder.
    hypes : dict of hyperparameters; reads 'emb_dim', 'cell_fn' ('GRU' or
        LSTM otherwise), 'dropout', and hypes['seq2seq']['n_layer'].
    metadata : dict; 'idx2w' gives the base vocabulary list.
    mode : a ModeKeys value; dropout is applied only in TRAIN mode.

    Returns
    -------
    (net_out, net_rnn) : output DenseLayer (2-D logits) and the Seq2Seq layer.

    Fix: removed the dead local ``reuse = (mode != ModeKeys.TRAIN)`` — the
    variable scope uses ``tf.AUTO_REUSE`` regardless, so it had no effect —
    and dropped commented-out legacy code.
    """
    # +2 reserves extra ids beyond idx2w for the special tokens
    # (start/end/unknown/pad per the original author's note).
    xvocab_size = len(metadata['idx2w']) + 2
    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        # For a chatbot, encoder and decoder share one embedding layer.
        with tf.variable_scope("embedding") as vs:
            net_encode = EmbeddingInputlayer(inputs=encode_seqs,
                                             vocabulary_size=xvocab_size,
                                             embedding_size=hypes['emb_dim'],
                                             name='seq_embedding')
            # Decoder reuses the encoder's embedding matrix.
            vs.reuse_variables()
            net_decode = EmbeddingInputlayer(inputs=decode_seqs,
                                             vocabulary_size=xvocab_size,
                                             embedding_size=hypes['emb_dim'],
                                             name='seq_embedding')
        cell_fn = (tf.contrib.rnn.GRUCell
                   if hypes['cell_fn'] == 'GRU' else tf.contrib.rnn.BasicLSTMCell)
        net_rnn = Seq2Seq(
            net_encode, net_decode,
            cell_fn=cell_fn,
            n_hidden=hypes['emb_dim'],
            initializer=tf.random_uniform_initializer(-0.1, 0.1),
            encode_sequence_length=retrieve_seq_length_op2(encode_seqs),
            decode_sequence_length=retrieve_seq_length_op2(decode_seqs),
            initial_state_encode=None,
            dropout=(hypes['dropout'] if mode == ModeKeys.TRAIN else None),
            n_layer=hypes['seq2seq']['n_layer'],
            return_seq_2d=True,
            name='seq2seq')
        net_out = DenseLayer(net_rnn, n_units=xvocab_size, act=tf.identity, name='output')
    return net_out, net_rnn
def seq2seq_model(encode_sequences, decode_sequences, vocabulary_size, embedding_dim, is_train=True, reuse=False):
    """Build a shared-embedding, 3-layer BasicLSTM Seq2Seq with dense logits.

    Parameters
    ----------
    encode_sequences, decode_sequences : int tensors of token ids.
    vocabulary_size : embedding rows and output units.
    embedding_dim : embedding size; also used as the LSTM hidden size.
    is_train : when True, apply 0.5 dropout inside the Seq2Seq cells.
    reuse : NOTE(review): unused — the scope hard-codes ``tf.AUTO_REUSE``;
        confirm whether callers still rely on passing this flag.

    Returns
    -------
    (net_out, net_rnn) : output DenseLayer (2-D logits) and the Seq2Seq layer.
    """
    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        tl.layers.set_name_reuse(tf.AUTO_REUSE)
        # AUTO_REUSE lets both embedding layers share the 'seq_embedding'
        # matrix without an explicit reuse_variables() call.
        with tf.variable_scope("embedding"):
            enc_embed = EmbeddingInputlayer(inputs=encode_sequences,
                                            vocabulary_size=vocabulary_size,
                                            embedding_size=embedding_dim,
                                            name='seq_embedding')
            dec_embed = EmbeddingInputlayer(inputs=decode_sequences,
                                            vocabulary_size=vocabulary_size,
                                            embedding_size=embedding_dim,
                                            name='seq_embedding')
        net_rnn = Seq2Seq(
            enc_embed, dec_embed,
            cell_fn=tf.contrib.rnn.BasicLSTMCell,
            n_hidden=embedding_dim,
            initializer=tf.random_uniform_initializer(-0.1, 0.1),
            encode_sequence_length=retrieve_seq_length_op2(encode_sequences),
            decode_sequence_length=retrieve_seq_length_op2(decode_sequences),
            initial_state_encode=None,
            dropout=0.5 if is_train else None,
            n_layer=3,
            return_seq_2d=True,
            name='seq2seq')
        net_out = DenseLayer(net_rnn, n_units=vocabulary_size, act=tf.identity, name='output')
    return net_out, net_rnn
name="encode_seqs") decode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="decode_seqs") target_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_seqs") target_mask = tf.placeholder( dtype=tf.int64, shape=[batch_size, None], name="target_mask") # tl.prepro.sequences_get_mask() with tf.variable_scope("model"): # for chatbot, you can use the same embedding layer, # for translation, you may want to use 2 seperated embedding layers with tf.variable_scope("embedding") as vs: net_encode = EmbeddingInputlayer(inputs=encode_seqs, vocabulary_size=10000, embedding_size=200, name='seq_embed') vs.reuse_variables() # tl.layers.set_name_reuse(True) net_decode = EmbeddingInputlayer(inputs=decode_seqs, vocabulary_size=10000, embedding_size=200, name='seq_embed') net = Seq2Seq(net_encode, net_decode, cell_fn=tf.contrib.rnn.BasicLSTMCell, n_hidden=200, initializer=tf.random_uniform_initializer(-0.1, 0.1), encode_sequence_length=retrieve_seq_length_op2(encode_seqs), decode_sequence_length=retrieve_seq_length_op2(decode_seqs), initial_state_encode=None,