Example #1
# These snippets use the TensorFlow 1.x / TensorLayer 1.x API (they predate TL 2.0);
# the same imports apply to all of the examples below.
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import (DenseLayer, EmbeddingInputlayer, Seq2Seq,
                                retrieve_seq_length_op2)

def create_model(encode_seqs, decode_seqs, src_vocab_size, emb_dim, is_train=True, reuse=False):
    with tf.variable_scope("model", reuse=reuse):
        # For a chatbot you can share one embedding layer;
        # for translation you may want two separate embedding layers.
        with tf.variable_scope("embedding") as vs:
            net_encode = EmbeddingInputlayer(
                inputs = encode_seqs,
                vocabulary_size = src_vocab_size,
                embedding_size = emb_dim,
                name = 'seq_embedding')
            vs.reuse_variables()
            net_decode = EmbeddingInputlayer(
                inputs = decode_seqs,
                vocabulary_size = src_vocab_size,
                embedding_size = emb_dim,
                name = 'seq_embedding')
            
        net_rnn = Seq2Seq(net_encode, net_decode,
                cell_fn = tf.nn.rnn_cell.LSTMCell,
                n_hidden = emb_dim,
                initializer = tf.random_uniform_initializer(-0.1, 0.1),
                encode_sequence_length = retrieve_seq_length_op2(encode_seqs),
                decode_sequence_length = retrieve_seq_length_op2(decode_seqs),
                initial_state_encode = None,
                dropout = (0.5 if is_train else None),
                n_layer = 1,
                return_seq_2d = True,
                name = 'seq2seq')

        net_out = DenseLayer(net_rnn, n_units=src_vocab_size, act=tf.identity, name='output')
    return net_out, net_rnn
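A minimal usage sketch (the batch size and vocabulary values below are assumptions, not part of the original example): build the training graph first, then call the constructor again with reuse=True to get a weight-sharing inference graph, typically with batch size 1.

batch_size, src_vocab_size, emb_dim = 32, 10000, 200
encode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="encode_seqs")
decode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="decode_seqs")
net_out, net_rnn = create_model(encode_seqs, decode_seqs, src_vocab_size, emb_dim,
                                is_train=True, reuse=False)
# Inference graph with batch size 1, sharing the trained weights via reuse=True.
encode_seqs2 = tf.placeholder(dtype=tf.int64, shape=[1, None], name="encode_seqs")
decode_seqs2 = tf.placeholder(dtype=tf.int64, shape=[1, None], name="decode_seqs")
net_out2, net_rnn2 = create_model(encode_seqs2, decode_seqs2, src_vocab_size, emb_dim,
                                  is_train=False, reuse=True)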
Example #2
def model(encode_seqs, decode_seqs, is_train=True, reuse=False):
    with tf.variable_scope("model", reuse=reuse):
        with tf.variable_scope("embedding") as vs:
            net_encode = EmbeddingInputlayer(
                inputs=encode_seqs,
                vocabulary_size=xvocab_size,
                embedding_size=embedding_dimension,
                name='seq_embedding')
            vs.reuse_variables()
            net_decode = EmbeddingInputlayer(
                inputs=decode_seqs,
                vocabulary_size=xvocab_size,
                embedding_size=embedding_dimension,
                name='seq_embedding')
        net_rnn = Seq2Seq(
            net_encode,
            net_decode,
            cell_fn=tf.contrib.rnn.BasicLSTMCell,
            n_hidden=embedding_dimension,
            initializer=tf.random_uniform_initializer(-0.1, 0.1),
            encode_sequence_length=retrieve_seq_length_op2(encode_seqs),
            decode_sequence_length=retrieve_seq_length_op2(decode_seqs),
            initial_state_encode=None,
            dropout=(0.5 if is_train else None),  # dropout at training time only; is_train was otherwise unused
            n_layer=3,
            return_seq_2d=True,
            name='seq2seq')
        net_out = DenseLayer(net_rnn,
                             n_units=xvocab_size,
                             act=tf.identity,
                             name='output')
    return net_out, net_rnn
Example #3
def create_model_pretrained(encode_seqs,
                            decode_seqs,
                            src_vocab_size,
                            emb_dim,
                            hidden_size,
                            pretrainedModelName,
                            is_train=True,
                            reuse=False):
    with tf.variable_scope("model", reuse=reuse):
        # For a chatbot you can share one embedding layer;
        # for translation you may want two separate embedding layers.
        word2idx, weights, vocab_size, embedding_dim = loadGloveModel(
            pretrainedModelName)  # e.g. 'glove.6B.100d.txt'

        with tf.variable_scope("embedding") as vs:
            glove_weights_initializer = tf.constant_initializer(weights)
            embedding_encode = EmbeddingInputlayer(
                inputs=encode_seqs,
                vocabulary_size=vocab_size,
                embedding_size=embedding_dim,
                E_init=glove_weights_initializer,
                name='seq_embedding')
            vs.reuse_variables()
            embedding_decode = EmbeddingInputlayer(
                inputs=decode_seqs,
                vocabulary_size=vocab_size,
                embedding_size=embedding_dim,
                E_init=glove_weights_initializer,
                name='seq_embedding')

        net_rnn = Seq2Seq(
            embedding_encode,
            embedding_decode,
            cell_fn=tf.nn.rnn_cell.LSTMCell,
            n_hidden=hidden_size,
            initializer=tf.random_uniform_initializer(-0.1, 0.1),
            encode_sequence_length=retrieve_seq_length_op2(encode_seqs),
            decode_sequence_length=retrieve_seq_length_op2(decode_seqs),
            initial_state_encode=None,
            dropout=(0.5 if is_train else None),
            n_layer=3,
            return_seq_2d=True,
            name='seq2seq')

        net_out = DenseLayer(net_rnn,
                             n_units=src_vocab_size,
                             act=tf.identity,
                             name='output')
    return net_out, net_rnn
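loadGloveModel is a user-defined helper that is not shown here. Judging from how its return values are used above, a minimal sketch could look like the following; it assumes the standard GloVe text format (one word plus its vector per line) and is an illustration, not the author's implementation.

import numpy as np

def loadGloveModel(glove_file):
    # Hypothetical reconstruction: returns (word2idx, weights, vocab_size, embedding_dim).
    word2idx, vectors = {}, []
    with open(glove_file, encoding='utf8') as f:
        for i, line in enumerate(f):
            parts = line.rstrip().split(' ')
            word2idx[parts[0]] = i
            vectors.append([float(x) for x in parts[1:]])
    weights = np.asarray(vectors, dtype=np.float32)
    return word2idx, weights, weights.shape[0], weights.shape[1]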
Example #4
def _model(encode_seqs, decode_seqs, hypes, metadata, mode):
    # Add two ids for the start and end tokens (unknown and pad are assumed
    # to be in idx2w already).
    xvocab_size = len(metadata['idx2w']) + 2

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        # for chatbot, you can use the same embedding layer
        with tf.variable_scope("embedding") as vs:
            net_encode = EmbeddingInputlayer(inputs=encode_seqs,
                                             vocabulary_size=xvocab_size,
                                             embedding_size=hypes['emb_dim'],
                                             name='seq_embedding')
            vs.reuse_variables()
            # tl.layers.set_name_reuse(True) # remove if TL version == 1.8.0+
            net_decode = EmbeddingInputlayer(inputs=decode_seqs,
                                             vocabulary_size=xvocab_size,
                                             embedding_size=hypes['emb_dim'],
                                             name='seq_embedding')
        cell_fn = (tf.contrib.rnn.GRUCell
                   if hypes['cell_fn'] == 'GRU'
                   else tf.contrib.rnn.BasicLSTMCell)
        net_rnn = Seq2Seq(
            net_encode,
            net_decode,
            cell_fn=cell_fn,
            n_hidden=hypes['emb_dim'],
            initializer=tf.random_uniform_initializer(-0.1, 0.1),
            encode_sequence_length=retrieve_seq_length_op2(encode_seqs),
            decode_sequence_length=retrieve_seq_length_op2(decode_seqs),
            initial_state_encode=None,
            dropout=(hypes['dropout'] if mode == ModeKeys.TRAIN else None),
            n_layer=hypes['seq2seq']['n_layer'],
            return_seq_2d=True,
            name='seq2seq')
        net_out = DenseLayer(net_rnn,
                             n_units=xvocab_size,
                             act=tf.identity,
                             name='output')
    return net_out, net_rnn
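For reference, the hypes dictionary only needs the keys that the function reads; a minimal example (the values are illustrative assumptions):

hypes = {
    'emb_dim': 1024,
    'cell_fn': 'GRU',             # any other value selects BasicLSTMCell
    'dropout': 0.5,
    'seq2seq': {'n_layer': 3},
}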
Example #5
def seq2seq_model(encode_sequences,
                  decode_sequences,
                  vocabulary_size,
                  embedding_dim,
                  is_train=True,
                  reuse=False):
    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        tl.layers.set_name_reuse(True)  # expects a bool; can be removed in TL 1.8.0+
        with tf.variable_scope("embedding"):
            net_encode = EmbeddingInputlayer(inputs=encode_sequences,
                                             vocabulary_size=vocabulary_size,
                                             embedding_size=embedding_dim,
                                             name='seq_embedding')

            net_decode = EmbeddingInputlayer(inputs=decode_sequences,
                                             vocabulary_size=vocabulary_size,
                                             embedding_size=embedding_dim,
                                             name='seq_embedding')
        net_rnn = Seq2Seq(
            net_encode,
            net_decode,
            cell_fn=tf.contrib.rnn.BasicLSTMCell,
            n_hidden=embedding_dim,
            initializer=tf.random_uniform_initializer(-0.1, 0.1),
            encode_sequence_length=retrieve_seq_length_op2(encode_sequences),
            decode_sequence_length=retrieve_seq_length_op2(decode_sequences),
            initial_state_encode=None,
            dropout=(0.5 if is_train else None),
            n_layer=3,
            return_seq_2d=True,
            name='seq2seq')
        net_out = DenseLayer(net_rnn,
                             n_units=vocabulary_size,
                             act=tf.identity,
                             name='output')
    return net_out, net_rnn
Example #6
# This snippet begins mid-script: batch_size and the companion placeholders below
# are assumed from the surrounding tutorial so that the graph is self-contained.
batch_size = 32
encode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="encode_seqs")
decode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="decode_seqs")
target_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_seqs")
target_mask = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_mask")  # tl.prepro.sequences_get_mask()
with tf.variable_scope("model"):
    # For a chatbot you can share one embedding layer;
    # for translation you may want two separate embedding layers.
    with tf.variable_scope("embedding") as vs:
        net_encode = EmbeddingInputlayer(inputs=encode_seqs, vocabulary_size=10000, embedding_size=200, name='seq_embed')
        vs.reuse_variables()
        # tl.layers.set_name_reuse(True)
        net_decode = EmbeddingInputlayer(inputs=decode_seqs, vocabulary_size=10000, embedding_size=200, name='seq_embed')
    net = Seq2Seq(
        net_encode,
        net_decode,
        cell_fn=tf.contrib.rnn.BasicLSTMCell,
        n_hidden=200,
        initializer=tf.random_uniform_initializer(-0.1, 0.1),
        encode_sequence_length=retrieve_seq_length_op2(encode_seqs),
        decode_sequence_length=retrieve_seq_length_op2(decode_seqs),
        initial_state_encode=None,
        dropout=None,
        n_layer=2,
        return_seq_2d=True,
        name='Seq2seq')
net = DenseLayer(net, n_units=10000, act=tf.identity, name='oo')
e_loss = tl.cost.cross_entropy_seq_with_mask(logits=net.outputs, target_seqs=target_seqs, input_mask=target_mask, return_details=False, name='cost')
y = tf.nn.softmax(net.outputs)

net.print_layers()
net.print_params(False)

shape = net.outputs.get_shape().as_list()
if shape[-1] != 10000:
    raise RuntimeError("unexpected output shape: %s" % shape)  # assumed sanity check; the source snippet is truncated here
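Training this graph still needs an optimizer and a session; a minimal sketch with dummy data follows (the learning rate and the random batch are assumptions, shown only to illustrate the feed structure).

import numpy as np

train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(e_loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # A dummy batch of padded id sequences, just to show the feed structure.
    dummy = np.random.randint(1, 10000, size=(batch_size, 8))
    mask = np.ones((batch_size, 8), dtype=np.int64)
    loss_val, _ = sess.run([e_loss, train_op],
                           feed_dict={encode_seqs: dummy, decode_seqs: dummy,
                                      target_seqs: dummy, target_mask: mask})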