Example #1
def get_model(feed_previous=False):
    learning_rate = tf.Variable(float(init_learning_rate), trainable=False, dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)

    encoder_inputs = []
    decoder_inputs = []
    target_weights = []
    for i in range(input_seq_len):
        encoder_inputs.append(tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
    for i in range(output_seq_len + 1):
        decoder_inputs.append(tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
    for i in range(output_seq_len):
        target_weights.append(tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))

    targets = [decoder_inputs[i + 1] for i in range(output_seq_len)]

    cell = tf.contrib.rnn.BasicLSTMCell(size)

    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    loss = seq2seq.sequence_loss(outputs, targets, target_weights)
    opt = tf.train.GradientDescentOptimizer(learning_rate)
    update = opt.apply_gradients(opt.compute_gradients(loss))
    saver = tf.train.Saver(tf.global_variables())

    return encoder_inputs, decoder_inputs, target_weights, outputs, loss, update, saver, learning_rate_decay_op, learning_rate
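
A minimal, hypothetical training-step sketch for the tensors returned above (not part of the original example). It assumes the module-level constants that get_model relies on (input_seq_len, output_seq_len, size, the vocabulary sizes and init_learning_rate) are already defined, and feeds zero/one-filled dummy batches in place of real data:

import numpy as np
import tensorflow as tf

(encoder_inputs, decoder_inputs, target_weights, outputs, loss, update,
 saver, learning_rate_decay_op, learning_rate) = get_model(feed_previous=False)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    batch_size = 32  # assumed batch size
    feed = {}
    # One int32 vector of length batch_size per time step (time-major feeding).
    for i in range(input_seq_len):
        feed[encoder_inputs[i]] = np.zeros(batch_size, dtype=np.int32)
    for i in range(output_seq_len + 1):
        feed[decoder_inputs[i]] = np.zeros(batch_size, dtype=np.int32)
    for i in range(output_seq_len):
        feed[target_weights[i]] = np.ones(batch_size, dtype=np.float32)

    _, batch_loss = sess.run([update, loss], feed_dict=feed)
    print("loss after one step:", batch_loss)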
Example #2
def get_model(feed_previous=False):
    """构造模型
    """

    learning_rate = tf.Variable(float(init_learning_rate),
                                trainable=False,
                                dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)

    encoder_inputs = []
    decoder_inputs = []
    target_weights = []
    for i in range(input_seq_len):
        encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None],
                           name="encoder{0}".format(i)))
    for i in range(output_seq_len + 1):
        decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None],
                           name="decoder{0}".format(i)))
    for i in range(output_seq_len):
        target_weights.append(
            tf.placeholder(tf.float32,
                           shape=[None],
                           name="weight{0}".format(i)))

    # Shift decoder_inputs left by one time step to form the targets
    targets = [decoder_inputs[i + 1] for i in range(output_seq_len)]

    dropout = 1  # output keep probability; 1 disables dropout
    num_layers = 3
    cell = tf.contrib.rnn.BasicLSTMCell(size)
    cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=dropout)
    cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers)  # stack num_layers LSTM layers vertically

    # We do not need the returned state here
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Weighted cross-entropy loss over the output sequence
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)
    # Adam optimizer; the returned op minimizes the loss
    opt = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    # Saver for checkpointing the model
    saver = tf.train.Saver(tf.global_variables())

    return encoder_inputs, decoder_inputs, target_weights, outputs, loss, opt, saver, learning_rate_decay_op, learning_rate
Example #3
def get_model(feed_previous=False):
    """
    构造模型:seq2seq
    feed_previous表示decoder_inputs是我们直接提供训练数据的输入,
    还是用前一个RNNCell的输出映射出来的,如果feed_previous为True,
    那么就是用前一个RNNCell的输出,并经过Wx+b线性变换成
    """

    learning_rate = tf.Variable(float(init_learning_rate),
                                trainable=False,
                                dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)

    encoder_inputs = []
    decoder_inputs = []
    target_weights = []
    for i in range(input_seq_len):
        encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None],
                           name="encoder{0}".format(i)))
    for i in range(output_seq_len + 1):
        decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None],
                           name="decoder{0}".format(i)))
    for i in range(output_seq_len):
        target_weights.append(
            tf.placeholder(tf.float32,
                           shape=[None],
                           name="weight{0}".format(i)))

    # Shift decoder_inputs left by one time step to form the targets
    targets = [decoder_inputs[i + 1] for i in range(output_seq_len)]

    cell = tf.contrib.rnn.BasicLSTMCell(size)

    # We do not need the returned state here
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Weighted cross-entropy loss over the output sequence
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)
    # Gradient descent optimizer
    opt = tf.train.GradientDescentOptimizer(learning_rate)
    # Training objective: apply gradients that minimize the loss
    update = opt.apply_gradients(opt.compute_gradients(loss))
    # Saver for checkpointing the model
    saver = tf.train.Saver(tf.global_variables())

    return encoder_inputs, decoder_inputs, target_weights, outputs, loss, update, saver, learning_rate_decay_op, learning_rate
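
To make the feed_previous behaviour described in the docstring concrete, a common pattern (sketched here under the assumption of two separate graphs, not taken from the original code) is to build the model once with feed_previous=False for teacher-forced training and once with feed_previous=True for decoding, restoring the trained weights into the second graph:

import tensorflow as tf

# Training graph: every decoder step is fed the ground-truth previous symbol.
train_graph = tf.Graph()
with train_graph.as_default():
    (_, _, _, _, loss, update, train_saver, _, _) = get_model(feed_previous=False)

# Decoding graph: each decoder step consumes its own previous output,
# mapped through the Wx + b projection and an argmax over the vocabulary.
predict_graph = tf.Graph()
with predict_graph.as_default():
    (enc_in, dec_in, weights, outputs, _, _, predict_saver, _, _) = get_model(feed_previous=True)
    # A session over predict_graph would then call
    # predict_saver.restore(sess, checkpoint_path)  # checkpoint_path is an assumption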
Example #4
 def EmbeddingAttentionSeq2SeqNoTuple(enc_inp, dec_inp, feed_previous):
   cell = core_rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
   return seq2seq_lib.embedding_attention_seq2seq(
       enc_inp,
       dec_inp,
       cell,
       num_encoder_symbols,
       num_decoder_symbols,
       embedding_size=2,
       feed_previous=feed_previous)
Example #5
 def GRUSeq2Seq(enc_inp, dec_inp):
   cell = core_rnn_cell_impl.MultiRNNCell(
       [core_rnn_cell_impl.GRUCell(24)] * 2, state_is_tuple=True)
   return seq2seq_lib.embedding_attention_seq2seq(
       enc_inp,
       dec_inp,
       cell,
       num_encoder_symbols=classes,
       num_decoder_symbols=classes,
       embedding_size=24)
Example #6
 def EmbeddingAttentionSeq2SeqNoTuple(enc_inp, dec_inp, feed_previous):
   cell = core_rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
   return seq2seq_lib.embedding_attention_seq2seq(
       enc_inp,
       dec_inp,
       cell,
       num_encoder_symbols,
       num_decoder_symbols,
       embedding_size=2,
       feed_previous=feed_previous)
Example #7
 def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
     return seq2seq.embedding_attention_seq2seq(encoder_inputs=encoder_inputs,
                                                decoder_inputs=decoder_inputs,
                                                cell=cell,
                                                num_encoder_symbols=self.vocab_size,
                                                num_decoder_symbols=self.vocab_size,
                                                embedding_size=embedding_size,
                                                output_projection=output_projection,
                                                feed_previous=do_decode,
                                                beam_search=beam_search,
                                                beam_size=beam_size)
Example #8
 def GRUSeq2Seq(enc_inp, dec_inp):
   cell = rnn_cell.MultiRNNCell(
       [rnn_cell.GRUCell(24) for _ in range(2)], state_is_tuple=True)
   return seq2seq_lib.embedding_attention_seq2seq(
       enc_inp,
       dec_inp,
       cell,
       num_encoder_symbols=classes,
       num_decoder_symbols=classes,
       embedding_size=24,
       output_projection=(w, b))
Example #9
 def GRUSeq2Seq(enc_inp, dec_inp):
   cell = rnn_cell.MultiRNNCell(
       [rnn_cell.GRUCell(24) for _ in range(2)], state_is_tuple=True)
   return seq2seq_lib.embedding_attention_seq2seq(
       enc_inp,
       dec_inp,
       cell,
       num_encoder_symbols=classes,
       num_decoder_symbols=classes,
       embedding_size=24,
       output_projection=(w, b))
Example #10
def get_model():
    # embedding_attention_seq2seq takes: the encoder input tensors, the decoder input
    # tensors, the RNN cell, the encoder/decoder vocabulary sizes (num_*_symbols),
    # the embedding size (kept equal to the cell size here), an optional softmax output
    # projection, feed_previous (whether decoder inputs come from the previous RNN cell's
    # output instead of our own data), and finally the dtype.
    '''
    embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs,
        cell,
        num_encoder_symbols,
        num_decoder_symbols,
        embedding_size,
        num_heads=1,
        output_projection=None,
        feed_previous=False,
        dtype=None,
        scope=None,
        initial_state_attention=False
    )
    '''
    encoder_inputs = []
    decoder_inputs = []
    targets_weigh = []
    for i in range(input_seq_len):
        encoder_inputs.append(tf.placeholder(shape=[None],dtype=tf.int32,name="encoder{0}".format(i)))
    for i in range(output_seq_len):
        decoder_inputs.append(tf.placeholder(shape=[None],dtype=tf.int32,name="decode{0}".format(i)))
    for i in range(output_seq_len):
        targets_weigh.append(
            tf.placeholder(shape=[None],dtype=tf.float32,name="weight{0}".format(i))
        )
    # Shift decoder_inputs left by one step to form the targets; pad the final
    # target with zeros (note: the shape [2] hard-codes a batch size of 2).
    targets = [decoder_inputs[i] for i in range(1, output_seq_len)]
    targets.append(np.zeros(shape=[2], dtype=np.int32))
    cell = tf.nn.rnn_cell.BasicLSTMCell(size)
    outputs,_ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs,
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=False,
        dtype=tf.float32
    )
    loss = seq2seq.sequence_loss(
        outputs,targets,targets_weigh
    )
    opt = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    update = opt.apply_gradients(opt.compute_gradients(loss))
    saver = tf.train.Saver(tf.global_variables())
    return encoder_inputs,decoder_inputs,targets_weigh,outputs,loss,update,saver
Example #11
def get_model(feed_previous=False):
    learning_rate = tf.Variable(float(init_learning_rate),
                                trainable=False,
                                dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)

    encoder_inputs = []
    decoder_inputs = []
    target_weights = []
    for i in range(input_seq_len):
        encoder_inputs.append(
            tf.compat.v1.placeholder(tf.int32,
                                     shape=[None],
                                     name="encoder{0}".format(i)))
    for i in range(output_seq_len + 1):
        decoder_inputs.append(
            tf.compat.v1.placeholder(tf.int32,
                                     shape=[None],
                                     name="decoder{0}".format(i)))
    for i in range(output_seq_len):
        target_weights.append(
            tf.compat.v1.placeholder(tf.float32,
                                     shape=[None],
                                     name="weight{0}".format(i)))

    # Shift decoder_inputs left by one time step to form the targets
    targets = [decoder_inputs[i + 1] for i in range(output_seq_len)]

    cell = tf.contrib.rnn.BasicLSTMCell(size)

    # We do not need the returned state here
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        # output_projection is an optional (W, b) tuple: W is a weight matrix of shape [output_size, num_decoder_symbols], b is a bias vector of shape [num_decoder_symbols]
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Cross-entropy loss over the output sequence
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)
    # Gradient descent optimizer
    opt = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
    # Training objective: apply gradients that minimize the loss
    update = opt.apply_gradients(opt.compute_gradients(loss))
    # Saver for checkpointing all variables
    saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables())

    return encoder_inputs, decoder_inputs, target_weights, outputs, loss, update, saver, learning_rate_decay_op, learning_rate
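
A hedged fragment (not from the original source) showing how the returned learning_rate_decay_op is typically used: when the recent losses stop improving, running the op shrinks the learning rate by the 0.9 factor defined above. The session, feed dictionary and step count are assumed to come from a surrounding training loop such as the one sketched after Example #1:

previous_losses = []
for step in range(10000):  # assumed number of training steps
    _, batch_loss = sess.run([update, loss], feed_dict=feed)  # sess and feed are assumptions
    if len(previous_losses) > 2 and batch_loss > max(previous_losses[-3:]):
        sess.run(learning_rate_decay_op)  # learning_rate becomes learning_rate * 0.9
    previous_losses.append(batch_loss)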
Example #12
def get_model(feed_previous=False):
    """构造模型
    """

    learning_rate = tf.Variable(float(init_learning_rate),
                                trainable=False,
                                dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.99)

    encoder_inputs = []
    decoder_inputs = []
    target_weights = []
    for i in range(input_seq_len):
        encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None],
                           name="encoder{0}".format(i)))
    for i in range(output_seq_len + 1):
        decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None],
                           name="decoder{0}".format(i)))
    for i in range(output_seq_len):
        target_weights.append(
            tf.placeholder(tf.float32,
                           shape=[None],
                           name="weight{0}".format(i)))

    # Shift decoder_inputs left by one time step to form the targets
    targets = [decoder_inputs[i + 1] for i in range(output_seq_len)]

    cell = tf.contrib.rnn.LSTMCell(size)

    # We do not need the returned state here
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Weighted cross-entropy loss over the output sequence
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)
    # Adam optimizer; the update op minimizes the loss
    update = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    # Saver for checkpointing the model
    saver = tf.train.Saver(tf.global_variables())

    return encoder_inputs, decoder_inputs, target_weights, outputs, loss, update, saver, learning_rate_decay_op, learning_rate
Example #13
def get_model(feed_previous=False):
    """构造模型
    """
    encoder_inputs = []
    decoder_inputs = []
    target_weights = []
    for i in xrange(input_seq_len):
        encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None],
                           name="encoder{0}".format(i)))
    for i in xrange(output_seq_len + 1):
        decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None],
                           name="decoder{0}".format(i)))
    for i in xrange(output_seq_len):
        target_weights.append(
            tf.placeholder(tf.float32,
                           shape=[None],
                           name="weight{0}".format(i)))

    # Shift decoder_inputs left by one time step to form the targets
    targets = [decoder_inputs[i + 1] for i in xrange(output_seq_len)]

    cell = tf.contrib.rnn.BasicLSTMCell(size)

    # We do not need the returned state here
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Weighted cross-entropy loss over the output sequence
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)
    # Gradient descent optimizer
    opt = tf.train.GradientDescentOptimizer(learning_rate)
    # Training objective: apply gradients that minimize the loss
    update = opt.apply_gradients(opt.compute_gradients(loss))
    # Saver for checkpointing the model
    saver = tf.train.Saver(tf.global_variables())
    return encoder_inputs, decoder_inputs, target_weights, outputs, loss, update, saver
Example #14
def get_model(feed_previous=False):
    """构造模型
    """

    learning_rate = tf.Variable(float(init_learning_rate), trainable=False, dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)

    encoder_inputs = []
    decoder_inputs = []
    target_weights = []
    for i in xrange(input_seq_len):
        encoder_inputs.append(tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
    for i in xrange(output_seq_len + 1):
        decoder_inputs.append(tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
    for i in xrange(output_seq_len):
        target_weights.append(tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))

    # Shift decoder_inputs left by one time step to form the targets
    targets = [decoder_inputs[i + 1] for i in xrange(output_seq_len)]

    cell = tf.contrib.rnn.BasicLSTMCell(size)

    # We do not need the returned state here
    outputs, _ = seq2seq.embedding_attention_seq2seq(
                        encoder_inputs,
                        decoder_inputs[:output_seq_len],
                        cell,
                        num_encoder_symbols=num_encoder_symbols,
                        num_decoder_symbols=num_decoder_symbols,
                        embedding_size=size,
                        output_projection=None,
                        feed_previous=feed_previous,
                        dtype=tf.float32)

    # Weighted cross-entropy loss over the output sequence
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)
    # Gradient descent optimizer
    opt = tf.train.GradientDescentOptimizer(learning_rate)
    # Training objective: apply gradients that minimize the loss
    update = opt.apply_gradients(opt.compute_gradients(loss))
    # Saver for checkpointing the model
    saver = tf.train.Saver(tf.global_variables())

    return encoder_inputs, decoder_inputs, target_weights, outputs, loss, update, saver, learning_rate_decay_op, learning_rate
Example #15
    def model(self,
              mode="train",
              num_layers=1,
              cell_size=128,
              cell_type="BasicLSTMCell",
              embedding_size=20,
              learning_rate=0.001,
              tensorboard_verbose=0,
              checkpoint_path=None):
        '''
		Build tensor specifying graph of operations for the seq2seq neural network model.

		mode = string, either "train" or "predict"
		cell_type = attribute of rnn_cell specifying which RNN cell type to use
		cell_size = size for the hidden layer in the RNN cell
		num_layers = number of RNN cell layers to use

		Return TFLearn model instance.  Use DNN model for this.
		'''
        assert mode in ["train", "predict"]

        checkpoint_path = checkpoint_path or (
            "%s%ss2s_checkpoint.tfl" %
            (self.data_dir or "", "/" if self.data_dir else ""))
        GO_VALUE = self.out_max_int + 1  # unique integer value used to trigger decoder outputs in the seq2seq RNN
        tflearn.config.init_graph(seed=None,
                                  log_device=False,
                                  num_cores=int(cpu_count() * 2 / 3),
                                  gpu_memory_fraction=0,
                                  soft_placement=True)
        network = tflearn.input_data(
            shape=[None, self.in_seq_len + self.out_seq_len],
            dtype=tf.int32,
            name="XY")
        encoder_inputs = tf.slice(network, [0, 0], [-1, self.in_seq_len],
                                  name="enc_in")  # get encoder inputs
        encoder_inputs = tf.unstack(
            encoder_inputs, axis=1
        )  # transform into list of self.in_seq_len elements, each [-1]

        decoder_inputs = tf.slice(network, [0, self.in_seq_len],
                                  [-1, self.out_seq_len],
                                  name="dec_in")  # get decoder inputs
        decoder_inputs = tf.unstack(
            decoder_inputs, axis=1
        )  # transform into list of self.out_seq_len elements, each [-1]

        go_input = tf.multiply(
            tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE
        )  # insert "GO" symbol as the first decoder input; drop the last decoder input
        decoder_inputs = [
            go_input
        ] + decoder_inputs[:self.out_seq_len -
                           1]  # insert GO as first; drop last decoder input

        feed_previous = not (mode == "train")

        if self.verbose > 3:
            print("feed_previous = %s" % str(feed_previous))
            print("encoder inputs: %s" % str(encoder_inputs))
            print("decoder inputs: %s" % str(decoder_inputs))
            print("len decoder inputs: %s" % len(decoder_inputs))

        self.n_input_symbols = self.in_max_int + 1  # default is integers from 0 to 9
        self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol for decoder inputs

        single_cell = getattr(tf.nn.rnn_cell, cell_type)(cell_size,
                                                         state_is_tuple=True)
        if num_layers == 1:
            print("rnn net later number:{}".format(num_layers))
            cell = single_cell
        else:
            print("rnn net later number:{}".format(num_layers))
            cell = rnn_cell.MultiRNNCell([single_cell] * num_layers)

        if self.seq2seq_model == "embedding_rnn":
            model_outputs, states = seq2seq.embedding_rnn_seq2seq(
                encoder_inputs,
                # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
                decoder_inputs,
                cell,
                num_encoder_symbols=self.n_input_symbols,
                num_decoder_symbols=self.n_output_symbols,
                embedding_size=embedding_size,
                feed_previous=feed_previous)
        elif self.seq2seq_model == "embedding_attention":
            model_outputs, states = seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
                decoder_inputs,
                cell,
                num_encoder_symbols=self.n_input_symbols,
                num_decoder_symbols=self.n_output_symbols,
                embedding_size=embedding_size,
                num_heads=1,
                initial_state_attention=False,
                feed_previous=feed_previous)
        else:
            raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' %
                            self.seq2seq_model)

        tf.add_to_collection(
            tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model",
            model_outputs)  # for TFLearn to know what to save and restore

        # model_outputs: list of the same length as decoder_inputs of 2D Tensors with shape [batch_size x output_size] containing the generated outputs.
        if self.verbose > 2:
            print("model outputs: %s" % model_outputs)
        network = tf.stack(
            model_outputs, axis=1
        )  # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]
        if self.verbose > 2:
            print("packed model outputs: %s" % network)

        if self.verbose > 3:
            all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
            print("all_vars = %s" % all_vars)

        with tf.name_scope(
                "TargetsData"
        ):  # placeholder for target variable (i.e. trainY input)
            targetY = tf.placeholder(shape=[None, self.out_seq_len],
                                     dtype=tf.int32,
                                     name="Y")

        network = tflearn.regression(network,
                                     placeholder=targetY,
                                     optimizer='adam',
                                     learning_rate=learning_rate,
                                     loss=self.sequence_loss,
                                     metric=self.accuracy,
                                     name="Y")

        model = tflearn.DNN(network,
                            tensorboard_verbose=tensorboard_verbose,
                            checkpoint_path=checkpoint_path)
        return model
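
A hypothetical usage sketch for the TFLearn model built above (the wrapper class name TFLearnSeq2Seq comes from the exception message; its constructor and the train_x/train_y arrays are assumptions). The "XY" input is an input sequence concatenated with its output sequence, and "Y" is the output sequence alone:

import numpy as np

s2s = TFLearnSeq2Seq()                       # assumed wrapper exposing model()
m = s2s.model(mode="train", num_layers=2, cell_size=128)

# train_x: [num_examples, in_seq_len] ints, train_y: [num_examples, out_seq_len] ints
XY = np.concatenate([train_x, train_y], axis=1).astype(np.int32)
m.fit(XY, train_y, n_epoch=10, batch_size=64, show_metric=True)

# Prediction would rebuild the graph with mode="predict" (so feed_previous=True)
# and restore the checkpoint written to checkpoint_path before calling m.predict.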
Example #16
    def create_network(self):
        self.seq2seq_model = "embedding_attention"
        mode = "train"
        GO_VALUE = self.out_max_int + 1

        self.net = tflearn.input_data(shape=[None, self.in_seq_len],
                                      dtype=tf.int32,
                                      name="XY")
        encoder_inputs = tf.slice(self.net, [0, 0], [-1, self.in_seq_len],
                                  name="enc_in")  # get encoder inputs
        encoder_inputs = tf.unstack(
            encoder_inputs,
            axis=1)  #transform to list of self.in_seq_len elements, each [-1]

        decoder_inputs = tf.slice(self.net, [0, 0], [-1, self.out_seq_len],
                                  name="dec_in")
        decoder_inputs = tf.unstack(
            decoder_inputs,
            axis=1)  # transform into list of self.out_seq_len elements

        go_input = tf.multiply(tf.ones_like(decoder_inputs[0], dtype=tf.int32),
                               GO_VALUE)
        decoder_inputs = [
            go_input
        ] + decoder_inputs[:self.out_seq_len -
                           1]  # insert GO as first; drop last decoder input

        feed_previous = not (mode == "train")

        self.n_input_symbols = self.in_max_int + 1  # default is integers from 0 to 9
        self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol for decoder inputs

        cell = rnn.MultiRNNCell([
            rnn.GRUCell(128),
            rnn.GRUCell(128),
            rnn.GRUCell(128),
            rnn.GRUCell(128),
            rnn.GRUCell(128),
            rnn.GRUCell(128),
            rnn.GRUCell(128),
            rnn.GRUCell(128)
        ])

        if self.seq2seq_model == "embedding_rnn":
            model_outputs, states = seq2seq.embedding_rnn_seq2seq(
                encoder_inputs,
                # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
                decoder_inputs,
                cell,
                num_encoder_symbols=self.n_input_symbols,
                num_decoder_symbols=self.n_output_symbols,
                embedding_size=200,
                feed_previous=feed_previous)
        elif self.seq2seq_model == "embedding_attention":
            model_outputs, states = seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
                decoder_inputs,
                cell,
                num_encoder_symbols=self.n_input_symbols,
                num_decoder_symbols=self.n_output_symbols,
                embedding_size=200,
                num_heads=1,
                initial_state_attention=False,
                feed_previous=feed_previous)
        else:
            raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' %
                            self.seq2seq_model)

        tf.add_to_collection(
            tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model",
            model_outputs)  # for TFLearn to know what to save and restore
        self.net = tf.stack(
            model_outputs, axis=1
        )  # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]

        with tf.name_scope(
                "TargetsData"
        ):  # placeholder for target variable (i.e. trainY input)
            targetY = tf.placeholder(shape=[None, self.out_seq_len],
                                     dtype=tf.int32,
                                     name="Y")

        self.net = tflearn.regression(self.net,
                                      placeholder=targetY,
                                      optimizer='adam',
                                      learning_rate=0.00005,
                                      loss=self.sequence_loss,
                                      metric=self.accuracy,
                                      name="Y")

        self.model = tflearn.DNN(self.net)
Example #17
	def model(self, mode="train", num_layers=1, cell_size=128, cell_type="BasicLSTMCell", embedding_size=20,
	          learning_rate=0.001,
	          tensorboard_verbose=0, checkpoint_path=None):
		'''
		Build tensor specifying graph of operations for the seq2seq neural network model.

		mode = string, either "train" or "predict"
		cell_type = attribute of rnn_cell specifying which RNN cell type to use
		cell_size = size for the hidden layer in the RNN cell
		num_layers = number of RNN cell layers to use

		Return TFLearn model instance.  Use DNN model for this.
		'''
		assert mode in ["train", "predict"]

		checkpoint_path = checkpoint_path or (
			"%s%ss2s_checkpoint.tfl" % (self.data_dir or "", "/" if self.data_dir else ""))
		GO_VALUE = self.out_max_int + 1  # unique integer value used to trigger decoder outputs in the seq2seq RNN
		tflearn.config.init_graph(seed=None, log_device=False, num_cores=int(cpu_count() * 2 / 3),
		                          gpu_memory_fraction=0, soft_placement=True)
		network = tflearn.input_data(shape=[None, self.in_seq_len + self.out_seq_len], dtype=tf.int32, name="XY")
		encoder_inputs = tf.slice(network, [0, 0], [-1, self.in_seq_len], name="enc_in")  # get encoder inputs
		encoder_inputs = tf.unstack(encoder_inputs,
		                            axis=1)  # transform into list of self.in_seq_len elements, each [-1]

		decoder_inputs = tf.slice(network, [0, self.in_seq_len], [-1, self.out_seq_len],
		                          name="dec_in")  # get decoder inputs
		decoder_inputs = tf.unstack(decoder_inputs,
		                            axis=1)  # transform into list of self.out_seq_len elements, each [-1]

		go_input = tf.multiply(tf.ones_like(decoder_inputs[0], dtype=tf.int32),
		                       GO_VALUE)  # insert "GO" symbol as the first decoder input; drop the last decoder input
		decoder_inputs = [go_input] + decoder_inputs[
		                              : self.out_seq_len - 1]  # insert GO as first; drop last decoder input

		feed_previous = not (mode == "train")

		if self.verbose > 3:
			print("feed_previous = %s" % str(feed_previous))
			print("encoder inputs: %s" % str(encoder_inputs))
			print("decoder inputs: %s" % str(decoder_inputs))
			print("len decoder inputs: %s" % len(decoder_inputs))

		self.n_input_symbols = self.in_max_int + 1  # default is integers from 0 to 9
		self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol for decoder inputs

		single_cell = getattr(tf.nn.rnn_cell, cell_type)(cell_size, state_is_tuple=True)
		if num_layers == 1:
			print("rnn net later number:{}".format(num_layers))
			cell = single_cell
		else:
			print("rnn net later number:{}".format(num_layers))
			cell = rnn_cell.MultiRNNCell([single_cell] * num_layers)

		if self.seq2seq_model == "embedding_rnn":
			model_outputs, states = seq2seq.embedding_rnn_seq2seq(encoder_inputs,
			                                                      # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
			                                                      decoder_inputs,
			                                                      cell,
			                                                      num_encoder_symbols=self.n_input_symbols,
			                                                      num_decoder_symbols=self.n_output_symbols,
			                                                      embedding_size=embedding_size,
			                                                      feed_previous=feed_previous)
		elif self.seq2seq_model == "embedding_attention":
			model_outputs, states = seq2seq.embedding_attention_seq2seq(encoder_inputs,
			                                                            # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
			                                                            decoder_inputs,
			                                                            cell,
			                                                            num_encoder_symbols=self.n_input_symbols,
			                                                            num_decoder_symbols=self.n_output_symbols,
			                                                            embedding_size=embedding_size,
			                                                            num_heads=1,
			                                                            initial_state_attention=False,
			                                                            feed_previous=feed_previous)
		else:
			raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' % self.seq2seq_model)

		tf.add_to_collection(tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model",
		                     model_outputs)  # for TFLearn to know what to save and restore

		# model_outputs: list of the same length as decoder_inputs of 2D Tensors with shape [batch_size x output_size] containing the generated outputs.
		if self.verbose > 2:
			print("model outputs: %s" % model_outputs)
		network = tf.stack(model_outputs, axis=1)  # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]
		if self.verbose > 2:
			print("packed model outputs: %s" % network)

		if self.verbose > 3:
			all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
			print("all_vars = %s" % all_vars)

		with tf.name_scope("TargetsData"):  # placeholder for target variable (i.e. trainY input)
			targetY = tf.placeholder(shape=[None, self.out_seq_len], dtype=tf.int32, name="Y")

		network = tflearn.regression(network,
		                             placeholder=targetY,
		                             optimizer='adam',
		                             learning_rate=learning_rate,
		                             loss=self.sequence_loss,
		                             metric=self.accuracy,
		                             name="Y")

		model = tflearn.DNN(network, tensorboard_verbose=tensorboard_verbose, checkpoint_path=checkpoint_path)
		return model
Example #18
  def testEmbeddingAttentionSeq2Seq(self):
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        enc_inp = [
            constant_op.constant(
                1, dtypes.int32, shape=[2]) for i in range(2)
        ]
        dec_inp = [
            constant_op.constant(
                i, dtypes.int32, shape=[2]) for i in range(3)
        ]
        cell_fn = lambda: core_rnn_cell_impl.BasicLSTMCell(2)
        cell = cell_fn()
        dec, mem = seq2seq_lib.embedding_attention_seq2seq(
            enc_inp,
            dec_inp,
            cell,
            num_encoder_symbols=2,
            num_decoder_symbols=5,
            embedding_size=2)
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 5), res[0].shape)

        res = sess.run([mem])
        self.assertEqual((2, 2), res[0].c.shape)
        self.assertEqual((2, 2), res[0].h.shape)

        # Test with state_is_tuple=False.
        with variable_scope.variable_scope("no_tuple"):
          cell_fn = functools.partial(
              core_rnn_cell_impl.BasicLSTMCell,
              2, state_is_tuple=False)
          cell_nt = cell_fn()
          dec, mem = seq2seq_lib.embedding_attention_seq2seq(
              enc_inp,
              dec_inp,
              cell_nt,
              num_encoder_symbols=2,
              num_decoder_symbols=5,
              embedding_size=2)
          sess.run([variables.global_variables_initializer()])
          res = sess.run(dec)
          self.assertEqual(3, len(res))
          self.assertEqual((2, 5), res[0].shape)

          res = sess.run([mem])
          self.assertEqual((2, 4), res[0].shape)

        # Test externally provided output projection.
        w = variable_scope.get_variable("proj_w", [2, 5])
        b = variable_scope.get_variable("proj_b", [5])
        with variable_scope.variable_scope("proj_seq2seq"):
          dec, _ = seq2seq_lib.embedding_attention_seq2seq(
              enc_inp,
              dec_inp,
              cell_fn(),
              num_encoder_symbols=2,
              num_decoder_symbols=5,
              embedding_size=2,
              output_projection=(w, b))
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 2), res[0].shape)
Example #19
  def testEmbeddingAttentionSeq2Seq(self):
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        enc_inp = [
            constant_op.constant(
                1, dtypes.int32, shape=[2]) for i in range(2)
        ]
        dec_inp = [
            constant_op.constant(
                i, dtypes.int32, shape=[2]) for i in range(3)
        ]
        cell_fn = lambda: core_rnn_cell_impl.BasicLSTMCell(2)
        cell = cell_fn()
        dec, mem = seq2seq_lib.embedding_attention_seq2seq(
            enc_inp,
            dec_inp,
            cell,
            num_encoder_symbols=2,
            num_decoder_symbols=5,
            embedding_size=2)
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 5), res[0].shape)

        res = sess.run([mem])
        self.assertEqual((2, 2), res[0].c.shape)
        self.assertEqual((2, 2), res[0].h.shape)

        # Test with state_is_tuple=False.
        with variable_scope.variable_scope("no_tuple"):
          cell_fn = functools.partial(
              core_rnn_cell_impl.BasicLSTMCell,
              2, state_is_tuple=False)
          cell_nt = cell_fn()
          dec, mem = seq2seq_lib.embedding_attention_seq2seq(
              enc_inp,
              dec_inp,
              cell_nt,
              num_encoder_symbols=2,
              num_decoder_symbols=5,
              embedding_size=2)
          sess.run([variables.global_variables_initializer()])
          res = sess.run(dec)
          self.assertEqual(3, len(res))
          self.assertEqual((2, 5), res[0].shape)

          res = sess.run([mem])
          self.assertEqual((2, 4), res[0].shape)

        # Test externally provided output projection.
        w = variable_scope.get_variable("proj_w", [2, 5])
        b = variable_scope.get_variable("proj_b", [5])
        with variable_scope.variable_scope("proj_seq2seq"):
          dec, _ = seq2seq_lib.embedding_attention_seq2seq(
              enc_inp,
              dec_inp,
              cell_fn(),
              num_encoder_symbols=2,
              num_decoder_symbols=5,
              embedding_size=2,
              output_projection=(w, b))
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 2), res[0].shape)
Example #20
# add one more target
targets.append(tf.placeholder(dtype = tf.int32, shape = [None], name = 'last_target'))
target_weights = [tf.placeholder(dtype = tf.float32, shape = [None], name = 'target_w{}'.format(i)) for i in range(output_seq_len)]

# output projection
size = 512
w_t = tf.get_variable('proj_w', [en_vocab_size, size], tf.float32)
b = tf.get_variable('proj_b', [en_vocab_size], tf.float32)
w = tf.transpose(w_t)
output_projection = (w, b)

outputs, states = seq2seq_lib.embedding_attention_seq2seq(
                                            encoder_inputs,
                                            decoder_inputs,
                                            BasicLSTMCell(size),
                                            num_encoder_symbols = zh_vocab_size,
                                            num_decoder_symbols = en_vocab_size,
                                            embedding_size = 100,
                                            feed_previous = False,
                                            output_projection = output_projection,
                                            dtype = tf.float32)

# define our loss function

def sampled_loss(labels, logits):
    return tf.nn.sampled_softmax_loss(
                        weights = w_t,
                        biases = b,
                        labels = tf.reshape(labels, [-1, 1]),
                        inputs = logits,
                        num_sampled = 512,
                        num_classes = en_vocab_size)
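
A hedged sketch (not in the original snippet) of how the sampled softmax above is usually wired in: sequence_loss accepts a softmax_loss_function, so the un-projected decoder outputs of width size are scored with the sampled softmax instead of a full softmax over en_vocab_size. The (labels, logits) signature above matches the way newer versions of the library call the loss function; older ones passed the arguments positionally.

loss = seq2seq_lib.sequence_loss(
    outputs,                 # un-projected decoder outputs; sampled_softmax_loss applies w_t and b itself
    targets,
    target_weights,
    softmax_loss_function=sampled_loss)

# A standard optimizer can then minimize this loss.
train_op = tf.train.GradientDescentOptimizer(0.5).minimize(loss)  # the 0.5 learning rate is an assumption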
Example #21
  def testEmbeddingAttentionSeq2Seq(self):
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        enc_inp = [
            constant_op.constant(
                1, dtypes.int32, shape=[2]) for i in range(2)
        ]
        dec_inp = [
            constant_op.constant(
                i, dtypes.int32, shape=[2]) for i in range(3)
        ]
        cell = core_rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=True)
        dec, mem = seq2seq_lib.embedding_attention_seq2seq(
            enc_inp,
            dec_inp,
            cell,
            num_encoder_symbols=2,
            num_decoder_symbols=5,
            embedding_size=2)
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 5), res[0].shape)

        res = sess.run([mem])
        self.assertEqual((2, 2), res[0].c.shape)
        self.assertEqual((2, 2), res[0].h.shape)

        # Test with state_is_tuple=False.
        with variable_scope.variable_scope("no_tuple"):
          cell = core_rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
          dec, mem = seq2seq_lib.embedding_attention_seq2seq(
              enc_inp,
              dec_inp,
              cell,
              num_encoder_symbols=2,
              num_decoder_symbols=5,
              embedding_size=2)
          sess.run([variables.global_variables_initializer()])
          res = sess.run(dec)
          self.assertEqual(3, len(res))
          self.assertEqual((2, 5), res[0].shape)

          res = sess.run([mem])
          self.assertEqual((2, 4), res[0].shape)

        # Test externally provided output projection.
        w = variable_scope.get_variable("proj_w", [2, 5])
        b = variable_scope.get_variable("proj_b", [5])
        with variable_scope.variable_scope("proj_seq2seq"):
          dec, _ = seq2seq_lib.embedding_attention_seq2seq(
              enc_inp,
              dec_inp,
              cell,
              num_encoder_symbols=2,
              num_decoder_symbols=5,
              embedding_size=2,
              output_projection=(w, b))
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 2), res[0].shape)

        # Test that previous-feeding model ignores inputs after the first.
        dec_inp2 = [
            constant_op.constant(
                0, dtypes.int32, shape=[2]) for _ in range(3)
        ]
        with variable_scope.variable_scope("other"):
          d3, _ = seq2seq_lib.embedding_attention_seq2seq(
              enc_inp,
              dec_inp2,
              cell,
              num_encoder_symbols=2,
              num_decoder_symbols=5,
              embedding_size=2,
              feed_previous=constant_op.constant(True))
        sess.run([variables.global_variables_initializer()])
        variable_scope.get_variable_scope().reuse_variables()
        d1, _ = seq2seq_lib.embedding_attention_seq2seq(
            enc_inp,
            dec_inp,
            cell,
            num_encoder_symbols=2,
            num_decoder_symbols=5,
            embedding_size=2,
            feed_previous=True)
        d2, _ = seq2seq_lib.embedding_attention_seq2seq(
            enc_inp,
            dec_inp2,
            cell,
            num_encoder_symbols=2,
            num_decoder_symbols=5,
            embedding_size=2,
            feed_previous=True)
        res1 = sess.run(d1)
        res2 = sess.run(d2)
        res3 = sess.run(d3)
        self.assertAllClose(res1, res2)
        self.assertAllClose(res1, res3)