Example #1
# keep_prob is a placeholder so dropout can be set to < 1.0 during training
# and to 1.0 at inference time.
with tf.name_scope('dropout'):
    keep_prob = tf.placeholder("float", name='keep_prob')

# In[5]:

# Stack num_layers LSTM cells, each wrapped with dropout on its outputs.
cells = [
    rnn_cell.DropoutWrapper(rnn_cell.BasicLSTMCell(num_hidden),
                            output_keep_prob=keep_prob)
    for i in range(num_layers)
]

stacked_lstm = rnn_cell.MultiRNNCell(cells)

with tf.variable_scope("decoders") as scope:
    # Training graph: the decoder is fed the ground-truth decoder inputs.
    decode_outputs, decode_state = seq2seq.embedding_attention_seq2seq(
        encode_input, decode_input, stacked_lstm, vocab_size, vocab_size,
        num_hidden)

    # Reuse the same weights for the test-time graph, where the decoder feeds
    # its previous prediction back in (feed_previous=True).
    scope.reuse_variables()

    decode_outputs_test, decode_state_test = seq2seq.embedding_attention_seq2seq(
        encode_input,
        decode_input,
        stacked_lstm,
        vocab_size,
        vocab_size,
        num_hidden,
        feed_previous=True)

# In[6]:
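The snippet above leaves its surrounding setup implicit. A rough sketch of the missing context, assuming the TF 1.x-era seq2seq API; the import path, the hyperparameter values, and the encode_input/decode_input placeholder lists are illustrative assumptions, not part of the original notebook:

# Assumed context for Example #1 (hypothetical values; the module location
# varies by TF version, e.g. tf.contrib.legacy_seq2seq in TF 1.x).
import tensorflow as tf
from tensorflow.python.ops import rnn_cell, seq2seq

num_hidden, num_layers, vocab_size, seq_len = 128, 2, 256, 20
encode_input = [tf.placeholder(tf.int32, shape=(None,), name="ei_%i" % i)
                for i in range(seq_len)]
decode_input = [tf.placeholder(tf.int32, shape=(None,), name="di_%i" % i)
                for i in range(seq_len)]

# At run time keep_prob is fed per session call: e.g. 0.5 for training batches,
# 1.0 when evaluating decode_outputs_test.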
Example #2
    def model(self,
              mode="train",
              num_layers=1,
              cell_size=32,
              cell_type="BasicLSTMCell",
              embedding_size=20,
              learning_rate=0.0001,
              tensorboard_verbose=0,
              checkpoint_path=None):
        '''
        Build the graph of operations for the seq2seq neural network model.
        mode = string, either "train" or "predict"
        cell_type = attribute of rnn_cell specifying which RNN cell type to use
        cell_size = size of the hidden layer in the RNN cell
        num_layers = number of RNN cell layers to use
        Returns a TFLearn model instance (a tflearn.DNN wrapping the network).
        '''
        assert mode in ["train", "predict"]

        checkpoint_path = checkpoint_path or (
            "%s%ss2s_checkpoint.tfl" %
            (self.data_dir or "", "/" if self.data_dir else ""))
        GO_VALUE = self.out_max_int + 1  # unique integer value used to trigger decoder outputs in the seq2seq RNN

        network = tflearn.input_data(
            shape=[None, self.in_seq_len + self.out_seq_len],
            dtype=tf.int32,
            name="XY")
        # Encoder inputs: the first in_seq_len columns, unstacked into a list of
        # self.in_seq_len tensors, each of shape [-1].
        encoder_inputs = tf.slice(network, [0, 0], [-1, self.in_seq_len],
                                  name="enc_in")
        encoder_inputs = tf.unstack(encoder_inputs, axis=1)

        # Decoder inputs: the remaining out_seq_len columns, unstacked into a list of
        # self.out_seq_len tensors, each of shape [-1].
        decoder_inputs = tf.slice(network, [0, self.in_seq_len],
                                  [-1, self.out_seq_len],
                                  name="dec_in")
        decoder_inputs = tf.unstack(decoder_inputs, axis=1)

        # Insert the "GO" symbol as the first decoder input and drop the last decoder input.
        go_input = tf.multiply(
            tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE)
        decoder_inputs = [go_input] + decoder_inputs[:self.out_seq_len - 1]

        feed_previous = not (mode == "train")

        if self.verbose > 3:
            print("feed_previous = %s" % str(feed_previous))
            print("encoder inputs: %s" % str(encoder_inputs))
            print("decoder inputs: %s" % str(decoder_inputs))
            print("len decoder inputs: %s" % len(decoder_inputs))

        self.n_input_symbols = self.in_max_int + 1  # default is integers from 0 to 9
        self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol for decoder inputs

        single_cell = getattr(rnn_cell, cell_type)(cell_size,
                                                   state_is_tuple=True)
        if num_layers == 1:
            cell = single_cell
        else:
            cell = rnn_cell.MultiRNNCell([single_cell] * num_layers)

        if self.seq2seq_model == "embedding_rnn":
            model_outputs, states = seq2seq.embedding_rnn_seq2seq(
                encoder_inputs,  # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
                decoder_inputs,
                cell,
                num_encoder_symbols=self.n_input_symbols,
                num_decoder_symbols=self.n_output_symbols,
                embedding_size=embedding_size,
                feed_previous=feed_previous)
        elif self.seq2seq_model == "embedding_attention":
            model_outputs, states = seq2seq.embedding_attention_seq2seq(
                encoder_inputs,  # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
                decoder_inputs,
                cell,
                num_encoder_symbols=self.n_input_symbols,
                num_decoder_symbols=self.n_output_symbols,
                embedding_size=embedding_size,
                num_heads=1,
                initial_state_attention=False,
                feed_previous=feed_previous)
        else:
            raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' %
                            self.seq2seq_model)

        tf.add_to_collection(
            tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model",
            model_outputs)  # for TFLearn to know what to save and restore

        # model_outputs: list of the same length as decoder_inputs of 2D Tensors with shape [batch_size x output_size] containing the generated outputs.
        if self.verbose > 2: print("model outputs: %s" % model_outputs)
        # Shape: [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols].
        network = tf.stack(model_outputs, axis=1)
        if self.verbose > 2: print("packed model outputs: %s" % network)

        if self.verbose > 3:
            all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
            print("all_vars = %s" % all_vars)

        # Placeholder for the target variable (i.e. the trainY input).
        with tf.name_scope("TargetsData"):
            targetY = tf.placeholder(shape=[None, self.out_seq_len],
                                     dtype=tf.int32,
                                     name="Y")

        network = tflearn.regression(network,
                                     placeholder=targetY,
                                     optimizer='adam',
                                     learning_rate=learning_rate,
                                     loss=self.sequence_loss,
                                     metric=self.accuracy,
                                     name="Y")

        model = tflearn.DNN(network,
                            tensorboard_verbose=tensorboard_verbose,
                            checkpoint_path=checkpoint_path)
        return model
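A minimal usage sketch for the method above; `s2s` is assumed to be an already-constructed instance of the class that defines model(), and the synthetic data is purely illustrative (only the model() signature and the tflearn.DNN return value come from the code):

# Hypothetical usage of model(); s2s and the random data are assumptions.
import numpy as np

model = s2s.model(mode="train", num_layers=2, cell_size=64)

# XY packs encoder and decoder sequences side by side: [batch, in_seq_len + out_seq_len];
# Y holds the target sequences: [batch, out_seq_len].
XY = np.random.randint(0, s2s.in_max_int + 1, size=(128, s2s.in_seq_len + s2s.out_seq_len))
Y = np.random.randint(0, s2s.out_max_int + 1, size=(128, s2s.out_seq_len))

model.fit(XY, Y, n_epoch=10, batch_size=32)  # tflearn.DNN.fit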
Example #3
    def _prepare_model(self):
        # One int32 placeholder per encoder time step, each of shape (batch,).
        self.encode_in = [
            tf.placeholder(tf.int32, shape=(None, ), name="ei_%i" % i)
            for i in range(self.seq_in_len)
        ]

        # Target symbols, one placeholder per decoder time step.
        self.labels = [
            tf.placeholder(tf.int32, shape=(None, ), name="l_%i" % i)
            for i in range(self.seq_out_len)
        ]

        # Uniform loss weights: every target position counts equally.
        loss_weights = [tf.ones_like(l, dtype=tf.float32) for l in self.labels]

        # Decoder inputs: a "GO" token of zeros followed by the targets shifted right.
        decode_in = [
            tf.zeros_like(self.encode_in[0], dtype=np.int32, name="GO")
        ] + self.labels[:-1]

        cell = rnn_cell.GRUCell(self.cell_units)

        # Optional dropout on the cell outputs, controlled by a feed-time placeholder.
        if hasattr(self.params, 'keep_probability'):
            self.keep_prob = tf.placeholder("float")
            cell = rnn_cell.DropoutWrapper(cell,
                                           output_keep_prob=self.keep_prob)

        # Optionally stack several identical cells into a deep RNN.
        if hasattr(self.params, 'num_layers'):
            cell = rnn_cell.MultiRNNCell([cell] * self.params.num_layers)

        with tf.variable_scope("decoders") as scope:
            if self.params.attention:
                # Training graph: the decoder sees the ground-truth decoder inputs.
                decode_outs, decode_state = seq2seq.embedding_attention_seq2seq(
                    encoder_inputs=self.encode_in,
                    decoder_inputs=decode_in,
                    cell=cell,
                    num_encoder_symbols=self.vocab_in_size,
                    num_decoder_symbols=self.vocab_out_size,
                    embedding_size=self.embedding_dim,
                    feed_previous=False)

                # Share the same weights for the inference graph, where the decoder
                # feeds its previous prediction back in (feed_previous=True).
                scope.reuse_variables()

                self.decode_outs_test, decode_state_test = \
                    seq2seq.embedding_attention_seq2seq(encoder_inputs=self.encode_in,
                                                        decoder_inputs=decode_in,
                                                        cell=cell,
                                                        num_encoder_symbols=self.vocab_in_size,
                                                        num_decoder_symbols=self.vocab_out_size,
                                                        embedding_size=self.embedding_dim,
                                                        feed_previous=True)

            else:
                # Same pattern without attention: training graph first, then reuse
                # the weights for the feed_previous=True inference graph.
                decode_outs, decode_state = seq2seq.embedding_rnn_seq2seq(
                    encoder_inputs=self.encode_in,
                    decoder_inputs=decode_in,
                    cell=cell,
                    num_encoder_symbols=self.vocab_in_size,
                    num_decoder_symbols=self.vocab_out_size,
                    embedding_size=self.embedding_dim,
                    feed_previous=False)
                scope.reuse_variables()

                self.decode_outs_test, decode_state_test = \
                    seq2seq.embedding_rnn_seq2seq(encoder_inputs=self.encode_in,
                                                  decoder_inputs=decode_in,
                                                  cell=cell,
                                                  num_encoder_symbols=self.vocab_in_size,
                                                  num_decoder_symbols=self.vocab_out_size,
                                                  embedding_size=self.embedding_dim,
                                                  feed_previous=True)

        # Weighted cross-entropy loss over the training decoder outputs.
        self.loss = seq2seq.sequence_loss(decode_outs, self.labels,
                                          loss_weights, self.vocab_out_size)
        self.optimizer = tf.train.AdamOptimizer(1e-4)
        self.train_op = self.optimizer.minimize(self.loss)
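Because every encoder input and label is its own per-time-step placeholder, the caller has to assemble a feed_dict with one entry per step. A minimal training-step sketch, where m is an already-built instance, enc_batch/lab_batch are illustrative int arrays of shape (seq_in_len, batch) and (seq_out_len, batch), and the 0.5 keep probability is an assumption:

# Hypothetical training step for the graph built by _prepare_model() above.
feed = {m.encode_in[t]: enc_batch[t] for t in range(len(m.encode_in))}
feed.update({m.labels[t]: lab_batch[t] for t in range(len(m.labels))})
if hasattr(m, 'keep_prob'):
    feed[m.keep_prob] = 0.5  # apply dropout only while training

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, loss_val = sess.run([m.train_op, m.loss], feed_dict=feed)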
Example #4
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            # At inference time the model is unrolled one step at a time.
            args.batch_size = 1
            args.seq_length = 1
        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("Unsupported model type: {}".format(args.model))

        cell = cell_fn(args.rnn_size)

        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        # e.g. (batch_size, seq_length) = (10, 25)
        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])

        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])

        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        
        # To get variable sharing, variables must be created and retrieved with
        # tf.get_variable() inside a tf.variable_scope().  Unlike tf.Variable(), which
        # always creates a new variable, tf.get_variable() returns an existing variable
        # with the same name if there is one, and only creates it otherwise.
        with tf.variable_scope("rnnlm"):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])  # args.vocab_size = 19 (19 methods)
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            #attention=tf.get_variable("attention",[1,1,args.vocab_size])
            '''
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])

                # self.input_data has shape (batch_size, seq_length); after the lookup the
                # embedded input has shape (batch_size, num_steps, rnn_size) -- a cube from
                # which each example is one slice off the top.

                # After embedding this is a 3-D array whose elements are 2-D arrays (25, 32).
                temp = tf.nn.embedding_lookup(embedding, self.input_data)   # (10, 25, 32)

                # tf.split() slices the cube along the time axis into 25 pieces, each (10, 32):
                # the input fed at time step t for the whole batch.  The embedding replaces
                # the one-hot encoding.
                inputs = tf.split(1, args.seq_length, temp)   # len(inputs) == 25
                # print(inputs[0].shape)    (10, 1, 32)
                # tf.squeeze removes dimension 1, giving (10, 32) for each time step.
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
        '''
        '''
        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b

            # With axis=1, argmax returns, for each row, the index of its largest element.
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            # tf.stop_gradient marks this op so that no gradient flows through it;
            # backpropagation does not continue past it.
            return tf.nn.embedding_lookup(embedding, prev_symbol)
        '''
        
        # Split the (batch_size, seq_length) int matrix into seq_length tensors of shape (batch_size,).
        inputs = tf.split(1, args.seq_length, self.input_data)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # inputss = [tf.reshape(self.input_data[:, i], -1) for i in range(args.seq_length)]

        # The same sequence is used as both the encoder and the decoder input.
        outputs, last_state = seq2seq.embedding_attention_seq2seq(inputs, inputs, cell,
                                                                  args.vocab_size, args.vocab_size,
                                                                  args.rnn_size)

        # outputs, last_state = seq2seq.attention_decoder(inputs, self.initial_state, attention, cell, loop_function=loop if infer else None, scope='rnnlm')
        # outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop if infer else None, scope='rnnlm')

        self.saved_outputs = outputs

        # outputs is a list of seq_length (25) tensors, one per time step, each of shape
        # (batch_size, vocab_size).
        output = tf.reshape(tf.concat(1, outputs), [-1, args.vocab_size])
        # The per-step matrices are stacked row-wise into (batch_size*seq_length, vocab_size),
        # here (250, 19).

        # Final output of the network.  embedding_attention_seq2seq already projects to
        # vocab_size, so the extra fully connected layer below is left commented out.
        # self.logits = tf.matmul(output, softmax_w) + softmax_b   # (250, 19)
        self.logits = output
        # Apply a softmax to obtain per-symbol probabilities.
        self.probs = tf.nn.softmax(self.logits)


        # sequence_loss_by_example expects:
        #   logits:  [batch_size*num_steps, vocab_size]
        #   targets: [batch_size, num_steps], flattened here to [batch_size*num_steps]
        #   weights: tf.ones([batch_size*num_steps])
        # A detailed walk-through of these shapes:
        # https://blog.csdn.net/xyz1584172808/article/details/83056179?depth_1-utm_source=distribute.pc_relevant.none-task&utm_source=distribute.pc_relevant.none-task
        loss = seq2seq.sequence_loss_by_example([self.logits],
                                                [tf.reshape(self.targets, [-1])],
                                                [tf.ones([args.batch_size * args.seq_length])],
                                                args.vocab_size)

        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state

        # The learning rate is a non-trainable variable so it can be assigned from outside.
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()

        # Clip gradients by global norm before applying them.
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), args.grad_clip)

        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
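The class leaves the training loop to the caller: the learning rate has to be assigned explicitly and the two placeholders fed each step. A rough sketch, where the class name CharRNN, the 0.002 learning rate, and the random batches are assumptions; only lr, input_data, targets, cost, and train_op come from the __init__ above:

# Hypothetical training step for the model defined above.
import numpy as np

m = CharRNN(args)  # assumed class name for the __init__ shown above
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.assign(m.lr, 0.002))  # the learning rate is assigned, not fed
    x = np.random.randint(0, args.vocab_size, size=(args.batch_size, args.seq_length))
    y = np.random.randint(0, args.vocab_size, size=(args.batch_size, args.seq_length))
    cost, _ = sess.run([m.cost, m.train_op],
                       feed_dict={m.input_data: x, m.targets: y})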