Exemple #1
0
 def get_dec_cell(self, cell_size):
   """Build the decoder cell: a GRU, optional dropout, then an input projection.

   Args:
     cell_size: number of GRU units; also passed as the second argument of
       InputProjectionWrapper.

   Returns:
     The GRU cell, wrapped in DropoutWrapper when self.phase_train is truthy,
     and finally wrapped in InputProjectionWrapper.
   """
   decoder = core_rnn_cell.GRUCell(cell_size)
   if self.phase_train:
     # Dropout (keep probability 0.5 on inputs and outputs) is only added
     # while training.
     decoder = core_rnn_cell.DropoutWrapper(
         decoder, output_keep_prob=0.5, input_keep_prob=0.5)
   return core_rnn_cell.InputProjectionWrapper(decoder, cell_size)
Exemple #2
0
 def get_enc_cell(self, cell_size, vocab_size):
     """Build the encoder cell: a GRU wrapped with input and output projections.

     Args:
       cell_size: number of GRU units; also passed as the second argument of
         both projection wrappers.
       vocab_size: accepted but not used by this method.

     Returns:
       The GRU cell wrapped first in InputProjectionWrapper and then in
       OutputProjectionWrapper.
     """
     gru = core_rnn_cell.GRUCell(cell_size)
     # TODO: training-time dropout (DropoutWrapper with 0.5 / 0.5) is not
     # applied to the encoder yet.
     with_input_proj = core_rnn_cell.InputProjectionWrapper(gru, cell_size)
     return core_rnn_cell.OutputProjectionWrapper(with_input_proj, cell_size)
Exemple #3
0
 def get_cell(cell_type, hidden_size):
     """Return an RNN cell of the requested type with `hidden_size` units.

     Args:
       cell_type: one of 'vanilla', 'GRU' or 'LSTM' (compared case-sensitively).
       hidden_size: value passed as `num_units` to the cell constructor.

     Returns:
       A BasicRNNCell, GRUCell or LSTMCell, or None when `cell_type` is not
       one of the recognised names.
     """
     if cell_type == 'vanilla':
         return core_rnn_cell.BasicRNNCell(num_units=hidden_size)
     if cell_type == 'GRU':
         return core_rnn_cell.GRUCell(num_units=hidden_size)
     if cell_type == 'LSTM':
         # The LSTM is built with state_is_tuple=False.
         lstm_options = dict(num_units=hidden_size, state_is_tuple=False)
         return core_rnn_cell.LSTMCell(**lstm_options)
     # Unrecognised names fall through without building anything.
     return None
Exemple #4
0
def inference_test(examples, batch_size, memory_dim, seq_len, feat_dim):
    """Run the seq2seq model and return its decoder outputs unchanged.

    The decoder input is a zero "GO" frame followed by every element of
    `examples` except the last. No further transform is applied to the
    decoder outputs before they are returned.

    Args:
      examples: list of input tensors; also the source of the decoder input.
      batch_size: accepted but not used here.
      memory_dim: number of GRU units.
      seq_len: accepted but not used here.
      feat_dim: accepted but not used here.

    Returns:
      The decoder outputs returned by
      seq2seq.basic_rnn_seq2seq_with_bottle_memory.
    """
    go_frame = tf.zeros_like(examples[0], dtype=tf.float32, name="GO")
    shifted_inputs = [go_frame] + examples[:-1]

    gru = core_rnn_cell.GRUCell(memory_dim)
    # The encoder/decoder memories are returned by the call but not needed.
    outputs, _enc_memory, _dec_memory = (
        seq2seq.basic_rnn_seq2seq_with_bottle_memory(
            examples, shifted_inputs, gru))

    return outputs
Exemple #5
0
    def create_model(self,
                     model_input,
                     vocab_size,
                     num_frames,
                     is_training=True,
                     sparse_labels=None,
                     label_weights=None,
                     input_weights=None,
                     dense_labels=None,
                     **unused_params):
        """Store the model tensors on `self` and build the train or eval graph.

        Args:
          model_input: input tensor; multiplied element-wise by the tiled
            `input_weights`.
          vocab_size: stored as self.vocab_size.
          num_frames: expanded on axis 1 and cast to float32; used as the
            divisor for the initial state.
          is_training: selects lstm_memnet_train.train (True) or
            lstm_memnet_train.eval (False).
          sparse_labels: stored as self.sparse_labels (flattened when
            self.num_max_labels == 1).
          label_weights: stored as self.target_labels (flattened when
            self.num_max_labels == 1).
          input_weights: kept as self.input_weights_2d and tiled along a new
            third axis to self.input_size.
          dense_labels: accepted but not used here.
          **unused_params: ignored.

        Returns:
          A dict with keys "predictions" and "loss".
        """
        feature_dim = 1024 + 128
        self.input_size = feature_dim
        self.cell_size = feature_dim

        frames_column = tf.expand_dims(num_frames, 1)
        self.num_frames = tf.cast(frames_column, tf.float32)

        # Keep the raw weights and a copy tiled to the input width.
        self.input_weights_2d = input_weights
        weights_3d = tf.expand_dims(input_weights, 2)
        self.input_weights = tf.tile(weights_3d, [1, 1, self.input_size])
        self.model_input = model_input * self.input_weights

        self.runtime_batch_size = tf.shape(self.model_input)[0]
        # Initial state: weighted input summed over axis 1, divided by the
        # frame count.
        summed_input = tf.reduce_sum(self.model_input, axis=1)
        self.init_state = summed_input / self.num_frames
        self.dec_cell = core_rnn_cell.GRUCell(self.cell_size)
        self.vocab_size = vocab_size

        # TODO: with a single label per example the label tensors are
        # flattened to 1-D; otherwise they are stored untouched.
        single_label = self.num_max_labels == 1
        self.sparse_labels = (tf.reshape(sparse_labels, [-1])
                              if single_label else sparse_labels)
        self.target_labels = (tf.reshape(label_weights, [-1])
                              if single_label else label_weights)

        if is_training:
            outcome = lstm_memnet_train.train(
                self, decoder_fn=embedding_attention_decoder)
        else:
            outcome = lstm_memnet_train.eval(
                self,
                decoder_fn=embedding_attention_decoder,
                linear_fn=_linear)
        predictions, loss = outcome

        return {"predictions": predictions, "loss": loss}
Exemple #6
0
 def get_dec_cell(self, cell_size):
     """Build the decoder cell: a two-layer stack of GRU cells.

     No dropout is applied to the stack.

     Args:
       cell_size: number of units in each GRU layer.

     Returns:
       A MultiRNNCell holding two independent GRUCell instances.
     """
     # TODO: the layer count is hard-coded.
     num_layers = 2
     # Each layer gets its own GRUCell. Repeating one cell object
     # (`[cell] * num_layers`) makes newer TensorFlow 1.x releases either
     # reject the reuse or share one set of weights across all layers.
     layers = [core_rnn_cell.GRUCell(cell_size) for _ in range(num_layers)]
     return core_rnn_cell.MultiRNNCell(layers)
Exemple #7
0
def inference(examples, batch_size, memory_dim, seq_len, feat_dim):
    """Build the stacked seq2seq model and project its decoder outputs.

    Args:
      examples: sequence inputs, a list of 2-D tensors.
      batch_size: Python int; with `seq_len` it sizes the flattened decoder
        outputs.
      memory_dim: number of GRU units and width of each decoder output.
      seq_len: Python int; see `batch_size`.
      feat_dim: number of rows of the output projection.

    Returns:
      A pair (dec_proj_outputs, enc_memory): the projected decoder outputs,
      of shape [feat_dim, seq_len * batch_size], and the encoder memory
      returned by the seq2seq call.
    """
    # Decoder input: every step is an all-zero "GO" frame shaped like the
    # corresponding encoder input (GO GO GO ... GO).
    dec_inp = tf.unstack(
        tf.zeros_like(examples[:], dtype=tf.float32, name="GO"))

    # The main cell and the stacked seq2seq model built from it.
    cell = core_rnn_cell.GRUCell(memory_dim, activation=tf.nn.relu)
    dec_outputs, enc_memory, _ = seq2seq.stack_rnn_seq2seq_with_bottle_memory(
        examples, dec_inp, cell, STACK_NUM)

    # Flatten all decoder outputs to [seq_len * batch_size, memory_dim] and
    # transpose so the projection is a single matmul.
    dec_reshape = tf.transpose(
        tf.reshape(dec_outputs, (seq_len * batch_size, memory_dim)))
    W_p = tf.get_variable("output_proj_w", [feat_dim, memory_dim])
    b_p = tf.get_variable("output_proj_b", shape=[feat_dim],
                          initializer=tf.constant_initializer(0.0))
    # Broadcast the bias as a [feat_dim, 1] column instead of materialising
    # seq_len * batch_size copies of it; the sum is identical.
    dec_proj_outputs = tf.matmul(W_p, dec_reshape) + tf.expand_dims(b_p, 1)

    return dec_proj_outputs, enc_memory
Exemple #8
0
	def __init__(self, pre_trained_seq2seq, pre_trained_backward, vocab_size, buckets, layer_size, num_layers, 
		max_gradient_norm, batch_size, learning_rate, learning_rate_decay_factor, 
		use_lstm=False, num_samples=512, forward_only = False, dtype= tf.float32):

		"""Create a Model:
		Similar to the seq2seq_model_rl.py code but it differs in the loss
		function: the reward is a score computed from the logits of two
		pre-trained models (see mi_score below).
		INPUTS:
			pre_trained_seq2seq: model whose `outputs[bucket_id]` logits score the actions.
			pre_trained_backward: model whose `outputs[bucket_id]` logits score the states.
			vocab_size: size of vocabulary
			buckets: a list of pairs (I,O), where I specifies maximum input length that 
				will be processed in that bucket, and O specifies maximum output length. Training 
				instances that have inputs longer than I or outputs longer than O will be pushed 
				to the next bucket and padded accordingly. We assume that the list is sorted.
				** We may not use bucketing for Dialogue.
			layer_size: the number of units in each layer
			num_layers: the number of the layers in the model
			max_gradient_norm : gradients are clipped to at most this global norm.
			batch_size : static batch size used to reshape per-step tensors.
			learning_rate : learning rate to start with.
			learning_rate_decay_factor : decay learning rate by this much when needed.
			use_lstm: True -> LSTM cells, False -> GRU cells
			num_samples: accepted but not used in this constructor.
			forward_only : if set, we do not construct the backward pass in the model
			dtype: the data type to use to store internal variables.
		"""
		self.vocab_size = vocab_size
		self.buckets = buckets
		# Backward buckets use the output length of each bucket for both sides.
		self.buckets_back = [(x[1],x[1]) for x in buckets]
		self.batch_size = batch_size
		self.learning_rate = tf.Variable(float(learning_rate), trainable=False, dtype = dtype)
		self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate*learning_rate_decay_factor)
		self.global_step = tf.Variable(0, trainable=False)
		self.pre_trained_seq2seq = pre_trained_seq2seq
		self.pre_trained_backward = pre_trained_backward
		# The bucket is fixed to the first one rather than fed at run time.
		self.bucket_id = 0

		# Output projection variables.
		w_t = tf.get_variable("proj_w",[self.vocab_size, layer_size], dtype = dtype)
		w = tf.transpose(w_t)
		b = tf.get_variable("proj_b", [self.vocab_size], dtype=dtype)
		output_projection = (w,b)

		def build_single_cell():
			# One fresh cell per call, so stacked layers never share a cell object.
			if use_lstm:
				return core_rnn_cell.BasicLSTMCell(layer_size)
			return core_rnn_cell.GRUCell(layer_size)

		if num_layers > 1:
			# Repeating one cell object ([single_cell]*num_layers) makes newer
			# TensorFlow 1.x releases reject the reuse or share weights across layers.
			cell = core_rnn_cell.MultiRNNCell([build_single_cell() for _ in range(num_layers)])
		else:
			cell = build_single_cell()

		def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
			return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
			 num_encoder_symbols = vocab_size, num_decoder_symbols=vocab_size, embedding_size = layer_size, 
			 output_projection = output_projection, feed_previous = do_decode, dtype = dtype)
		
		# Placeholders, one per time step, sized by the largest bucket.
		self.states, self.states_back, self.action_dums = [], [], [] # states_back : the 2nd half of the states (each)
		self.actions , self.actions_back = [], []
		self.weights, self.weights_back = [],[]

		for i in xrange(self.buckets[-1][0]):
			self.states.append(tf.placeholder(tf.int32, shape=[None], name ="state{0}".format(i)))

		for i in xrange(self.buckets[-1][1]):
			self.action_dums.append(tf.placeholder(tf.int32, shape=[None], name ="action_dum{0}".format(i)))
			self.actions.append(tf.placeholder(tf.int32, shape=[None], name ="action{0}".format(i)))
			self.weights.append(tf.placeholder(dtype, shape=[None], name ="weight_rl{0}".format(i)))

		for i in xrange(self.buckets_back[-1][0]):
			self.actions_back.append(tf.placeholder(tf.int32, shape=[None], name ="action_back{0}".format(i)))

		for i in xrange(self.buckets_back[-1][1]):
			self.states_back.append(tf.placeholder(tf.int32, shape=[None], name ="state_back{0}".format(i)))

		for i in xrange(self.buckets[-1][1]):
			self.weights_back.append(tf.placeholder(dtype, shape=[None], name="weight_rl_back{0}".format(i)))

		# 1. Get batch actions (decoding always feeds back the previous output).
		self.actions_sam, self.logprob = self.generate_batch_action(self.states, self.action_dums, self.bucket_id, lambda x,y:seq2seq_f(x,y,True), output_projection= output_projection)

		# 2. Get the loss
		def mi_score(states, actions, weights, states_back, actions_back, weights_back):
			"""Score actions with the forward model plus states with the backward model.

			Args
				states, actions_back: accepted but not read by this function.
				actions, weights: per-step action ids and their weights.
				states_back, weights_back: per-step state ids and their weights.

			Returns the sum of two weight-normalised log-probability terms, one
			from each pre-trained model.
			"""
			output_logits = self.pre_trained_seq2seq.outputs[self.bucket_id]
			log_prob = []
			# NOTE(review): log_softmax is taken over dim 0, which the slicing below
			# treats as the word index; confirm this axis is intended.
			logprob_s2s = tf.nn.log_softmax(output_logits,dim=0)

			for word_idx in xrange(self.buckets[self.bucket_id][1]):
				one_hot_mat = tf.one_hot(actions[word_idx],depth=self.vocab_size, on_value = 1.0, off_value=0.0, axis =1, dtype=tf.float32 )	
				# Scores of this step, reshaped to (batch_size, vocab_size).
				tmp1 = tf.reshape(tf.slice(logprob_s2s, [word_idx,0,0],[1,-1,-1]), shape = (self.batch_size, self.vocab_size))
				# Score of the chosen word minus the log-sum-exp over the vocabulary.
				log_prob_word = tf.subtract(tf.reduce_sum(tf.multiply(tmp1 , one_hot_mat),1), tf.log(tf.reduce_sum(tf.exp(tmp1),1)))
				log_prob.append(tf.multiply(log_prob_word, weights[word_idx]))
			
			output_logits_back = self.pre_trained_backward.outputs[self.bucket_id]
			log_prob_back = []
			logprob_back = tf.nn.log_softmax(output_logits_back,dim=0)
			# Backward weights shifted by one step, with all-ones weights for the first step.
			w_back_new = [np.ones(self.batch_size, dtype = np.float32)] + weights_back[:-1]
			
			for word_idx in xrange(self.buckets_back[self.bucket_id][1]):
				one_hot_mat = tf.one_hot(states_back[word_idx],depth=self.vocab_size, on_value = 1.0, off_value=0.0, axis =1, dtype=tf.float32 )	
				tmp2 = tf.reshape(tf.slice(logprob_back, [word_idx,0,0],[1,-1,-1]), shape = (self.batch_size, self.vocab_size))
				log_prob_word = tf.subtract(tf.reduce_sum(tf.multiply(tmp2 , one_hot_mat),1), tf.log(tf.reduce_sum(tf.exp(tmp2),1)))
				log_prob_back.append(tf.multiply(log_prob_word, w_back_new[word_idx]))
			
			# Each term is normalised by the sum of its weights.
			return tf.divide(tf.add_n(log_prob), tf.add_n(weights[:self.buckets[self.bucket_id][1]])) + tf.divide(tf.add_n(log_prob_back), tf.add_n(w_back_new[:self.buckets_back[self.bucket_id][1]]))
		
		if not forward_only:
			self.neg_penalty = tf.placeholder(tf.float32, shape=[None], name="neg_penalty")
			# Reward = mi_score + 0.05 * (sum of the action weights).
			self.reward =  mi_score(self.states, self.actions, self.weights, self.states_back, self.actions_back, self.weights_back) + tf.scalar_mul(tf.constant(0.05,shape=()), tf.add_n(self.weights[:self.buckets[self.bucket_id][1]]))
			joint_logprob = tf.reduce_sum(self.logprob,axis=0)
			# 3. Gradient Descent Optimization
			# Only variables with a "mi" component in their name are trained.
			params = [x for x in tf.trainable_variables() if "mi" in str(x.name).split("/")]
			cost = tf.scalar_mul(tf.constant(-1.0,shape=()), tf.add(self.neg_penalty, self.reward))
			opt = tf.train.GradientDescentOptimizer(self.learning_rate)
			# The objective is the inner product of cost and joint log-probability over the batch.
			gradients = tf.gradients(tf.matmul(tf.reshape(cost, shape=(self.batch_size,1)), tf.reshape(joint_logprob,shape=(self.batch_size,1)), transpose_a=True), params)
			# Clips values of multiple tensors by the ratio of the sum of their norms.
			clipped_gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)
			# Applying the gradients also increments global_step.
			self.updates = opt.apply_gradients(zip(clipped_gradients, params), global_step=self.global_step)

		# Save/restore only the "mi" variables, keyed by their name without ":0".
		self.names = {str(x.name).split(":0")[0] : x for x in tf.global_variables() if 'mi' in str(x.name).split("/")}	
		self.saver = tf.train.Saver(self.names)
Exemple #9
0
 def get_cell():
     """Return a 1024-unit GRU cell wrapped in an output projection.

     The projection size is `fea_size`, a name taken from the enclosing
     scope.
     """
     gru = core_rnn_cell.GRUCell(1024)
     return core_rnn_cell.OutputProjectionWrapper(gru, fea_size)
Exemple #10
0
 def get_dec_cell(self, cell_size):
     """Build the decoder cell: a single GRU with dropout always applied.

     Args:
       cell_size: number of GRU units.

     Returns:
       The GRU cell wrapped in DropoutWrapper with keep probability 0.5 on
       both inputs and outputs.
     """
     gru = core_rnn_cell.GRUCell(cell_size)
     # Single layer only: the cell is not stacked with MultiRNNCell.
     return core_rnn_cell.DropoutWrapper(
         gru, input_keep_prob=0.5, output_keep_prob=0.5)
Exemple #11
0
    def __init__(self,
                 vocab_size,
                 buckets,
                 layer_size,
                 num_layers,
                 max_gradient_norm,
                 batch_size,
                 learning_rate,
                 learning_rate_decay_factor,
                 use_lstm=False,
                 num_samples=512,
                 MI_use=False,
                 forward_only=False,
                 dtype=tf.float32):
        """Create a Model:
        Similar to the seq2seq_model.py code in the tensorflow version 0.12.1
        INPUTS:
            vocab_size: size of vocabulary
            buckets: a list of pairs (I,O), where I specifies maximum input length that
                will be processed in that bucket, and O specifies maximum output length. Training
                instances that have inputs longer than I or outputs longer than O will be pushed
                to the next bucket and padded accordingly. We assume that the list is sorted.
                ** We may not use bucketing for Dialogue.
            layer_size: the number of units in each layer
            num_layers: the number of the layers in the model
            max_gradient_norm : gradients are clipped to at most this global norm.
            batch_size : the size of the batches used during training; the model construction
                is independent of batch_size, so it can be changed after initialization if this
                is convenient, e.g., for decoding.
            learning_rate : learning rate to start with.
            learning_rate_decay_factor : decay learning rate by this much when needed.
            use_lstm: True -> LSTM cells, False -> GRU cells
            num_samples: the number of samples for sampled softmax; sampled softmax is only
                used when 0 < num_samples < vocab_size.
            MI_use: if set, the saver only covers variables with a 'forward' component in
                their name, keyed by the name without ":0".
            forward_only : if set, we do not construct the backward pass in the model
            dtype: the data type to use to store internal variables.
        """
        self.vocab_size = vocab_size
        self.buckets = buckets
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=dtype)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        output_projection = None
        softmax_loss_function = None

        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if num_samples > 0 and num_samples < self.vocab_size:
            w_t = tf.get_variable("proj_w", [self.vocab_size, layer_size],
                                  dtype=dtype)
            w = tf.transpose(w_t)
            b = tf.get_variable("proj_b", [self.vocab_size], dtype=dtype)
            output_projection = (w, b)

            def sampled_loss(labels, inputs):
                # Argument order is (labels, inputs), the reverse of the
                # order used in the 0.12.x version.
                labels = tf.reshape(labels, [-1, 1])
                # Compute the sampled softmax loss in 32-bit floats to avoid
                # numerical instabilities, then cast back to `dtype`.
                local_w_t = tf.cast(w_t, tf.float32)
                local_b = tf.cast(b, tf.float32)
                local_inputs = tf.cast(inputs, tf.float32)
                return tf.cast(
                    tf.nn.sampled_softmax_loss(weights=local_w_t,
                                               biases=local_b,
                                               labels=labels,
                                               inputs=local_inputs,
                                               num_sampled=num_samples,
                                               num_classes=self.vocab_size),
                    dtype)

            # Bound only when sampled_loss exists; with sampling disabled the
            # loss function stays None instead of raising NameError.
            softmax_loss_function = sampled_loss
        self.softmax_loss_function = softmax_loss_function

        # Create the internal multi-layer cell for our RNN.
        def build_single_cell():
            # One fresh cell per call, so stacked layers never share a cell
            # object.
            if use_lstm:
                return core_rnn_cell.BasicLSTMCell(layer_size)
            return core_rnn_cell.GRUCell(layer_size)

        if num_layers > 1:
            # Repeating one cell object ([single_cell] * num_layers) makes
            # newer TensorFlow 1.x releases reject the reuse or share weights
            # across layers.
            cell = core_rnn_cell.MultiRNNCell(
                [build_single_cell() for _ in range(num_layers)])
        else:
            cell = build_single_cell()

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                cell,
                num_encoder_symbols=vocab_size,
                num_decoder_symbols=vocab_size,
                embedding_size=layer_size,
                output_projection=output_projection,
                feed_previous=do_decode,
                dtype=dtype)

        self.seq2seq_f = seq2seq_f

        # Feeds for inputs: one placeholder per time step, sized by the
        # largest bucket.
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        for i in xrange(buckets[-1][0]):
            self.encoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="encoder{0}".format(i)))

        for i in xrange(buckets[-1][1] + 1):  # For EOS
            self.decoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="decoder{0}".format(i)))
            self.target_weights.append(
                tf.placeholder(dtype, shape=[None],
                               name="weight{0}".format(i)))

        # Targets are the decoder inputs shifted by one, because of the GO
        # symbol at the beginning.
        targets = [
            self.decoder_inputs[i + 1]
            for i in xrange(len(self.decoder_inputs) - 1)
        ]

        # Training outputs and losses (one entry per bucket).
        if forward_only:
            self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                buckets,
                lambda x, y: seq2seq_f(x, y, True),
                softmax_loss_function=softmax_loss_function)
            if output_projection is not None:
                # Apply the output projection to the decoded outputs.
                for bucket_idx in xrange(len(buckets)):
                    self.outputs[bucket_idx] = [
                        tf.matmul(output, output_projection[0]) +
                        output_projection[1]
                        for output in self.outputs[bucket_idx]
                    ]
        else:
            self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                buckets,
                lambda x, y: seq2seq_f(x, y, False),
                softmax_loss_function=softmax_loss_function)

        # All variables created with trainable=True.
        params = tf.trainable_variables()
        if not forward_only:
            self.gradient_norms = []
            self.updates = []
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            # The loop variable is not called `b`, so the bias captured by
            # sampled_loss is never rebound.
            for bucket_idx in xrange(len(buckets)):
                gradients = tf.gradients(self.losses[bucket_idx], params)
                # Clips values of multiple tensors by the ratio of the sum of
                # their norms.
                clipped_gradients, global_norm = tf.clip_by_global_norm(
                    gradients, max_gradient_norm)
                self.gradient_norms.append(global_norm)
                # Applying the gradients also increments global_step.
                self.updates.append(
                    opt.apply_gradients(zip(clipped_gradients, params),
                                        global_step=self.global_step))

        if MI_use:
            self.names = {
                str(x.name).split(":0")[0]: x
                for x in tf.global_variables()
                if 'forward' in str(x.name).split("/")
            }
            self.saver = tf.train.Saver(self.names)
        else:
            self.saver = tf.train.Saver(tf.global_variables())
Exemple #12
0
	def __init__(self, sess, pre_trained_seq2seq, pre_trained_backward, vocab_size, buckets, layer_size, num_layers, 
		max_gradient_norm, candidate_size, learning_rate, learning_rate_decay_factor, 
		use_lstm=False, num_samples=512, forward_only = False, dtype= tf.float32):

		"""Create a Model:
		Similar to the seq2seq_model_rl.py code but the per-bucket losses are
		fed in through placeholders instead of being computed in the graph.
		INPUTS:
			sess: stored as self.sess.
			pre_trained_seq2seq: stored as self.pre_trained_seq2seq.
			pre_trained_backward: stored as self.pre_trained_backward.
			vocab_size: size of vocabulary
			buckets: a list of pairs (I,O), where I specifies maximum input length that 
				will be processed in that bucket, and O specifies maximum output length. Training 
				instances that have inputs longer than I or outputs longer than O will be pushed 
				to the next bucket and padded accordingly. We assume that the list is sorted.
				** We may not use bucketing for Dialogue.
			layer_size: the number of units in each layer
			num_layers: the number of the layers in the model
			max_gradient_norm : gradients are clipped to at most this global norm.
			candidate_size : the number of candidates (actions); not used in this constructor.
			learning_rate : learning rate to start with.
			learning_rate_decay_factor : decay learning rate by this much when needed.
			use_lstm: True -> LSTM cells, False -> GRU cells
			num_samples: the output projection is only created when 0 < num_samples < vocab_size.
			forward_only : if set, we do not construct the backward pass in the model
			dtype: the data type to use to store internal variables.
		"""
		self.sess = sess
		self.vocab_size = vocab_size
		self.buckets = buckets
		# Backward buckets use the output length of each bucket for both sides.
		self.buckets_back = [(x[1],x[1]) for x in buckets]
		# NOTE(review): batch_size holds a placeholder string, not a number.
		self.batch_size = """? necessary?"""
		self.learning_rate = tf.Variable(float(learning_rate), trainable=False, dtype = dtype)
		self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate*learning_rate_decay_factor)
		self.global_step = tf.Variable(0, trainable=False)
		self.pre_trained_seq2seq = pre_trained_seq2seq
		self.pre_trained_backward = pre_trained_backward
		# Always use the last (largest) bucket.
		self.bucket_id = len(buckets)-1

		# Defined up front so seq2seq_f never sees an unbound name when the
		# projection is not created.
		output_projection = None
		if num_samples > 0 and num_samples < self.vocab_size:
			w_t = tf.get_variable("proj_w_mi",[self.vocab_size, layer_size], dtype = dtype)
			w = tf.transpose(w_t)
			b = tf.get_variable("proj_b_mi", [self.vocab_size], dtype=dtype)
			output_projection = (w,b)

		def build_single_cell():
			# One fresh cell per call, so stacked layers never share a cell object.
			if use_lstm:
				return core_rnn_cell.BasicLSTMCell(layer_size)
			return core_rnn_cell.GRUCell(layer_size)

		if num_layers > 1:
			# Repeating one cell object ([single_cell]*num_layers) makes newer
			# TensorFlow 1.x releases reject the reuse or share weights across layers.
			cell = core_rnn_cell.MultiRNNCell([build_single_cell() for _ in range(num_layers)])
		else:
			cell = build_single_cell()

		def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
			return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
			 num_encoder_symbols = vocab_size, num_decoder_symbols=vocab_size, embedding_size = layer_size, 
			 output_projection = output_projection, feed_previous = do_decode, dtype = dtype)
		self.seq2seq_f = seq2seq_f

		# Placeholders, one per time step, sized by the largest bucket.
		self.states, self.states_back = [], []
		self.actions , self.actions_back = [], []
		self.weights, self.weights_back = [], []

		for i in xrange(self.buckets[-1][0]):
			self.states.append(tf.placeholder(tf.int32, shape=[None], name ="state{0}".format(i)))

		for i in xrange(self.buckets_back[-1][1]):
			self.states_back.append(tf.placeholder(tf.int32, shape=[None], name ="state_back{0}".format(i)))

		for i in xrange(self.buckets[-1][1]):
			self.actions.append(tf.placeholder(tf.int32, shape=[None], name ="action{0}".format(i)))
			self.actions_back.append(tf.placeholder(tf.int32, shape=[None], name ="action_back{0}".format(i)))
			self.weights.append(tf.placeholder(dtype, shape=[None], name="weight_rl{0}".format(i)))
			self.weights_back.append(tf.placeholder(dtype, shape=[None], name="weight_rl_back{0}".format(i)))

		# Losses are fed from outside, one placeholder per bucket.
		self.losses = []
		for i in xrange(len(buckets)):
			self.losses.append(tf.placeholder(tf.float32, shape = [None], name = "losses{0}".format(i)))

		params = tf.trainable_variables()
		if not forward_only:
			self.gradient_norms = []
			self.updates = []
			opt = tf.train.GradientDescentOptimizer(self.learning_rate)
			# NOTE(review): the losses are plain placeholders, so it is unclear how
			# gradients with respect to params are meant to flow; confirm.
			for bucket_idx in xrange(len(buckets)):
				gradients = tf.gradients(self.losses[bucket_idx],params)
				# Clips values of multiple tensors by the ratio of the sum of their norms.
				clipped_gradients, global_norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
				self.gradient_norms.append(global_norm)
				# Applying the gradients also increments global_step.
				self.updates.append(opt.apply_gradients(zip(clipped_gradients, params), global_step=self.global_step))

		self.saver = tf.train.Saver(tf.global_variables())