def _rl_seq2seq_model(self, cell):
    """
    Build the seq2seq model and return its outputs.
    At test time, candidate sampling is used, so the outputs must be
    projected through the output projection layer before decoding.
    :return: outputs, losses, encoder_state
    """
    output_projection, softmax_loss_function = self._get_sample_loss_fn()

    # The seq2seq function: we use embedding for the input and attention.
    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
        return rl_seq2seq.embedding_attention_seq2seq(
            encoder_inputs,
            decoder_inputs,
            cell,
            num_encoder_symbols=self.source_vocab_size,
            num_decoder_symbols=self.target_vocab_size,
            embedding_size=self.emb_dim,
            output_projection=output_projection,
            # Whether to feed the previous step's prediction as the current
            # step's input, i.e. whether we are at test time.
            feed_previous=do_decode,
            mc_search=self.mc_search,  # TODO(Zhu) file location: seq2seq._argmax_or_mcsearch; what does this do?
            dtype=self.dtype)

    outputs, losses, encoder_state = rl_seq2seq.model_with_buckets(
        self.encoder_inputs, self.decoder_inputs, self.targets,
        self.target_weights, self.buckets, self.source_vocab_size,
        self.batch_size,
        lambda x, y: seq2seq_f(x, y, tf.where(self.forward_only, True, False)),
        output_projection=output_projection,
        softmax_loss_function=softmax_loss_function)

    # If output_projection is used, the outputs must be projected before decoding.
    # When forward_only is true, outputs initially have shape
    # [bucket_num, num_steps, batch_size, emb_dim or rnn_size].
    # When forward_only is false, or after the projection below, outputs have
    # shape [bucket_num, num_steps (decoder_size), batch_size,
    # target_vocab_size], i.e. the predictions over the target vocabulary at
    # every time step.
    for b in xrange(len(self.buckets)):
        outputs[b] = [
            tf.cond(
                self.forward_only,
                lambda: tf.matmul(output, output_projection[0]) + output_projection[1],
                lambda: output)
            for output in outputs[b]
        ]
    return outputs, losses, encoder_state
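# --- Illustrative sketch (not part of the original code): what the tf.cond
# projection above computes. At inference, each decoder output of shape
# [batch_size, emb_dim] is mapped to vocabulary logits of shape
# [batch_size, target_vocab_size] via output_projection = (w, b). The sizes
# below are made-up toy values; NumPy stands in for the TF ops.
import numpy as np

batch_size, emb_dim, vocab = 2, 4, 6
output = np.random.randn(batch_size, emb_dim)  # one decoder time step
w = np.random.randn(emb_dim, vocab)            # output_projection[0]
b = np.random.randn(vocab)                     # output_projection[1]

logits = output.dot(w) + b                     # tf.matmul(output, w) + b
assert logits.shape == (batch_size, vocab)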
def __init__(self,
             config,
             name_scope,
             forward_only=False,
             num_samples=512,
             dtype=tf.float32):
    # self.scope_name = scope_name
    # with tf.variable_scope(self.scope_name):
    source_vocab_size = config.vocab_size
    target_vocab_size = config.vocab_size
    emb_dim = config.emb_dim
    self.buckets = config.buckets
    self.learning_rate = tf.Variable(float(config.learning_rate),
                                     trainable=False,
                                     dtype=dtype)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * config.learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)
    self.batch_size = config.batch_size
    self.num_layers = config.num_layers
    self.max_gradient_norm = config.max_gradient_norm
    self.mc_search = tf.placeholder(tf.bool, name="mc_search")
    self.forward_only = tf.placeholder(tf.bool, name="forward_only")
    self.up_reward = tf.placeholder(tf.bool, name="up_reward")
    self.reward_bias = tf.get_variable("reward_bias", [1], dtype=tf.float32)

    # If we use sampled softmax, we need an output projection.
    output_projection = None
    softmax_loss_function = None
    # Sampled softmax only makes sense if we sample less than vocabulary size.
    if num_samples > 0 and num_samples < target_vocab_size:
        w_t = tf.get_variable("proj_w", [target_vocab_size, emb_dim], dtype=dtype)
        w = tf.transpose(w_t)
        b = tf.get_variable("proj_b", [target_vocab_size], dtype=dtype)
        output_projection = (w, b)

        def sampled_loss(inputs, labels):
            labels = tf.reshape(labels, [-1, 1])
            # We need to compute the sampled_softmax_loss using 32bit floats to
            # avoid numerical instabilities.
            local_w_t = tf.cast(w_t, tf.float32)
            local_b = tf.cast(b, tf.float32)
            local_inputs = tf.cast(inputs, tf.float32)
            return tf.cast(
                tf.nn.sampled_softmax_loss(local_w_t, local_b, local_inputs,
                                           labels, num_samples,
                                           target_vocab_size), dtype)

        softmax_loss_function = sampled_loss

    # Create the internal multi-layer cell for our RNN.
    single_cell = tf.nn.rnn_cell.GRUCell(emb_dim)
    cell = single_cell
    if self.num_layers > 1:
        cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * self.num_layers)

    # The seq2seq function: we use embedding for the input and attention.
    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
        return rl_seq2seq.embedding_attention_seq2seq(
            encoder_inputs,
            decoder_inputs,
            cell,
            num_encoder_symbols=source_vocab_size,
            num_decoder_symbols=target_vocab_size,
            embedding_size=emb_dim,
            output_projection=output_projection,
            feed_previous=do_decode,
            mc_search=self.mc_search,
            dtype=dtype)

    # Feeds for inputs.
    self.encoder_inputs = []
    self.decoder_inputs = []
    self.target_weights = []
    for i in xrange(self.buckets[-1][0]):  # Last bucket is the biggest one.
        self.encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
    for i in xrange(self.buckets[-1][1] + 1):
        self.decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
        self.target_weights.append(
            tf.placeholder(dtype, shape=[None], name="weight{0}".format(i)))
    self.reward = [
        tf.placeholder(tf.float32, name="reward_%i" % i)
        for i in range(len(self.buckets))
    ]

    # Our targets are decoder inputs shifted by one.
    targets = [
        self.decoder_inputs[i + 1]
        for i in xrange(len(self.decoder_inputs) - 1)
    ]

    # Note: tf.select and tf.mul below are pre-TF-1.0 APIs (renamed to
    # tf.where and tf.multiply in TF 1.x).
    self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets(
        self.encoder_inputs, self.decoder_inputs, targets,
        self.target_weights, self.buckets, source_vocab_size,
        self.batch_size,
        lambda x, y: seq2seq_f(x, y, tf.select(self.forward_only, True, False)),
        output_projection=output_projection,
        softmax_loss_function=softmax_loss_function)

    for b in xrange(len(self.buckets)):
        self.outputs[b] = [
            tf.cond(
                self.forward_only,
                lambda: tf.matmul(output, output_projection[0]) + output_projection[1],
                lambda: output)
            for output in self.outputs[b]
        ]

    if not forward_only:
        with tf.name_scope("gradient_descent"):
            self.gradient_norms = []
            self.updates = []
            self.aj_losses = []
            self.gen_params = [
                p for p in tf.trainable_variables() if name_scope in p.name
            ]
            # opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            opt = tf.train.AdamOptimizer()
            for b in xrange(len(self.buckets)):
                # R = tf.sub(self.reward[b], self.reward_bias)
                self.reward[b] = self.reward[b] - self.reward_bias
                # tf.cond picks the reward-scaled loss when up_reward is set,
                # and the plain loss otherwise.
                adjusted_loss = tf.cond(
                    self.up_reward,
                    lambda: tf.mul(self.losses[b], self.reward[b]),
                    lambda: self.losses[b])
                # adjusted_loss = tf.cond(self.up_reward,
                #                         lambda: tf.mul(self.losses[b], R),
                #                         lambda: self.losses[b])
                self.aj_losses.append(adjusted_loss)
                gradients = tf.gradients(adjusted_loss, self.gen_params)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, self.max_gradient_norm)
                self.gradient_norms.append(norm)
                self.updates.append(
                    opt.apply_gradients(zip(clipped_gradients, self.gen_params),
                                        global_step=self.global_step))

    self.gen_variables = [
        k for k in tf.global_variables() if name_scope in k.name
    ]
    self.saver = tf.train.Saver(self.gen_variables)
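# --- Illustrative sketch (an assumption: this restates the intent of the
# up_reward branch above outside the TF graph). When up_reward is set, the
# per-bucket loss is scaled by the bias-corrected reward, a REINFORCE-style
# surrogate objective; otherwise the plain MLE loss is kept. Toy values.
def adjusted_loss_py(loss, reward, reward_bias, up_reward):
    reward = reward - reward_bias  # mirrors self.reward[b] - self.reward_bias
    return loss * reward if up_reward else loss

print(adjusted_loss_py(2.5, 0.8, 0.5, up_reward=True))   # 0.75
print(adjusted_loss_py(2.5, 0.8, 0.5, up_reward=False))  # 2.5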
def __init__(self,
             source_vocab_size,
             target_vocab_size,
             buckets,
             size,
             num_layers,
             max_gradient_norm,
             batch_size,
             learning_rate,
             learning_rate_decay_factor,
             use_lstm=False,
             num_samples=512,
             forward_only=False,
             scope_name='gen_seq2seq',
             dtype=tf.float32):
    self.scope_name = scope_name
    with tf.variable_scope(self.scope_name):
        self.source_vocab_size = source_vocab_size
        self.target_vocab_size = target_vocab_size
        self.buckets = buckets
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=dtype)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)
        self.batch_size = batch_size
        self.up_reward = tf.placeholder(tf.bool, name="up_reward")
        self.en_output_proj = tf.placeholder(tf.bool, name="en_output_proj")

        # If we use sampled softmax, we need an output projection.
        output_projection = None
        softmax_loss_function = None

        # Sampled softmax only makes sense if we sample less than vocabulary size.
        def policy_gradient(logit, labels):
            def softmax(x):
                return tf.exp(x) / tf.reduce_sum(tf.exp(x), reduction_indices=0)

            prob = softmax(logit)
            # token = tf.argmax(logit, 0)
            return tf.reduce_max(prob)

        # softmax_loss_function = policy_gradient

        if num_samples > 0 and num_samples < self.target_vocab_size:
            w_t = tf.get_variable("proj_w", [self.target_vocab_size, size],
                                  dtype=dtype)
            w = tf.transpose(w_t)
            b = tf.get_variable("proj_b", [self.target_vocab_size], dtype=dtype)
            output_projection = (w, b)

            def sampled_loss(inputs, labels):
                labels = tf.reshape(labels, [-1, 1])
                # We need to compute the sampled_softmax_loss using 32bit floats to
                # avoid numerical instabilities.
                local_w_t = tf.cast(w_t, tf.float32)
                local_b = tf.cast(b, tf.float32)
                local_inputs = tf.cast(inputs, tf.float32)
                return tf.cast(
                    tf.nn.sampled_softmax_loss(local_w_t, local_b, local_inputs,
                                               labels, num_samples,
                                               self.target_vocab_size), dtype)

            softmax_loss_function = sampled_loss

        # softmax_loss_function = control_flow_ops.cond(self.up_reward,
        #                                               lambda: policy_gradient,
        #                                               lambda: sampled_loss)
        # Note: this unconditionally overrides sampled_loss.
        softmax_loss_function = policy_gradient
        # loss_function = tf.select(self.up_reward, policy_gradient, softmax_loss_function)
        # softmax_loss_function = loss_function

        # Create the internal multi-layer cell for our RNN.
        single_cell = tf.nn.rnn_cell.GRUCell(size)
        if use_lstm:
            single_cell = tf.nn.rnn_cell.BasicLSTMCell(size)
        cell = single_cell
        if num_layers > 1:
            cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return rl_seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                cell,
                num_encoder_symbols=source_vocab_size,
                num_decoder_symbols=target_vocab_size,
                embedding_size=size,
                output_projection=output_projection,
                feed_previous=do_decode,
                dtype=dtype)

        # Feeds for inputs.
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        for i in xrange(buckets[-1][0]):  # Last bucket is the biggest one.
            self.encoder_inputs.append(
                tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
        for i in xrange(buckets[-1][1] + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
            self.target_weights.append(
                tf.placeholder(dtype, shape=[None], name="weight{0}".format(i)))

        # Our targets are decoder inputs shifted by one.
        targets = [
            self.decoder_inputs[i + 1]
            for i in xrange(len(self.decoder_inputs) - 1)
        ]

        # Training outputs and losses.
        if forward_only:
            self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets(
                self.encoder_inputs, self.decoder_inputs, targets,
                self.target_weights, buckets,
                lambda x, y: seq2seq_f(x, y, True),
                softmax_loss_function=softmax_loss_function)
            # If we use output projection, we need to project outputs for decoding.
            if output_projection is not None:
                for b in xrange(len(buckets)):
                    self.outputs[b] = [
                        tf.matmul(output, output_projection[0]) + output_projection[1]
                        for output in self.outputs[b]
                    ]
        else:
            self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets(
                self.encoder_inputs, self.decoder_inputs, targets,
                self.target_weights, buckets,
                lambda x, y: seq2seq_f(x, y, False),
                softmax_loss_function=softmax_loss_function)

        # for j in xrange(len(buckets)):
        #     output_seq = [int(np.argmax(logit, axis=1)) for logit in self.outputs[j]]

        # # for reinforcement learning
        # self.force_dec_input = tf.placeholder(tf.bool, name="force_dec_input")
        # self.en_output_proj = tf.placeholder(tf.bool, name="en_output_proj")
        # # Training outputs and losses.
        # # if forward_only:
        # self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets(
        #     self.encoder_inputs, self.decoder_inputs, targets,
        #     self.target_weights, buckets,
        #     lambda x, y: seq2seq_f(x, y, tf.select(self.force_dec_input, False, True)),
        #     softmax_loss_function=softmax_loss_function)
        # # If we use output projection, we need to project outputs for decoding.
        # # if output_projection is not None:
        # for b in xrange(len(buckets)):
        #     self.outputs[b] = [
        #         control_flow_ops.cond(
        #             self.en_output_proj,
        #             lambda: tf.matmul(output, output_projection[0]) + output_projection[1],
        #             lambda: output)
        #         for output in self.outputs[b]
        #     ]

        # Gradients and SGD update operation for training the model.
        self.tvars = tf.trainable_variables()
        # if not forward_only:
        self.gradient_norms = []
        self.updates = []
        self.reward = [
            tf.placeholder(tf.float32, name="reward_%i" % i)
            for i in range(len(buckets))
        ]
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        for b in xrange(len(buckets)):
            adjusted_losses = tf.mul(self.losses[b], self.reward[b])
            gradients = tf.gradients(adjusted_losses, self.tvars)
            clipped_gradients, norm = tf.clip_by_global_norm(
                gradients, max_gradient_norm)
            self.gradient_norms.append(norm)
            self.updates.append(
                opt.apply_gradients(zip(clipped_gradients, self.tvars),
                                    global_step=self.global_step))

        # self.saver = tf.train.Saver(tf.all_variables())
        all_variables = [
            k for k in tf.global_variables()
            if k.name.startswith(self.scope_name)
        ]
        self.saver = tf.train.Saver(all_variables)
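# --- Illustrative NumPy sketch of the policy_gradient loss defined above: it
# softmax-normalizes a logit vector and returns the probability of the most
# likely token, i.e. the probability the model assigns to its own greedy
# choice. Note that, unlike sampled_loss, it ignores `labels`.
import numpy as np

def policy_gradient_py(logit):
    prob = np.exp(logit) / np.sum(np.exp(logit))  # softmax over the vocabulary
    return prob.max()                             # probability of the argmax token

print(policy_gradient_py(np.array([2.0, 1.0, 0.1])))  # ~0.66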
def __init__(self,
             config,
             name_scope,
             forward_only=False,
             num_samples=256,
             dtype=tf.float32):
    """
    Defines every seq2seq-related step:
    1. embedding = 512, learning rate = 0.5
    2. Uses GRU cells
    3. Uses attention
    4. forward_only switches between training and prediction
    5. Gradient descent with Adam and clipped gradients
    6. up_reward toggles reward-scaled (RL) losses
    """
    # self.scope_name = scope_name
    # with tf.variable_scope(self.scope_name):
    source_vocab_size = config.vocab_size  # 35000
    target_vocab_size = config.vocab_size  # 35000
    emb_dim = config.emb_dim  # 512
    self.buckets = config.buckets  # [(5, 10), (10, 15), (20, 25), (40, 50)]
    self.learning_rate = tf.Variable(float(config.learning_rate),
                                     trainable=False,
                                     dtype=dtype)  # learning_rate = 0.5
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * config.learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)
    self.batch_size = config.batch_size  # 128
    self.num_layers = config.num_layers  # 2
    self.max_gradient_norm = config.max_gradient_norm  # 5.0
    self.mc_search = tf.placeholder(tf.bool, name="mc_search")  # boolean for mc_search
    self.forward_only = tf.placeholder(tf.bool, name="forward_only")  # boolean for forward_only
    self.up_reward = tf.placeholder(tf.bool, name="up_reward")  # boolean for up_reward
    self.reward_bias = tf.get_variable("reward_bias", [1], dtype=tf.float32)  # shape=(1,)

    # If we use sampled softmax, we need an output projection.
    output_projection = None
    softmax_loss_function = None
    # Sampled softmax only makes sense if we sample less than vocabulary size.
    if num_samples > 0 and num_samples < target_vocab_size:  # 256 > 0 and 256 < 35000
        w_t = tf.get_variable("proj_w", [target_vocab_size, emb_dim],
                              dtype=dtype)  # [35000, 512]
        w = tf.transpose(w_t)  # [512, 35000]
        b = tf.get_variable("proj_b", [target_vocab_size], dtype=dtype)  # [35000]
        output_projection = (w, b)  # ([512, 35000], [35000])

        def sampled_loss(inputs, labels):
            labels = tf.reshape(labels, [-1, 1])
            # We need to compute the sampled_softmax_loss using 32bit floats to
            # avoid numerical instabilities.
            local_w_t = tf.cast(w_t, tf.float32)  # cast w_t to float32
            local_b = tf.cast(b, tf.float32)  # cast b to float32
            local_inputs = tf.cast(inputs, tf.float32)  # cast inputs to float32
            # This is a faster way to train a softmax classifier over a huge
            # number of classes.
            return tf.cast(
                tf.nn.sampled_softmax_loss(local_w_t, local_b, labels,
                                           local_inputs, num_samples,
                                           target_vocab_size), dtype)

        softmax_loss_function = sampled_loss

    # Create the internal multi-layer cell for our RNN.
    single_cell = tf.contrib.rnn.GRUCell(emb_dim)  # 512
    cell = single_cell
    if self.num_layers > 1:  # 2
        cell = tf.contrib.rnn.MultiRNNCell([single_cell] * self.num_layers)  # GRU * 2 (512)

    # The seq2seq function: we use embedding for the input and attention.
    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
        return rl_seq2seq.embedding_attention_seq2seq(
            encoder_inputs,
            decoder_inputs,
            cell,
            num_encoder_symbols=source_vocab_size,
            num_decoder_symbols=target_vocab_size,
            embedding_size=emb_dim,
            output_projection=output_projection,
            feed_previous=do_decode,
            mc_search=self.mc_search,
            dtype=dtype)

    # Feeds for inputs.
    self.encoder_inputs = []
    self.decoder_inputs = []
    self.target_weights = []
    for i in xrange(self.buckets[-1][0]):  # Last bucket is the biggest one.
        self.encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
    # e.g.:
    # [<tf.Tensor 'encoder0:0' shape=(?,) dtype=int32>,
    #  <tf.Tensor 'encoder1:0' shape=(?,) dtype=int32>,
    #  ...
    #  <tf.Tensor 'encoder39:0' shape=(?,) dtype=int32>]
    # encoder_inputs is sized for the longest question (the last bucket)

    for i in xrange(self.buckets[-1][1] + 1):
        self.decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
        # decoder_inputs is sized for the longest answer
        self.target_weights.append(
            tf.placeholder(dtype, shape=[None], name="weight{0}".format(i)))
        # target_weights holds the weights for the longest answer

    self.reward = [
        tf.placeholder(tf.float32, name="reward_%i" % i)
        for i in range(len(self.buckets))
    ]
    # e.g.:
    # <tf.Tensor 'reward_0:0' shape=<unknown> dtype=float32>,
    # <tf.Tensor 'reward_1:0' shape=<unknown> dtype=float32>,
    # <tf.Tensor 'reward_2:0' shape=<unknown> dtype=float32>,
    # <tf.Tensor 'reward_3:0' shape=<unknown> dtype=float32>

    # Our targets are decoder inputs shifted by one.
    targets = [
        self.decoder_inputs[i + 1]
        for i in xrange(len(self.decoder_inputs) - 1)
    ]

    # Feed the buckets through the model to get outputs, losses, and the
    # encoder state.
    self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets(
        self.encoder_inputs, self.decoder_inputs, targets,
        self.target_weights, self.buckets, source_vocab_size,
        self.batch_size,
        lambda x, y: seq2seq_f(x, y, tf.where(self.forward_only, True, False)),
        output_projection=output_projection,
        softmax_loss_function=softmax_loss_function)

    for b in xrange(len(self.buckets)):
        self.outputs[b] = [
            tf.cond(
                # if forward_only is true, store the projected prediction;
                # if forward_only is false, store the raw output
                self.forward_only,
                lambda: tf.matmul(output, output_projection[0]) + output_projection[1],
                lambda: output)
            for output in self.outputs[b]
        ]

    # Gradient descent using Adam
    if not forward_only:
        with tf.name_scope("gradient_descent"):
            self.gradient_norms = []
            self.updates = []
            self.aj_losses = []
            self.gen_params = [
                p for p in tf.trainable_variables() if name_scope in p.name
            ]
            # opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            opt = tf.train.AdamOptimizer()
            for b in xrange(len(self.buckets)):
                # R = tf.subtract(self.reward[b], self.reward_bias)
                # self.reward[b] = self.reward[b] - reward_bias
                adjusted_loss = tf.cond(
                    self.up_reward,
                    # if up_reward is true, multiply the loss by the reward;
                    # otherwise keep the plain loss
                    lambda: tf.multiply(self.losses[b], self.reward[b]),
                    lambda: self.losses[b])
                # adjusted_loss = tf.cond(self.up_reward,
                #                         lambda: tf.multiply(self.losses[b], R),
                #                         lambda: self.losses[b])
                self.aj_losses.append(adjusted_loss)
                gradients = tf.gradients(adjusted_loss, self.gen_params)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, self.max_gradient_norm)  # max_gradient_norm = 5
                """
                Gradient clipping works as follows:
                1. Compute the global norm (root of the sum of squares) of all
                   weight gradients: sumsq_diff.
                2. If sumsq_diff > clip_gradient, compute the scaling factor
                   scale_factor = clip_gradient / sumsq_diff.
                3. scale_factor lies in (0, 1).
                4. The larger sumsq_diff is, the smaller the scaling factor.
                5. Multiply all weight gradients by this scaling factor.
                6. This guarantees that within one update step the global norm
                   of all weight gradients (sumsq_diff) stays within the
                   configured bound, namely clip_gradient.
                """
                self.gradient_norms.append(norm)
                self.updates.append(
                    opt.apply_gradients(zip(clipped_gradients, self.gen_params),
                                        global_step=self.global_step))

    self.gen_variables = [
        k for k in tf.global_variables() if name_scope in k.name
    ]
    self.saver = tf.train.Saver(self.gen_variables)
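# --- Minimal NumPy sketch of the global-norm clipping described in the
# docstring above (the behaviour of tf.clip_by_global_norm): compute one
# global norm across all gradient tensors and, if it exceeds clip_norm,
# rescale every gradient by clip_norm / global_norm.
import numpy as np

def clip_by_global_norm_py(grads, clip_norm):
    global_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    if global_norm > clip_norm:
        scale = clip_norm / global_norm  # scale_factor in (0, 1)
        grads = [g * scale for g in grads]
    return grads, global_norm

grads = [np.array([3.0, 4.0]), np.array([12.0])]  # global norm = 13
clipped, norm = clip_by_global_norm_py(grads, clip_norm=5.0)
print(norm)                                           # 13.0
print(np.sqrt(sum(np.sum(g ** 2) for g in clipped)))  # ~5.0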
def __init__(self,
             config,
             name_scope,
             forward_only=False,
             num_samples=256,
             dtype=tf.float32):
    # self.scope_name = scope_name
    # with tf.variable_scope(self.scope_name):
    with tf.device("/gpu:0"):
        source_vocab_size = config.vocab_size
        target_vocab_size = config.vocab_size
        emb_dim = config.emb_dim
        word_embedding_size = config.word_embedding
        dropout = config.keep_prob
        self.config = config
        self.buckets = config.buckets
        self.learning_rate = tf.Variable(float(config.learning_rate),
                                         trainable=False,
                                         dtype=dtype)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * config.learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)
        self.batch_size = config.batch_size
        self.num_layers = config.num_layers
        self.max_gradient_norm = config.max_gradient_norm
        self.mc_search = tf.placeholder(tf.bool, name="mc_search")
        self.mc_position = tf.placeholder(tf.int32, name="mc_position")
        self.forward_only = tf.placeholder(tf.bool, name="forward_only")
        self.teacher_forcing = tf.placeholder(tf.bool, name="teacher_forcing")
        self.up_reward = tf.placeholder(tf.bool, name="up_reward")
        self.reward_bias = tf.get_variable("reward_bias", [1], dtype=tf.float32)
        self.ent_weight = float(config.ent_weight)

        # If we use sampled softmax, we need an output projection.
        output_projection = None
        softmax_loss_function = None
        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if num_samples > 0 and num_samples < target_vocab_size:
            w_t = tf.get_variable("proj_w", [target_vocab_size, emb_dim], dtype=dtype)
            w = tf.transpose(w_t)
            b = tf.get_variable("proj_b", [target_vocab_size], dtype=dtype)
            output_projection = (w, b)

            def sampled_loss(inputs, labels):
                labels = tf.reshape(labels, [-1, 1])
                # We need to compute the sampled_softmax_loss using 32bit floats to
                # avoid numerical instabilities.
                local_w_t = tf.cast(w_t, tf.float32)
                local_b = tf.cast(b, tf.float32)
                local_inputs = tf.cast(inputs, tf.float32)
                return tf.cast(
                    tf.nn.sampled_softmax_loss(local_w_t, local_b, labels,
                                               local_inputs, num_samples,
                                               target_vocab_size), dtype)

            # softmax_loss_function = sampled_loss
            softmax_loss_function = None

        # Creation of the RNN cell: a GRU wrapped with dropout on its output.
        def create_rnn_cell():
            encoDecoCell = tf.contrib.rnn.GRUCell(emb_dim)  # or LSTMCell(args.hiddenSize)
            encoDecoCell = tf.contrib.rnn.DropoutWrapper(encoDecoCell,
                                                         input_keep_prob=1.0,
                                                         output_keep_prob=dropout)
            return encoDecoCell

        single_cell = tf.contrib.rnn.MultiRNNCell(
            [create_rnn_cell() for _ in range(self.num_layers)])

        # Create the internal multi-layer cell for our RNN.
        # single_cell = tf.contrib.rnn.GRUCell(emb_dim)
        # single_cell = tf.contrib.rnn.BasicLSTMCell(emb_dim)
        cell = single_cell
        # if self.num_layers > 1:
        #     cell = tf.contrib.rnn.MultiRNNCell([single_cell] * self.num_layers)

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return rl_seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                cell,
                num_encoder_symbols=source_vocab_size,
                num_decoder_symbols=target_vocab_size,
                embedding_size=word_embedding_size,
                output_projection=output_projection,
                feed_previous=do_decode,
                mc_search=self.mc_search,
                dtype=dtype,
                mc_position=self.mc_position)

        # Feeds for inputs.
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        self.targets_input = []
        self.mc_sents = []
        for i in xrange(self.buckets[-1][0]):  # Last bucket is the biggest one.
            self.encoder_inputs.append(
                tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
        for i in xrange(self.buckets[-1][1] + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
            self.target_weights.append(
                tf.placeholder(dtype, shape=[None], name="weight{0}".format(i)))
            self.targets_input.append(
                tf.placeholder(tf.int32, shape=[None], name="target{0}".format(i)))

        # self.reward = [tf.placeholder(tf.float32, name="reward_%i" % i)
        #                for i in range(len(self.buckets))]
        self.reward = [
            tf.placeholder(tf.float32, shape=[None, None], name="reward_%i" % i)
            for i in range(len(self.buckets))
        ]

        # Our targets are decoder inputs shifted by one.
        # targets = [self.decoder_inputs[i + 1]
        #            for i in xrange(len(self.decoder_inputs) - 1)]
        self.outputs, self.losses, self.encoder_state, self.ent, self.mc_sents = rl_seq2seq.model_with_buckets(
            self.encoder_inputs, self.decoder_inputs, self.targets_input,
            self.target_weights, self.reward, self.buckets,
            source_vocab_size, self.batch_size,
            lambda x, y: seq2seq_f(x, y, tf.where(self.forward_only, True, False)),
            output_projection=output_projection,
            softmax_loss_function=softmax_loss_function)

        for b in xrange(len(self.buckets)):
            self.outputs[b] = [
                tf.cond(
                    self.forward_only,
                    lambda: tf.matmul(output, output_projection[0]) + output_projection[1],
                    lambda: output)
                for output in self.outputs[b]
            ]

        # forward_only == False ----> adversarial learning
        if not forward_only:
            with tf.name_scope("gradient_descent"):
                self.gradient_norms = []
                self.updates = []
                self.aj_losses = []
                self.gen_params = [
                    p for p in tf.trainable_variables() if name_scope in p.name
                ]
                # opt = tf.train.GradientDescentOptimizer(self.learning_rate)
                opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                             beta1=0.9,
                                             beta2=0.999,
                                             epsilon=1e-08)
                for b in xrange(len(self.buckets)):
                    # R = tf.subtract(self.reward[b], self.reward_bias)
                    # self.reward[b] = self.reward[b] - reward_bias
                    # adjusted_loss = tf.cond(self.up_reward,
                    #                         lambda: tf.multiply(self.losses[b], self.reward[b]),
                    #                         lambda: self.losses[b])
                    adjusted_loss = self.losses[b]
                    # Without teacher forcing, add the entropy term (weighted
                    # by ent_weight) to the loss.
                    adjusted_loss = tf.cond(
                        self.teacher_forcing,
                        lambda: adjusted_loss,
                        lambda: adjusted_loss + self.ent_weight * self.ent[b])
                    # adjusted_loss -= self.ent_weight * self.ent[b]
                    # if up_reward == true, tf.multiply(self.losses[b], self.reward[b]) would run
                    # adjusted_loss = tf.cond(self.up_reward,
                    #                         lambda: tf.multiply(self.losses[b], R),
                    #                         lambda: self.losses[b])
                    self.aj_losses.append(adjusted_loss)

                    gradients, variables = zip(*opt.compute_gradients(adjusted_loss))
                    # Note: the clip norm is hardcoded to 5.0 here rather than
                    # using self.max_gradient_norm.
                    capped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
                    optimizer = opt.apply_gradients(zip(capped_gradients, variables),
                                                    global_step=self.global_step)
                    self.updates.append(optimizer)
                    # self.updates.append(opt.minimize(adjusted_loss, global_step=self.global_step))

        self.gen_variables = [
            k for k in tf.global_variables() if name_scope in k.name
        ]
        self.saver = tf.train.Saver(self.gen_variables)
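# --- Illustrative sketch of the entropy term used above. The exact sign and
# normalization of self.ent depend on how rl_seq2seq.model_with_buckets
# computes it, but the idea is to regularize the policy's token distribution:
# a peaked distribution has low entropy, a uniform one has the maximum, log V.
import numpy as np

def entropy(probs):
    return -np.sum(probs * np.log(probs))

uniform = np.full(4, 0.25)                  # maximum-entropy distribution
peaked = np.array([0.97, 0.01, 0.01, 0.01])
print(entropy(uniform))  # ~1.386 (= log 4)
print(entropy(peaked))   # ~0.168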
def __init__(self,
             config,
             use_lstm=False,
             num_samples=512,
             forward=False,
             scope_name='gen_seq2seq',
             dtype=tf.float32):
    self.scope_name = scope_name
    with tf.variable_scope(self.scope_name):
        self.source_vocab_size = config.vocab_size
        self.target_vocab_size = config.vocab_size
        self.buckets = config.buckets
        self.learning_rate = tf.Variable(float(config.learning_rate),
                                         trainable=False,
                                         dtype=dtype)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * config.learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)
        self.batch_size = config.batch_size
        self.emb_dim = config.emb_dim
        self.num_layers = config.num_layers
        self.max_gradient_norm = config.max_gradient_norm
        # self.up_reward = tf.placeholder(tf.bool, name="up_reward")
        self.mc_search = tf.placeholder(tf.bool, name="mc_search")
        self.forward_only = tf.placeholder(tf.bool, name="forward_only")

        # If we use sampled softmax, we need an output projection.
        output_projection = None
        softmax_loss_function = None

        # Create the internal multi-layer cell for our RNN.
        single_cell = tf.nn.rnn_cell.GRUCell(self.emb_dim)
        if use_lstm:
            single_cell = tf.nn.rnn_cell.BasicLSTMCell(self.emb_dim)
        cell = single_cell
        if self.num_layers > 1:
            cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * self.num_layers)

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return rl_seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                cell,
                num_encoder_symbols=self.source_vocab_size,
                num_decoder_symbols=self.target_vocab_size,
                embedding_size=self.emb_dim,
                output_projection=output_projection,
                feed_previous=do_decode,
                mc_search=self.mc_search,
                dtype=dtype)

        # Feeds for inputs.
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        for i in xrange(self.buckets[-1][0]):  # Last bucket is the biggest one.
            self.encoder_inputs.append(
                tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
        for i in xrange(self.buckets[-1][1] + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
            self.target_weights.append(
                tf.placeholder(dtype, shape=[None], name="weight{0}".format(i)))
        self.reward = [
            tf.placeholder(tf.float32, name="reward_%i" % i)
            for i in range(len(self.buckets))
        ]

        # Our targets are decoder inputs shifted by one.
        targets = [
            self.decoder_inputs[i + 1]
            for i in xrange(len(self.decoder_inputs) - 1)
        ]

        self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets(
            self.encoder_inputs, self.decoder_inputs, targets,
            self.target_weights, self.buckets, self.emb_dim,
            self.batch_size,
            lambda x, y: seq2seq_f(x, y, tf.select(self.forward_only, True, False)),
            output_projection=output_projection,
            softmax_loss_function=softmax_loss_function)

        with tf.name_scope("gradient_descent"):
            self.gradient_norms = []
            self.updates = []
            self.gen_params = [
                p for p in tf.trainable_variables()
                if self.scope_name in p.name
            ]
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            for b in xrange(len(self.buckets)):
                adjusted_losses = tf.mul(self.losses[b], self.reward[b])
                gradients = tf.gradients(adjusted_losses, self.gen_params)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, self.max_gradient_norm)
                self.gradient_norms.append(norm)
                self.updates.append(
                    opt.apply_gradients(zip(clipped_gradients, self.gen_params),
                                        global_step=self.global_step))

        self.gen_variables = [
            k for k in tf.global_variables() if self.scope_name in k.name
        ]
        self.saver = tf.train.Saver(self.gen_variables)
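# --- Illustrative sketch of the learning-rate schedule shared by all the
# variants above: running learning_rate_decay_op multiplies the current rate
# by learning_rate_decay_factor (typically triggered when validation
# perplexity stops improving). The values below are toy numbers.
learning_rate = 0.5
decay_factor = 0.99
for _ in range(3):
    learning_rate *= decay_factor      # what self.learning_rate_decay_op does
    print(round(learning_rate, 7))     # 0.495, 0.49005, 0.4851495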