Example 1
    def _rl_seq2seq_model(self, cell):
        """
        构建seq2seq模型,返回模型的输出
        如果是在测试阶段,需要使用候选采样,通过投影层投影输出以解码
        :return: outputs, losses, encoder_state
        """
        output_projection, softmax_loss_function = self._get_sample_loss_fn()

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return rl_seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                cell,
                num_encoder_symbols=self.source_vocab_size,
                num_decoder_symbols=self.target_vocab_size,
                embedding_size=self.emb_dim,
                output_projection=output_projection,
                feed_previous=do_decode,  # whether to feed the previous step's prediction as this step's input, i.e. whether we are decoding at test time
                mc_search=self.mc_search,  # TODO(Zhu) see seq2seq._argmax_or_mcsearch: what does this do?
                dtype=self.dtype)


        outputs, losses, encoder_state = rl_seq2seq.model_with_buckets(
            self.encoder_inputs, self.decoder_inputs, self.targets, self.target_weights,
            self.buckets, self.source_vocab_size, self.batch_size,
            lambda x, y: seq2seq_f(x, y, tf.where(self.forward_only, True, False)),
            output_projection=output_projection, softmax_loss_function=softmax_loss_function)

        # If output_projection is used, the outputs must be projected before decoding.
        # If forward_only is True, outputs initially have shape
        # [bucket_num, num_steps, batch_size, emb_dim or rnn_size].
        # If forward_only is False, or after the projection layer has been applied,
        # outputs have shape [bucket_num, num_steps (decoder_size), batch_size, target_vocab_size],
        # i.e. the predictions over the vocabulary at every time step.
        for b in xrange(len(self.buckets)):
            outputs[b] = [
                tf.cond(
                    self.forward_only,
                    lambda: tf.matmul(output, output_projection[0]) + output_projection[1],
                    lambda: output
                )
                for output in outputs[b]
            ]

        return outputs, losses, encoder_state
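
A minimal NumPy sketch (not code from this repository; shapes are invented) of the projection that the tf.cond above applies at test time: a decoder output of width emb_dim is mapped to vocabulary logits with output_projection = (w, b).

import numpy as np

batch_size, emb_dim, target_vocab_size = 2, 4, 6
output = np.random.randn(batch_size, emb_dim)        # one decoder time step
w = np.random.randn(emb_dim, target_vocab_size)      # output_projection[0]
b = np.random.randn(target_vocab_size)               # output_projection[1]

logits = output.dot(w) + b                           # same as tf.matmul(output, w) + b
print(logits.shape)                                  # (2, 6): one score per vocabulary token
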
Example 2
    def __init__(self,
                 config,
                 name_scope,
                 forward_only=False,
                 num_samples=512,
                 dtype=tf.float32):

        # self.scope_name = scope_name
        # with tf.variable_scope(self.scope_name):
        source_vocab_size = config.vocab_size
        target_vocab_size = config.vocab_size
        emb_dim = config.emb_dim

        self.buckets = config.buckets
        self.learning_rate = tf.Variable(float(config.learning_rate),
                                         trainable=False,
                                         dtype=dtype)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * config.learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)
        self.batch_size = config.batch_size
        self.num_layers = config.num_layers
        self.max_gradient_norm = config.max_gradient_norm
        self.mc_search = tf.placeholder(tf.bool, name="mc_search")
        self.forward_only = tf.placeholder(tf.bool, name="forward_only")
        self.up_reward = tf.placeholder(tf.bool, name="up_reward")
        self.reward_bias = tf.get_variable("reward_bias", [1],
                                           dtype=tf.float32)
        # If we use sampled softmax, we need an output projection.
        output_projection = None
        softmax_loss_function = None
        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if num_samples > 0 and num_samples < target_vocab_size:
            w_t = tf.get_variable("proj_w", [target_vocab_size, emb_dim],
                                  dtype=dtype)
            w = tf.transpose(w_t)
            b = tf.get_variable("proj_b", [target_vocab_size], dtype=dtype)
            output_projection = (w, b)

            def sampled_loss(inputs, labels):
                labels = tf.reshape(labels, [-1, 1])
                # We need to compute the sampled_softmax_loss using 32bit floats to
                # avoid numerical instabilities.
                local_w_t = tf.cast(w_t, tf.float32)
                local_b = tf.cast(b, tf.float32)
                local_inputs = tf.cast(inputs, tf.float32)
                return tf.cast(
                    tf.nn.sampled_softmax_loss(local_w_t, local_b,
                                               local_inputs, labels,
                                               num_samples, target_vocab_size),
                    dtype)

            softmax_loss_function = sampled_loss

        # Create the internal multi-layer cell for our RNN.
        single_cell = tf.nn.rnn_cell.GRUCell(emb_dim)
        cell = single_cell
        if self.num_layers > 1:
            cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * self.num_layers)

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return rl_seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                cell,
                num_encoder_symbols=source_vocab_size,
                num_decoder_symbols=target_vocab_size,
                embedding_size=emb_dim,
                output_projection=output_projection,
                feed_previous=do_decode,
                mc_search=self.mc_search,
                dtype=dtype)

        # Feeds for inputs.
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        for i in xrange(
                self.buckets[-1][0]):  # Last bucket is the biggest one.
            self.encoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="encoder{0}".format(i)))
        for i in xrange(self.buckets[-1][1] + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="decoder{0}".format(i)))
            self.target_weights.append(
                tf.placeholder(dtype, shape=[None],
                               name="weight{0}".format(i)))
        self.reward = [
            tf.placeholder(tf.float32, name="reward_%i" % i)
            for i in range(len(self.buckets))
        ]

        # Our targets are decoder inputs shifted by one.
        targets = [
            self.decoder_inputs[i + 1]
            for i in xrange(len(self.decoder_inputs) - 1)
        ]

        self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets(
            self.encoder_inputs,
            self.decoder_inputs,
            targets,
            self.target_weights,
            self.buckets,
            source_vocab_size,
            self.batch_size,
            lambda x, y: seq2seq_f(x, y,
                                   tf.select(self.forward_only, True, False)),
            output_projection=output_projection,
            softmax_loss_function=softmax_loss_function)

        for b in xrange(len(self.buckets)):
            self.outputs[b] = [
                tf.cond(
                    self.forward_only, lambda: tf.matmul(
                        output, output_projection[0]) + output_projection[1],
                    lambda: output) for output in self.outputs[b]
            ]

        if not forward_only:
            with tf.name_scope("gradient_descent"):
                self.gradient_norms = []
                self.updates = []
                self.aj_losses = []
                self.gen_params = [
                    p for p in tf.trainable_variables() if name_scope in p.name
                ]
                #opt = tf.train.GradientDescentOptimizer(self.learning_rate)
                opt = tf.train.AdamOptimizer()
                for b in xrange(len(self.buckets)):
                    #R =  tf.sub(self.reward[b], self.reward_bias)
                    self.reward[b] = self.reward[b] - self.reward_bias
                    # tf.cond: conditional branch
                    adjusted_loss = tf.cond(
                        self.up_reward,
                        lambda: tf.mul(self.losses[b], self.reward[b]),
                        lambda: self.losses[b])

                    # adjusted_loss =  tf.cond(self.up_reward,
                    #                           lambda: tf.mul(self.losses[b], R),
                    #                           lambda: self.losses[b])
                    self.aj_losses.append(adjusted_loss)
                    gradients = tf.gradients(adjusted_loss, self.gen_params)
                    clipped_gradients, norm = tf.clip_by_global_norm(
                        gradients, self.max_gradient_norm)
                    self.gradient_norms.append(norm)
                    self.updates.append(
                        opt.apply_gradients(zip(clipped_gradients,
                                                self.gen_params),
                                            global_step=self.global_step))

        self.gen_variables = [
            k for k in tf.global_variables() if name_scope in k.name
        ]
        self.saver = tf.train.Saver(self.gen_variables)
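
An illustrative sketch (plain Python, invented numbers) of the reward-weighted loss built in the training branch above: when up_reward is True, the per-bucket loss is scaled by the reward minus the learned reward_bias, a REINFORCE-style baseline subtraction.

loss = 2.3            # sequence loss for one bucket
reward = 0.8          # e.g. a discriminator score for the generated reply
reward_bias = 0.5     # plays the role of self.reward_bias
up_reward = True

adjusted_loss = loss * (reward - reward_bias) if up_reward else loss
print(adjusted_loss)  # approximately 0.69
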
Example 3
    def __init__(self,
                 source_vocab_size,
                 target_vocab_size,
                 buckets,
                 size,
                 num_layers,
                 max_gradient_norm,
                 batch_size,
                 learning_rate,
                 learning_rate_decay_factor,
                 use_lstm=False,
                 num_samples=512,
                 forward_only=False,
                 scope_name='gen_seq2seq',
                 dtype=tf.float32):

        self.scope_name = scope_name
        with tf.variable_scope(self.scope_name):
            self.source_vocab_size = source_vocab_size
            self.target_vocab_size = target_vocab_size
            self.buckets = buckets
            self.learning_rate = tf.Variable(float(learning_rate),
                                             trainable=False,
                                             dtype=dtype)
            self.learning_rate_decay_op = self.learning_rate.assign(
                self.learning_rate * learning_rate_decay_factor)
            self.global_step = tf.Variable(0, trainable=False)
            self.batch_size = batch_size

            self.up_reward = tf.placeholder(tf.bool, name="up_reward")
            self.en_output_proj = tf.placeholder(tf.bool,
                                                 name="en_output_proj")
            # If we use sampled softmax, we need an output projection.
            output_projection = None
            softmax_loss_function = None

            # Sampled softmax only makes sense if we sample less than vocabulary size.

            def policy_gradient(logit, labels):
                def softmax(x):
                    return tf.exp(x) / tf.reduce_sum(tf.exp(x),
                                                     reduction_indices=0)

                prob = softmax(logit)
                #token = tf.argmax(logit, 0)
                return tf.reduce_max(prob)

            #softmax_loss_function = policy_gradient

            if num_samples > 0 and num_samples < self.target_vocab_size:
                w_t = tf.get_variable("proj_w", [self.target_vocab_size, size],
                                      dtype=dtype)
                w = tf.transpose(w_t)
                b = tf.get_variable("proj_b", [self.target_vocab_size],
                                    dtype=dtype)
                output_projection = (w, b)

                def sampled_loss(inputs, labels):
                    labels = tf.reshape(labels, [-1, 1])
                    # We need to compute the sampled_softmax_loss using 32bit floats to
                    # avoid numerical instabilities.
                    local_w_t = tf.cast(w_t, tf.float32)
                    local_b = tf.cast(b, tf.float32)
                    local_inputs = tf.cast(inputs, tf.float32)
                    return tf.cast(
                        tf.nn.sampled_softmax_loss(local_w_t, local_b,
                                                   local_inputs, labels,
                                                   num_samples,
                                                   self.target_vocab_size),
                        dtype)

                softmax_loss_function = sampled_loss

            # softmax_loss_function = control_flow_ops.cond(self.up_reward,
            #                                       lambda:policy_gradient,
            #                                       lambda:sampled_loss)
            softmax_loss_function = policy_gradient

            #loss_function = tf.select(self.up_reward, policy_gradient, softmax_loss_function)

            #softmax_loss_function = loss_function
            # Create the internal multi-layer cell for our RNN.
            single_cell = tf.nn.rnn_cell.GRUCell(size)
            if use_lstm:
                single_cell = tf.nn.rnn_cell.BasicLSTMCell(size)
            cell = single_cell
            if num_layers > 1:
                cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)

            # The seq2seq function: we use embedding for the input and attention.
            def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
                return rl_seq2seq.embedding_attention_seq2seq(
                    encoder_inputs,
                    decoder_inputs,
                    cell,
                    num_encoder_symbols=source_vocab_size,
                    num_decoder_symbols=target_vocab_size,
                    embedding_size=size,
                    output_projection=output_projection,
                    feed_previous=do_decode,
                    dtype=dtype)

            # Feeds for inputs.
            self.encoder_inputs = []
            self.decoder_inputs = []
            self.target_weights = []
            for i in xrange(buckets[-1][0]):  # Last bucket is the biggest one.
                self.encoder_inputs.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="encoder{0}".format(i)))
            for i in xrange(buckets[-1][1] + 1):
                self.decoder_inputs.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="decoder{0}".format(i)))
                self.target_weights.append(
                    tf.placeholder(dtype,
                                   shape=[None],
                                   name="weight{0}".format(i)))

            # Our targets are decoder inputs shifted by one.
            targets = [
                self.decoder_inputs[i + 1]
                for i in xrange(len(self.decoder_inputs) - 1)
            ]

            # Training outputs and losses.
            if forward_only:
                self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets(
                    self.encoder_inputs,
                    self.decoder_inputs,
                    targets,
                    self.target_weights,
                    buckets,
                    lambda x, y: seq2seq_f(x, y, True),
                    softmax_loss_function=softmax_loss_function)
                # If we use output projection, we need to project outputs for decoding.
                if output_projection is not None:
                    for b in xrange(len(buckets)):
                        self.outputs[b] = [
                            tf.matmul(output, output_projection[0]) +
                            output_projection[1] for output in self.outputs[b]
                        ]
            else:
                self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets(
                    self.encoder_inputs,
                    self.decoder_inputs,
                    targets,
                    self.target_weights,
                    buckets,
                    lambda x, y: seq2seq_f(x, y, False),
                    softmax_loss_function=softmax_loss_function)

            # for j in xrange(len(buckets)):
            #     output_seq = [int(np.argmax(logit, axis=1)) for logit in self.outputs[j]]
            # # for reinforcement learning
            # self.force_dec_input = tf.placeholder(tf.bool, name="force_dec_input")
            # self.en_output_proj = tf.placeholder(tf.bool, name="en_output_proj")
            # # Training outputs and losses.
            # #if forward_only:
            # self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets(
            #       self.encoder_inputs, self.decoder_inputs, targets,
            #       self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, tf.select(self.force_dec_input, False, True)),
            #       softmax_loss_function=softmax_loss_function)
            #   # If we use output projection, we need to project outputs for decoding.
            #   #if output_projection is not None:
            # for b in xrange(len(buckets)):
            #   self.outputs[b] = [
            #     control_flow_ops.cond(
            #       self.en_output_proj,
            #       lambda: tf.matmul(output, output_projection[0]) + output_projection[1],
            #       lambda: output
            #     )
            #     for output in self.outputs[b]
            #   ]

            # Gradients and SGD update operation for training the model.
            self.tvars = tf.trainable_variables()
            #if not forward_only:
            self.gradient_norms = []
            self.updates = []
            self.reward = [
                tf.placeholder(tf.float32, name="reward_%i" % i)
                for i in range(len(buckets))
            ]
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            for b in xrange(len(buckets)):
                adjusted_losses = tf.mul(self.losses[b], self.reward[b])
                gradients = tf.gradients(adjusted_losses, self.tvars)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, max_gradient_norm)
                self.gradient_norms.append(norm)
                self.updates.append(
                    opt.apply_gradients(zip(clipped_gradients, self.tvars),
                                        global_step=self.global_step))

            # self.saver = tf.train.Saver(tf.all_variables())
            all_variables = [
                k for k in tf.global_variables()
                if k.name.startswith(self.scope_name)
            ]
            self.saver = tf.train.Saver(all_variables)
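
The softmax inside policy_gradient above exponentiates raw logits directly, which can overflow for large values. A common numerically stable variant subtracts the per-axis maximum first; the NumPy sketch below is illustrative and not part of this repository.

import numpy as np

def stable_softmax(x, axis=0):
    z = x - np.max(x, axis=axis, keepdims=True)   # shifting does not change the result
    e = np.exp(z)
    return e / np.sum(e, axis=axis, keepdims=True)

logits = np.array([1000.0, 1001.0, 1002.0])
print(stable_softmax(logits))                     # finite probabilities, no overflow
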
Example 4
    def __init__(self,
                 config,
                 name_scope,
                 forward_only=False,
                 num_samples=256,
                 dtype=tf.float32):
        """
        定義了所有seq2seq有關的步驟
        
        1.embedding = 512 , learning rate = 0.5 , 
        2.Using GRU
        3.Using attention
        4.forword_only = train or predict
        5.Gradient decent using Adam and clipped gradient
        6.up_reward
        
        
        
        
        """
        # self.scope_name = scope_name
        # with tf.variable_scope(self.scope_name):
        source_vocab_size = config.vocab_size  # 35000
        target_vocab_size = config.vocab_size  # 35000
        emb_dim = config.emb_dim  # 512

        self.buckets = config.buckets  # [(5, 10), (10, 15), (20, 25), (40, 50)]
        self.learning_rate = tf.Variable(float(config.learning_rate),
                                         trainable=False,
                                         dtype=dtype)  # learning_rate = 0.5
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate *
            config.learning_rate_decay_factor)  # learning_rate_decay_factor
        self.global_step = tf.Variable(0, trainable=False)
        self.batch_size = config.batch_size  # 128
        self.num_layers = config.num_layers  # 2
        self.max_gradient_norm = config.max_gradient_norm  # 5.0

        self.mc_search = tf.placeholder(
            tf.bool, name="mc_search")  # boolean for mc_search
        self.forward_only = tf.placeholder(
            tf.bool, name="forward_only")  #boolean for forward_only
        self.up_reward = tf.placeholder(
            tf.bool, name="up_reward")  #boolean for up_reward
        self.reward_bias = tf.get_variable("reward_bias", [1],
                                           dtype=tf.float32)  # shape=(1,)

        # If we use sampled softmax, we need an output projection.
        output_projection = None
        softmax_loss_function = None
        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if num_samples > 0 and num_samples < target_vocab_size:  # 256 > 0 & 256 < 35000
            w_t = tf.get_variable("proj_w", [target_vocab_size, emb_dim],
                                  dtype=dtype)  # [35000,512]
            w = tf.transpose(w_t)  # [512 ,35000]
            b = tf.get_variable("proj_b", [target_vocab_size],
                                dtype=dtype)  # [35000]
            output_projection = (w, b)  #( [512 ,35000] , [35000])

            def sampled_loss(inputs, labels):
                labels = tf.reshape(labels, [-1, 1])
                # We need to compute the sampled_softmax_loss using 32bit floats to
                # avoid numerical instabilities.
                local_w_t = tf.cast(w_t, tf.float32)  # cast w_t to float32
                local_b = tf.cast(b, tf.float32)  # cast b to float32
                local_inputs = tf.cast(inputs, tf.float32)  # cast inputs to float32
                return tf.cast(
                    tf.nn.sampled_softmax_loss(local_w_t, local_b, labels,
                                               local_inputs, num_samples,
                                               target_vocab_size), dtype)
                # This is a faster way to train a softmax classifier over a huge number of classes.

            softmax_loss_function = sampled_loss

        # Create the internal multi-layer cell for our RNN.
        single_cell = tf.contrib.rnn.GRUCell(emb_dim)  #512
        cell = single_cell
        if self.num_layers > 1:  # 2
            cell = tf.contrib.rnn.MultiRNNCell(
                [single_cell] * self.num_layers)  # GRU * 2 (512)

        # The seq2seq function: we use embedding for the input and attention.
        # Attention is used.
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return rl_seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                cell,
                num_encoder_symbols=source_vocab_size,
                num_decoder_symbols=target_vocab_size,
                embedding_size=emb_dim,
                output_projection=output_projection,
                feed_previous=do_decode,
                mc_search=self.mc_search,
                dtype=dtype)

        # Feeds for inputs.
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        for i in xrange(self.buckets[-1][0]):  # Last bucket is the biggest one.
            self.encoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="encoder{0}".format(i)))
#            ex :
#            [<tf.Tensor 'encoder0:0' shape=(?,) dtype=int32>,
#             <tf.Tensor 'encoder1:0' shape=(?,) dtype=int32>,
#             .....
#             <tf.Tensor 'encoder39:0' shape=(?,) dtype=int32>]
# encoder_inputs holds one placeholder per position of the longest question.
        for i in xrange(self.buckets[-1][1] + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="decoder{0}".format(i)))
            # decoder_inputs holds one placeholder per position of the longest answer.
            self.target_weights.append(
                tf.placeholder(dtype, shape=[None],
                               name="weight{0}".format(i)))
            # target_weights holds one weight placeholder per position of the longest answer.
        self.reward = [
            tf.placeholder(tf.float32, name="reward_%i" % i)
            for i in range(len(self.buckets))
        ]
        #         ex:
        #         <tf.Tensor 'reward_0:0' shape=<unknown> dtype=float32>,
        #         <tf.Tensor 'reward_1:0' shape=<unknown> dtype=float32>,
        #         <tf.Tensor 'reward_2:0' shape=<unknown> dtype=float32>,
        #         <tf.Tensor 'reward_3:0' shape=<unknown> dtype=float32>

        # Our targets are decoder inputs shifted by one.

        targets = [
            self.decoder_inputs[i + 1]
            for i in xrange(len(self.decoder_inputs) - 1)
        ]

        # Run the bucketed model to get the outputs, losses, and encoder state.
        self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets(
            self.encoder_inputs,
            self.decoder_inputs,
            targets,
            self.target_weights,
            self.buckets,
            source_vocab_size,
            self.batch_size,
            lambda x, y: seq2seq_f(x, y,
                                   tf.where(self.forward_only, True, False)),
            output_projection=output_projection,
            softmax_loss_function=softmax_loss_function)

        for b in xrange(len(self.buckets)):
            self.outputs[b] = [
                tf.cond(
                    # condition:
                    # if forward_only is True, project the output to vocabulary logits
                    # if forward_only is False, keep the raw decoder output
                    self.forward_only,
                    lambda: tf.matmul(output, output_projection[0]) +
                    output_projection[1],
                    lambda: output) for output in self.outputs[b]
            ]

        # Gradient descent using Adam

        if not forward_only:
            with tf.name_scope("gradient_descent"):
                self.gradient_norms = []
                self.updates = []
                self.aj_losses = []
                self.gen_params = [
                    p for p in tf.trainable_variables() if name_scope in p.name
                ]
                #opt = tf.train.GradientDescentOptimizer(self.learning_rate)
                opt = tf.train.AdamOptimizer()
                for b in xrange(len(self.buckets)):
                    #R =  tf.subtract(self.reward[b], self.reward_bias)
                    # self.reward[b] = self.reward[b] - reward_bias
                    adjusted_loss = tf.cond(
                        self.up_reward,
                        # if up_reward is True, multiply the losses by the reward
                        # if up_reward is False, use the plain losses
                        lambda: tf.multiply(self.losses[b], self.reward[b]),
                        lambda: self.losses[b])

                    # adjusted_loss =  tf.cond(self.up_reward,
                    #                           lambda: tf.multiply(self.losses[b], R),
                    #                           lambda: self.losses[b])
                    self.aj_losses.append(adjusted_loss)
                    gradients = tf.gradients(adjusted_loss, self.gen_params)
                    clipped_gradients, norm = tf.clip_by_global_norm(
                        gradients,
                        self.max_gradient_norm)  # max_gradient_norm = 5
                    """
                     clipped gradients is:
                     1. 先求所有權重梯度的 root sum square (sumsq_diff)
                     2. if sumsq_diff > clip_gradient,則求縮放因子 scale_factor = clip_gradient / sumsq_diff
                     3. scale_factor在(0,1)之間
                     4. 如果sumsq_diff越大,那缩放因子将越小。
                     5. 将所有的权重梯度乘以这个缩放因子
                     6. 保证了在一次迭代更新中,所有权重的梯度的平方和(sumsq_diff)在一个设定范围以内,这个范围就是clip_gradient.
                     
                    """

                    self.gradient_norms.append(norm)
                    self.updates.append(
                        opt.apply_gradients(zip(clipped_gradients,
                                                self.gen_params),
                                            global_step=self.global_step))

        self.gen_variables = [
            k for k in tf.global_variables() if name_scope in k.name
        ]
        self.saver = tf.train.Saver(self.gen_variables)
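
A NumPy sketch (illustrative, not the TensorFlow implementation) of the global-norm clipping described in the docstring above: if the global L2 norm of all gradients exceeds clip_norm, every gradient is scaled by clip_norm / global_norm; otherwise the gradients pass through unchanged.

import numpy as np

def clip_by_global_norm(grads, clip_norm):
    global_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    scale = clip_norm / max(global_norm, clip_norm)    # in (0, 1], equals 1 when no clipping is needed
    return [g * scale for g in grads], global_norm

grads = [np.array([3.0, 4.0]), np.array([12.0])]       # global norm = 13
clipped, norm = clip_by_global_norm(grads, 5.0)
print(norm)                                            # 13.0
print(np.sqrt(sum(np.sum(g ** 2) for g in clipped)))   # 5.0 after clipping
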
Example 5
    def __init__(self,
                 config,
                 name_scope,
                 forward_only=False,
                 num_samples=256,
                 dtype=tf.float32):

        # self.scope_name = scope_name
        # with tf.variable_scope(self.scope_name):
        with tf.device("/gpu:0"):
            source_vocab_size = config.vocab_size
            target_vocab_size = config.vocab_size
            emb_dim = config.emb_dim
            word_embedding_size = config.word_embedding
            dropout = config.keep_prob
            self.config = config
            self.buckets = config.buckets
            self.learning_rate = tf.Variable(float(config.learning_rate),
                                             trainable=False,
                                             dtype=dtype)
            self.learning_rate_decay_op = self.learning_rate.assign(
                self.learning_rate * config.learning_rate_decay_factor)
            self.global_step = tf.Variable(0, trainable=False)
            self.batch_size = config.batch_size
            self.num_layers = config.num_layers
            self.max_gradient_norm = config.max_gradient_norm
            self.mc_search = tf.placeholder(tf.bool, name="mc_search")
            self.mc_position = tf.placeholder(tf.int32, name="mc_position")
            self.forward_only = tf.placeholder(tf.bool, name="forward_only")
            self.teacher_forcing = tf.placeholder(tf.bool,
                                                  name="teacher_forcing")
            self.up_reward = tf.placeholder(tf.bool, name="up_reward")
            self.reward_bias = tf.get_variable("reward_bias", [1],
                                               dtype=tf.float32)
            self.ent_weight = float(config.ent_weight)
            # If we use sampled softmax, we need an output projection.
            output_projection = None
            softmax_loss_function = None
            # Sampled softmax only makes sense if we sample less than vocabulary size.
            # if num_samples > 0 and num_samples < target_vocab_size:
            if num_samples > 0 and num_samples < target_vocab_size:
                w_t = tf.get_variable("proj_w", [target_vocab_size, emb_dim],
                                      dtype=dtype)
                w = tf.transpose(w_t)
                b = tf.get_variable("proj_b", [target_vocab_size], dtype=dtype)
                output_projection = (w, b)

                def sampled_loss(inputs, labels):
                    labels = tf.reshape(labels, [-1, 1])
                    # We need to compute the sampled_softmax_loss using 32bit floats to
                    # avoid numerical instabilities.
                    local_w_t = tf.cast(w_t, tf.float32)
                    local_b = tf.cast(b, tf.float32)
                    local_inputs = tf.cast(inputs, tf.float32)
                    return tf.cast(
                        tf.nn.sampled_softmax_loss(local_w_t, local_b, labels,
                                                   local_inputs, num_samples,
                                                   target_vocab_size), dtype)

                # softmax_loss_function = sampled_loss
                softmax_loss_function = None

            # Creation of the rnn cell
            def create_rnn_cell():
                encoDecoCell = tf.contrib.rnn.GRUCell(  # Or GRUCell, LSTMCell(args.hiddenSize)
                    emb_dim, )
                encoDecoCell = tf.contrib.rnn.DropoutWrapper(
                    encoDecoCell,
                    input_keep_prob=1.0,
                    output_keep_prob=dropout)
                return encoDecoCell

            single_cell = tf.contrib.rnn.MultiRNNCell(
                [create_rnn_cell() for _ in range(self.num_layers)], )
            # Create the internal multi-layer cell for our RNN.
            # single_cell = tf.contrib.rnn.GRUCell(emb_dim)
            # single_cell = tf.contrib.rnn.BasicLSTMCell(emb_dim)
            cell = single_cell

            # if self.num_layers > 1:
            #     cell = tf.contrib.rnn.MultiRNNCell([single_cell] * self.num_layers)

            # The seq2seq function: we use embedding for the input and attention.
            def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
                return rl_seq2seq.embedding_attention_seq2seq(
                    encoder_inputs,
                    decoder_inputs,
                    cell,
                    num_encoder_symbols=source_vocab_size,
                    num_decoder_symbols=target_vocab_size,
                    embedding_size=word_embedding_size,
                    output_projection=output_projection,
                    feed_previous=do_decode,
                    mc_search=self.mc_search,
                    dtype=dtype,
                    mc_position=self.mc_position)

            # Feeds for inputs.
            self.encoder_inputs = []
            self.decoder_inputs = []
            self.target_weights = []
            self.targets_input = []
            self.mc_sents = []
            for i in xrange(
                    self.buckets[-1][0]):  # Last bucket is the biggest one.
                self.encoder_inputs.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="encoder{0}".format(i)))
            for i in xrange(self.buckets[-1][1] + 1):
                self.decoder_inputs.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="decoder{0}".format(i)))
                self.target_weights.append(
                    tf.placeholder(dtype,
                                   shape=[None],
                                   name="weight{0}".format(i)))
                self.targets_input.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="target{0}".format(i)))

            # self.reward = [tf.placeholder(tf.float32, name="reward_%i" % i) for i in range(len(self.buckets))]
            self.reward = [
                tf.placeholder(tf.float32,
                               shape=[None, None],
                               name="reward_%i" % i)
                for i in range(len(self.buckets))
            ]

            # Our targets are decoder inputs shifted by one.
            # targets = [self.decoder_inputs[i + 1] for i in xrange(len(self.decoder_inputs) - 1)]

            self.outputs, self.losses, self.encoder_state, self.ent, self.mc_sents = rl_seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                self.targets_input,
                self.target_weights,
                self.reward,
                self.buckets,
                source_vocab_size,
                self.batch_size,
                lambda x, y: seq2seq_f(
                    x, y, tf.where(self.forward_only, True, False)),
                output_projection=output_projection,
                softmax_loss_function=softmax_loss_function)
            #
            for b in xrange(len(self.buckets)):
                self.outputs[b] = [
                    tf.cond(
                        self.forward_only,
                        lambda: tf.matmul(output, output_projection[
                            0]) + output_projection[1], lambda: output)
                    for output in self.outputs[b]
                ]

            #
            # forward_only==False  ----> adversarial learning
            if not forward_only:
                with tf.name_scope("gradient_descent"):
                    self.gradient_norms = []
                    self.updates = []
                    self.aj_losses = []
                    self.gen_params = [
                        p for p in tf.trainable_variables()
                        if name_scope in p.name
                    ]
                    # opt = tf.train.GradientDescentOptimizer(self.learning_rate)
                    # '''
                    opt = tf.train.AdamOptimizer(
                        learning_rate=self.learning_rate,
                        beta1=0.9,
                        beta2=0.999,
                        epsilon=1e-08)
                    # '''
                    for b in xrange(len(self.buckets)):
                        # R =  tf.subtract(self.reward[b], self.reward_bias)
                        # self.reward[b] = self.reward[b] - reward_bias
                        # adjusted_loss = tf.cond(self.up_reward,
                        #                           lambda:tf.multiply(self.losses[b], self.reward[b]),
                        #                           lambda: self.losses[b])

                        adjusted_loss = self.losses[b]
                        adjusted_loss = tf.cond(
                            self.teacher_forcing, lambda: adjusted_loss,
                            lambda: adjusted_loss + self.ent_weight * self.ent[
                                b])
                        # adjusted_loss -= self.ent_weight * self.ent[b]
                        # if up_reward==true, lambda:tf.multiply(self.losses[b], self.reward[b]) will be executed

                        # adjusted_loss =  tf.cond(self.up_reward,
                        #                           lambda: tf.multiply(self.losses[b], R),
                        #                           lambda: self.losses[b])
                        self.aj_losses.append(adjusted_loss)
                        gradients, variables = zip(
                            *opt.compute_gradients(adjusted_loss))
                        capped_gradients, _ = tf.clip_by_global_norm(
                            gradients, 5.0)
                        optimizer = opt.apply_gradients(
                            zip(capped_gradients, variables),
                            global_step=self.global_step)
                        self.updates.append(optimizer)
                        # self.updates.append(opt.minimize(adjusted_loss, global_step=self.global_step))

            self.gen_variables = [
                k for k in tf.global_variables() if name_scope in k.name
            ]
            self.saver = tf.train.Saver(self.gen_variables)
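
A hypothetical sketch (all values invented; the training loop itself is not part of this snippet) of how the per-time-step placeholders defined above could be fed for one bucket. The encoder{i}/decoder{i}/weight{i} names follow the constructor; how the arrays reach session.run is up to the surrounding code.

import numpy as np

bucket = (5, 10)                 # (encoder_size, decoder_size) for one bucket
batch_size = 2
feeds = {}
for i in range(bucket[0]):
    feeds["encoder{0}".format(i)] = np.zeros(batch_size, dtype=np.int32)
for i in range(bucket[1] + 1):
    feeds["decoder{0}".format(i)] = np.zeros(batch_size, dtype=np.int32)
    feeds["weight{0}".format(i)] = np.ones(batch_size, dtype=np.float32)
# In a real run these arrays would be passed via feed_dict together with the
# boolean flags (forward_only, mc_search, teacher_forcing, ...).
print(len(feeds))                # 5 encoder + 11 decoder + 11 weight = 27 feeds
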
Example 6
    def __init__(self,
                 config,
                 use_lstm=False,
                 num_samples=512,
                 forward=False,
                 scope_name='gen_seq2seq',
                 dtype=tf.float32):

        self.scope_name = scope_name
        with tf.variable_scope(self.scope_name):
            self.source_vocab_size = config.vocab_size
            self.target_vocab_size = config.vocab_size
            self.buckets = config.buckets
            self.learning_rate = tf.Variable(float(config.learning_rate),
                                             trainable=False,
                                             dtype=dtype)
            self.learning_rate_decay_op = self.learning_rate.assign(
                self.learning_rate * config.learning_rate_decay_factor)
            self.global_step = tf.Variable(0, trainable=False)
            self.batch_size = config.batch_size
            self.emb_dim = config.emb_dim
            self.num_layers = config.num_layers
            self.max_gradient_norm = config.max_gradient_norm

            #self.up_reward = tf.placeholder(tf.bool, name="up_reward")
            self.mc_search = tf.placeholder(tf.bool, name="mc_search")
            self.forward_only = tf.placeholder(tf.bool, name="forward_only")

            # If we use sampled softmax, we need an output projection.
            output_projection = None
            softmax_loss_function = None

            # Create the internal multi-layer cell for our RNN.
            single_cell = tf.nn.rnn_cell.GRUCell(self.emb_dim)
            if use_lstm:
                single_cell = tf.nn.rnn_cell.BasicLSTMCell(self.emb_dim)
            cell = single_cell
            if self.num_layers > 1:
                cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] *
                                                   self.num_layers)

            # The seq2seq function: we use embedding for the input and attention.
            def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
                return rl_seq2seq.embedding_attention_seq2seq(
                    encoder_inputs,
                    decoder_inputs,
                    cell,
                    num_encoder_symbols=self.source_vocab_size,
                    num_decoder_symbols=self.target_vocab_size,
                    embedding_size=self.emb_dim,
                    output_projection=output_projection,
                    feed_previous=do_decode,
                    mc_search=self.mc_search,
                    dtype=dtype)

            # Feeds for inputs.
            self.encoder_inputs = []
            self.decoder_inputs = []
            self.target_weights = []
            for i in xrange(
                    self.buckets[-1][0]):  # Last bucket is the biggest one.
                self.encoder_inputs.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="encoder{0}".format(i)))
            for i in xrange(self.buckets[-1][1] + 1):
                self.decoder_inputs.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="decoder{0}".format(i)))
                self.target_weights.append(
                    tf.placeholder(dtype,
                                   shape=[None],
                                   name="weight{0}".format(i)))
            self.reward = [
                tf.placeholder(tf.float32, name="reward_%i" % i)
                for i in range(len(self.buckets))
            ]

            # Our targets are decoder inputs shifted by one.
            targets = [
                self.decoder_inputs[i + 1]
                for i in xrange(len(self.decoder_inputs) - 1)
            ]

            self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                self.buckets,
                self.emb_dim,
                self.batch_size,
                lambda x, y: seq2seq_f(
                    x, y, tf.select(self.forward_only, True, False)),
                output_projection=output_projection,
                softmax_loss_function=softmax_loss_function)

            with tf.name_scope("gradient_descent"):
                self.gradient_norms = []
                self.updates = []
                self.gen_params = [
                    p for p in tf.trainable_variables()
                    if self.scope_name in p.name
                ]
                opt = tf.train.GradientDescentOptimizer(self.learning_rate)
                for b in xrange(len(self.buckets)):
                    adjusted_losses = tf.mul(self.losses[b], self.reward[b])
                    gradients = tf.gradients(adjusted_losses, self.gen_params)
                    clipped_gradients, norm = tf.clip_by_global_norm(
                        gradients, self.max_gradient_norm)
                    self.gradient_norms.append(norm)
                    self.updates.append(
                        opt.apply_gradients(zip(clipped_gradients,
                                                self.gen_params),
                                            global_step=self.global_step))

            self.gen_variables = [
                k for k in tf.global_variables() if self.scope_name in k.name
            ]
            self.saver = tf.train.Saver(self.gen_variables)
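
A minimal sketch of the "targets are decoder inputs shifted by one" comment used throughout these models: the decoder is fed [GO, w1, w2, ...] and trained to predict [w1, w2, ..., EOS]. The token ids below are invented for illustration.

GO_ID, EOS_ID = 1, 2
decoder_inputs = [GO_ID, 11, 12, 13, EOS_ID]
targets = [decoder_inputs[i + 1] for i in range(len(decoder_inputs) - 1)]
print(targets)      # [11, 12, 13, 2]
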