Example #1
0
    def build_training_process(self):
        """Build the controller's training graph.

        Combines the wider-actor and deeper-actor policy objectives (each
        clipped via its importance-sampling ratio), adds an entropy bonus,
        and creates:
          * ``self.train_step`` — list with the policy-gradient update op,
          * ``self.update_wider_q`` / ``self.update_deeper_q`` — Adam ops
            regressing each actor's selected Q-values onto the Q-targets,
          * optionally the baseline network (``build_baseline_network``).
        """
        # Upper clip value applied to the importance-sampling ratios below.
        c = tf.constant(0.8, dtype=tf.float32)
        # if self.wider_seg_deeper > 0, then get wider_side_obj, else wider_entropy = 0
        wider_side_obj, wider_entropy = tf.cond(
            tf.greater(self.wider_seg_deeper,
                       0), lambda: self.get_wider_side_obj(), lambda:
            (tf.constant(0.0, dtype=tf.float32),
             tf.constant(0.0, dtype=tf.float32)))
        # Clip the ratio at c before weighting the objective (PPO-style
        # one-sided clipping — assumes wider_rho is the IS ratio; TODO confirm).
        wider_side_obj = tf.reduce_sum(wider_side_obj *
                                       tf.minimum(c, self.wider_rho))
        # Number of samples in the batch, taken from the reward tensor.
        batch_size = array_ops.shape(self.reward)[0]
        # Same pattern for the deeper actor: zero objective/entropy when
        # there are no "deeper" decisions in this batch.
        deeper_side_obj, deeper_entropy = tf.cond(
            self.has_deeper, lambda: self.get_deeper_side_obj(), lambda:
            (tf.constant(0.0, dtype=tf.float32),
             tf.constant(0.0, dtype=tf.float32)))
        deeper_side_obj = tf.reduce_sum(deeper_side_obj *
                                        tf.minimum(c, self.deeper_rho))
        self.obj = wider_side_obj + deeper_side_obj
        # Entropy averaged over the batch, weighting each segment's entropy
        # by the number of samples it covers (wider_seg_deeper is the split
        # index between the two segments).
        entropy_term = wider_entropy * tf.cast(self.wider_seg_deeper, tf.float32) + \
                       deeper_entropy * tf.cast(batch_size - self.wider_seg_deeper, tf.float32)
        entropy_term /= tf.cast(batch_size, tf.float32)

        # Negate: the optimizer minimizes, so this maximizes obj + entropy bonus.
        g = -self.obj - self.entropy_penalty * entropy_term

        optimizer = BasicModel.build_optimizer(self.learning_rate,
                                               self.opt_config[0],
                                               self.opt_config[1])
        self.train_step = [optimizer.minimize(g)]

        # MSE regression of the wider actor's selected Q-values onto the
        # stored Q-return targets.
        wq = tf.reshape(self.wider_actor.selected_q, [-1])
        w_loss = tf.losses.mean_squared_error(self.wider_qrets, wq)
        self.update_wider_q = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(loss=w_loss)

        # Same Q-value regression for the deeper actor.
        dq = tf.reshape(self.deeper_actor.selected_q, [-1])
        d_loss = tf.losses.mean_squared_error(self.deeper_qrets, dq)
        self.update_deeper_q = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(loss=d_loss)
        # add baseline to training step
        if self.baseline_actor is not None:
            self.build_baseline_network()
Example #2
0
    def build_training_process(self):
        """Build the controller's policy-gradient training op.

        Sums the wider-actor and deeper-actor objectives into ``self.obj``,
        adds a batch-averaged entropy bonus weighted by ``entropy_penalty``,
        and stores the resulting minimize op in ``self.train_step``.
        """
        # Wider-actor objective; both objective and entropy are 0 when the
        # batch contains no samples in the "wider" segment.
        wider_side_obj, wider_entropy = tf.cond(
            tf.greater(self.wider_seg_deeper, 0),
            lambda: self.get_wider_side_obj(),
            lambda: (tf.constant(0.0, dtype=tf.float32),
                     tf.constant(0.0, dtype=tf.float32)))
        # Batch size taken from the reward tensor's leading dimension.
        batch_size = array_ops.shape(self.reward)[0]
        # Deeper-actor objective; zeros when no "deeper" decisions exist.
        deeper_side_obj, deeper_entropy = tf.cond(
            self.has_deeper,
            lambda: self.get_deeper_side_obj(),
            lambda: (tf.constant(0.0, dtype=tf.float32),
                     tf.constant(0.0, dtype=tf.float32)))
        self.obj = wider_side_obj + deeper_side_obj
        # Entropy averaged over the batch: each segment's entropy weighted by
        # how many samples fall in it (wider_seg_deeper is the split index).
        entropy_term = (wider_entropy * tf.cast(self.wider_seg_deeper, tf.float32) +
                        deeper_entropy * tf.cast(batch_size - self.wider_seg_deeper, tf.float32))
        entropy_term /= tf.cast(batch_size, tf.float32)

        optimizer = BasicModel.build_optimizer(self.learning_rate,
                                               self.opt_config[0],
                                               self.opt_config[1])
        # Minimize the negation to maximize objective + entropy bonus.
        loss = -self.obj - self.entropy_penalty * entropy_term
        self.train_step = optimizer.minimize(loss)
Example #3
0
    def build_training_process(self):
        """Construct the policy-gradient objective and its training op.

        ``self.obj`` accumulates the wider/deeper actor objectives; the
        training op maximizes ``obj + entropy_penalty * mean_entropy`` and
        is stored (as a one-element list) in ``self.train_step``. If a
        baseline actor is configured, its network is built afterwards.
        """
        def zero_pair():
            # Fallback (objective, entropy) when a segment has no samples.
            return (tf.constant(0.0, dtype=tf.float32),
                    tf.constant(0.0, dtype=tf.float32))

        wider_obj, wider_ent = tf.cond(
            tf.greater(self.wider_seg_deeper, 0),
            self.get_wider_side_obj,
            zero_pair)
        n_samples = array_ops.shape(self.reward)[0]
        deeper_obj, deeper_ent = tf.cond(
            self.has_deeper,
            self.get_deeper_side_obj,
            zero_pair)
        self.obj = wider_obj + deeper_obj

        # Batch-averaged entropy: each segment weighted by its sample count.
        wider_count = tf.cast(self.wider_seg_deeper, tf.float32)
        deeper_count = tf.cast(n_samples - self.wider_seg_deeper, tf.float32)
        entropy_term = (wider_ent * wider_count + deeper_ent * deeper_count) / \
            tf.cast(n_samples, tf.float32)

        optimizer = BasicModel.build_optimizer(
            self.learning_rate, self.opt_config[0], self.opt_config[1])
        # Negated so that minimizing maximizes objective + entropy bonus.
        neg_objective = -self.obj - self.entropy_penalty * entropy_term
        self.train_step = [optimizer.minimize(neg_objective)]
        # add baseline to training step
        if self.baseline_actor is not None:
            self.build_baseline_network()