import tensorflow as tf
from tensorflow.python.ops import array_ops


def build_training_process(self):
    c = tf.constant(0.8, dtype=tf.float32)  # clipping threshold for the importance weights
    # if self.wider_seg_deeper > 0, get the wider-side objective; otherwise both
    # the objective and the entropy contribution are zero
    wider_side_obj, wider_entropy = tf.cond(
        tf.greater(self.wider_seg_deeper, 0),
        lambda: self.get_wider_side_obj(),
        lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32)),
    )
    # clip the wider importance weight rho at c before weighting the objective
    wider_side_obj = tf.reduce_sum(wider_side_obj * tf.minimum(c, self.wider_rho))
    batch_size = array_ops.shape(self.reward)[0]
    deeper_side_obj, deeper_entropy = tf.cond(
        self.has_deeper,
        lambda: self.get_deeper_side_obj(),
        lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32)),
    )
    deeper_side_obj = tf.reduce_sum(deeper_side_obj * tf.minimum(c, self.deeper_rho))
    self.obj = wider_side_obj + deeper_side_obj

    # entropy term: average of the wider/deeper entropies, weighted by how many
    # samples in the batch belong to each side
    entropy_term = wider_entropy * tf.cast(self.wider_seg_deeper, tf.float32) + \
        deeper_entropy * tf.cast(batch_size - self.wider_seg_deeper, tf.float32)
    entropy_term /= tf.cast(batch_size, tf.float32)

    # minimize the negative objective, i.e. maximize obj plus the entropy bonus
    g = -self.obj - self.entropy_penalty * entropy_term
    optimizer = BasicModel.build_optimizer(self.learning_rate, self.opt_config[0], self.opt_config[1])
    self.train_step = [optimizer.minimize(g)]

    # regress the wider actor's selected Q-values toward the Q returns
    wq = tf.reshape(self.wider_actor.selected_q, [-1])
    w_loss = tf.losses.mean_squared_error(self.wider_qrets, wq)
    self.update_wider_q = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(loss=w_loss)

    # likewise for the deeper actor
    dq = tf.reshape(self.deeper_actor.selected_q, [-1])
    d_loss = tf.losses.mean_squared_error(self.deeper_qrets, dq)
    self.update_deeper_q = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(loss=d_loss)

    # add baseline to training step
    if self.baseline_actor is not None:
        self.build_baseline_network()
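In this first variant, `tf.minimum(c, rho)` caps each importance weight at c = 0.8, a one-sided clipped surrogate. As a sketch (the notation here is mine, not from the code), the quantity `g` is the negative of

J = \sum_{i} \min(c, \rho^{w}_{i})\, J^{w}_{i}
  + \sum_{j} \min(c, \rho^{d}_{j})\, J^{d}_{j}
  + \lambda \, \frac{n_{w} H_{w} + n_{d} H_{d}}{n_{w} + n_{d}},

where \lambda is `entropy_penalty`, n_w is `wider_seg_deeper`, n_d is `batch_size - wider_seg_deeper`, and H_w, H_d are the wider/deeper entropies, so minimizing `g` maximizes the clipped objective plus the batch-weighted entropy bonus.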
def build_training_process(self):
    wider_side_obj, wider_entropy = tf.cond(
        tf.greater(self.wider_seg_deeper, 0),
        lambda: self.get_wider_side_obj(),
        lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32)),
    )
    batch_size = array_ops.shape(self.reward)[0]
    deeper_side_obj, deeper_entropy = tf.cond(
        self.has_deeper,
        lambda: self.get_deeper_side_obj(),
        lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32)),
    )
    self.obj = wider_side_obj + deeper_side_obj

    # entropy term: average of the wider/deeper entropies, weighted by how many
    # samples in the batch belong to each side
    entropy_term = wider_entropy * tf.cast(self.wider_seg_deeper, tf.float32) + \
        deeper_entropy * tf.cast(batch_size - self.wider_seg_deeper, tf.float32)
    entropy_term /= tf.cast(batch_size, tf.float32)

    optimizer = BasicModel.build_optimizer(self.learning_rate, self.opt_config[0], self.opt_config[1])
    # minimize the negative objective, i.e. maximize obj plus the entropy bonus
    self.train_step = optimizer.minimize(-self.obj - self.entropy_penalty * entropy_term)
def build_training_process(self):
    # if self.wider_seg_deeper > 0, get the wider-side objective; otherwise both
    # the objective and the entropy contribution are zero
    wider_side_obj, wider_entropy = tf.cond(
        tf.greater(self.wider_seg_deeper, 0),
        lambda: self.get_wider_side_obj(),
        lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32)),
    )
    batch_size = array_ops.shape(self.reward)[0]
    deeper_side_obj, deeper_entropy = tf.cond(
        self.has_deeper,
        lambda: self.get_deeper_side_obj(),
        lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32)),
    )
    self.obj = wider_side_obj + deeper_side_obj

    entropy_term = wider_entropy * tf.cast(self.wider_seg_deeper, tf.float32) + \
        deeper_entropy * tf.cast(batch_size - self.wider_seg_deeper, tf.float32)
    entropy_term /= tf.cast(batch_size, tf.float32)

    optimizer = BasicModel.build_optimizer(self.learning_rate, self.opt_config[0], self.opt_config[1])
    self.train_step = [optimizer.minimize(-self.obj - self.entropy_penalty * entropy_term)]

    # add baseline to training step
    if self.baseline_actor is not None:
        self.build_baseline_network()
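For orientation, here is a minimal sketch of how the resulting ops might be driven in a TF 1.x session. The attribute names follow the code above; the assumption that `reward`, `wider_seg_deeper`, `has_deeper`, and `learning_rate` are feedable placeholders, and all concrete feed values, are mine.

# Hedged usage sketch (TF 1.x graph mode), assuming `controller` is an instance
# of one of the classes above and the attributes fed below are placeholders;
# the feed values are illustrative only.
import numpy as np
import tensorflow as tf

controller.build_training_process()  # adds self.obj and self.train_step to the graph

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(controller.train_step, feed_dict={
        controller.reward: np.array([0.3, -0.1, 0.7], dtype=np.float32),
        controller.wider_seg_deeper: 2,  # first 2 samples are Net2Wider decisions
        controller.has_deeper: True,     # batch also contains Net2Deeper decisions
        controller.learning_rate: 1e-3,
    })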