Example #1
0
File: ppo.py — Project: XinyiYS/reaver
    def loss_fn(self, policy=None, value=None):
        """Build the clipped PPO loss graph and its feed placeholders.

        Args:
            policy: optional policy object exposing `logli` and `entropy`
                tensors; required (with `value`) when `self.subenvs` is set,
                otherwise `self.policy` is used.
            value: optional value-estimate tensor; required (with `policy`)
                when `self.subenvs` is set, otherwise `self.value` is used.

        Returns:
            A tuple of (full_loss,
                        [policy_loss, value_loss, entropy_loss],
                        [adv, returns, logli_old, value_old]).
        """
        adv = tf.placeholder(tf.float32, [None], name="advantages")
        returns = tf.placeholder(tf.float32, [None], name="returns")
        logli_old = tf.placeholder(tf.float32, [None], name="logli_old")
        value_old = tf.placeholder(tf.float32, [None], name="value_old")

        # The original branches were identical except for where the policy
        # and value tensors come from; resolve that once, then build the
        # loss a single time instead of duplicating ~20 lines.
        if not self.subenvs:
            policy, value = self.policy, self.value
        else:
            assert policy is not None and value is not None, \
                "Missing variables representing <policy> and <value>"

        # Importance-sampling ratio between current and behavior policy,
        # clipped per the PPO surrogate objective.
        ratio = tf.exp(policy.logli - logli_old)
        clipped_ratio = tf.clip_by_value(ratio, 1 - self.clip_ratio,
                                         1 + self.clip_ratio)

        value_err = (value - returns)**2
        if self.clip_value > 0.0:
            # Optionally restrict the value update to a band around the old
            # estimate and keep the pessimistic (larger) error, mirroring
            # the ratio clipping above.
            clipped_value = tf.clip_by_value(value,
                                             value_old - self.clip_value,
                                             value_old + self.clip_value)
            clipped_value_err = (clipped_value - returns)**2
            value_err = tf.maximum(value_err, clipped_value_err)

        policy_loss = -tf.reduce_mean(
            tf.minimum(adv * ratio, adv * clipped_ratio))
        value_loss = tf.reduce_mean(value_err) * self.value_coef
        entropy_loss = tf.reduce_mean(policy.entropy) * self.entropy_coef

        # we want to reduce policy and value errors, and maximize entropy
        # but since optimizer is minimizing the signs are opposite
        full_loss = policy_loss + value_loss - entropy_loss

        return full_loss, [policy_loss, value_loss,
                           entropy_loss], [adv, returns, logli_old, value_old]
Example #2
0
File: utils.py — Project: novaintrovert/mesh
def learning_rate_schedule_noam(train_steps,
                                warmup_steps=10000,
                                linear_decay_fraction=0.1,
                                multiplier=1.0):
    """Noam's favorite learning-rate schedule.

    (rsqrt(max(step_num, warmup_steps))
     * multiplier
     * min(1.0, (train_steps-step_num)/(train_steps*linear_decay_fraction)))

    Args:
      train_steps: a number
      warmup_steps: a number
      linear_decay_fraction: a number
      multiplier: a number

    Returns:
      a tf.scalar
    """
    total_steps = float(train_steps)
    step = tf.cast(tf.get_global_step(), tf.float32)
    # Inverse-square-root decay, held flat until warmup ends.
    lr = multiplier * tf.math.rsqrt(tf.maximum(step, warmup_steps))
    if linear_decay_fraction > 0:
        # Ramp linearly down to zero over the final fraction of training.
        decay = (total_steps - step) / (total_steps * linear_decay_fraction)
        lr *= tf.minimum(1.0, decay)
    return lr
Example #3
0
    def loss_fn(self):
        """Assemble the clipped PPO loss and the placeholders it feeds on."""
        adv = tf.placeholder(tf.float32, [None], name="advantages")
        returns = tf.placeholder(tf.float32, [None], name="returns")
        logli_old = tf.placeholder(tf.float32, [None], name="logli_old")

        # Probability ratio between the current and the behavior policy.
        ratio = tf.exp(self.policy.logli - logli_old)
        lo, hi = 1 - self.clip_ratio, 1 + self.clip_ratio
        clipped_ratio = tf.clip_by_value(ratio, lo, hi)

        surrogate = tf.minimum(adv * ratio, adv * clipped_ratio)
        policy_loss = -tf.reduce_mean(surrogate)
        # TODO clip value loss
        value_loss = self.value_coef * tf.reduce_mean((self.value - returns)**2)
        entropy_loss = self.entropy_coef * tf.reduce_mean(self.policy.entropy)
        # Policy/value errors are minimized while entropy is maximized;
        # the optimizer minimizes, hence entropy enters with a minus sign.
        full_loss = policy_loss + value_loss - entropy_loss

        return full_loss, [policy_loss, value_loss, entropy_loss], [adv, returns, logli_old]
Example #4
0
    def loss_fn(self):
        """Build the PPO clipped-surrogate loss graph.

        Returns the combined loss, its three components, and the
        placeholders that must be fed at training time.
        """
        adv = tf.placeholder(tf.float32, [None], name="advantages")
        returns = tf.placeholder(tf.float32, [None], name="returns")
        logli_old = tf.placeholder(tf.float32, [None], name="logli_old")

        # Likelihood ratio of new vs. old policy, then its clipped twin.
        ratio = tf.exp(self.policy.logli - logli_old)
        clipped_ratio = tf.clip_by_value(
            ratio, 1 - self.clip_ratio, 1 + self.clip_ratio)

        # Pessimistic surrogate: take the smaller of the two objectives.
        policy_loss = -tf.reduce_mean(
            tf.minimum(adv * ratio, adv * clipped_ratio))
        # TODO clip value loss
        squared_err = (self.value - returns)**2
        value_loss = tf.reduce_mean(squared_err) * self.value_coef
        entropy_loss = tf.reduce_mean(self.policy.entropy) * self.entropy_coef
        # Minimizing the total, so entropy (to be maximized) is subtracted.
        full_loss = policy_loss + value_loss - entropy_loss

        components = [policy_loss, value_loss, entropy_loss]
        placeholders = [adv, returns, logli_old]
        return full_loss, components, placeholders
Example #5
0
def pad_or_clip(mesh_inputs, view_indices_2d_inputs, pad_or_clip_size):
    """Pads and clips the points and correspondences."""
    if standard_fields.InputDataFields.point_positions not in mesh_inputs:
        return
    positions = mesh_inputs[standard_fields.InputDataFields.point_positions]
    num_valid_points = tf.shape(positions)[0]
    if pad_or_clip_size:
        # After clipping, at most `pad_or_clip_size` points remain valid.
        num_valid_points = tf.minimum(num_valid_points, pad_or_clip_size)
        for key in sorted(mesh_inputs):
            channels = mesh_inputs[key].get_shape().as_list()[1]
            mesh_inputs[key] = shape_utils.pad_or_clip_nd(
                tensor=mesh_inputs[key],
                output_shape=[pad_or_clip_size, channels])
        for key in sorted(view_indices_2d_inputs):
            images = view_indices_2d_inputs[key].get_shape().as_list()[0]
            if images is None:
                # Static image count unknown; fall back to the dynamic shape.
                images = tf.shape(view_indices_2d_inputs[key])[0]
            # Shift indices by +1 before padding and -1 after, so padded
            # entries end up as -1 (presumably marking invalid indices —
            # assumes pad_or_clip_nd zero-pads; verify against shape_utils).
            view_indices_2d_inputs[key] = shape_utils.pad_or_clip_nd(
                tensor=(view_indices_2d_inputs[key] + 1),
                output_shape=[images, pad_or_clip_size, 2]) - 1
    mesh_inputs[
        standard_fields.InputDataFields.num_valid_points] = num_valid_points