Code Example #1
  def get_support_set_softmax(self, logits, class_ids):
    """Softmax normalize over the support set.

    Args:
      logits: [N_k, H*W, Q] dimensional tensor.
      class_ids: [N_k] tensor giving the support-set-id of each image.

    Returns:
      Softmax-normalized logits over the support set.

    softmax(x) = np.exp(x) / np.sum(np.exp(x), axis)
    """
    # Subtract the per-class max logit for numerical stability (the usual
    # log-sum-exp trick, with the max taken over the whole support class).
    max_logit = tf.reduce_max(logits, axis=1, keepdims=True)
    max_logit = tf.math.unsorted_segment_max(max_logit, class_ids,
                                             tf.reduce_max(class_ids) + 1)
    max_logit = tf.gather(max_logit, class_ids)
    logits_reduc = logits - max_logit

    # Normalize by the summed exponentials of every image in the same class.
    exp_x = tf.exp(logits_reduc)
    sum_exp_x = tf.reduce_sum(exp_x, axis=1, keepdims=True)
    sum_exp_x = tf.math.unsorted_segment_sum(sum_exp_x, class_ids,
                                             tf.reduce_max(class_ids) + 1)
    log_sum_exp_x = tf.math.log(sum_exp_x)
    log_sum_exp_x = tf.gather(log_sum_exp_x, class_ids)

    norm_logits = logits_reduc - log_sum_exp_x
    softmax = tf.exp(norm_logits)
    return softmax
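
The segment ops above implement a numerically stable softmax whose normalization runs over every image of the same class, not just over one tensor axis. A condensed, self-contained sketch of the same pattern on toy shapes (all values hypothetical; TF 2.x eager mode assumed):

import tensorflow as tf

# Toy inputs: 3 support images, H*W = 2 positions, Q = 1 query.
logits = tf.constant([[[1.0], [2.0]],
                      [[0.5], [0.0]],
                      [[3.0], [1.0]]])   # [N_k=3, H*W=2, Q=1]
class_ids = tf.constant([0, 0, 1])       # images 0 and 1 share a class

max_logit = tf.reduce_max(logits, axis=1, keepdims=True)
max_logit = tf.math.unsorted_segment_max(max_logit, class_ids,
                                         tf.reduce_max(class_ids) + 1)
exp_x = tf.exp(logits - tf.gather(max_logit, class_ids))
sum_exp_x = tf.math.unsorted_segment_sum(
    tf.reduce_sum(exp_x, axis=1, keepdims=True), class_ids,
    tf.reduce_max(class_ids) + 1)
softmax = exp_x / tf.gather(sum_exp_x, class_ids)

# Summing the result over all positions of each class gives 1 per class.
print(tf.math.unsorted_segment_sum(tf.reduce_sum(softmax, axis=1),
                                   class_ids, 2))   # ~[[1.], [1.]]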
Code Example #2
File: ppo.py Project: XinyiYS/reaver
    def loss_fn(self, policy=None, value=None):
        adv = tf.placeholder(tf.float32, [None], name="advantages")
        returns = tf.placeholder(tf.float32, [None], name="returns")
        logli_old = tf.placeholder(tf.float32, [None], name="logli_old")
        value_old = tf.placeholder(tf.float32, [None], name="value_old")

        if not self.subenvs:
            policy, value = self.policy, self.value
        else:
            assert policy is not None and value is not None, \
                "Missing variables representing <policy> and <value>"

        ratio = tf.exp(policy.logli - logli_old)
        clipped_ratio = tf.clip_by_value(ratio, 1 - self.clip_ratio,
                                         1 + self.clip_ratio)

        value_err = (value - returns)**2
        if self.clip_value > 0.0:
            # PPO2-style value clipping: keep the new value estimate within
            # clip_value of the old one and take the worse of the two errors.
            clipped_value = tf.clip_by_value(value,
                                             value_old - self.clip_value,
                                             value_old + self.clip_value)
            clipped_value_err = (clipped_value - returns)**2
            value_err = tf.maximum(value_err, clipped_value_err)

        policy_loss = -tf.reduce_mean(
            tf.minimum(adv * ratio, adv * clipped_ratio))
        value_loss = tf.reduce_mean(value_err) * self.value_coef
        entropy_loss = tf.reduce_mean(policy.entropy) * self.entropy_coef

        # we want to minimize policy and value errors and maximize entropy,
        # but since the optimizer minimizes, the entropy term's sign is flipped
        full_loss = policy_loss + value_loss - entropy_loss

        return full_loss, [policy_loss, value_loss,
                           entropy_loss], [adv, returns, logli_old, value_old]
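
Both branches compute the standard PPO clipped surrogate: with probability ratio r = exp(log pi_new - log pi_old) and advantage A, the policy loss is -E[min(r*A, clip(r, 1-eps, 1+eps)*A)]. A toy eager-mode sketch of just that term (values and eps = 0.2 are made up; the original runs in graph mode with placeholders):

import tensorflow as tf

clip_ratio = 0.2                                   # hypothetical epsilon
logli = tf.constant([0.2, -1.0])                   # new log-probs
logli_old = tf.constant([0.0, -0.1])               # behavior log-probs
adv = tf.constant([1.0, -2.0])

ratio = tf.exp(logli - logli_old)                  # ~[1.22, 0.41]
clipped_ratio = tf.clip_by_value(ratio, 1 - clip_ratio, 1 + clip_ratio)
# min() forms a pessimistic lower bound on the surrogate objective, removing
# the incentive to push the ratio outside [1 - eps, 1 + eps].
policy_loss = -tf.reduce_mean(tf.minimum(adv * ratio, adv * clipped_ratio))
print(policy_loss.numpy())                         # ~0.2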
Code Example #3
File: tf_utils.py Project: suhridbuddha/ntsa
def gauss_kernel2D(x, Dx, Dy, gamma=1.):
    # Split the features in half: the first half is compared against the
    # RBF centers Dx, the second half against Dy.
    h_size = x.get_shape()[-1].value // 2

    x = tf.expand_dims(x, axis=-1)
    if x.get_shape().ndims < 4:
        Dx = tf.reshape(Dx, (1, 1, -1))
        Dy = tf.reshape(Dy, (1, 1, -1))
        x1, x2 = x[:, :h_size], x[:, h_size:]
    else:
        Dy = tf.reshape(Dy, (1, 1, 1, 1, -1))
        Dx = tf.reshape(Dx, (1, 1, 1, 1, -1))
        x1, x2 = x[:, :, :, :h_size], x[:, :, :, h_size:]
    # Sum of two RBF responses, one per half, broadcast over all centers.
    gauss_kernel = (tf.exp(-gamma * tf.square(x1 - Dx)) +
                    tf.exp(-gamma * tf.square(x2 - Dy)))
    return gauss_kernel
Code Example #4
File: tf_utils.py Project: suhridbuddha/ntsa
def gauss_kernel(x, D, gamma=1.):
    # Broadcast every feature of x against every RBF center in D:
    # output[..., i, j] = exp(-gamma * (x[..., i] - D[j])**2).
    x = tf.expand_dims(x, axis=-1)
    if x.get_shape().ndims < 4:
        D = tf.reshape(D, (1, 1, -1))
    else:
        D = tf.reshape(D, (1, 1, 1, 1, -1))

    return tf.exp(-gamma * tf.square(x - D))
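
Both helpers evaluate radial-basis features exp(-gamma * (x - d)^2): expand_dims gives x a trailing axis of size 1, the reshape lines the centers up along that axis, and broadcasting produces one response per (feature, center) pair. A shape-level sketch under an assumed 2-D input and hypothetical centers:

import tensorflow as tf

x = tf.random.normal([8, 3])          # [batch, features]
D = tf.constant([-1.0, 0.0, 1.0])     # 3 hypothetical RBF centers
gamma = 1.0

x_exp = tf.expand_dims(x, axis=-1)    # [8, 3, 1]
D_row = tf.reshape(D, (1, 1, -1))     # [1, 1, 3]
kernel = tf.exp(-gamma * tf.square(x_exp - D_row))
print(kernel.shape)                   # (8, 3, 3): one value per feature/center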
Code Example #5
  def call(self, observation, step_type=(), network_state=()):
    del step_type  # unused.
    output = tf.cast(tf.nest.flatten(observation)[0], tf.float32)
    for layer in self._mlp_layers:
      output = layer(output)

    shift, log_scale_diag = tf.split(output, 2, axis=-1)
    # Clip the log-scale so exp() stays numerically safe and bounded.
    log_scale_diag = tf.clip_by_value(log_scale_diag, -20, 2)

    base_distribution = tfp.distributions.MultivariateNormalDiag(
        loc=shift, scale_diag=tf.exp(log_scale_diag))
    distribution = SquashToSpecDistribution(
        base_distribution, self._single_action_spec)

    distribution = tf.nest.pack_sequence_as(self.output_spec, [distribution])
    return distribution, network_state
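
The clip of the log-scale to [-20, 2] is a common guard in soft actor-critic style policy heads: exp(-20) keeps the standard deviation strictly positive without underflowing, while exp(2) ≈ 7.4 caps the noise. A minimal sketch of the distribution construction alone (toy shapes; tfp is tensorflow_probability, and SquashToSpecDistribution from the snippet is project-specific, so it is omitted here):

import tensorflow as tf
import tensorflow_probability as tfp

output = tf.random.normal([4, 6])     # stand-in for the MLP output, action_dim = 3
shift, log_scale_diag = tf.split(output, 2, axis=-1)
log_scale_diag = tf.clip_by_value(log_scale_diag, -20, 2)

dist = tfp.distributions.MultivariateNormalDiag(
    loc=shift, scale_diag=tf.exp(log_scale_diag))
print(dist.sample().shape)            # (4, 3)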
Code Example #6
    def sample(self, mean, log_b2, training=False):
        """Sample z from Z ~ Laplace(μ, b).

        Uses the Gaussian scale-mixture representation:
          Y ~ N(0, 1)
          V ~ Exponential(1) = Gamma(1, 1)
          z = μ + b·y·sqrt(2v)
        """
        if not training:
            return mean

        # Exponential is a special case of Gamma: Exponential(λ) = Gamma(1, λ).
        exponential = tf.random.gamma(tf.shape(mean), alpha=1, beta=1)
        gaussian = tf.random.normal(tf.shape(mean), mean=0.0, stddev=1.0)

        # log_b2 = log(b²), so exp(0.5 * log_b2) = b.
        return mean + tf.exp(0.5 * log_b2) * tf.sqrt(2 * exponential) * gaussian
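
The construction in the docstring is the Gaussian scale-mixture representation of the Laplace distribution: if Y ~ N(0, 1) and V ~ Exponential(1), then μ + b·Y·sqrt(2V) ~ Laplace(μ, b), whose variance is 2b² and whose mean absolute value is b. A quick NumPy Monte-Carlo check (hypothetical b = 0.5, μ = 0):

import numpy as np

rng = np.random.default_rng(0)
b, n = 0.5, 1_000_000
v = rng.exponential(1.0, n)     # V ~ Exponential(1) = Gamma(1, 1)
y = rng.normal(0.0, 1.0, n)     # Y ~ N(0, 1)
z = b * y * np.sqrt(2 * v)      # mu = 0

print(z.var())                  # ~ 2 * b**2 = 0.5
print(np.abs(z).mean())         # ~ b = 0.5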
Code Example #7
File: ppo.py Project: stjordanis/reaver-pysc2
    def loss_fn(self):
        adv = tf.placeholder(tf.float32, [None], name="advantages")
        returns = tf.placeholder(tf.float32, [None], name="returns")
        logli_old = tf.placeholder(tf.float32, [None], name="logli_old")

        ratio = tf.exp(self.policy.logli - logli_old)
        clipped_ratio = tf.clip_by_value(ratio, 1 - self.clip_ratio,
                                         1 + self.clip_ratio)

        policy_loss = -tf.reduce_mean(tf.minimum(adv * ratio, adv * clipped_ratio))
        # TODO clip value loss
        value_loss = tf.reduce_mean((self.value - returns)**2) * self.value_coef
        entropy_loss = tf.reduce_mean(self.policy.entropy) * self.entropy_coef
        # we want to minimize policy and value errors and maximize entropy,
        # but since the optimizer minimizes, the entropy term's sign is flipped
        full_loss = policy_loss + value_loss - entropy_loss

        return full_loss, [policy_loss, value_loss, entropy_loss], [adv, returns, logli_old]
Code Example #8
    def sample(self, mean, log_var, training=False):
        """Reparameterized sample from N(mean, exp(log_var))."""
        if not training:
            return mean

        noise = tf.random.normal(tf.shape(mean), mean=0.0, stddev=1.0)
        # sigma = exp(0.5 * log_var), so this draws mean + sigma * noise.
        return mean + tf.exp(0.5 * log_var) * noise
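
This is the reparameterization trick used in variational autoencoders: rather than sampling z ~ N(mean, exp(log_var)) directly, sample eps ~ N(0, 1) and compute z = mean + exp(0.5 * log_var) * eps, so gradients can flow through mean and log_var. A toy standalone call (shapes and values invented):

import tensorflow as tf

mean = tf.zeros([4, 8])
log_var = tf.fill([4, 8], -2.0)       # sigma = exp(-1) ~ 0.37

noise = tf.random.normal(tf.shape(mean), mean=0.0, stddev=1.0)
z = mean + tf.exp(0.5 * log_var) * noise
print(z.shape)                        # (4, 8)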