def sample_conditional_a(self, resid_image, var_so_far, pixel_1d):

        is_on = (pixel_1d < (self.n_discrete_latent - 1)).float()

        # pass through galaxy encoder
        pixel_2d = self.one_galaxy_vae.pixel_1d_to_2d(pixel_1d)
        z_mean, z_var = self.one_galaxy_vae.enc(resid_image, pixel_2d)

        # sample z
        q_z = Normal(z_mean, z_var.sqrt())
        z_sample = q_z.rsample()

        # kl term for continuous latent vars
        log_q_z = q_z.log_prob(z_sample).sum(1)
        p_z = Normal(torch.zeros_like(z_sample), torch.ones_like(z_sample))
        log_p_z = p_z.log_prob(z_sample).sum(1)
        kl_z = is_on * (log_q_z - log_p_z)

        # run through decoder
        recon_mean, recon_var = self.one_galaxy_vae.dec(is_on, pixel_2d, z_sample)

        # NOTE: we will have to add to the recon means once we do more detections
        # recon_means = recon_mean + image_so_far
        # recon_vars = recon_var + var_so_far

        return recon_mean, recon_var, is_on, kl_z
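The kl_z term above is a single-sample Monte Carlo estimate of KL(q(z|x) || p(z)). A minimal sketch contrasting that estimator with the closed-form Gaussian KL (shapes and values here are illustrative, not taken from the repo):

import torch
from torch.distributions import Normal, kl_divergence

q_z = Normal(torch.zeros(4, 8), 0.5 * torch.ones(4, 8))  # stand-in for the encoder posterior
p_z = Normal(torch.zeros(4, 8), torch.ones(4, 8))         # standard normal prior

z = q_z.rsample()
kl_mc = (q_z.log_prob(z) - p_z.log_prob(z)).sum(1)  # single-sample estimate, as in the snippet
kl_exact = kl_divergence(q_z, p_z).sum(1)            # closed form, lower variance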
Example No. 2
    def forward(self, X: Tensor) -> Tensor:
        r"""Evaluate Expected Improvement on the candidate set X.

        Args:
            X: A `b1 x ... bk x 1 x d`-dim batched tensor of `d`-dim design points.
                Expected Improvement is computed for each point individually,
                i.e., what is considered are the marginal posteriors, not the
                joint.

        Returns:
            A `b1 x ... bk`-dim tensor of Expected Improvement values at the
            given design points `X`.
        """
        self.best_f = self.best_f.to(X)
        posterior = self.model.posterior(X)
        self._validate_single_output_posterior(posterior)
        mean = posterior.mean
        # deal with batch evaluation and broadcasting
        view_shape = mean.shape[:-2] if mean.dim() >= X.dim() else X.shape[:-2]
        mean = mean.view(view_shape)
        sigma = posterior.variance.clamp_min(1e-9).sqrt().view(view_shape)
        u = (mean - self.best_f.expand_as(mean)) / sigma
        if not self.maximize:
            u = -u
        normal = Normal(torch.zeros_like(u), torch.ones_like(u))
        ucdf = normal.cdf(u)
        updf = torch.exp(normal.log_prob(u))
        ei = sigma * (updf + u * ucdf)
        return ei
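For reference, the snippet above evaluates the standard closed-form Expected Improvement, EI = sigma * (pdf(u) + u * cdf(u)) with u = (mean - best_f) / sigma. A small standalone check of that formula with made-up scalar values (not tied to any particular model):

import torch
from torch.distributions import Normal

mean, sigma, best_f = torch.tensor(1.2), torch.tensor(0.3), torch.tensor(1.0)
u = (mean - best_f) / sigma
std_normal = Normal(torch.zeros(()), torch.ones(()))
ei = sigma * (torch.exp(std_normal.log_prob(u)) + u * std_normal.cdf(u))
print(ei)  # E[max(f - best_f, 0)] under the Gaussian posterior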
Example No. 3
    def forward(self, X: Tensor) -> Tensor:
        r"""Evaluate Constrained Expected Improvement on the candidate set X.

        Args:
            X: A `(b) x 1 x d`-dim Tensor of `(b)` t-batches of `d`-dim design
                points each.

        Returns:
            A `(b)`-dim Tensor of Expected Improvement values at the given
            design points `X`.
        """
        posterior = self.model.posterior(X)
        means = posterior.mean.squeeze(dim=-2)  # (b) x t
        sigmas = posterior.variance.squeeze(dim=-2).sqrt().clamp_min(1e-9)  # (b) x t

        # (b) x 1
        mean_obj = means[..., [self.objective_index]]
        sigma_obj = sigmas[..., [self.objective_index]]
        u = (mean_obj - self.best_f.expand_as(mean_obj)) / sigma_obj
        if not self.maximize:
            u = -u
        normal = Normal(
            torch.zeros(1, device=u.device, dtype=u.dtype),
            torch.ones(1, device=u.device, dtype=u.dtype),
        )
        ei_pdf = torch.exp(normal.log_prob(u))  # (b) x 1
        ei_cdf = normal.cdf(u)
        ei = sigma_obj * (ei_pdf + u * ei_cdf)
        prob_feas = self._compute_prob_feas(X=X, means=means, sigmas=sigmas)
        ei = ei.mul(prob_feas)
        return ei.squeeze(dim=-1)
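Constrained Expected Improvement, as computed above, weights the analytic EI of the objective by the probability that the constraint outputs are feasible, each taken from its marginal Gaussian posterior; schematically:

\mathrm{EI}_c(x) = \mathrm{EI}(x) \cdot \prod_i \Pr\big(\text{constraint } i \text{ satisfied at } x\big)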
Example No. 4
    def forward(self,
                state: torch.Tensor,
                rsample: bool = True) -> Tuple[torch.Tensor, torch.Tensor]:
        nn_out = self.nn(state)
        means = nn_out[:, self.action_dim:]
        logstds = nn_out[:, :self.action_dim]
        stds = logstds.exp()

        action_dist = Normal(means, stds)
        action = action_dist.rsample() if rsample else means

        # Stabler version of log-probability calculation
        # Reference: Spinning Up implementation of SAC
        logprobs = action_dist.log_prob(action).sum(-1, keepdims=True)
        logprobs -= (2 * (np.log(2) - action - F.softplus(-2 * action))).sum(
            -1, keepdims=True)

        action = torch.tanh(action)

        return action, logprobs
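The Spinning Up correction used above relies on the identity log(1 - tanh(a)^2) = 2 * (log 2 - a - softplus(-2a)); other examples below subtract the same quantity directly as log(1 - tanh(a)^2). A quick standalone check of the identity (values are arbitrary):

import torch
import torch.nn.functional as F

a = torch.linspace(-2.0, 2.0, 5)
direct = torch.log(1 - torch.tanh(a).pow(2))
stable = 2 * (torch.log(torch.tensor(2.0)) - a - F.softplus(-2 * a))
print(torch.allclose(direct, stable, atol=1e-5))  # True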
Example No. 5
 def log_prob(self, x, context, should_sum=True, feedback=None):
     mean, std = self.cond_dist_params(context, feedback=feedback)
     dist = Normal(torch.zeros(mean.shape, device=mean.device), torch.ones(std.shape, device=std.device))
     adjusted_x = (x - mean) / std
     adjusted_a = (0 - mean) / std
     log_gx = dist.log_prob(adjusted_x)
     log_c = ((1 - dist.cdf(adjusted_a)) * std).log()
     log_prob = log_gx - log_c
     # return sum_except_batch(dist.log_prob((x - mean).abs()))
     '''
     # Folded normal distribution
     mean, std = self.cond_dist_params(context)
     dist1 = Normal(mean, std)
     dist2 = Normal(-mean, std)
     log_prob = (dist1.log_prob(x).exp() + dist2.log_prob(x).exp()).log()
     '''
     if should_sum:
         return sum_except_batch(log_prob)
     else:
         return log_prob
Example No. 6
 def UsSs(self, hessian_approx, X=None, y=None):
     arg_check(hessian_approx)
     X, y = (torch.from_numpy(X) if X is not None else self.Xt,
             torch.from_numpy(y) if y is not None else self.y)
     Us = list()
     Ss = np.ones(len(y))
     for xi, yi in zip(X, y):
         self.model.zero_grad()
         output = self.model.forward(xi)
         if hessian_approx == 'g':
             likelihood = Normal(output.flatten(), self.sigma_noise)
             loss = - likelihood.log_prob(yi)
             loss.backward()
             Us.append(self.model.gradient)
         elif hessian_approx == 'J':
             output.backward()
             Us.append(self.model.gradient)
         elif hessian_approx == 'H':
             raise NotImplementedError
     return np.stack(Us), Ss if hessian_approx == 'g' else self.bn * Ss
Example No. 7
    def sample(self,
               obs,
               prev_acts,
               rnn_hidden_states,
               available_actions=None):
        # TODO: review this method
        means, log_stds, h_outs = self.forward(obs, prev_acts,
                                               rnn_hidden_states)
        stds = log_stds.exp()
        normal = Normal(means, stds)
        x_t = normal.rsample()
        y_t = torch.tanh(x_t)

        sampled_actions = y_t * self.action_scale + self.action_bias
        log_probs = normal.log_prob(x_t)
        log_probs -= torch.log(self.action_scale * (1 - y_t.pow(2)) + epsilon)
        log_probs = log_probs.sum(2, keepdim=True)
        means = torch.tanh(means) * self.action_scale + self.action_bias

        return sampled_actions, log_probs, means, h_outs
Example No. 8
    def forward(self, state: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        x = F.relu(self.hidden1(state))
        x = F.relu(self.hidden2(x))

        mu =  self.mu_layer(x).tanh()

        log_std = self.log_std_layer(x).tanh()
        log_std = self.log_std_min  + 0.5 * (
            self.log_std_max - self.log_std_min
            ) * (log_std + 1)
        std = torch.exp(log_std)

        dist = Normal(mu, std)
        z = dist.rsample()

        action = z.tanh()
        log_prob = dist.log_prob(z) - torch.log(1 - action.pow(2) + 1e-7)
        log_prob = log_prob.sum(-1, keepdim=True)

        return action, log_prob
Example No. 9
    def sample(self, mean: t.tensor, action=None):
        """
        You must call this function to sample an action and its log probability
        during forward().

        Args:
            mean: Mean tensor of shape ``[batch, action_dim]``, used as the
                location of the Normal distribution.
            action: The action to be evaluated. Set to ``None`` if you are sampling
                a new batch of actions.

        Returns:
            Action tensor of shape ``[batch, action_dim]``,
            Action log probability tensor of shape ``[batch, 1]``.
        """
        self.action_param = mean
        dist = Normal(loc=mean, scale=t.exp(self.action_log_std))
        if action is None:
            action = dist.sample()
        return action, dist.log_prob(action).sum(dim=1, keepdims=True)
Example No. 10
class TanhNormal(Distribution):
    def __init__(self, loc, scale):
        super().__init__()
        self.normal = Normal(loc, scale)

    def sample(self):
        return torch.tanh(self.normal.sample())

    def rsample(self):
        return torch.tanh(self.normal.rsample())

    # Calculates log probability of value using the change-of-variables technique (uses log1p = log(1 + x) for extra numerical stability)
    def log_prob(self, value):
        inv_value = (torch.log1p(value) - torch.log1p(-value)) / 2  # artanh(y)
        return self.normal.log_prob(inv_value) - torch.log1p(
            -value.pow(2) + 1e-6)  # log p(f^-1(y)) + log |det(J(f^-1(y)))|

    @property
    def mean(self):
        return torch.tanh(self.normal.mean)
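A minimal usage sketch for the class above (assumes the surrounding torch imports; the location/scale values are illustrative):

dist = TanhNormal(loc=torch.zeros(3), scale=0.5 * torch.ones(3))
a = dist.rsample()               # differentiable sample, squashed into (-1, 1)
logp = dist.log_prob(a).sum(-1)  # per-dimension log-densities, summed over the action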
Example No. 11
    def log_prob_for_single_animal(self, inputs, **kwargs):
        """
        calculate the log prob for inputs[1:] based on inputs[:-1]
        :param inputs: (T, 2)
        :param kwargs:
        :return: (T-1, K)
        """

        T, d = inputs.shape
        assert d == 2, d
        # get the mu and cov based on the observations except the last one
        mu, cov = self.get_mu_and_cov_for_single_animal(inputs[:-1], **kwargs)
        # mean: (T-1, K, 2), covariance (T-1, K, 2)
        m = Normal(mu, torch.sqrt(cov))
        # evaluate the observations except the first one: (T-1, 1, 2)
        log_prob = m.log_prob(inputs[1:, None])  # (T-1, K, 2)
        log_prob = torch.sum(log_prob, dim=-1)
        assert log_prob.shape == (T - 1, self.K), log_prob.shape

        return log_prob
Example No. 12
class TanhNormal(Distribution):
    def __init__(self, normal_mean, normal_std):
        super().__init__()
        self.normal_mean = normal_mean
        self.normal_std = normal_std
        self.standard_normal = Normal(
            torch.zeros_like(self.normal_mean, device=DEVICE),
            torch.ones_like(self.normal_std, device=DEVICE))
        self.normal = Normal(normal_mean, normal_std)

    def log_prob(self, pre_tanh):
        log_det = 2 * np.log(2) + logsigmoid(2 * pre_tanh) + logsigmoid(
            -2 * pre_tanh)
        result = self.normal.log_prob(pre_tanh) - log_det
        return result

    def rsample(self):
        pretanh = self.normal_mean + self.normal_std * self.standard_normal.sample()  # reparameterization trick
        return torch.tanh(pretanh), pretanh
Example No. 13
    def sample_with_logp(self, x):
        mu, log_std = self.forward(x)
        std = torch.exp(log_std)

        # print("mu",mu)
        # print("log_std", log_std)
        # print("std",std)

        normal = Normal(mu, std)
        x_t = normal.rsample()
        # print("x_t", x_t)
        logp = normal.log_prob(x_t)
        # print("logp 1",logp)

        y_t = torch.tanh(x_t)
        logp -= torch.log(1 - torch.pow(y_t, 2) + 1e-6)
        # print("y_t", y_t)
        # print("logp 2",logp)

        return y_t, logp
Example No. 14
 def sample(self, state):
     '''
     :param state: (batch_num, state_dim)
     :return: action: (batch_num, action_dim, option_num)
     log_prob: (batch_num, option_num)
     mean_mat: (batch_num, action_dim, option_num)
     '''
     mean_mat, log_std_mat = self.forward(state)
     std_mat = log_std_mat.exp()
     normal = Normal(mean_mat, std_mat)
     x_t = normal.rsample(
     )  # for reparameterization trick (mean + std * N(0,1))
     y_t = torch.tanh(x_t)
     action = y_t * self.action_scale + self.action_bias
     log_prob = normal.log_prob(x_t)  # log(pi(at|st))
     # Enforcing Action Bound, because the Gaussian distribution changes from (-inf, inf) to (-1, 1)
     log_prob -= torch.log(self.action_scale * (1 - y_t.pow(2)) + epsilon)
     log_prob = log_prob.sum(1, keepdim=True)
     mean_mat = torch.tanh(mean_mat) * self.action_scale + self.action_bias
     return action, log_prob, mean_mat
Example No. 15
    def fwd_step(self, z):
        z = self.cast(z)

        z_ω = self.flt_ω(z)  # ω only observes PD params
        (μ_ω, σ_ω) = self.ω(z_ω)
        dist_ω = Normal(μ_ω, σ_ω)
        a_ω = dist_ω.rsample()
        z_ = self.env.step_batch(z, a_ω.detach())  # ω-step
        z_lst = [z_]
        for t in range(1, self.n):
            z_ = self.env.step_batch(z_, torch.zeros_like(
                a_ω))  # n state propagations with no PD update
            z_lst.append(z_)

        z_q = self.flt_q(z, z_lst)  # q does not observe PD_t+n
        (μ_q, σ_q) = self.q(z_q)
        dist_q = Normal(μ_q, σ_q)

        self.it += 1
        return (dist_q.log_prob(a_ω) - dist_ω.log_prob(a_ω)).sum(-1)
Example No. 16
def mix_gaussian_loss_1d(x, l):
    """ log-likelihood for mixture of continuous Gaussians, assumes the data has been rescaled to [-1,1] interval
    Args:
        x (Tensor): Target (B x D x D x 1) (B batch size, D dimensions, 1 channel (B/W image)
        y_hat (Tensor): Predictive distribution, (B x D x D x 3*nr_mix) (B batch size, D dimensions, 3 * number of mixture components channels)
        log_scale_min (float): Log scale minimum value
        reduce (bool): If True, the losses are averaged or summed for each minibatch.
    Returns
        Tensor: loss
    """
    x = x.permute(0, 2, 3, 1)
    l = l.permute(0, 2, 3, 1)
    xs = [int(y) for y in x.size()]
    ls = [int(y) for y in l.size()]
    # here and below: unpacking the params of the Gaussian mixture
    nr_mix = int(ls[-1] / 3)

    logit_probs = l[:, :, :, :nr_mix]
    # l = l[:, :, :, nr_mix:].contiguous().view(xs + [nr_mix * 2]) # 2 for mean, scale
    means = l[:, :, :, nr_mix:2 * nr_mix]
    log_scales = torch.clamp(l[:, :, :, 2 * nr_mix:3 * nr_mix], min=-7.)
    # here and below: getting the means and adjusting them based on preceding
    # sub-pixels

    x = x.expand_as(means)

    #x = x.contiguous()
    #x = x.unsqueeze(-1) + Variable(torch.zeros(xs + [nr_mix]).cuda(), requires_grad=False)

    # means = torch.cat((means[:, :, :, 0, :].unsqueeze(3), m2, m3), dim=3)
    centered_x = x - means
    dist = Normal(loc=0., scale=torch.exp(log_scales))
    # do we need to add a trick to avoid log(0)?
    log_probs = dist.log_prob(centered_x)
    if nr_mix > 1:
        log_probs = log_probs + F.log_softmax(logit_probs, -1)
    if nr_mix == 1:
        return -log_sum_exp(log_probs)  ## ??
        # return -torch.sum(log_sum_exp(log_probs))
    else:
        return -log_sum_exp(log_probs)
Example No. 17
 def update(self, optim, trajectory):
     states = torch.stack(trajectory["states"]).float()
     actions = torch.stack(trajectory["actions"]).float()
     next_states = torch.stack(trajectory["next_states"]).float()
     beta_log_probs = torch.stack(trajectory["log_probs"]).float()
     rewards = torch.stack(trajectory["rewards"]).float()
     values = torch.stack(trajectory["values"]).float()
     masks = torch.stack(trajectory["dones"])
     returns = self.__Tensor(rewards.size(0),1)
     deltas = self.__Tensor(rewards.size(0),1)
     advantages = self.__Tensor(rewards.size(0),1)
     prev_return = 0
     prev_value = 0
     prev_advantage = 0
     for i in reversed(range(rewards.size(0))):
         if masks[i] == 0:
             next_action, _, v_next = self.select_action(next_states[i])
             state_action = torch.cat([next_states[i], next_action], dim=0)
             state_inf = next_states[i]+self.__dynamics(state_action)
             _, _, v_inf = self.select_action(state_inf)
             v_fin = v_next.detach()+self.__gamma*v_inf.detach()
         else:
             v_fin = 0
         returns[i] = rewards[i]+self.__gamma*(prev_return*masks[i]+v_fin)
         deltas[i] = rewards[i]+self.__gamma*(prev_value*masks[i]+v_fin)-values.data[i]
         advantages[i] = deltas[i]+self.__gamma*self.__lmbd*prev_advantage*masks[i]
         prev_return = returns[i, 0]
         prev_value = values.data[i, 0]
         prev_advantage = advantages[i, 0]
     advantages = (advantages-advantages.mean())/(advantages.std()+1e-10)
     returns = (returns-returns.mean())/(returns.std()+1e-10)
     mu_pi, logvar_pi, _ = self.__pi(states)
     dist_pi = Normal(mu_pi, logvar_pi.exp().sqrt())
     pi_log_probs = dist_pi.log_prob(actions)
     ratio = (pi_log_probs-beta_log_probs.detach()).sum(dim=1, keepdim=True).exp()
     optim.zero_grad()
     actor_loss = -torch.min(ratio*advantages, torch.clamp(ratio, 1-self.__eps, 1+self.__eps)*advantages).mean()
     critic_loss = F.smooth_l1_loss(values, returns)
     loss = actor_loss+critic_loss
     loss.backward(retain_graph=True)
     optim.step()
Example No. 18
class SigmoidNormal(Distribution):
    """
        Represent distribution of X where
            X ~ sigmoid(Z)
            Z ~ N(mean, std)
        Note: this is not very numerically stable.
        """
    def __init__(self, normal_mean, normal_std, epsilon=1e-6):
        super(SigmoidNormal, self).__init__()
        """
        :param normal_mean: Mean of the normal distribution
        :param normal_std: Std of the normal distribution
        :param epsilon: Numerical stability epsilon when computing log-prob.
        """
        self.normal = Normal(normal_mean, normal_std)
        self.epsilon = epsilon

    def sample_n(self, n, return_pre_sigmoid_value=False):
        z = self.normal.sample_n(n)
        if return_pre_sigmoid_value:
            return F.sigmoid(z), z
        else:
            return F.sigmoid(z)

    def log_prob(self, value, pre_sigmoid_value=None):
        """
        :param value: some value, x
        :param pre_sigmoid_value: arcsigmoid(x)
        :return:
        """
        if pre_sigmoid_value is None:
            pre_sigmoid_value = torch.log((value) / (1 - value))
        return self.normal.log_prob(pre_sigmoid_value) - torch.log(
            value * (1 - value) + self.epsilon)

    def sample(self, return_pre_sigmoid_value=False):
        z = self.normal.sample()
        if return_pre_sigmoid_value:
            return F.sigmoid(z), z
        else:
            return F.sigmoid(z)
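A minimal usage sketch for the class above (assumes the surrounding torch / torch.nn.functional imports; parameters are illustrative):

dist = SigmoidNormal(normal_mean=torch.zeros(4), normal_std=torch.ones(4))
x, z = dist.sample(return_pre_sigmoid_value=True)  # x = sigmoid(z), z ~ N(mean, std)
# change of variables: log p_X(x) = log p_Z(logit(x)) - log(x * (1 - x))
logp = dist.log_prob(x, pre_sigmoid_value=z)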
Example No. 19
    def log_px_z(self, tensors, z):
        """
            Usage reserved for Annealed Importance sampling
        """
        n_latent = z.shape[-1]
        z_prior = Normal(torch.zeros(n_latent), torch.ones(n_latent))
        log_pz = z_prior.log_prob(z).sum(-1)

        x = tensors[0]
        px_mean, px_var, qz_m, qz_v, _, log_qz_given_x = self.inference(
            x, n_samples=1, reparam=True)
        # Following step required to be consistent with below method
        z = z.unsqueeze(0)
        _, log_pxz, _ = self.log_ratio(x,
                                       px_mean,
                                       px_var,
                                       log_qz_given_x,
                                       z,
                                       return_full=True)
        log_pxz = log_pxz.squeeze()
        return log_pxz - log_pz
Example No. 20
    def forward(self, state, action=None):
        x = state
        x = self.actor_bn(x)
        for l in self.actor_linears:
            x = l(x)
            x = self.relu(x)
        mu = self.tanh(self.mu(x))
        log_var = -self.relu(self.log_var(x))
        sigmas = log_var.exp().sqrt()
        dists = Normal(mu, sigmas)
        if action is None:
            action = dists.sample()
        log_prob = dists.log_prob(action).sum(dim=-1, keepdim=True)

        x = state
        x = self.critic_bn(x)
        for l in self.critic_linears:
            x = l(x)
            x = self.relu(x)
        v = self.v(x)
        return action, log_prob, dists.entropy(), v
Example No. 21
    def _loss_function(self, x_context, y_context, x_target, y_target):
        """

        :param x_context:  [batch, N_con, x_dim]
        :param y_context:  [batch, N_con, y_dim]
        :param x_target:   [batch, N_tar, x_dim]
        :param y_target:  [batch, N_tar, y_dim]
        :return:
        """
        x_all = torch.cat([x_context, x_target], dim=-2)
        y_all = torch.cat([y_context, y_target], dim=-2)

        z_all_mu, z_all_sigma = self._calc_zparam(x_all, y_all) # z_all [batch, z_dim]
        z_c_mu, z_c_sigma = self._calc_zparam(x_context, y_context) # z_c [batch, z_dim]
        z_all_sample = self._sample_z(z_all_mu, z_all_sigma, self.N_z) # z_all_sample [batch, N_z, z_dim]
        y_t_mu, y_t_sigma = self.model.Decoder(z_all_sample, x_target) # y_t [batch, N_t, N_z, y_dim]
        y_normal = Normal(loc=y_t_mu, scale=y_t_sigma)
        y_target_exd = y_target.unsqueeze(dim=-2).expand(-1, -1, self.N_z, -1)  # to [batch, N_t,  N_z, y_dim]
        loglik = -y_normal.log_prob(y_target_exd).sum(dim=[-1,-3]).mean() # sum for N_t, y_dim, mean for N_z, batch
        kldiv = NeuralProcessTrainer.calc_kldiv_gaussian(z_all_mu, z_all_sigma, z_c_mu, z_c_sigma) # sum for z_dim, mean for batch
        return loglik, kldiv
Example No. 22
 def get_action(self, inputs, std_scale=None, epsilon=1e-6, mean_pi=False, probs=False, entropy=False):
     mean, log_std = self(inputs)
     if mean_pi:
         return T.tanh(mean)
     std = log_std.exp()
     if std_scale is not None:
         std *= std_scale
     mu = Normal(mean, std)
     z = mu.rsample()
     action = T.tanh(z)
     if not probs:
         return action * self.action_scaling
     else:
         if action.shape == (self.action_dim,):
             action = action.reshape((1, self.action_dim))
         log_probs = (mu.log_prob(z) - T.log(1 - action.pow(2) + epsilon)).sum(1, keepdim=True)
         if not entropy:
             return action * self.action_scaling, log_probs
         else:
             entropy = mu.entropy()
             return action * self.action_scaling, log_probs, entropy
Example No. 23
    def get_probs(self, z, q):
        # Calculate log q(z|x)
        log_qz_x = q.log_prob(z).sum(dim=1)

        # Calculate log p(z)
        pz = Normal(loc=torch.zeros_like(z), scale=1)
        log_pz = pz.log_prob(z).sum(dim=1)

        # Calculate log q(z)
        batch_size = len(z)
        mat_log_qz = BtcvaeLoss.matrix_log_density_gaussian(z, q)
        if self.is_mss:
            log_iw_mat = BtcvaeLoss.log_importance_weight_matrix(
                batch_size, self.dataset_size)
            log_iw_mat = torch.unsqueeze(log_iw_mat, dim=-1).to(z.device)
            mat_log_qz = mat_log_qz + log_iw_mat
        log_qz = torch.logsumexp(mat_log_qz.sum(dim=2), dim=1, keepdim=False)
        log_prod_qzi = torch.logsumexp(mat_log_qz, dim=1,
                                       keepdim=False).sum(dim=1)

        return log_pz, log_qz, log_prod_qzi, log_qz_x
Example No. 24
        def _slow(mu, std, x):
            log_probs = []

            # Iterate over all density components
            for i in range(self.num_density):
                # Retrieve means and stds
                mu_i = mu[:, i, :]
                std_i = std[:, i, :]
                # Threshold std if needed: a std of 0 leads to a NaN loss.
                # std_i = torch.clamp(std_i, min=min_std, max=std_i.max().item())
                # Create Gaussian distribution
                dist = Normal(loc=mu_i, scale=std_i)
                # Calculate the log-probability
                logp = dist.log_prob(x)
                # Record the log probability for current density
                log_probs.append(logp)

            # Stack log-probabilities with shape [N, K, D]
            log_probs = torch.stack(log_probs, dim=1)
            
            return log_probs
Example No. 25
    def select_action(self,state):

        state = torch.from_numpy(state).float().unsqueeze(0) # just to make it a Tensor obj
        # get mean and std
        mean, std = self.policy(state)

        # create normal distribution
        normal = Normal(mean, std)

        # sample action
        action = normal.sample()

        # get log prob of that action
        ln_prob = normal.log_prob(action)
        ln_prob = ln_prob.sum()
        # squeeze action into [-1,1]
        action = torch.tanh(action)
        # turn actions into numpy array
        action = action.numpy()

        return action[0], ln_prob #, mean, std
Example No. 26
def tile_map_prior(prior: ImagePrior, tile_map):
    # Source probabilities
    dist_sources = Poisson(torch.tensor(prior.mean_sources))
    log_prob_no_source = dist_sources.log_prob(torch.tensor(0))
    log_prob_one_source = dist_sources.log_prob(torch.tensor(1))
    log_prob_source = (tile_map["n_sources"] == 0) * log_prob_no_source + (
        tile_map["n_sources"] == 1) * log_prob_one_source

    # Binary probabilities
    galaxy_log_prob = torch.tensor(0.7).log()
    star_log_prob = torch.tensor(0.3).log()
    log_prob_binary = (galaxy_log_prob * tile_map["galaxy_bools"] +
                       star_log_prob * tile_map["star_bools"])

    # Galaxy probabilities
    gal_dist = Normal(0.0, 1.0)
    galaxy_probs = gal_dist.log_prob(
        tile_map["galaxy_params"]) * tile_map["galaxy_bools"]

    # prob_normalized =
    return log_prob_source.sum() + log_prob_binary.sum() + galaxy_probs.sum()
Example No. 27
def select_action_continuous(state, policy: SimplePolicyContinuous,
                             training_info: TrainingInfo, env: gym.Env):
    """
    Given a policy which outputs a mean and a standard deviation, constructs a Normal distribution and then
    returns an action sampled from that distribution. This function also logs the entropy of the distribution
    and the log probability of the sampled action.
    """
    # Get distribution
    state = prepare_state(state)
    mu, sigma = policy.forward(state)

    # Sample action and remember its log probability
    n = Normal(mu, sigma)
    action = n.sample()
    action = tensor_clamp(action, env.action_space.low, env.action_space.high)

    # This is not very clean. TODO: clean this up
    training_info.log_probs.append(n.log_prob(action).sum())
    training_info.entropies.append(n.entropy())

    return action
Example No. 28
def _choose_action(loc, scale):
    """sample an action from gaussian distribution, given its parameters

    Parameters
    ----------
    loc : torch.tensor
        the mean parameter
    scale : torch.tensor
        the scale parameter

    Returns
    -------
    tuple
        the sampled action, its log probability, and the entropy of the distribution

    """
    m = Normal(loc, scale)
    a_t = m.sample()
    log_prob_a_t = m.log_prob(a_t)
    ent_t = gaussian_entropy(m)
    return a_t, log_prob_a_t, ent_t
Example No. 29
 def forward(self, state, action=None):
     a = t.relu(self.fc1(state))
     a = t.relu(self.fc2(a))
     mu = self.mu_head(a)
     sigma = softplus(self.sigma_head(a))
     dist = Normal(mu, sigma)
     act = (atanh(action / self.action_range)
            if action is not None
            else dist.rsample())
     act_entropy = dist.entropy()
     # the suggested way to confine your actions within a valid range
     # is not clamping, but remapping the distribution
     act_log_prob = dist.log_prob(act)
     act_tanh = t.tanh(act)
     act = act_tanh * self.action_range
      # the distribution remapping process used in the original paper.
     act_log_prob -= t.log(self.action_range *
                           (1 - act_tanh.pow(2)) +
                           1e-6)
     act_log_prob = act_log_prob.sum(1, keepdim=True)
     return act, act_log_prob, act_entropy
Example No. 30
    def __call__(self, x_sample, x):
        if self.layer_outputs is None:
            raise ValueError("The model needs to return the latent space "
                             "distribution parameters z_mu, z_var.")
        if self.use_distributions:
            p = x_sample
            q = self.layer_outputs["q"]
        else:
            z_mu = self.layer_outputs["z_mu"]
            z_var = self.layer_outputs["z_var"]
            p = Normal(x_sample, 0.5)
            q = Normal(z_mu, z_var.pow(0.5))

        # reconstruction loss: log likelihood
        ll_loss = - p.log_prob(x).sum(-1, keepdim=True)
        # regularization loss: KL divergence
        kl_loss = kl_divergence(q, Normal(0, 1)).sum(-1, keepdim=True)

        combined_loss = ll_loss + kl_loss

        return combined_loss, {"ll_loss": ll_loss, "kl_loss": kl_loss}
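For reference, combined_loss above is the usual per-sample negative ELBO (a standard identity rather than anything specific to this example):

-\mathrm{ELBO}(x) = -\mathbb{E}_{q(z \mid x)}\big[\log p(x \mid z)\big] + \mathrm{KL}\big(q(z \mid x) \,\|\, \mathcal{N}(0, I)\big)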
Example No. 31
def loss_function(pi, sigma, mu, target):
    """ Calculate the loss for our MDN.

    This is the negative log-likelihood, based on
    https://github.com/sagelywizard/pytorch-mdn and adapted to use
    torch.distributions.Normal

    The original paper:

    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.120.5685&rep=rep1&type=pdf
    """
    sequence = pi.size()[1]

    target = target.view(-1, sequence, 1, VAE.z_size)
    normal_distributions = Normal(mu, sigma)

    # log_prob(y) ... log of pdf at value y, per mixture component
    component_probabilities = torch.exp(normal_distributions.log_prob(target))

    # negative log-likelihood of the mixture: -log sum_k pi_k * N(y | mu_k, sigma_k)
    result = -torch.log(torch.sum(pi * component_probabilities, dim=2))
    return torch.mean(result)
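With mixture weights pi_k (assumed here to already be normalized probabilities), the loss above is the standard mixture-density-network negative log-likelihood:

\mathcal{L} = -\frac{1}{N} \sum_{n} \log \sum_{k} \pi_{k}(x_n)\, \mathcal{N}\big(y_n \mid \mu_{k}(x_n), \sigma_{k}(x_n)\big)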
Example No. 32
    def forward(self, state: np.ndarray, goal: np.ndarray, deterministic=False, compute_log_prob=True) -> Tuple[torch.Tensor, torch.Tensor]:
        """ Returns the actions and their log probs as a torch Tensors (gradients can be computed)"""
        if self.has_goal:
            state, goal = get_tensor(state), get_tensor(goal)
            total_input = torch.cat([state, goal], dim=-1)  # Concatenate to format [states | goals]
        else:
            total_input = get_tensor(state)

        hidden_state = self.layers.forward(total_input)
        mu = self.mu_layer(hidden_state)
        log_std = self.sigma_layer(hidden_state)
        log_std = LOG_SIGMA_MIN + (LOG_SIGMA_MAX - LOG_SIGMA_MIN) * (torch.tanh(log_std) + 1) / 2.0
        # log_std = torch.clamp(log_std, LOG_SIGMA_MIN, LOG_SIGMA_MAX)
        std = torch.exp(log_std)

        policy_distribution = Normal(mu, std)
        actions = mu if deterministic else policy_distribution.rsample()

        if compute_log_prob:
            # Exact source: https://github.com/openai/spinningup/blob/master/spinup/algos/pytorch/sac/core.py#L54
            # "Compute logprob from Gaussian, and then apply correction for Tanh squashing.
            # NOTE: The correction formula is a little bit magic. To get an understanding
            # of where it comes from, check out the original SAC paper (arXiv 1801.01290)
            # and look in appendix C. This is a more numerically-stable equivalent to Eq 21.
            # Try deriving it yourself as a (very difficult) exercise. :)"
            log_prob = policy_distribution.log_prob(actions).sum(axis=-1)
            try:
                log_prob -= (2 * (np.log(2) - actions - F.softplus(-2 * actions))).sum(axis=1)
            except IndexError:
                log_prob -= (2 * (np.log(2) - actions - F.softplus(-2 * actions))).sum()
        else:
            log_prob = None

        actions = torch.tanh(actions)  # The log_prob above takes into account this "tanh squashing"
        action_center = (self.action_high + self.action_low) / 2
        action_range = (self.action_high - self.action_low) / 2
        actions_in_range = action_center + actions * action_range

        # print(f"Mu {mu}\t sigma {std}\tactions {actions}\taction_in_range {actions_in_range}")
        return actions_in_range, log_prob
Example No. 33
class GaussianSeparatedPolicy(nn.Module):
    def __init__(self, input_dim, action_dim, hidden_layer=[64, 64]):
        super(GaussianSeparatedPolicy, self).__init__()
        actor_layer_size = [input_dim] + hidden_layer
        mu_feature_layers = nn.ModuleList([])
        std_feature_layers = nn.ModuleList([])
        for i in range(len(actor_layer_size) - 1):
            mu_feature_layers.append(
                nn.Linear(actor_layer_size[i], actor_layer_size[i + 1]))
            mu_feature_layers.append(nn.ReLU())
            std_feature_layers.append(
                nn.Linear(actor_layer_size[i], actor_layer_size[i + 1]))
            std_feature_layers.append(nn.ReLU())
        self.mu_body = nn.Sequential(*mu_feature_layers)
        self.std_body = nn.Sequential(*std_feature_layers)
        self.mu_head = nn.Sequential(nn.Linear(hidden_layer[-1], action_dim),
                                     nn.Tanh())
        self.std_head = nn.Sequential(nn.Linear(hidden_layer[-1], action_dim),
                                      nn.Softplus())

        critic_layer_size = [input_dim] + hidden_layer
        critic_layers = nn.ModuleList([])
        for i in range(len(critic_layer_size) - 1):
            critic_layers.append(
                nn.Linear(critic_layer_size[i], critic_layer_size[i + 1]))
            critic_layers.append(nn.ReLU())
        critic_layers.append(nn.Linear(hidden_layer[-1], 1))
        self.critic = nn.Sequential(*critic_layers)

    def forward(self, x, action=None):
        mu = self.mu_head(self.mu_body(x))
        std = self.std_head(self.std_body(x))
        self.dist = Normal(mu, std)
        if action is None:
            action = self.dist.sample()
        action_log_prob = self.dist.log_prob(action).sum(-1)
        entropy = self.dist.entropy().sum(-1)
        value = self.critic(x)

        return action, action_log_prob, value.squeeze(-1), entropy