def get_reinforce_ps_loss(phi, p0, reinforce=False):
    # returns a pseudo-loss: a quantity whose gradient w.r.t. phi is an
    # unbiased estimate of the gradient of the true expected loss

    d = len(p0)
    e_b = sigmoid(phi)

    bn_rv = Bernoulli(probs=torch.ones(d) * e_b)
    binary_samples = bn_rv.sample().detach()
    # binary_samples = (torch.rand(d) < e_b).float().detach()

    if reinforce:
        binary_samples_ = bn_rv.sample().detach()
        baseline = torch.sum((binary_samples_ - p0)**2)

    else:
        baseline = 0.0

    sampled_loss = torch.sum((binary_samples - p0)**2)

    # probs, draw_array = get_all_probs(e_b, d)
    # losses_array = get_losses_from_draw_array(draw_array, p0)
    #
    # cat_rv = Categorical(probs)
    # indx = cat_rv.sample()
    # binary_samples = draw_array[indx]
    # sampled_loss = losses_array[indx]
    #
    sampled_log_q = get_bernoulli_log_prob(e_b, binary_samples)

    ps_loss = (sampled_loss - baseline).detach() * sampled_log_q

    return ps_loss
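A minimal usage sketch for the function above. The sigmoid and get_bernoulli_log_prob helpers are not shown in the snippet, so the definitions below are assumptions chosen to match how they are called:

import torch
from torch.distributions import Bernoulli

sigmoid = torch.sigmoid

def get_bernoulli_log_prob(e_b, samples):
    # assumed form: log q(b) for d independent Bernoulli(e_b) coordinates
    return torch.sum(samples * torch.log(e_b) + (1 - samples) * torch.log(1 - e_b))

p0 = torch.tensor([0.2, 0.7, 0.5])
phi = torch.zeros(3, requires_grad=True)
ps_loss = get_reinforce_ps_loss(phi, p0, reinforce=True)
ps_loss.backward()
print(phi.grad)  # single-sample REINFORCE estimate of the gradient of E[(b - p0)^2]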
Example 2
	def add_noise_to_canv(self, canv, **kwargs):


		if self.noise_method == 'blur':
			canv = self.noise_blur(canv)

		elif self.noise_method == 'peaky':
			canv = self.noise_peaky(canv)

		elif self.noise_method == 'peaky_blur':
			prop_orig = np.random.rand()
			canv = canv*prop_orig + (1 - prop_orig)*self.noise_blur(self.noise_peaky(canv))

		elif self.noise_method == 'bern':

			p_subtract = 0.1*np.random.rand()
			p_add = 0.1*np.random.rand()
			#print(p_subtract, p_add)
			bern_noise_subtract = Bernoulli(p_subtract*torch.ones(self.canv_shape))
			bern_noise_add = Bernoulli(p_add*torch.ones(self.canv_shape))

			canv = canv - bern_noise_subtract.sample()
			canv = canv.clamp(0.0, 1.0)

			canv = canv + bern_noise_add.sample()
			canv = canv.clamp(0.0, 1.0)

		else:
			canv = self.noise_gaussian(canv, **kwargs)


		canv = canv.clamp(0.0, 1.0)
		return canv
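For reference, the 'bern' branch above is a salt-and-pepper style corruption. A self-contained sketch of just that step, using torch.rand in place of np.random.rand (an assumption, since the rest of the class is not shown):

import torch
from torch.distributions import Bernoulli

def bernoulli_salt_pepper(canv, max_rate=0.1):
    # Independently erase and add pixels, each with a random rate in [0, max_rate).
    p_subtract = float(max_rate * torch.rand(()))
    p_add = float(max_rate * torch.rand(()))
    canv = (canv - Bernoulli(p_subtract * torch.ones_like(canv)).sample()).clamp(0.0, 1.0)
    canv = (canv + Bernoulli(p_add * torch.ones_like(canv)).sample()).clamp(0.0, 1.0)
    return canv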
Example 4
 def test_bernoulli_shape_tensor_params(self):
     bernoulli = Bernoulli(torch.Tensor([[0.6, 0.3], [0.6, 0.3], [0.6, 0.3]]))
     self.assertEqual(bernoulli._batch_shape, torch.Size((3, 2)))
     self.assertEqual(bernoulli._event_shape, torch.Size(()))
     self.assertEqual(bernoulli.sample().size(), torch.Size((3, 2)))
     self.assertEqual(bernoulli.sample((3, 2)).size(), torch.Size((3, 2, 3, 2)))
     self.assertEqual(bernoulli.log_prob(self.tensor_sample_1).size(), torch.Size((3, 2)))
     self.assertRaises(ValueError, bernoulli.log_prob, self.tensor_sample_2)
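The batch/sample shape semantics exercised by this test can be summarised in a short standalone sketch (independent of the test fixture):

import torch
from torch.distributions import Bernoulli

b = Bernoulli(torch.tensor([[0.6, 0.3], [0.6, 0.3], [0.6, 0.3]]))
print(b.batch_shape)           # torch.Size([3, 2])
print(b.event_shape)           # torch.Size([])
print(b.sample().shape)        # torch.Size([3, 2])       -- one draw per batch entry
print(b.sample((3, 2)).shape)  # torch.Size([3, 2, 3, 2]) -- sample_shape + batch_shape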
Example 5
 def test_bernoulli_shape_scalar_params(self):
     bernoulli = Bernoulli(0.3)
     self.assertEqual(bernoulli._batch_shape, torch.Size())
     self.assertEqual(bernoulli._event_shape, torch.Size())
     self.assertEqual(bernoulli.sample().size(), torch.Size((1,)))
     self.assertEqual(bernoulli.sample((3, 2)).size(), torch.Size((3, 2)))
     self.assertRaises(ValueError, bernoulli.log_prob, self.scalar_sample)
     self.assertEqual(bernoulli.log_prob(self.tensor_sample_1).size(), torch.Size((3, 2)))
     self.assertEqual(bernoulli.log_prob(self.tensor_sample_2).size(), torch.Size((3, 2, 3)))
Example 6
 def backward_pass(self, hid, temperature=1):
     """Computes samples of visible neurons."""
     vis_logits = hid @ self._W.T + self._bv
     vis_probs = torch.sigmoid(vis_logits / temperature)
     bernoulli = Bernoulli(vis_probs)
     vis = bernoulli.sample()
     return vis
Example 7
 def forward_pass(self, vis, temperature=1):
     """Computes samples of hidden neurons."""
     hid_logits = vis @ self._W + self._bh
     hid_probs = torch.sigmoid(hid_logits / temperature)
     bernoulli = Bernoulli(hid_probs)
     hid = bernoulli.sample()
     return hid
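Together with the backward_pass above, this makes up one block-Gibbs sweep of an RBM. A sketch of how the two might be chained (the rbm object and its _W, _bv, _bh tensors are assumed from the surrounding class):

def gibbs_step(rbm, vis, temperature=1):
    # v -> h -> v': sample the hidden units given the visibles, then reconstruct the visibles.
    hid = rbm.forward_pass(vis, temperature)
    vis_next = rbm.backward_pass(hid, temperature)
    return vis_next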
Example 8
    def forward(self, h_t, eps=0.):
        """
        Parameters
        ----------
        h_t : torch.float
            hidden state of RNN at timestep t.
        eps : float
            epsilon -- controls the explore/exploit trade-off during training.
            this value is decreased as training progresses.

        Returns
        -------
        halt : torch.long
            binary halting decision (0 = wait, 1 = halt).
        log_pi : torch.float
            log probability of the selected action.
        -torch.log(probs) : torch.float
            negative log of the halting probability.
        """
        probs = torch.sigmoid(self.fc(
            h_t.detach()))  # Compute halting-probability
        probs = (1 - eps) * probs + eps * torch.FloatTensor(
            [0.5])  # Add randomness according to eps
        m = Bernoulli(
            probs=probs
        )  # Define bernoulli distribution parameterized with predicted probability
        halt = m.sample()  # Sample action
        log_pi = m.log_prob(halt)  # Compute log probability for optimization
        return halt, log_pi, -torch.log(probs)
Example 9
def be_sample(segm: Tensor) -> Tuple[Tensor, Mask]:

    dist = Bernoulli(segm)
    mask_sample = dist.sample()
    L: Tensor = (segm * mask_sample + (1 - segm) * (1 - mask_sample)).log().sum() / segm.numel()

    return L,  Mask(mask_sample)
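Since mask_sample is binary, the hand-rolled likelihood term above equals the mean Bernoulli log-probability of the sampled mask; an equivalent formulation, for reference:

# Equivalent to the expression above, because mask_sample only takes values 0 and 1:
L = dist.log_prob(mask_sample).mean()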
Example 10
def tensor_to_probabilistic_action_dict(
        env: gym.Env, x: torch.Tensor) -> Tuple[OrderedDict, torch.Tensor]:
    actions = env.action_space.noop()
    log_probs = [0 for _ in range(10)]

    m_1 = Normal(x[0], x[2])
    camera_action_1 = m_1.rsample()
    log_prob_1 = m_1.log_prob(camera_action_1)
    log_probs[0] = log_prob_1 if not torch.isnan(
        log_prob_1).any() else torch.tensor(0.0, device=DEVICE)

    m_2 = Normal(x[1], x[3])
    camera_action_2 = m_2.rsample()
    log_prob_2 = m_2.log_prob(camera_action_2)
    log_probs[1] = log_prob_2 if not torch.isnan(
        log_prob_2).any() else torch.tensor(0.0, device=DEVICE)

    actions['camera'] = (float(camera_action_1.item()),
                         float(camera_action_2.item()))

    for idx, action in enumerate(BINARY_CONSTANTS, start=4):
        m = Bernoulli(x[idx])
        sampled_action = m.sample()
        log_probs[idx - 2] = m.log_prob(sampled_action)
        actions[action] = int(sampled_action)

    # print(log_probs)

    return actions, torch.stack(log_probs).sum()
Example 11
    def resample_Zik(self, X, Z, A, i, k):
        '''
        m = number of observations containing feature k,
            not counting z_ik itself

        Prior: p(z_ik=1) = m / (N-1)
        The posterior combines the prior with the likelihood:
        p(z_ik=1 | Z_-nk, A, X) propto p(z_ik=1) p(X | Z, A)
        '''
        N, D = X.size()
        Z_k = Z[:, k]
        # Called m_-nk in the paper
        m = Z_k.sum() - Z_k[i]

        # If Z_nk were 0
        Z_if_0 = Z.clone()
        Z_if_0[i, k] = 0
        log_prior_if_0 = (1 - (m / (N - 1))).log()
        log_likelihood_if_0 = self.log_likelihood_given_ZA(X, Z_if_0, A)
        log_score_if_0 = log_prior_if_0 + log_likelihood_if_0

        # If Z_nk were 1
        Z_if_1 = Z.clone()
        Z_if_1[i, k] = 1
        log_prior_if_1 = (m / (N - 1)).log()
        log_likelihood_if_1 = self.log_likelihood_given_ZA(X, Z_if_1, A)
        log_score_if_1 = log_prior_if_1 + log_likelihood_if_1

        # Exp, Normalize, Sample
        log_scores = torch.cat((log_score_if_0, log_score_if_1), 0)
        probs = self.renormalize_log_probs(log_scores)
        p_znk = Bern(probs[1])
        return p_znk.sample()
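The renormalize_log_probs helper is not shown here; a hypothetical stand-alone version of that method, consistent with how it is called above (unnormalized log scores in, normalized probabilities out), might look like:

import torch

def renormalize_log_probs(log_scores):
    # Numerically stable softmax over a 1-D tensor of unnormalized log scores.
    return torch.exp(log_scores - torch.logsumexp(log_scores, dim=0))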
Example 12
 def _adapted_sampling(self, shape, device, dtype):
     """
         The Bernoulli sampling function, used to sample from batch
     """
     _bernoulli = Bernoulli(torch.tensor(float(self.p), device=device, dtype=dtype))
     target = _bernoulli.sample((shape,)).bool()
     return target
    def forward(self, x, gamma):
        # shape: (bsize, channels, height, width)

        if self.training:
            batch_size, channels, height, width = x.shape
            bernoulli = Bernoulli(gamma)
            mask = bernoulli.sample(
                (
                    batch_size,
                    channels,
                    height - (self.block_size - 1),
                    width - (self.block_size - 1),
                )
            )
            if torch.cuda.is_available():
                mask = mask.cuda()
            block_mask = self._compute_block_mask(mask)
            countM = (
                block_mask.size()[0]
                * block_mask.size()[1]
                * block_mask.size()[2]
                * block_mask.size()[3]
            )
            count_ones = block_mask.sum()

            return block_mask * x * (countM / count_ones)
        else:
            return x
Example 14
 def make_move(self, x):
     # self.states.append(x)
     probability = self.forward(x)
     probability = Bernoulli(probability)
     move = probability.sample()
     # self.log_probs.append(probability.log_prob(move))
     return move.item()
Example 15
 def sample(self, epoch, num=64):
     z = torch.randn(num, self.latent_dim)
     x_probs = self.decode(z)
     dist = Bernoulli(x_probs)
     x_sample = dist.sample()
     save_image(x_sample.view(num, 1, 28, 28),
                'results/epoch_{}_samples.png'.format(epoch))
Example 16
def simulate_data(model, batch_size=10, n_batch=1):
    """Simulate data from the VAE model. Sample from the 
  joint distribution p(z)p(x|z). This is equivalent to
  sampling from p(x)p(z|x), i.e. z is from the posterior.

  Bidirectional Monte Carlo only works on simulated data,
  where we could obtain exact posterior samples.

  Args:
      model: VAE model for simulation
      batch_size: batch size for simulated data
      n_batch: number of batches

  Returns:
      iterator that loops over batches of torch Tensor pair x, z
  """

    # shorter aliases

    batches = []
    for i in range(n_batch):
        # assume prior for VAE is unit Gaussian
        z = torch.randn(batch_size, model.latent_dim).cuda()
        x_logits = model.decode(z)
        if isinstance(x_logits, tuple):
            x_logits = x_logits[0]
        x_bernoulli_dist = Bernoulli(probs=x_logits.sigmoid())
        x = x_bernoulli_dist.sample().data

        paired_batch = (x, z)
        batches.append(paired_batch)

    return iter(batches)
Example 17
def act(batch_states, theta, values):
    batch_states = torch.from_numpy(batch_states).long()
    probs = torch.sigmoid(theta)[batch_states]
    m = Bernoulli(1 - probs)
    actions = m.sample()
    log_probs_actions = m.log_prob(actions)
    return actions.numpy().astype(int), log_probs_actions, values[batch_states]
Example 18
    def forward(self, x, gamma):
        """give a 4-d tensor, apply dropblock

        Args:
            x (torch.Tensor): (batch_size, num_channel, h, w)
            gamma (float): the probability of each upper left corner of a block to be zeroed out
                            a rough estimate of how to set this value is given in the dropblock paper

        Returns:
            torch.Tensor: x with each channel's (block_size, block_size) blocks randomly zeroed out.
        """
        if self.training:
            batch_size, channels, height, width = x.shape

            bernoulli = Bernoulli(gamma)
            # mask is indicators of the upper left corner of the blocks to be zeroed out
            mask = bernoulli.sample(
                sample_shape=(batch_size, channels,
                              height - (self.block_size - 1),
                              width - (self.block_size - 1))).to(x.device)
            #print((x.sample[-2], x.sample[-1]))
            block_mask = self._compute_block_mask(mask)
            #print (block_mask.size())
            #print (x.size())
            countM = (block_mask.size()[0] * block_mask.size()[1] *
                      block_mask.size()[2] * block_mask.size()[3])
            count_ones = block_mask.sum()

            return block_mask * x * (countM / count_ones)
        else:
            return x
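The docstring defers to the DropBlock paper for setting gamma. The closed-form estimate used there (and in _compute_gamma of Example 27 below) can be written as a small helper; drop_prob, block_size and feat_size are assumed inputs:

def compute_gamma(drop_prob, block_size, feat_size):
    # Scale the per-corner Bernoulli probability so that roughly drop_prob of the
    # feature map ends up zeroed once each sampled corner is expanded to a block.
    return (drop_prob / block_size ** 2) * \
           (feat_size ** 2 / (feat_size - block_size + 1) ** 2)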
Example 19
    def get_action_from_actor(self, state, deterministic=False):
        """Given the state, produces an action, the probability of the action, the log probability of the action, and
        the argmax action"""
        action_probabilities = self.actor_local(
            state)  # output size should be [B, H, W]
        action_probabilities = F.sigmoid(
            action_probabilities)  # make sure the probs are in range [0,1]

        B, _, _ = action_probabilities.shape
        action_probabilities = action_probabilities.view(B, -1)
        # TODO leave this to future process; seems it will get the index
        max_probability_action = torch.argmax(action_probabilities, dim=-1)

        assert action_probabilities.size(
            1) == self.action_size, "Actor output the wrong size"
        if deterministic:
            # use the deterministic policy at test time: threshold the probabilities
            action = (action_probabilities > 0.5).float().cpu()
        else:
            # use the stochastic policy during training
            action_distribution = Bernoulli(
                action_probabilities
            )  # this creates a distribution to sample from
            action = action_distribution.sample().cpu(
            )  # sample the discrete action and copy it to cpu

        # Handle probabilities of exactly 0.0, since log(0) is undefined
        z = action_probabilities == 0.0
        z = z.float() * 1e-8
        log_action_probabilities = torch.log(action_probabilities + z)

        return action, (action_probabilities,
                        log_action_probabilities), max_probability_action
Example 20
    def forward(self, x):
        prev_h = [self.agent(x)]
        prev_h.extend(
            [torch.zeros_like(prev_h[0]) for _ in range(self.num_layers - 1)])
        prev_c = [torch.zeros_like(prev_h[0])
                  for _ in range(self.num_layers)]  # only used for LSTM

        input = torch.stack([self.sos_embedding] * x.size(0))

        symb_seq = []
        stop_seq = []
        symb_logits = []
        stop_logits = []
        symb_entropy = []
        stop_entropy = []

        for step in range(self.max_len):
            for i, layer in enumerate(self.cells):
                e_t = float(self.training) * (
                    self.noise_loc + self.noise_scale *
                    torch.randn_like(prev_h[0]).to(prev_h[0]))
                if isinstance(layer, nn.LSTMCell):
                    h_t, c_t = layer(input, (prev_h[i], prev_c[i]))
                    c_t = c_t + e_t
                    prev_c[i] = c_t
                else:
                    h_t = layer(input, prev_h[i])
                    h_t = h_t + e_t
                prev_h[i] = h_t
                input = h_t

            symb_probs = F.softmax(self.output_symbol(h_t), dim=1)
            stop_probs = torch.sigmoid(
                torch.squeeze(self.whether_to_stop(h_t), 1))
            symb_distr = Categorical(probs=symb_probs)
            stop_distr = Bernoulli(probs=stop_probs)
            symb = symb_distr.sample() if self.training else symb_probs.argmax(
                dim=1)
            stop = stop_distr.sample() if self.training else (
                stop_probs > 0.5).float()
            symb_logits.append(symb_distr.log_prob(symb))
            stop_logits.append(stop_distr.log_prob(stop))
            symb_entropy.append(symb_distr.entropy())
            stop_entropy.append(stop_distr.entropy())
            symb_seq.append(symb)
            stop_seq.append(stop)

            input = self.embedding(symb)

        symb_seq = torch.stack(symb_seq).permute(1, 0)
        stop_seq = torch.stack(stop_seq).permute(1, 0).long()
        symb_logits = torch.stack(symb_logits).permute(1, 0)
        stop_logits = torch.stack(stop_logits).permute(1, 0)
        symb_entropy = torch.stack(symb_entropy).permute(1, 0)
        stop_entropy = torch.stack(stop_entropy).permute(1, 0)

        logits = (symb_logits, stop_logits)
        entropy = (symb_entropy, stop_entropy)

        return symb_seq, stop_seq, logits, entropy
Example 21
	def sample(self, x_index, init_hidden, use_cuda):
		lstm_i2h_h0, lstm_i2h_c0, lstm_h2s_h0, lstm_h2s_c0 = init_hidden
		batch_size, seq_len = x_index.size()

		x = torch.transpose(self.embed(x_index), 1, 0)

		hidden_i2h , (_, _) = self.lstm_i2h(x, (lstm_i2h_h0, lstm_i2h_c0))

		if use_cuda:
			z = torch.zeros((seq_len, batch_size)).cuda()
		else:
			z = torch.zeros((seq_len, batch_size))

		z = Variable(z)

		s_h2s_h = lstm_h2s_h0
		s_h2s_c = lstm_h2s_c0

		for i in range(seq_len):
			cur_p_z = self.h2o(torch.cat((hidden_i2h[i], s_h2s_h[0]), dim=1))

			cur_p_z = F.sigmoid(torch.squeeze(cur_p_z, 1))
			m = Bernoulli(cur_p_z)
			z[i] = m.sample()


			cat_hidden_z = torch.unsqueeze(torch.cat((hidden_i2h[i], torch.unsqueeze(z[i], 1)), dim=1), 0)
			_, (s_h2s_h, s_h2s_c) = self.lstm_h2s(cat_hidden_z, (s_h2s_h, s_h2s_c))


		return torch.transpose(z, 1, 0)
Example 22
    def sample_mask(cls, p, n):
        """Returns the mask of the weights"""

        bn = Bernoulli(p)
        mask = bn.sample((n, 1))

        return mask
Example 23
    def forward(self, x, hidden=None):
        seq_len, batch_size = x.size(0), x.size(1)
        if hidden is None:
            hidden = (torch.zeros(self.first_dim_hidden, batch_size,
                                  self.hidden_size).to(x.device),
                      torch.zeros(self.first_dim_hidden, batch_size,
                                  self.hidden_size).to(x.device))

        dist_params, actions = [], []
        for element in x:
            output, hidden_ = self.decoder(element.unsqueeze(0), hidden)
            pred = self.predictor(output)
            dist_params.append(pred)

            sampler = Bernoulli(pred)
            selected_prev_y = sampler.sample()

            actions.append(selected_prev_y)
            # bug ? below line was output, _ = ...
            output, hidden = self.hierarchical_decoder(element.unsqueeze(0),
                                                       hidden)
            hidden = [h_ + h * pred for h_, h in zip(hidden_, hidden)]

        params = torch.stack(dist_params, dim=0).view(seq_len, batch_size)
        actions = torch.stack(actions, dim=0).view(seq_len, batch_size)
        return params, actions
Example 24
class Dropout(Module):
    """Dropout module"

    Attributes
    ----------
    p : float
        Probability for activation to be ignored
    """
    def __init__(self, p=0.5):
        super().__init__()
        from torch.distributions import Bernoulli
        self.p = p
        self._bernoulli = Bernoulli(1 - self.p)
        self._sample = None

    def forward(self, input):
        if self.training:
            self._sample = self._bernoulli.sample(
                input.shape).float() / (1 - self.p)
            input = input * self._sample
        return input

    def backward(self, gradient):
        if self.training:
            gradient = gradient * self._sample
        return gradient
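A quick usage sketch for the module above, assuming the custom Module base class exposes the training flag that forward relies on. The 1/(1 - p) scaling keeps the expected activation unchanged at train time:

import torch

drop = Dropout(p=0.5)
drop.training = True
x = torch.ones(4, 8)
y = drop.forward(x)   # roughly half the entries are zeroed, the survivors are scaled by 2
print(y.mean())       # close to 1.0 in expectation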
Example 25
    def forward(self, state):
        policy_p = self.fn_approximator(state)
        policy_p = F.sigmoid(policy_p)

        try:
            stochastic_policy = Bernoulli(policy_p)

            actions = stochastic_policy.sample()

            log_probs = stochastic_policy.log_prob(actions)
        except RuntimeError as e:
            logging.debug(
                'Runtime error occurred. policy_p was {}'.format(policy_p))
            logging.debug('State was: {}'.format(state))
            logging.debug('Function approximator return was: {}'.format(
                self.fn_approximator(state)))
            logging.debug(
                'This has occurred before when parameters of the network became NaNs.'
            )
            logging.debug(
                'Check the learning rate, or change eps in adaptive gradient descent methods.'
            )
            raise RuntimeError(
                'BernoulliPolicy returned NaN information. Logging at DEBUG level will give more '
                'information.')
        return actions, log_probs
Example 26
def select_action(policy, state: int = 0, mode = "param"):
    """
    Select an action (0 or 1) by running policy model and choosing based on the probabilities in state
    input:
      policy: nn.module, the policy specified
      state: int, the numerical encode of the state
    return:
      action: int, the action (0 or 1) selected
    """
    assert mode in ["param", "NN"]
    with torch.no_grad():

        # convert state to one hot
        state = get_one_hot(state, 6)

        # run the policy to obtain the action probability
        action_prob = policy(state)
        if mode == "param":
            # from the probability obtained, create a Bernoulli distribution
            action_prob = torch.clamp(action_prob, min=0.0, max=1.0)
            c = Bernoulli(action_prob)
        else:
            # from the probabilities obtained, create a Categorical distribution
            c = Categorical(action_prob)
        # sample from this distribution
        action = c.sample()
        return action.item()
Example 27
class DropBlock(nn.Module):
    def __init__(self, drop_prob, block_size, feat_size):
        super(DropBlock, self).__init__()

        assert feat_size > block_size, \
            "block_size can't exceed feat_size"

        self.drop_prob = drop_prob
        self.block_size = block_size
        self.feat_size = feat_size
        self.gamma = self._compute_gamma()
        self.bernouli = Bernoulli(self.gamma)

    def forward(self, x):
        # shape: (bsize, channels, height, width)

        assert x.dim() == 4, \
            "Expected input with 4 dimensions (bsize, channels, height, width)"

        if not self.training:
            return x
        else:
            mask = self.bernouli.sample((x.shape[-2], x.shape[-1]))
            block_mask = self._compute_block_mask(mask)
            out = x * block_mask[None, None, :, :]
            out = out * block_mask.numel() / block_mask.sum()
            return out

    def _compute_block_mask(self, mask):
        height, width = mask.shape

        non_zero_idxs = mask.nonzero()
        nr_blocks = non_zero_idxs.shape[0]

        offsets = torch.stack([
            torch.arange(self.block_size).view(-1, 1).expand(
                self.block_size, self.block_size).reshape(-1),
            torch.arange(self.block_size).repeat(self.block_size)
        ]).t()

        non_zero_idxs = non_zero_idxs.repeat(self.block_size**2, 1)
        offsets = offsets.repeat(1, nr_blocks).view(-1, 2)

        block_idxs = non_zero_idxs + offsets
        padded_mask = F.pad(mask, (0, self.block_size, 0, self.block_size))

        padded_mask[block_idxs[:, 0], block_idxs[:, 1]] = 1.
        block_mask = padded_mask[:height, :width]

        return 1 - block_mask

    def _compute_gamma(self):
        return (self.drop_prob / (self.block_size ** 2)) * \
               ((self.feat_size ** 2) / ((self.feat_size - self.block_size + 1) ** 2))

    def set_drop_probability(self, drop_prob):
        self.drop_prob = drop_prob
        self.gamma = self._compute_gamma()
        self.bernouli = Bernoulli(self.gamma)
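A usage sketch for the class above, with values chosen purely for illustration (the class's own imports, i.e. torch, torch.nn, torch.nn.functional and Bernoulli, are assumed to be in scope). Note that the mask is sampled once per spatial map and shared across the batch and channel dimensions by the [None, None, :, :] broadcast:

import torch

block = DropBlock(drop_prob=0.1, block_size=3, feat_size=16)
block.train()
x = torch.randn(2, 8, 16, 16)
out = block(x)   # contiguous 3x3 regions are zeroed and the remaining activations rescaled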
Example 28
    def act(self, obs, actor, critic):
        obs = torch.from_numpy(obs).long()
        prob = torch.sigmoid(actor)[obs]
        bernoulli = Bernoulli(1 - prob)
        action = bernoulli.sample()
        logprob = bernoulli.log_prob(action)

        return action.numpy().astype(int), logprob, critic[obs]
Example 29
def predict_edges(G_t, adjacencies, prev_i):
    edge_weights = adjacencies.data.squeeze().sigmoid()
    # print(edge_weights)
    edge_preds = []
    m = Bernoulli(edge_weights)
    preds = m.sample()
    G_t[:, :prev_i + 1] = torch.narrow(preds, 0, 0, prev_i + 1)
    return G_t
Example 30
def train(epoch):

    agent.train()
    rnet.train()

    matches, rewards, policies = [], [], []
    for batch_idx, (inputs, targets) in tqdm.tqdm(enumerate(trainloader), total=len(trainloader)):

        inputs, targets = Variable(inputs), Variable(targets).cuda(non_blocking=True)
        if not args.parallel:
            inputs = inputs.cuda()

        probs, value = agent(inputs)

        #---------------------------------------------------------------------#

        policy_map = probs.data.clone()
        policy_map[policy_map<0.5] = 0.0
        policy_map[policy_map>=0.5] = 1.0
        policy_map = Variable(policy_map)

        probs = probs*args.alpha + (1-probs)*(1-args.alpha)
        distr = Bernoulli(probs)
        policy = distr.sample()

        v_inputs = Variable(inputs.data, volatile=True)
        preds_map = rnet.forward(v_inputs, policy_map)
        preds_sample = rnet.forward(inputs, policy)

        reward_map, _ = get_reward(preds_map, targets, policy_map.data)
        reward_sample, match = get_reward(preds_sample, targets, policy.data)

        advantage = reward_sample - reward_map
        # advantage = advantage.expand_as(policy)
        loss = -distr.log_prob(policy).sum(1, keepdim=True) * Variable(advantage)
        loss = loss.sum()

        #---------------------------------------------------------------------#
        loss += F.cross_entropy(preds_sample, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        matches.append(match.cpu())
        rewards.append(reward_sample.cpu())
        policies.append(policy.data.cpu())

    accuracy, reward, sparsity, variance, policy_set = utils.performance_stats(policies, rewards, matches)

    log_str = 'E: %d | A: %.3f | R: %.2E | S: %.3f | V: %.3f | #: %d'%(epoch, accuracy, reward, sparsity, variance, len(policy_set))
    print(log_str)

    log_value('train_accuracy', accuracy, epoch)
    log_value('train_reward', reward, epoch)
    log_value('train_sparsity', sparsity, epoch)
    log_value('train_variance', variance, epoch)
    log_value('train_unique_policies', len(policy_set), epoch)
Example 31
class RewardHighVelocity(gym.RewardWrapper):
    """Wrapper to modify environment rewards of 'Cheetah','Walker' and
    'Hopper'.

    Penalizes with certain probability if velocity of the agent is greater
    than a predefined max velocity.
    Parameters
    ----------
    kwargs : dict
        with keys:
        'prob_vel_penal': prob of penalization
        'cost_vel': cost of penalization
        'max_vel': max velocity

    Methods
    -------
    step(action) : next_state, reward, done, info
        execute a step in the environment.
    """
    def __init__(self, env, **kwargs):
        super(RewardHighVelocity, self).__init__(env)
        self.penal_v_distr = Bernoulli(kwargs['prob_vel_penal'])
        self.penal = kwargs['cost_vel']
        self.max_vel = kwargs['max_vel']
        allowed_envs = ['Cheetah', 'Hopper', 'Walker']
        assert any(e in self.env.unwrapped.spec.id for e in allowed_envs), \
            f'Env {self.env.unwrapped.spec.id} not allowed for RewardWrapper'

    def step(self, action):
        observation, reward, done, info = self.env.step(action)
        vel = info['x_velocity']
        info['risky_state'] = vel > self.max_vel
        info['angle'] = self.env.sim.data.qpos[2]

        # Cheetah, Walker and Hopper are all handled the same way here.
        return observation, self.new_reward(reward, info), done, info

    def new_reward(self, reward, info):
        if 'Cheetah' in self.env.unwrapped.spec.id:
            forward_reward = info['reward_run']
        else:
            forward_reward = info['x_velocity']

        penal = info['risky_state'] * \
            self.penal_v_distr.sample().item() * self.penal

        # If the penalty applies, subtract the forward_reward from the total reward
        # original_reward = rew_healthy + forward_reward - cntrl_cost
        new_reward = penal + reward + (penal != 0) * (-forward_reward)
        return new_reward

    @property
    def name(self):
        return f'{self.__class__.__name__}{self.env}'
Example 32
    def forward(self, image):
        latent_means = self.encoder.forward(image)

        bernoulli_rv = Bernoulli(latent_means)
        bernoulli_samples = bernoulli_rv.sample().detach()

        image_mean = self.decoder.forward(bernoulli_samples)

        return image_mean, latent_means, bernoulli_samples
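One thing worth noting about this forward pass: .sample().detach() (and .sample() itself) provides no gradient path from the decoder loss back to the encoder, which is exactly the situation the REINFORCE pseudo-loss in the first example is meant to handle. A tiny check, as a sketch:

import torch
from torch.distributions import Bernoulli

latent_means = torch.sigmoid(torch.randn(4, 16, requires_grad=True))
samples = Bernoulli(latent_means).sample().detach()
print(latent_means.requires_grad, samples.requires_grad)  # True False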