Example #1
    def act(self, state_tensor):
        # `state_tensor` is a batch of state tensors rather than a joint state.
        # Previous Gaussian-policy variant, kept for reference:
        # value, mu, cov = self.value_action_predictor(state_tensor)
        # dist = MultivariateNormal(mu, cov)
        # actions = dist.sample()
        # action_log_probs = dist.log_prob(actions)
        # action_to_take = [ActionXY(action[0], action[1]) for action in actions.cpu().numpy()]

        value, alpha_beta_1, alpha_beta_2 = self.value_action_predictor(
            state_tensor)
        # One Beta head per velocity component; samples lie in (0, 1).
        vx_dist = Beta(alpha_beta_1[:, 0], alpha_beta_1[:, 1])
        vy_dist = Beta(alpha_beta_2[:, 0], alpha_beta_2[:, 1])
        actions = torch.cat(
            [vx_dist.sample().unsqueeze(1),
             vy_dist.sample().unsqueeze(1)],
            dim=1)
        action_log_probs = (vx_dist.log_prob(actions[:, 0]).unsqueeze(1) +
                            vy_dist.log_prob(actions[:, 1]).unsqueeze(1))
        # Rescale each component from (0, 1) to (-1, 1) before execution.
        action_to_take = [
            ActionXY(action[0] * 2 - 1, action[1] * 2 - 1)
            for action in actions.cpu().numpy()
        ]

        return value, actions, action_log_probs, action_to_take
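
A note on the rescaling above: mapping a Beta sample a in (0, 1) to 2a - 1 changes the density by a constant factor of 2 per dimension, so the exact log-probability of the executed action is log_prob(a) - log 2. The offset is constant in the network parameters, so it cancels in policy-gradient ratios, which is presumably why the snippet omits it. A minimal sketch, reusing vx_dist from the method above:

a = vx_dist.sample()
# Log-density of the rescaled action 2a - 1 under the change of variables.
log_p_scaled = vx_dist.log_prob(a) - torch.log(torch.tensor(2.0))
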
Example #2
 def __init__(self, alpha=1.0, lam=RANDOM, reformulate=False):
     super(RMixup, self).__init__()
     self.alpha = alpha
     self.lam = lam
     self.reformulate = reformulate
     self.distrib = (Beta(self.alpha, self.alpha) if not reformulate
                     else Beta(self.alpha + 1, self.alpha))
Example #3
 def forward(self, nsmpl, return_z=False):
     zero = torch.zeros_like(self.ref)  # zeros on the same device as self.ref
     one = torch.ones_like(self.ref)    # ones on the same device as self.ref
     # Earlier mixture-prior variant, kept for reference:
     #      mix = 2 * Bernoulli(.2 * one[0]).sample([nsmpl]) - 1.
     #      mu = torch.ger(mix, one) # Array of +/-1.
     #      sd = one.expand([nsmpl,-1])
     #      z = Normal(mu, sd).sample()
     z = Normal(zero, one).sample([nsmpl])
     a, b = self.detfwd(z)
     if return_z:
         return z, Beta(a, b).rsample()
     return Beta(a, b).rsample()
Example #4
    def optimize_epoch(self, num_epochs):
        if self.optimizer is None:
            raise ValueError('Learning rate is not set!')
        if self.data_loader is None:
            # Lazily build the loader over the replay memory.
            self.data_loader = DataLoader(self.memory,
                                          self.batch_size,
                                          shuffle=True)
        average_value_loss = 0
        average_policy_loss = 0
        for epoch in range(num_epochs):
            value_loss = 0
            policy_loss = 0
            logging.debug('{}-th epoch starts'.format(epoch))
            for data in self.data_loader:
                inputs, values, _, actions = data
                self.optimizer.zero_grad()
                # Previous Gaussian-policy variant, kept for reference:
                # outputs_val, outputs_mu, outputs_cov = self.model(inputs)
                # action_log_probs = MultivariateNormal(outputs_mu, outputs_cov).log_prob(actions)
                outputs_val, alpha_beta_1, alpha_beta_2 = self.model(inputs)
                vx_dist = Beta(alpha_beta_1[:, 0], alpha_beta_1[:, 1])
                vy_dist = Beta(alpha_beta_2[:, 0], alpha_beta_2[:, 1])
                # Nudge actions into the open interval (0, 1) so that
                # Beta.log_prob stays finite at the boundaries.
                p = torch.Tensor([1 + 1e-6]).to(self.device)
                q = torch.Tensor([1e-8]).to(self.device)
                action_log_probs = (vx_dist.log_prob(actions[:, 0] / p + q).unsqueeze(1) +
                                    vy_dist.log_prob(actions[:, 1] / p + q).unsqueeze(1))

                values = values.to(self.device)
                dist_entropy = (vx_dist.entropy().mean() +
                                vy_dist.entropy().mean())

                loss1 = self.criterion_val(outputs_val, values)
                loss2 = -action_log_probs.mean()
                loss = loss1 + loss2 - dist_entropy * self.entropy_coef
                # loss = loss1 + loss2
                loss.backward()
                self.optimizer.step()
                value_loss += loss1.data.item()
                policy_loss += loss2.data.item()
            logging.debug('{}-th epoch ends'.format(epoch))
            average_value_loss = value_loss / len(self.memory)
            average_policy_loss = policy_loss / len(self.memory)
            self.writer.add_scalar('IL/average_value_loss', average_value_loss,
                                   epoch)
            self.writer.add_scalar('IL/average_policy_loss',
                                   average_policy_loss, epoch)
            logging.info('Average value, policy loss in epoch %d: %.2E, %.2E',
                         epoch, average_value_loss, average_policy_loss)

        return average_value_loss
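
The p and q constants above nudge each action into the open interval (0, 1), since Beta.log_prob diverges at the boundaries. A quick standalone check, assuming torch and Beta are imported as in the snippets:

dist = Beta(torch.tensor(2.0), torch.tensor(2.0))
print(dist.log_prob(torch.tensor(1.0)))                      # tensor(-inf) at the boundary
print(dist.log_prob(torch.tensor(1.0) / (1 + 1e-6) + 1e-8))  # finite
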
Example #5
def generate_data(num_obs):
    # domain = [False, True]
    prior = {'A': torch.tensor([1., 10.]),
             'B': torch.tensor([[10., 1.],
                                [1., 10.]]),
             'C': torch.tensor([[10., 1.],
                                [1., 10.]])}
    CPDs = {'p_A': Beta(prior['A'][0], prior['A'][1]).sample(),
            'p_B': Beta(prior['B'][:, 0], prior['B'][:, 1]).sample(),
            'p_C': Beta(prior['C'][:, 0], prior['C'][:, 1]).sample(),
            }
    data = {'A': Bernoulli(torch.ones(num_obs) * CPDs['p_A']).sample()}
    data['B'] = Bernoulli(torch.gather(CPDs['p_B'], 0, data['A'].type(torch.long))).sample()
    data['C'] = Bernoulli(torch.gather(CPDs['p_C'], 0, data['B'].type(torch.long))).sample()
    return prior, CPDs, data
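
generate_data performs ancestral sampling in a three-node Bayesian network A -> B -> C: the conditional probability tables are drawn from Beta priors, then binary observations are sampled down the chain with Bernoulli. A hypothetical smoke test:

prior, CPDs, data = generate_data(100)
print(CPDs['p_A'])       # scalar success probability for A
print(data['B'].shape)   # torch.Size([100]), one binary draw per observation
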
Example #6
    def optimize(self, train_data, test_data, epochs=30, bsz=256):
        # The initial learning rate is set low so that the parameters
        # do not blow up; with higher rates no learning takes place.
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        sched = torch.optim.lr_scheduler.MultiStepLR(optimizer, [29])

        batches = DataLoader(dataset=train_data, batch_size=bsz, shuffle=True)
        test_set = DataLoader(dataset=test_data, batch_size=bsz, shuffle=True)

        best = float('inf')

        for ep in range(epochs):
            batch_loss = 0.0
            self.train()
            for bno, data in enumerate(batches):
                atac = torch.clamp(data[:, :50], min=.001, max=.9999)
                hic = data[:, 50:]
                # Shrink 5% of the entries.
                shrink = torch.ones_like(hic, device=data.device)
                idx = torch.rand(shrink.shape, device=data.device) < .05
                shrink[idx] = torch.rand(shrink.shape, device=data.device)[idx]
                # Random factor.
                #rfact = .8 + .4 * torch.rand(1, device=data.device)
                (a, b) = self(hic * shrink)
                #(a,b) = self(hic)
                loss = -torch.mean(Beta(a, b).log_prob(atac))
                batch_loss += float(loss)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            sched.step()

            # Test data.
            self.eval()
            with torch.no_grad():
                test_rcst = 0.0
                for sno, data in enumerate(test_set):
                    hic = data[:, 50:]
                    atac = torch.clamp(data[:, :50], min=.001, max=.9999)
                    (a, b) = self(hic)
                    test_rcst -= float(torch.mean(Beta(a, b).log_prob(atac)))

            # Print logs on stderr (bno and sno are 0-based, hence the +1).
            if test_rcst / (sno + 1) < best: best = test_rcst / (sno + 1)
            sys.stderr.write('%d\t%f\t%f\t%f\n' % \
                  (ep, batch_loss / (bno + 1), test_rcst / (sno + 1), best))
Example #7
 def __init__(self,
              model: nn.Module,
              optimizer: Optimizer,
              loss_f: Callable,
              temperature: float,
              beta: float,
              consistency_weight: float,
              *,
              reporters: Optional[Union[_ReporterBase,
                                        List[_ReporterBase]]] = None,
              scheduler: Optional[Scheduler] = None,
              verb=True,
              use_cudnn_benchmark=True,
              report_accuracy_topk: Optional[Union[int, List[int]]] = None,
              **kwargs):
     super(MixMatchTrainer,
           self).__init__(model,
                          optimizer,
                          loss_f,
                          reporters=reporters,
                          scheduler=scheduler,
                          verb=verb,
                          use_cudnn_benchmark=use_cudnn_benchmark,
                          **kwargs)
     self.temperature = temperature
     self.beta = Beta(beta, beta)
     self.consistency_weight = consistency_weight
     if report_accuracy_topk is not None and not isinstance(
             report_accuracy_topk, Iterable):
         report_accuracy_topk = [report_accuracy_topk]
     self._report_topk = report_accuracy_topk
Example #8
 def begin_batch(self):
     "Updates alpha as a function of the training percentage."
     # we do the partial application here (and not in the constructor) to avoid a pickle ambiguity error on learn.export
     # due to the fact that the partially applied function as the same name as the original function
     alpha = self.scheduler(self.alpha_min, self.alpha_max)(self.pct_train)
     self.distrib = Beta(tensor(alpha), tensor(alpha))
     super().begin_batch()
Example #9
 def __init__(self,
              model: nn.Module,
              optimizer: Optimizer,
              loss_f: Callable,
              consistency_weight: float,
              alpha: float,
              beta: float,
              *,
              reporters: Optional[Union[_ReporterBase,
                                        List[_ReporterBase]]] = None,
              scheduler: Optional[Scheduler] = None,
              verb=True,
              use_cudnn_benchmark=True,
              report_accuracy_topk: Optional[Union[int, List[int]]] = None,
              **kwargs):
     teacher = deepcopy(model)
     model = {'student': model, 'teacher': teacher}
     super(InterpolationConsistencyTrainer,
           self).__init__(model,
                          optimizer,
                          loss_f,
                          reporters=reporters,
                          scheduler=scheduler,
                          verb=verb,
                          use_cudnn_benchmark=use_cudnn_benchmark,
                          **kwargs)
     self.consistency_weight = consistency_weight
     self.alpha = alpha
     self.beta = Beta(beta, beta)
     if report_accuracy_topk is not None and not isinstance(
             report_accuracy_topk, Iterable):
         report_accuracy_topk = [report_accuracy_topk]
     self._report_topk = report_accuracy_topk
Example #10
    def __getitem__(self, idx):
        # idx only acts as a counter while generating batches.
        prob = 0.5 * torch.ones([self.input_seq_len, self.seq_width],
                                dtype=torch.float64)
        seq = Binomial(1, prob).sample()
        # Extra input channel for providing priority value
        input_seq = torch.zeros([self.input_seq_len, self.seq_width + 1])
        input_seq[:self.input_seq_len, :self.seq_width] = seq

        # torch's Uniform distribution draws samples from the half-open
        # interval [low, high), but in the paper the priorities are drawn
        # from [-1, 1]. This minor difference is ignored here, as it
        # supposedly doesn't affect the task.
        if not self.uniform:
            alpha = torch.tensor([2.0])
            beta = torch.tensor([5.0])
            if self.random_distr:
                alpha_beta_gen = Uniform(torch.tensor([0.0]),
                                         torch.tensor([100.0]))
                alpha = alpha_beta_gen.sample()
                beta = alpha_beta_gen.sample()
            priority = Beta(alpha, beta)
        else:
            priority = Uniform(torch.tensor([-1.0]), torch.tensor([1.0]))

        for i in range(self.input_seq_len):
            input_seq[i, self.seq_width] = priority.sample()

        sorted_index = torch.sort(input_seq[:, -1], descending=True)[1]
        target_seq = input_seq[sorted_index][:self.target_seq_len,
                                             :self.seq_width]

        return {'input': input_seq, 'target': target_seq}
Example #11
    def get_distribution(self, params1, params2):
        if self.distribution == "normal":
            dist = Normal(params1, params2)
        elif self.distribution == "beta":
            dist = Beta(params1, params2)
        else:
            raise ValueError("Unsupported distribution: %s" % self.distribution)

        return dist
Example #12
def probabilty_s_given_y(theta, s, y, l, k, ratio_agreement=0.95, model=1):
    if model == 1:
        eq = torch.eq(k.view(-1, 1).long(), y.long()).double().t()
        r = ratio_agreement * eq.squeeze() + (1 - ratio_agreement) * (
            1 - eq.squeeze())
        eq = torch.stack([eq, 1 - eq]).squeeze().t()
        params = (theta * eq).sum(1)
        probability = 1
        for i in range(k.shape[0]):
            m = Beta(r[i] * params[i] / (r[i] + 1), params[i] / (r[i] + 1))
            probability *= torch.exp(m.log_prob(
                s[:, i].double())) * l[:, i].double() + (1 - l[:, i]).double()
    elif model == 2:
        eq = torch.eq(k.view(-1, 1).long(), y.long()).double().t()
        eq = torch.stack([eq, 1 - eq]).squeeze().t()
        params = (theta * eq).sum(1)
        probability = 1
        for i in range(k.shape[0]):
            m = HalfNormal(params[i])
            probability *= (
                (1 - torch.exp(m.log_prob(s[:, i].double()))) * eq[i, 0] +
                (torch.exp(m.log_prob(s[:, i].double()))) *
                (1 - eq[i, 0])) * l[:, i].double() + (1 - l[:, i]).double()
    return probability
Example #13
    def mixup_data(x: torch.FloatTensor,
                   y: torch.LongTensor,
                   alpha: float = 1.0):

        if len(x) != len(y):
            raise ValueError(
                "The size of `x` and `y` must match in the first dim.")

        if alpha > 0.:
            alpha = float(alpha)
            beta_dist = Beta(torch.tensor([alpha]), torch.tensor([alpha]))
            lam = beta_dist.sample().item()
        else:
            lam = 1.

        batch_size, num_channels, _, _ = x.size()
        index = torch.randperm(batch_size).to(x.device)

        # For WM811K, the input tensors `x` have two channels, where
        # the first channel has values of either one (for fail) or zero (for pass),
        # while the second channel has values of either one (for valid bins) or zeros (null bins).
        if num_channels == 2:
            mixed_x0 = \
                lam * x[:, 0, :, :] + (1 - lam) * x[index, 0, :, :]  # (B, H, W)
            mixed_x1 = (x[:, 1, :, :] + x[index, 1, :, :])  # (B, H, W)
            mixed_x1 = torch.clamp(mixed_x1, min=0, max=1)  # (B, H, W)
            mixed_x = torch.stack([mixed_x0, mixed_x1], dim=1)  # (B, 2, H, W)
        else:
            raise NotImplementedError

        y_a, y_b = y, y[index]
        return mixed_x, y_a, y_b, lam
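
A hypothetical smoke test for the two-channel branch, assuming mixup_data is exposed as a static method and torch is imported:

x = torch.randint(0, 2, (8, 2, 32, 32)).float()   # fail/pass map + valid-bin mask
y = torch.randint(0, 9, (8,))
mixed_x, y_a, y_b, lam = mixup_data(x, y, alpha=1.0)
assert mixed_x.shape == x.shape and 0.0 <= lam <= 1.0
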
Example #14
def get_random_domainess(cur_iter, total_iter, batch):
    alpha = np.exp((cur_iter - (0.5 * total_iter)) / (0.25 * total_iter))
    distribution = Beta(alpha, 1)
    z = distribution.sample((batch, 1))
    z2 = z * torch.rand(1)
    output = torch.cat([1 - z, z2, z - z2], dim=1)
    return output
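
Each output row is a convex combination: (1 - z) + z2 + (z - z2) = 1, so the three columns can be used directly as domain-mixing weights. A quick check, assuming numpy (np), torch, and Beta are imported as in the snippet:

out = get_random_domainess(cur_iter=500, total_iter=1000, batch=4)
print(out.sum(dim=1))  # each row sums to 1, up to floating-point error
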
Example #15
def augmentAndMix(x_orig, k, alpha, preprocess):
    # k : number of chains
    # alpha : sampling constant

    x_temp = x_orig  # working copy consumed by the augmentation chains

    x_aug = torch.zeros_like(preprocess(x_orig))
    mixing_weight_dist = Dirichlet(torch.empty(k).fill_(alpha))
    mixing_weights = mixing_weight_dist.sample()

    for i in range(k):
        # Restart each chain from the original image, per the AugMix recipe.
        x_temp = x_orig
        sampled_augs = random.sample(augmentations, k)
        aug_chain_length = random.choice(range(1, k + 1))
        aug_chain = sampled_augs[:aug_chain_length]

        for aug in aug_chain:
            severity = random.choice(range(1, 6))
            x_temp = aug(x_temp, severity)

        x_aug += mixing_weights[i] * preprocess(x_temp)

    skip_conn_weight_dist = Beta(torch.tensor([alpha]), torch.tensor([alpha]))
    skip_conn_weight = skip_conn_weight_dist.sample()

    x_augmix = skip_conn_weight * x_aug + (
        1 - skip_conn_weight) * preprocess(x_orig)

    return x_augmix
Example #16
 def forward(self, ob, mu, K, sampled=True, z_old=None, beta_old=None):
     q = probtorch.Trace()
     S, B, N, D = ob.shape
     ob_mu = ob.unsqueeze(2).repeat(
         1, 1, K, 1, 1) - mu.unsqueeze(-2).repeat(1, 1, 1, N, 1)
     q_probs = F.softmax(
         self.pi_log_prob(ob_mu).squeeze(-1).transpose(-1, -2), -1)
     if sampled:
         z = cat(q_probs).sample()
         _ = q.variable(cat, probs=q_probs, value=z, name='states')
         mu_expand = torch.gather(
             mu, -2,
             z.argmax(-1).unsqueeze(-1).repeat(1, 1, 1, D))
         q_angle_con1 = self.angle_log_con1(ob - mu_expand).exp()
         q_angle_con0 = self.angle_log_con0(ob - mu_expand).exp()
         beta = Beta(q_angle_con1, q_angle_con0).sample()
         q.beta(q_angle_con1, q_angle_con0, value=beta, name='angles')
     else:
         _ = q.variable(cat, probs=q_probs, value=z_old, name='states')
         mu_expand = torch.gather(
             mu, -2,
             z_old.argmax(-1).unsqueeze(-1).repeat(1, 1, 1, D))
         q_angle_con1 = self.angle_log_con1(ob - mu_expand).exp()
         q_angle_con0 = self.angle_log_con0(ob - mu_expand).exp()
         q.beta(q_angle_con1, q_angle_con0, value=beta_old, name='angles')
     return q
Example #17
    def _rejection_sample_wood(loc: torch.Tensor, concentration: torch.Tensor,
                               w: torch.Tensor):
        """
        The acceptance-rejection sampling scheme from Wood (1994).

        Based on TensorFlow's implementation:
        https://github.com/tensorflow/probability/blob/v0.11.1/tensorflow_probability/python/distributions/von_mises_fisher.py#L421

        and the implementation from "Spherical Latent Spaces for Stable Variational Autoencoders" by Jiacheng Xu, Greg Durrett
        https://github.com/jiacheng-xu/vmf_vae_nlp/blob/master/NVLL/distribution/vmf_only.py#L92
        """
        m = loc.shape[-1]

        b = (m - 1) / (2 * concentration +
                       torch.sqrt(4 * concentration**2 + (m - 1)**2))
        x = (1 - b) / (1 + b)
        c = concentration * x + (m - 1) * torch.log(1 - x**2)

        # Sampling should accept a scalar `w` for each training example.
        done = torch.zeros(w.shape, dtype=torch.bool, device=loc.device)
        while not done.all():
            epsilon = Beta(0.5 * (m - 1), 0.5 * (m - 1)).sample(w.shape)
            w_prime = (1 - (1 + b) * epsilon) / (1 - (1 - b) * epsilon)

            u = Uniform(0.0 + 1e-6, 1.0).sample(w.shape)

            accept = concentration * w_prime + (
                m - 1) * torch.log(1 - x * w_prime) - c >= torch.log(u)

            if accept.any():
                w = torch.where(accept, w_prime, w)
                done = done | accept

        return w
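
A minimal CPU-only driver sketch for the sampler above; the unit-norm loc vectors, the concentration value, and the zero-initialised w are illustrative assumptions, and the static method is assumed reachable at module scope:

loc = torch.nn.functional.normalize(torch.randn(16, 8), dim=-1)
concentration = torch.full((16,), 10.0)
w = _rejection_sample_wood(loc, concentration, torch.zeros(16))
print(w.min(), w.max())  # accepted scalars lie in [-1, 1]
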
Example #18
    def train_step(
            self, sample, model, criterion, optimizer, update_num, ignore_grad=False):
        model.train()
        model.set_num_updates(update_num)

        shuffled_ids = np.arange(len(sample["id"]))
        np.random.shuffle(shuffled_ids)

        net_input_a = sample["net_input"]
        net_input_b = {"src_tokens": net_input_a["src_tokens"][shuffled_ids],
                "prev_output_tokens": net_input_a["prev_output_tokens"][shuffled_ids],
                "src_lengths": net_input_a["src_lengths"][shuffled_ids]}
        pair_sample = {
                "id": sample["id"],
                "nsentences": sample["nsentences"],
                "ntokens": sample["ntokens"],
                "net_input_a": net_input_a,
                "net_input_b": net_input_b,
                "target_a": sample["target"],
                "target_b": sample["target"][shuffled_ids],
        }

        dist = Beta(self.args.alpha, self.args.alpha)
        bsz = len(shuffled_ids)
        lambda_ = dist.sample(sample_shape=[bsz]).to("cuda")
        lambda_ = torch.max(lambda_, 1 - lambda_)
        if self.args.fp16:
            lambda_ = lambda_.half()
        loss, sample_size, logging_output = criterion(model, pair_sample, lambda_=lambda_)

        if ignore_grad:
            loss *= 0
        optimizer.backward(loss)
        return loss, sample_size, logging_output
Example #19
def reinforce(env, policy_estimator, num_episodes=2000, batch_size=10, gamma=0.99):    
    total_rewards = []
    days_counter = []    
    batch_rewards = []
    batch_states = []
    batch_actions = []
    counter = 0       
    ep = 0
    days = 0
    
    while ep < num_episodes:
        # print(ep)
        s_0 = env.reset()
        days = 0
        states = []
        rewards = []
        actions = []
        done = False
        
        while not done:
            if days > 1000:
                print(days)
            
            processed_state = process(s_0, 50000)            
            a, b = policy_estimator.foward(processed_state)    
            distribution = Beta(a, b) 
            action = distribution.sample().detach().numpy() 
            s_1, r, done, _ = env.step(action)                       
            states.append(processed_state)
            rewards.append(r)
            actions.append(action)               
            days += 1
            counter += 1
            s_0 = s_1
               
            
        ep += 1 
        total_rewards.append(sum(rewards))            
        days_counter.append(days)
        
        if counter > 256 and done:  
#             print("reached")            
            returns = discount_rewards(rewards, gamma)              
            batch_states.extend(states)
            batch_rewards.extend(returns)
            batch_actions.extend(actions)          
            
            state_tensor = torch.FloatTensor(batch_states)
            reward_tensor = torch.FloatTensor(batch_rewards)
            a_tnsr, b_tnsr = policy_estimator.foward(state_tensor)
            action_tensor = torch.FloatTensor(batch_actions)
            policy_estimator.update(a_tnsr, b_tnsr, action_tensor, reward_tensor)
            
            batch_rewards = []
            batch_actions = []
            batch_states = []            
            counter = 0
#             print("finished")
    return total_rewards, days_counter
Example #20
 def select_action(self, state, deterministic, reparameterize=False):
     alpha, beta = self.forward(state)
     dist = Beta(concentration1=alpha, concentration0=beta)
     if reparameterize:
         action = dist.rsample()  # (bsize, action_dim)
     else:
         action = dist.sample()  # (bsize, action_dim)
     return action, dist
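
Beta has a reparameterized sampler in PyTorch (its has_rsample flag is True), so the rsample branch lets gradients flow from the sampled action back into the network outputs, while sample blocks them. A standalone sketch:

alpha = torch.tensor(2.0, requires_grad=True)
beta = torch.tensor(3.0, requires_grad=True)
a = Beta(alpha, beta).rsample()  # pathwise-differentiable draw
a.backward()
print(alpha.grad, beta.grad)     # both populated
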
Example #21
def test2():
    """
    The Beta distribution is a family of continuous distributions
    defined on the interval [0, 1].
    :return:
    """
    from torch.distributions.beta import Beta
    dist = Beta(torch.tensor([0.5]), torch.tensor([0.5]))
    dist.sample()  # >>> tensor([0.0594])
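
A handy sanity check: the mean of Beta(alpha, beta) is alpha / (alpha + beta), which dist.mean exposes directly (assuming the import above is hoisted to module scope):

dist = Beta(torch.tensor([2.0]), torch.tensor([5.0]))
print(dist.mean)                     # tensor([0.2857])
print(dist.sample((10000,)).mean())  # close to 0.2857
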
Example #22
 def sample_action(self, s):
     s_T = T.tensor(s).unsqueeze(0)
     act = self.forward(s_T)
     # Squash the raw outputs into (0, 5) to get valid Beta concentrations.
     c1 = T.sigmoid(act[:, :self.act_dim]) * 5
     c2 = T.sigmoid(act[:, self.act_dim:]) * 5
     beta_dist = Beta(c1, c2)
     rnd_act = beta_dist.sample()
     return rnd_act.detach().squeeze(0).numpy()
Example #23
 def __init__(self, k=3, alpha=1, severity=3):
     super(AugMix, self).__init__()
     self.k = k
     self.alpha = alpha
     self.severity = severity
     self.dirichlet = Dirichlet(torch.full(torch.Size([k]), alpha, dtype=torch.float32))
     self.beta = Beta(alpha, alpha)
     self.augs = augmentations
     self.kl = nn.KLDivLoss(reduction='batchmean')
Example #24
def get_lambda(self, batch_size):
    """
    Sample lambda given batch size.
    """
    dist = Beta(self.args.alpha, self.args.alpha)
    lambda_ = dist.sample(sample_shape=[batch_size]).to("cuda")
    lambda_ = torch.max(lambda_, 1 - lambda_)
    return lambda_
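
The torch.max(lambda_, 1 - lambda_) fold keeps every lambda at or above 0.5, so each mixed example remains dominated by its own target. A small illustration:

lam = torch.tensor([0.1, 0.4, 0.9])
print(torch.max(lam, 1 - lam))  # tensor([0.9000, 0.6000, 0.9000])
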
Example #26
def get_log_probs(pi, actions, dist_type):
    if dist_type == 'gauss':
        mean, std = pi
        log_prob = Normal(mean, std).log_prob(actions)
    elif dist_type == 'beta':
        alpha, beta = pi
        log_prob = Beta(alpha, beta).log_prob(actions)
    else:
        raise ValueError("Unsupported dist_type: %s" % dist_type)

    return log_prob
Example #27
def select_actions(pi, dist_type):
    if dist_type == 'gauss':
        mean, std = pi
        actions = Normal(mean, std).sample()
    elif dist_type == 'beta':
        alpha, beta = pi
        actions = Beta(alpha.detach().cpu(), beta.detach().cpu()).sample()
    else:
        raise ValueError("Unsupported dist_type: %s" % dist_type)

    return actions.detach().cpu().numpy()[0]
Example #28
    def __init__(self, alpha, num_classes):
        super(BatchMixupLayer, self).__init__()

        assert isinstance(alpha, float)
        assert isinstance(num_classes, int)

        self.alpha = alpha
        self.num_classes = num_classes

        self.Beta = Beta(self.alpha, self.alpha)
Example #29
    def __call__(self, sample: Dict[str, Any]) -> Dict[str, Any]:
        """
        Args:
            sample: the batch data.
        """
        assert len(sample["target"]) % 2 == 0, "Batch size should be even"

        if torch.is_tensor(sample["input"]) and sample["input"].ndim == 4:
            # This is the simple case of an image data batch (i.e. a 4D tensor).
            # We support more advanced joint mixup and cutmix in this case.
            if self.mode == "elem":
                lam = self._mix_elem(sample["input"])
            elif self.mode == "pair":
                lam = self._mix_pair(sample["input"])
            else:
                lam = self._mix_batch(sample["input"])

            sample["target"] = mixup_target(
                sample["target"],
                self.num_classes,
                lam=lam,
                smoothing=self.label_smoothing,
            )
        else:
            # This is the complex case of a video data batch (i.e. a 5D tensor)
            # or a more complex data batch. We only support mixup augmentation
            # in batch mode here.
            if sample["target"].ndim == 1:
                assert (
                    self.num_classes is not None
                ), "num_classes is expected for 1D target"

                off_value = self.label_smoothing / self.num_classes
                on_value = 1.0 - self.label_smoothing + off_value

                sample["target"] = one_hot(
                    sample["target"],
                    self.num_classes,
                    on_value=on_value,
                    off_value=off_value,
                    device=sample["target"].device,
                )
            else:
                assert (
                    sample["target"].ndim == 2
                ), "target tensor shape must be 1D or 2D"

            c = Beta(self.mixup_alpha, self.mixup_alpha).sample()

            sample["target"] = c * sample["target"] + (1.0 - c) * sample["target"].flip(
                0
            )
            sample["input"] = _recursive_mixup(sample["input"], c)

        return sample
Example #30
    def log_probs(self, batch_states, batch_actions):
        # Get Beta concentration parameters from the policy network.
        act = self.forward(batch_states)

        # Squash the raw outputs into (0, 5); the first and second halves of
        # the last dimension hold the two concentrations.
        c1 = torch.sigmoid(act[:, :, :self.act_dim]) * 5
        c2 = torch.sigmoid(act[:, :, self.act_dim:]) * 5

        beta_dist = Beta(c1, c2)
        log_probs = beta_dist.log_prob(batch_actions)
        return log_probs.sum(1, keepdim=True)