Example #1
def augmentAndMix(x_orig, k, alpha, preprocess):
    # k : number of chains
    # alpha : sampling constant

    x_aug = torch.zeros_like(preprocess(x_orig))
    mixing_weight_dist = Dirichlet(torch.empty(k).fill_(alpha))
    mixing_weights = mixing_weight_dist.sample()

    for i in range(k):
        # Restart each augmentation chain from the original image.
        x_temp = x_orig
        sampled_augs = random.sample(augmentations, k)
        aug_chain_length = random.choice(range(1, k + 1))
        aug_chain = sampled_augs[:aug_chain_length]

        for aug in aug_chain:
            severity = random.choice(range(1, 6))
            x_temp = aug(x_temp, severity)

        x_aug += mixing_weights[i] * preprocess(x_temp)

    skip_conn_weight_dist = Beta(torch.tensor([alpha]), torch.tensor([alpha]))
    skip_conn_weight = skip_conn_weight_dist.sample()

    x_augmix = skip_conn_weight * x_aug + (
        1 - skip_conn_weight) * preprocess(x_orig)

    return x_augmix
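A hypothetical usage sketch for the function above; the toy augmentation lambdas, the `preprocess` transform, and the image path are stand-ins, not part of the original code:

# Hypothetical sketch: `augmentations` and `preprocess` are toy stand-ins for
# whatever ops and normalization the surrounding code defines.
from PIL import Image
from torchvision import transforms

preprocess = transforms.ToTensor()
augmentations = [
    lambda img, severity: img.rotate(3 * severity),
    lambda img, severity: img.transpose(Image.FLIP_LEFT_RIGHT),
]

img = Image.open("example.jpg")  # placeholder path
x_augmix = augmentAndMix(img, k=3, alpha=1.0, preprocess=preprocess)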
Example #2
 def __init__(self,
              model: nn.Module,
              optimizer: Optimizer,
              loss_f: Callable,
              temperature: float,
              beta: float,
              consistency_weight: float,
              *,
              reporters: Optional[Union[_ReporterBase, List[_ReporterBase]]] = None,
              scheduler: Optional[Scheduler] = None,
              verb=True,
              use_cudnn_benchmark=True,
              report_accuracy_topk: Optional[Union[int, List[int]]] = None,
              **kwargs):
     super(MixMatchTrainer,
           self).__init__(model,
                          optimizer,
                          loss_f,
                          reporters=reporters,
                          scheduler=scheduler,
                          verb=verb,
                          use_cudnn_benchmark=use_cudnn_benchmark,
                          **kwargs)
     self.temperature = temperature
     self.beta = Beta(beta, beta)
     self.consistency_weight = consistency_weight
     if report_accuracy_topk is not None and not isinstance(
             report_accuracy_topk, Iterable):
         report_accuracy_topk = [report_accuracy_topk]
     self._report_topk = report_accuracy_topk
Example #3
 def __init__(self,
              model: nn.Module,
              optimizer: Optimizer,
              loss_f: Callable,
              consistency_weight: float,
              alpha: float,
              beta: float,
              *,
              reporters: Optional[Union[_ReporterBase, List[_ReporterBase]]] = None,
              scheduler: Optional[Scheduler] = None,
              verb=True,
              use_cudnn_benchmark=True,
              report_accuracy_topk: Optional[Union[int, List[int]]] = None,
              **kwargs):
     teacher = deepcopy(model)
     model = {'student': model, 'teacher': teacher}
     super(InterpolationConsistencyTrainer,
           self).__init__(model,
                          optimizer,
                          loss_f,
                          reporters=reporters,
                          scheduler=scheduler,
                          verb=verb,
                          use_cudnn_benchmark=use_cudnn_benchmark,
                          **kwargs)
     self.consistency_weight = consistency_weight
     self.alpha = alpha
     self.beta = Beta(beta, beta)
     if report_accuracy_topk is not None and not isinstance(
             report_accuracy_topk, Iterable):
         report_accuracy_topk = [report_accuracy_topk]
     self._report_topk = report_accuracy_topk
Example #4
    def __getitem__(self, idx):
        # idx only acts as a counter while generating batches.
        prob = 0.5 * torch.ones([self.input_seq_len, self.seq_width],
                                dtype=torch.float64)
        seq = Binomial(1, prob).sample()
        # Extra input channel for providing priority value
        input_seq = torch.zeros([self.input_seq_len, self.seq_width + 1])
        input_seq[:self.input_seq_len, :self.seq_width] = seq

        # torch's Uniform function draws samples from the half-open interval
        # [low, high) but in the paper the priorities are drawn from [-1,1].
        # This minor difference is ignored here, as it supposedly doesn't
        # affect the task.
        if not self.uniform:
            alpha = torch.tensor([2.0])
            beta = torch.tensor([5.0])
            if self.random_distr:
                alpha_beta_gen = Uniform(torch.tensor([0.0]),
                                         torch.tensor([100.0]))
                alpha = alpha_beta_gen.sample()
                beta = alpha_beta_gen.sample()
            priority = Beta(alpha, beta)
        else:
            priority = Uniform(torch.tensor([-1.0]), torch.tensor([1.0]))

        for i in range(self.input_seq_len):
            input_seq[i, self.seq_width] = priority.sample()

        sorted_index = torch.sort(input_seq[:, -1], descending=True)[1]
        target_seq = input_seq[sorted_index][:self.target_seq_len,
                                             :self.seq_width]

        return {'input': input_seq, 'target': target_seq}
Example #5
def get_random_domainess(cur_iter, total_iter, batch):
    # alpha grows exponentially over training, shifting the Beta(alpha, 1)
    # mass toward 1 as cur_iter approaches total_iter.
    alpha = np.exp((cur_iter - (0.5 * total_iter)) / (0.25 * total_iter))
    distribution = Beta(alpha, 1)
    z = distribution.sample((batch, 1))
    z2 = z * torch.rand(1)
    # Each row holds three non-negative weights that sum to one.
    output = torch.cat([1 - z, z2, z - z2], dim=1)
    return output
Example #6
    def train_step(
            self, sample, model, criterion, optimizer, update_num, ignore_grad=False):
        model.train()
        model.set_num_updates(update_num)

        shuffled_ids = np.array(list(range(len(sample["id"]))))
        np.random.shuffle(shuffled_ids)

        net_input_a = sample["net_input"]
        net_input_b = {"src_tokens": net_input_a["src_tokens"][shuffled_ids],
                "prev_output_tokens": net_input_a["prev_output_tokens"][shuffled_ids],
                "src_lengths": net_input_a["src_lengths"][shuffled_ids]}
        pair_sample = {
                "id": sample["id"],
                "nsentences": sample["nsentences"],
                "ntokens": sample["ntokens"],
                "net_input_a": net_input_a,
                "net_input_b": net_input_b,
                "target_a": sample["target"],
                "target_b": sample["target"][shuffled_ids],
        }

        dist = Beta(self.args.alpha, self.args.alpha)
        bsz = len(shuffled_ids)
        lambda_ = dist.sample(sample_shape=[bsz]).to("cuda")
        lambda_ = torch.max(lambda_, 1 - lambda_)
        if self.args.fp16:
            lambda_ = lambda_.half()
        loss, sample_size, logging_output = criterion(model, pair_sample, lambda_=lambda_)


        if ignore_grad:
            loss *= 0
        optimizer.backward(loss)
        return loss, sample_size, logging_output
Example #7
    def mixup_data(x: torch.FloatTensor,
                   y: torch.LongTensor,
                   alpha: float = 1.0):

        if not len(x) == len(y):
            raise ValueError(
                "The size of `x` and `y` must match in the first dim.")

        if alpha > 0.:
            alpha = float(alpha)
            beta_dist = Beta(torch.tensor([alpha]), torch.tensor([alpha]))
            lam = beta_dist.sample().item()
        else:
            lam = 1.

        batch_size, num_channels, _, _ = x.size()
        index = torch.randperm(batch_size).to(x.device)

        # For WM811K, the input tensors `x` have two channels, where
        # the first channel has values of either one (for fail) or zero (for pass),
        # while the second channel has values of either one (for valid bins) or zeros (null bins).
        if num_channels == 2:
            mixed_x0 = \
                lam * x[:, 0, :, :] + (1 - lam) * x[index, 0, :, :]  # (B, H, W)
            mixed_x1 = (x[:, 1, :, :] + x[index, 1, :, :])  # (B, H, W)
            mixed_x1 = torch.clamp(mixed_x1, min=0, max=1)  # (B, H, W)
            mixed_x = torch.stack([mixed_x0, mixed_x1], dim=1)  # (B, 2, H, W)
        else:
            raise NotImplementedError

        y_a, y_b = y, y[index]
        return mixed_x, y_a, y_b, lam
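The tuple returned by `mixup_data` is typically consumed by a mixup loss that interpolates between the two targets; a minimal sketch (`model` and `criterion` are placeholders, not from the original code):

# Hypothetical usage sketch of the standard mixup objective.
mixed_x, y_a, y_b, lam = mixup_data(x, y, alpha=1.0)
logits = model(mixed_x)
loss = lam * criterion(logits, y_a) + (1 - lam) * criterion(logits, y_b)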
Example #8
def reinforce(env, policy_estimator, num_episodes=2000, batch_size=10, gamma=0.99):    
    total_rewards = []
    days_counter = []    
    batch_rewards = []
    batch_states = []
    batch_actions = []
    counter = 0       
    ep = 0
    days = 0
    
    while ep < num_episodes:
        # print(ep)
        s_0 = env.reset()
        days = 0
        states = []
        rewards = []
        actions = []
        done = False
        
        while not done:
            if days > 1000:
                print(days)
            
            processed_state = process(s_0, 50000)            
            a, b = policy_estimator.foward(processed_state)    
            distribution = Beta(a, b) 
            action = distribution.sample().detach().numpy() 
            s_1, r, done, _ = env.step(action)                       
            states.append(processed_state)
            rewards.append(r)
            actions.append(action)               
            days += 1
            counter += 1
            s_0 = s_1
               
            
        ep += 1 
        total_rewards.append(sum(rewards))            
        days_counter.append(days)
        
        if counter > 256 and done:  
#             print("reached")            
            returns = discount_rewards(rewards, gamma)              
            batch_states.extend(states)
            batch_rewards.extend(returns)
            batch_actions.extend(actions)          
            
            state_tensor = torch.FloatTensor(batch_states)
            reward_tensor = torch.FloatTensor(batch_rewards)
            a_tnsr, b_tnsr = policy_estimator.foward(state_tensor)
            action_tensor = torch.FloatTensor(batch_actions)
            policy_estimator.update(a_tnsr, b_tnsr, action_tensor, reward_tensor)
            
            batch_rewards = []
            batch_actions = []
            batch_states = []            
            counter = 0
#             print("finished")
    return total_rewards, days_counter
Example #9
 def __init__(self, alpha=1.0, lam=RANDOM, reformulate=False):
     super(RMixup, self).__init__()
     self.alpha = alpha
     self.lam = lam
     self.reformulate = reformulate
     if reformulate:
         self.distrib = Beta(self.alpha + 1, self.alpha)
     else:
         self.distrib = Beta(self.alpha, self.alpha)
Example #10
 def sample_action(self, s):
     s_T = T.tensor(s).unsqueeze(0)
     act = self.forward(s_T)
     c1 = F.sigmoid(act[:, :self.act_dim]) * 5
     c2 = F.sigmoid(act[:, self.act_dim:]) * 5
     beta_dist = Beta(c1, c2)
     rnd_act = beta_dist.sample()
     return rnd_act.detach().squeeze(0).numpy()
Example #11
 def select_action(self, state, deterministic, reparameterize=False):
     alpha, beta = self.forward(state)
     dist = Beta(concentration1=alpha, concentration0=beta)
     if reparameterize:
         action = dist.rsample()  # (bsize, action_dim)
     else:
         action = dist.sample()  # (bsize, action_dim)
     return action, dist
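Beta samples lie in (0, 1), so policies like this one usually rescale them to the environment's action range before stepping (Example #23 below uses `action * 2 - 1`); a minimal sketch with placeholder names:

# Hypothetical sketch: map a Beta-distributed action from (0, 1) to [-1, 1].
action, dist = policy.select_action(state, deterministic=False)
env_action = action * 2 - 1                 # rescale to the env's action range
log_prob = dist.log_prob(action).sum(-1)    # per-sample log-probability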
Example #12
def test2():
    """
    The Beta distribution is a family of continuous distributions defined on the interval (0, 1).
    :return:
    """
    from torch.distributions.beta import Beta
    dist = Beta(torch.tensor([0.5]), torch.tensor([0.5]))
    dist.sample()  # >>> tensor([0.0594])
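Besides `sample()`, the distribution exposes `log_prob()` and a reparameterized `rsample()` (used in Example #21 below); a small self-contained sketch:

import torch
from torch.distributions.beta import Beta

a = torch.tensor([2.0], requires_grad=True)
b = torch.tensor([5.0], requires_grad=True)
dist = Beta(a, b)
x = dist.rsample()                       # reparameterized; gradients flow to a and b
lp = dist.log_prob(torch.tensor([0.3]))  # log-density of 0.3 under Beta(2, 5)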
Example #13
def get_lambda(self, batch_size):
    """
        Sample lambda given batch size.
    """
    dist = Beta(self.args.alpha, self.args.alpha)
    lambda_ = dist.sample(sample_shape=[batch_size]).to("cuda")
    lambda_ = torch.max(lambda_, 1 - lambda_)
    return lambda_
Example #14
 def __init__(self, k=3, alpha=1, severity=3):
     super(AugMix, self).__init__()
     self.k = k
     self.alpha = alpha
     self.severity = severity
     self.dirichlet = Dirichlet(torch.full(torch.Size([k]), alpha, dtype=torch.float32))
     self.beta = Beta(alpha, alpha)
     self.augs = augmentations
     self.kl = nn.KLDivLoss(reduction='batchmean')
Example #15
    def __init__(self, alpha, num_classes):
        super(BatchMixupLayer, self).__init__()

        assert isinstance(alpha, float)
        assert isinstance(num_classes, int)

        self.alpha = alpha
        self.num_classes = num_classes

        self.Beta = Beta(self.alpha, self.alpha)
Example #16
 def update(self, a_tnsr, b_tnsr, action_tensor, reward_tensor):
     self.optimizer.zero_grad()
     m = Beta(a_tnsr, b_tnsr)
     log_probs = m.log_prob(action_tensor)
     loss = (-1 * torch.matmul(reward_tensor, log_probs)).mean()
     loss.backward()
     self.optimizer.step()
     self.scheduler.step()
Example #17
    def log_probs(self, batch_states, batch_actions):
        # Get action means from policy
        act = self.forward(batch_states)

        # Calculate probabilities
        c1 = F.sigmoid(act[:, :, :self.act_dim]) * 5
        c2 = F.sigmoid(act[:, :, self.act_dim:]) * 5

        beta_dist = Beta(c1, c2)
        log_probs = beta_dist.log_prob(batch_actions)
        return log_probs.sum(1, keepdim=True)
Example #18
 def __init__(self, alpha=1.0, lam=RANDOM):
     super(ManifoldMixup, self).__init__()
     self._layers = []
     self._mixup_layers = None
     self.alpha = alpha
     self.lam = lam
     self.distrib = Beta(self.alpha, self.alpha)
     self.layer_names = []
     self.depth = 0
     self._layer_filter = []
     self._layer_types = []
Example #19
 def calc_unnormalized_beta_cdf(self, b, alpha, beta, npts=100):
     bt = Beta(alpha.float(), beta.float())
     x = torch.linspace(0 + self.epsilon,
                        b - self.epsilon,
                        int(npts * b.cpu().numpy()),
                        device=self.device).float()
     pdf = bt.log_prob(x).exp()
     dx = torch.tensor([1. / (npts * self.num_classes)],
                       device=self.device).float()
     P = pdf.sum(dim=1) * dx
     return P
Example #20
 def observe(self, move, reward):
     if isinstance(reward, torch.Tensor):
         reward = reward.item()
     alpha = (1-self.gamma)*self.rewards.concentration1\
             + self.gamma*torch.ones(3)
     beta = (1-self.gamma)*self.rewards.concentration0\
             + self.gamma*torch.ones(3)
     if reward == 1:
         alpha[move] += reward
     else:
         beta[move] -= reward
     self.rewards = Beta(alpha, beta)
Example #21
 def forward(self, nsmpl, return_z=False):
     zero = torch.zeros_like(self.ref)  # Proper device.
     one = torch.ones_like(self.ref)  # Proper device.
     #      one = torch.ones_like(self.ref)
     #      mix = 2 * Bernoulli(.2 * one[0]).sample([nsmpl]) - 1.
     #      mu = torch.ger(mix, one) # Array of +/-1.
     #      sd = one.expand([nsmpl,-1])
     #      z = Normal(mu, sd).sample()
     z = Normal(zero, one).sample([nsmpl])
     a, b = self.detfwd(z)
     if return_z: return z, Beta(a, b).rsample()
     else: return Beta(a, b).rsample()
Example #22
    def step(self, input, target, teams):
        """Do one training step and return the loss."""

        self.train()
        self.zero_grad()
        event_scores, time_scores = self.forward(input, teams)

        event_proba = F.softmax(event_scores, 2)
        time_proba = F.softmax(time_scores, 2)

        # Only get events during the games
        events_during_game, target_events_during_game, time_during_game, target_time_during_game, end_game_indices = get_during_game_tensors(
            event_scores, time_scores, target, return_end_game_idx=True)

        # Only get goals during the games
        goals_home_tensor, goals_home_target_tensor, goals_away_tensor, goals_away_target_tensor = get_during_game_goals(
            event_proba, target)

        goals_tensor = torch.stack([goals_home_tensor, goals_away_tensor], 1)
        goals_target_tensor = torch.stack(
            [goals_home_target_tensor, goals_away_target_tensor], 1)

        accuracy = torch.tensor(0)
        loss_result_game = torch.tensor(0)

        # Events and time loss functions
        loss_events_during_game = self.loss_function_events(
            events_during_game, target_events_during_game)
        loss_time_during_game = self.loss_function_time(
            time_during_game, target_time_during_game)

        # Loss term that discourages having too many events in the same minute
        time_proba_during_game = F.softmax(time_during_game, 1)
        beta_distr = Beta(ALPHA_FOR_BETA_DISTR, BETA_FOR_BETA_DISTR)
        log_prob = beta_distr.log_prob(
            time_proba_during_game[:, SAME_TIME_THAN_PREV])
        same_minute_event_loss = -torch.mean(log_prob)

        #same_minute_event_loss = Variable(torch.tensor(0))

        total_loss = (loss_events_during_game + loss_time_during_game +
                      BETA_WEIGHT * same_minute_event_loss) / (2 + BETA_WEIGHT)

        total_loss.backward()

        self.optimizer.step()

        return (event_proba, time_proba, total_loss.data.item(),
                loss_events_during_game.data.item(),
                loss_time_during_game.data.item(),
                same_minute_event_loss.item(),
                loss_result_game.data.item(), accuracy.item())
Example #23
    def act(self, state_tensor
            ):  # state is a batch of tensors rather than a joint state
        # value, mu, cov = self.value_action_predictor(state_tensor)
        # dist = MultivariateNormal(mu, cov)
        # actions = dist.sample()
        # action_log_probs = dist.log_prob(actions)
        # action_to_take = [ActionXY(action[0], action[1]) for action in actions.cpu().numpy()]

        value, alpha_beta_1, alpha_beta_2 = self.value_action_predictor(
            state_tensor)
        vx_dist = Beta(alpha_beta_1[:, 0], alpha_beta_1[:, 1])
        vy_dist = Beta(alpha_beta_2[:, 0], alpha_beta_2[:, 1])
        actions = torch.cat(
            [vx_dist.sample().unsqueeze(1),
             vy_dist.sample().unsqueeze(1)],
            dim=1)
        action_log_probs = vx_dist.log_prob(
            actions[:, 0]).unsqueeze(1) + vy_dist.log_prob(
                actions[:, 1]).unsqueeze(1)
        action_to_take = [
            ActionXY(action[0] * 2 - 1, action[1] * 2 - 1)
            for action in actions.cpu().numpy()
        ]

        return value, actions, action_log_probs, action_to_take
Example #24
    def __init__(self, model_gp, likelihood_gp, hyperpriors: dict) -> None:
        self.model_gp = model_gp
        self.likelihood_gp = likelihood_gp
        self.hyperpriors = hyperpriors

        a_beta = self.hyperpriors["lengthscales"].kwds["a"]
        b_beta = self.hyperpriors["lengthscales"].kwds["b"]

        self.Beta_tmp = Beta(concentration1=a_beta, concentration0=b_beta)

        a_gg = self.hyperpriors["outputscale"].kwds["a"]
        b_gg = self.hyperpriors["outputscale"].kwds["scale"]

        self.Gamma_tmp = Gamma(concentration=a_gg, rate=1. / b_gg)
Example #25
    def predict_proba_and_get_loss(self, input, target, teams):
        event_scores, time_scores = self.forward(input, teams)

        # Get probabilities
        event_proba = F.softmax(event_scores, 2)
        time_proba = F.softmax(time_scores, 2)

        # Separate events from time
        target_events = target[:, :, 0]
        target_time = target[:, :, 1]

        # Only get events during the games
        events_during_game, target_events_during_game, time_during_game, target_time_during_game = get_during_game_tensors(
            event_scores, time_scores, target)

        # Only get goals during the games
        goals_home_tensor, goals_home_target_tensor, goals_away_tensor, goals_away_target_tensor = get_during_game_goals(
            event_proba, target)

        goals_tensor = torch.stack([goals_home_tensor, goals_away_tensor], 1)
        goals_target_tensor = torch.stack(
            [goals_home_target_tensor, goals_away_target_tensor], 1)

        games_proba = get_games_proba_from_goals_proba(goals_tensor)
        games_results = get_games_results_from_goals(goals_target_tensor)

        # Cross entropy loss for result, but don't use it in backwards
        loss_result_game = self.loss_function_result(games_proba,
                                                     games_results)

        # Loss term that discourages having too many events in the same minute
        time_proba_during_game = F.softmax(time_during_game, 1)
        beta_distr = Beta(ALPHA_FOR_BETA_DISTR, BETA_FOR_BETA_DISTR)
        log_prob = beta_distr.log_prob(
            time_proba_during_game[:, SAME_TIME_THAN_PREV])
        same_minute_event_loss = -torch.mean(log_prob)

        # Events and time loss functions
        loss_time_during_game = self.loss_function_time(
            time_during_game, target_time_during_game)
        loss_events_during_game = self.loss_function_events(
            events_during_game, target_events_during_game)

        total_loss = (loss_events_during_game + loss_time_during_game +
                      BETA_WEIGHT * same_minute_event_loss) / (2 + BETA_WEIGHT)

        return (event_proba, time_proba, total_loss.data.item(),
                loss_events_during_game.data.item(),
                loss_time_during_game.data.item(),
                same_minute_event_loss.data.item(),
                loss_result_game.data.item())
Example #26
def generate_data(num_obs):
    # domain = [False, True]
    prior = {'A': torch.tensor([1., 10.]),
             'B': torch.tensor([[10., 1.],
                                [1., 10.]]),
             'C': torch.tensor([[10., 1.],
                                [1., 10.]])}
    CPDs = {'p_A': Beta(prior['A'][0], prior['A'][1]).sample(),
            'p_B': Beta(prior['B'][:, 0], prior['B'][:, 1]).sample(),
            'p_C': Beta(prior['C'][:, 0], prior['C'][:, 1]).sample(),
            }
    data = {'A': Bernoulli(torch.ones(num_obs) * CPDs['p_A']).sample()}
    data['B'] = Bernoulli(torch.gather(CPDs['p_B'], 0, data['A'].type(torch.long))).sample()
    data['C'] = Bernoulli(torch.gather(CPDs['p_C'], 0, data['B'].type(torch.long))).sample()
    return prior, CPDs, data
Example #27
    def optimize(self, train_data, test_data, epochs=30, bsz=256):
        # The initial learning rate is set to keep the parameters from
        # blowing up; with a higher rate, no learning takes place.
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        sched = torch.optim.lr_scheduler.MultiStepLR(optimizer, [29])

        batches = DataLoader(dataset=train_data, batch_size=bsz, shuffle=True)
        test_set = DataLoader(dataset=test_data, batch_size=bsz, shuffle=True)

        best = float('inf')

        for ep in range(epochs):
            batch_loss = 0.0
            self.train()
            for bno, data in enumerate(batches):
                atac = torch.clamp(data[:, :50], min=.001, max=.9999)
                hic = data[:, 50:]
                # Shrink 5% of the entries.
                shrink = torch.ones_like(hic, device=data.device)
                idx = torch.rand(shrink.shape, device=data.device) < .05
                shrink[idx] = torch.rand(shrink.shape, device=data.device)[idx]
                # Random factor.
                #rfact = .8 + .4 * torch.rand(1, device=data.device)
                (a, b) = self(hic * shrink)
                #(a,b) = self(hic)
                loss = -torch.mean(Beta(a, b).log_prob(atac))
                batch_loss += float(loss)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            sched.step()

            # Test data.
            self.eval()
            with torch.no_grad():
                test_rcst = 0.0
                for sno, data in enumerate(test_set):
                    hic = data[:, 50:]
                    atac = torch.clamp(data[:, :50], min=.001, max=.9999)
                    (a, b) = self(hic)
                    test_rcst -= float(torch.mean(Beta(a, b).log_prob(atac)))

            # Print logs on stderr.
            if test_rcst / sno < best: best = test_rcst / sno
            sys.stderr.write('%d\t%f\t%f\t%f\n' % \
                  (ep, batch_loss / bno, test_rcst / sno, best))
Example #28
class MixUp(Callback):
    _order = 90  #Runs after normalization and cuda

    def __init__(self, alpha=0.4):
        self.distrib = Beta(tensor([alpha]), tensor([alpha]))

    def begin_fit(self):
        self.old_loss_func = self.learn.loss_func
        self.learn.loss_func = self.loss_func

    def begin_batch(self):
        if not self.training: return  #Only mixup things during training
        lam = self.distrib.sample(
            (self.yb.size(0), )).squeeze().to(self.xb.device)
        lam = torch.stack([lam, 1 - lam], 1)
        self.lam = lam.max(1)[0][:, None, None, None]
        shuffle = torch.randperm(self.yb.size(0)).to(self.xb.device)
        xb1, self.yb1 = self.xb[shuffle], self.yb[shuffle]
        self.learn.xb = torch.lerp(xb1, self.xb, self.lam)

    def after_fit(self):
        self.learn.loss_func = self.old_loss_func

    def loss_func(self, pred, yb):
        if not self.in_train: return self.old_loss_func(pred, yb)
        with NoneReduce(self.old_loss_func) as loss_func:
            loss1 = loss_func(pred, yb)
            loss2 = loss_func(pred, self.yb1)
        loss = torch.lerp(loss2, loss1, self.lam)
        return reduce_loss(loss,
                           getattr(self.old_loss_func, 'reduction', 'mean'))
Example #29
    def _rejection_sample_wood(loc: torch.Tensor, concentration: torch.Tensor,
                               w: torch.Tensor):
        """
        The acceptance-rejection sampling scheme from Wood (1994).

        Based on TensorFlow's implementation:
        https://github.com/tensorflow/probability/blob/v0.11.1/tensorflow_probability/python/distributions/von_mises_fisher.py#L421

        and the implementation from "Spherical Latent Spaces for Stable Variational Autoencoders" by Jiacheng Xu, Greg Durrett
        https://github.com/jiacheng-xu/vmf_vae_nlp/blob/master/NVLL/distribution/vmf_only.py#L92
        """
        m = loc.shape[-1]

        b = (m - 1) / (2 * concentration + torch.sqrt((4 *
                                                       (concentration**2)) +
                                                      (m - 1)**2))
        x = (1 - b) / (1 + b)
        c = concentration * x + (m - 1) * torch.log(1 - x**2)

        # Sampling should accept a scalar `w` for each training example.
        done = torch.zeros(w.shape, dtype=torch.bool, device=loc.device)
        while not done.all():
            epsilon = Beta(0.5 * (m - 1), 0.5 * (m - 1)).sample(w.shape)
            w_prime = (1 - (1 + b) * epsilon) / (1 - (1 - b) * epsilon)

            u = Uniform(0.0 + 1e-6, 1.0).sample(w.shape)

            accept = concentration * w_prime + (
                m - 1) * torch.log(1 - x * w_prime) - c >= torch.log(u)

            if accept.any():
                w = torch.where(accept, w_prime, w)
                done = done | accept

        return w
Example #30
class MixupBlending(BaseMiniBatchBlending):
    """Implementing Mixup in a mini-batch.

    This module is proposed in `mixup: Beyond Empirical Risk Minimization
    <https://arxiv.org/abs/1710.09412>`_.
    Code Reference https://github.com/open-mmlab/mmclassification/blob/master/mmcls/models/utils/mixup.py # noqa

    Args:
        num_classes (int): The number of classes.
        alpha (float): Parameters for Beta distribution.
    """
    def __init__(self, num_classes, alpha=.2):
        super().__init__(num_classes=num_classes)
        self.beta = Beta(alpha, alpha)

    def do_blending(self, imgs, label, **kwargs):
        """Blending images with mixup."""
        assert len(kwargs) == 0, f'unexpected kwargs for mixup {kwargs}'

        lam = self.beta.sample()
        batch_size = imgs.size(0)
        rand_index = torch.randperm(batch_size)

        mixed_imgs = lam * imgs + (1 - lam) * imgs[rand_index, :]
        mixed_label = lam * label + (1 - lam) * label[rand_index, :]

        return mixed_imgs, mixed_label
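A minimal usage sketch for the blender above, assuming soft (one-hot) labels since `do_blending` mixes the label tensor directly; the shapes and class count below are placeholders:

# Hypothetical usage sketch for MixupBlending.
import torch
import torch.nn.functional as F

blender = MixupBlending(num_classes=10, alpha=0.2)
imgs = torch.randn(8, 3, 224, 224)                                    # placeholder batch
label = F.one_hot(torch.randint(0, 10, (8,)), num_classes=10).float()
mixed_imgs, mixed_label = blender.do_blending(imgs, label)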