def augmentAndMix(x_orig, k, alpha, preprocess):
    # k: number of augmentation chains
    # alpha: concentration for the Dirichlet/Beta sampling
    x_aug = torch.zeros_like(preprocess(x_orig))
    # Convex mixing weights over the k chains.
    mixing_weight_dist = Dirichlet(torch.empty(k).fill_(alpha))
    mixing_weights = mixing_weight_dist.sample()
    for i in range(k):
        x_temp = x_orig  # each chain starts from the unaugmented image
        sampled_augs = random.sample(augmentations, k)
        aug_chain_length = random.choice(range(1, k + 1))
        aug_chain = sampled_augs[:aug_chain_length]
        for aug in aug_chain:
            severity = random.choice(range(1, 6))
            x_temp = aug(x_temp, severity)
        x_aug += mixing_weights[i] * preprocess(x_temp)
    # Skip connection: interpolate between the mixed augmentations and the original.
    skip_conn_weight_dist = Beta(torch.tensor([alpha]), torch.tensor([alpha]))
    skip_conn_weight = skip_conn_weight_dist.sample()
    x_augmix = skip_conn_weight * x_aug \
        + (1 - skip_conn_weight) * preprocess(x_orig)
    return x_augmix
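# A minimal, hedged usage sketch for augmentAndMix above. The `augmentations`
# list here is a stand-in (the real pipeline would supply proper image ops),
# and `preprocess` is an identity placeholder for normalization.
import random

import torch
from torch.distributions.beta import Beta
from torch.distributions.dirichlet import Dirichlet

augmentations = [
    lambda x, s: torch.roll(x, shifts=s, dims=-1),  # horizontal shift
    lambda x, s: torch.roll(x, shifts=s, dims=-2),  # vertical shift
    lambda x, s: x.flip(-1),                        # horizontal flip
]

x = torch.rand(3, 32, 32)
x_mix = augmentAndMix(x, k=3, alpha=1.0, preprocess=lambda t: t)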
def __init__(self, model: nn.Module, optimizer: Optimizer, loss_f: Callable,
             temperature: float, beta: float, consistency_weight: float, *,
             reporters: Optional[Union[_ReporterBase, List[_ReporterBase]]] = None,
             scheduler: Optional[Scheduler] = None, verb=True,
             use_cudnn_benchmark=True,
             report_accuracy_topk: Optional[Union[int, List[int]]] = None,
             **kwargs):
    super(MixMatchTrainer, self).__init__(
        model, optimizer, loss_f, reporters=reporters, scheduler=scheduler,
        verb=verb, use_cudnn_benchmark=use_cudnn_benchmark, **kwargs)
    self.temperature = temperature
    self.beta = Beta(beta, beta)
    self.consistency_weight = consistency_weight
    # Allow a bare int as shorthand for a single-element top-k list.
    if report_accuracy_topk is not None and not isinstance(
            report_accuracy_topk, Iterable):
        report_accuracy_topk = [report_accuracy_topk]
    self._report_topk = report_accuracy_topk
def __init__(self, model: nn.Module, optimizer: Optimizer, loss_f: Callable,
             consistency_weight: float, alpha: float, beta: float, *,
             reporters: Optional[Union[_ReporterBase, List[_ReporterBase]]] = None,
             scheduler: Optional[Scheduler] = None, verb=True,
             use_cudnn_benchmark=True,
             report_accuracy_topk: Optional[Union[int, List[int]]] = None,
             **kwargs):
    # The teacher starts as a copy of the student and is updated separately
    # elsewhere in the trainer.
    teacher = deepcopy(model)
    model = {'student': model, 'teacher': teacher}
    super(InterpolationConsistencyTrainer, self).__init__(
        model, optimizer, loss_f, reporters=reporters, scheduler=scheduler,
        verb=verb, use_cudnn_benchmark=use_cudnn_benchmark, **kwargs)
    self.consistency_weight = consistency_weight
    self.alpha = alpha
    self.beta = Beta(beta, beta)
    if report_accuracy_topk is not None and not isinstance(
            report_accuracy_topk, Iterable):
        report_accuracy_topk = [report_accuracy_topk]
    self._report_topk = report_accuracy_topk
def __getitem__(self, idx):
    # idx only acts as a counter while generating batches.
    prob = 0.5 * torch.ones([self.input_seq_len, self.seq_width],
                            dtype=torch.float64)
    seq = Binomial(1, prob).sample()

    # Extra input channel for providing the priority value.
    input_seq = torch.zeros([self.input_seq_len, self.seq_width + 1])
    input_seq[:self.input_seq_len, :self.seq_width] = seq

    # torch's Uniform distribution draws samples from the half-open interval
    # [low, high), but in the paper the priorities are drawn from [-1, 1].
    # This minor difference is ignored here as it supposedly doesn't affect
    # the task.
    if not self.uniform:
        alpha = torch.tensor([2.0])
        beta = torch.tensor([5.0])
        if self.random_distr:
            alpha_beta_gen = Uniform(torch.tensor([0.0]),
                                     torch.tensor([100.0]))
            alpha = alpha_beta_gen.sample()
            beta = alpha_beta_gen.sample()
        priority = Beta(alpha, beta)
    else:
        priority = Uniform(torch.tensor([-1.0]), torch.tensor([1.0]))

    for i in range(self.input_seq_len):
        input_seq[i, self.seq_width] = priority.sample()

    # Target: the seq_width-wide rows sorted by descending priority.
    sorted_index = torch.sort(input_seq[:, -1], descending=True)[1]
    target_seq = input_seq[sorted_index][:self.target_seq_len,
                                         :self.seq_width]
    return {'input': input_seq, 'target': target_seq}
def get_random_domainess(cur_iter, total_iter, batch):
    # The concentration grows with training progress, pushing samples toward 1.
    alpha = np.exp((cur_iter - (0.5 * total_iter)) / (0.25 * total_iter))
    distribution = Beta(alpha, 1)
    z = distribution.sample((batch, 1))
    z2 = z * torch.rand(1)
    # Three nonnegative weights that sum to 1 per row.
    output = torch.cat([1 - z, z2, z - z2], dim=1)
    return output
def train_step(self, sample, model, criterion, optimizer, update_num,
               ignore_grad=False):
    model.train()
    model.set_num_updates(update_num)

    # Build a shuffled copy of the batch to pair with the original.
    shuffled_ids = np.array(list(range(len(sample["id"]))))
    np.random.shuffle(shuffled_ids)
    net_input_a = sample["net_input"]
    net_input_b = {
        "src_tokens": net_input_a["src_tokens"][shuffled_ids],
        "prev_output_tokens": net_input_a["prev_output_tokens"][shuffled_ids],
        "src_lengths": net_input_a["src_lengths"][shuffled_ids],
    }
    pair_sample = {
        "id": sample["id"],
        "nsentences": sample["nsentences"],
        "ntokens": sample["ntokens"],
        "net_input_a": net_input_a,
        "net_input_b": net_input_b,
        "target_a": sample["target"],
        "target_b": sample["target"][shuffled_ids],
    }

    # Per-example mixing coefficients; folding lambda onto [0.5, 1] keeps each
    # mixed example closest to its own target.
    dist = Beta(self.args.alpha, self.args.alpha)
    bsz = len(shuffled_ids)
    lambda_ = dist.sample(sample_shape=[bsz]).to("cuda")
    lambda_ = torch.max(lambda_, 1 - lambda_)
    if self.args.fp16:
        lambda_ = lambda_.half()

    loss, sample_size, logging_output = criterion(model, pair_sample,
                                                  lambda_=lambda_)
    if ignore_grad:
        loss *= 0
    optimizer.backward(loss)
    return loss, sample_size, logging_output
def mixup_data(x: torch.FloatTensor, y: torch.LongTensor, alpha: float = 1.0):
    if len(x) != len(y):
        raise ValueError("The size of `x` and `y` must match in the first dim.")

    if alpha > 0.:
        alpha = float(alpha)
        beta_dist = Beta(torch.tensor([alpha]), torch.tensor([alpha]))
        lam = beta_dist.sample().item()
    else:
        lam = 1.

    batch_size, num_channels, _, _ = x.size()
    index = torch.randperm(batch_size).to(x.device)

    # For WM811K, the input tensors `x` have two channels: the first channel
    # has values of either one (fail) or zero (pass), while the second channel
    # has values of either one (valid bins) or zero (null bins).
    if num_channels == 2:
        mixed_x0 = lam * x[:, 0, :, :] + (1 - lam) * x[index, 0, :, :]  # (B, H, W)
        mixed_x1 = x[:, 1, :, :] + x[index, 1, :, :]                    # (B, H, W)
        mixed_x1 = torch.clamp(mixed_x1, min=0, max=1)                  # (B, H, W)
        mixed_x = torch.stack([mixed_x0, mixed_x1], dim=1)              # (B, 2, H, W)
    else:
        raise NotImplementedError

    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam
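# A usage sketch for mixup_data following the standard mixup training recipe
# of interpolating the loss between both label sets; `model`, `criterion`,
# `x`, and `y` are hypothetical placeholders.
mixed_x, y_a, y_b, lam = mixup_data(x, y, alpha=1.0)
logits = model(mixed_x)
loss = lam * criterion(logits, y_a) + (1. - lam) * criterion(logits, y_b)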
def reinforce(env, policy_estimator, num_episodes=2000, batch_size=10,
              gamma=0.99):
    total_rewards = []
    days_counter = []
    batch_rewards = []
    batch_states = []
    batch_actions = []
    counter = 0
    ep = 0

    while ep < num_episodes:
        s_0 = env.reset()
        days = 0
        states = []
        rewards = []
        actions = []
        done = False

        while not done:
            if days > 1000:
                print(days)
            processed_state = process(s_0, 50000)
            # The policy outputs the two concentrations of a Beta distribution.
            a, b = policy_estimator.forward(processed_state)
            distribution = Beta(a, b)
            action = distribution.sample().detach().numpy()
            s_1, r, done, _ = env.step(action)
            states.append(processed_state)
            rewards.append(r)
            actions.append(action)
            days += 1
            counter += 1
            s_0 = s_1

        ep += 1
        total_rewards.append(sum(rewards))
        days_counter.append(days)

        # Update the policy once enough transitions have been collected.
        if counter > 256 and done:
            returns = discount_rewards(rewards, gamma)
            batch_states.extend(states)
            batch_rewards.extend(returns)
            batch_actions.extend(actions)
            state_tensor = torch.FloatTensor(batch_states)
            reward_tensor = torch.FloatTensor(batch_rewards)
            a_tnsr, b_tnsr = policy_estimator.forward(state_tensor)
            action_tensor = torch.FloatTensor(batch_actions)
            policy_estimator.update(a_tnsr, b_tnsr, action_tensor,
                                    reward_tensor)
            batch_rewards = []
            batch_actions = []
            batch_states = []
            counter = 0

    return total_rewards, days_counter
def __init__(self, alpha=1.0, lam=RANDOM, reformulate=False):
    super(RMixup, self).__init__()
    self.alpha = alpha
    self.lam = lam
    self.reformulate = reformulate
    # The reformulated variant uses Beta(alpha + 1, alpha), whose mean exceeds
    # 0.5, so the sampled lam is biased toward the first input.
    self.distrib = (Beta(self.alpha, self.alpha) if not reformulate
                    else Beta(self.alpha + 1, self.alpha))
def sample_action(self, s):
    s_T = T.tensor(s).unsqueeze(0)
    act = self.forward(s_T)
    # Squash the network outputs into (0, 5) to use as Beta concentrations.
    c1 = T.sigmoid(act[:, :self.act_dim]) * 5
    c2 = T.sigmoid(act[:, self.act_dim:]) * 5
    beta_dist = Beta(c1, c2)
    rnd_act = beta_dist.sample()
    return rnd_act.detach().squeeze(0).numpy()
def select_action(self, state, deterministic, reparameterize=False):
    # `deterministic` is accepted for interface compatibility but unused here.
    alpha, beta = self.forward(state)
    dist = Beta(concentration1=alpha, concentration0=beta)
    if reparameterize:
        # rsample() keeps the sample differentiable w.r.t. alpha and beta.
        action = dist.rsample()  # (bsize, action_dim)
    else:
        action = dist.sample()   # (bsize, action_dim)
    return action, dist
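# A hedged usage sketch for select_action: Beta samples lie in (0, 1), so a
# caller that needs symmetric actions can rescale them (the planner snippet
# further below uses the same `action * 2 - 1` mapping). `policy` and `state`
# are hypothetical placeholders.
action, dist = policy.select_action(state, deterministic=False,
                                    reparameterize=True)
scaled_action = action * 2 - 1            # map (0, 1) onto (-1, 1)
log_prob = dist.log_prob(action).sum(-1)  # joint log-density over action dims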
def test2():
    """
    The Beta distribution is a family of continuous random variables
    defined on the interval [0, 1].
    """
    from torch.distributions.beta import Beta
    dist = Beta(torch.tensor([0.5]), torch.tensor([0.5]))
    dist.sample()
    # >>> tensor([0.0594])
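# A hedged companion example (not from the original tests): the mean of
# Beta(a, b) is a / (a + b), so the concentrations act like pseudo-counts.
def test2_mean():
    from torch.distributions.beta import Beta
    m = Beta(torch.tensor([2.0]), torch.tensor([5.0]))
    assert torch.allclose(m.mean, torch.tensor([2.0 / 7.0]))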
def get_lambda(self, batch_size):
    """Sample mixing coefficients lambda for a batch."""
    dist = Beta(self.args.alpha, self.args.alpha)
    lambda_ = dist.sample(sample_shape=[batch_size]).to("cuda")
    # Fold lambda onto [0.5, 1] so each mixed example stays closest to its
    # own target.
    lambda_ = torch.max(lambda_, 1 - lambda_)
    return lambda_
def __init__(self, k=3, alpha=1, severity=3):
    super(AugMix, self).__init__()
    self.k = k              # number of augmentation chains
    self.alpha = alpha      # Dirichlet/Beta concentration
    self.severity = severity
    self.dirichlet = Dirichlet(
        torch.full(torch.Size([k]), alpha, dtype=torch.float32))
    self.beta = Beta(alpha, alpha)
    self.augs = augmentations
    # KL divergence used for the consistency objective.
    self.kl = nn.KLDivLoss(reduction='batchmean')
def __init__(self, alpha, num_classes):
    super(BatchMixupLayer, self).__init__()
    assert isinstance(alpha, float)
    assert isinstance(num_classes, int)
    self.alpha = alpha
    self.num_classes = num_classes
    self.Beta = Beta(self.alpha, self.alpha)
def update(self, a_tnsr, b_tnsr, action_tensor, reward_tensor):
    self.optimizer.zero_grad()
    m = Beta(a_tnsr, b_tnsr)
    log_probs = m.log_prob(action_tensor)
    # REINFORCE loss: negative return-weighted log-likelihood of the actions.
    loss = (-torch.matmul(reward_tensor, log_probs)).mean()
    loss.backward()
    self.optimizer.step()
    self.scheduler.step()
def log_probs(self, batch_states, batch_actions):
    # Get Beta concentrations from the policy (mirrors sample_action above).
    act = self.forward(batch_states)
    c1 = T.sigmoid(act[:, :, :self.act_dim]) * 5
    c2 = T.sigmoid(act[:, :, self.act_dim:]) * 5
    beta_dist = Beta(c1, c2)
    log_probs = beta_dist.log_prob(batch_actions)
    return log_probs.sum(1, keepdim=True)
def __init__(self, alpha=1.0, lam=RANDOM):
    super(ManifoldMixup, self).__init__()
    self._layers = []
    self._mixup_layers = None
    self.alpha = alpha
    self.lam = lam
    self.distrib = Beta(self.alpha, self.alpha)
    self.layer_names = []
    self.depth = 0
    self._layer_filter = []
    self._layer_types = []
def calc_unnormalized_beta_cdf(self, b, alpha, beta, npts=100):
    # Approximate the Beta CDF at b by a Riemann sum over the pdf.
    bt = Beta(alpha.float(), beta.float())
    x = torch.linspace(0 + self.epsilon, b - self.epsilon,
                       int(npts * b.cpu().numpy()),
                       device=self.device).float()
    pdf = bt.log_prob(x).exp()
    dx = torch.tensor([1. / (npts * self.num_classes)],
                      device=self.device).float()
    P = pdf.sum(dim=1) * dx
    return P
def observe(self, move, reward):
    if isinstance(reward, torch.Tensor):
        reward = reward.item()
    # Discount the current pseudo-counts toward the uniform Beta(1, 1) prior...
    alpha = (1 - self.gamma) * self.rewards.concentration1 \
        + self.gamma * torch.ones(3)
    beta = (1 - self.gamma) * self.rewards.concentration0 \
        + self.gamma * torch.ones(3)
    # ...then credit the played arm: a reward of 1 raises alpha, a negative
    # reward raises beta.
    if reward == 1:
        alpha[move] += reward
    else:
        beta[move] -= reward
    self.rewards = Beta(alpha, beta)
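# A hedged sketch of how such a per-arm Beta posterior is typically consumed
# for Thompson sampling over the three arms; the `act` method name is
# hypothetical and not part of the original class.
def act(self):
    # Draw one success-probability sample per arm and play the argmax.
    return self.rewards.sample().argmax().item()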
def forward(self, nsmpl, return_z=False):
    zero = torch.zeros_like(self.ref)  # Proper device.
    one = torch.ones_like(self.ref)   # Proper device.
    # Earlier variant: draw z from a +/-1 Bernoulli mixture of Gaussians.
    # mix = 2 * Bernoulli(.2 * one[0]).sample([nsmpl]) - 1.
    # mu = torch.ger(mix, one)  # Array of +/-1.
    # sd = one.expand([nsmpl, -1])
    # z = Normal(mu, sd).sample()
    z = Normal(zero, one).sample([nsmpl])
    # The network maps z to the two Beta concentrations.
    a, b = self.detfwd(z)
    if return_z:
        return z, Beta(a, b).rsample()
    return Beta(a, b).rsample()
def step(self, input, target, teams):
    """Do one training step and return the loss."""
    self.train()
    self.zero_grad()

    event_scores, time_scores = self.forward(input, teams)
    event_proba = F.softmax(event_scores, 2)
    time_proba = F.softmax(time_scores, 2)

    # Only keep events occurring during the games.
    (events_during_game, target_events_during_game, time_during_game,
     target_time_during_game, end_game_indices) = get_during_game_tensors(
         event_scores, time_scores, target, return_end_game_idx=True)

    # Only keep goals occurring during the games.
    (goals_home_tensor, goals_home_target_tensor, goals_away_tensor,
     goals_away_target_tensor) = get_during_game_goals(event_proba, target)
    goals_tensor = torch.stack([goals_home_tensor, goals_away_tensor], 1)
    goals_target_tensor = torch.stack(
        [goals_home_target_tensor, goals_away_target_tensor], 1)

    accuracy = torch.tensor(0)
    loss_result_game = torch.tensor(0)

    # Event and time loss functions.
    loss_events_during_game = self.loss_function_events(
        events_during_game, target_events_during_game)
    loss_time_during_game = self.loss_function_time(
        time_during_game, target_time_during_game)

    # Regularizer discouraging too many events in the same minute.
    time_proba_during_game = F.softmax(time_during_game, 1)
    beta_distr = Beta(ALPHA_FOR_BETA_DISTR, BETA_FOR_BETA_DISTR)
    log_prob = beta_distr.log_prob(
        time_proba_during_game[:, SAME_TIME_THAN_PREV])
    same_minute_event_loss = -torch.mean(log_prob)

    total_loss = (loss_events_during_game + loss_time_during_game +
                  BETA_WEIGHT * same_minute_event_loss) / (2 + BETA_WEIGHT)
    total_loss.backward()
    self.optimizer.step()

    return (event_proba, time_proba, total_loss.data.item(),
            loss_events_during_game.data.item(),
            loss_time_during_game.data.item(),
            same_minute_event_loss.item(),
            loss_result_game.data.item(), accuracy.item())
def act(self, state_tensor):
    # state_tensor is a batch of tensors rather than a joint state.
    # Earlier Gaussian-policy variant:
    # value, mu, cov = self.value_action_predictor(state_tensor)
    # dist = MultivariateNormal(mu, cov)
    # actions = dist.sample()
    # action_log_probs = dist.log_prob(actions)
    # action_to_take = [ActionXY(action[0], action[1])
    #                   for action in actions.cpu().numpy()]
    value, alpha_beta_1, alpha_beta_2 = self.value_action_predictor(
        state_tensor)
    # Independent Beta distributions for the two velocity components.
    vx_dist = Beta(alpha_beta_1[:, 0], alpha_beta_1[:, 1])
    vy_dist = Beta(alpha_beta_2[:, 0], alpha_beta_2[:, 1])
    actions = torch.cat(
        [vx_dist.sample().unsqueeze(1), vy_dist.sample().unsqueeze(1)],
        dim=1)
    action_log_probs = (vx_dist.log_prob(actions[:, 0]).unsqueeze(1) +
                        vy_dist.log_prob(actions[:, 1]).unsqueeze(1))
    # Rescale Beta samples from (0, 1) to the action range (-1, 1).
    action_to_take = [ActionXY(action[0] * 2 - 1, action[1] * 2 - 1)
                      for action in actions.cpu().numpy()]
    return value, actions, action_log_probs, action_to_take
def __init__(self, model_gp, likelihood_gp, hyperpriors: dict) -> None:
    self.model_gp = model_gp
    self.likelihood_gp = likelihood_gp
    self.hyperpriors = hyperpriors

    # Beta hyperprior on the lengthscales.
    a_beta = self.hyperpriors["lengthscales"].kwds["a"]
    b_beta = self.hyperpriors["lengthscales"].kwds["b"]
    self.Beta_tmp = Beta(concentration1=a_beta, concentration0=b_beta)

    # Gamma hyperprior on the outputscale; the scipy-style `scale` is the
    # inverse of torch's `rate`.
    a_gg = self.hyperpriors["outputscale"].kwds["a"]
    b_gg = self.hyperpriors["outputscale"].kwds["scale"]
    self.Gamma_tmp = Gamma(concentration=a_gg, rate=1. / b_gg)
def predict_proba_and_get_loss(self, input, target, teams):
    event_scores, time_scores = self.forward(input, teams)

    # Get probabilities.
    event_proba = F.softmax(event_scores, 2)
    time_proba = F.softmax(time_scores, 2)

    # Separate events from time.
    target_events = target[:, :, 0]
    target_time = target[:, :, 1]

    # Only keep events occurring during the games.
    (events_during_game, target_events_during_game, time_during_game,
     target_time_during_game) = get_during_game_tensors(
         event_scores, time_scores, target)

    # Only keep goals occurring during the games.
    (goals_home_tensor, goals_home_target_tensor, goals_away_tensor,
     goals_away_target_tensor) = get_during_game_goals(event_proba, target)
    goals_tensor = torch.stack([goals_home_tensor, goals_away_tensor], 1)
    goals_target_tensor = torch.stack(
        [goals_home_target_tensor, goals_away_target_tensor], 1)
    games_proba = get_games_proba_from_goals_proba(goals_tensor)
    games_results = get_games_results_from_goals(goals_target_tensor)

    # Cross-entropy loss for the result, reported but not backpropagated.
    loss_result_game = self.loss_function_result(games_proba, games_results)

    # Regularizer discouraging too many events in the same minute.
    time_proba_during_game = F.softmax(time_during_game, 1)
    beta_distr = Beta(ALPHA_FOR_BETA_DISTR, BETA_FOR_BETA_DISTR)
    log_prob = beta_distr.log_prob(
        time_proba_during_game[:, SAME_TIME_THAN_PREV])
    same_minute_event_loss = -torch.mean(log_prob)

    # Event and time loss functions.
    loss_time_during_game = self.loss_function_time(
        time_during_game, target_time_during_game)
    loss_events_during_game = self.loss_function_events(
        events_during_game, target_events_during_game)
    total_loss = (loss_events_during_game + loss_time_during_game +
                  BETA_WEIGHT * same_minute_event_loss) / (2 + BETA_WEIGHT)

    return (event_proba, time_proba, total_loss.data.item(),
            loss_events_during_game.data.item(),
            loss_time_during_game.data.item(),
            same_minute_event_loss.data.item(),
            loss_result_game.data.item())
def generate_data(num_obs):
    # domain = [False, True]
    # Beta priors over the Bernoulli parameters of the chain A -> B -> C.
    prior = {'A': torch.tensor([1., 10.]),
             'B': torch.tensor([[10., 1.], [1., 10.]]),
             'C': torch.tensor([[10., 1.], [1., 10.]])}
    CPDs = {'p_A': Beta(prior['A'][0], prior['A'][1]).sample(),
            'p_B': Beta(prior['B'][:, 0], prior['B'][:, 1]).sample(),
            'p_C': Beta(prior['C'][:, 0], prior['C'][:, 1]).sample()}
    # Ancestral sampling: each child's parameter is selected by its parent's
    # sampled value.
    data = {'A': Bernoulli(torch.ones(num_obs) * CPDs['p_A']).sample()}
    data['B'] = Bernoulli(torch.gather(CPDs['p_B'], 0,
                                       data['A'].type(torch.long))).sample()
    data['C'] = Bernoulli(torch.gather(CPDs['p_C'], 0,
                                       data['B'].type(torch.long))).sample()
    return prior, CPDs, data
def optimize(self, train_data, test_data, epochs=30, bsz=256):
    # The initial learning rate is set low to keep the parameters from
    # blowing up; if it is higher, no learning takes place.
    optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
    sched = torch.optim.lr_scheduler.MultiStepLR(optimizer, [29])
    batches = DataLoader(dataset=train_data, batch_size=bsz, shuffle=True)
    test_set = DataLoader(dataset=test_data, batch_size=bsz, shuffle=True)

    best = float('inf')
    for ep in range(epochs):
        batch_loss = 0.0
        self.train()
        # Start enumerate at 1 so the averages below divide by the batch count.
        for bno, data in enumerate(batches, 1):
            atac = torch.clamp(data[:, :50], min=.001, max=.9999)
            hic = data[:, 50:]
            # Shrink 5% of the entries.
            shrink = torch.ones_like(hic, device=data.device)
            idx = torch.rand(shrink.shape, device=data.device) < .05
            shrink[idx] = torch.rand(shrink.shape, device=data.device)[idx]
            # Random factor.
            # rfact = .8 + .4 * torch.rand(1, device=data.device)
            (a, b) = self(hic * shrink)
            # (a, b) = self(hic)
            # Negative log-likelihood of the ATAC signal under Beta(a, b).
            loss = -torch.mean(Beta(a, b).log_prob(atac))
            batch_loss += float(loss)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        sched.step()

        # Test data.
        self.eval()
        with torch.no_grad():
            test_rcst = 0.0
            for sno, data in enumerate(test_set, 1):
                hic = data[:, 50:]
                atac = torch.clamp(data[:, :50], min=.001, max=.9999)
                (a, b) = self(hic)
                test_rcst -= float(torch.mean(Beta(a, b).log_prob(atac)))

        # Print logs on stderr.
        if test_rcst / sno < best:
            best = test_rcst / sno
        sys.stderr.write('%d\t%f\t%f\t%f\n' %
                         (ep, batch_loss / bno, test_rcst / sno, best))
class MixUp(Callback):
    _order = 90  # Runs after normalization and cuda

    def __init__(self, alpha=0.4):
        self.distrib = Beta(tensor([alpha]), tensor([alpha]))

    def begin_fit(self):
        # Swap in the mixup loss, keeping a handle on the original.
        self.old_loss_func, self.learn.loss_func = (self.learn.loss_func,
                                                    self.loss_func)

    def begin_batch(self):
        if not self.training: return  # Only mixup things during training
        lam = self.distrib.sample(
            (self.yb.size(0),)).squeeze().to(self.xb.device)
        lam = torch.stack([lam, 1 - lam], 1)
        self.lam = lam.max(1)[0][:, None, None, None]
        shuffle = torch.randperm(self.yb.size(0)).to(self.xb.device)
        xb1, self.yb1 = self.xb[shuffle], self.yb[shuffle]
        self.learn.xb = torch.lerp(xb1, self.xb, self.lam)

    def after_fit(self):
        self.learn.loss_func = self.old_loss_func

    def loss_func(self, pred, yb):
        if not self.in_train: return self.old_loss_func(pred, yb)
        with NoneReduce(self.old_loss_func) as loss_func:
            loss1 = loss_func(pred, yb)
            loss2 = loss_func(pred, self.yb1)
        # Flatten lam back to (B,) so each per-example loss is paired with
        # its own mixing coefficient.
        loss = torch.lerp(loss2, loss1, self.lam.view(-1))
        return reduce_loss(loss,
                           getattr(self.old_loss_func, 'reduction', 'mean'))
def _rejection_sample_wood(loc: torch.Tensor, concentration: torch.Tensor,
                           w: torch.Tensor):
    """
    The acceptance-rejection sampling scheme from Wood (1994).

    Based on TensorFlow Probability's implementation:
    https://github.com/tensorflow/probability/blob/v0.11.1/tensorflow_probability/python/distributions/von_mises_fisher.py#L421
    and the implementation from "Spherical Latent Spaces for Stable
    Variational Autoencoders" by Jiacheng Xu, Greg Durrett:
    https://github.com/jiacheng-xu/vmf_vae_nlp/blob/master/NVLL/distribution/vmf_only.py#L92
    """
    m = loc.shape[-1]
    b = (m - 1) / (2 * concentration +
                   torch.sqrt((4 * (concentration**2)) + (m - 1)**2))
    x = (1 - b) / (1 + b)
    c = concentration * x + (m - 1) * torch.log(1 - x**2)

    # Sampling should accept a scalar `w` for each training example.
    done = torch.zeros(w.shape, dtype=torch.bool, device=loc.device)
    while not done.all():
        epsilon = Beta(0.5 * (m - 1), 0.5 * (m - 1)).sample(w.shape)
        w_prime = (1 - (1 + b) * epsilon) / (1 - (1 - b) * epsilon)
        u = Uniform(0.0 + 1e-6, 1.0).sample(w.shape)
        accept = (concentration * w_prime +
                  (m - 1) * torch.log(1 - x * w_prime) - c >= torch.log(u))
        if accept.any():
            w = torch.where(accept, w_prime, w)
            done = done | accept
    return w
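# A hedged driver for _rejection_sample_wood: sample the scalar component w
# for a batch of von Mises-Fisher draws. The shapes follow the function's
# broadcasting; the initial `w` only supplies the output shape, and all
# values here are illustrative.
loc = torch.randn(16, 8)
loc = loc / loc.norm(dim=-1, keepdim=True)  # unit-norm mean directions
concentration = torch.full((16, 1), 20.0)   # kappa
w0 = torch.zeros(16, 1)
w = _rejection_sample_wood(loc, concentration, w0)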
class MixupBlending(BaseMiniBatchBlending):
    """Implementing Mixup in a mini-batch.

    This module is proposed in `mixup: Beyond Empirical Risk Minimization
    <https://arxiv.org/abs/1710.09412>`_.

    Code Reference:
    https://github.com/open-mmlab/mmclassification/blob/master/mmcls/models/utils/mixup.py # noqa

    Args:
        num_classes (int): The number of classes.
        alpha (float): Parameter for the Beta distribution.
    """

    def __init__(self, num_classes, alpha=.2):
        super().__init__(num_classes=num_classes)
        self.beta = Beta(alpha, alpha)

    def do_blending(self, imgs, label, **kwargs):
        """Blend images with mixup."""
        assert len(kwargs) == 0, f'unexpected kwargs for mixup {kwargs}'

        lam = self.beta.sample()
        batch_size = imgs.size(0)
        rand_index = torch.randperm(batch_size)

        mixed_imgs = lam * imgs + (1 - lam) * imgs[rand_index, :]
        mixed_label = lam * label + (1 - lam) * label[rand_index, :]

        return mixed_imgs, mixed_label
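# A hedged usage sketch for MixupBlending; the one-hot conversion reflects
# that do_blending interpolates soft labels. Shapes and values here are
# illustrative placeholders.
import torch
import torch.nn.functional as F

blender = MixupBlending(num_classes=10, alpha=0.2)
imgs = torch.randn(8, 3, 224, 224)
label = F.one_hot(torch.randint(0, 10, (8,)), num_classes=10).float()
mixed_imgs, mixed_label = blender.do_blending(imgs, label)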