def act(self, state_tensor):
    """Sample actions for a batch of states using per-axis Beta policies.

    Args:
        state_tensor: batch of state tensors (one row per state), fed
            directly to the value/action predictor network.

    Returns:
        value: predicted state values from the critic head.
        actions: raw Beta samples in [0, 1]^2, shape (batch, 2).
        action_log_probs: joint log-probability of the sampled (vx, vy)
            pair, shape (batch, 1).
        action_to_take: list of ActionXY with the samples rescaled from
            [0, 1] to the [-1, 1] action range.
    """
    value, alpha_beta_1, alpha_beta_2 = self.value_action_predictor(
        state_tensor)
    # Independent Beta distributions for the two velocity components.
    vx_dist = Beta(alpha_beta_1[:, 0], alpha_beta_1[:, 1])
    vy_dist = Beta(alpha_beta_2[:, 0], alpha_beta_2[:, 1])
    actions = torch.cat(
        [vx_dist.sample().unsqueeze(1), vy_dist.sample().unsqueeze(1)],
        dim=1)
    # Joint log-prob is the sum of per-axis log-probs (independence).
    action_log_probs = vx_dist.log_prob(
        actions[:, 0]).unsqueeze(1) + vy_dist.log_prob(
            actions[:, 1]).unsqueeze(1)
    # Map the Beta support [0, 1] onto the environment's [-1, 1] range.
    action_to_take = [
        ActionXY(action[0] * 2 - 1, action[1] * 2 - 1)
        for action in actions.cpu().numpy()
    ]
    return value, actions, action_log_probs, action_to_take
def __init__(self, alpha=1.0, lam=RANDOM, reformulate=False):
    """Mixup module drawing mixing coefficients from a Beta distribution.

    Args:
        alpha: Beta concentration; Beta(alpha, alpha) by default.
        lam: fixed mixing coefficient, or RANDOM to sample per call.
        reformulate: if True, use the reformulated Beta(alpha + 1, alpha).
    """
    super(RMixup, self).__init__()
    self.alpha = alpha
    self.lam = lam
    self.reformulate = reformulate
    if reformulate:
        self.distrib = Beta(self.alpha + 1, self.alpha)
    else:
        self.distrib = Beta(self.alpha, self.alpha)
def forward(self, nsmpl, return_z=False):
    """Draw nsmpl samples by pushing standard-normal latents through the
    deterministic network and sampling the resulting Beta distribution.

    Args:
        nsmpl: number of samples to draw.
        return_z: if True, also return the latent normal draws.
    """
    # Zeros/ones inherit device (and shape) from the reference tensor.
    zero = torch.zeros_like(self.ref)
    one = torch.ones_like(self.ref)
    z = Normal(zero, one).sample([nsmpl])
    a, b = self.detfwd(z)
    sample = Beta(a, b).rsample()
    return (z, sample) if return_z else sample
def optimize_epoch(self, num_epochs):
    """Run imitation-learning epochs over the replay memory, jointly fitting
    the value head (criterion_val) and the two Beta policy heads via
    negative log-likelihood with an entropy bonus.

    Args:
        num_epochs: number of passes over self.memory.

    Returns:
        The per-sample average value loss of the last completed epoch
        (0 if num_epochs == 0).

    Raises:
        ValueError: if the optimizer has not been configured.
    """
    if self.optimizer is None:
        raise ValueError('Learning rate is not set!')
    if self.data_loader is None:
        # Lazily build the loader over the replay memory.
        self.data_loader = DataLoader(self.memory, self.batch_size,
                                      shuffle=True)
    average_value_loss = 0
    average_policy_loss = 0
    for epoch in range(num_epochs):
        value_loss = 0
        policy_loss = 0
        logging.debug('{}-th epoch starts'.format(epoch))
        for data in self.data_loader:
            inputs, values, _, actions = data
            self.optimizer.zero_grad()
            outputs_val, alpha_beta_1, alpha_beta_2 = self.model(inputs)
            # Independent Beta policies over the two action components.
            vx_dist = Beta(alpha_beta_1[:, 0], alpha_beta_1[:, 1])
            vy_dist = Beta(alpha_beta_2[:, 0], alpha_beta_2[:, 1])
            # Squeeze demonstration actions strictly inside (0, 1) so the
            # Beta log-density stays finite at the support boundaries.
            p = torch.Tensor([1 + 1e-6]).to(self.device)
            q = torch.Tensor([1e-8]).to(self.device)
            action_log_probs = (vx_dist.log_prob(actions[:, 0] / p + q)).unsqueeze(1) +\
                (vy_dist.log_prob(actions[:, 1] / p + q)).unsqueeze(1)
            values = values.to(self.device)
            dist_entropy = vx_dist.entropy().mean() + vy_dist.entropy(
            ).mean()
            # Value regression loss plus behavior-cloning NLL, minus an
            # entropy bonus that discourages premature determinism.
            loss1 = self.criterion_val(outputs_val, values)
            loss2 = -action_log_probs.mean()
            loss = loss1 + loss2 - dist_entropy * self.entropy_coef
            loss.backward()
            self.optimizer.step()
            value_loss += loss1.data.item()
            policy_loss += loss2.data.item()
        logging.debug('{}-th epoch ends'.format(epoch))
        # Normalize by memory size, not batch count, so losses are
        # comparable across batch sizes.
        average_value_loss = value_loss / len(self.memory)
        average_policy_loss = policy_loss / len(self.memory)
        self.writer.add_scalar('IL/average_value_loss', average_value_loss,
                               epoch)
        self.writer.add_scalar('IL/average_policy_loss',
                               average_policy_loss, epoch)
        logging.info('Average value, policy loss in epoch %d: %.2E, %.2E',
                     epoch, average_value_loss, average_policy_loss)
    return average_value_loss
def generate_data(num_obs):
    """Sample CPD parameters from Beta priors for the chain A -> B -> C and
    draw num_obs Bernoulli observations from the resulting network.

    Args:
        num_obs: number of joint observations to draw.

    Returns:
        (prior, CPDs, data): the Beta prior parameters, the sampled success
        probabilities, and 0/1 observation tensors of length num_obs.
    """
    prior = {
        'A': torch.tensor([1., 10.]),
        'B': torch.tensor([[10., 1.], [1., 10.]]),
        'C': torch.tensor([[10., 1.], [1., 10.]]),
    }
    # One success probability for A; one per parent state for B and C.
    CPDs = {}
    CPDs['p_A'] = Beta(prior['A'][0], prior['A'][1]).sample()
    CPDs['p_B'] = Beta(prior['B'][:, 0], prior['B'][:, 1]).sample()
    CPDs['p_C'] = Beta(prior['C'][:, 0], prior['C'][:, 1]).sample()
    data = {'A': Bernoulli(torch.ones(num_obs) * CPDs['p_A']).sample()}
    # Each child's CPD entry is selected by its parent's sampled state.
    parent_a = data['A'].type(torch.long)
    data['B'] = Bernoulli(torch.gather(CPDs['p_B'], 0, parent_a)).sample()
    parent_b = data['B'].type(torch.long)
    data['C'] = Bernoulli(torch.gather(CPDs['p_C'], 0, parent_b)).sample()
    return prior, CPDs, data
def optimize(self, train_data, test_data, epochs=30, bsz=256):
    """Train the model to predict ATAC Beta parameters from Hi-C input.

    Minimizes the negative Beta log-likelihood of the (clamped) ATAC
    values, randomly shrinking 5% of the Hi-C entries as input noise.
    After each epoch the model is evaluated on test_data and a progress
    line is written to stderr.

    Args:
        train_data, test_data: datasets of rows [ATAC(50) | Hi-C(rest)].
        epochs: number of training epochs.
        bsz: batch size.
    """
    # The initial learning rates are set to avoid the parameters
    # blowing up. If they are higher no learning takes place.
    optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
    sched = torch.optim.lr_scheduler.MultiStepLR(optimizer, [29])
    batches = DataLoader(dataset=train_data, batch_size=bsz, shuffle=True)
    test_set = DataLoader(dataset=test_data, batch_size=bsz, shuffle=True)
    best = float('inf')
    for ep in range(epochs):
        batch_loss = 0.0
        n_train_batches = 0
        self.train()
        for data in batches:
            # Keep targets strictly inside (0, 1) for the Beta density.
            atac = torch.clamp(data[:, :50], min=.001, max=.9999)
            hic = data[:, 50:]
            # Shrink 5% of the entries (input corruption / augmentation).
            shrink = torch.ones_like(hic, device=data.device)
            idx = torch.rand(shrink.shape, device=data.device) < .05
            shrink[idx] = torch.rand(shrink.shape, device=data.device)[idx]
            (a, b) = self(hic * shrink)
            loss = -torch.mean(Beta(a, b).log_prob(atac))
            batch_loss += float(loss)
            n_train_batches += 1
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        sched.step()
        # Test data.
        self.eval()
        with torch.no_grad():
            test_rcst = 0.0
            n_test_batches = 0
            for data in test_set:
                hic = data[:, 50:]
                atac = torch.clamp(data[:, :50], min=.001, max=.9999)
                (a, b) = self(hic)
                test_rcst -= float(torch.mean(Beta(a, b).log_prob(atac)))
                n_test_batches += 1
        # BUG FIX: the original divided by the last enumerate index
        # (bno / sno), i.e. batch count minus one, which skewed the
        # averages and raised ZeroDivisionError with a single batch.
        avg_train = batch_loss / n_train_batches
        avg_test = test_rcst / n_test_batches
        if avg_test < best:
            best = avg_test
        # Print logs on stderr.
        sys.stderr.write('%d\t%f\t%f\t%f\n' %
                         (ep, avg_train, avg_test, best))
def __init__(self, model: nn.Module, optimizer: Optimizer, loss_f: Callable,
             temperature: float, beta: float, consistency_weight: float, *,
             reporters: Optional[_ReporterBase or List[_ReporterBase]] = None,
             scheduler: Optional[Scheduler] = None, verb=True,
             use_cudnn_benchmark=True,
             report_accuracy_topk: Optional[int or List[int]] = None,
             **kwargs):
    """MixMatch trainer configuration.

    Args:
        temperature: sharpening temperature for pseudo-labels.
        beta: Beta(beta, beta) concentration for the mixup coefficient.
        consistency_weight: weight of the consistency loss term.
        report_accuracy_topk: scalar or list of top-k values to report.
    """
    super(MixMatchTrainer, self).__init__(
        model, optimizer, loss_f,
        reporters=reporters, scheduler=scheduler, verb=verb,
        use_cudnn_benchmark=use_cudnn_benchmark, **kwargs)
    self.temperature = temperature
    # Beta(beta, beta) supplies the mixup coefficient lambda.
    self.beta = Beta(beta, beta)
    self.consistency_weight = consistency_weight
    # Normalize a scalar top-k spec into a list.
    if report_accuracy_topk is not None and not isinstance(
            report_accuracy_topk, Iterable):
        report_accuracy_topk = [report_accuracy_topk]
    self._report_topk = report_accuracy_topk
def begin_batch(self):
    "Updates alpha as a function of the training percentage."
    # The partial application is done here (not in the constructor) to
    # dodge a pickle ambiguity error on learn.export, since the partially
    # applied function shares its name with the original function.
    sched_fn = self.scheduler(self.alpha_min, self.alpha_max)
    cur_alpha = sched_fn(self.pct_train)
    self.distrib = Beta(tensor(cur_alpha), tensor(cur_alpha))
    super().begin_batch()
def __init__(self, model: nn.Module, optimizer: Optimizer, loss_f: Callable,
             consistency_weight: float, alpha: float, beta: float, *,
             reporters: Optional[_ReporterBase or List[_ReporterBase]] = None,
             scheduler: Optional[Scheduler] = None, verb=True,
             use_cudnn_benchmark=True,
             report_accuracy_topk: Optional[int or List[int]] = None,
             **kwargs):
    """Interpolation-consistency trainer with a mean-teacher pair.

    The student model is deep-copied into a teacher; both are passed to the
    base trainer as a dict under the keys 'student' and 'teacher'.

    Args:
        consistency_weight: weight of the consistency loss term.
        alpha: EMA decay rate for the teacher update.
        beta: Beta(beta, beta) concentration for the mixup coefficient.
        report_accuracy_topk: scalar or list of top-k values to report.
    """
    teacher = deepcopy(model)
    model = {'student': model, 'teacher': teacher}
    super(InterpolationConsistencyTrainer, self).__init__(
        model, optimizer, loss_f,
        reporters=reporters, scheduler=scheduler, verb=verb,
        use_cudnn_benchmark=use_cudnn_benchmark, **kwargs)
    self.consistency_weight = consistency_weight
    self.alpha = alpha
    self.beta = Beta(beta, beta)
    # Normalize a scalar top-k spec into a list.
    if report_accuracy_topk is not None and not isinstance(
            report_accuracy_topk, Iterable):
        report_accuracy_topk = [report_accuracy_topk]
    self._report_topk = report_accuracy_topk
def __getitem__(self, idx):
    """Generate one (input, target) pair for the priority-sort task.

    idx only acts as a counter while generating batches; the sequence
    itself is random every call.
    """
    # Random binary sequence, plus one extra channel for the priority.
    prob = 0.5 * torch.ones([self.input_seq_len, self.seq_width],
                            dtype=torch.float64)
    seq = Binomial(1, prob).sample()
    input_seq = torch.zeros([self.input_seq_len, self.seq_width + 1])
    input_seq[:self.input_seq_len, :self.seq_width] = seq
    # torch's Uniform draws from the half-open interval [low, high) while
    # the paper draws priorities from [-1, 1]; the difference is ignored
    # here as it supposedly does not affect the task.
    if self.uniform:
        priority = Uniform(torch.tensor([-1.0]), torch.tensor([1.0]))
    else:
        alpha = torch.tensor([2.0])
        beta = torch.tensor([5.0])
        if self.random_distr:
            # Randomize the Beta hyper-parameters themselves.
            hyper = Uniform(torch.tensor([0.0]), torch.tensor([100.0]))
            alpha = hyper.sample()
            beta = hyper.sample()
        priority = Beta(alpha, beta)
    for row in range(self.input_seq_len):
        input_seq[row, self.seq_width] = priority.sample()
    # Target: rows sorted by descending priority, truncated, with the
    # priority channel dropped.
    order = torch.sort(input_seq[:, -1], descending=True)[1]
    target_seq = input_seq[order][:self.target_seq_len, :self.seq_width]
    return {'input': input_seq, 'target': target_seq}
def get_distribution(self, params1, params2):
    """Build the action distribution configured by self.distribution.

    Args:
        params1: mean (normal) or concentration1 (beta).
        params2: std (normal) or concentration0 (beta).

    Returns:
        A torch Normal or Beta distribution.

    Raises:
        ValueError: if self.distribution is neither 'normal' nor 'beta'.
            (Previously an unknown setting surfaced as a confusing
            UnboundLocalError because `dist` was never assigned.)
    """
    if self.distribution == "normal":
        return Normal(params1, params2)
    if self.distribution == "beta":
        return Beta(params1, params2)
    raise ValueError(
        "Unknown distribution: {!r}".format(self.distribution))
def probabilty_s_given_y(theta, s, y, l, k, ratio_agreement=0.95, model=1):
    """Likelihood of the continuous LF scores s given a candidate label y.

    NOTE(review): the name keeps its original spelling ('probabilty')
    because external callers may depend on it.

    Args:
        theta: per-LF parameter tensor (agree/disagree pairs).
        s: continuous scores, shape (n_samples, n_lfs) — assumed to lie in
            the Beta support (0, 1) for model 1; TODO confirm with caller.
        y: candidate label(s) compared against each LF's target class.
        l: 0/1 triggering matrix aligned with s (LF fired or not).
        k: per-LF target classes, length n_lfs.
        ratio_agreement: mixing weight for the agreement case (model 1).
        model: 1 = Beta likelihood, 2 = HalfNormal likelihood.

    Returns:
        Element-wise product of per-LF likelihood factors (tensor over
        samples); non-triggered LFs contribute a factor of 1.
    """
    if model == 1:
        # eq[i] == 1 where LF i's target class matches y.
        eq = torch.eq(k.view(-1, 1).long(), y.long()).double().t()
        # r interpolates between agreement and disagreement weight.
        r = ratio_agreement * eq.squeeze() + (1 - ratio_agreement) * (
            1 - eq.squeeze())
        eq = torch.stack([eq, 1 - eq]).squeeze().t()
        params = (theta * eq).sum(1)
        probability = 1
        for i in range(k.shape[0]):
            # Beta parameterized so r controls the concentration split.
            m = Beta(r[i] * params[i] / (r[i] + 1), params[i] / (r[i] + 1))
            # l masks the factor: non-triggered entries multiply by 1.
            probability *= torch.exp(m.log_prob(
                s[:, i].double(), )) * l[:, i].double() + (1 - l[:, i]).double()
    elif model == 2:
        eq = torch.eq(k.view(-1, 1).long(), y.long()).double().t()
        eq = torch.stack([eq, 1 - eq]).squeeze().t()
        params = (theta * eq).sum(1)
        probability = 1
        for i in range(k.shape[0]):
            # HalfNormal density, complemented when the LF agrees.
            m = HalfNormal(params[i])
            probability *= (
                (1 - torch.exp(m.log_prob(s[:, i].double()))) * eq[i, 0] +
                (torch.exp(m.log_prob(s[:, i].double()))) *
                (1 - eq[i, 0])) * l[:, i].double() + (1 - l[:, i]).double()
    return probability
def mixup_data(x: torch.FloatTensor, y: torch.LongTensor, alpha: float = 1.0):
    """Mixup for two-channel WM811K wafer maps.

    Channel 0 (fail=1 / pass=0) is linearly blended with a coefficient
    lam ~ Beta(alpha, alpha); channel 1 (valid-bin mask) is combined by
    clamped addition, i.e. the union of valid bins.

    Args:
        x: batch of shape (B, 2, H, W).
        y: labels of length B.
        alpha: Beta concentration; alpha <= 0 disables mixing (lam = 1).

    Returns:
        (mixed_x, y_a, y_b, lam): mixed batch, original labels, permuted
        labels, and the mixing coefficient.

    Raises:
        ValueError: if x and y differ in their first dimension.
        NotImplementedError: if x does not have exactly two channels.
    """
    if len(x) != len(y):
        raise ValueError(
            "The size of `x` and `y` must match in the first dim.")
    if alpha > 0.:
        lam = Beta(torch.tensor([float(alpha)]),
                   torch.tensor([float(alpha)])).sample().item()
    else:
        lam = 1.
    batch_size, num_channels, _, _ = x.size()
    index = torch.randperm(batch_size).to(x.device)
    if num_channels != 2:
        raise NotImplementedError
    blended = lam * x[:, 0, :, :] + (1 - lam) * x[index, 0, :, :]
    mask = torch.clamp(x[:, 1, :, :] + x[index, 1, :, :], min=0, max=1)
    mixed_x = torch.stack([blended, mask], dim=1)
    return mixed_x, y, y[index], lam
def get_random_domainess(cur_iter, total_iter, batch):
    """Sample per-example 'domainness' weights that drift with training.

    The Beta concentration grows exponentially with training progress, so
    the samples z concentrate near 1 late in training. Each returned row is
    [1 - z, z2, z - z2], which sums to 1.

    Args:
        cur_iter: current training iteration.
        total_iter: total number of iterations.
        batch: number of rows to sample.

    Returns:
        Tensor of shape (batch, 3) whose rows sum to 1.
    """
    progress = (cur_iter - (0.5 * total_iter)) / (0.25 * total_iter)
    alpha = np.exp(progress)
    z = Beta(alpha, 1).sample((batch, 1))
    # Split z into two non-negative parts using one shared random ratio.
    z2 = z * torch.rand(1)
    return torch.cat([1 - z, z2, z - z2], dim=1)
def augmentAndMix(x_orig, k, alpha, preprocess):
    """AugMix-style blend of k randomly-composed augmentation chains.

    Args:
        x_orig: source image (before preprocessing).
        k: number of augmentation chains to mix.
        alpha: concentration for both the Dirichlet chain weights and the
            Beta skip-connection weight.
        preprocess: transform applied before pixel-space mixing.

    Returns:
        The mixed image: a Beta-weighted blend of the chain mixture and
        the preprocessed original.
    """
    # NOTE(review): x_temp is not reset to x_orig between chains, so later
    # chains compound earlier augmentations — verify against the AugMix
    # reference, which restarts each chain from the original image.
    x_temp = x_orig
    x_aug = torch.zeros_like(preprocess(x_orig))
    # Convex weights over the k chains.
    chain_weights = Dirichlet(torch.empty(k).fill_(alpha)).sample()
    for i in range(k):
        sampled_augs = random.sample(augmentations, k)
        depth = random.choice(range(1, k + 1))
        for aug in sampled_augs[:depth]:
            severity = random.choice(range(1, 6))
            x_temp = aug(x_temp, severity)
        x_aug += chain_weights[i] * preprocess(x_temp)
    # Beta-distributed skip-connection weight.
    skip_w = Beta(torch.tensor([alpha]), torch.tensor([alpha])).sample()
    return skip_w * x_aug + (1 - skip_w) * preprocess(x_orig)
def forward(self, ob, mu, K, sampled=True, z_old=None, beta_old=None):
    """Encoder step: propose cluster assignments ('states') and
    Beta-distributed 'angles' per observation, recorded in a probtorch
    Trace.

    Args:
        ob: observations of shape (S, B, N, D) — sample, batch, points,
            feature dim.
        mu: cluster centers — presumably shape (S, B, K, D); TODO confirm.
        K: number of clusters.
        sampled: if True, sample fresh z/beta; otherwise score the given
            z_old/beta_old under the current proposal parameters.
        z_old, beta_old: previously sampled values (sampled=False path).

    Returns:
        The populated probtorch Trace q.
    """
    q = probtorch.Trace()
    S, B, N, D = ob.shape
    # Pairwise differences between every observation and every center.
    ob_mu = ob.unsqueeze(2).repeat(
        1, 1, K, 1, 1) - mu.unsqueeze(-2).repeat(1, 1, 1, N, 1)
    # Softmax over clusters gives the categorical assignment proposal.
    q_probs = F.softmax(
        self.pi_log_prob(ob_mu).squeeze(-1).transpose(-1, -2), -1)
    if sampled:
        z = cat(q_probs).sample()
        _ = q.variable(cat, probs=q_probs, value=z, name='states')
        # Gather the center assigned to each point (argmax of one-hot z).
        mu_expand = torch.gather(
            mu, -2, z.argmax(-1).unsqueeze(-1).repeat(1, 1, 1, D))
        # Beta concentrations predicted from the centered observations.
        q_angle_con1 = self.angle_log_con1(ob - mu_expand).exp()
        q_angle_con0 = self.angle_log_con0(ob - mu_expand).exp()
        beta = Beta(q_angle_con1, q_angle_con0).sample()
        q.beta(q_angle_con1, q_angle_con0, value=beta, name='angles')
    else:
        # Score previously drawn values instead of sampling new ones.
        _ = q.variable(cat, probs=q_probs, value=z_old, name='states')
        mu_expand = torch.gather(
            mu, -2, z_old.argmax(-1).unsqueeze(-1).repeat(1, 1, 1, D))
        q_angle_con1 = self.angle_log_con1(ob - mu_expand).exp()
        q_angle_con0 = self.angle_log_con0(ob - mu_expand).exp()
        q.beta(q_angle_con1, q_angle_con0, value=beta_old, name='angles')
    return q
def _rejection_sample_wood(loc: torch.Tensor, concentration: torch.Tensor,
                           w: torch.Tensor):
    """
    The acceptance-rejection sampling scheme from Wood (1994).

    Based on TensorFlow's implementation:
    https://github.com/tensorflow/probability/blob/v0.11.1/tensorflow_probability/python/distributions/von_mises_fisher.py#L421
    and the implementation from "Spherical Latent Spaces for Stable
    Variational Autoencoders" by Jiacheng Xu, Greg Durrett
    https://github.com/jiacheng-xu/vmf_vae_nlp/blob/master/NVLL/distribution/vmf_only.py#L92
    """
    dim = loc.shape[-1]
    # Envelope constants from Wood (1994).
    b = (dim - 1) / (2 * concentration +
                     torch.sqrt((4 * (concentration**2)) + (dim - 1)**2))
    x = (1 - b) / (1 + b)
    c = concentration * x + (dim - 1) * torch.log(1 - x**2)
    # Track which scalar w entries have been accepted so far; sampling
    # should accept a scalar `w` for each training example.
    done = torch.zeros(w.shape, dtype=torch.bool, device=loc.device)
    while not done.all():
        eps = Beta(0.5 * (dim - 1), 0.5 * (dim - 1)).sample(w.shape)
        w_prop = (1 - (1 + b) * eps) / (1 - (1 - b) * eps)
        u = Uniform(0.0 + 1e-6, 1.0).sample(w.shape)
        accept = concentration * w_prop + (dim - 1) * torch.log(
            1 - x * w_prop) - c >= torch.log(u)
        if accept.any():
            # Keep newly accepted proposals; leave the rest untouched.
            w = torch.where(accept, w_prop, w)
            done = done | accept
    return w
def train_step(
        self, sample, model, criterion, optimizer, update_num,
        ignore_grad=False):
    """One mixup training step: each example is paired with a randomly
    shuffled partner and the criterion interpolates the pair with
    lambda ~ Beta(alpha, alpha).

    Args:
        sample: fairseq-style batch with "id", "net_input", "target", etc.
        model: the model to train (set_num_updates is called on it).
        criterion: loss taking (model, pair_sample, lambda_).
        optimizer: fairseq optimizer; backward() is called through it.
        update_num: current update index, forwarded to the model.
        ignore_grad: if True, zero the loss (e.g. for dummy batches) while
            keeping the backward graph structure intact.

    Returns:
        (loss, sample_size, logging_output) from the criterion.
    """
    model.train()
    model.set_num_updates(update_num)
    # Random pairing of examples within the batch.
    shuffled_ids = np.array(list(range(len(sample["id"]))))
    np.random.shuffle(shuffled_ids)
    net_input_a = sample["net_input"]
    # View B is the same batch re-indexed by the shuffled permutation.
    net_input_b = {"src_tokens": net_input_a["src_tokens"][shuffled_ids],
                   "prev_output_tokens": net_input_a["prev_output_tokens"][shuffled_ids],
                   "src_lengths": net_input_a["src_lengths"][shuffled_ids]}
    pair_sample = {
        "id": sample["id"],
        "nsentences": sample["nsentences"],
        "ntokens": sample["ntokens"],
        "net_input_a": net_input_a,
        "net_input_b": net_input_b,
        "target_a": sample["target"],
        "target_b": sample["target"][shuffled_ids],
    }
    dist = Beta(self.args.alpha, self.args.alpha)
    bsz = len(shuffled_ids)
    # Fold lambda into [0.5, 1] so input A always dominates the mix.
    lambda_ = dist.sample(sample_shape=[bsz]).to("cuda")
    lambda_ = torch.max(lambda_, 1 - lambda_)
    if self.args.fp16:
        lambda_ = lambda_.half()
    loss, sample_size, logging_output = criterion(model, pair_sample,
                                                  lambda_=lambda_)
    if ignore_grad:
        loss *= 0
    optimizer.backward(loss)
    return loss, sample_size, logging_output
def reinforce(env, policy_estimator, num_episodes=2000, batch_size=10,
              gamma=0.99):
    """REINFORCE with a Beta policy over a trading-style environment.

    Episodes are rolled out with actions sampled from
    Beta(a, b) = policy_estimator.foward(state); once at least 256 steps
    have accumulated, the collected batch is used for one policy update.

    NOTE(review): 'foward' is presumably the estimator's method name as
    spelled in this project — confirm before renaming anything.
    NOTE(review): episodes ending before the counter exceeds 256 never get
    their trajectories added to the batch — verify this is intentional.

    Args:
        env: gym-style environment (reset/step).
        policy_estimator: network with foward() -> (a, b) and update().
        num_episodes: number of episodes to run.
        batch_size: unused here; batching is driven by the step counter.
        gamma: discount factor for the returns.

    Returns:
        (total_rewards, days_counter): per-episode reward sums and lengths.
    """
    total_rewards = []
    days_counter = []
    batch_rewards = []
    batch_states = []
    batch_actions = []
    counter = 0
    ep = 0
    days = 0
    while ep < num_episodes:
        s_0 = env.reset()
        days = 0
        states = []
        rewards = []
        actions = []
        done = False
        while done == False:
            if days > 1000:
                # Progress trace for unusually long episodes.
                print(days)
            processed_state = process(s_0, 50000)
            a, b = policy_estimator.foward(processed_state)
            distribution = Beta(a, b)
            action = distribution.sample().detach().numpy()
            s_1, r, done, _ = env.step(action)
            states.append(processed_state)
            rewards.append(r)
            actions.append(action)
            days += 1
            counter += 1
            s_0 = s_1
        ep += 1
        total_rewards.append(sum(rewards))
        days_counter.append(days)
        # Update only after enough steps have been gathered.
        if counter > 256 and done:
            returns = discount_rewards(rewards, gamma)
            batch_states.extend(states)
            batch_rewards.extend(returns)
            batch_actions.extend(actions)
            state_tensor = torch.FloatTensor(batch_states)
            reward_tensor = torch.FloatTensor(batch_rewards)
            a_tnsr, b_tnsr = policy_estimator.foward(state_tensor)
            action_tensor = torch.FloatTensor(batch_actions)
            policy_estimator.update(a_tnsr, b_tnsr, action_tensor,
                                    reward_tensor)
            batch_rewards = []
            batch_actions = []
            batch_states = []
            counter = 0
    return total_rewards, days_counter
def select_action(self, state, deterministic, reparameterize=False): alpha, beta = self.forward(state) dist = Beta(concentration1=alpha, concentration0=beta) if reparameterize: action = dist.rsample() # (bsize, action_dim) else: action = dist.sample() # (bsize, action_dim) return action, dist
def test2():
    """
    Demo: the Beta distribution is a family of continuous random variables
    supported on the interval [0, 1].
    :return:
    """
    from torch.distributions.beta import Beta
    concentration1 = torch.tensor([0.5])
    concentration0 = torch.tensor(0.5)
    beta_dist = Beta(concentration1, concentration0)
    beta_dist.sample()  # e.g. tensor([0.0594])
def sample_action(self, s):
    """Sample a random action in (0, 1)^act_dim from the Beta policy.

    The network output is split into two halves and squashed into (0, 5)
    to form the Beta concentration parameters.

    Args:
        s: raw state (array-like); batched internally with unsqueeze(0).

    Returns:
        numpy array of shape (act_dim,) with entries in (0, 1).
    """
    s_T = T.tensor(s).unsqueeze(0)
    act = self.forward(s_T)
    # T.sigmoid replaces the deprecated torch.nn.functional.sigmoid.
    c1 = T.sigmoid(act[:, :self.act_dim]) * 5
    c2 = T.sigmoid(act[:, self.act_dim:]) * 5
    beta_dist = Beta(c1, c2)
    rnd_act = beta_dist.sample()
    return rnd_act.detach().squeeze(0).numpy()
def __init__(self, k=3, alpha=1, severity=3):
    """AugMix configuration.

    Args:
        k: number of augmentation chains (Dirichlet dimensionality).
        alpha: concentration for both the Dirichlet chain weights and the
            Beta skip-connection weight.
        severity: augmentation severity level.
    """
    super(AugMix, self).__init__()
    self.k = k
    self.alpha = alpha
    self.severity = severity
    # Convex chain weights ~ Dirichlet(alpha, ..., alpha).
    weights = torch.full(torch.Size([k]), alpha, dtype=torch.float32)
    self.dirichlet = Dirichlet(weights)
    self.beta = Beta(alpha, alpha)
    self.augs = augmentations
    # Consistency loss between clean and augmented predictions.
    self.kl = nn.KLDivLoss(reduction='batchmean')
def get_lambda(self, batch_size):
    """Sample a batch of mixup coefficients lambda ~ Beta(alpha, alpha),
    folded into [0.5, 1] so the primary input always dominates the mix.

    Args:
        batch_size: number of coefficients to draw.

    Returns:
        CUDA tensor of shape (batch_size,) with values in [0.5, 1].
    """
    dist = Beta(self.args.alpha, self.args.alpha)
    # BUG FIX: the original referenced the undefined name `bsz`
    # (NameError at runtime); the parameter is `batch_size`.
    lambda_ = dist.sample(sample_shape=[batch_size]).to("cuda")
    lambda_ = torch.max(lambda_, 1 - lambda_)
    return lambda_
def generate_data(num_obs):
    """Draw Bernoulli observations from the chain network A -> B -> C.

    The success probability of each node is first sampled from its Beta
    prior (one entry per parent state for B and C), after which num_obs
    joint observations are generated.

    Args:
        num_obs: number of joint observations to draw.

    Returns:
        (prior, CPDs, data): Beta prior parameters, sampled probabilities,
        and 0/1 observation tensors of length num_obs.
    """
    prior = {
        "A": torch.tensor([1.0, 10.0]),
        "B": torch.tensor([[10.0, 1.0], [1.0, 10.0]]),
        "C": torch.tensor([[10.0, 1.0], [1.0, 10.0]]),
    }
    CPDs = {
        "p_A": Beta(prior["A"][0], prior["A"][1]).sample(),
        "p_B": Beta(prior["B"][:, 0], prior["B"][:, 1]).sample(),
        "p_C": Beta(prior["C"][:, 0], prior["C"][:, 1]).sample(),
    }
    data = {"A": Bernoulli(torch.ones(num_obs) * CPDs["p_A"]).sample()}
    # Index each child's CPD by its parent's sampled binary state.
    idx_a = data["A"].type(torch.long)
    data["B"] = Bernoulli(torch.gather(CPDs["p_B"], 0, idx_a)).sample()
    idx_b = data["B"].type(torch.long)
    data["C"] = Bernoulli(torch.gather(CPDs["p_C"], 0, idx_b)).sample()
    return prior, CPDs, data
def get_log_probs(pi, actions, dist_type):
    """Log-probability of actions under the policy output pi.

    Args:
        pi: (mean, std) for 'gauss', (alpha, beta) for 'beta'.
        actions: actions to score.
        dist_type: 'gauss' or 'beta'.

    Returns:
        Element-wise log-probabilities tensor.

    Raises:
        ValueError: for an unknown dist_type. (Previously this surfaced
            as an UnboundLocalError on `log_prob`.)
    """
    if dist_type == 'gauss':
        mean, std = pi
        return Normal(mean, std).log_prob(actions)
    if dist_type == 'beta':
        alpha, beta = pi
        return Beta(alpha, beta).log_prob(actions)
    raise ValueError("unknown dist_type: {!r}".format(dist_type))
def select_actions(pi, dist_type):
    """Sample one set of actions from the policy output pi.

    Args:
        pi: (mean, std) for 'gauss', (alpha, beta) for 'beta'.
        dist_type: 'gauss' or 'beta'.

    Returns:
        numpy array: the first row of the sampled actions, on CPU.

    Raises:
        ValueError: for an unknown dist_type. (Previously this surfaced
            as an UnboundLocalError on `actions`.)
    """
    if dist_type == 'gauss':
        mean, std = pi
        actions = Normal(mean, std).sample()
    elif dist_type == 'beta':
        alpha, beta = pi
        # Detach and move to CPU so sampling never touches the graph/GPU.
        actions = Beta(alpha.detach().cpu(), beta.detach().cpu()).sample()
    else:
        raise ValueError("unknown dist_type: {!r}".format(dist_type))
    return actions.detach().cpu().numpy()[0]
def __init__(self, alpha, num_classes):
    """Batch-level mixup: lambda ~ Beta(alpha, alpha) mixes whole batches.

    Args:
        alpha: Beta concentration (must be a float).
        num_classes: number of target classes (must be an int).
    """
    super(BatchMixupLayer, self).__init__()
    assert isinstance(alpha, float)
    assert isinstance(num_classes, int)
    self.alpha = alpha
    self.num_classes = num_classes
    self.Beta = Beta(alpha, alpha)
def __call__(self, sample: Dict[str, Any]) -> Dict[str, Any]:
    """
    Apply mixup/cutmix augmentation to a batch, in place.

    Args:
        sample: the batch data. Must contain "input" and "target"; the
            batch size must be even so examples can be paired by flipping.

    Returns:
        The same dict with "input" mixed and "target" replaced by soft
        (mixed, possibly label-smoothed) targets.
    """
    assert len(sample["target"]) % 2 == 0, "Batch size should be even"
    if torch.is_tensor(sample["input"]) and sample["input"].ndim == 4:
        # This is the simple case of an image data batch (i.e. 4D tensor).
        # We support more advanced joint mixup and cutmix in this case.
        if self.mode == "elem":
            lam = self._mix_elem(sample["input"])
        elif self.mode == "pair":
            lam = self._mix_pair(sample["input"])
        else:
            lam = self._mix_batch(sample["input"])
        sample["target"] = mixup_target(
            sample["target"],
            self.num_classes,
            lam=lam,
            smoothing=self.label_smoothing,
        )
    else:
        # This is the complex case of a video data batch (i.e. 5D tensor)
        # or a more complex data batch. We only support mixup augmentation
        # in batch mode here.
        if sample["target"].ndim == 1:
            # Hard labels: one-hot encode with label smoothing first.
            assert (
                self.num_classes is not None
            ), "num_classes is expected for 1D target"
            off_value = self.label_smoothing / self.num_classes
            on_value = 1.0 - self.label_smoothing + off_value
            sample["target"] = one_hot(
                sample["target"],
                self.num_classes,
                on_value=on_value,
                off_value=off_value,
                device=sample["target"].device,
            )
        else:
            assert (
                sample["target"].ndim == 2
            ), "target tensor shape must be 1D or 2D"
        # Mix each example with its flipped-batch partner using a single
        # Beta-sampled coefficient shared across the batch.
        c = Beta(self.mixup_alpha, self.mixup_alpha).sample()
        sample["target"] = c * sample["target"] + (1.0 - c) * sample["target"].flip(
            0
        )
        sample["input"] = _recursive_mixup(sample["input"], c)
    return sample
def log_probs(self, batch_states, batch_actions):
    """Log-probabilities of batch_actions under the Beta policy.

    Args:
        batch_states: states fed through self.forward; the network output
            is assumed shaped (batch, seq, 2 * act_dim) — TODO confirm.
        batch_actions: actions in (0, 1) matching the policy output shape.

    Returns:
        Per-sample log-probabilities summed over dim 1 (keepdim=True).
    """
    # Get the Beta concentrations from the policy network.
    act = self.forward(batch_states)
    # BUG FIX: the first half was indexed with act[:, :, self.act_dim]
    # (a single slice, missing the colon) instead of
    # act[:, :, :self.act_dim], mismatching c2's shape — compare the
    # sibling sample_action, which slices act[:, :self.act_dim].
    c1 = F.sigmoid(act[:, :, :self.act_dim]) * 5
    c2 = F.sigmoid(act[:, :, self.act_dim:]) * 5
    beta_dist = Beta(c1, c2)
    log_probs = beta_dist.log_prob(batch_actions)
    return log_probs.sum(1, keepdim=True)