def kmeans(x, k):
    x = torch.tensor(x, dtype=torch.float)
    # initialize k centroids randomly
    c, old = x[torch.randperm(len(x))[:k]], None
    # assign labels to each datapoint based on centroids
    dists, y = torch.abs_(x.unsqueeze(-1) - c).min(dim=-1)

    while old is None or not c.equal(old):
        # handle the empty clusters:
        # choose the farthest datapoint from the biggest cluster
        # and move it to the empty cluster
        for i in range(k):
            if not y.eq(i).any():
                mask = y.eq(torch.arange(k).unsqueeze(-1))
                lens = mask.sum(dim=-1)
                biggest = mask[lens.argmax()].nonzero().view(-1)
                farthest = dists[biggest].argmax()
                y[biggest[farthest]] = i
        # update the centroids
        c, old = torch.tensor([x[y.eq(i)].mean() for i in range(k)]), c
        # re-assign all datapoints to clusters
        dists, y = torch.abs_(x.unsqueeze(-1) - c).min(dim=-1)
    clusters = [y.eq(i) for i in range(k)]
    clusters = [i.nonzero().view(-1).tolist() for i in clusters if i.any()]
    centroids = [round(x[i].mean().item()) for i in clusters]
    return centroids, clusters

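# Minimal usage sketch for the kmeans bucketing above; the sentence lengths
# are made-up illustration data, not from the original source.
import torch

lengths = [12, 7, 31, 9, 28, 30, 8, 13]        # hypothetical sentence lengths
centroids, clusters = kmeans(lengths, k=3)
# `centroids`: one rounded average length per non-empty cluster
# `clusters`: indices of the sentences assigned to each centroid
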
def structure_weighted_binary_cross_entropy_with_logits(
        input, target: torch.Tensor):
    target_pad = F.pad(target, [10, 10, 10, 10], mode='circular')
    weit = torch.abs(
        F.avg_pool2d(target_pad, kernel_size=21, stride=1, padding=0) - target)
    b, c, h, w = weit.shape
    weit = (
        weit - weit.view(b, c, -1).min(dim=-1, keepdim=True)[0].unsqueeze(-1)
    ) / (1e-6 + weit.view(b, c, -1).max(dim=-1, keepdim=True)[0].unsqueeze(-1)
         - weit.view(b, c, -1).min(dim=-1, keepdim=True)[0].unsqueeze(-1))
    dx = F.conv2d(F.pad(target, [1, 1, 0, 0], mode='reflect'),
                  torch.FloatTensor([-0.5, 0, 0.5]).view(1, 1, 1, 3).to(target.device),
                  stride=1, padding=0)
    dy = F.conv2d(F.pad(target, [0, 0, 1, 1], mode='reflect'),
                  torch.FloatTensor([-0.5, 0, 0.5]).view(1, 1, 3, 1).to(target.device),
                  stride=1, padding=0)
    torch.abs_(dx)
    torch.abs_(dy)
    edge_info = (dx + dy) > 0.4
    weit[edge_info] = 0.0
    weit = 1 + Configuration.instance().S_LOSS_GAMA * weit

    wbce = F.binary_cross_entropy_with_logits(input, target, reduction='none')
    wbce = weit * wbce
    return wbce.sum()

def __call__(self, input):
    output = fn.conv2d(input, self.kernels, padding=self.padding).float()
    if self.thresholds is not None:
        output = torch.where(output < self.thresholds,
                             torch.tensor(0.0, device=output.device), output)
    if self.use_abs:
        torch.abs_(output)
    return output

def __call__(self, real, fake):
    real_grad_h, real_grad_v = self.forward(real)
    fake_grad_h, fake_grad_v = self.forward(fake)
    diff_hv = torch.abs_(real_grad_h - fake_grad_h) + torch.abs_(real_grad_v - fake_grad_v)
    return torch.mean(diff_hv)

def safe_real_exp(values: torch.Tensor) -> torch.Tensor:
    assert values.dim() == 2 and values.size(1) == 2
    amplitude = values[:, 0]
    amplitude -= torch.max(amplitude)
    torch.exp_(amplitude)
    phase = values[:, 1]
    phase /= 3.141592653589793
    torch.round_(phase)
    torch.abs_(phase)
    phase = torch.fmod(phase, 2.0)
    return amplitude * (1.0 - 2.0 * phase)

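# A small numerical check of safe_real_exp above (an assumed reading of the
# snippet, not from the original source): each row is (log-amplitude, phase);
# the phase is rounded to the nearest multiple of pi and converted to a sign.
import math
import torch

vals = torch.tensor([[0.0, math.pi], [-1.0, 0.0]])
out = safe_real_exp(vals)
# amplitudes exp([0, -1] - max) = [1.0000, 0.3679]; phases round to [1, 0],
# giving signs [-1, +1], so `out` is approximately [-1.0000, 0.3679].
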
def kmeans(x, k):
    """
    Parameters
    ----------
    x : list
        Lengths of sentences
    k : int
        Number of clusters

    Returns
    -------
    centroids : list
        Average lengths in each cluster
    clusters : list
        List of clusters, which hold indices of data points
    """
    x = torch.tensor(x, dtype=torch.float)
    # count the frequency of each datapoint
    d, indices, f = x.unique(return_inverse=True, return_counts=True)
    # calculate the sum of the values of the same datapoints
    total = d * f
    # the number of clusters must not be greater than that of datapoints
    k = min(len(d), k)
    # initialize k centroids randomly
    c, old = d[torch.randperm(len(d))[:k]], None
    # assign labels to each datapoint based on centroids
    dists, y = torch.abs_(d.unsqueeze(-1) - c).min(dim=-1)

    while old is None or not c.equal(old):
        # if an empty cluster is encountered,
        # choose the farthest datapoint from the biggest cluster
        # and move it to the empty one
        for i in range(k):
            if not y.eq(i).any():
                mask = y.eq(torch.arange(k).unsqueeze(-1))
                lens = mask.sum(dim=-1)
                biggest = mask[lens.argmax()].nonzero().view(-1)
                farthest = dists[biggest].argmax()
                y[biggest[farthest]] = i
        mask = y.eq(torch.arange(k).unsqueeze(-1))
        # update the centroids
        c, old = (total * mask).sum(-1) / (f * mask).sum(-1), c
        # re-assign all datapoints to clusters
        dists, y = torch.abs_(d.unsqueeze(-1) - c).min(dim=-1)
    # assign all datapoints to the newly generated clusters,
    # without considering the empty ones
    y, assigned = y[indices], y.unique().tolist()
    # get the centroids of the assigned clusters
    centroids = c[assigned].tolist()
    # map all values of datapoints to buckets
    clusters = [torch.where(y.eq(i))[0].tolist() for i in assigned]
    return centroids, clusters

def w_distance(P, Q):
    batch_size = P.shape[0]
    length = P.shape[1]
    result = torch.zeros(batch_size).cuda()
    for i in range(batch_size):
        delta = torch.tensor(0).cuda()
        delta_lst = torch.zeros(length).cuda()
        for j in range(length):
            delta = delta + P[i][j] - Q[i][j]
            delta_lst[j] = delta
        torch.abs_(delta_lst)
        out = torch.sum(delta_lst)
        result[i] = out
    return torch.sum(result)

def gen_trumap_viz(
        self, target_text: str, target_pred_ind: int, stmt_pred: float,
        stmt_embed: torch.Tensor, stmt_pred_matrix: torch.Tensor,
        stmt_embed_matrix: torch.Tensor) -> Tuple[str, List[Tuple]]:
    embed_compare = stmt_embed.expand_as(stmt_embed_matrix)
    l2_norm_distance = torch.nn.PairwiseDistance(p=2)
    embed_sims = l2_norm_distance(stmt_embed_matrix, embed_compare)
    embed_sims = embed_sims / torch.norm(embed_sims)
    stmt_pred = torch.tensor(stmt_pred).to(device=self.interpret_session.device)
    stmt_compare = stmt_pred.expand_as(stmt_pred_matrix)
    pred_sims = torch.abs_(stmt_pred_matrix - stmt_compare)
    _, sid_idx = torch.topk(
        (embed_sims + pred_sims),
        k=self.interpret_session.config.inference.trumap_topk,
        largest=False)
    topk_sim_full_embeds = torch.cat([
        stmt_embed_matrix[sid_idx],
        stmt_embed.unsqueeze(0),
        stmt_embed_matrix[self.interpret_session.max_pred_idx].unsqueeze(0),
        stmt_embed_matrix[self.interpret_session.min_pred_idx].unsqueeze(0)
    ]).tolist()
    trumap_spectrum, target_token_tup, umap_bounds_tup = \
        prep_trumap_inputs(self.interpret_session, sid_idx,
                           topk_sim_full_embeds, target_text, target_pred_ind)
    ss_image_path = self.plot_trumap_spectrum(trumap_spectrum,
                                              target_token_tup,
                                              umap_bounds_tup)
    return ss_image_path, trumap_spectrum

def BackHook(self, GradInput, GradOutput):
    # track the absolute max and min of the gradient
    global Max
    global Min
    Current_Grad_Max = torch.abs_(torch.max(GradInput[0]))
    Current_Grad_Min = torch.abs_(torch.min(GradInput[0]))
    if torch.abs_(Max[0].to(device)) < Current_Grad_Max.to(device):
        print(Max[0].to(device), Current_Grad_Max.to(device))
        Max.clear()
        Max.append(Current_Grad_Max.to(device))
    if torch.abs_(Min[0].to(device)) > Current_Grad_Min.to(device):
        Min.clear()
        Min.append(Current_Grad_Min.to(device))

def coin_flip(z, actfun, M, k):
    shuffle_map = torch.empty(int(M / k), dtype=torch.long).random_(k)
    z = z[:, torch.arange(z.size(1)), shuffle_map, ...]
    if actfun == 'cf_relu':
        return F.relu_(z)
    elif actfun == 'cf_abs':
        return torch.abs_(z)

def forward(self, x):
    out = self.conv1(x)
    out = F.leaky_relu(out)
    out = torch.log_(1 + torch.abs_(out))
    out = self.conv2(out)
    out = F.leaky_relu(self.bn2(out))
    out = F.avg_pool1d(out, kernel_size=7, padding=0, stride=3)
    out1 = self.layer1(out)
    out2 = self.layer2(out1)
    out3 = self.layer3(out2)
    out4 = self.layer4(out3)
    out5 = self.layer5(out4)
    # skip1 = F.avg_pool1d(out1, 15, stride=15)
    # skip2 = F.avg_pool1d(out2, 7, stride=7)
    skip3 = F.avg_pool1d(out3, 4, stride=4, padding=1)
    skip4 = F.avg_pool1d(out4, 3, stride=2, padding=1)
    out = torch.cat([out5, skip4, skip3], 1)
    out = out[:, :, 1:-1]  # trim the ends so that padding does not cause artifacts
    out = F.avg_pool1d(out, 26)
    out = out.view(out.size(0), -1)
    out = self.linear(F.dropout(out, training=self.training, p=0.25))
    out = self.logsoftmax(out)
    return out

def forward(self, x: Tensor) -> Tensor:
    pos_idx = torch.gt(x, 0.)
    y = torch.zeros_like(x)
    if x.is_cuda:
        if x.numel() < mnn_config.get_value('cpu_or_gpu'):
            device = x.device
            temp = torch.from_numpy(
                math.pi / 2 * scipy.erfi(x[pos_idx].cpu().numpy())).to(device=device)
        else:
            temp = math.pi / 2 * self.erfi(x[pos_idx])
    else:
        temp = torch.from_numpy(math.pi / 2 * scipy.erfi(x[pos_idx].numpy()))
    idx = torch.bitwise_or(torch.lt(x, self.cheb_xmin_for_G),
                           torch.gt(x, -self.cheb_xmin_for_G))
    y[idx] = self.integrate_asym_neg_inf(-torch.abs(x[idx]))
    idx.bitwise_not_()
    y[idx] = chebyshev_val_no_transform(-torch.abs_(x[idx]), self.cheb_G_neg,
                                        x_min=self.cheb_xmin_for_G, x_max=0.,
                                        num_sub=self.div)
    y[pos_idx] += temp
    return y

def forward(self, pred, gt):
    # one_hot = torch.zeros(pred.shape).cuda()
    one_hot = pred.data.clone().mul_(0.0)
    for i in range(0, self.args.num_classes):
        one_hot[:, i:i + 1, ...].add_((gt == i).float())
        # ignore the segment boundary
        one_hot[:, i:i + 1, ...].mul_((gt != self.args.ignore_index).float())
    diff = torch.abs_(one_hot.sub_(pred.detach()))
    diff = torch.sum(diff, dim=1, keepdim=True).div_(2)
    diff = self.blur(diff)
    diff = self.dilate(diff)
    diff = self.reblur(diff)
    # normalize each sample to [0, 1]
    dmax = diff.max(dim=3, keepdim=True)[0].max(
        dim=2, keepdim=True)[0].max(dim=1, keepdim=True)[0]
    dmin = diff.min(dim=3, keepdim=True)[0].min(
        dim=2, keepdim=True)[0].min(dim=1, keepdim=True)[0]
    diff.sub_(dmin).div_(dmax - dmin + 1e-9)
    flawmap_gt = diff
    return flawmap_gt

def __init__(self, n_params, num_samples):
    super().__init__()
    self.N = n_params
    self.a = nn.Linear(1, self.N, bias=False)
    torch.exp_(self.a.weight.data)
    self.b = nn.Linear(1, self.N, bias=False)
    torch.abs_(self.b.weight.data)
    # latent distribution
    self.pz = MultivariateNormal(torch.zeros(self.N), torch.eye(self.N))
    # self.px = Uniform(-1 * torch.ones(self.N) + 3, 3 * torch.ones(self.N))  # target distribution
    # target distribution
    self.px = MultivariateNormal(torch.zeros(self.N) + 4, 3 * torch.eye(self.N))
    self.num_samples = num_samples

def r_proj_loss(z_online, z_target, r_online, r_target):
    z_online = F.normalize(z_online, dim=-1, p=2)
    z_target = F.normalize(z_target, dim=-1, p=2)
    z_dist = 2 - 2 * (z_online * z_target).sum(dim=-1)
    r_dist = torch.abs_(r_online - r_target).squeeze()
    return (z_dist - r_dist).mean()

def __call__(self, targets, scores):
    loss = torch.zeros(1)
    loss.requires_grad = True
    loss = torch.mean(
        torch.remainder(
            torch.abs_(scores[:, -self.resp_dur:, :] -
                       targets[:, -self.resp_dur:, :]), np.pi))
    return loss

def kmeans(x, k, max_it=32):
    """From https://github.com/yzhangcs/parser/blob/main/supar/utils/alg.py#L7"""
    # the number of clusters must not be greater than the number of datapoints
    x, k = torch.tensor(x, dtype=torch.float), min(len(x), k)
    # collect unique datapoints
    d = x.unique()
    # initialize k centroids randomly
    c = d[torch.randperm(len(d))[:k]]
    # assign each datapoint to the cluster with the closest centroid
    dists, y = torch.abs_(x.unsqueeze(-1) - c).min(-1)

    for _ in range(max_it):
        # if an empty cluster is encountered,
        # choose the farthest datapoint from the biggest cluster and move it to the empty one
        mask = torch.arange(k).unsqueeze(-1).eq(y)
        none = torch.where(~mask.any(-1))[0].tolist()
        while len(none) > 0:
            for i in none:
                # the biggest cluster
                b = torch.where(mask[mask.sum(-1).argmax()])[0]
                # the datapoint farthest from the centroid of cluster b
                f = dists[b].argmax()
                # update the assigned cluster of f
                y[b[f]] = i
                # re-calculate the mask
                mask = torch.arange(k).unsqueeze(-1).eq(y)
            none = torch.where(~mask.any(-1))[0].tolist()
        # update the centroids
        c, old = (x * mask).sum(-1) / mask.sum(-1), c
        # re-assign all datapoints to clusters
        dists, y = torch.abs_(x.unsqueeze(-1) - c).min(-1)
        # stop iteration early if the centroids converge
        if c.equal(old):
            break
    # assign all datapoints to the newly generated clusters;
    # the empty ones are discarded
    assigned = y.unique().tolist()
    # get the centroids of the assigned clusters
    centroids = c[assigned].tolist()
    # map all values of datapoints to buckets
    clusters = [torch.where(y.eq(i))[0].tolist() for i in assigned]
    return centroids, clusters

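# Hedged usage sketch for the supar-style kmeans above (illustrative data only):
# bucket sentence lengths so that each batch draws sentences of similar size.
import torch

torch.manual_seed(0)
lengths = torch.randint(5, 60, (100,)).tolist()
centroids, clusters = kmeans(lengths, k=8, max_it=32)
# every sentence index lands in exactly one bucket
assert sum(len(c) for c in clusters) == len(lengths)
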
def kmeans(x, k, crf=False):
    x = torch.tensor(x, dtype=torch.float)
    # count the frequency of each datapoint
    d, indices, f = x.unique(return_inverse=True, return_counts=True)
    # calculate the sum of the values of the same datapoints
    total = d * f
    # initialize k centroids randomly
    c, old = d[torch.randperm(len(d))[:k]], None
    # assign labels to each datapoint based on centroids
    dists, y = torch.abs_(d.unsqueeze(-1) - c).min(dim=-1)
    # make sure the number of datapoints is not smaller than that of clusters
    if crf:
        k = len(d)
    if len(d) < k:
        raise AssertionError(f"unable to assign {len(d)} datapoints to "
                             f"{k} clusters")

    while old is None or not c.equal(old):
        # if an empty cluster is encountered,
        # choose the farthest datapoint from the biggest cluster
        # and move it to the empty one
        for i in range(k):
            if not y.eq(i).any():
                mask = y.eq(torch.arange(k).unsqueeze(-1))
                lens = mask.sum(dim=-1)
                biggest = mask[lens.argmax()].nonzero().view(-1)
                farthest = dists[biggest].argmax()
                y[biggest[farthest]] = i
        mask = y.eq(torch.arange(k).unsqueeze(-1))
        # update the centroids
        c, old = (total * mask).sum(-1) / (f * mask).sum(-1), c
        # re-assign all datapoints to clusters
        dists, y = torch.abs_(d.unsqueeze(-1) - c).min(dim=-1)
    # assign all datapoints to the newly generated clusters,
    # without considering the empty ones
    y, assigned = y[indices], y.unique().tolist()
    # get the centroids of the assigned clusters
    centroids = c[assigned].tolist()
    # map all values of datapoints to buckets
    clusters = [torch.where(y.eq(i))[0].tolist() for i in assigned]
    return centroids, clusters

def wingLoss(pred, target, w=10.0, epsilon=2.0):
    w, epsilon = torch.FloatTensor([w]).cuda(), torch.FloatTensor([epsilon]).cuda()
    dis = torch.abs_(pred - target)
    isSmall = dis <= w
    isLarge = dis > w
    small_loss = w * torch.log((isSmall * dis) / epsilon + 1)
    large_loss = isLarge * dis - w * (1 - torch.log(1 + w / epsilon))
    loss = small_loss + large_loss * isLarge
    loss = torch.mean(torch.sum(loss, dim=1), dim=0)
    return loss

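# Hedged reading of the wing loss above: with d = |pred - target|, the loss is
# w * ln(1 + d / epsilon) when d <= w and d - C otherwise, where
# C = w * (1 - ln(1 + w / epsilon)) keeps the two branches continuous at d = w.
# The snippet hard-codes .cuda(), so this quick check only runs on a GPU machine;
# the inputs are illustrative, not from the original source.
import torch

if torch.cuda.is_available():
    pred = torch.zeros(2, 3).cuda()
    target = torch.tensor([[1., 2., 3.], [15., 20., 25.]]).cuda()
    # first row exercises the log branch (d <= 10), second row the linear branch
    print(wingLoss(pred, target))
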
def target_fn(targets, shape):
    count_targets = (shape[-1] // 15) * (shape[-2] // 15)
    device = targets.device
    winners_z = torch.ones_like(targets, dtype=torch.float)
    targets = targets - 225
    winners_z[targets < 0] = -1.0
    winners_z[targets > 0] = 1.0
    winners_z.unsqueeze_(-1)
    targets = torch.abs_(targets) - 1
    winners_z = torch.cat([winners_z for _ in range(count_targets)], dim=0)
    targets = torch.cat([targets for _ in range(count_targets)], dim=0)
    return [targets.to(device=device), winners_z.to(device=device)]

def embeddings_to_cosine_similarity(E, sigma=1.0):
    '''
    Build a pairwise symmetrical cosine similarity matrix;
    the diagonal is set to zero.
    '''
    dot = torch.abs_(torch.mm(E, E.t()))        # E[i]E[j]
    norm = torch.norm(E, 2, 1)                  # ||E[i]||
    x = torch.div(dot, norm)                    # E[i]E[j]/||E[j]||
    x = torch.div(x, torch.unsqueeze(norm, 1))  # E[i]E[j]/(||E[j]||*||E[i]||)
    x = x.div_(sigma)
    return torch.max(x, x.t()).fill_diagonal_(0)

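# Minimal usage sketch for the similarity matrix above (illustrative data only,
# not from the original source):
import torch

E = torch.randn(5, 16)                          # 5 embeddings of dimension 16
S = embeddings_to_cosine_similarity(E, sigma=1.0)
# S is 5x5, symmetric, zero on the diagonal; off-diagonal entries are
# |cosine(E[i], E[j])| scaled by 1/sigma.
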
def backward(ctx, grad_incoming):
    x, t, q, s = ctx.saved_tensors
    # dimensions with size 1 enable broadcasting
    t_shape = [*t.size()] + [1 for _ in range(x.dim())]
    x_minus_t = x - t.reshape(t_shape)
    if s[1] != 0.:
        sb_inv = 1 / s[1]
        pdf = (torch.abs_(x_minus_t) <= s[1]).float() * (0.5 * sb_inv)
    else:
        pdf = torch.zeros_like(grad_incoming)
    d = q[1:] - q[:-1]
    local_jacobian = torch.sum(d.reshape(t_shape) * pdf, 0)
    grad_outgoing = grad_incoming * local_jacobian
    return grad_outgoing, None, None, None, None

def pairwise_cosine_similarity(E, C, temp=1.0):
    '''
    Build a pairwise cosine similarity matrix.
    '''
    dot = torch.abs_(torch.mm(E, C.t()))  # E[i]C[j]
    # dot = torch.mm(E, C.t())  # E[i]C[j]
    E = torch.norm(E, 2, 1)             # ||E[i]||
    C = torch.norm(C, 2, 1)             # ||C[j]||
    x = torch.div(dot, E.view(-1, 1))   # E[i]C[j]/||E[i]||
    x = torch.div(x, C.view(1, -1))     # E[i]C[j]/(||E[i]||*||C[j]||)
    return x.div_(temp)

def forward(self, pred, gt):
    diff = torch.abs_(gt - pred.detach())
    diff = torch.sum(diff, dim=1, keepdim=True).mul_(self.args.mu)
    diff = self.blur(diff)
    for _ in range(0, self.args.nu):
        diff = self.reblur(self.dilate(diff))
    # normalize each sample to [0, 1]
    dmax = diff.max(dim=3, keepdim=True)[0].max(
        dim=2, keepdim=True)[0].max(dim=1, keepdim=True)[0]
    dmin = diff.min(dim=3, keepdim=True)[0].min(
        dim=2, keepdim=True)[0].min(dim=1, keepdim=True)[0]
    diff.sub_(dmin).div_(dmax - dmin + 1e-9)
    flawmap_gt = diff
    return flawmap_gt

def kmeans(x, k, max_it=32, dist_lambda=None):
    """
    KMeans algorithm for clustering the sentences by length.

    Args:
        x (list[int]):
            Lengths of sentences.
        k (int):
            Number of clusters. This is an approximate value. The final number
            of clusters can be less than or equal to `k`.
        max_it (int):
            Maximum number of iterations. If the centroids do not converge
            after several iterations, the algorithm is stopped early.

    Returns:
        centroids (list[float]):
            Average lengths of sentences in each cluster.
        clusters (list[list[int]]):
            List of clusters that hold indices of data points.

    Examples:
        >>> x = torch.randint(10,20,(10,)).tolist()
        >>> x
        [15, 10, 17, 11, 18, 13, 17, 19, 18, 14]
        >>> centroids, clusters = kmeans(x, 3)
        >>> centroids
        [10.5, 14.0, 17.799999237060547]
        >>> clusters
        [[1, 3], [0, 5, 9], [2, 4, 6, 7, 8]]
    """
    if dist_lambda is None:
        dist_lambda = lambda a, b: torch.abs_(a - b)
    # the number of clusters must not be greater than the number of datapoints
    x = x.float() if isinstance(x, torch.Tensor) else torch.tensor(x, dtype=torch.float)
    k = min(len(x), k)
    if x.dim() == 1:
        x = x.unsqueeze(-1)
    # collect unique datapoints
    d = x.unique(dim=0)
    # initialize k centroids randomly
    c = d[torch.randperm(len(d))[:k]]
    # assign each datapoint to the cluster with the closest centroid
    n_x = x.size(0)
    x_dim = x[0].numel()
    dists, y = dist_lambda(x.unsqueeze(1), c.unsqueeze(0)).view(n_x, k, x_dim).sum(-1).min(-1)

    for _ in range(max_it):
        # if an empty cluster is encountered,
        # choose the farthest datapoint from the biggest cluster and move it to the empty one
        mask = torch.arange(k).unsqueeze(-1).eq(y)
        none = torch.where(~mask.any(-1))[0].tolist()
        while len(none) > 0:
            for i in none:
                # the biggest cluster
                b = torch.where(mask[mask.sum(-1).argmax()])[0]
                # the datapoint farthest from the centroid of cluster b
                f = dists[b].argmax()
                # update the assigned cluster of f
                y[b[f]] = i
                # re-calculate the mask
                mask = torch.arange(k).unsqueeze(-1).eq(y)
            none = torch.where(~mask.any(-1))[0].tolist()
        # update the centroids
        c, old = (x.view(1, n_x, x_dim) * mask.unsqueeze(-1)).sum(1) / mask.sum(-1).unsqueeze(-1), c
        # re-assign all datapoints to clusters
        dists, y = dist_lambda(x.unsqueeze(1), c.unsqueeze(0)).view(n_x, k, x_dim).sum(-1).min(-1)
        # stop iteration early if the centroids converge
        if c.equal(old):
            break
    # assign all datapoints to the newly generated clusters;
    # the empty ones are discarded
    assigned = y.unique(dim=0).tolist()
    # get the centroids of the assigned clusters
    if c.size(-1) == 1:
        c = c.squeeze(-1)
    centroids = c[assigned].tolist()
    # map all values of datapoints to buckets
    clusters = [torch.where(y.eq(i))[0].tolist() for i in assigned]
    return centroids, clusters

def kmeans(x, k, max_it=32):
    r"""
    KMeans algorithm for clustering the sentences by length.

    Args:
        x (list[int]):
            The list of sentence lengths.
        k (int):
            The number of clusters. This is an approximate value. The final
            number of clusters can be less than or equal to `k`.
        max_it (int):
            Maximum number of iterations. If the centroids do not converge
            after several iterations, the algorithm is stopped early.

    Returns:
        list[float], list[list[int]]:
            The first list contains average lengths of sentences in each cluster.
            The second is the list of clusters holding indices of data points.

    Examples:
        >>> x = torch.randint(10,20,(10,)).tolist()
        >>> x
        [15, 10, 17, 11, 18, 13, 17, 19, 18, 14]
        >>> centroids, clusters = kmeans(x, 3)
        >>> centroids
        [10.5, 14.0, 17.799999237060547]
        >>> clusters
        [[1, 3], [0, 5, 9], [2, 4, 6, 7, 8]]
    """
    # the number of clusters must not be greater than the number of datapoints
    x, k = torch.tensor(x, dtype=torch.float), min(len(x), k)
    # collect unique datapoints
    d = x.unique()
    # initialize k centroids randomly
    c = d[torch.randperm(len(d))[:k]]
    # assign each datapoint to the cluster with the closest centroid
    dists, y = torch.abs_(x.unsqueeze(-1) - c).min(-1)

    for _ in range(max_it):
        # if an empty cluster is encountered,
        # choose the farthest datapoint from the biggest cluster and move it to the empty one
        mask = torch.arange(k).unsqueeze(-1).eq(y)
        none = torch.where(~mask.any(-1))[0].tolist()
        while len(none) > 0:
            for i in none:
                # the biggest cluster
                b = torch.where(mask[mask.sum(-1).argmax()])[0]
                # the datapoint farthest from the centroid of cluster b
                f = dists[b].argmax()
                # update the assigned cluster of f
                y[b[f]] = i
                # re-calculate the mask
                mask = torch.arange(k).unsqueeze(-1).eq(y)
            none = torch.where(~mask.any(-1))[0].tolist()
        # update the centroids
        c, old = (x * mask).sum(-1) / mask.sum(-1), c
        # re-assign all datapoints to clusters
        dists, y = torch.abs_(x.unsqueeze(-1) - c).min(-1)
        # stop iteration early if the centroids converge
        if c.equal(old):
            break
    # assign all datapoints to the newly generated clusters;
    # the empty ones are discarded
    assigned = y.unique().tolist()
    # get the centroids of the assigned clusters
    centroids = c[assigned].tolist()
    # map all values of datapoints to buckets
    clusters = [torch.where(y.eq(i))[0].tolist() for i in assigned]
    return centroids, clusters

def residual_l1(reconstruction: Tensor, original: Tensor) -> Tensor:
    """Construct the absolute (L1) difference between original and reconstruction images."""
    return torch.abs_(original - reconstruction)

def step(self, lr=None, closure=None):
    """Performs a single optimization step.

    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    """
    loss = None
    if closure is not None:
        loss = closure()

    for group in self.param_groups:
        # add option to set lr on step, similar to other SGLD implementations by henripal
        if lr:
            group['lr'] = lr
        weight_decay = group['weight_decay']
        for p in group['params']:
            if p.grad is None:
                continue
            grad = p.grad.data
            if grad.is_sparse:
                raise RuntimeError(
                    'Gaussian Gradients does not support sparse gradients')
            state = self.state[p]
            # if weight_decay != 0:
            #     grad.add_(weight_decay, p.data)

            # State initialization
            if len(state) == 0:
                # Initialize mean and variance to zero
                state['mean'] = torch.zeros_like(p.data)
                state['variance'] = torch.zeros_like(p.data)
                state['std'] = torch.zeros_like(p.data)
                state['step'] = 0
            mean = state['mean']
            var = state['variance']
            std = state['std']
            state['step'] += 1

            # Get the mean and std of the previous step
            old_mean = mean.clone()
            old_std = std.clone()

            # Calculate the update
            # if state['step'] == 2:
            #     print('generate noise from mean: ', old_mean, ' and std: ', old_std)
            new_updt = torch.normal(mean=old_mean, std=old_std)
            updt = grad.add(new_updt, alpha=group['noise'])
            if weight_decay != 0:
                updt.add_(p.data, alpha=weight_decay)

            # Update the mean and std estimates
            mean = mean.mul(group['momentum']).add(updt)
            part_var1 = grad.add(-old_mean)
            part_var2 = grad.add(-mean)
            new_std = torch.pow(old_std, 2).mul(group['momentum']).addcmul(
                part_var1, part_var2).add(group['eps'])
            new_std = torch.pow(torch.abs_(new_std), 1 / 2)
            std.add_(std, alpha=-1).add_(new_std)

            p.data.add_(updt, alpha=-group['lr'])

    return loss

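# Hedged usage sketch for the step() method above. `GaussianSGLD` is a
# hypothetical name for the optimizer class this method belongs to (not given
# in the snippet); the hyper-parameter names mirror the group[...] keys read
# inside step().
import torch
import torch.nn as nn

model = nn.Linear(4, 1)
opt = GaussianSGLD(model.parameters(), lr=1e-3, momentum=0.9,
                   weight_decay=0.0, noise=1e-2, eps=1e-8)
loss = model(torch.randn(8, 4)).pow(2).mean()
loss.backward()
opt.step()           # one noisy update; opt.step(lr=5e-4) would override the lr
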
def forward(self, x):  # pylint: disable=arguments-differ, no-self-use
    """Forward prop"""
    return torch.abs_(x)  # abs_() is the in-place version

def forward(self, x):
    x = self.relu(x)
    x = self.leaky_relu(x)
    x = torch.abs_(x)
    # x = self.dropout(x)
    return x