def compute_acc(y_true: torch.tensor, y_pred: torch.tensor) -> float:
    """Compute classification accuracy from score/one-hot tensors.

    Args:
        y_true: ground-truth scores, shape (n, ...); the argmax over each
            sample's flattened trailing dims is the true class.
        y_pred: predicted scores with the same leading dimension n.

    Returns:
        Fraction of samples whose predicted argmax equals the true argmax
        (0.0 for an empty batch).
    """
    # .detach() drops the autograd graph; .cpu() makes the numpy conversion
    # safe for CUDA tensors (the original would raise on GPU inputs).
    t_np = y_true.detach().cpu().numpy()
    p_np = y_pred.detach().cpu().numpy()
    n = t_np.shape[0]
    if n == 0:
        return 0.0
    # Flatten each sample so any rank behaves like the original per-row
    # np.argmax over the whole sample; vectorized instead of a Python loop.
    t_cls = t_np.reshape(n, -1).argmax(axis=1)
    p_cls = p_np.reshape(n, -1).argmax(axis=1)
    return float(np.mean(t_cls == p_cls))
def calculate_epe_statistics(
    predictions: torch.tensor,
    ground_truth: torch.tensor,
    dim: int,
    validitiy_flags=None,
) -> dict:
    """Compute Euclidean end-point-error statistics over all keypoints.

    Args:
        predictions (torch.tensor): Predicted coordinates (#samples x 21 x 3).
        ground_truth (torch.tensor): True coordinates (#samples x 21 x 3).
        dim (int): 2 to compare only the first two coordinates (2.5D case),
            anything else is treated as 3D.
        validitiy_flags: optional boolean mask selecting valid keypoints.

    Returns:
        dict: keys 'eucledian_dist', 'mean', 'median', 'min', 'max'.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if dim == 2:
        predictions_ = predictions[:, :, :2].clone()
        ground_truth_ = ground_truth[:, :, :2].clone()
    else:
        if dim != 3:
            print("Coordinates treated as 3D")
        predictions_ = predictions.clone()
        ground_truth_ = ground_truth.clone()
    with torch.no_grad():
        diff = predictions_.to(device) - ground_truth_.to(device)
        eucledian_dist = torch.sum(diff ** 2, 2) ** 0.5
        # Statistics are taken over valid keypoints only, when a mask is given.
        if validitiy_flags is not None:
            selected = eucledian_dist[validitiy_flags.view(-1, 21)]
        else:
            selected = eucledian_dist
        mean_epe = torch.mean(selected)
        median_epe = torch.median(selected)
        max_epe = torch.max(selected)
        min_epe = torch.min(selected)
    return {
        "eucledian_dist": eucledian_dist,
        "mean": mean_epe,
        "median": median_epe,
        "min": min_epe,
        "max": max_epe,
    }
def generate_fix_step_permutation_for_finding_boundary(input_data: torch.
                                                       tensor,
                                                       variance=0.1,
                                                       steps=10):
    """For every feature, emit copies of the input shifted by +/- k*variance.

    Ordering is feature-major, then step k = 1..steps, with the '+' copy
    immediately before the '-' copy, giving 2 * steps * n_features tensors.
    """
    perturbed = []
    n_features = input_data.size()[0]
    for feat in range(n_features):
        for step in range(1, steps + 1):
            offset = variance * step
            for sign in (1.0, -1.0):
                candidate = input_data.clone()
                candidate[feat] = candidate[feat] + sign * offset
                perturbed.append(candidate)
    return perturbed
def generate_permutation_for_numerical(
    input_data: torch.tensor,
    num_samples_per_feature,
    variance=0.5,
):
    '''
    [input_data]: Normalised data. should be a 1-D tensor.
    --------------------------
    Return: all permutations.
    '''
    # Resample one feature at a time, uniformly within a clipped window
    # around its current value; the untouched original is appended last.
    original = input_data.clone()
    upper = torch.clip(input_data + variance, -1, 1)
    lower = torch.clip(input_data - variance, -1, 1)
    permutations = []
    for feature in range(input_data.size(-1)):
        batch = original.clone().unsqueeze(0).repeat(num_samples_per_feature, 1)
        batch[:, feature] = torch.zeros(num_samples_per_feature).uniform_(
            lower[feature], upper[feature])
        permutations.extend(torch.chunk(batch, num_samples_per_feature, dim=0))
    ########## append the original data ##########
    permutations.append(original.unsqueeze(0))
    return permutations
def ista(self, x: torch.tensor, r: torch.tensor):
    """ISTA steps for sparsification.

    Iteratively minimises ||x - U(r)||^2 by SGD on the code `r`, applying a
    soft-thresholding proximal step after every gradient update, until the
    relative change of `r` falls below 1%.

    Args:
        x ([torch.tensor]): Input for reconstruction
        r ([torch.tensor]): Initialization of the code

    Returns:
        [torch.tensor]: the sparse code fitted to x
            NOTE: `r` is also mutated in place (via `.data`), so the caller's
            tensor is updated.
    """
    r.requires_grad_(True)
    converged = False
    # update R
    optim = torch.optim.SGD([{'params': r, "lr": self.lr_r}])
    # train
    while not converged:
        old_r = r.clone().detach()
        # prediction
        x_hat = self.U(r)
        # loss: squared reconstruction error
        loss = ((x - x_hat)**2).sum()
        loss.backward()
        # update R in place
        optim.step()
        # print(r.grad)
        # zero grad on both the code optimizer and the module parameters
        optim.zero_grad()
        self.zero_grad()
        # prox: soft-thresholding with self.lmda keeps the code sparse
        r.data = self.soft_thresholding_(r, self.lmda)
        # convergence: stop when the relative update is below 1%
        converged = torch.norm(r - old_r) / torch.norm(old_r) < 0.01
        #print(torch.norm(r - old_r) / torch.norm(old_r))
    return r
def forward(ctx, inputs: torch.tensor, threshold: float, sigmoid: bool):
    """
    Args:
        inputs (`torch.FloatTensor`)
            The input matrix from which the binarizer computes the binary mask.
        threshold (`float`)
            The percentage of weights to keep (the rest is pruned).
            `threshold` is a float between 0 and 1.
        sigmoid (`bool`)
            Whether to apply a sigmoid on the threshold
    Returns:
        mask (`torch.FloatTensor`)
            Binary matrix of the same size as `inputs` acting as a mask
            (1 - the associated weight is retained,
            0 - the associated weight is pruned).
    """
    # Optionally squash the threshold (a tensor in that case) through a
    # sigmoid before using it as a keep-ratio.
    if sigmoid:
        threshold = torch.sigmoid(threshold).item()
    ctx.sigmoid = sigmoid
    mask = inputs.clone()
    _, order = inputs.flatten().sort(descending=True)
    keep = math.ceil(threshold * inputs.numel())
    # flat_mask is a view sharing memory with mask.
    flat_mask = mask.flatten()
    flat_mask[order[keep:]] = 0.
    flat_mask[order[:keep]] = 1.
    ctx.save_for_backward(mask)
    return mask
def forward(ctx, inputs: torch.tensor, threshold: float):
    """
    Args:
        inputs (`torch.FloatTensor`)
            The input matrix from which the binarizer computes the binary mask.
        threshold (`float`)
            The percentage of weights to keep (the rest is pruned).
            `threshold` is a float between 0 and 1.
    Returns:
        mask (`torch.FloatTensor`)
            Binary matrix of the same size as `inputs` acting as a mask
            (1 - the associated weight is retained,
            0 - the associated weight is pruned).
    """
    # threshold may arrive wrapped in a 1-element container; unwrap it.
    if not isinstance(threshold, float):
        threshold = threshold[0]
    mask = inputs.clone()
    _, order = inputs.flatten().sort(descending=True)
    keep = int(threshold * inputs.numel())
    # flat_mask shares storage with mask, so writes land in mask too.
    flat_mask = mask.flatten()
    flat_mask[order[keep:]] = 0
    flat_mask[order[:keep]] = 1
    return mask
def heatmap_blend(img: torch.tensor, heatmap: torch.tensor, heatmap_blend_alpha=0.5, heatmap_clip_range=None, cmap='jet'):
    """ Blend a colormapped heatmap onto the original image batch.
    :param img: original image in RGB, dim (N, 3, H, W)
    :param heatmap: input heatmap, dim (N, H, W) or (N, 1, H, W)
    :param heatmap_blend_alpha: blend factor; 0 returns the original image
    :param heatmap_clip_range: optional clipping range forwarded to colormap
    :param cmap: colormap to blend
    :return: blended heatmap image, dim (N, 3, H, W)
    """
    # Accept (N, 1, H, W) and squeeze the channel axis.
    if heatmap.dim() == 4:
        if heatmap.size(1) != 1:
            raise Exception("The heatmap should be (N, 1, H, W) or (N, H, W)")
        heatmap = heatmap.view(heatmap.size(0), heatmap.size(2), heatmap.size(3))
    N, C3, H, W = img.shape
    assert heatmap_blend_alpha < 1.0
    assert H == heatmap.size(1)
    assert W == heatmap.size(2)
    assert N == heatmap.size(0)
    assert C3 == 3  # input image has three channel RGB
    color_map = colormap(heatmap, cmap=cmap, clip_range=heatmap_clip_range, chw_order=True).to(img.device)
    blended = img.clone() * (1.0 - heatmap_blend_alpha) + color_map * heatmap_blend_alpha
    return blended
def __init__(self,
             initial_centroids: torch.tensor,
             lm_model,
             tokenizer,
             metric=lp_distance,
             embedding_extractor=cls_embedding_extractor,
             do_language_modeling=True,
             device='cpu'):
    """Cluster-aware LM wrapper: registers the LM as a submodule and the
    centroids as a trainable parameter, then moves the module to `device`."""
    super(ClusterLM, self).__init__()
    self.initial_centroids = initial_centroids
    self.add_module('lm_model', lm_model)
    trainable_centroids = nn.Parameter(initial_centroids.clone().float(),
                                       requires_grad=True)
    self.register_parameter('centroids', trainable_centroids)
    self.tokenizer = tokenizer
    self.metric = metric
    self.embedding_extractor = embedding_extractor
    self.do_language_modeling = do_language_modeling
    self.device = device
    self.to(self.device)
def cxcywh_to_x1y1x2y2(cxcywh: torch.tensor) -> torch.tensor:
    """Convert (cx, cy, w, h) boxes to (x1, y1, x2, y2) corner format.

    Trailing dimensions beyond the first four are copied through unchanged.
    """
    assert cxcywh.shape[-1] >= 4
    cx, cy = cxcywh[..., 0], cxcywh[..., 1]
    half_w, half_h = cxcywh[..., 2] / 2, cxcywh[..., 3] / 2
    corners = cxcywh.clone()
    corners[..., 0] = cx - half_w
    corners[..., 1] = cy - half_h
    corners[..., 2] = cx + half_w
    corners[..., 3] = cy + half_h
    return corners
def result(output: torch.tensor, labels: torch.tensor):
    """MSE loss against an implicit one-hot target plus a noise penalty.

    NOTE(review): relies on a module-level `noise_strength` defined elsewhere
    in this file.
    """
    batch = output.shape[0]
    # residual = output - onehot(labels): subtract 1 at each true class.
    residual = output.clone()
    residual[torch.arange(batch), labels] -= 1
    loss_mse = torch.mean(residual ** 2)
    noise_coeff = (np.pi * noise_strength * noise_strength) / 2
    loss_noise = noise_coeff * torch.sum(output ** 2)
    return loss_mse + loss_noise
def get_mask(labels: torch.tensor) -> torch.tensor:
    """
    Returns mask from labels (channel 0 of the last axis).

    Args:
        labels: tensor of shape (N, M, C); channel 0 is extracted.
    """
    # Clone first so the returned slice is backed by a copy rather than
    # the caller's tensor.
    return labels.clone()[:, :, 0]
def mask_tokens(
    inputs: torch.tensor,
    tokenizer: TextEncoderBase,
    mlm_probability: float = 0.15,
    ignore_index: int = -100,
):
    """
    Mask tokens function from Hugging Face that prepares masked tokens
    inputs/labels for masked language modeling.

    NOTE: `inputs` is modified in place and also returned.

    :param inputs: Input tensor to be masked.
    :param tokenizer: COMET text encoder.
    :param mlm_probability: Probability of masking a token (default: 15%).
    :param ignore_index: Specifies a target value that is ignored and does not
        contribute to the input gradient (default: -100).

    Returns:
        - Tuple with input to the model and the target.
    """
    if tokenizer.mask_index is None:
        raise ValueError(
            "This tokenizer does not have a mask token which is necessary for masked language"
            "modeling. Remove the --mlm flag if you want to use this tokenizer."
        )
    labels = inputs.clone()
    # Start from a uniform masking probability at every position...
    probability_matrix = torch.full(labels.shape, mlm_probability)
    special_tokens_mask = [
        tokenizer.get_special_tokens_mask(val) for val in labels.tolist()
    ]
    # ...then never mask special tokens or padding.
    probability_matrix.masked_fill_(torch.tensor(special_tokens_mask, dtype=torch.bool),
                                    value=0.0)
    padding_mask = labels.eq(tokenizer.padding_index)
    probability_matrix.masked_fill_(padding_mask, value=0.0)
    masked_indices = torch.bernoulli(probability_matrix).bool()
    labels[
        ~masked_indices] = ignore_index  # We only compute loss on masked tokens
    # 80% of the time, we replace masked input tokens with ([MASK])
    indices_replaced = (torch.bernoulli(torch.full(labels.shape, 0.8)).bool()
                        & masked_indices)
    inputs[indices_replaced] = tokenizer.mask_index
    # 10% of the time, we replace masked input tokens with random word
    # (0.5 of the remaining 20% of masked positions = 10% overall)
    indices_random = (torch.bernoulli(torch.full(labels.shape, 0.5)).bool()
                      & masked_indices & ~indices_replaced)
    random_words = torch.randint(tokenizer.vocab_size, labels.shape, dtype=torch.long)
    inputs[indices_random] = random_words[indices_random]
    # The rest of the time (10% of the time) we keep the masked input tokens unchanged
    return inputs, labels
def to_sparse_by_cdf(t: torch.tensor, lens, cdf: float):
    """Binarize `t` chunk-wise, keeping the smallest top set reaching `cdf`.

    Each chunk (as produced by `t.split(lens, dim=0)`) is sorted descending;
    elements whose cumulative sum is below `cdf`, plus the first element that
    crosses it, become 1, the rest 0.

    Args:
        t: 1-D tensor of non-negative weights (e.g. probabilities).
        lens: chunk size(s) passed to `torch.split`.
        cdf: cumulative threshold.

    Returns:
        Long tensor of the same length as `t` with 0/1 entries.
    """
    _t = t.clone().detach()
    parts = list(_t.split(lens, dim=0))
    for i, chunk in enumerate(parts):
        chunk_sorted, indices = torch.sort(chunk, descending=True)
        mask = torch.cumsum(chunk_sorted, dim=-1) < cdf
        # Also keep the element that crosses the cdf threshold. The original
        # `mask[torch.sum(mask)] = True` indexed out of range when every
        # cumulative sum was below cdf (i.e. mask already all True).
        crossing = int(torch.sum(mask))
        if crossing < mask.numel():
            mask[crossing] = True
        parts[i][indices[mask]] = 1
        parts[i][indices[~mask]] = 0
    return torch.cat(parts, dim=0).long()
def cross_entropy(outputs: torch.tensor, labels: torch.tensor, mask: torch.tensor) -> torch.tensor:
    """
    Returns cross entropy loss using masking

    Args:
        outputs: tensor with predictions
        labels: tensor with labels
        mask: tensor with masking (positions where mask == 0 are ignored)
    """
    masked_labels = labels.clone()
    # ignore_index below makes the loss skip these positions entirely.
    masked_labels[mask == 0] = -1
    criterion = nn.CrossEntropyLoss(ignore_index=-1)
    return criterion(outputs, masked_labels.long())
def initialize_inducing(self, init_Z: torch.tensor) -> None:
    """Initializes inducing points using the argument init_Z, either as a
    single shared parameter or as one independent copy per output dim."""
    if self.Z_is_shared:
        # One (1, M, inp_dim) parameter, repeated when needed.
        self.Z = nn.Parameter(init_Z.unsqueeze(dim=0))
    else:
        # Stack an independent copy of init_Z for every output dimension.
        stacked = torch.zeros(self.out_dim, self.M, self.inp_dim)
        for out_idx in range(self.out_dim):
            stacked[out_idx, :] = init_Z.clone()
        self.Z = nn.Parameter(stacked)
def insert(self, observation: torch.tensor, hidden_state: torch.tensor,
           reward: torch.tensor, action: torch.tensor, done: torch.tensor,
           true_terminal: torch.tensor) -> None:
    """
    Store a single transition.

    Index convention: quantities generated after env.step() (observation,
    done, true_terminal) and the pre-step hidden state are written at
    index step + 1, while the step's own reward and action are written at
    index step. The step counter is then advanced.

    :param observation: observation tensor, at {t+1}
    :param hidden_state: hidden state tensor, at {t}
    :param reward: reward tensor, at {t+1}
    :param action: action tensor, at {t}
    :param done: done tensor, at {t+1}
    :param true_terminal: true termination state tensor, at {t+1}
    :return: None
    """
    nxt = self.step + 1
    # Detached copies so the buffers never hold autograd graphs.
    self.obs_buffer[nxt] = observation.clone().detach()
    self.hid_buffer[nxt] = hidden_state.clone().detach()
    self.don_buffer[nxt] = done.clone().detach()
    self.true_termin[nxt] = true_terminal.clone().detach()
    self.rew_buffer[self.step] = reward.clone().detach()
    self.act_buffer[self.step] = action.clone().detach()
    self.step += 1  # potential TODO confirm correctness
def __init__(self,
             data: torch.tensor,
             intervals_te_rew,
             column_to_time_features,
             window_size,
             device=None,
             scaler=None):
    """Time-series environment state holder.

    Args:
        data: source tensor; a detached copy is moved to `device`.
        intervals_te_rew: reward intervals (stored as-is).
        column_to_time_features: column -> time-feature mapping (stored as-is).
        window_size: prediction window length.
        device: target device; defaults to cuda:0 when available, else cpu.
        scaler: feature scaler. Defaults to a fresh PaperScaler() per
            instance. (The original default `scaler=PaperScaler()` was a
            mutable default argument: one instance silently shared by every
            object constructed without an explicit scaler.)
    """
    if device is None:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if scaler is None:
        scaler = PaperScaler()
    self.scaler = scaler
    self.device = device
    self.data = data.clone().detach().to(self.device)
    self.pred_counter = window_size
    self.trace_index = None
    self.intervals = intervals_te_rew
    self.column_feature = column_to_time_features
    self.win = window_size
    self.given_state = None
def xywh_to_xyXY(boxes_xywh: torch.tensor) -> torch.tensor:
    '''
    Get bounding box coordinates in
    [ x_top_left, y_top_left, x_bottom_right, y_bottom_right] format.

    Parameters
    ----------
    boxes : Bounding Box, a tensor in
        [ x_top_left, y_top_left, bb_width, bb_height] format.
    '''
    corners = boxes_xywh.clone()
    corners[:, 2] += corners[:, 0]  # x_br = x_tl + width
    corners[:, 3] += corners[:, 1]  # y_br = y_tl + height
    return corners
def transform(self, x: torch.tensor, inplace=True):
    """Scale the configured feature columns by their per-feature factors.

    Handles two input kinds: a 3-D tensor (indexed as [:, :, col]) and a
    pandas DataFrame (indexed by column name). Other inputs pass through
    unchanged.

    Args:
        x: input tensor or DataFrame.
        inplace: when False, work on a clone of `x` (tensor inputs only —
            DataFrame inputs would fail on `.clone()`, as before).

    Returns:
        The scaled object (same object as `x` when inplace=True).
    """
    out = x if inplace else x.clone()
    if len(x.shape) == 3:
        for name in self.column_features:
            col = self.column_features[name]
            out[:, :, col] = out[:, :, col] / self.scales[name]
    # isinstance instead of `type(out) == pd.DataFrame`: idiomatic type
    # check, and it also accepts DataFrame subclasses.
    if isinstance(out, pd.DataFrame):
        for name in self.column_features:
            out[name] = out[name] / self.scales[name]
    return out
def get_corrupted_batch_unfiltered(self, batch: torch.tensor, device: torch.device):
    """Corrupt each triple by replacing its head OR tail with a random entity.

    Args:
        batch: (N, 3) tensor of (head_id, relation_id, tail_id) triples.
        device: device on which the corrupted batch and the random index /
            replacement tensors are created.

    Returns:
        A new (N, 3) tensor on `device`; per row, either column 0 (head) or
        column 2 (tail) is replaced by a uniform random entity id.
    """
    # BUG FIX: the original called `batch.clone().to()` with no argument — a
    # no-op — so the clone stayed on the source device while the corruption
    # tensors below were moved to `device` (mixed-device indexing on GPU).
    corrupted_batch = batch.clone().to(device)
    # torch.randint generates 0 or 1 per row; multiplied by 2 it becomes the
    # column index of either the head_id (0) or the tail_id (2).
    head_tail_indexes = torch.randint(2, (batch.shape[0], )).to(device) * 2
    corrupted_batch[torch.arange(corrupted_batch.shape[0]),
                    head_tail_indexes] = torch.randint(
                        self.num_of_entities,
                        (corrupted_batch.shape[0], )).to(device)
    return corrupted_batch
def __init__(self, dist: torch.tensor, num_utterances: int, spub: list):
    """Trade Comm public belief state

    Args:
        dist [num_items, num_items]: Joint probability distribution over
            player items
        num_utterances: Number of utterances in game
        spub: Public state (list of public observations)

    Attributes:
        See args
    """
    # Validate the joint distribution before storing anything.
    assert_joint_probability(dist, dist.shape)
    self.num_utterances = num_utterances
    self.spub = list(spub)      # defensive copy of the public observations
    self.dist = dist.clone()    # own copy, detached from the caller's tensor
def log_non_linear(self, f: torch.tensor, Y: torch.tensor, noise_var: torch.tensor, flow: list, X: torch.tensor, **kwargs):
    """ Return the log likelihood of S Gaussian distributions, each of this S
        correspond to a quadrature point. Only the samples f have to be warped
        with the composite flow G(). -> f is assumed to be stacked samples of
        the same dimension of Y.

        Here we compute (apply lotus rule):
        \int \log p(y|fK) q(fK) dfK = \int \log p(y|fk) q(f0) df0
        \approx 1/sqrt(pi) sum_i w_i { \log[ p( y | G( sqrt(2)\sigma f_i + mu), sigma^2 ) ] };
        where q(f0) is the initial distribution. We just face the problem of
        computing the expectation under a log Gaussian of a non-linear
        transformation of the mean, given by the flow.

        Args:
            `f` (torch.tensor)  :->: Minibatched latent function samples in
                (S,Dy,MB), being S the number of quadrature points and MB the
                minibatch. This is directly given by the
                gpytorch.GaussHermiteQuadrature1D method in this format and
                corresponds to \sqrt(2)\sigma f_i + mu, see
                https://en.wikipedia.org/wiki/Gauss%E2%80%93Hermite_quadrature
            `Y` (torch.tensor)  :->: Minibatched observations in Dy x MB.
            `noise_var` (torch.tensor) :->: Observation noise
            'flow' (CompositeFlow) :->: Sequence of flows to be applied to
                each of the outputs
            'X' (torch.tensor)  :->: Input locations used for input dependent
                flows. Has shape [Dy,S*MB,Dx] or shape [S*MB,Dx].

        Returns:
            (S,Dy,MB) tensor of log densities; the quadrature reduction over
            S is performed by the caller.
    """
    assert len(flow) == self.out_dim, "This likelihood only supports a flow per output_dim. Got {} for Dy {}".format(self.out_dim,len(flow))
    assert len(X.shape) == 3, 'Bad input X, expected (out_dim,MB*S,Dx)'
    assert X.size(0) == self.out_dim, 'Wrong first dimension in X, expected out_dim'
    S = self.quad_points   # number of Gauss-Hermite quadrature points
    MB = Y.size(1)         # minibatch size
    Dy = self.out_dim      # number of output dimensions
    # Broadcast observations and noise to (S, Dy, MB, 1) so they line up
    # with the S stacked sample sets.
    Y = Y.view(Dy,MB,1).repeat((S,1,1,1))           # S,Dy,MB,1
    noise_var = noise_var.view(Dy,MB,1).repeat((S,1,1,1))  # S,Dy,MB,1
    fK = f.clone()
    # NOTE: because X is expanded, each output performs self.quad_points
    # forward passes through its flow; this might be inefficient unless
    # pytorch performs the operation once over the expanded dimension.
    # expanded_size = [self.quad_points] + [-1]*(len(X.size()))
    # X = X.expand(expanded_size)  # no need: pytorch broadcasts automatically
    for idx,fl in enumerate(flow):
        # warp the samples of output `idx` with its flow
        fK[:,idx,:] = fl(f[:,idx,:],X[idx])
    # Extra trailing dimension so that batched_log_gaussian does not reduce
    # the minibatch dimension; the GaussHermiteQuadrature from gpytorch
    # reduces S by default. Although sum is associative we prefer to separate
    # for clarity.
    fK = fK.view(S,Dy,MB,1)
    log_p_y = batched_log_Gaussian( obs = Y, mean = fK, cov = noise_var, diagonal = True, cov_is_inverse = False) # (S,Dy,MB)
    return log_p_y # return (S,Dy,MB) so that reduction is done for S.
def compute_loss(args: argparse.Namespace, model: DDP, images: torch.tensor,
                 targets: torch.tensor, num_classes: int) -> torch.tensor:
    """
    Compute the (optionally label-smoothed and/or mixup) segmentation loss.

    inputs:
        images  : shape [batch_size, C, h, w]
        logits  : shape [batch_size, num_classes, h, w]
        targets : shape [batch_size, h, w]; 255 marks ignore pixels

    returns:
        loss: shape []
        logits: shape [batch_size]
    """
    batch, h, w = targets.size()
    # One-hot targets on this rank's device; 255 (ignore) is temporarily
    # mapped to class 0 so scatter_ receives a valid index.
    one_hot_mask = torch.zeros(batch, num_classes, h, w).to(dist.get_rank())
    new_target = targets.clone().unsqueeze(1)
    new_target[new_target == 255] = 0
    # NOTE(review): the trailing .long() is a no-op — scatter_ already filled
    # one_hot_mask in place and the .long() result is discarded.
    one_hot_mask.scatter_(1, new_target, 1).long()
    if args.smoothing:
        # Label smoothing: spread eps of the mass over the other classes.
        eps = 0.1
        one_hot = one_hot_mask * (1 - eps) + (1 - one_hot_mask) * eps / (num_classes - 1)
    else:
        one_hot = one_hot_mask  # [batch_size, num_classes, h, w]
    if args.mixup:
        # Mixup: blend each image with a random partner and mix both losses
        # with the same lambda drawn from Beta(alpha, alpha).
        alpha = 0.2
        lam = np.random.beta(alpha, alpha)
        rand_index = torch.randperm(images.size()[0]).to(dist.get_rank())
        one_hot_a = one_hot
        targets_a = targets
        one_hot_b = one_hot[rand_index]
        target_b = targets[rand_index]
        mixed_images = lam * images + (1 - lam) * images[rand_index]
        logits = model(mixed_images)
        loss = cross_entropy(logits, one_hot_a, targets_a) * lam \
            + cross_entropy(logits, one_hot_b, target_b) * (1. - lam)
    else:
        logits = model(images)
        loss = cross_entropy(logits, one_hot, targets)
    return loss
def score_subjects(self, p: torch.tensor, o: torch.tensor) -> torch.tensor:
    """
    Compute subject scores for all entities, given p and o.

    :param p: torch.tensor, scalar
        The predicate.
    :param o: torch.tensor, scalar
        The object.

    :return: torch.tensor, shape: (num_entities,)
        The scores for all entities.
    """
    predicate = p.clone()
    if self.inverse_model:
        # Inverse relations live in the second half of the relation table.
        predicate += self.num_relations // 2
    return self.predict_for_ranking(o, predicate)
def meansqerr(output: torch.tensor, labels: torch.tensor):
    """Mean-squared error against an implicit one-hot target.

    This criterion evaluates a one-hot vector on mean-squared error when the
    labels are the indices of the output that should be 1, making it a
    drop-in replacement for cross-entropy loss (no target re-encoding).

    Args:
        output (torch.tensor): the output
        labels (torch.tensor): the expected labels

    Returns:
        loss (torch.tensor): the loss of the network
    """
    n = output.shape[0]
    # residual = output - onehot(labels): subtract 1 at each true class.
    residual = output.clone()
    residual[torch.arange(n), labels] -= 1
    return torch.mean(residual ** 2)
def argmax(module: nn.Module, arg: torch.tensor):
    """Gradient-ascent search for an input that maximises the module output.

    Runs at most 1000 Adam steps on `arg` (mutated in place; requires_grad is
    enabled on it) and stops early once the update norm drops below 1e-4.

    Returns:
        (arg, out): the optimised input and the module output from the last
        completed iteration.
    """
    print('Computing argmax')
    arg.requires_grad = True
    opt = torch.optim.Adam([arg], lr=0.1)
    for _ in range(1000):
        out = module(arg)
        loss = -out  # ascend on the module's output
        snapshot = arg.clone()
        loss.backward()
        opt.step()
        opt.zero_grad()
        module.zero_grad()
        shift = (arg - snapshot).norm(2)
        if shift < 1e-4:
            print('breaking')
            break
    #print(f'Final d: {d}')
    return arg, out
def __init__(self, initial_centroids: torch.tensor, metric=lp_distance,
             device='cpu'):
    """Differentiable K-means module: the centroids are registered as a
    trainable parameter and the module is moved to `device`."""
    super(DifferentiableKMeans, self).__init__()
    self.initial_centroids = initial_centroids
    trainable_centroids = nn.Parameter(initial_centroids.clone().float(),
                                       requires_grad=True)
    self.register_parameter('centroids', trainable_centroids)
    self.metric = metric
    self.device = device
    self.to(self.device)
def cummax(
        tensor: torch.tensor,
        dim: int = 0) -> torch.tensor:
    """Return the cumulative maximum of the elements along a given axis.

    Args:
        tensor (torch.tensor): input tensor
        dim (int, optional): Defaults to 0. Axis along which the cumulative
            maximum is computed

    Returns:
        max_tensor (torch.tensor): A tensor with the same size as the input
            holding the cumulative maximum of the input tensor.
    """
    # torch.cummax (built in since PyTorch 1.5) replaces the original
    # Python-level unbind loop; .values drops the accompanying indices
    # tensor. It returns a fresh tensor, so the input is left untouched.
    return torch.cummax(tensor, dim=dim).values
def remap_heading(data: torch.tensor, heading_map: Dict = None) -> torch.tensor:
    """
    Remaps the values in data according to the map.

    Args:
        data: Data to remap.
        heading_map: Value map. Default remaping values:
            {0:0, 1:1, 85:2, 170:3, 255:4}

    Returns:
        Tensor with remaped values.
    """
    if heading_map is None:
        heading_map = {0: 0, 1: 1, 85: 2, 170: 3, 255: 4}
    source = data.clone().detach()
    remapped = source.clone()
    # Match against the untouched source so a value produced by one mapping
    # can never be re-matched by a later key. The original mutated a single
    # tensor in place, so e.g. {1: 2, 2: 3} chained and sent 1 -> 3.
    for key, val in heading_map.items():
        remapped[source == key] = val
    return remapped