def encode(matched, priors, use_yolo_regressors: bool = False):
    """
    Encode bboxes matched with each prior into the format
    produced by the network. See decode for more details on
    this format. Note that encode(decode(x, p), p) = x.

    Args:
        - matched: A tensor of bboxes in point form with shape [num_priors, 4]
        - priors:  The tensor of all priors with shape [num_priors, 4]
    Return: A tensor with encoded relative coordinates in the format
            outputted by the network (see decode). Size: [num_priors, 4]
    """
    if use_yolo_regressors:
        # Exactly the reverse of what we did in decode
        # In fact encode(decode(x, p), p) should be x
        boxes = center_size(matched)
        loc = jt.contrib.concat(
            (boxes[:, :2] - priors[:, :2],
             jt.log(boxes[:, 2:] / priors[:, 2:])), 1)
    else:
        variances = [0.1, 0.2]

        # dist b/t match center and prior's center
        g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]
        # encode variance
        g_cxcy /= (variances[0] * priors[:, 2:])
        # match wh / prior wh
        g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
        g_wh = jt.log(g_wh) / variances[1]
        # return target for smooth_l1_loss
        loc = jt.contrib.concat([g_cxcy, g_wh], 1)  # [num_priors, 4]

    return loc
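
# --- Hedged usage sketch (not from the original source): toy inputs for the
# encode() above, assuming jittor is available and that matched boxes are in
# point form (x1, y1, x2, y2) while priors are in center-size form (cx, cy, w, h).
import jittor as jt

matched = jt.array([[0.10, 0.10, 0.50, 0.60],
                    [0.30, 0.20, 0.90, 0.80]])
priors = jt.array([[0.30, 0.35, 0.40, 0.50],
                   [0.60, 0.50, 0.60, 0.60]])
loc = encode(matched, priors)  # SSD-style regression targets
print(loc.shape)               # (2, 4)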
def execute(self, inputs, targets, mask=None, act=False):
    losses = []
    for id in range(len(inputs)):
        if mask is not None:
            input_flatten, target_flatten = self.flatten(
                inputs[id], targets[id], mask[id])
        else:
            input_flatten, target_flatten = self.flatten(
                inputs[id], targets[id])
        if act:
            MIN = 1e-9
            input_flatten = jt.clamp(input_flatten, min_v=MIN, max_v=1 - MIN)
            input_flatten = jt.log(input_flatten) - jt.log(1 - input_flatten)
        losses.append(self.lovasz_hinge_flat(input_flatten, target_flatten))
    losses = jt.stack(losses)
    if self.reduction == "mean":
        losses = losses.mean()
    elif self.reduction == "sum":
        losses = losses.sum()
    return losses
def encode(self, reference_boxes, proposals):
    """
    Encode a set of proposals with respect to some
    reference boxes

    Arguments:
        reference_boxes (Tensor): reference boxes
        proposals (Tensor): boxes to be encoded
    """
    TO_REMOVE = 1  # TODO remove
    ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE
    ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE
    ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths
    ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights

    gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE
    gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE
    gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths
    gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights

    wx, wy, ww, wh = self.weights
    targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = ww * jt.log(gt_widths / ex_widths)
    targets_dh = wh * jt.log(gt_heights / ex_heights)

    targets = jt.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1)
    return targets
def bce_loss(output, target, size_average=True):
    if size_average:
        return -(target * jt.log(jt.maximum(output, 1e-20)) +
                 (1 - target) * jt.log(jt.maximum(1 - output, 1e-20))).mean()
    else:
        return -(target * jt.log(jt.maximum(output, 1e-20)) +
                 (1 - target) * jt.log(jt.maximum(1 - output, 1e-20))).sum()
def bce_loss(output, target, weight=None, size_average=True):
    loss = -(target * jt.log(jt.maximum(output, 1e-20)) +
             (1 - target) * jt.log(jt.maximum(1 - output, 1e-20)))
    if weight is not None:
        loss *= weight
    if size_average:
        return loss.mean()
    else:
        return loss.sum()
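
# --- Hedged sanity check (illustrative only): with a per-element weight of
# 0.5, the reduced loss from bce_loss() above should be half the unweighted
# mean, since the weight is applied element-wise before averaging.
import jittor as jt

output = jt.array([0.9, 0.1, 0.8])
target = jt.array([1.0, 0.0, 1.0])
full = bce_loss(output, target)
half = bce_loss(output, target, weight=jt.array([0.5, 0.5, 0.5]))
print(full.item(), half.item())  # half is roughly full / 2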
def sigmoid_focal_loss(logits, targets, gamma, alpha):
    num_classes = logits.shape[1]
    dtype = targets.dtype
    class_range = jt.arange(1, num_classes + 1, dtype=dtype).unsqueeze(0)

    t = targets.unsqueeze(1)
    p = logits.sigmoid()
    term1 = (1 - p)**gamma * jt.log(p)
    term2 = p**gamma * jt.log(1 - p)
    return -(t == class_range).float() * term1 * alpha - (
        (t != class_range) * (t >= 0)).float() * term2 * (1 - alpha)
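
# --- Hedged usage sketch (toy values, not from the original source): targets
# hold class labels in 1..num_classes with 0 meaning background, so the loss
# below comes back as a per-anchor, per-class matrix of shape (4, 80).
import jittor as jt

logits = jt.randn(4, 80)                    # 4 anchors, 80 classes
targets = jt.array([1.0, 0.0, 5.0, 80.0])   # 0 = background anchor
loss = sigmoid_focal_loss(logits, targets, gamma=2.0, alpha=0.25)
print(loss.shape)                           # (4, 80)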
def execute(self, x):
    batch_size = x.shape[0]
    x = nn.relu(self.fc1(x))
    x = nn.relu(self.fc2(x))

    # decoder follows NMR
    centroid = self.fc_centroid(x) * self.centroid_scale

    bias = self.fc_bias(x) * self.bias_scale
    bias = bias.view(-1, self.nv, 3)

    base = self.vertices_base * self.obj_scale

    sign = nn.sign(base)
    base = base.abs()
    base = jt.log(base / (1 - base))

    centroid = jt.tanh(centroid[:, None, :])
    scale_pos = 1 - centroid
    scale_neg = centroid + 1

    vertices = (base + bias).sigmoid() * sign
    vertices = nn.relu(vertices) * scale_pos - nn.relu(-vertices) * scale_neg
    vertices = vertices + centroid
    vertices = vertices * 0.5

    faces = self.faces[None, :, :].repeat(batch_size, 1, 1)
    return vertices, faces
def __init__(self, p=None, logits=None):
    assert (p is not None) or (logits is not None)
    if p is None:
        self.prob = jt.sigmoid(logits)
        self.logits = logits
    elif logits is None:
        # only validate p when it is actually given; comparing None
        # against 0 would raise a TypeError
        assert 0 < p and p < 1
        self.prob = p
        self.logits = -jt.log(1. / p - 1)
def log_sum_exp(x):
    """Utility function for computing log_sum_exp in a numerically stable way.
    This is used to compute the unaveraged confidence loss across all
    examples in a batch.

    Args:
        x (Variable(tensor)): conf_preds from conf layers
    """
    x_max = x.data.max()
    return jt.log(jt.sum(jt.exp(x - x_max), 1)) + x_max
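
# --- Hedged illustration (not from the original source) of why log_sum_exp()
# above subtracts the max first: the naive form overflows for large logits,
# while the shifted form stays finite (here roughly 1000 + log(3)).
import jittor as jt

x = jt.array([[1000.0, 1000.0, 1000.0]])
naive = jt.log(jt.sum(jt.exp(x), 1))  # exp(1000) overflows to inf
stable = log_sum_exp(x)               # ~1001.0986
print(naive.numpy(), stable.numpy())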
def execute(self, batch_size):
    base = jt.log(self.vertices.abs() / (1 - self.vertices.abs()))
    centroid = jt.tanh(self.center)
    vertices = (base + self.displace).sigmoid() * nn.sign(self.vertices)
    vertices = nn.relu(vertices) * (1 - centroid) - nn.relu(-vertices) * (centroid + 1)
    vertices = vertices + centroid

    # apply Laplacian and flatten geometry constraints
    laplacian_loss = self.laplacian_loss(vertices).mean()
    flatten_loss = self.flatten_loss(vertices).mean()

    return jr.Mesh(vertices.repeat(batch_size, 1, 1),
                   self.faces.repeat(batch_size, 1, 1),
                   dr_type='n3mr'), laplacian_loss, flatten_loss
def kl_divergence(cur_dist, old_dist):
    assert isinstance(cur_dist, type(old_dist))
    if isinstance(cur_dist, Normal):
        vr = (cur_dist.sigma / old_dist.sigma)**2
        t1 = ((cur_dist.mu - old_dist.mu) / old_dist.sigma)**2
        return 0.5 * (vr + t1 - 1 - jt.log(vr))
    if isinstance(cur_dist, Categorical) or isinstance(cur_dist, OneHotCategorical):
        t = cur_dist.probs * (cur_dist.logits - old_dist.logits)
        t[jt.array((old_dist.probs == 0))] = math.inf
        t[jt.array((cur_dist.probs == 0))] = 0
        return t.sum(-1)
    if isinstance(cur_dist, Uniform):
        res = jt.log((old_dist.high - old_dist.low) / (cur_dist.high - cur_dist.low))
        if old_dist.low > cur_dist.low or old_dist.high < cur_dist.high:
            res = math.inf
        return res
    if isinstance(cur_dist, Geometric):
        return -cur_dist.entropy() - jt.log(-old_dist.prob + 1) / cur_dist.prob - old_dist.logits
def kl_divergence(cur_dist, old_dist):
    assert isinstance(cur_dist, type(old_dist))
    if isinstance(cur_dist, Normal):
        vr = (cur_dist.sigma / old_dist.sigma)**2
        t1 = ((cur_dist.mu - old_dist.mu) / old_dist.sigma)**2
        return 0.5 * (vr + t1 - 1 - jt.log(vr))
    if isinstance(cur_dist, Categorical) or isinstance(cur_dist, OneHotCategorical):
        t = cur_dist.probs * (cur_dist.logits - old_dist.logits)
        t[jt.array((old_dist.probs == 0))] = math.inf
        t[jt.array((cur_dist.probs == 0))] = 0
        return t.sum(-1)
def __init__(self, probs=None, logits=None):
    assert not (probs is None and logits is None)
    if probs is None:
        # cannot align to pytorch
        probs = jt.sigmoid(logits)
    elif logits is None:
        logits = jt.log(probs)
    with jt.no_grad():
        self.probs = probs / probs.sum(-1, True)
        self.logits = logits
        self.cum_probs = simple_presum(probs)
        self.cum_probs_l = self.cum_probs[..., :-1]
        self.cum_probs_r = self.cum_probs[..., 1:]
def bbox2loc(src_bbox, dst_bbox):
    width = src_bbox[:, 2:3] - src_bbox[:, 0:1]
    height = src_bbox[:, 3:4] - src_bbox[:, 1:2]
    center_x = src_bbox[:, 0:1] + 0.5 * width
    center_y = src_bbox[:, 1:2] + 0.5 * height

    base_width = dst_bbox[:, 2:3] - dst_bbox[:, 0:1]
    base_height = dst_bbox[:, 3:4] - dst_bbox[:, 1:2]
    base_center_x = dst_bbox[:, 0:1] + 0.5 * base_width
    base_center_y = dst_bbox[:, 1:2] + 0.5 * base_height

    eps = 1e-5
    height = jt.maximum(height, eps)
    width = jt.maximum(width, eps)

    dy = (base_center_y - center_y) / height
    dx = (base_center_x - center_x) / width
    dw = jt.log(base_width / width)
    dh = jt.log(base_height / height)

    loc = jt.contrib.concat([dx, dy, dw, dh], dim=1)
    return loc
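
# --- Hedged sanity check (toy boxes, not from the original source): encoding
# a box against itself with bbox2loc() above should give all-zero offsets.
import jittor as jt

src = jt.array([[10.0, 10.0, 50.0, 60.0]])
dst = jt.array([[10.0, 10.0, 50.0, 60.0]])
print(bbox2loc(src, dst).numpy())  # [[0. 0. 0. 0.]]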
def execute(self, pred, target, weight=None):
    pred_left = pred[:, 0]
    pred_top = pred[:, 1]
    pred_right = pred[:, 2]
    pred_bottom = pred[:, 3]

    target_left = target[:, 0]
    target_top = target[:, 1]
    target_right = target[:, 2]
    target_bottom = target[:, 3]

    target_area = (target_left + target_right) * (target_top + target_bottom)
    pred_area = (pred_left + pred_right) * (pred_top + pred_bottom)

    w_intersect = jt.minimum(pred_left, target_left) + jt.minimum(pred_right, target_right)
    g_w_intersect = jt.maximum(pred_left, target_left) + jt.maximum(pred_right, target_right)
    h_intersect = jt.minimum(pred_bottom, target_bottom) + jt.minimum(pred_top, target_top)
    g_h_intersect = jt.maximum(pred_bottom, target_bottom) + jt.maximum(pred_top, target_top)

    ac_union = g_w_intersect * g_h_intersect + 1e-7
    area_intersect = w_intersect * h_intersect
    area_union = target_area + pred_area - area_intersect

    ious = (area_intersect + 1.0) / (area_union + 1.0)
    gious = ious - (ac_union - area_union) / ac_union

    if self.loc_loss_type == 'iou':
        losses = -jt.log(ious)
    elif self.loc_loss_type == 'linear_iou':
        losses = 1 - ious
    elif self.loc_loss_type == 'giou':
        losses = 1 - gious
    else:
        raise NotImplementedError

    if weight is not None and weight.sum() > 0:
        return (losses * weight).sum() / weight.sum()
    else:
        assert losses.numel() != 0
        return losses.mean()
def soft_cross_entropy_loss(output, target, smoothing=True):
    '''Calculate cross entropy loss, apply label smoothing if needed.'''
    target = target.view(-1)
    softmax = nn.Softmax(dim=1)

    if smoothing:
        eps = 0.2
        b, n_class = output.shape

        one_hot = jt.zeros(output.shape)
        for i in range(b):
            one_hot[i, target[i].data] = 1
        one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1)
        log_prb = jt.log(softmax(output))

        loss = -(one_hot * log_prb).sum(dim=1).mean()
    else:
        loss = nn.cross_entropy_loss(output, target)
    return loss
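
# --- Hedged worked example (illustrative only) of the smoothing above:
# with eps = 0.2 and n_class = 5, the true class gets 1 - eps = 0.8 and each
# wrong class gets eps / (n_class - 1) = 0.05, so the soft targets sum to 1.
eps, n_class = 0.2, 5
on_target = 1 - eps               # 0.8
off_target = eps / (n_class - 1)  # 0.05
assert abs(on_target + (n_class - 1) * off_target - 1.0) < 1e-9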
def log2(x):
    return jt.log(x) / math.log(2.0)
def log_sigmoid(x):
    return jt.log(jt.sigmoid(x))
def log_softmax(x, dim=None):
    x = softmax(x, dim=dim)
    return jt.log(x)
def execute(self, x):
    return 1 / self.beta * jt.log(1 + (self.beta * x).exp())
def softplus(x, beta=1, threshold=20):
    return 1 / beta * jt.log(1 + (beta * x).exp())
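
# --- Hedged sanity check (illustrative only): softplus(0) with beta = 1 is
# log(2) ~ 0.6931. Note that the threshold argument is accepted but not
# applied in this snippet, so very large inputs can overflow the exp().
import jittor as jt

print(softplus(jt.array([0.0])).numpy())  # [0.6931...]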
def __init__(self, loc, scale):
    self.loc = loc
    self.scale = scale
    self.log_scale = jt.log(self.scale)
def entropy(self):
    # Shannon entropy of the categorical distribution: -sum(p * log(p))
    return -jt.sum(self.probs * jt.log(self.probs))
def log_prob(self, x):
    return jt.log(self.probs)[0, x]
def logits(self):
    if self._logits is None:
        return jt.log(jt.clamp(self.probs, min_v=eps, max_v=1 - eps))
    else:
        return self._logits
import jittor as jt
from jittor import nn
import numpy as np


# Misc
img2mse = lambda x, y: jt.mean((x - y)**2)
mse2psnr = lambda x: -10. * jt.log(x) / jt.log(jt.array(np.array([10.])))
to8b = lambda x: (255 * np.clip(x, 0, 1)).astype(np.uint8)


# Positional encoding (section 5.1)
class Embedder:
    def __init__(self, **kwargs):
        self.kwargs = kwargs
        self.create_embedding_fn()

    def create_embedding_fn(self):
        embed_fns = []
        d = self.kwargs['input_dims']
        out_dim = 0
        if self.kwargs['include_input']:
            embed_fns.append(lambda x: x)
            out_dim += d

        max_freq = self.kwargs['max_freq_log2']
        N_freqs = self.kwargs['num_freqs']

        if self.kwargs['log_sampling']:
            freq_bands = 2.**jt.linspace(0., max_freq, steps=N_freqs)
        else:
            freq_bands = jt.linspace(2.**0., 2.**max_freq, steps=N_freqs)
def bce_loss(output, target):
    return -(target * jt.log(jt.maximum(output, 1e-20)) +
             (1 - target) * jt.log(jt.maximum(1 - output, 1e-20))).mean()
def prod(x, dim=0):
    x = jt.log(x)
    x = x.sum(dim=dim)
    return jt.exp(x)
def cumprod(x, dim=0):
    x = jt.log(x)
    x = cumsum(x, dim=dim)
    return jt.exp(x)
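
# --- Hedged sanity check (illustrative only): the log/exp trick used by
# prod() and cumprod() above is only valid for strictly positive inputs, and
# cumprod() additionally relies on the cumsum helper it references.
import jittor as jt

x = jt.array([1.0, 2.0, 3.0, 4.0])
print(prod(x).numpy())  # ~24.0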
def execute(self, input, target):
    bs_idx = jt.array(range(input.shape[0]))
    ret = (-jt.log(nn.softmax(input, dim=1)))[bs_idx, target]
    if self.reduction is not None:
        ret = jt.mean(ret) if self.reduction == 'mean' else jt.sum(ret)
    return ret