def loc2bbox(src_bbox, loc):
    if src_bbox.shape[0] == 0:
        return jt.zeros((0, 4), dtype=loc.dtype)

    src_width = src_bbox[:, 2:3] - src_bbox[:, 0:1]
    src_height = src_bbox[:, 3:4] - src_bbox[:, 1:2]
    src_center_x = src_bbox[:, 0:1] + 0.5 * src_width
    src_center_y = src_bbox[:, 1:2] + 0.5 * src_height

    dx = loc[:, 0:1]
    dy = loc[:, 1:2]
    dw = loc[:, 2:3]
    dh = loc[:, 3:4]

    center_x = dx * src_width + src_center_x
    center_y = dy * src_height + src_center_y
    # Clamp dw/dh to 20 before exp() to avoid overflow.
    w = jt.exp(dw.minimum(20.0)) * src_width
    h = jt.exp(dh.minimum(20.0)) * src_height

    x1, y1 = center_x - 0.5 * w, center_y - 0.5 * h
    x2, y2 = center_x + 0.5 * w, center_y + 0.5 * h
    dst_bbox = jt.contrib.concat([x1, y1, x2, y2], dim=1)
    return dst_bbox
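# Usage sketch for loc2bbox (hypothetical values): with zero offsets the
# decoded boxes should reproduce the source boxes exactly, since exp(0) = 1
# leaves widths and heights untouched.
import numpy as np
import jittor as jt
anchors = jt.array(np.array([[10., 10., 50., 50.],
                             [0., 0., 20., 40.]], dtype=np.float32))
offsets = jt.zeros((2, 4))
print(loc2bbox(anchors, offsets))  # ~= anchors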
def execute(self, pred, true):
    loss = self.loss_fcn(pred, true)
    pred = jt.sigmoid(pred)  # prob from logits
    dx = pred - true  # reduce only missing label effects
    # dx = (pred - true).abs()  # reduce missing label and false label effects
    alpha_factor = 1 - jt.exp((dx - 1) / (self.alpha + 1e-4))
    loss *= alpha_factor
    return loss.mean()
def log_sum_exp(x):
    """Utility function for computing log_sum_exp in a numerically stable way.

    This will be used to determine unaveraged confidence loss across
    all examples in a batch.

    Args:
        x (Variable(tensor)): conf_preds from conf layers
    """
    x_max = x.data.max()
    return jt.log(jt.sum(jt.exp(x - x_max), 1)) + x_max
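# Quick check (hypothetical values): subtracting x_max keeps jt.exp from
# overflowing where the naive log(sum(exp(x))) would return inf in float32.
import numpy as np
import jittor as jt
big = jt.array(np.array([[1000., 1000.5]], dtype=np.float32))
print(log_sum_exp(big))                # finite, ~1000.97
print(jt.log(jt.sum(jt.exp(big), 1)))  # naive version overflows to inf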
def decode(loc, priors, use_yolo_regressors: bool = False):
    """
    Decode predicted bbox coordinates using the same scheme
    employed by Yolov2: https://arxiv.org/pdf/1612.08242.pdf

        b_x = (sigmoid(pred_x) - .5) / conv_w + prior_x
        b_y = (sigmoid(pred_y) - .5) / conv_h + prior_y
        b_w = prior_w * exp(loc_w)
        b_h = prior_h * exp(loc_h)

    Note that loc is input as [(s(x)-.5)/conv_w, (s(y)-.5)/conv_h, w, h]
    while priors are input as [x, y, w, h] where each coordinate
    is relative to size of the image (even sigmoid(x)). We do this
    in the network by dividing by the 'cell size', which is just
    the size of the convouts.

    Also note that prior_x and prior_y are center coordinates which
    is why we have to subtract .5 from sigmoid(pred_x and pred_y).

    Args:
        - loc:    The predicted bounding boxes of size [num_priors, 4]
        - priors: The priorbox coords with size [num_priors, 4]

    Returns: A tensor of decoded relative coordinates in point form
             with size [num_priors, 4]
    """
    if use_yolo_regressors:
        # Decoded boxes in center-size notation
        boxes = jt.contrib.concat(
            (loc[:, :2] + priors[:, :2],
             priors[:, 2:] * jt.exp(loc[:, 2:])), 1)
        boxes = point_form(boxes)
    else:
        variances = [0.1, 0.2]
        boxes = jt.contrib.concat(
            (priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
             priors[:, 2:] * jt.exp(loc[:, 2:] * variances[1])), 1)
        # Convert from center-size to point form (x1, y1, x2, y2).
        boxes[:, :2] -= boxes[:, 2:] / 2
        boxes[:, 2:] += boxes[:, :2]
    return boxes
def decode(self, rel_codes, boxes):
    """
    From a set of original boxes and encoded relative box offsets,
    get the decoded boxes.

    Arguments:
        rel_codes (Tensor): encoded boxes
        boxes (Tensor): reference boxes.
    """
    boxes = boxes.cast(rel_codes.dtype)

    TO_REMOVE = 1  # TODO remove
    widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE
    heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    wx, wy, ww, wh = self.weights
    dx = rel_codes[:, 0::4] / wx
    dy = rel_codes[:, 1::4] / wy
    dw = rel_codes[:, 2::4] / ww
    dh = rel_codes[:, 3::4] / wh

    # Prevent sending too large values into jt.exp()
    dw = jt.clamp(dw, max_v=self.bbox_xform_clip)
    dh = jt.clamp(dh, max_v=self.bbox_xform_clip)

    pred_ctr_x = dx * widths.unsqueeze(-1) + ctr_x.unsqueeze(-1)
    pred_ctr_y = dy * heights.unsqueeze(-1) + ctr_y.unsqueeze(-1)
    pred_w = jt.exp(dw) * widths.unsqueeze(-1)
    pred_h = jt.exp(dh) * heights.unsqueeze(-1)

    pred_boxes = jt.zeros_like(rel_codes)
    # x1
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    # y1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    # x2 (note: "- 1" is correct; don't be fooled by the asymmetry)
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1
    # y2 (note: "- 1" is correct; don't be fooled by the asymmetry)
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1

    return pred_boxes
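# Usage sketch (hypothetical): a minimal stand-in class supplies the two
# attributes decode() reads (weights, bbox_xform_clip) and reuses the decode
# defined directly above as a method. Zero rel_codes should roughly reproduce
# the input boxes, up to the TO_REMOVE "- 1" convention on x2/y2.
import math
import numpy as np
import jittor as jt

class _Coder:
    weights = (10., 10., 5., 5.)
    bbox_xform_clip = math.log(1000. / 16)
    decode = decode  # bind the module-level function above as a method

boxes = jt.array(np.array([[0., 0., 10., 10.]], dtype=np.float32))
rel_codes = jt.zeros((1, 4))
print(_Coder().decode(rel_codes, boxes))  # ~[[-0.5, -0.5, 9.5, 9.5]]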
def compute_density(xyz, bandwidth):
    '''
    xyz: input points position data, [B, N, C]
    '''
    B, N, C = xyz.shape
    sqrdists = square_distance(xyz, xyz)
    gaussian_density = jt.exp(
        -sqrdists / (2.0 * bandwidth * bandwidth)) / (2.5 * bandwidth)
    xyz_density = gaussian_density.mean(dim=-1)
    return xyz_density
def compute_density(points, bandwidth):
    '''
    points: input points position data, [B, N, C]
    '''
    B, N, C = points.shape
    sqrdists = square_distance(points, points)
    gaussian_density = jt.exp(
        -sqrdists / (2.0 * bandwidth * bandwidth)) / (2.5 * bandwidth)
    points_density = gaussian_density.mean(dim=-1)
    return points_density
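# Usage sketch for either compute_density variant above (hypothetical shapes;
# assumes the square_distance helper from the same module is in scope):
import numpy as np
import jittor as jt
xyz = jt.array(np.random.rand(2, 128, 3).astype(np.float32))  # [B, N, C]
density = compute_density(xyz, bandwidth=0.1)
print(density.shape)  # [B, N]: one Gaussian kernel-density estimate per point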
def execute(self, x):
    logits = []
    bbox_reg = []
    centerness = []
    for l, feature in enumerate(x):
        cls_tower = self.cls_tower(feature)
        logits.append(self.cls_logits(cls_tower))
        if self.cfg.MODEL.RPN.FCOS_ONLY:
            centerness.append(self.centerness(cls_tower))
            bbox_reg.append(
                jt.exp(self.scales[l](self.bbox_pred(
                    self.bbox_tower(feature)))))
            continue
        box_tower = self.bbox_tower(feature)
        '''
        centerness.append(self.centerness(box_tower))
        bbox_reg.append(jt.exp(self.scales[l](
            self.bbox_pred(box_tower)
        )))
        '''
        if self.centerness_on_reg:
            centerness.append(self.centerness(box_tower))
        else:
            centerness.append(self.centerness(cls_tower))
        bbox_pred = self.scales[l](self.bbox_pred(box_tower))
        if self.norm_reg_targets:
            bbox_pred = nn.relu(bbox_pred)
            if self.is_training():
                bbox_reg.append(bbox_pred)
            else:
                bbox_reg.append(bbox_pred * self.fpn_strides[l])
        else:
            bbox_reg.append(jt.exp(bbox_pred))
    return logits, bbox_reg, centerness
def R1Penalty(self, real_img, height, alpha):
    # TODO: use_loss_scaling, for fp16
    # apply_loss_scaling = lambda x: x * torch.exp(x * torch.Tensor([np.float32(np.log(2.0))]).to(real_img.device))
    apply_loss_scaling = lambda x: x * jt.exp(x * jt.array(
        [np.float32(np.log(2.0))]))
    # undo_loss_scaling = lambda x: x * torch.exp(-x * torch.Tensor([np.float32(np.log(2.0))]).to(real_img.device))
    undo_loss_scaling = lambda x: x * jt.exp(-x * jt.array(
        [np.float32(np.log(2.0))]))

    # real_img = torch.autograd.Variable(real_img, requires_grad=True)
    real_img = init.constant(real_img.shape, 'float32', real_img)
    assert not real_img.is_stop_grad()
    real_logit = self.dis(real_img, height, alpha)

    # real_logit = apply_loss_scaling(torch.sum(real_logit))
    # real_grads = torch.autograd.grad(outputs=real_logit, inputs=real_img,
    #                                  grad_outputs=torch.ones(real_logit.size()).to(real_img.device),
    #                                  create_graph=True, retain_graph=True)[0].view(real_img.size(0), -1)
    real_grads = jt.grad(real_logit, real_img).view(real_img.size(0), -1)
    # real_grads = undo_loss_scaling(real_grads)
    # r1_penalty = torch.sum(torch.mul(real_grads, real_grads))
    r1_penalty = jt.sum(jt.multiply(real_grads, real_grads))
    return r1_penalty
def execute(self, x):
    logits = []
    bbox_reg = []
    centerness = []
    for l, feature in enumerate(x):
        if self.identity:
            shared_tower = self.shared_tower(feature) + feature
        else:
            shared_tower = self.shared_tower(feature)
        logits.append(self.cls_logits(shared_tower))
        centerness.append(self.centerness(shared_tower))
        bbox_reg.append(
            jt.exp(self.scales[l](self.bbox_pred(shared_tower))))
    return logits, bbox_reg, centerness
def compute_mask_prob(self, proposal_embed, proposal_margin, pixel_embed):
    m_h, m_w = pixel_embed.shape[-2:]
    obj_num = proposal_embed.shape[0]
    pixel_embed = pixel_embed.transpose(1, 2, 0).unsqueeze(0).expand(
        obj_num, -1, -1, -1)
    proposal_embed = proposal_embed.view(obj_num, 1, 1, -1).expand(-1, m_h, m_w, -1)
    if self.fix_margin:
        proposal_margin = proposal_margin.new_ones(obj_num, m_h, m_w) * self.init_margin
    else:
        proposal_margin = proposal_margin.view(obj_num, 1, 1).expand(-1, m_h, m_w)
    mask_var = jt.sum((pixel_embed - proposal_embed).sqr(), dim=3)
    mask_prob = jt.exp(-mask_var * proposal_margin)
    return mask_prob
def integrator(raw, z_vals, rays_d, raw_noise_std=0, white_bkgd=False):
    """Transforms model's predictions to semantically meaningful values.

    Args:
        raw: [num_rays, num_samples along ray, 4]. Prediction from model.
        z_vals: [num_rays, num_samples along ray]. Integration time.
        rays_d: [num_rays, 3]. Direction of each ray.
    Returns:
        rgb_map: [num_rays, 3]. Estimated RGB color of a ray.
        disp_map: [num_rays]. Disparity map. Inverse of depth map.
        acc_map: [num_rays]. Sum of weights along each ray.
        weights: [num_rays, num_samples]. Weights assigned to each sampled color.
        depth_map: [num_rays]. Estimated distance to object.
    """
    raw2alpha = lambda raw, dists, act_fn=jt.nn.relu: 1. - jt.exp(-act_fn(raw) * dists)

    dists = z_vals[..., 1:] - z_vals[..., :-1]
    dists = jt.concat([
        dists,
        jt.array(np.array([1e10]).astype(np.float32)).expand(dists[..., :1].shape)
    ], -1)  # [N_rays, N_samples]
    dists = dists * jt.norm(rays_d.unsqueeze(-2), p=2, dim=-1)

    rgb = jt.sigmoid(raw[..., :3])  # [N_rays, N_samples, 3]
    noise = 0.
    if raw_noise_std > 0.:
        noise = jt.init.gauss(raw[..., 3].shape, raw.dtype) * raw_noise_std

    alpha = raw2alpha(raw[..., 3] + noise, dists)  # [N_rays, N_samples]
    weights = alpha * jt.cumprod(
        jt.concat([jt.ones((alpha.shape[0], 1)), 1. - alpha + 1e-10], -1), -1)[:, :-1]

    rgb_map = jt.sum(weights.unsqueeze(-1) * rgb, -2)  # [N_rays, 3]
    depth_map = jt.sum(weights * z_vals, -1)
    disp_map = 1. / jt.maximum(1e-10 * jt.ones_like(depth_map),
                               depth_map / jt.sum(weights, -1))
    acc_map = jt.sum(weights, -1)

    if white_bkgd:
        rgb_map = rgb_map + (1. - acc_map.unsqueeze(-1))

    return rgb_map, disp_map, acc_map, weights, depth_map
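# Usage sketch (hypothetical shapes): volume-render 2 rays, each sampled at
# 8 depths, from random network output.
import numpy as np
import jittor as jt
raw = jt.array(np.random.randn(2, 8, 4).astype(np.float32))  # [N_rays, N_samples, 4]
z_vals = jt.array(np.tile(np.linspace(2., 6., 8, dtype=np.float32), (2, 1)))
rays_d = jt.array(np.ones((2, 3), dtype=np.float32))
rgb_map, disp_map, acc_map, weights, depth_map = integrator(raw, z_vals, rays_d)
print(rgb_map.shape, depth_map.shape)  # (2, 3) and (2,)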
def execute(self, x):
    return 1 / (1 + jt.exp(-x))
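# Sanity check (hypothetical values): the hand-rolled expression above matches
# Jittor's built-in jt.sigmoid.
import numpy as np
import jittor as jt
x = jt.array(np.array([-2., 0., 2.], dtype=np.float32))
print(1 / (1 + jt.exp(-x)))
print(jt.sigmoid(x))  # same values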
def cumprod(x, dim=0):
    # Cumulative product computed in log space; only valid for x > 0,
    # since jt.log of a non-positive entry yields nan/-inf.
    x = jt.log(x)
    x = cumsum(x, dim=dim)
    return jt.exp(x)
def prod(x, dim=0):
    # Product along a dim computed in log space; only valid for x > 0.
    x = jt.log(x)
    x = x.sum(dim=dim)
    return jt.exp(x)
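# Worked example (hypothetical values) for the two log-space helpers above;
# both assume strictly positive inputs, and cumprod additionally assumes the
# cumsum helper it calls is in scope.
import numpy as np
import jittor as jt
x = jt.array(np.array([1., 2., 3., 4.], dtype=np.float32))
print(cumprod(x, dim=0))  # ~[1, 2, 6, 24]
print(prod(x, dim=0))     # ~24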
for i, (real_imgs, _) in enumerate(dataloader):
    batch_size = real_imgs.shape[0]
    g_target = 1 / (batch_size * 2)
    d_target = 1 / batch_size

    # ---------------------
    #  Train Discriminator
    # ---------------------
    z = jt.array(
        np.random.normal(0, 1, (real_imgs.shape[0],
                                opt.latent_dim)).astype(np.float32))
    gen_imgs = generator(z)
    d_real = discriminator(real_imgs)
    d_fake = discriminator(gen_imgs)
    Z = jt.sum(jt.exp(-d_real)) + jt.sum(jt.exp(-d_fake))
    d_loss = d_target * jt.sum(d_real) + log(Z)
    optimizer_D.step(d_loss)

    # ---------------------
    #  Train Generator
    # ---------------------
    g_loss = g_target * (jt.sum(d_real) + jt.sum(d_fake)) + log(Z)
    optimizer_G.step(d_loss + g_loss)

    if warmup_times == -1:
        print('[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]' %
              (epoch, opt.n_epochs, i, len(dataloader),
               d_loss.numpy()[0], g_loss.numpy()[0]))
        batches_done = epoch * len(dataloader) + i
def execute(self, x):
    # Hyperbolic tangent: (e^x - e^-x) / (e^x + e^-x)
    return (jt.exp(x) - jt.exp(-x)) / (jt.exp(x) + jt.exp(-x))
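# Sanity check (hypothetical values; assumes jt.tanh is available in your
# Jittor version): the explicit formula should agree with the built-in.
import numpy as np
import jittor as jt
x = jt.array(np.array([-1., 0., 1.], dtype=np.float32))
print((jt.exp(x) - jt.exp(-x)) / (jt.exp(x) + jt.exp(-x)))
print(jt.tanh(x))  # same values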
def execute(self, x, locations, benchmark=False, timers=None):
    logits = []
    bbox_reg = []
    centerness = []
    proposal_margin = []
    proposal_embed = []
    if benchmark and timers is not None:
        timers[2].tic()
    for l, feature in enumerate(x):
        cls_tower = self.cls_tower(feature)
        box_tower = self.bbox_tower(feature)
        logits.append(self.cls_logits(cls_tower))
        if self.centerness_on_reg:
            centerness.append(self.centerness(box_tower))
        else:
            centerness.append(self.centerness(cls_tower))
        bbox_pred = self.scales[l](self.bbox_pred(box_tower))
        if self.norm_reg_targets:
            bbox_pred = nn.relu(bbox_pred)
            if self.is_training():
                bbox_reg.append(bbox_pred)
            else:
                bbox_reg.append(bbox_pred * self.fpn_strides[l])
        else:
            bbox_reg.append(jt.exp(bbox_pred))

        # ############### Mask Prediction ###############
        embed_x = box_tower
        h, w = embed_x.size()[-2:]
        proposal_spatial_embd = self.proposal_spatial_embed_pred(embed_x)
        proposal_other_embd = self.proposal_other_embed_pred(embed_x)
        coordinates = locations[l].transpose(1, 0).view(
            2, h, w).unsqueeze(0).expand((embed_x.shape[0], 2, h, w))
        scaled_coordinates = self.position_scale(coordinates) / 100.0
        proposal_spatial_embd = scaled_coordinates + proposal_spatial_embd
        proposal_embed.append(
            jt.contrib.concat([proposal_spatial_embd, proposal_other_embd],
                              dim=1))
        margin_x = box_tower / 32
        proposal_margin.append(jt.exp(self.proposal_margin_pred(margin_x)))

    if benchmark and timers is not None:
        timers[2].toc()
        timers[3].tic()

    # pixel embedding
    mask_x = x[0]
    mask_x = self.mask_tower(mask_x)
    h, w = mask_x.size()[-2:]
    pixel_spatial_embd = self.pixel_spatial_embed_pred(mask_x)
    pixel_other_embd = self.pixel_other_embed_pred(mask_x)
    coordinates = locations[0].transpose(1, 0).view(2, h, w).unsqueeze(0).expand(
        (mask_x.shape[0], 2, h, w))
    scaled_coordinates = self.position_scale(coordinates) / 100.0
    pixel_spatial_embd = scaled_coordinates + pixel_spatial_embd
    pixel_embed = jt.contrib.concat([pixel_spatial_embd, pixel_other_embd], dim=1)

    if benchmark and timers is not None:
        timers[3].toc()

    return logits, bbox_reg, centerness, proposal_embed, proposal_margin, pixel_embed
def reparameterization(mu, logvar):
    # Reparameterization trick: z = mu + std * eps, with eps ~ N(0, I),
    # so gradients can flow through mu and logvar.
    std = jt.exp(logvar / 2)
    sampled_z = jt.array(np.random.normal(
        0, 1, (mu.shape[0], opt.latent_dim))).float32()
    z = sampled_z * std + mu
    return z
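# Usage sketch (hypothetical; relies on the same global `opt.latent_dim` the
# function reads): with mu = logvar = 0, z is a plain standard-normal sample.
import numpy as np
import jittor as jt
mu = jt.zeros((4, opt.latent_dim))
logvar = jt.zeros((4, opt.latent_dim))
z = reparameterization(mu, logvar)
print(z.shape)  # (4, opt.latent_dim)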
def nonlinearDt(dt, c=3.3):
    # Nonlinear remapping of dt in [0, 1]: (e^(c*dt) - 1) / (e^c - 1),
    # which fixes the endpoints (0 -> 0, 1 -> 1).
    nldt = (jt.exp(dt * c) - 1) / (jt.exp(c) - 1)
    return nldt
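# Worked example (hypothetical values): the endpoints are preserved,
# nonlinearDt(0) == 0 and nonlinearDt(1) == 1, with c bending the curve.
import numpy as np
import jittor as jt
dt = jt.array(np.array([0., 0.5, 1.], dtype=np.float32))
print(nonlinearDt(dt))  # ~[0.00, 0.16, 1.00]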