def load_obj(filename_obj, normalization=False, load_texture=False, texture_res=4, texture_type='surface'):
    """
    Read a Wavefront .obj mesh.

    Only ``v`` (vertex) and ``f`` (face) records are interpreted, plus
    ``mtllib`` lines when surface textures are requested. Faces with more
    than three corners are split into a triangle fan around their first
    corner. Only the part of each face corner before the first ``/`` is used.

    Args:
        filename_obj: path of the .obj file to read.
        normalization: when true, rescale/shift the vertices (see the
            normalisation step at the end of the function).
        load_texture: when true, a third value ``textures`` is returned.
        texture_res: forwarded unchanged to ``load_textures``.
        texture_type: ``'surface'`` loads textures through the material file
            named by an ``mtllib`` line; ``'vertex'`` reads fields 4..6 of
            every ``v`` record.

    Returns:
        ``(vertices, faces)`` or ``(vertices, faces, textures)``. ``faces`` is
        converted to float32 and shifted by -1 (file indices are 1-based).

    Raises:
        AssertionError: ``texture_type`` is not ``'surface'`` or ``'vertex'``.
        Exception: surface textures were requested but no ``mtllib`` line
            produced any.
    """
    assert texture_type in ['surface', 'vertex']

    with open(filename_obj) as f:
        lines = f.readlines()

    # Tokenise every line once; blank lines carry no record and are dropped.
    records = [fields for fields in (line.split() for line in lines) if len(fields) != 0]

    # Vertex positions: the three fields following the 'v' tag.
    vertices = [[float(v) for v in fields[1:4]] for fields in records if fields[0] == 'v']
    vertices = jt.array(np.vstack(vertices)).float32()

    # Faces: fan triangulation (corner0, corner_i+1, corner_i+2).
    faces = []
    for fields in records:
        if fields[0] != 'f':
            continue
        corners = fields[1:]
        anchor = int(corners[0].split('/')[0])
        for second, third in zip(corners[1:], corners[2:]):
            faces.append((anchor, int(second.split('/')[0]), int(third.split('/')[0])))
    faces = jt.array(np.vstack(faces).astype(np.int32)).float32() - 1

    # Textures
    if load_texture and texture_type == 'surface':
        textures = None
        obj_dir = os.path.dirname(filename_obj)
        for line in lines:
            # Matched against the raw line, so an indented 'mtllib' is ignored.
            if not line.startswith('mtllib'):
                continue
            filename_mtl = os.path.join(obj_dir, line.split()[1])
            textures = load_textures(filename_obj, filename_mtl, texture_res)
        if textures is None:
            raise Exception('Failed to load textures.')
    elif load_texture and texture_type == 'vertex':
        # Per-vertex values stored after the position on each 'v' record.
        textures = [[float(v) for v in fields[4:7]] for fields in records if fields[0] == 'v']
        textures = jt.array(np.vstack(textures)).float()

    # Normalisation: move the per-axis minimum to zero, scale by the largest
    # absolute coordinate, double, then subtract half of the per-axis maximum.
    if normalization:
        vertices -= vertices.min(0)
        vertices /= jt.abs(vertices).max()
        vertices *= 2
        vertices -= vertices.max(0) / 2

    if not load_texture:
        return vertices, faces
    return vertices, faces, textures
def smooth_l1_loss(input, target, beta=1. / 9, size_average=True):
    """Smooth-L1 loss between ``input`` and ``target``.

    With ``d = |input - target|`` the element-wise loss is::

        0.5 * d**2 / beta    where d < beta
        d - 0.5 * beta       elsewhere

    Previously ``beta`` was accepted but ignored and the switch-over point
    was hard-coded to 1.0; pass ``beta=1.0`` to reproduce that formula.

    Args:
        input: predicted values.
        target: reference values; combined with ``input`` by subtraction.
        beta: distance at which the loss switches from quadratic to linear.
            Values below ``1e-5`` select a plain L1 loss, which avoids
            dividing by (nearly) zero.
        size_average: return the mean of the element-wise loss when true,
            otherwise its sum.

    Returns:
        The reduced loss (mean or sum of the element-wise values).
    """
    diff = jt.abs(input - target)
    if beta < 1e-5:
        # Degenerate quadratic zone: the loss is simply the absolute error.
        loss = diff
    else:
        # 1.0 where the quadratic branch applies, 0.0 where the linear one does.
        in_quadratic_zone = (diff < beta).float32()
        loss = (in_quadratic_zone * 0.5 * diff**2 / beta) + (1 - in_quadratic_zone) * (diff - 0.5 * beta)
    if size_average:
        return loss.mean()
    return loss.sum()
def l2_loss(input, target):
    """
    Half squared error restricted to the entries where ``target > 0``.

    The squared differences at those entries are halved, divided by the
    number of selected entries and summed. When no entry of ``target`` is
    positive the result is the sum of ``input * 0.0``.

    Args:
        input: predicted values, indexed with the positions selected from
            ``target``.
        target: reference values; only strictly positive entries contribute.

    Returns:
        The summed loss.
    """
    positive = jt.nonzero(target > 0.0).squeeze(1)
    num_positive = positive.shape[0]

    if num_positive <= 0:
        # Nothing selected: produce a zero that is still derived from `input`.
        return (input * 0.0).sum()

    residual = jt.abs(input[positive] - target[positive])
    return (0.5 * residual**2 / num_positive).sum()
def test_unary_op(self):
    """Exercise a handful of unary operators through the ``check`` helper.

    ``check`` is defined outside this block; it is called with an operator
    name and a NumPy array.
    """
    # Scalar sanity checks: dtype of a float64 scalar and both spellings of abs.
    assert jt.float64(1).data.dtype == "float64"
    assert (jt.abs(-1) == 1).data.all()
    assert (abs(-jt.float64(1)) == 1).data.all()

    a = np.array([-1,2,3,0])
    for op_name in ("abs", "negative", "logical_not", "bitwise_not"):
        check(op_name, a)

    # NOTE(review): `b` is built but never used; log/exp/sqrt below run on the
    # integer array `a`. Confirm whether `b` was meant to be passed instead.
    b = np.array([1.1, 2.2, 3.3, 4.4, -1, 0])
    for op_name in ("log", "exp", "sqrt"):
        check(op_name, a)
def execute(self, pred, true):
    """Scale the wrapped loss by an alpha weight and a modulating factor.

    Args:
        pred: raw predictions; passed through a sigmoid to obtain
            probabilities.
        true: targets, used both by ``self.loss_fcn`` and to build the
            weighting terms.

    Returns:
        The weighted loss, reduced by mean or sum according to
        ``self.reduction``; any other setting returns it unreduced.
    """
    loss = self.loss_fcn(pred, true)

    probability = jt.sigmoid(pred)  # logits -> probabilities
    # alpha where true == 1, (1 - alpha) where true == 0, blended in between.
    class_weight = true * self.alpha + (1 - true) * (1 - self.alpha)
    # Grows with the distance between target and predicted probability.
    focus = jt.abs(true - probability)**self.gamma
    loss *= class_weight * focus

    if self.reduction == 'mean':
        return loss.mean()
    if self.reduction == 'sum':
        return loss.sum()
    return loss  # 'none'
def smooth_l1_loss(y_true, y_pred,reduction="mean"):
    """Smooth-L1 loss with a fixed switch-over point of 1.0.

    Element-wise, with ``d = |y_true - y_pred|``: ``0.5 * d**2`` where
    ``d < 1`` and ``d - 0.5`` elsewhere.

    Args:
        y_true - ground truth
        y_pred - predictions
        reduction - how to reduce the element-wise loss; one of
            'mean', 'sum' or 'none'

    Raises:
        ValueError: ``reduction`` is not one of the supported modes.
    """
    abs_err = jt.abs(y_true - y_pred)
    # 1.0 inside the quadratic zone, 0.0 outside it.
    quadratic_zone = (abs_err<1.0).float32()
    quadratic_term = quadratic_zone * 0.5 * abs_err.sqr()
    linear_term = (1 - quadratic_zone) * (abs_err - 0.5)
    loss = quadratic_term + linear_term

    if reduction=="mean":
        return loss.mean()
    if reduction=="sum":
        return loss.sum()
    if reduction=="none":
        return loss
    raise ValueError(f'not support {reduction}')
def execute(self, input, target):
    """Absolute-error (L1) loss between ``input`` and ``target``.

    Args:
        input: predicted values.
        target: reference values; combined with ``input`` by subtraction.

    Returns:
        * the element-wise absolute error when ``self.reduction`` is ``None``
          or the string ``'none'``;
        * its mean when ``self.reduction == 'mean'``;
        * its sum for any other setting (unchanged from before, so ``'sum'``
          and unrecognised values keep summing).

    Previously the string ``'none'`` fell into the "anything but mean"
    branch and silently returned the sum.
    """
    ret = jt.abs(input - target)
    # Identity test for None; also honour the string spelling 'none'.
    if self.reduction is None or self.reduction == 'none':
        return ret
    if self.reduction == 'mean':
        return jt.mean(ret)
    return jt.sum(ret)
# One pass over the training data: for every batch, first update the
# generator, then the discriminator. `train_loader`, `generator`,
# `discriminator`, the two optimizers, `opt` and `k` are defined outside this
# fragment.
for i, (imgs, _) in enumerate(train_loader):

    # Configure input
    real_imgs = jt.array(imgs)

    # -----------------
    #  Train Generator
    # -----------------

    # Sample noise as generator input: one latent vector of size
    # opt.latent_dim per image in the batch, drawn from N(0, 1)
    z = jt.array(np.random.normal(0, 1, (imgs.shape[0], opt.latent_dim))).float32()

    # Generate a batch of images
    gen_imgs = generator(z)

    # Loss measures generator's ability to fool the discriminator: the mean
    # absolute difference between the discriminator's output and the very
    # images it was given
    g_loss = jt.mean(jt.abs(discriminator(gen_imgs) - gen_imgs))

    optimizer_G.step(g_loss)

    # ---------------------
    #  Train Discriminator
    # ---------------------

    # Measure discriminator's ability to classify real from generated samples.
    # stop_grad() is applied to the generated batch before it is fed to the
    # discriminator here.
    d_real = discriminator(real_imgs)
    d_fake = discriminator(gen_imgs.stop_grad())

    # Same output-vs-input distance as above, for real and generated batches
    d_loss_real = jt.mean(jt.abs(d_real - real_imgs))
    d_loss_fake = jt.mean(jt.abs(d_fake - gen_imgs.stop_grad()))
    # `k` weights the generated-sample term; it is not assigned in this fragment
    d_loss = d_loss_real - k * d_loss_fake

    optimizer_D.step(d_loss)
def execute(self, net, predictions, targets, masks, num_crowds):
    """Multibox Loss

    Matches priors to ground-truth boxes per image, then accumulates the
    enabled loss terms into a dict keyed by a one-letter code (see the
    "Loss Key" comment before the return).

    Args:
        net: passed through to ``self.mask_iou_loss`` when the mask-IoU loss
            is enabled.
        predictions (dict): network outputs, read by key:
            'loc', 'conf', 'mask', 'priors', and depending on ``cfg`` also
            'proto', 'score', 'inst', 'classes', 'segm'.
            loc shape: jt.size(batch_size,num_priors,4)
            conf shape: jt.size(batch_size,num_priors,num_classes)
            masks shape: jt.size(batch_size,num_priors,mask_dim)
            priors shape: jt.size(num_priors,4)
            proto* shape: jt.size(batch_size,mask_h,mask_w,mask_dim)

        targets (list<tensor>): Ground truth boxes and labels for a batch,
            shape: [batch_size][num_objs,5] (last idx is the label).

        masks (list<tensor>): Ground truth masks for each object in each image,
            shape: [batch_size][num_objs,im_height,im_width]
            NOTE: entries are replaced in place with their non-crowd part
            for images that have crowd annotations.

        num_crowds (list<int>): Number of crowd annotations per batch. The crowd
            annotations should be the last num_crowds elements of targets and masks.

        * Only if mask_type == lincomb

    Returns:
        dict mapping loss code to loss value. Every entry except 'P', 'E'
        and 'S' is divided by the total number of positive priors; those
        three are divided by the batch size.
    """

    loc_data = predictions['loc']
    conf_data = predictions['conf']
    mask_data = predictions['mask']
    priors = predictions['priors']

    if cfg.mask_type == mask_type.lincomb:
        proto_data = predictions['proto']

    score_data = predictions['score'] if cfg.use_mask_scoring else None
    inst_data = predictions['inst'] if cfg.use_instance_coeff else None

    # Only the lincomb mask branch assigns this. Pre-binding it keeps the
    # "Mask IoU Loss" check below from raising UnboundLocalError when
    # cfg.use_maskiou is set but that branch did not run.
    maskiou_targets = None

    labels = [None] * len(targets) # Used in sem segm loss

    batch_size = loc_data.shape[0]
    num_priors = priors.shape[0]
    num_classes = self.num_classes

    # Match priors (default boxes) and ground truth boxes
    # These tensors will be created with the same device as loc_data
    loc_t = jt.empty((batch_size, num_priors, 4),dtype=loc_data.dtype)
    gt_box_t = jt.empty((batch_size, num_priors, 4),dtype=loc_data.dtype)
    conf_t = jt.empty((batch_size, num_priors)).int32()
    idx_t = jt.empty((batch_size, num_priors)).int32()

    if cfg.use_class_existence_loss:
        class_existence_t = jt.empty((batch_size, num_classes-1),dtype=loc_data.dtype)

    # jt.sync(list(predictions.values()))

    for idx in range(batch_size):
        # Last column of each target row is the label, the rest is the box.
        truths = targets[idx][:, :-1]
        labels[idx] = targets[idx][:, -1].int32()

        if cfg.use_class_existence_loss:
            # Construct a one-hot vector for each object and collapse it into an existence vector with max
            # Also it's fine to include the crowd annotations here
            class_existence_t[idx,:] = jt.eye(num_classes-1)[labels[idx]].max(dim=0)[0]

        # Split the crowd annotations because they come bundled in
        cur_crowds = num_crowds[idx]
        if cur_crowds > 0:
            # Returns (last cur_crowds entries, everything before them).
            split = lambda x: (x[-cur_crowds:], x[:-cur_crowds])
            crowd_boxes, truths = split(truths)

            # We don't use the crowd labels or masks
            _, labels[idx] = split(labels[idx])
            _, masks[idx] = split(masks[idx])
        else:
            crowd_boxes = None

        # match() receives loc_t / conf_t / idx_t together with the image
        # index; presumably it fills row `idx` of each in place (its return
        # value is not used here) - confirm against its definition.
        match(self.pos_threshold, self.neg_threshold,
              truths, priors, labels[idx], crowd_boxes,
              loc_t, conf_t, idx_t, idx, loc_data[idx])

        gt_box_t[idx,:,:] = truths[idx_t[idx]]

    # wrap targets
    loc_t.stop_grad()
    conf_t.stop_grad()
    idx_t.stop_grad()

    # Priors with a non-zero confidence target are treated as positives.
    pos = conf_t > 0
    num_pos = pos.sum(dim=1, keepdims=True)

    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.ndim).expand_as(loc_data)

    losses = {}

    # Localization Loss (Smooth L1)
    if cfg.train_boxes:
        loc_p = loc_data[pos_idx].view(-1, 4)
        # From here on loc_t holds only the rows of positive priors.
        loc_t = loc_t[pos_idx].view(-1, 4)
        # print(loc_t)
        losses['B'] = nn.smooth_l1_loss(loc_p, loc_t, reduction='sum') * cfg.bbox_alpha

    if cfg.train_masks:
        if cfg.mask_type == mask_type.direct:
            if cfg.use_gt_bboxes:
                pos_masks = []
                for idx in range(batch_size):
                    pos_masks.append(masks[idx][idx_t[idx, pos[idx]]])
                masks_t = jt.contrib.concat(pos_masks, 0)
                masks_p = mask_data[pos, :].view(-1, cfg.mask_dim)
                losses['M'] = nn.bce_loss(jt.clamp(masks_p, 0, 1), masks_t, size_average=False) * cfg.mask_alpha
            else:
                losses['M'] = self.direct_mask_loss(pos_idx, idx_t, loc_data, mask_data, priors, masks)
        elif cfg.mask_type == mask_type.lincomb:
            ret = self.lincomb_mask_loss(pos, idx_t, loc_data, mask_data, priors, proto_data, masks, gt_box_t, score_data, inst_data, labels)
            # With mask IoU enabled the helper returns a (losses, targets) pair.
            if cfg.use_maskiou:
                loss, maskiou_targets = ret
            else:
                loss = ret
            losses.update(loss)

            if cfg.mask_proto_loss is not None:
                if cfg.mask_proto_loss == 'l1':
                    losses['P'] = jt.mean(jt.abs(proto_data)) / self.l1_expected_area * self.l1_alpha
                elif cfg.mask_proto_loss == 'disj':
                    losses['P'] = -jt.mean(jt.max(nn.log_softmax(proto_data, dim=-1), dim=-1)[0])

    # Confidence loss
    if cfg.use_focal_loss:
        if cfg.use_sigmoid_focal_loss:
            losses['C'] = self.focal_conf_sigmoid_loss(conf_data, conf_t)
        elif cfg.use_objectness_score:
            losses['C'] = self.focal_conf_objectness_loss(conf_data, conf_t)
        else:
            losses['C'] = self.focal_conf_loss(conf_data, conf_t)
    else:
        if cfg.use_objectness_score:
            # NOTE(review): loc_p is only bound when cfg.train_boxes is set;
            # this call raises UnboundLocalError otherwise - confirm the two
            # options are always enabled together.
            losses['C'] = self.conf_objectness_loss(conf_data, conf_t, batch_size, loc_p, loc_t, priors)
        else:
            losses['C'] = self.ohem_conf_loss(conf_data, conf_t, pos, batch_size)

    # Mask IoU Loss
    if cfg.use_maskiou and maskiou_targets is not None:
        losses['I'] = self.mask_iou_loss(net, maskiou_targets)

    # These losses also don't depend on anchors
    if cfg.use_class_existence_loss:
        losses['E'] = self.class_existence_loss(predictions['classes'], class_existence_t)
    if cfg.use_semantic_segmentation_loss:
        losses['S'] = self.semantic_segmentation_loss(predictions['segm'], masks, labels)

    # Divide all losses by the number of positives.
    # Don't do it for loss[P] because that doesn't depend on the anchors.
    total_num_pos = num_pos.sum().float()
    for k in losses:
        if k not in ('P', 'E', 'S'):
            losses[k] /= total_num_pos
        else:
            losses[k] /= batch_size

    # Loss Key:
    #  - B: Box Localization Loss
    #  - C: Class Confidence Loss
    #  - M: Mask Loss
    #  - P: Prototype Loss
    #  - D: Coefficient Diversity Loss
    #  - E: Class Existence Loss
    #  - S: Semantic Segmentation Loss
    return losses
def L1(self, A, B):
    """Return the summed absolute difference between two mask tensors.

    Args:
        A: object exposing ``get_mask_tensor()``.
        B: object exposing ``get_mask_tensor()``.

    Returns:
        The value obtained by calling ``.item()`` on the summed absolute
        difference of the two masks (the difference is cast with
        ``.float()`` before taking the absolute value).
    """
    mask_delta = A.get_mask_tensor() - B.get_mask_tensor()
    return jt.sum(jt.abs(mask_delta.float())).item()