def direct_mask_loss(self, pos_idx, idx_t, loc_data, mask_data, priors, masks):
    """
    Direct-mask loss.

    For every image in the batch: decode the predicted boxes, crop the
    looked-up gt masks with those boxes, pool each crop to cfg.mask_size,
    threshold at 0.5, and add the summed BCE (scaled by cfg.mask_alpha)
    between the clamped mask predictions and those targets.

    Returns the accumulated loss over the batch (0 if the batch is empty).
    """
    total_loss = 0
    num_images = mask_data.shape[0]

    for img_i in range(num_images):
        # Everything needed to build the target is computed without gradients.
        with jt.no_grad():
            positives = pos_idx[img_i]
            positive_rows = positives[:, 1]

            # Decoded predicted boxes, restricted to the positives and
            # clamped to the unit square.
            decoded = decode(loc_data[img_i], priors.data,
                             cfg.use_yolo_regressors)
            boxes = decoded[positives].view(-1, 4).clamp(0, 1)

            # Gt masks selected through the idx_t lookup.
            gt_masks = masks[img_i][idx_t[img_i, positive_rows]]
            box_count, mask_height, mask_width = gt_masks.shape

            # Guard against out-of-bounds coordinates before slicing.
            left, right = sanitize_coordinates(boxes[:, 0], boxes[:, 2],
                                               mask_width)
            top, bottom = sanitize_coordinates(boxes[:, 1], boxes[:, 3],
                                               mask_height)

            # Every crop has its own size, hence the per-box loop.
            pooled = []
            for k in range(box_count):
                crop_k = gt_masks[k, top[k]:bottom[k], left[k]:right[k]]

                # A 1px-wide box drops a dimension; put it back.
                while crop_k.ndim < 2:
                    crop_k = crop_k.unsqueeze(0)

                resized = nn.AdaptiveAvgPool2d(cfg.mask_size)(
                    crop_k.unsqueeze(0))
                pooled.append(resized.view(1, -1))

            # Threshold the downsampled masks into a binary target.
            target = (jt.contrib.concat(pooled, 0) > 0.5).float()

        predicted = jt.clamp(mask_data[img_i, positive_rows, :], 0, 1)
        total_loss += nn.bce_loss(predicted, target,
                                  size_average=False) * cfg.mask_alpha

    return total_loss
def __init__(self, probs=None, logits=None):
    """
    Build the distribution from probabilities and/or logits.

    At least one of `probs` / `logits` must be supplied; the assert below
    rejects the case where both are None.

    Args:
        probs: optional probabilities. They are divided by their sum over
            the last axis before being stored.
        logits: optional logits. When `probs` is missing, probabilities are
            obtained from `jt.sigmoid(logits)`; when `logits` is missing,
            they are derived from the normalized probabilities with
            `jt.safe_log`.

    Attributes set: probs, logits, cum_probs, cum_probs_l, cum_probs_r.
    """
    assert not (probs is None and logits is None)
    if probs is None:
        # cannot align to pytorch
        probs = jt.sigmoid(logits)
    # Normalize over the last axis (second argument is presumably
    # keepdims -- TODO confirm against the jittor `sum` signature).
    probs = probs / probs.sum(-1, True)
    if logits is None:
        logits = jt.safe_log(probs)
    # NOTE(review): the cached values below are built under no_grad; confirm
    # the intended extent of this scope against the original file.
    with jt.no_grad():
        self.probs = probs
        self.logits = logits
        # Running sums produced by simple_presum; the two shifted views drop
        # the last / first entry along the final axis respectively.
        self.cum_probs = simple_presum(self.probs)
        self.cum_probs_l = self.cum_probs[..., :-1]
        self.cum_probs_r = self.cum_probs[..., 1:]
def conf_objectness_loss(self, conf_data, conf_t, batch_size, loc_p, loc_t, priors):
    """
    Objectness-style confidence loss.

    Column 0 of the confidence output is treated as an objectness logit
    (YOLO style, with the box IoU as the soft target for positives); the
    remaining columns get a cross-entropy loss over positive priors only.

    Returns cfg.conf_alpha * (class loss + positive objectness loss
    + negative objectness loss), each term being a sum (not a mean).
    """
    # Flatten over batch and priors.
    flat_targets = conf_t.view(-1)
    flat_conf = conf_data.view(-1, conf_data.shape[-1])

    is_positive = (flat_targets > 0)
    is_negative = (flat_targets == 0)

    objectness = flat_conf[:, 0]
    objectness_pos = objectness[is_positive]
    objectness_neg = objectness[is_negative]

    # Binary cross entropy with target 0, in simplified form.
    neg_term = -nn.log_sigmoid(-objectness_neg).sum()

    # The IoU soft targets are constants as far as the gradient goes.
    with jt.no_grad():
        tiled_priors = priors.unsqueeze(0).expand(batch_size, -1, -1)
        matched_priors = tiled_priors.reshape(-1, 4)[is_positive]

        predicted_boxes = decode(loc_p, matched_priors,
                                 cfg.use_yolo_regressors)
        target_boxes = decode(loc_t, matched_priors,
                              cfg.use_yolo_regressors)

        iou_targets = elemwise_box_iou(predicted_boxes, target_boxes)

    # Binary cross entropy against the IoU soft target.
    pos_term = -iou_targets * nn.log_sigmoid(objectness_pos) - (
        1 - iou_targets) * nn.log_sigmoid(-objectness_pos)
    pos_term = pos_term.sum()

    # Class confidence: drop the objectness column, so labels shift by one.
    class_logits = (flat_conf[:, 1:])[is_positive]
    class_labels = flat_targets[is_positive] - 1
    class_term = nn.cross_entropy_loss(class_logits,
                                       class_labels,
                                       size_average=False)

    return cfg.conf_alpha * (class_term + pos_term + neg_term)
def prepare_data(datum, allocation: list = None):
    """
    Turn one loader item into network-ready inputs.

    `datum` is unpacked as (images, (targets, masks, num_crowds)). Each
    entry covered by `allocation` is passed through `gradinator`, the
    batch is optionally forced to a common size, and the lists are then
    cut into consecutive slices of the sizes given in `allocation`.

    Only the first slice is returned, as
    (stacked images, targets, masks, num_crowds). When `allocation` is
    omitted a single slice of args.batch_size entries is used.
    """
    with jt.no_grad():
        if allocation is None:
            # One share that takes the whole batch.
            allocation = [args.batch_size]

        images, (targets, masks, num_crowds) = datum

        cursor = 0
        for share in allocation:
            for _ in range(share):
                images[cursor] = gradinator(images[cursor])
                targets[cursor] = gradinator(targets[cursor])
                masks[cursor] = gradinator(masks[cursor])
                cursor += 1

        if cfg.preserve_aspect_ratio:
            # The size of a randomly picked image becomes the batch size.
            _, h, w = images[random.randint(0, len(images) - 1)].shape

            for idx, sample in enumerate(
                    zip(images, targets, masks, num_crowds)):
                image, target, mask, num_crowd = sample
                resized = enforce_size(image, target, mask, num_crowd, w, h)
                images[idx], targets[idx], masks[idx], num_crowds[idx] \
                    = resized

        share_count = len(allocation)
        split_images = [None] * share_count
        split_targets = [None] * share_count
        split_masks = [None] * share_count
        split_numcrowds = [None] * share_count

        lo = 0
        for share_idx, share in enumerate(allocation):
            hi = lo + share
            split_images[share_idx] = jt.stack(images[lo:hi], dim=0)
            split_targets[share_idx] = targets[lo:hi]
            split_masks[share_idx] = masks[lo:hi]
            split_numcrowds[share_idx] = num_crowds[lo:hi]
            lo = hi

        return (split_images[0], split_targets[0], split_masks[0],
                split_numcrowds[0])
def _forward_train(self, anchors, objectness, rpn_box_regression, targets):
    """
    Training-time RPN step.

    Returns (boxes, losses) where `losses` holds "loss_objectness" and
    "loss_rpn_box_reg" as produced by self.loss_evaluator.
    """
    if not self.cfg.MODEL.RPN_ONLY:
        # End-to-end models need the anchors turned into boxes and sampled
        # into a training batch; this selection is done without gradients.
        with jt.no_grad():
            boxes = self.box_selector_train(anchors, objectness,
                                            rpn_box_regression, targets)
    else:
        # RPN-only training computes its loss straight from the predicted
        # objectness / regression values, so the anchors are passed through
        # untouched and the box transformation is skipped.
        boxes = anchors

    objectness_loss, box_reg_loss = self.loss_evaluator(
        anchors, objectness, rpn_box_regression, targets)

    return boxes, {
        "loss_objectness": objectness_loss,
        "loss_rpn_box_reg": box_reg_loss,
    }
def enforce_size(img, targets, masks, num_crowds, new_w, new_h):
    """
    Bring an image (and its masks / boxes) to new_w x new_h while keeping
    the aspect ratio: resize to fit inside the target, then zero-pad.

    The first four coordinates of `targets` are rescaled in place. Returns
    (img, targets, masks, num_crowds); inputs are returned untouched when
    the image already has the requested size.
    """
    with jt.no_grad():
        _, cur_h, cur_w = img.size()

        if cur_h == new_h and cur_w == new_w:
            return img, targets, masks, num_crowds

        # Largest size with the original aspect ratio that fits the target.
        fit_w = new_w
        fit_h = cur_h * new_w / cur_w
        if fit_h > new_h:
            fit_w *= new_h / fit_h
            fit_h = new_h
        fit_w = int(fit_w)
        fit_h = int(fit_h)

        fit_size = (fit_h, fit_w)
        resize_opts = dict(mode='bilinear', align_corners=False)

        # Resize the image.
        img = nn.interpolate(img.unsqueeze(0), fit_size, **resize_opts)
        img.squeeze_(0)

        # Resize the masks, treating every object as a colour channel.
        masks = nn.interpolate(masks.unsqueeze(0), fit_size, **resize_opts)
        masks.squeeze_(0)

        # Rescale the boxes; with padding they end up in the top-left corner.
        targets[:, [0, 2]] *= (fit_w / new_w)
        targets[:, [1, 3]] *= (fit_h / new_h)

        # Pad on the right / bottom up to the requested size.
        padding = (0, new_w - fit_w, 0, new_h - fit_h)
        img = F.pad(img, padding, mode='constant', value=0)
        masks = F.pad(masks, padding, mode='constant', value=0)

        return img, targets, masks, num_crowds
def execute(self, features, proposals, targets=None):
    """
    Run the box head.

    Arguments:
        features (list[Tensor]): feature-maps from possibly several levels
        proposals (list[BoxList]): proposal boxes
        targets (list[BoxList], optional): the ground-truth targets.

    Returns:
        x (Tensor): output of the feature extractor
        proposals (list[BoxList]): the subsampled proposals while training,
            the post-processed predictions otherwise
        losses (dict[Tensor]): `loss_classifier` and `loss_box_reg` while
            training, an empty dict otherwise
    """
    if self.is_training():
        # Faster R-CNN trains on a subsample of the proposals with a fixed
        # positive / negative ratio; the sampling itself needs no gradient.
        with jt.no_grad():
            proposals = self.loss_evaluator.subsample(proposals, targets)

    # Pooler + heads, then the final classifier on top of those features.
    x = self.feature_extractor(features, proposals)
    class_logits, box_regression = self.predictor(x)

    if self.is_training():
        loss_classifier, loss_box_reg = self.loss_evaluator(
            [class_logits], [box_regression])
        losses = {
            "loss_classifier": loss_classifier,
            "loss_box_reg": loss_box_reg,
        }
        return x, proposals, losses

    detections = self.post_processor((class_logits, box_regression),
                                     proposals)
    return x, detections, {}
def compute_validation_map(epoch, iteration, yolact_net, dataset, log: Log = None):
    """
    Evaluate the network on `dataset` without gradients and, when a log is
    given, record the result under 'val' together with the elapsed time,
    epoch and iteration. The network is put in eval mode for the run and
    switched back to train mode afterwards.
    """
    with jt.no_grad():
        yolact_net.eval()

        began = time.time()
        print()
        print("Computing validation mAP (this may take a while)..",
              flush=True)
        val_info = eval_script.evaluate(yolact_net, dataset, train_mode=True)
        elapsed = time.time() - began

        if log is not None:
            log.log('val',
                    val_info,
                    elapsed=elapsed,
                    epoch=epoch,
                    iter=iteration)

        yolact_net.train()
def semantic_segmentation_loss(self, segment_data, mask_t, class_t, interpolation_mode='bilinear'):
    """
    Semantic segmentation loss.

    For every image the gt masks are resized to the prediction's spatial
    size, binarized at 0.5 and merged per class (element-wise maximum) into
    a target; the summed BCE-with-logits against `segment_data` is
    accumulated, then divided by the mask area and scaled by
    cfg.semantic_segmentation_alpha.

    Note: the class axis of `segment_data` excludes the background class.
    """
    batch_size, _, mask_h, mask_w = segment_data.shape
    accumulated = 0

    for img_i in range(batch_size):
        logits = segment_data[img_i]
        labels = class_t[img_i]

        # The target is built without tracking gradients.
        with jt.no_grad():
            resized = nn.interpolate(mask_t[img_i].unsqueeze(0),
                                     (mask_h, mask_w),
                                     mode=interpolation_mode,
                                     align_corners=False).squeeze(0)
            binary_masks = (resized > 0.5).float()

            # One plane per class, filled with the union of its objects.
            target = jt.zeros_like(logits)
            target.stop_grad()
            for obj_i in range(binary_masks.shape[0]):
                class_plane = labels[obj_i]
                target[class_plane] = jt.maximum(target[class_plane],
                                                 binary_masks[obj_i])

        accumulated += nn.BCEWithLogitsLoss(size_average=False)(logits,
                                                                target)

    return accumulated / mask_h / mask_w * cfg.semantic_segmentation_alpha
def compute_on_dataset(model, data_loader, bbox_aug, timer=None):
    """
    Run inference over `data_loader` and collect the detached outputs.

    Args:
        model: network to evaluate; switched to eval mode here.
        data_loader: iterable of (images, image_sizes, image_ids) batches.
            Its `is_train` and `num_workers` attributes are overwritten.
        bbox_aug: when truthy, `im_detect_bbox_aug` is used instead of a
            plain forward pass.
        timer: optional object with `tic()` / `toc()` wrapped around the
            forward pass.

    Returns:
        dict mapping every image id to its detached result.

    Side effect: prints an fps estimate. The clock starts at batch index 20
    so that the first iterations (jittor compilation) are excluded.
    """
    model.eval()
    results_dict = {}
    data_loader.is_train = False
    data_loader.num_workers = 4
    start_time = 0

    for i, batch in enumerate(tqdm(data_loader)):
        if i == 20:
            # For fair comparison, remove jittor compiling time
            start_time = time.time()

        with nvtx_scope("preprocess"):
            images, image_sizes, image_ids = batch

        with nvtx_scope("model"):
            with jt.no_grad():
                if timer:
                    timer.tic()
                if bbox_aug:
                    output = im_detect_bbox_aug(model, images)
                else:
                    output = model(images)
                if timer:
                    timer.toc()

        with nvtx_scope("detach"):
            output = detach_output(output)

        results_dict.update(
            {img_id: result for img_id, result in zip(image_ids, output)}
        )

    end_time = time.time()
    # NOTE(review): 5000 is a hard-coded image count, and start_time stays 0
    # when the loader yields 20 batches or fewer, so the printed value is
    # only meaningful for a 5000-image run with more than 20 batches.
    print('fps', (5000 - 20 * data_loader.batch_size) / (end_time - start_time))
    return results_dict
def sample(self, n):
    """
    Draw `n` samples as `loc + scale * eps`, with `eps` from `jt.randn`.

    The result has shape `(n,) + self.loc.shape`.
    """
    shape = (n, ) + self.loc.shape
    # The noise itself is generated without gradient tracking.
    # NOTE(review): confirm against the original file whether the return
    # statement was also meant to sit inside this no_grad scope.
    with jt.no_grad():
        eps = jt.randn(shape)
    return self.loc + self.scale * eps
def validate():
    """
    Evaluate a pretrained 'vit_base_patch16_224' on the validation set under
    '/data/imagenet' and print running / final top-1 and top-5 accuracy.

    One untimed forward pass on random input is done before the clock
    starts. Progress is printed every 10 batches. Returns nothing.
    """
    bs = 256

    model = create_model('vit_base_patch16_224',
                         pretrained=True,
                         num_classes=1000)
    criterion = nn.CrossEntropyLoss()
    dataset = create_val_dataset(root='/data/imagenet',
                                 batch_size=bs,
                                 num_workers=4,
                                 img_size=224)

    batch_time, losses, top1, top5 = (AverageMeter() for _ in range(4))

    progress_line = (
        'Test: [{0:>4d}/{1}] '
        'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) '
        'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) '
        'Acc@1: {top1.val:>7.3f} ({top1.avg:>7.3f}) '
        'Acc@5: {top5.val:>7.3f} ({top5.avg:>7.3f})')

    model.eval()
    with jt.no_grad():
        # Forward pass on random data before timing begins.
        model(jt.random((bs, 3, 224, 224)))

        tick = time.time()
        for step, (images, labels) in enumerate(dataset):
            n_images = images.shape[0]

            logits = model(images)
            loss = criterion(logits, labels)
            acc1, acc5 = accuracy(logits, labels, topk=(1, 5))

            losses.update(loss, n_images)
            top1.update(acc1, n_images)
            top5.update(acc5, n_images)

            batch_time.update(time.time() - tick)
            tick = time.time()

            if step % 10 != 0:
                continue
            print(
                progress_line.format(step,
                                     len(dataset),
                                     batch_time=batch_time,
                                     rate_avg=n_images / batch_time.avg,
                                     loss=losses,
                                     top1=top1,
                                     top5=top5))

    top1_avg, top5_avg = top1.avg, top5.avg
    summary = (
        round(top1_avg, 4),
        round(100 - top1_avg, 4),
        round(top5_avg, 4),
        round(100 - top5_avg, 4),
    )
    print(' * Acc@1 {:.3f} ({:.3f}) Acc@5 {:.3f} ({:.3f})'.format(*summary))
def compute_on_dataset(model, data_loader, bbox_aug, timer=None):
    """
    Evaluate `model` on every batch of `data_loader`.

    Each batch is unpacked as (images, image_sizes, image_ids) and wrapped
    in an ImageList before the forward pass, which runs without gradients
    (through `im_detect_bbox_aug` when `bbox_aug` is truthy). `timer`, if
    given, has `tic()` / `toc()` called around the forward pass.

    The loader's `is_train` and `num_workers` attributes are overwritten.
    Prints an fps estimate whose clock starts at batch index 20, and
    returns a dict mapping image id to detached result.
    """
    model.eval()
    data_loader.is_train = False
    data_loader.num_workers = 4

    predictions = {}
    clock_start = 0

    for step, batch in enumerate(tqdm(data_loader)):
        if step == 20:
            # For fair comparison, remove jittor compiling time
            clock_start = time.time()

        raw_images, image_sizes, image_ids = batch
        image_list = ImageList(jt.array(raw_images), image_sizes)

        with jt.no_grad():
            if timer:
                timer.tic()
            output = (im_detect_bbox_aug(model, image_list)
                      if bbox_aug else model(image_list))
            if timer:
                timer.toc()

        output = detach_output(output)
        predictions.update(
            {img_id: result for img_id, result in zip(image_ids, output)})

    clock_end = time.time()
    print('fps',
          (5000 - 20 * data_loader.batch_size) / (clock_end - clock_start))
    return predictions
def postprocess(det_output, w, h, batch_idx=0, interpolation_mode='bilinear',
                visualize_lincomb=False, crop_masks=True, score_threshold=0):
    """
    Postprocesses the output of Yolact on testing mode into a format that
    makes sense, accounting for all the possible configuration settings.

    Args:
        - det_output: The list of dicts that Detect outputs.
        - w: The real width of the image.
        - h: The real height of the image.
        - batch_idx: If you have multiple images for this batch, the image's
                     index in the batch.
        - interpolation_mode: Can be 'nearest' | 'area' | 'bilinear'
                     (passed as `mode` to nn.interpolate)
        - visualize_lincomb: If true, display_lincomb is called on the
                     prototypes and coefficients (lincomb masks only).
        - crop_masks: If true, lincomb masks are cropped with the boxes
                     before being upsampled.
        - score_threshold: Detections scoring at or below this are dropped;
                     values <= 0 disable the filter.

    Returns 4 jt Tensors (in the following order):
        - classes [num_det]: The class idx for each detection.
        - scores  [num_det]: The confidence score for each detection.
        - boxes   [num_det, 4]: The bounding box for each detection in
                                absolute point form.
        - masks   [num_det, h, w]: Full image masks for each detection.

    When there is nothing to return, a list of four empty arrays is returned
    instead. With cfg.use_maskiou and cfg.rescore_mask set but
    cfg.rescore_bbox unset, `scores` is a two-element list rather than a
    single tensor.
    """
    dets = det_output[batch_idx]
    net = dets['net']
    dets = dets['detection']

    if dets is None:
        return [jt.array([])] * 4  # Warning, this is 4 copies of the same thing

    if score_threshold > 0:
        keep = dets['score'] > score_threshold

        # Filter every per-detection entry; 'proto' is left untouched.
        for k in dets:
            if k != 'proto':
                dets[k] = dets[k][keep]

        if dets['score'].shape[0] == 0:
            return [jt.array([])] * 4

    # Actually extract everything from dets now
    classes = dets['class']
    boxes = dets['box']
    scores = dets['score']
    masks = dets['mask']

    if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch:
        # At this point masks is only the coefficients
        proto_data = dets['proto']

        # Test flag, do not upvote
        if cfg.mask_proto_debug:
            np.save('scripts/proto.npy', proto_data.numpy())

        if visualize_lincomb:
            display_lincomb(proto_data, masks)

        # Linear combination of the prototypes with the coefficients.
        masks = jt.matmul(proto_data, masks.transpose(1, 0))
        masks = cfg.mask_proto_mask_activation(masks)

        # Crop masks before upsampling because you know why
        if crop_masks:
            masks = crop(masks, boxes)

        # Permute into the correct output shape [num_dets, proto_h, proto_w]
        masks = masks.permute(2, 0, 1)

        if cfg.use_maskiou:
            with timer.env('maskiou_net'):
                with jt.no_grad():
                    maskiou_p = net.maskiou_net(masks.unsqueeze(1))
                    # Keep only the prediction for each detection's class.
                    maskiou_p = jt.gather(
                        maskiou_p, dim=1,
                        index=classes.unsqueeze(1)).squeeze(1)
                    if cfg.rescore_mask:
                        if cfg.rescore_bbox:
                            scores = scores * maskiou_p
                        else:
                            # Original scores first, rescored ones second.
                            scores = [scores, scores * maskiou_p]

        # Scale masks up to the full image
        masks = nn.interpolate(masks.unsqueeze(0), (h, w),
                               mode=interpolation_mode,
                               align_corners=False).squeeze(0)

        # Binarize the masks
        masks = masks > 0.5

    # Bring the boxes to absolute coordinates (written back in place).
    boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0], boxes[:, 2],
                                                    w, cast=False)
    boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1], boxes[:, 3],
                                                    h, cast=False)
    boxes = boxes.int32()

    if cfg.mask_type == mask_type.direct and cfg.eval_mask_branch:
        # Upscale masks
        full_masks = jt.zeros(masks.shape[0], h, w)

        for jdx in range(masks.shape[0]):
            x1, y1, x2, y2 = boxes[jdx]

            mask_w = x2 - x1
            mask_h = y2 - y1

            # Just in case: skip boxes with no area; their mask stays zero.
            if mask_w * mask_h <= 0 or mask_w < 0:
                continue

            mask = masks[jdx].view(1, 1, cfg.mask_size, cfg.mask_size)
            mask = nn.interpolate(mask, (mask_h, mask_w),
                                  mode=interpolation_mode,
                                  align_corners=False)
            mask = (mask > 0.5).float()
            full_masks[jdx, y1:y2, x1:x2] = mask

        masks = full_masks

    return classes, scores, boxes, masks
def train():
    """
    Train a NeRF model end to end.

    Parses the command line with `config_parser()`, loads an 'llff',
    'blender' or 'deepvoxels' dataset, writes the arguments (and config
    file, if any) into `<basedir>/<expname>`, builds the model with
    `create_nerf`, and then either renders once (`--render_only`) or runs
    the optimization loop for a fixed number of iterations with periodic
    checkpointing, video rendering, validation images and test-set
    rendering.

    Returns None. An unknown dataset type prints a message and returns
    early.
    """
    parser = config_parser()
    args = parser.parse_args()

    # Load data
    intrinsic = None
    if args.dataset_type == 'llff':
        images, poses, bds, render_poses, i_test = load_llff_data(
            args.datadir,
            args.factor,
            recenter=True,
            bd_factor=.75,
            spherify=args.spherify)
        hwf = poses[0, :3, -1]
        poses = poses[:, :3, :4]
        print('Loaded llff', images.shape, render_poses.shape, hwf,
              args.datadir)
        if not isinstance(i_test, list):
            i_test = [i_test]

        if args.llffhold > 0:
            print('Auto LLFF holdout,', args.llffhold)
            i_test = np.arange(images.shape[0])[::args.llffhold]

        # The periodic test-set rendering reads i_test_tot for every dataset
        # type, so it has to be bound here as well.
        i_test_tot = i_test

        i_val = i_test
        i_train = np.array([
            i for i in np.arange(int(images.shape[0]))
            if (i not in i_test and i not in i_val)
        ])

        print('DEFINING BOUNDS')
        if args.no_ndc:
            near = np.ndarray.min(bds) * .9
            far = np.ndarray.max(bds) * 1.
        else:
            near = 0.
            far = 1.
        print('NEAR FAR', near, far)

    elif args.dataset_type == 'blender':
        # NOTE(review): these two locals are adjusted for non-zero MPI ranks
        # but never read afterwards -- the calls below use args.testskip and
        # args.faketestskip directly. Confirm which one is intended.
        testskip = args.testskip
        faketestskip = args.faketestskip
        if jt.mpi and jt.mpi.local_rank() != 0:
            testskip = faketestskip
            faketestskip = 1
        if args.do_intrinsic:
            images, poses, intrinsic, render_poses, hwf, i_split = load_blender_data(
                args.datadir, args.half_res, args.testskip,
                args.blender_factor, True)
        else:
            images, poses, render_poses, hwf, i_split = load_blender_data(
                args.datadir, args.half_res, args.testskip,
                args.blender_factor)
        print('Loaded blender', images.shape, render_poses.shape, hwf,
              args.datadir)
        i_train, i_val, i_test = i_split
        # Keep the full test split around; i_test itself is thinned out.
        i_test_tot = i_test
        i_test = i_test[::args.faketestskip]

        near = args.near
        far = args.far
        print(args.do_intrinsic)
        print("hwf", hwf)
        print("near", near)
        print("far", far)

        if args.white_bkgd:
            # Composite onto white using the last channel as alpha.
            images = images[..., :3] * images[..., -1:] + (1. -
                                                           images[..., -1:])
        else:
            images = images[..., :3]

    elif args.dataset_type == 'deepvoxels':
        images, poses, render_poses, hwf, i_split = load_dv_data(
            scene=args.shape, basedir=args.datadir, testskip=args.testskip)

        print('Loaded deepvoxels', images.shape, render_poses.shape, hwf,
              args.datadir)
        i_train, i_val, i_test = i_split
        # See the llff branch: needed by the periodic test-set rendering.
        i_test_tot = i_test

        hemi_R = np.mean(np.linalg.norm(poses[:, :3, -1], axis=-1))
        near = hemi_R - 1.
        far = hemi_R + 1.

    else:
        print('Unknown dataset type', args.dataset_type, 'exiting')
        return

    # Cast intrinsics to right types
    H, W, focal = hwf
    H, W = int(H), int(W)
    hwf = [H, W, focal]

    # The poses of the test views replace whatever the loader returned.
    render_poses = np.array(poses[i_test])

    # Create log dir and copy the config file
    basedir = args.basedir
    expname = args.expname
    os.makedirs(os.path.join(basedir, expname), exist_ok=True)
    f = os.path.join(basedir, expname, 'args.txt')
    with open(f, 'w') as file:
        for arg in sorted(vars(args)):
            attr = getattr(args, arg)
            file.write('{} = {}\n'.format(arg, attr))
    if args.config is not None:
        f = os.path.join(basedir, expname, 'config.txt')
        # Both handles are closed deterministically.
        with open(f, 'w') as file, open(args.config, 'r') as config_file:
            file.write(config_file.read())

    # Create nerf model
    render_kwargs_train, render_kwargs_test, start, grad_vars, optimizer = create_nerf(
        args)
    global_step = start

    bds_dict = {
        'near': near,
        'far': far,
    }
    render_kwargs_train.update(bds_dict)
    render_kwargs_test.update(bds_dict)

    # Move testing data to GPU
    render_poses = jt.array(render_poses)

    # Short circuit if only rendering out from trained model
    if args.render_only:
        print('RENDER ONLY')
        with jt.no_grad():
            testsavedir = os.path.join(
                basedir, expname, 'renderonly_{}_{:06d}'.format(
                    'test' if args.render_test else 'path', start))
            os.makedirs(testsavedir, exist_ok=True)
            print('test poses shape', render_poses.shape)

            rgbs, _ = render_path(render_poses,
                                  hwf,
                                  args.chunk,
                                  render_kwargs_test,
                                  savedir=testsavedir,
                                  render_factor=args.render_factor)
            print('Done rendering', testsavedir)
            imageio.mimwrite(os.path.join(testsavedir, 'video.mp4'),
                             to8b(rgbs),
                             fps=30,
                             quality=8)

            return

    # Prepare raybatch tensor if batching random rays
    accumulation_steps = 1
    N_rand = args.N_rand // accumulation_steps
    use_batching = not args.no_batching
    if use_batching:
        # For random ray batching
        print('get rays')
        rays = np.stack(
            [get_rays_np(H, W, focal, p) for p in poses[:, :3, :4]],
            0)  # [N, ro+rd, H, W, 3]
        print('done, concats')
        rays_rgb = np.concatenate([rays, images[:, None]],
                                  1)  # [N, ro+rd+rgb, H, W, 3]
        rays_rgb = np.transpose(rays_rgb,
                                [0, 2, 3, 1, 4])  # [N, H, W, ro+rd+rgb, 3]
        rays_rgb = np.stack([rays_rgb[i] for i in i_train],
                            0)  # train images only
        rays_rgb = np.reshape(rays_rgb,
                              [-1, 3, 3])  # [(N-1)*H*W, ro+rd+rgb, 3]
        rays_rgb = rays_rgb.astype(np.float32)
        print('shuffle rays')
        np.random.shuffle(rays_rgb)

        print('done')
        i_batch = 0

    # Move training data to GPU
    images = jt.array(images.astype(np.float32))
    poses = jt.array(poses)
    if use_batching:
        rays_rgb = jt.array(rays_rgb)

    N_iters = 51000
    print('Begin')
    print('TRAIN views are', i_train)
    print('TEST views are', i_test)
    print('VAL views are', i_val)

    # Summary writer, created on the first (or only) process.
    if not jt.mpi or jt.mpi.local_rank() == 0:
        date = str(datetime.datetime.now())
        date = date[:date.rfind(":")].replace("-", "")\
            .replace(":", "")\
            .replace(" ", "_")
        gpu_idx = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
        log_dir = os.path.join("./logs", "summaries",
                               "log_" + date + "_gpu" + gpu_idx)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        writer = SummaryWriter(log_dir=log_dir)

    start = start + 1
    for i in trange(start, N_iters):
        time0 = time.time()

        # Sample random ray batch
        if use_batching:
            # Random over all images
            batch = rays_rgb[i_batch:i_batch + N_rand]  # [B, 2+1, 3*?]
            batch = jt.transpose(batch, (1, 0, 2))
            batch_rays, target_s = batch[:2], batch[2]

            i_batch += N_rand
            if i_batch >= rays_rgb.shape[0]:
                print("Shuffle data after an epoch!")
                rand_idx = jt.randperm(rays_rgb.shape[0])
                rays_rgb = rays_rgb[rand_idx]
                i_batch = 0

        else:
            # Random from one image; the seed is tied to the iteration.
            np.random.seed(i)
            img_i = np.random.choice(i_train)
            target = images[img_i]
            pose = poses[img_i, :3, :4]
            if N_rand is not None:
                rays_o, rays_d = pinhole_get_rays(
                    H, W, focal, pose, intrinsic)  # (H, W, 3), (H, W, 3)
                if i < args.precrop_iters:
                    dH = int(H // 2 * args.precrop_frac)
                    dW = int(W // 2 * args.precrop_frac)
                    coords = jt.stack(
                        jt.meshgrid(
                            jt.linspace(H // 2 - dH, H // 2 + dH - 1, 2 * dH),
                            jt.linspace(W // 2 - dW, W // 2 + dW - 1,
                                        2 * dW)), -1)
                    if i == start:
                        print(
                            f"[Config] Center cropping of size {2*dH} x {2*dW} is enabled until iter {args.precrop_iters}"
                        )
                else:
                    coords = jt.stack(
                        jt.meshgrid(jt.linspace(0, H - 1, H),
                                    jt.linspace(0, W - 1, W)),
                        -1)  # (H, W, 2)

                coords = jt.reshape(coords, [-1, 2])  # (H * W, 2)
                select_inds = np.random.choice(coords.shape[0],
                                               size=[N_rand],
                                               replace=False)  # (N_rand,)
                select_coords = coords[select_inds].int()  # (N_rand, 2)
                rays_o = rays_o[select_coords[:, 0],
                                select_coords[:, 1]]  # (N_rand, 3)
                rays_d = rays_d[select_coords[:, 0],
                                select_coords[:, 1]]  # (N_rand, 3)
                batch_rays = jt.stack([rays_o, rays_d], 0)
                target_s = target[select_coords[:, 0],
                                  select_coords[:, 1]]  # (N_rand, 3)

        #####  Core optimization loop  #####
        rgb, disp, acc, extras = render(H,
                                        W,
                                        focal,
                                        chunk=args.chunk,
                                        rays=batch_rays,
                                        verbose=i < 10,
                                        retraw=True,
                                        **render_kwargs_train)
        img_loss = img2mse(rgb, target_s)
        trans = extras['raw'][..., -1]
        loss = img_loss
        psnr = mse2psnr(img_loss)

        if 'rgb0' in extras:
            img_loss0 = img2mse(extras['rgb0'], target_s)
            loss = loss + img_loss0
            psnr0 = mse2psnr(img_loss0)

        optimizer.backward(loss / accumulation_steps)
        if i % accumulation_steps == 0:
            optimizer.step()

        ###   update learning rate   ###
        decay_rate = 0.1
        decay_steps = args.lrate_decay * accumulation_steps * 1000
        new_lrate = args.lrate * (decay_rate**(global_step / decay_steps))
        for param_group in optimizer.param_groups:
            param_group['lr'] = new_lrate
        ################################

        dt = time.time() - time0

        # Rest is logging
        if (i + 1) % args.i_weights == 0 and (not jt.mpi
                                              or jt.mpi.local_rank() == 0):
            print(i)
            path = os.path.join(basedir, expname, '{:06d}.tar'.format(i))
            jt.save(
                {
                    'global_step':
                    global_step,
                    'network_fn_state_dict':
                    render_kwargs_train['network_fn'].state_dict(),
                    'network_fine_state_dict':
                    render_kwargs_train['network_fine'].state_dict(),
                }, path)
            print('Saved checkpoints at', path)

        if i % args.i_video == 0 and i > 0:
            # Turn on testing mode
            with jt.no_grad():
                rgbs, disps = render_path(render_poses,
                                          hwf,
                                          args.chunk,
                                          render_kwargs_test,
                                          intrinsic=intrinsic)
            if not jt.mpi or jt.mpi.local_rank() == 0:
                print('Done, saving', rgbs.shape, disps.shape)
                moviebase = os.path.join(
                    basedir, expname,
                    '{}_spiral_{:06d}_'.format(expname, i))
                print('movie base ', moviebase)
                imageio.mimwrite(moviebase + 'rgb.mp4',
                                 to8b(rgbs),
                                 fps=30,
                                 quality=8)
                imageio.mimwrite(moviebase + 'disp.mp4',
                                 to8b(disps / np.max(disps)),
                                 fps=30,
                                 quality=8)

        if i % args.i_print == 0:
            tqdm.write(
                f"[TRAIN] Iter: {i} Loss: {loss.item()} PSNR: {psnr.item()}")
            if i % args.i_img == 0:
                # Render one randomly chosen validation view for the log.
                img_i = np.random.choice(i_val)
                target = images[img_i]
                pose = poses[img_i, :3, :4]
                with jt.no_grad():
                    rgb, disp, acc, extras = render(H,
                                                    W,
                                                    focal,
                                                    chunk=args.chunk,
                                                    c2w=pose,
                                                    intrinsic=intrinsic,
                                                    **render_kwargs_test)
                psnr = mse2psnr(img2mse(rgb, target))
                rgb = rgb.numpy()
                disp = disp.numpy()
                acc = acc.numpy()
                if not jt.mpi or jt.mpi.local_rank() == 0:
                    writer.add_image('test/rgb',
                                     to8b(rgb),
                                     global_step,
                                     dataformats="HWC")
                    writer.add_image('test/target',
                                     target.numpy(),
                                     global_step,
                                     dataformats="HWC")
                    writer.add_scalar('test/psnr', psnr.item(), global_step)

                jt.clean_graph()
                jt.sync_all()
                jt.gc()

        if i % args.i_testset == 0 and i > 0:
            # Every i_tottest iterations the full test split is rendered,
            # otherwise only the thinned-out one.
            si_test = i_test_tot if i % args.i_tottest == 0 else i_test
            testsavedir = os.path.join(basedir, expname,
                                       'testset_{:06d}'.format(i))
            os.makedirs(testsavedir, exist_ok=True)
            print('test poses shape', poses[si_test].shape)
            with jt.no_grad():
                rgbs, disps = render_path(jt.array(poses[si_test]),
                                          hwf,
                                          args.chunk,
                                          render_kwargs_test,
                                          savedir=testsavedir,
                                          intrinsic=intrinsic,
                                          expname=expname)
            jt.gc()

        global_step += 1
def do_train(
    cfg,
    model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    checkpoint_period,
    test_period,
    arguments,
):
    """
    Main training loop.

    Args:
        cfg: configuration node; MODEL.*, DATASETS.TEST and TEST.* are read.
        model: called as `model(images, targets)` and expected to return a
            dict of losses.
        data_loader: training loader yielding (images, targets, ids);
            `len(data_loader)` is used as the maximum iteration count.
        data_loader_val: optional validation loader; None disables the
            periodic validation pass.
        optimizer: stepped with the summed loss (`optimizer.step(losses)`).
        scheduler: stepped once per iteration.
        checkpointer: `save(name, **arguments)` is called periodically and
            at the last iteration.
        checkpoint_period: save a checkpoint every this many iterations.
        test_period: run validation every this many iterations (values
            <= 0 disable it).
        arguments: dict carrying at least "iteration"; it is updated in
            place with the current iteration.
    """
    logger = logging.getLogger("detectron.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter=" ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    dataset_names = cfg.DATASETS.TEST

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

        # Skip batches that contain an image without any target. Note that
        # the iteration counter in `arguments` is not advanced in that case.
        if any(len(target) < 1 for target in targets):
            logger.error(f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}" )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        # (no reduction happens here: the dict is used as is)
        loss_dict_reduced = loss_dict
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        optimizer.step(losses)
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join(
                    [
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    # Placeholder constant, not a measured value.
                    memory=1024 / 1024.0 / 1024.0,  # TODO CUDA Memory
                )
            )
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter=" ")
            _ = inference(  # The result can be used for additional logging, e. g. for TensorBoard
                model,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg, is_train=False, is_distributed=False, is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            # Back to train mode so the model returns losses again.
            model.train()
            with jt.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val, _) in enumerate(tqdm(data_loader_val)):
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = loss_dict
                    losses_reduced = sum(loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            logger.info(
                meters_val.delimiter.join(
                    [
                        "[Validation]: ",
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    # Placeholder constant, not a measured value.
                    memory= 2014 / 1024.0 / 1024.0,# TODO torch.cuda.max_memory_allocated()
                )
            )
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info(
        "Total training time: {} ({:.4f} s / it)".format(
            total_time_str, total_training_time / (max_iter)
        )
    )
def test(name, model_name, bs):
    """Benchmark one model under torch or jittor and record its throughput.

    Args:
        name: ``"torch"`` selects the torch/torchvision path; any other value
            takes the jittor import path (the training step additionally
            checks for the exact string ``"jittor"``).
        model_name: Key into the framework's model zoo. A ``"train_"`` prefix
            switches from inference to a training-step benchmark.
        bs: Batch size of the random input.

    Side effects:
        Prints the duration and FPS, and writes them to
        ``{home_path}/.cache/jittor/{name}-{model_name}-{bs}.txt``.
    """
    print("hello", name, model_name, bs)
    import numpy as np
    import time

    is_train = False
    _model_name = model_name  # keep the unstripped name for the result file
    if model_name.startswith("train_"):
        is_train = True
        model_name = model_name[6:]

    if name == "torch":
        import torch
        import torchvision.models as tcmodels
        from torch import optim
        from torch import nn
        torch.backends.cudnn.deterministic = False
        torch.backends.cudnn.benchmark = True
        model = tcmodels.__dict__[model_name]()
        model = model.cuda()
    else:
        import jittor as jt
        from jittor import optim
        from jittor import nn
        jt.flags.use_cuda = 1
        jt.cudnn.set_algorithm_cache_size(10000)
        import jittor.models as jtmodels
        model = jtmodels.__dict__[model_name]()
        # Fixed: compare the model *name*; the original compared the model
        # object itself to a string, so this branch could never run.
        if model_name in ("resnet152", "resnet101") and bs == 128 and is_train:
            jt.cudnn.set_max_workspace_ratio(0.05)

    if is_train:
        model.train()
    else:
        model.eval()

    img_size = 224
    if model_name == "inception_v3":
        img_size = 300
    test_img = np.random.random((bs, 3, img_size, img_size)).astype("float32")
    if is_train:
        label = (np.random.random((bs,)) * 1000).astype("int32")

    if name == "torch":
        test_img = torch.Tensor(test_img).cuda()
        if is_train:
            label = torch.LongTensor(label).cuda()
            opt = optim.SGD(model.parameters(), 0.001)
        sync = lambda: torch.cuda.synchronize()
        # Alias so the framework-agnostic code below can refer to `jt`.
        jt = torch
    else:
        test_img = jt.array(test_img).stop_grad()
        if is_train:
            label = jt.array(label).stop_grad()
            opt = optim.SGD(model.parameters(), 0.001)
        sync = lambda: jt.sync_all(True)

    sync()
    use_profiler = os.environ.get("use_profiler", "0") == "1"
    # NOTE(review): the probed attribute is "nograd" while the call is
    # `no_grad`; as written this branch looks unreachable. Left unchanged
    # because enabling it would also disable gradients for "train_" runs.
    if hasattr(jt, "nograd"):
        ng = jt.no_grad()
        ng.__enter__()

    def run_once():
        """Run one forward pass (plus an optimizer step when training)."""
        x = model(test_img)
        if isinstance(x, tuple):
            x = x[0]
        if is_train:
            loss = nn.CrossEntropyLoss()(x, label)
            if name == "jittor":
                opt.step(loss)
            else:
                opt.zero_grad()
                loss.backward()
                opt.step()
        elif hasattr(x, "sync"):
            # Force evaluation of the output; torch tensors have no `sync`.
            x.sync()

    # Two untimed passes before the measured one.
    sync()
    for i in time_iter():
        run_once()
    sync()
    for i in time_iter():
        run_once()
    sync()

    if use_profiler:
        if name == "torch":
            prof = torch.autograd.profiler.profile(use_cuda=True)
        else:
            prof = jt.profile_scope()
        prof.__enter__()
    if name == "jittor":
        if hasattr(jt.flags, "use_parallel_op_compiler"):
            jt.flags.use_parallel_op_compiler = 0

    start = time.time()
    for i in time_iter(10):
        run_once()
    sync()
    end = time.time()

    if use_profiler:
        prof.__exit__(None, None, None)
        if name == "torch":
            print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=30))

    total_iter = i + 1
    print("duration:", end-start, "FPS:", total_iter*bs/(end-start))
    fpath = f"{home_path}/.cache/jittor/{name}-{_model_name}-{bs}.txt"
    with open(fpath, 'w') as f:
        f.write(f"duration: {end-start} FPS: {total_iter*bs/(end-start)}")
    # Fixed: permission bits are octal (rw-rw-rw-); the original hex literal
    # 0x666 set an unrelated mode.
    os.chmod(fpath, 0o666)
def lincomb_mask_loss(self, pos, idx_t, loc_data, mask_data, priors, proto_data, masks, gt_box_t, score_data, inst_data, labels, interpolation_mode='bilinear'):
    """Compute the prototype/coefficient ("linear combination") mask loss.

    For every image in the batch the ground-truth masks are resized to the
    prototype resolution, predicted masks are assembled as
    ``proto_masks @ proto_coef.T`` followed by ``cfg.mask_proto_mask_activation``,
    and a per-pixel loss against the matched ground truth is accumulated.
    Several ``cfg.mask_proto_*`` switches alter cropping, normalisation and
    re-weighting of that loss.

    Returns:
        ``losses`` -- a dict with key ``'M'`` (and ``'D'`` when
        ``cfg.mask_proto_coeff_diversity_loss`` is set). When ``cfg.use_maskiou``
        is set the return value is instead ``(losses, maskiou_targets)`` where
        ``maskiou_targets`` is ``[maskiou_net_input, maskiou_t, label_t]`` or
        ``None`` if no mask in the batch survived ``cfg.discard_mask_area``.
    """
    mask_h = proto_data.shape[1]
    mask_w = proto_data.shape[2]

    # Ground-truth boxes are only needed when cropping or emulating ROI pooling.
    process_gt_bboxes = cfg.mask_proto_normalize_emulate_roi_pooling or cfg.mask_proto_crop

    if cfg.mask_proto_remove_empty_masks:
        # Make sure to store a copy of this because we edit it to get rid of all-zero masks
        pos = pos.clone()

    loss_m = 0
    loss_d = 0  # Coefficient diversity loss

    maskiou_t_list = []
    maskiou_net_input_list = []
    label_t_list = []

    for idx in range(mask_data.shape[0]):
        with jt.no_grad():
            downsampled_masks = nn.interpolate(masks[idx].unsqueeze(0), (mask_h, mask_w),
                                               mode=interpolation_mode, align_corners=False).squeeze(0)
            # After the permute the mask index is the last axis.
            downsampled_masks = downsampled_masks.permute(1, 2, 0)

            if cfg.mask_proto_binarize_downsampled_gt:
                downsampled_masks = (downsampled_masks>0.5).float()

            if cfg.mask_proto_remove_empty_masks:
                # Get rid of gt masks that are so small they get downsampled away
                very_small_masks = (downsampled_masks.sum(0).sum(0) <= 0.0001)
                for i in range(very_small_masks.shape[0]):
                    if very_small_masks[i]:
                        pos[idx, idx_t[idx] == i] = 0

            if cfg.mask_proto_reweight_mask_loss:
                # Ensure that the gt is binary
                if not cfg.mask_proto_binarize_downsampled_gt:
                    bin_gt = (downsampled_masks>0.5).float()
                else:
                    bin_gt = downsampled_masks

                gt_foreground_norm = bin_gt / (jt.sum(bin_gt, dim=(0,1), keepdim=True) + 0.0001)
                gt_background_norm = (1-bin_gt) / (jt.sum(1-bin_gt, dim=(0,1), keepdim=True) + 0.0001)

                mask_reweighting = gt_foreground_norm * cfg.mask_proto_reweight_coeff + gt_background_norm
                mask_reweighting *= mask_h * mask_w

        cur_pos = pos[idx]
        cur_pos = jt.where(cur_pos)[0]
        pos_idx_t = idx_t[idx, cur_pos]

        if process_gt_bboxes:
            # Note: this is in point-form
            if cfg.mask_proto_crop_with_pred_box:
                pos_gt_box_t = decode(loc_data[idx, :, :], priors.data, cfg.use_yolo_regressors)[cur_pos]
            else:
                pos_gt_box_t = gt_box_t[idx, cur_pos]

        if pos_idx_t.shape[0] == 0:
            continue

        proto_masks = proto_data[idx]
        proto_coef = mask_data[idx, cur_pos, :]
        if cfg.use_mask_scoring:
            mask_scores = score_data[idx, cur_pos, :]

        if cfg.mask_proto_coeff_diversity_loss:
            if inst_data is not None:
                div_coeffs = inst_data[idx, cur_pos, :]
            else:
                div_coeffs = proto_coef

            loss_d += self.coeff_diversity_loss(div_coeffs, pos_idx_t)

        # If we have over the allowed number of masks, select a random sample
        old_num_pos = proto_coef.shape[0]
        if old_num_pos > cfg.masks_to_train:
            perm = jt.randperm(proto_coef.shape[0])
            select = perm[:cfg.masks_to_train]

            proto_coef = proto_coef[select, :]
            pos_idx_t = pos_idx_t[select]

            if process_gt_bboxes:
                pos_gt_box_t = pos_gt_box_t[select, :]
            if cfg.use_mask_scoring:
                mask_scores = mask_scores[select, :]

        num_pos = proto_coef.shape[0]
        mask_t = downsampled_masks[:, :, pos_idx_t]
        label_t = labels[idx][pos_idx_t]

        # Size: [mask_h, mask_w, num_pos]
        pred_masks = proto_masks @ proto_coef.transpose(1,0)
        pred_masks = cfg.mask_proto_mask_activation(pred_masks)

        if cfg.mask_proto_double_loss:
            # Extra loss term on the masks before any cropping.
            if cfg.mask_proto_mask_activation == activation_func.sigmoid:
                pre_loss = nn.bce_loss(jt.clamp(pred_masks, 0, 1), mask_t, size_average=False)
            else:
                pre_loss = nn.smooth_l1_loss(pred_masks, mask_t, reduction='sum')

            loss_m += cfg.mask_proto_double_loss_alpha * pre_loss

        if cfg.mask_proto_crop:
            pred_masks = crop(pred_masks, pos_gt_box_t)

        if cfg.mask_proto_mask_activation == activation_func.sigmoid:
            pre_loss = binary_cross_entropy(jt.clamp(pred_masks, 0, 1), mask_t)
        else:
            pre_loss = nn.smooth_l1_loss(pred_masks, mask_t, reduction='none')

        if cfg.mask_proto_normalize_mask_loss_by_sqrt_area:
            gt_area = jt.sum(mask_t, dim=(0, 1), keepdims=True)
            pre_loss = pre_loss / (jt.sqrt(gt_area) + 0.0001)

        if cfg.mask_proto_reweight_mask_loss:
            pre_loss = pre_loss * mask_reweighting[:, :, pos_idx_t]

        if cfg.mask_proto_normalize_emulate_roi_pooling:
            weight = mask_h * mask_w if cfg.mask_proto_crop else 1
            pos_gt_csize = center_size(pos_gt_box_t)
            gt_box_width = pos_gt_csize[:, 2] * mask_w
            gt_box_height = pos_gt_csize[:, 3] * mask_h
            pre_loss = pre_loss.sum(0).sum(0) / gt_box_width / gt_box_height * weight

        # If the number of masks were limited scale the loss accordingly
        if old_num_pos > num_pos:
            pre_loss *= old_num_pos / num_pos

        loss_m += jt.sum(pre_loss)

        if cfg.use_maskiou:
            if cfg.discard_mask_area > 0:
                gt_mask_area = jt.sum(mask_t, dim=(0, 1))
                select = gt_mask_area > cfg.discard_mask_area

                if jt.sum(select).item() < 1:
                    continue

                # Fixed: the boxes only exist when process_gt_bboxes is set;
                # indexing them unconditionally raised a NameError otherwise.
                if process_gt_bboxes:
                    pos_gt_box_t = pos_gt_box_t[select, :]
                pred_masks = pred_masks[:, :, select]
                mask_t = mask_t[:, :, select]
                label_t = label_t[select]

            maskiou_net_input = pred_masks.permute(2, 0, 1).unsqueeze(1)
            pred_masks = (pred_masks>0.5).float()
            maskiou_t = self._mask_iou(pred_masks, mask_t)

            maskiou_net_input_list.append(maskiou_net_input)
            maskiou_t_list.append(maskiou_t)
            label_t_list.append(label_t)

    losses = {'M': loss_m * cfg.mask_alpha / mask_h / mask_w}

    if cfg.mask_proto_coeff_diversity_loss:
        losses['D'] = loss_d

    if cfg.use_maskiou:
        # discard_mask_area discarded every mask in the batch, so nothing to do here
        if len(maskiou_t_list) == 0:
            return losses, None

        maskiou_t = jt.contrib.concat(maskiou_t_list)
        label_t = jt.contrib.concat(label_t_list)
        maskiou_net_input = jt.contrib.concat(maskiou_net_input_list)

        num_samples = maskiou_t.shape[0]
        if cfg.maskious_to_train > 0 and num_samples > cfg.maskious_to_train:
            perm = jt.randperm(num_samples)
            # Fixed: subsample to the maskiou budget that the guard above
            # checks, not to cfg.masks_to_train.
            select = perm[:cfg.maskious_to_train]
            maskiou_t = maskiou_t[select]
            label_t = label_t[select]
            maskiou_net_input = maskiou_net_input[select]

        return losses, [maskiou_net_input, maskiou_t, label_t]

    return losses
if args.config is None: model_path = SavePath.from_str(args.trained_model) # TODO: Bad practice? Probably want to do a name lookup instead. args.config = model_path.model_name + '_config' print('Config not specified. Parsed %s from the file name.\n' % args.config) set_cfg(args.config) if args.detect: cfg.eval_mask_branch = False if args.dataset is not None: set_dataset(args.dataset) with jt.no_grad(): if not os.path.exists('results'): os.makedirs('results') if args.resume and not args.display: with open(args.ap_data_file, 'rb') as f: ap_data = pickle.load(f) calc_map(ap_data) exit() if args.image is None and args.video is None and args.images is None: # dataset = COCODetection(cfg.dataset.valid_images, cfg.dataset.valid_info, # transform=BaseTransform(), has_gt=cfg.dataset.has_gt) dataset = EvalCOCODetection(cfg.dataset.valid_images, cfg.dataset.valid_info, transform=BaseTransform(),
def train(self, dataset, num_workers, epochs, batch_sizes, fade_in_percentage, logger, output, num_samples=36, start_depth=0, feedback_factor=100, checkpoint_factor=1):
    """
    Train the GAN progressively, one depth (resolution) after another.

    Using this method is optional: optimize_generator and
    optimize_discriminator can be driven from a custom routine instead.

    :param dataset: dataset object (not a data loader; a loader is built here
                    per depth because batch sizes can differ per resolution)
    :param num_workers: number of workers used for reading the data
    :param epochs: list with the number of epochs for every resolution
    :param batch_sizes: list with the batch size for every resolution
    :param fade_in_percentage: list with the percentage of each resolution's
                               training used to fade in the new layer
    :param logger: logger receiving the progress messages
    :param output: output directory for samples and models
    :param num_samples: number of latent samples drawn for the sample grid
    :param start_depth: depth to start training from (overridden when the
                        structure is 'fixed')
    :param feedback_factor: number of logs per epoch
    :param checkpoint_factor: save the models every this many epochs
    :return: None (writes multiple files to disk)
    """

    def _hms(seconds):
        # timedelta text with the fractional seconds dropped
        return str(datetime.timedelta(seconds=seconds)).split('.')[0]

    assert self.depth <= len(epochs), "epochs not compatible with depth"
    assert self.depth <= len(batch_sizes), "batch_sizes not compatible with depth"
    assert self.depth <= len(fade_in_percentage), "fade_in_percentage not compatible with depth"

    # put every network into train mode
    self.gen.train()
    self.dis.train()
    if self.use_ema:
        self.gen_shadow.train()

    # wall-clock reference for the "Elapsed" log entries
    training_started = time.time()

    # fixed latent batch, reused for every sample grid
    fixed_input = jt.random([num_samples, self.latent_size], 'float32', 'normal').stop_grad()

    logger.info("Starting the training process ... \n")
    if self.structure == 'fixed':
        start_depth = self.depth - 1

    step = 1  # counter for number of iterations
    for current_depth in range(start_depth, self.depth):
        current_res = np.power(2, current_depth + 2)
        logger.info("Currently working on depth: %d", current_depth + 1)
        logger.info("Current resolution: %d x %d" % (current_res, current_res))

        ticker = 1  # batches seen at this depth; drives the fade-in alpha
        depth_epochs = epochs[current_depth]
        data = get_data_loader(dataset, batch_sizes[current_depth], num_workers)

        for epoch in range(1, depth_epochs + 1):
            epoch_started = timeit.default_timer()
            logger.info("Epoch: [%d]" % epoch)

            total_batches = len(data)
            fade_point = int((fade_in_percentage[current_depth] / 100) * depth_epochs * total_batches)

            for i, (batch, useless) in enumerate(data, 1):
                # alpha ramps up to 1 while the new layer is faded in
                if ticker <= fade_point:
                    alpha = ticker / fade_point
                else:
                    alpha = 1

                images = batch
                gan_input = jt.random([images.shape[0], self.latent_size], 'float32', 'normal').stop_grad()

                # discriminator first, then generator
                dis_loss = self.optimize_discriminator(gan_input, images, current_depth, alpha)
                gen_loss = self.optimize_generator(gan_input, images, current_depth, alpha)

                # loss feedback plus a sample grid
                if i % int(total_batches / feedback_factor + 1) == 0 or i == 1:
                    elapsed = _hms(time.time() - training_started)
                    logger.info(
                        "Elapsed: [%s] Step: %d Batch: %d D_Loss: %f G_Loss: %f"
                        % (elapsed, step, i, dis_loss, gen_loss))

                    samples_dir = os.path.join(output, 'samples')
                    os.makedirs(samples_dir, exist_ok=True)
                    gen_img_file = os.path.join(
                        output, 'samples', f"gen_{current_depth}_{epoch}_{i}.png")

                    with jt.no_grad():
                        if not self.use_ema:
                            samples = self.gen(fixed_input, current_depth, alpha).detach()
                        else:
                            samples = self.gen_shadow(fixed_input, current_depth, alpha).detach()
                        if self.structure == 'linear':
                            scale_factor = int(np.power(2, self.depth - current_depth - 1))
                        else:
                            scale_factor = 1
                        self.create_grid(
                            samples=samples,
                            scale_factor=scale_factor,
                            img_file=gen_img_file,
                        )

                # advance the alpha ticker and the global step
                ticker += 1
                step += 1

            logger.info("Time taken for epoch: %s\n" % _hms(timeit.default_timer() - epoch_started))

            is_checkpoint_epoch = (
                epoch % checkpoint_factor == 0
                or epoch == 1
                or epoch == depth_epochs
            )
            if is_checkpoint_epoch:
                save_dir = os.path.join(output, 'models')
                os.makedirs(save_dir, exist_ok=True)

                suffix = f"{current_depth}_{epoch}.pkl"
                gen_save_file = os.path.join(save_dir, "GAN_GEN_" + suffix)
                dis_save_file = os.path.join(save_dir, "GAN_DIS_" + suffix)

                self.gen.save(gen_save_file)
                logger.info("Saving the model to: %s\n" % gen_save_file)
                self.dis.save(dis_save_file)

                # the shadow (EMA) generator is saved as well when in use
                if self.use_ema:
                    gen_shadow_save_file = os.path.join(save_dir, "GAN_GEN_SHADOW_" + suffix)
                    self.gen_shadow.save(gen_shadow_save_file)
                    logger.info("Saving the model to: %s\n" % gen_shadow_save_file)

    logger.info('Training completed.\n')
def execute(self, imgs, size=640, augment=False):
    """Run inference on images supplied in various forms.

    Accepted inputs (as handled below): a ``jt.Var`` batch, which is passed
    straight to the model; or a single item / list of items where each item
    is a file name or ``http`` URI string, a PIL image, or an array-like
    image in HWC or CHW layout.

    Args:
        imgs: Input image(s) as described above. When a list is passed its
            entries are replaced in place by the decoded numpy images.
        size: Target size of the longest image side before padding.
        augment: Forwarded to the model call.

    Returns:
        The raw model output for ``jt.Var`` input, otherwise a ``Detections``
        object built from the images, NMS results, file names and class names.
    """
    # Fixed: wrap in iter() so this also works when parameters() returns a
    # list rather than an iterator.
    p = next(iter(self.model.parameters()))  # reference parameter, for dtype
    if isinstance(imgs, jt.Var):
        return self.model(imgs.cast(p.dtype), augment)  # inference

    # Pre-process
    n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
    shape0, shape1, files = [], [], []  # image and inference shapes, filenames
    for i, im in enumerate(imgs):
        if isinstance(im, str):  # filename or uri
            source = im
            im = Image.open(requests.get(source, stream=True).raw if source.startswith('http') else source)
            im.filename = source  # for uri
        if isinstance(im, Image.Image):
            # Fixed: in-memory PIL images may have no (or an empty) filename;
            # fall back to the generated name instead of crashing.
            fname = getattr(im, 'filename', '') or f'image{i}.jpg'
            files.append(Path(fname).with_suffix('.jpg').name)
        else:
            files.append(f'image{i}.jpg')
        im = np.array(im)  # to numpy
        if im.shape[0] < 5:  # image in CHW
            im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
        im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
        s = im.shape[:2]  # HWC
        shape0.append(s)  # image shape
        g = (size / max(s))  # gain
        shape1.append([y * g for y in s])
        imgs[i] = im  # update
    shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
    x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
    x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
    x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
    x = jt.array(x).cast(p.dtype) / 255.  # scale 0-255 to 0-1

    # Inference
    with jt.no_grad():
        y = self.model(x, augment)[0]  # forward
        y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS

    # Post-process: map boxes from the inference shape back to each image
    for i in range(n):
        y[i][:, :4] = scale_coords(shape1, y[i][:, :4], shape0[i])

    return Detections(imgs, y, files, self.names)
def kaiming_normal_(var, a=0, mode='fan_in', nonlinearity='leaky_relu'):
    """Fill ``var`` via ``gauss_`` with mean 0 and the std from ``calculate_std``.

    The standard deviation is obtained from
    ``calculate_std(var, mode, nonlinearity, a)``; the fill itself runs inside
    ``jt.no_grad()`` and its result is returned.
    """
    sigma = calculate_std(var, mode, nonlinearity, a)
    with jt.no_grad():
        filled = gauss_(var, 0, sigma)
        return filled
def test(cfg = None,
         data = None,
         weights=None,
         batch_size=32,
         imgsz=640,
         conf_thres=0.001,
         iou_thres=0.6,  # for NMS
         save_json=False,
         single_cls=False,
         augment=False,
         verbose=False,
         model=None,
         dataloader=None,
         save_dir=Path(''),  # for saving images
         save_txt=False,  # for auto-labelling
         save_hybrid=False,  # for hybrid auto-labelling
         save_conf=False,  # save auto-label confidences
         plots=True):
    """Evaluate a detection model on a dataset and report P / R / mAP.

    When ``model`` is passed, the function is treated as being called from
    training: the given model and ``dataloader`` are used and the loss is
    accumulated. Otherwise a model is built from ``cfg`` / ``weights`` and a
    dataloader is created from the dataset yaml, using the module-level ``opt``.

    Returns:
        ``((mp, mr, map50, map, *loss), maps, t)`` where ``loss`` holds the
        accumulated loss components divided by the number of batches,
        ``maps`` is a per-class mAP array of length ``nc`` and ``t`` is the
        speed tuple ``(inference_ms, nms_ms, total_ms, imgsz, imgsz, batch_size)``.
    """
    # Initialize/load model and set device
    training = model is not None
    if not training:  # called directly (not from train.py)
        set_logging()

        # Directories
        save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        model = Model(cfg)
        model.load(weights)
        model = model.fuse()
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

    # Configure
    model.eval()
    is_coco = data.endswith('coco.yaml')  # is COCO dataset
    # NOTE(review): yaml.load with FullLoader -- only use with trusted dataset files.
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = jt.linspace(0.5, 0.95, 10)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if not training:
        img = jt.zeros((1, 3, imgsz, imgsz))  # init img
        path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
        dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, pad=0.5, rect=True,
                                       prefix=colorstr('test: ' if opt.task == 'test' else 'val: '))

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
    coco91class = coco80_to_coco91_class()
    # Fixed: the two mAP column headers were garbled (email-obfuscation artefact).
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = jt.zeros((3,))
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.float32()  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets
        nb, _, height, width = img.shape  # batch size, channels, height, width

        with jt.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:
                loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # box, obj, cls

            # Run NMS
            targets[:, 2:] *= jt.array([width, height, width, height])  # to pixels
            lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
            t = time_synchronized()
            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path = Path(paths[si])
            seen += 1

            if len(pred) == 0:
                if nl:
                    stats.append((jt.zeros((0, niou), dtype="bool"), jt.array([]), jt.array([]), tcls))
                continue

            # Predictions
            predn = pred.clone()
            predn[:, :4] = scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1])  # native-space pred

            # Append to text file
            if save_txt:
                gn = jt.array(shapes[si][0])[jt.array([1, 0, 1, 0])]  # normalization gain whwh
                for *xyxy, conf, cls in predn.tolist():
                    xywh = (xyxy2xywh(jt.array(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                    with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f:
                        f.write(('%g ' * len(line)).rstrip() % line + '\n')

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(path.stem) if path.stem.isnumeric() else path.stem
                box = xyxy2xywh(predn[:, :4])  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                # Renamed loop variable: the original reused `p` here and
                # clobbered the precision value initialised above.
                for pred_row, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({'image_id': image_id,
                                  'category_id': coco91class[int(pred_row[5])] if is_coco else int(pred_row[5]),
                                  'bbox': [round(x, 3) for x in b],
                                  'score': round(pred_row[4], 5)})

            # Assign all predictions as incorrect
            correct = jt.zeros((pred.shape[0], niou), dtype="bool")
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox = scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1])  # native-space labels
                if plots:
                    confusion_matrix.process_batch(predn, jt.contrib.concat((labels[:, 0:1], tbox), 1))

                # Per target class
                for cls in jt.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious; unpacked as (indices, best ious)
                        # -- presumably the return order of this argmax, confirm.
                        i, ious = box_iou(predn[pi, :4], tbox[ti]).argmax(1)

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero():
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.numpy(), pred[:, 4].numpy(), pred[:, 5].numpy(), tcls))

        # Plot images
        if plots and batch_i < 3:
            f = save_dir / f'test_batch{batch_i}_labels.jpg'  # labels
            Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
            f = save_dir / f'test_batch{batch_i}_pred.jpg'  # predictions
            Thread(target=plot_images, args=(img, output_to_target(output), paths, f, names), daemon=True).start()

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
        ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = np.zeros((1,))

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if (verbose or (nc <= 20 and not training)) and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        anno_json = '../coco/annotations/instances_val2017.json'  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
        with open(pred_json, 'w') as f:
            json.dump(jdict, f)

        # Best effort: a missing or failing pycocotools only prints a message.
        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            coco_eval = COCOeval(anno, pred, 'bbox')
            if is_coco:
                coco_eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]  # image IDs to evaluate
            coco_eval.evaluate()
            coco_eval.accumulate()
            coco_eval.summarize()
            map, map50 = coco_eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print(f'pycocotools unable to run: {e}')

    # Return results
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.numpy() / len(dataloader)).tolist()), maps, t
def kaiming_uniform_(var, a=0, mode='fan_in', nonlinearity='leaky_relu'):
    """Fill ``var`` via ``uniform_`` over ``[-bound, bound]``.

    ``bound`` is ``sqrt(3) * std`` with ``std`` taken from
    ``calculate_std(var, mode, nonlinearity, a)``. The fill runs inside
    ``jt.no_grad()`` and its result is returned.
    """
    limit = math.sqrt(3.0) * calculate_std(var, mode, nonlinearity, a)
    with jt.no_grad():
        filled = uniform_(var, -limit, limit)
        return filled
def draw_style_mixing_figure(png, gen, out_depth, src_seeds, dst_seeds, style_ranges):
    """Render a style-mixing grid and save it to ``png``.

    The first row shows the images generated from ``src_seeds``, the first
    column those from ``dst_seeds``. Cell (row, col) shows the destination
    dlatents of that row with the entries selected by ``style_ranges[row]``
    replaced by the source dlatents of that column.
    """
    num_cols = len(src_seeds)
    num_rows = len(dst_seeds)
    tile = 2**(out_depth + 2)
    w = h = tile

    def seeded_latents(seeds, size):
        # one reproducible latent vector per seed, stacked row-wise
        vectors = [np.random.RandomState(seed).randn(size, ) for seed in seeds]
        return jt.array(np.stack(vectors).astype(np.float32))

    def paste_tile(canvas, tensor, position):
        # convert one generated image to uint8 HWC and paste it at `position`
        tensor = adjust_dynamic_range(tensor)
        pixels = tensor.multiply(255).clamp(0, 255).permute(
            1, 2, 0).data.astype(np.uint8)
        canvas.paste(Image.fromarray(pixels, 'RGB'), position)

    with jt.no_grad():
        latent_size = gen.g_mapping.latent_size
        src_latents = seeded_latents(src_seeds, latent_size)
        dst_latents = seeded_latents(dst_seeds, latent_size)

        src_dlatents = gen.g_mapping(src_latents)  # [seed, layer, component]
        dst_dlatents = gen.g_mapping(dst_latents)  # [seed, layer, component]
        src_images = gen.g_synthesis(src_dlatents, depth=out_depth, alpha=1)
        dst_images = gen.g_synthesis(dst_dlatents, depth=out_depth, alpha=1)

        src_dlatents_np = src_dlatents.data
        dst_dlatents_np = dst_dlatents.data

        canvas = Image.new('RGB', (w * (num_cols + 1), h * (num_rows + 1)), 'white')

        # header row: source images
        for col, src_image in enumerate(list(src_images)):
            paste_tile(canvas, src_image, ((col + 1) * w, 0))

        for row, dst_image in enumerate(list(dst_images)):
            # header column: destination image of this row
            paste_tile(canvas, dst_image, (0, (row + 1) * h))

            # copy the selected style entries from every source into this row
            picked = style_ranges[row]
            mixed = np.stack([dst_dlatents_np[row]] * num_cols)
            mixed[:, picked] = src_dlatents_np[:, picked]
            row_images = gen.g_synthesis(jt.array(mixed), depth=out_depth, alpha=1)

            for col, image in enumerate(list(row_images)):
                paste_tile(canvas, image, ((col + 1) * w, (row + 1) * h))

        canvas.save(png)
def transform_frame(frames):
    """Convert frames to float Vars and apply ``transform`` to the stacked batch.

    Returns a tuple ``(converted_frames, transformed_batch)``; all work runs
    inside ``jt.no_grad()``.
    """
    with jt.no_grad():
        converted = []
        for raw in frames:
            converted.append(jt.array(raw).float())
        stacked = jt.stack(converted, 0)
        return converted, transform(stacked)
def test(model, dataset='cocoVal', logger=print, benchmark=False):
    """Run the model over an evaluation set and report COCO segmentation scores.

    Args:
        model: Called as ``model([img], [gt_kpts], [gt_masks], test_input)``;
            ``output[0]`` is iterated as the predicted masks.
        dataset: One of ``'OCHumanVal'``, ``'OCHumanTest'`` or ``'cocoVal'``.
        logger: Callable receiving the result lines (defaults to ``print``).
        benchmark: When true, only the forward passes are run and timed; no
            masks are encoded and no evaluation is performed.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    # Image root and annotation file per supported dataset.
    dataset_paths = {
        'OCHumanVal': (
            './data/OCHuman/images',
            './data/OCHuman/annotations/ochuman_coco_format_val_range_0.00_1.00.json',
        ),
        'OCHumanTest': (
            './data/OCHuman/images',
            './data/OCHuman/annotations/ochuman_coco_format_test_range_0.00_1.00.json',
        ),
        'cocoVal': (
            './data/coco2017/val2017',
            './data/coco2017/annotations/person_keypoints_val2017_pose2seg.json',
        ),
    }
    # Fixed: an unknown name previously fell through and failed later with an
    # UnboundLocalError; report it explicitly instead.
    if dataset not in dataset_paths:
        raise ValueError(
            'unknown dataset %r, expected one of %s' % (dataset, sorted(dataset_paths)))
    ImageRoot, AnnoFile = dataset_paths[dataset]

    datainfos = COCOTEST(ImageRoot, AnnoFile, onlyperson=True, loadimg=True, is_test=True)
    datainfos.batch_size = 1
    datainfos.num_workers = 1
    datainfos.collate_batch = collate_batch
    data_len = len(datainfos)

    model.eval()

    results_segm = []
    imgIds = []
    start_time = time.time()
    for i, batch in tqdm(enumerate(datainfos)):
        rawdata = batch[0]  # batch_size is 1, so take the only sample
        img = rawdata['data']
        image_id = rawdata['id']
        gt_kpts = rawdata['gt_kpts']
        gt_masks = rawdata['gt_masks']

        with jt.no_grad():
            output = model([img], [gt_kpts], [gt_masks], rawdata['test_input'])
        imgIds.append(image_id)

        if benchmark:
            continue

        for mask in output[0]:
            maskencode = maskUtils.encode(np.asfortranarray(mask))
            # 'counts' is bytes; decode so the result is a plain string.
            maskencode['counts'] = maskencode['counts'].decode('ascii')
            results_segm.append({
                "image_id": image_id,
                "category_id": 1,
                "score": 1.0,
                "segmentation": maskencode
            })

    # Wait for pending work before stopping the clock.
    jt.sync_all(True)
    end_time = time.time()
    print('fps', data_len / (end_time - start_time))
    if benchmark:
        return

    def do_eval_coco(image_ids, coco, results, flag):
        """Evaluate ``results`` with COCOeval for category 1 on ``image_ids``."""
        from pycocotools.cocoeval import COCOeval
        assert flag in ['bbox', 'segm', 'keypoints']
        # Evaluate
        coco_results = coco.loadRes(results)
        cocoEval = COCOeval(coco, coco_results, flag)
        cocoEval.params.imgIds = image_ids
        cocoEval.params.catIds = [1]
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        return cocoEval

    cocoEval = do_eval_coco(imgIds, datainfos.COCO, results_segm, 'segm')
    logger('[POSE2SEG] AP|.5|.75| S| M| L| AR|.5|.75| S| M| L|')
    _str = '[segm_score] %s ' % dataset
    for value in cocoEval.stats.tolist():
        _str += '%.3f ' % value
    logger(_str)