def save_result(self, output, batch, results):
    """Decode the network output and store post-processed detections.

    The decoded detections of the whole batch are flattened into one
    ``(1, N, D)`` array, passed through ``ctdet_post_process`` together
    with the batch centre/scale metadata, and the first entry of the result
    is stored in ``results`` under the first image id of the batch.
    """
    opt = self.opt
    meta = batch['meta']
    offset = output['reg'] if opt.reg_offset else None
    heatmap = output['hm']
    decoded = ctdet_decode(
        heatmap, output['wh'], reg=offset,
        cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    flat = decoded.detach().cpu().numpy().reshape(1, -1, decoded.shape[2])
    centers = meta['c'].cpu().numpy()
    scales = meta['s'].cpu().numpy()
    # The post-processing step works on a copy of the decoded array.
    processed = ctdet_post_process(
        flat.copy(), centers, scales,
        heatmap.shape[2], heatmap.shape[3], heatmap.shape[1])
    image_id = meta['img_id'].cpu().numpy()[0]
    results[image_id] = processed[0]
def process(self, images, return_time=False):
    """Run the model on ``images`` and decode ``self.K`` detections.

    The model output is indexed positionally: element 0 is passed through
    an in-place sigmoid and used as the heatmap, elements 1 and 2 are
    forwarded to ``ctdet_decode`` as ``wh`` and ``reg``.

    ``return_time`` is accepted but not used by this implementation; the
    return value is always ``(output, dets)``.
    """
    with torch.no_grad():
        output = self.model(images)
        heatmap = output[0].sigmoid_()
        size, offset = output[1], output[2]
        # Wait for pending CUDA work before decoding.
        torch.cuda.synchronize()
        dets = ctdet_decode(heatmap, size, reg=offset, K=self.K)
        return output, dets
def debug(self, batch, output, iter_id):
    """Draw predicted and ground-truth boxes/heatmaps for the first image.

    Images are saved to ``opt.debug_dir`` when ``opt.debug == 4`` and shown
    interactively otherwise.
    """
    opt = self.opt
    # Use the offset regression head only when it is enabled.
    reg = output['reg'] if opt.reg_offset else None
    # Decode the network heads into detections: [bboxes, scores, clses].
    dets = ctdet_decode(
        output['hm'], output['wh'], reg=reg,
        cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    # Detached numpy copy flattened to (1, batch * K, D).
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    # Scale the predicted box coordinates by down_ratio.
    dets[:, :, :4] *= opt.down_ratio
    # Ground-truth detections, reshaped to the same layout and scaled too.
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= opt.down_ratio

    # Only the first image of the batch is visualised.
    idx = 0
    debugger = Debugger(
        dataset=opt.dataset, ipynb=(opt.debug == 3),
        theme=opt.debugger_theme)
    # Input image as an HWC numpy array.
    img = batch['input'][idx].detach().cpu().numpy().transpose(1, 2, 0)
    # Undo the normalisation (multiply by std, add mean) and go to uint8.
    img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(
        np.uint8)
    # output -> predicted heatmap, batch -> ground-truth heatmap.
    pred = debugger.gen_colormap(output['hm'][idx].detach().cpu().numpy())
    gt = debugger.gen_colormap(batch['hm'][idx].detach().cpu().numpy())
    debugger.add_blend_img(img, pred, 'pred_hm')
    debugger.add_blend_img(img, gt, 'gt_hm')

    debugger.add_img(img, img_id='out_pred')
    for det in dets[idx]:
        # Draw only detections whose score exceeds the threshold.
        if det[4] > opt.center_thresh:
            debugger.add_coco_bbox(det[:4], det[-1], det[4],
                                   img_id='out_pred')

    debugger.add_img(img, img_id='out_gt')
    for det in dets_gt[idx]:
        if det[4] > opt.center_thresh:
            debugger.add_coco_bbox(det[:4], det[-1], det[4],
                                   img_id='out_gt')

    if opt.debug == 4:
        debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
    else:
        debugger.show_all_imgs(pause=True)
def debug(self, batch, output, iter_id):
    """Visualise predictions and ground truth for the first batch image.

    A 6-channel input is split into two 3-channel images that are shown
    side by side; any other input is shown as a single image.
    """
    opt = self.opt
    reg = output['reg'] if opt.reg_offset else None
    dets = ctdet_decode(output['hm'], output['wh'], reg=reg,
                        cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= opt.down_ratio
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= opt.down_ratio

    # Only the first image of the batch is visualised.
    idx = 0
    debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                        theme=opt.debugger_theme)
    frame = batch['input'][idx].detach().cpu().numpy()
    if frame.shape[0] == 6:
        # Channels 0-2 and 3-5 are two images; concatenate along width.
        left_img = frame[:3, :, :].transpose(1, 2, 0)
        right_img = frame[3:, :, :].transpose(1, 2, 0)
        img = np.concatenate((left_img, right_img), axis=1)
    else:
        img = frame.transpose(1, 2, 0)
    # Undo the input normalisation and convert to uint8.
    img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(
        np.uint8)

    pred = debugger.gen_colormap(output['hm'][idx].detach().cpu().numpy())
    gt = debugger.gen_colormap(batch['hm'][idx].detach().cpu().numpy())
    debugger.add_blend_img(img, pred, 'pred_hm')
    debugger.add_blend_img(img, gt, 'gt_hm')

    debugger.add_img(img, img_id='out_pred')
    for det in dets[idx]:
        if det[4] > opt.center_thresh:
            debugger.add_coco_bbox(det[:4], det[-1], det[4],
                                   img_id='out_pred')

    debugger.add_img(img, img_id='out_gt')
    for det in dets_gt[idx]:
        if det[4] > opt.center_thresh:
            debugger.add_coco_bbox(det[:4], det[-1], det[4],
                                   img_id='out_gt')

    if opt.debug == 4:
        debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
    else:
        debugger.show_all_imgs(pause=True)
def store_metric_coco(self, imgId, batch, output, opt, is_baseline=False):
    """Register predictions and ground truth of one image for COCO metrics.

    Box coordinates (first four columns) of both arrays are multiplied by
    ``opt.down_ratio * opt.downsample`` before being handed to
    ``add_det_to_coco``.
    """
    decoded = ctdet_decode(output['hm'], output['wh'], reg=output['reg'],
                           cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    width = decoded.shape[2]

    predictions = decoded.detach().cpu().numpy().reshape(1, -1, width)
    predictions[:, :, :4] *= opt.down_ratio * opt.downsample

    # Deep-copy so that scaling does not touch the array held by the batch.
    dets_gt = copy.deepcopy(
        batch['meta']['gt_det'].numpy().reshape(1, -1, width))
    dets_gt[:, :, :4] *= opt.down_ratio * opt.downsample

    self.add_det_to_coco(imgId, predictions[0], is_baseline=is_baseline)
    self.add_det_to_coco(imgId, dets_gt[0], is_gt=True)
def process(self, images, return_time=False):
    """Forward ``images`` through the model and decode the detections.

    Element 0 of the model output gets an in-place sigmoid and serves as
    the heatmap; elements 1 and 2 are passed to ``ctdet_decode`` as ``wh``
    and ``reg``. ``return_time`` is accepted but ignored.
    """
    with torch.no_grad():
        output = self.model(images)
        heatmap = output[0].sigmoid_()
        # Original author's note: the plan is to do the max-pool inside hm
        # and the top-k here, so only indices and hms come out, which is
        # why K has to be fixed (to 100) here.
        box_size = output[1]
        center_offset = output[2]
        torch.cuda.synchronize()
        dets = ctdet_decode(heatmap, box_size, reg=center_offset, K=self.K)
        return output, dets
def process(self, images, gt=None, return_time=False):
    """Run the model and decode detections for every output stack.

    For the ``'dota'`` dataset the angle head ``output['a']`` is passed
    through an in-place sigmoid (and mapped to ``2 * a - 1`` when
    ``opt.a_method == 1``). Depending on the ``opt.replace_*`` flags, parts
    of the output are replaced from ``gt`` via the ``replace_ht*`` helpers;
    otherwise the heatmap gets an in-place sigmoid.

    Returns ``(outputs, dets)`` or, when ``return_time`` is true,
    ``(outputs, dets, forward_time)`` where ``forward_time`` is the
    timestamp taken after the last forward synchronisation.
    """
    opt = self.opt
    # The dataset is constant for the call, so evaluate the test once.
    has_angle = opt.dataset == 'dota'
    with torch.no_grad():
        outputs = self.model(images)[-1]
        dets = []
        for output in outputs:
            if has_angle:
                output['a'].sigmoid_()
                if opt.a_method == 1:
                    output['a'] = 2. * output['a'] - 1.

            if opt.replace_hm and opt.replace_wh:
                replace_ht_wh(gt, output, images.shape[2:])
            elif opt.replace_hm and opt.replace_a:
                replace_ht_a(gt, output, images.shape[2:])
            elif opt.replace_hm:
                replace_ht(gt, output, images.shape[2:])
            else:
                output['hm'].sigmoid_()

            hm = output['hm']
            reg = output['reg'] if opt.reg_offset else None
            wh = output['wh']
            # Always bind ``a``: previously it was only assigned for 'dota',
            # so other datasets hit an UnboundLocalError (or reused the
            # value from a previous iteration) below.
            a = output['a'] if has_angle else None

            if opt.flip_test:
                hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2
                wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2
                reg = reg[0:1] if reg is not None else None
                a = a[0:1] if a is not None else None

            torch.cuda.synchronize()
            forward_time = time.time()

            det = ctdet_decode(
                hm, wh, reg=reg, a=a,
                cat_spec_wh=opt.cat_spec_wh, K=opt.K,
                a_method=opt.a_method, debug=opt.debug)
            dets.append(det)

    if return_time:
        return outputs, dets, forward_time
    return outputs, dets
def batch_accuracy(self, output, batch):
    """Print the fraction of top-1 predictions with IoU above 0.5.

    One detection per image is decoded (``K=1``) and compared against the
    first ground-truth box of the corresponding ``batch['meta']['gt_det']``
    entry. The printed value is the number of pairs whose IoU exceeds 0.5
    divided by the number of predictions.
    """
    reg = output['reg'] if self.opt.reg_offset else None
    dets = ctdet_decode(output['hm'], output['wh'], reg=reg,
                        cat_spec_wh=self.opt.cat_spec_wh, K=1)
    dets = dets.detach().cpu().numpy().reshape(-1, dets.shape[2])

    ious = np.array([
        iou(pred[0:4], gt[0][0:4])
        for pred, gt in zip(dets, batch['meta']['gt_det'])
    ])
    # Count the hits; the previous code summed the IoU *values* above the
    # threshold, which is not an accuracy.
    hits = int((ious > 0.5).sum())
    total = len(dets)
    accuracy = hits * 1.0 / total if total else float('nan')
    print("BATCH ACC: ", accuracy)
def save_result(self, output, batch, results):
    """Store post-processed detections and, for BDD, the image file name.

    Detections of the first image id in the batch are written to
    ``results``. For the ``'bdd'`` and ``'bddstream'`` datasets the batch
    ``file_name`` metadata is additionally recorded under
    ``results['map_img_id']``.
    """
    opt = self.opt
    meta = batch['meta']
    offset = output['reg'] if opt.reg_offset else None
    heatmap = output['hm']
    decoded = ctdet_decode(
        heatmap, output['wh'], reg=offset,
        cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    flat = decoded.detach().cpu().numpy().reshape(1, -1, decoded.shape[2])
    processed = ctdet_post_process(
        flat.copy(),
        meta['c'].cpu().numpy(),
        meta['s'].cpu().numpy(),
        heatmap.shape[2], heatmap.shape[3], heatmap.shape[1])
    results[meta['img_id'].cpu().numpy()[0]] = processed[0]

    if opt.dataset == 'bdd' or opt.dataset == 'bddstream':
        # Lazily create the id -> file-name table on first use.
        if 'map_img_id' not in results:
            results['map_img_id'] = {}
        file_names = meta['file_name']
        results['map_img_id'][meta['img_id'].cpu().numpy()[0]] = file_names
def get_score(self, batch, output, u, is_baseline=False,
              save_class_score=False):
    """Compute the mean AP (in percent) of one sample at IoU 0.5.

    Predictions and ground truth are scaled by
    ``opt.down_ratio * opt.downsample``, predictions below
    ``self.center_thresh`` are dropped, and ``self.cat_ap`` is evaluated
    per class at the thresholds ``[0.5, 0.75]``. The mean is taken over the
    first threshold row, restricted to classes that have at least one
    ground-truth box.

    ``u`` and ``is_baseline`` are accepted but not used here.

    Returns ``mean_aps``, or ``(mean_aps, aps)`` when ``save_class_score``
    is true, where ``aps`` has shape ``(len(thresholds), num_classes)``.
    """
    opt = self.opt
    dets = ctdet_decode(output['hm'], output['wh'], reg=output['reg'],
                        cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    predictions = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    predictions[:, :, :4] *= opt.down_ratio * opt.downsample

    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    # Copy before scaling so the batch metadata is left untouched.
    dets_gt = copy.deepcopy(dets_gt)
    dets_gt[:, :, :4] *= opt.down_ratio * opt.downsample

    # Drop the leading batch axis.
    predictions = predictions[0]
    dets_gt = dets_gt[0]

    thresholds = [0.5, 0.75]
    aps = np.zeros((len(thresholds), opt.num_classes))
    filt_pred = predictions[predictions[:, 4] >= self.center_thresh]

    # has_gt[i] is non-zero when class i appears in the ground truth.
    has_gt = np.zeros(opt.num_classes)
    for cls_id in range(opt.num_classes):
        pred_class = filt_pred[filt_pred[:, 5] == cls_id]
        gt_class = dets_gt[dets_gt[:, 5] == cls_id]
        has_gt[cls_id] = (len(gt_class) > 0)
        _, _, ap = self.cat_ap(pred_class, gt_class, thresholds)
        aps[:, cls_id] = ap

    aps *= 100.0
    mean_aps = np.mean(aps[0][has_gt != 0], axis=0)
    if save_class_score:
        return mean_aps, aps
    return mean_aps
def forward(self, x):
    """Normalise ``x``, run the model and return decoded detections.

    The input is divided by 255 (via a full tensor of the input's shape,
    dtype and device), then ``self.mean`` is subtracted and the result is
    divided by ``self.std``. The second model output carries the ``wh``
    channels (first two) and the ``reg`` channels (the rest).
    """
    # NOTE(review): the explicit ones-tensor divisor is kept as written; it
    # may matter for graph export (decode is called with trt=True).
    divisor = torch.ones(x.shape, dtype=x.dtype, device=x.device) * 255
    x = (x / divisor - self.mean) / self.std

    out = self.model(x)
    hm = torch.sigmoid(out[0])
    packed = out[1]
    reg = packed[:, 2:, :, :]
    wh = packed[:, :2, :, :]
    return ctdet_decode(hm, wh, reg=reg,
                        cat_spec_wh=self.opt.cat_spec_wh,
                        K=self.opt.K, trt=True)
def predict(self, images):
    """Time one forward pass, print diagnostics and decode detections.

    Prints the size of every output head and the elapsed forward time,
    then returns ``(output, dets)``. The offset head is used only when the
    output contains a ``'reg'`` entry.
    """
    images = images.to(self.device)
    with torch.no_grad():
        # Synchronise on both sides so the timing covers the forward pass.
        torch.cuda.synchronize()
        started = time.time()
        output = self.model(images)[-1]
        torch.cuda.synchronize()
        finished = time.time()

        for head, tensor in output.items():
            print("output:", head, tensor.size())
        print("inference time:", finished - started)

        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = None if "reg" not in output else output['reg']
        dets = ctdet_decode(hm, wh, reg=reg, K=self.K)
        torch.cuda.synchronize()
        return output, dets
def process(self, images, return_time=False):
    """Forward ``images`` and decode ``opt.K`` detections.

    With ``opt.flip_test`` the heatmap and size maps of entries 0 and 1
    (the latter passed through ``flip_tensor``) are averaged, and only
    entry 0 of the offset map is kept.

    Returns ``(output, dets)`` or ``(output, dets, forward_time)`` when
    ``return_time`` is true.
    """
    opt = self.opt
    with torch.no_grad():
        output = self.model(images)[-1]
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = None
        if opt.reg_offset:
            reg = output['reg']

        if opt.flip_test:
            hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2
            wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2
            if reg is not None:
                reg = reg[0:1]

        torch.cuda.synchronize()
        forward_time = time.time()
        dets = ctdet_decode(hm, wh, reg=reg, K=opt.K)

    if not return_time:
        return output, dets
    return output, dets, forward_time
def process(self, images, return_time=False):
    """Run the detector on ``images`` and decode the detections.

    :param images: images tensor (batch dimension is 2 when flip_test
        is enabled)
    :param return_time: whether to also return the forward timestamp
    :return: ``(output, dets)`` or ``(output, dets, forward_time)``.
        Per the original author's notes, ``output`` is a dict such as
        ``{'hm': [1, 20, 88, 160], 'wh': [1, 2, 88, 160],
        'reg': [1, 2, 88, 160]}`` laid out as (B, C, out_h, out_w), and
        ``dets`` is [B, K, 6] (box, score, cls).
    """
    opt = self.opt
    with torch.no_grad():
        # The model returns a list; the last element is the head dict.
        output = self.model(images)[-1]
        hm = output['hm'].sigmoid_()  # in-place activation
        wh = output['wh']
        # Centre offset head is optional.
        reg = None
        if opt.reg_offset:
            reg = output['reg']

        if opt.flip_test:
            # Average the two entries; the second one goes through
            # flip_tensor first.
            hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2
            wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2
            # Unlike hm/wh, the offset is not averaged.
            if reg is not None:
                reg = reg[0:1]

        torch.cuda.synchronize()
        forward_time = time.time()
        dets = ctdet_decode(hm, wh, reg=reg,
                            cat_spec_wh=opt.cat_spec_wh, K=opt.K)

    if not return_time:
        return output, dets
    return output, dets, forward_time
def process(self, images, return_time=False):
    """Decode detections from the ``hm_act_f`` / ``wh_act`` heads.

    No offset head is used (``reg_act`` is always ``None``). With
    ``opt.flip_test`` the two batch entries are averaged after passing the
    second one through ``flip_tensor``.

    Returns ``(output, dets_act)`` or, when ``return_time`` is true,
    ``(output, dets_act, forward_time)``.
    """
    with torch.no_grad():
        output = self.model(images)[-1]
        heat = output['hm_act_f'].sigmoid_()
        size = output['wh_act']
        offset = None

        if self.opt.flip_test:
            heat = (heat[0:1] + flip_tensor(heat[1:2])) / 2
            size = (size[0:1] + flip_tensor(size[1:2])) / 2

        torch.cuda.synchronize()
        forward_time = time.time()
        dets_act = ctdet_decode(heat, size, reg_act=offset, K=self.opt.K)

    if not return_time:
        return output, dets_act
    return output, dets_act, forward_time
def process(self, images, return_time=False):
    """Forward ``images``, print basic diagnostics and decode detections.

    Prints a banner, the input shape and the output head names before
    decoding. Returns ``(output, dets)`` or, when ``return_time`` is true,
    ``(output, dets, forward_time)``.
    """
    opt = self.opt
    with torch.no_grad():
        print('=========== run process ========')
        print("input:", images.shape)
        output = self.model(images)[-1]
        print('output:', output.keys())

        hm = output['hm'].sigmoid_()
        wh = output['wh']
        reg = None
        if opt.reg_offset:
            reg = output['reg']

        if opt.flip_test:
            hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2
            wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2
            if reg is not None:
                reg = reg[0:1]

        torch.cuda.synchronize()
        forward_time = time.time()
        dets = ctdet_decode(hm, wh, reg=reg,
                            cat_spec_wh=opt.cat_spec_wh, K=opt.K)

    if not return_time:
        return output, dets
    return output, dets, forward_time
def save_result(self, output, batch, results):
    """Append the top-1 predicted box and the first gt box to ``results``.

    One detection per image is decoded (``K=1``). The appended dict holds
    the first four columns of the first decoded detection under
    ``'predict'`` and the first four values of the first ground-truth
    entry, as a list, under ``'gt_bbox'``.
    """
    opt = self.opt
    meta = batch['meta']
    offset = output['reg'] if opt.reg_offset else None
    heatmap = output['hm']
    decoded = ctdet_decode(heatmap, output['wh'], reg=offset,
                           cat_spec_wh=opt.cat_spec_wh, K=1)
    flat = decoded.detach().cpu().numpy().reshape(1, -1, decoded.shape[2])

    # NOTE(review): the post-processed result is not used below; the call
    # is kept so behaviour is unchanged.
    ctdet_post_process(flat.copy(),
                       meta['c'].cpu().numpy(),
                       meta['s'].cpu().numpy(),
                       heatmap.shape[2], heatmap.shape[3],
                       heatmap.shape[1])

    predicted_box = flat[0][0][0:4]
    gt_box = meta['gt_det'][0][0][0:4].tolist()
    results.append({'predict': predicted_box, 'gt_bbox': gt_box})
def process(self, images, return_time=False):
    """Forward ``images`` and decode detections without extra activation.

    The heatmap is used as produced by the model (no sigmoid is applied
    here). CUDA is synchronised only when ``opt.gpus`` is non-empty.

    Returns ``(output, dets)`` or, when ``return_time`` is true,
    ``(output, dets, forward_time)``.
    """
    opt = self.opt
    with torch.no_grad():
        output = self.model(images)[-1]
        heatmap = output['hm']
        size = output['wh']
        offset = None
        if opt.reg_offset:
            offset = output['reg']

        if len(opt.gpus) > 0:
            torch.cuda.synchronize()
        forward_time = time.time()
        dets = ctdet_decode(heatmap, size, reg=offset,
                            cat_spec_wh=opt.cat_spec_wh, K=opt.K)

    if not return_time:
        return output, dets
    return output, dets, forward_time
def save_result(self, output, batch, results):
    """Decode and post-process detections for the configured task.

    Supports ``opt.task`` values ``'ctdet'`` and ``'multi_pose'``. The
    post-processed detections of the first image id in the batch are
    stored in ``results``.

    Raises:
        AssertionError: if ``opt.task`` is neither of the supported tasks.
    """
    opt = self.opt
    if opt.task == 'ctdet':
        reg = output['reg'] if opt.reg_offset else None
        dets = ctdet_decode(output['hm'], output['wh'], reg=reg,
                            cat_spec_wh=opt.cat_spec_wh, K=opt.K)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
        dets_out = ctdet_post_process(
            dets.copy(),
            batch['meta']['c'].cpu().numpy(),
            batch['meta']['s'].cpu().numpy(),
            output['hm'].shape[2], output['hm'].shape[3],
            output['hm'].shape[1])
        results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0]
    elif opt.task == 'multi_pose':
        reg = output['reg'] if opt.reg_offset else None
        hm_hp = output['hm_hp'] if opt.hm_hp else None
        hp_offset = output['hp_offset'] if opt.reg_hp_offset else None
        dets = multi_pose_decode(output['hm'], output['wh'], output['hps'],
                                 reg=reg, hm_hp=hm_hp, hp_offset=hp_offset,
                                 K=opt.K)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
        dets_out = multi_pose_post_process(
            dets.copy(),
            batch['meta']['c'].cpu().numpy(),
            batch['meta']['s'].cpu().numpy(),
            output['hm'].shape[2], output['hm'].shape[3])
        results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0]
    else:
        # Raise explicitly: a bare ``assert 0`` is removed under ``-O`` and
        # an unknown task would then be silently ignored.
        raise AssertionError('task not defined!')
def process(self, images, return_time=False):
    """Export the model to ONNX and terminate the interpreter.

    As written, the regular inference path is disabled by a literal
    ``if False`` and the ``else`` branch always runs: it calls the model
    once, exports it to ``"ctdet-resdcn18.onnx"`` and calls ``quit()``.

    NOTE(review): this looks like a temporary export hack. The return
    statements at the end read ``output``, ``dets`` and ``forward_time``,
    which are only bound in the disabled branch, so they would fail with an
    unbound-name error if ``quit()`` ever returned. Confirm the intended
    behaviour before relying on this method for inference.
    """
    with torch.no_grad():
        # Disabled reference implementation of the normal inference path.
        if False:
            output = self.model(images)[-1]
            hm = output['hm'].sigmoid_()
            wh = output['wh']
            reg = output['reg'] if self.opt.reg_offset else None
            if self.opt.flip_test:
                hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2
                wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2
                reg = reg[0:1] if reg is not None else None
            torch.cuda.synchronize()
            forward_time = time.time()
            dets = ctdet_decode(hm, wh, reg=reg, cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
        else:
            # The model is expected to return three tensors here; they are
            # not used after unpacking.
            hm, wh, reg = self.model(images)
            # The output names mirror the three heads unpacked above.
            torch.onnx.export(self.model, images, "ctdet-resdcn18.onnx", opset_version=9, verbose=False, output_names=["hm", "wh", "reg"])
            # Stop the process right after the export.
            quit()
    if return_time:
        return output, dets, forward_time
    else:
        return output, dets
def process(self, images, return_time=False):
    """
    Apply detection to input images.

    :param images: input images as a numpy array in "NCHW" format.
    :param return_time: if True, also return the timestamp taken just
        before decoding.
    :return: ``(outputs, dets)`` or ``(outputs, dets, forward_time)``.
    """
    inputs = nn.Variable.from_numpy_array(images)
    outputs = self.model(inputs)
    hm = F.sigmoid(outputs[0])
    wh = outputs[1]
    reg = outputs[2]

    if self.opt.channel_last:
        # Move the last axis to position 1 for every head.
        axes = (0, 3, 1, 2)
        hm = F.transpose(hm, axes)
        wh = F.transpose(wh, axes)
        reg = F.transpose(reg, axes)

    forward_time = time.time()
    dets = ctdet_decode(hm, wh, reg=reg, K=self.opt.K)

    if not return_time:
        return outputs, dets
    return outputs, dets, forward_time
def train(self, cfg):
    """Train a CenterNet-style detector described by ``cfg``.

    Steps performed:

    * normalise ``cfg.gpus`` and export ``CUDA_VISIBLE_DEVICES``;
    * create ``<save_dir>/<id>`` and its ``debug`` sub-directory, set up
      the logger (and a TensorBoard writer when ``USE_TENSORBOARD``);
    * build the training dataset/loader and read the validation image
      list (``cfg.val_dir`` or ``<data_dir>/valid.txt``);
    * create the model and Adam optimizer, optionally resuming from
      ``model_last.pth``;
    * train for ``cfg.num_epochs`` epochs; every ``cfg.val_step`` steps
      ``cfg.sample_size`` random validation images are run through the
      network, the predictions are drawn into the debug directory and the
      weights are written to ``model_best.pth``;
    * after every epoch write ``model_last.pth``; at the epochs listed in
      ``cfg.lr_step`` also write ``model_<epoch>.pth`` and multiply the
      learning rate by 0.1 per step reached.
    """
    # --- GPU environment (single or multiple cards) ------------------
    if isinstance(cfg.gpus, (list, tuple)):
        cfg.gpus = [int(i) for i in cfg.gpus]
        gpus_str = ','.join(str(g) for g in cfg.gpus)
    else:
        gpus_str = str(int(cfg.gpus))
        cfg.gpus = [int(cfg.gpus)]
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus_str
    # After masking, the visible devices are renumbered from 0.
    cfg.gpus = list(range(len(cfg.gpus))) if cfg.gpus[0] >= 0 else [-1]

    # --- logging -----------------------------------------------------
    model_dir = os.path.join(cfg.save_dir, cfg.id)
    debug_dir = os.path.join(model_dir, 'debug')
    # exist_ok avoids the check-then-create race of the previous code.
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(debug_dir, exist_ok=True)
    logger = setup_logger(cfg.id, os.path.join(model_dir, 'log'))
    if USE_TENSORBOARD:
        writer = tensorboardX.SummaryWriter(
            log_dir=os.path.join(model_dir, 'log'))
    logger.info(cfg)

    gpus = cfg.gpus
    device = torch.device('cpu' if gpus[0] < 0 else 'cuda')
    lr = cfg.lr
    lr_step = cfg.lr_step
    num_epochs = cfg.num_epochs
    val_step = cfg.val_step
    sample_size = cfg.sample_size

    # --- dataset -----------------------------------------------------
    dataset = YOLO(cfg.data_dir, cfg.hflip, cfg.vflip, cfg.rotation,
                   cfg.scale, cfg.shear, opt=cfg, split='train')
    names = dataset.class_name
    std = dataset.std
    mean = dataset.mean
    # Configure the prediction heads from the dataset's classes.
    cfg.setup_head(dataset)
    trainloader = DataLoader(dataset, batch_size=cfg.batch_size,
                             shuffle=True, num_workers=cfg.num_workers,
                             pin_memory=True, drop_last=True)

    valid_file = cfg.val_dir if not cfg.val_dir == '' else os.path.join(
        cfg.data_dir, 'valid.txt')
    with open(valid_file, 'r') as f:
        val_list = [line.rstrip() for line in f.readlines()]

    # --- model and optimizer -----------------------------------------
    net = create_model(cfg.arch, cfg.heads, cfg.head_conv, cfg.down_ratio,
                       cfg.filters)
    optimizer = optim.Adam(net.parameters(), lr=lr)
    start_epoch = 0

    if cfg.resume:
        pretrain = os.path.join(model_dir, 'model_last.pth')
        if os.path.exists(pretrain):
            print('resume model from %s' % pretrain)
            try:
                net, optimizer, start_epoch = load_model(
                    net, pretrain, optimizer, True, lr, lr_step)
            except Exception as err:
                # Best effort: keep training from scratch, but do not
                # swallow KeyboardInterrupt/SystemExit and do show why
                # loading failed.
                print('\t... loading model error: ckpt may not compatible')
                print('\t... cause: {!r}'.format(err))

    model = ModleWithLoss(net, CtdetLoss(cfg))
    if len(gpus) > 1:
        model = nn.DataParallel(model, device_ids=gpus).to(device)
    else:
        model = model.to(device)

    step = 0
    log_loss_stats = ['loss', 'hm_loss', 'wh_loss']
    if cfg.reg_offset:
        log_loss_stats += ['off_loss']
    if cfg.reg_obj:
        log_loss_stats += ['obj_loss']

    for epoch in range(start_epoch + 1, num_epochs + 1):
        avg_loss_stats = {name: AverageMeter() for name in log_loss_stats}
        model.train()
        with tqdm(trainloader) as loader:
            for batch in loader:
                for key in batch:
                    if key != 'meta':
                        batch[key] = batch[key].to(device=device,
                                                   non_blocking=True)
                output, loss, loss_stats = model(batch)
                loss = loss.mean()
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Progress-bar text.
                lr = optimizer.param_groups[0]['lr']
                poststr = ''
                for name in avg_loss_stats:
                    avg_loss_stats[name].update(
                        loss_stats[name].mean().item(),
                        batch['input'].size(0))
                    poststr += '{}: {:.4f}; '.format(
                        name, avg_loss_stats[name].avg)
                loader.set_description('Epoch %d' % (epoch))
                poststr += 'lr: {:.4f}'.format(lr)
                loader.set_postfix_str(poststr)

                step += 1
                del output, loss, loss_stats

                # Periodic visual validation.
                if step % val_step == 0:
                    if len(cfg.gpus) > 1:
                        val_model = model.module
                    else:
                        val_model = model
                    val_model.eval()
                    torch.cuda.empty_cache()

                    # Random sample of the validation list.
                    idx = np.arange(len(val_list))
                    idx = np.random.permutation(idx)[:sample_size]

                    for j, sample_idx in enumerate(idx):
                        image = cv2.imread(val_list[sample_idx])
                        image = self.preprocess(image, cfg.input_h,
                                                cfg.input_w, mean, std)
                        image = image.to(device)

                        with torch.no_grad():
                            output = val_model.model(image)[-1]

                        # Draw the predictions and save them.
                        debugger = Debugger(dataset=names,
                                            down_ratio=cfg.down_ratio)
                        reg = output['reg'] if cfg.reg_offset else None
                        obj = output['obj'] if cfg.reg_obj else None
                        dets = ctdet_decode(output['hm'].sigmoid_(),
                                            output['wh'],
                                            reg=reg,
                                            obj=obj,
                                            cat_spec_wh=cfg.cat_spec_wh,
                                            K=cfg.K)
                        dets = dets.detach().cpu().numpy().reshape(
                            -1, dets.shape[2])
                        dets[:, :4] *= cfg.down_ratio

                        image = image[0].detach().cpu().numpy().transpose(
                            1, 2, 0)
                        image = np.clip(((image * std + mean) * 255.), 0,
                                        255).astype(np.uint8)
                        pred = debugger.gen_colormap(
                            output['hm'][0].detach().cpu().numpy())
                        debugger.add_blend_img(image, pred, 'pred_hm')
                        debugger.add_img(image, img_id='out_pred')
                        for det in dets:
                            if det[4] > cfg.vis_thresh:
                                debugger.add_coco_bbox(det[:4], det[-1],
                                                       det[4],
                                                       img_id='out_pred')

                        debugger.save_all_imgs(
                            debug_dir, prefix='{}.{}_'.format(step, j))
                        del output, image, dets

                    # NOTE(review): written on every validation pass, with
                    # no comparison against a previous best score.
                    save_model(os.path.join(model_dir, 'model_best.pth'),
                               epoch, net)
                    model.train()

        logstr = 'epoch {}'.format(epoch)
        for key, meter in avg_loss_stats.items():
            logstr += ' {}: {:.4f};'.format(key, meter.avg)
            if USE_TENSORBOARD:
                writer.add_scalar('train_{}'.format(key), meter.avg, epoch)
        logger.info(logstr)

        save_model(os.path.join(model_dir, 'model_last.pth'), epoch, net,
                   optimizer)
        if epoch in cfg.lr_step:
            save_model(
                os.path.join(model_dir, 'model_{}.pth'.format(epoch)),
                epoch, net, optimizer)
            lr = cfg.lr * (0.1**(cfg.lr_step.index(epoch) + 1))
            logger.info('Drop LR to {}'.format(lr))
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
def debug(self, batch, output, iter_id):
    """Draw predictions and ground truth for the first batch image.

    Boxes whose column-4 value exceeds ``opt.vis_thresh`` are drawn onto
    the ``'out_pred'`` / ``'out_gt'`` images. When ``opt.save_video`` is
    set and ``opt.debug <= 1`` these two images are returned right away.
    Otherwise heatmap blends (and, for the ``'ctdet_semseg'`` task,
    segmentation masks) are added and, for ``opt.debug == 4``, everything
    is saved to ``opt.debug_dir``.
    """
    opt = self.opt
    reg = output['reg'] if opt.reg_offset else None
    dets = ctdet_decode(output['hm'], output['wh'], reg=reg, cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    # Flatten the whole batch into one (1, N, D) array and scale the boxes
    # by down_ratio.
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= opt.down_ratio
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= opt.down_ratio
    if opt.task == 'ctdet_semseg':
        seg_gt = batch['seg'][0][0].cpu().numpy()
        # Index of the maximum along dim 1 is taken as the predicted mask.
        seg_pred = output['seg'].max(1)[1].squeeze_(1).squeeze_(0).cpu().numpy()
    # Only the first image of the batch is processed.
    for i in range(1):
        debugger = Debugger(opt, dataset=opt.dataset, ipynb=(opt.debug == 3), theme=opt.debugger_theme)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        # Undo the input normalisation and convert to uint8.
        img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
        debugger.add_img(img, img_id='out_pred')
        for k in range(len(dets[i])):
            if dets[i, k, 4] > opt.vis_thresh:
                debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1], dets[i, k, 4], img_id='out_pred')
        debugger.add_img(img, img_id='out_gt')
        for k in range(len(dets_gt[i])):
            if dets_gt[i, k, 4] > opt.vis_thresh:
                debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1], dets_gt[i, k, 4], img_id='out_gt')
        if opt.save_video and opt.debug <= 1:
            # Only the predicted and gt images are needed for the video
            # (the heatmap blends are not returned).
            return debugger.imgs['out_pred'], debugger.imgs['out_gt']
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        if opt.task == 'ctdet_semseg':
            debugger.visualize_masks(seg_gt, img_id='out_mask_gt')
            debugger.visualize_masks(seg_pred, img_id='out_mask_pred')
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix=iter_id)
            # NOTE(review): interactive breakpoint left in the code. The
            # source formatting did not preserve whether it belongs inside
            # this ``if``; confirm, and consider removing it for
            # non-interactive runs.
            import pdb
            pdb.set_trace()
        if opt.save_video:
            return debugger.imgs['out_pred'], debugger.imgs['out_gt']
def debug(self, batch, output, iter_id):
    """Visualise predictions and ground truth for the configured task.

    Supports ``opt.task`` values ``'ctdet'`` (boxes) and ``'multi_pose'``
    (boxes plus keypoints in columns 5..38, and optional keypoint
    heatmaps). Only the first image of the batch is drawn. Images are
    saved to ``opt.debug_dir`` when ``opt.debug == 4`` and shown
    interactively otherwise.

    Raises:
        AssertionError: if ``opt.task`` is neither of the supported tasks.
    """
    opt = self.opt
    if opt.task == 'ctdet':
        reg = output['reg'] if opt.reg_offset else None
        dets = ctdet_decode(output['hm'], output['wh'], reg=reg,
                            cat_spec_wh=opt.cat_spec_wh, K=opt.K)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
        dets[:, :, :4] *= opt.down_ratio
        dets_gt = batch['meta']['gt_det'].numpy().reshape(
            1, -1, dets.shape[2])
        dets_gt[:, :, :4] *= opt.down_ratio

        for i in range(1):
            debugger = Debugger(dataset=opt.dataset,
                                ipynb=(opt.debug == 3),
                                theme=opt.debugger_theme)
            img = batch['input'][i].detach().cpu().numpy().transpose(
                1, 2, 0)
            # Undo the input normalisation and convert to uint8.
            img = np.clip(((img * opt.std + opt.mean) * 255.), 0,
                          255).astype(np.uint8)
            pred = debugger.gen_colormap(
                output['hm'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(
                batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hm')
            debugger.add_blend_img(img, gt, 'gt_hm')

            debugger.add_img(img, img_id='out_pred')
            for det in dets[i]:
                if det[4] > opt.center_thresh:
                    debugger.add_coco_bbox(det[:4], det[-1], det[4],
                                           img_id='out_pred')

            debugger.add_img(img, img_id='out_gt')
            for det in dets_gt[i]:
                if det[4] > opt.center_thresh:
                    debugger.add_coco_bbox(det[:4], det[-1], det[4],
                                           img_id='out_gt')

            if opt.debug == 4:
                debugger.save_all_imgs(opt.debug_dir,
                                       prefix='{}'.format(iter_id))
            else:
                debugger.show_all_imgs(pause=True)
    elif opt.task == 'multi_pose':
        reg = output['reg'] if opt.reg_offset else None
        hm_hp = output['hm_hp'] if opt.hm_hp else None
        hp_offset = output['hp_offset'] if opt.reg_hp_offset else None
        dets = multi_pose_decode(output['hm'], output['wh'], output['hps'],
                                 reg=reg, hm_hp=hm_hp, hp_offset=hp_offset,
                                 K=opt.K)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])

        # Boxes (columns 0..3) and keypoints (columns 5..38) are scaled
        # from output to input resolution.
        dets[:, :, :4] *= opt.input_res / opt.output_res
        dets[:, :, 5:39] *= opt.input_res / opt.output_res
        dets_gt = batch['meta']['gt_det'].numpy().reshape(
            1, -1, dets.shape[2])
        dets_gt[:, :, :4] *= opt.input_res / opt.output_res
        dets_gt[:, :, 5:39] *= opt.input_res / opt.output_res

        for i in range(1):
            debugger = Debugger(dataset=opt.dataset,
                                ipynb=(opt.debug == 3),
                                theme=opt.debugger_theme)
            img = batch['input'][i].detach().cpu().numpy().transpose(
                1, 2, 0)
            img = np.clip(((img * opt.std + opt.mean) * 255.), 0,
                          255).astype(np.uint8)
            pred = debugger.gen_colormap(
                output['hm'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(
                batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hm')
            debugger.add_blend_img(img, gt, 'gt_hm')

            out_pred_id = 'out_pred'
            debugger.add_img(img, img_id=out_pred_id)
            for det in dets[i]:
                if det[4] > opt.center_thresh:
                    debugger.add_coco_bbox(det[:4], det[-1], det[4],
                                           img_id=out_pred_id)
                    debugger.add_coco_hp(det[5:39], img_id=out_pred_id)

            out_gt_id = 'out_gt'
            debugger.add_img(img, img_id=out_gt_id)
            for det in dets_gt[i]:
                if det[4] > opt.center_thresh:
                    debugger.add_coco_bbox(det[:4], det[-1], det[4],
                                           img_id=out_gt_id)
                    debugger.add_coco_hp(det[5:39], img_id=out_gt_id)

            if opt.hm_hp:
                pred = debugger.gen_colormap_hp(
                    output['hm_hp'][i].detach().cpu().numpy())
                gt = debugger.gen_colormap_hp(
                    batch['hm_hp'][i].detach().cpu().numpy())
                debugger.add_blend_img(img, pred, 'pred_hmhp')
                debugger.add_blend_img(img, gt, 'gt_hmhp')

            if opt.debug == 4:
                debugger.save_all_imgs(opt.debug_dir,
                                       prefix='{}'.format(iter_id))
            else:
                debugger.show_all_imgs(pause=True)
    else:
        # Raise explicitly: a bare ``assert 0`` is removed under ``-O`` and
        # an unknown task would then be silently ignored.
        raise AssertionError('task not defined!')
def process(self, images, return_time=False):
    """Forward ``images`` and decode detections, with optional MDN heads.

    When ``opt.mdn`` is set, the mixture heads (``mdn_logits``,
    ``mdn_sigma`` and ``wh`` used as the component means) are reduced to a
    single ``wh`` map, which replaces ``output['wh']``; the selected
    component index, its weight and the sigmas are added to ``output`` and
    forwarded to ``ctdet_decode``.

    Returns ``(output, dets)`` or, when ``return_time`` is true,
    ``(output, dets, forward_time)``.
    """
    with torch.no_grad():
        output = self.model(images)[-1]
        if self.opt.mdn:
            BS = output['mdn_logits'].shape[0]
            M = self.opt.mdn_n_comps
            H, W = output['mdn_logits'].shape[-2:]
            # One set of mixture parameters per class when the size head is
            # category specific, a single shared set otherwise.
            K = self.opt.num_classes if self.opt.cat_spec_wh else 1
            mdn_logits = output['mdn_logits']
            # (BS, M, K, H, W) -> (K, BS, M, H, W)
            mdn_logits = mdn_logits.reshape((BS, M, K, H, W)).permute((2, 0, 1, 3, 4))
            # Softmax over the component axis, clamped away from 0 and 1.
            mdn_pi = torch.clamp(torch.nn.Softmax(dim=2)(mdn_logits), 1e-4, 1. - 1e-4)
            # ELU plus a minimum offset, then clamped to [1e-4, 1e5].
            mdn_sigma = torch.clamp(torch.nn.ELU()(output['mdn_sigma']) + self.opt.mdn_min_sigma, 1e-4, 1e5)
            # (BS, M, 2, K, H, W) -> (K, BS, M, 2, H, W)
            mdn_sigma = mdn_sigma.reshape((BS, M, 2, K, H, W)).permute((3, 0, 1, 2, 4, 5))
            mdn_mu = output['wh']
            mdn_mu = mdn_mu.reshape((BS, M, 2, K, H, W)).permute((3, 0, 1, 2, 4, 5))
            # Fold the class axis into the batch axis.
            mdn_mu = mdn_mu.reshape((K * BS, M, 2, H, W))
            mdn_sigma = mdn_sigma.reshape((K * BS, M, 2, H, W))
            mdn_pi = mdn_pi.reshape((K * BS, M, H, W))
            if self.opt.mdn_limit_comp is not None:
                # Keep only the requested mixture component.
                cid = self.opt.mdn_limit_comp
                mdn_pi = mdn_pi[:, cid:cid + 1]
                mdn_sigma = mdn_sigma[:, cid:cid + 1]
                mdn_mu = mdn_mu[:, cid:cid + 1]
                M = 1
            C = 2
            if self.opt.mdn_48:
                # Score each component by pi / (sigma_0**C * sigma_1**C)
                # and pick the best-scoring one.
                central = mdn_pi * torch.reciprocal(mdn_sigma[:, :, 0, :, :])**C * torch.reciprocal(mdn_sigma[:, :, 1, :, :])**C
                pi_max, pi_max_idx = torch.max(central, 1)
            else:
                # Pick the component with the largest mixture weight.
                pi_max, pi_max_idx = torch.max(mdn_pi, 1)
            # NOTE(review): if none of mdn_max / mdn_48 / mdn_sum is set,
            # ``wh`` and ``sigmas`` stay unbound and the reshape below
            # fails; confirm that one of the options is always enabled.
            if self.opt.mdn_max or self.opt.mdn_48:
                # Gather mean and sigma of the selected component.
                a = pi_max_idx.unsqueeze(1).repeat(1, C, 1, 1).reshape(BS * K, 1, C, H, W)
                wh = torch.gather(mdn_mu, 1, a).squeeze(1)
                a = pi_max_idx.unsqueeze(1).repeat(1, 2, 1, 1).reshape(BS * K, 1, 2, H, W)
                sigmas = torch.gather(mdn_sigma, 1, a).squeeze(1)
            elif self.opt.mdn_sum:
                # Mixture-weighted sum over the components.
                wh = torch.sum(mdn_mu * mdn_pi.unsqueeze(2), 1)
                sigmas = torch.sum(mdn_sigma * mdn_pi.unsqueeze(2), 1)
            # Unfold the class axis again: (K * BS, 2, H, W) -> (BS, 2 * K, H, W).
            wh = wh.reshape((K, BS, 2, H, W)).permute((1, 0, 2, 3, 4)).reshape((BS, 2 * K, H, W))
            mdn_sigma = sigmas.reshape((K, BS, 2, H, W)).permute((1, 0, 2, 3, 4)).reshape((BS, 2 * K, H, W))
            mdn_pi = mdn_pi.reshape((K, BS, -1, H, W)).permute((1, 0, 2, 3, 4)).reshape((BS, -1, H, W))
            output.update({'wh': wh})
            output.update({
                'mdn_max_idx': pi_max_idx.unsqueeze(1),
                'mdn_sigmas': mdn_sigma,
                'mdn_max_pi': pi_max.unsqueeze(1)
            })
        hm = output['hm'].sigmoid_()
        wh = output['wh']
        # Original author's shape notes: wh is [1, 160, H, W] with
        # category-specific wh and [1, 2, H, W] otherwise.
        reg = output['reg'] if self.opt.reg_offset else None
        if self.opt.flip_test:
            # Average entry 0 with the flip_tensor of entry 1; the offset
            # map is taken from entry 0 only.
            hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2
            wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2
            reg = reg[0:1] if reg is not None else None
            if 'mdn_sigmas' in output:
                mdn_sigmas = output['mdn_sigmas']
                output['mdn_sigmas'] = (mdn_sigmas[0:1] + flip_tensor(mdn_sigmas[1:2])) / 2
        torch.cuda.synchronize()
        forward_time = time.time()
        # The mdn_* entries are absent (None) when opt.mdn is off.
        dets = ctdet_decode(hm, wh, reg=reg, K=self.opt.K, cat_spec_wh=self.opt.cat_spec_wh, mdn_max_idx=output.get('mdn_max_idx'), mdn_max_pi=output.get('mdn_max_pi'), mdn_sigmas=output.get('mdn_sigmas'))
    if return_time:
        return output, dets, forward_time
    else:
        return output, dets
def debug(self, batch, output, iter_id):
    """Visualise predictions and ground truth for the first batch image.

    Only entry 0 of the heads is decoded. Ground truth is read from
    ``batch['meta_gt_det']``; an empty entry is replaced by a single
    all-zero row. Images are saved for ``opt.debug == 4``, shown with a
    logger for ``opt.debug == 5`` and shown without one otherwise.
    """
    opt = self.opt
    # Decode the first entry only.
    reg = output['reg'][0:1] if opt.reg_offset else None
    dets = ctdet_decode(output['hm'][0:1], output['wh'][0:1], reg=reg,
                        cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= opt.down_ratio

    # FIXME (original author): change from tensor to list and then reshape.
    # Original note: batch['meta_gt_det'] = [128, 128, 6].
    gt_det = batch['meta_gt_det'][0:1]
    if len(gt_det) > 0:
        gt_det = np.array(gt_det, dtype=np.float32)
    else:
        gt_det = np.zeros((1, 6), dtype=np.float32)
    dets_gt = gt_det.reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= opt.down_ratio

    # Only the first image of the batch is visualised.
    idx = 0
    debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                        theme=opt.debugger_theme)
    img = batch['input'][idx].detach().cpu().numpy().transpose(1, 2, 0)
    # Undo the input normalisation and convert to uint8.
    img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(
        np.uint8)
    pred = debugger.gen_colormap(output['hm'][idx].detach().cpu().numpy())
    gt = debugger.gen_colormap(batch['hm'][idx].detach().cpu().numpy())
    debugger.add_blend_img(img, pred, 'pred_hm')
    debugger.add_blend_img(img, gt, 'gt_hm')

    debugger.add_img(img, img_id='out_pred')
    for det in dets[idx]:
        if det[4] > opt.center_thresh:
            debugger.add_coco_bbox(det[:4], det[-1], det[4],
                                   img_id='out_pred')

    debugger.add_img(img, img_id='out_gt')
    for det in dets_gt[idx]:
        if det[4] > opt.center_thresh:
            debugger.add_coco_bbox(det[:4], det[-1], det[4],
                                   img_id='out_gt')

    if opt.debug == 4:
        debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
    elif opt.debug == 5:
        debugger.show_all_imgs(pause=opt.pause, logger=self.logger,
                               step=iter_id)
    else:
        debugger.show_all_imgs(pause=opt.pause, step=iter_id)