def demo_image(image, model, opt):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    # 'hm': (1, 16, 64, 64), 'depth': (1, 16, 64, 64)
    preds, amb_idx = get_preds(out['hm'].detach().cpu().numpy())
    pred = preds[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d, ignore_idx = get_preds_3d(out['hm'].detach().cpu().numpy(),
                                       out['depth'].detach().cpu().numpy(),
                                       amb_idx)
    pred_3d = pred_3d[0]
    ignore_idx = ignore_idx[0]
    debugger = Debugger()
    debugger.add_img(image)  # copy the image into the debugger
    debugger.add_point_2d(pred, (255, 0, 0))
    debugger.add_point_3d(pred_3d, 'b', ignore_idx=ignore_idx)
    debugger.show_all_imgs(pause=False)
    debugger.show_3d()
    print("Done")
def demo_image(image, image_name, model, opt):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]
    path = "D:\\CV-Project\\pytorch-pose-hg-3d\\images\\last_save\\"
    _, image_name = os.path.split(image_name)
    image_name = image_name[:-4]  # strip the file extension
    debugger = Debugger()
    debugger.add_img(image, image_name)
    debugger.add_point_2d(pred, (255, 0, 0), image_name)
    debugger.add_point_3d(pred_3d, 'b')
    debugger.show_all_imgs(pause=False)
    debugger.show_3d(image_name, path)
    debugger.save_img(image_name, path)
def display_map(self, batch, tracks, idx):
    opt = self.opt
    for i in range(1):
        debugger = Debugger(opt, dataset=opt.dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
        debugger.add_img(img, img_id='track')
        # use a separate index so the outer loop variable is not shadowed
        for t in range(len(tracks)):
            dets = tracks[t].pred
            bbox = dets[:4] * self.opt.down_ratio
            w, h = bbox[2], bbox[3]
            # convert (cx, cy, w, h) to (x1, y1, x2, y2)
            bbox = np.array([bbox[0] - w / 2, bbox[1] - h / 2,
                             bbox[0] + w / 2, bbox[1] + h / 2])
            debugger.add_coco_bbox(bbox, int(dets[-1]), tracks[t].track_id,
                                   img_id='track', tracking=True)
        debugger.save_all_imgs(opt.debug_dir, prefix=f'{idx}')
def demo_image(image, model, opt, save_path=None):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]
    debugger = Debugger()
    debugger.add_img(image)
    debugger.add_point_2d(pred, (255, 0, 0))
    debugger.add_point_3d(pred_3d, 'b')
    # import pdb; pdb.set_trace()
    debugger.show_all_imgs(pause=True)
    debugger.show_3d()
    if save_path:
        debugger.save_3d(save_path)
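# ---------------------------------------------------------------------------
# Hedged usage sketch for the demo_image variants above. `run_demo` is a
# hypothetical driver (not part of any of these repositories); it assumes a
# model and opt have already been constructed elsewhere, and only cv2/torch
# plus demo_image itself come from this file's context.
import cv2
import torch


def run_demo(image_path, model, opt, save_path=None):
    image = cv2.imread(image_path)  # BGR uint8 frame, as demo_image expects
    assert image is not None, 'failed to read {}'.format(image_path)
    with torch.no_grad():  # inference only, no gradients needed
        demo_image(image, model, opt, save_path=save_path)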
def debug(self, batch, output, iter_id):
    opt = self.opt
    reg = output['reg'] if opt.reg_offset else None
    dets = ctdet_decode(output['hm'], output['wh'], reg=reg,
                        cat_spec_wh=opt.cat_spec_wh, opt=opt)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= opt.down_ratio
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= opt.down_ratio
    for i in range(1):
        debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        debugger.add_img(img, img_id='out_pred')
        if opt.edge_hm:
            # collapse the four per-edge channels into one heatmap
            edge_hm = output['edge_hm'][i].detach().cpu().numpy()
            edge_hm = edge_hm.reshape(4 * opt.num_edge_hm, -1,
                                      edge_hm.shape[1], edge_hm.shape[2])
            edge_hm = edge_hm.sum(axis=0)
            edge_hm = debugger.gen_colormap(edge_hm)
            debugger.add_blend_img(img, edge_hm, 'edge_hm')
            gt_edge_hm = batch['edge_hm'][i].detach().cpu().numpy()
            gt_edge_hm = gt_edge_hm.reshape(4 * opt.num_edge_hm, -1,
                                            gt_edge_hm.shape[1],
                                            gt_edge_hm.shape[2])
            gt_edge_hm = gt_edge_hm.sum(axis=0)
            gt_edge_hm = debugger.gen_colormap(gt_edge_hm)
            debugger.add_blend_img(img, gt_edge_hm, 'gt_edge_hm')
        for k in range(len(dets[i])):
            if dets[i, k, 4] > opt.center_thresh:
                debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
                                       dets[i, k, 4], img_id='out_pred')
        debugger.add_img(img, img_id='out_gt')
        for k in range(len(dets_gt[i])):
            if dets_gt[i, k, 4] > opt.center_thresh:
                debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
                                       dets_gt[i, k, 4], img_id='out_gt')
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)
def debug(self, batch, output, iter_id):
    opt = self.opt
    reg = output['reg'] if opt.reg_offset else None
    directs = output['direct']
    dets = ctdet_line_decode(output['hm'], output['wh'], reg=reg,
                             directs=directs, cat_spec_wh=opt.cat_spec_wh,
                             K=opt.K, direct_loss=opt.direct_loss)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= opt.down_ratio
    if not self.opt.temporal_model and not opt.no_reconstruct_loss:
        dets_gt = batch['meta']['gt_line'].numpy().reshape(
            1, -1, dets.shape[2] - 1 + 1)  # +1 for direction
        dets_gt[:, :, :4] *= opt.down_ratio
        for i in range(1):
            debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                                theme=opt.debugger_theme)
            img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = np.clip(((img * opt.std + opt.mean) * 255.), 0,
                          255).astype(np.uint8)
            pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hm')
            debugger.add_blend_img(img, gt, 'gt_hm')
            debugger.add_img(img, img_id='out_pred')
            for k in range(len(dets[i])):
                if dets[i, k, 4] > opt.center_thresh:
                    debugger.add_bbox_line(dets[i, k, :4], dets[i, k, -1],
                                           dets[i, k, 4], img_id='out_pred',
                                           direct=dets[i, k, 5])
            debugger.add_img(img, img_id='out_gt')
            for k in range(len(dets_gt[i])):
                if dets_gt[i, k, 4] > opt.center_thresh:
                    debugger.add_bbox_line_gt(dets_gt[i, k, :4],
                                              dets_gt[i, k, 5],
                                              dets_gt[i, k, 4],
                                              img_id='out_gt',
                                              direct=dets_gt[i, k, -1])
                    # direct=directs_gt[i, k])
            if opt.debug == 4:
                debugger.save_all_imgs(opt.debug_dir,
                                       prefix='{}'.format(iter_id))
            else:
                debugger.show_all_imgs(pause=True)
def debug(self, batch, output, iter_id):
    opt = self.opt
    reg = output['reg'] if opt.reg_offset else None
    dets = ctdet_decode(output['hm'], output['wh'], reg=reg,
                        cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= opt.down_ratio
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= opt.down_ratio
    if opt.task == 'ctdet_semseg':
        seg_gt = batch['seg'][0][0].cpu().numpy()
        seg_pred = output['seg'].max(1)[1].squeeze_(1).squeeze_(0).cpu().numpy()
    for i in range(1):
        debugger = Debugger(opt, dataset=opt.dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
        debugger.add_img(img, img_id='out_pred')
        for k in range(len(dets[i])):
            if dets[i, k, 4] > opt.vis_thresh:
                debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
                                       dets[i, k, 4], img_id='out_pred')
        debugger.add_img(img, img_id='out_gt')
        for k in range(len(dets_gt[i])):
            if dets_gt[i, k, 4] > opt.vis_thresh:
                debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
                                       dets_gt[i, k, 4], img_id='out_gt')
        if opt.save_video:
            # only save the predicted and gt images
            return debugger.imgs['out_pred'], debugger.imgs['out_gt']
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        if opt.task == 'ctdet_semseg':
            debugger.visualize_masks(seg_gt, img_id='out_mask_gt')
            debugger.visualize_masks(seg_pred, img_id='out_mask_pred')
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix=iter_id)
def debug(self, batch, output, iter_id):
    opt = self.opt
    reg = output['reg'] if opt.reg_offset else None
    angle = output['angle']
    dets = ctdet_angle_decode(output['hm'], output['wh'], reg=reg, angle=angle,
                              cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= opt.down_ratio
    dets_gt = batch['meta']['gt_det_angle'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= opt.down_ratio
    for i in range(1):
        debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        debugger.add_img(img, img_id='out_pred')
        for k in range(len(dets[i])):
            # index 5 holds the center prediction score
            if dets[i, k, 5] > opt.center_thresh:
                debugger.add_rotation_bbox(dets[i, k, :5], dets[i, k, -1],
                                           dets[i, k, 5], img_id='out_pred',
                                           show_txt=False)
        debugger.add_img(img, img_id='out_gt')
        for k in range(len(dets_gt[i])):
            if dets_gt[i, k, 5] > opt.center_thresh:
                debugger.add_rotation_bbox(dets_gt[i, k, :5], dets_gt[i, k, -1],
                                           dets_gt[i, k, 5], img_id='out_gt',
                                           show_txt=False)
        if opt.debug == 4:
            print(batch['meta']['img_id'][i])
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id),
                                   img_id=batch['meta']['img_id'])
        else:
            debugger.show_all_imgs(pause=True)
def debug(self, batch, output, iter_id):
    opt = self.opt
    # whether a coordinate offset (reg) is regressed
    reg = output['reg'] if opt.reg_offset else None
    # decode the network heatmaps into detections: [bboxes, scores, clses]
    dets = ctdet_decode(output['hm'], output['wh'], reg=reg,
                        cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    # detach dets from the graph; shape is (1, batch*K, 6)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    # map the predicted coordinates back to input scale; down_ratio defaults to 4
    dets[:, :, :4] *= opt.down_ratio
    # dets_gt holds the ground-truth boxes, reshaped to (1, batch*K, 6)
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    # scale the ground-truth coordinates by down_ratio as well
    dets_gt[:, :, :4] *= opt.down_ratio
    for i in range(1):
        debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        # move the input image to a detached CPU array (HWC)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        # undo the normalization: multiply by std, add mean
        img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
        # gen_colormap renders a heatmap as a color image;
        # output -> pred, batch -> gt
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        # add_blend_img overlays the colormap on the image
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        debugger.add_img(img, img_id='out_pred')
        # len(dets[i]) == batch*K
        for k in range(len(dets[i])):
            # keep detections whose score exceeds the threshold
            if dets[i, k, 4] > opt.center_thresh:
                # draw the detection box with its score and class
                debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
                                       dets[i, k, 4], img_id='out_pred')
        debugger.add_img(img, img_id='out_gt')
        # len(dets_gt[i]) == batch*K
        for k in range(len(dets_gt[i])):
            if dets_gt[i, k, 4] > opt.center_thresh:
                # draw the ground-truth box
                debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
                                       dets_gt[i, k, 4], img_id='out_gt')
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)
def debug(self, batch, output, iter_id):
    opt = self.opt
    reg = output['reg'] if opt.reg_offset else None
    hm_hp = output['hm_hp'] if opt.hm_hp else None
    hp_offset = output['hp_offset'] if opt.reg_hp_offset else None
    dets = multi_pose_decode(output['hm'], output['wh'], output['hps'],
                             reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=opt.K)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= opt.input_res / opt.output_res
    dets[:, :, 5:39] *= opt.input_res / opt.output_res
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= opt.input_res / opt.output_res
    dets_gt[:, :, 5:39] *= opt.input_res / opt.output_res
    for i in range(1):
        debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        debugger.add_img(img, img_id='out_pred')
        for k in range(len(dets[i])):
            if dets[i, k, 4] > opt.center_thresh:
                debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
                                       dets[i, k, 4], img_id='out_pred')
                debugger.add_coco_hp(dets[i, k, 5:39], img_id='out_pred')
        debugger.add_img(img, img_id='out_gt')
        for k in range(len(dets_gt[i])):
            if dets_gt[i, k, 4] > opt.center_thresh:
                debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
                                       dets_gt[i, k, 4], img_id='out_gt')
                debugger.add_coco_hp(dets_gt[i, k, 5:39], img_id='out_gt')
        if opt.hm_hp:
            pred = debugger.gen_colormap_hp(
                output['hm_hp'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap_hp(batch['hm_hp'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hmhp')
            debugger.add_blend_img(img, gt, 'gt_hmhp')
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)
def debug(self, batch, output, iter_id):
    cfg = self.cfg
    reg = output[3] if cfg.LOSS.REG_OFFSET else None
    hm_hp = output[4] if cfg.LOSS.HM_HP else None
    hp_offset = output[5] if cfg.LOSS.REG_HP_OFFSET else None
    dets = multi_pose_decode(output[0], output[1], output[2], reg=reg,
                             hm_hp=hm_hp, hp_offset=hp_offset, K=cfg.TEST.TOPK)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
    dets[:, :, 5:39] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
    dets_gt[:, :, 5:39] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
    for i in range(1):
        debugger = Debugger(dataset=cfg.SAMPLE_METHOD, ipynb=(cfg.DEBUG == 3),
                            theme=cfg.DEBUG_THEME)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        std = np.array(cfg.DATASET.STD).reshape(1, 1, 3).astype(np.float32)
        img = np.clip(((img * std + cfg.DATASET.MEAN) * 255.), 0,
                      255).astype(np.uint8)
        pred = debugger.gen_colormap(output[0][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        debugger.add_img(img, img_id='out_pred')
        for k in range(len(dets[i])):
            if dets[i, k, 4] > cfg.MODEL.CENTER_THRESH:
                debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
                                       dets[i, k, 4], img_id='out_pred')
                debugger.add_coco_hp(dets[i, k, 5:39], img_id='out_pred')
        debugger.add_img(img, img_id='out_gt')
        for k in range(len(dets_gt[i])):
            if dets_gt[i, k, 4] > cfg.MODEL.CENTER_THRESH:
                debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
                                       dets_gt[i, k, 4], img_id='out_gt')
                debugger.add_coco_hp(dets_gt[i, k, 5:39], img_id='out_gt')
        if cfg.LOSS.HM_HP:
            pred = debugger.gen_colormap_hp(output[4][i].detach().cpu().numpy())
            gt = debugger.gen_colormap_hp(batch['hm_hp'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hmhp')
            debugger.add_blend_img(img, gt, 'gt_hmhp')
        if cfg.DEBUG == 4:
            debugger.save_all_imgs(cfg.LOG_DIR, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)
def _debug(image, t_heat, l_heat, b_heat, r_heat, ct_heat):
    debugger = Debugger(num_classes=1)
    k = 0
    t_heat = torch.sigmoid(t_heat)
    l_heat = torch.sigmoid(l_heat)
    b_heat = torch.sigmoid(b_heat)
    r_heat = torch.sigmoid(r_heat)
    aggr_weight = 0.1
    t_heat = _h_aggregate(t_heat, aggr_weight=aggr_weight)
    l_heat = _v_aggregate(l_heat, aggr_weight=aggr_weight)
    b_heat = _h_aggregate(b_heat, aggr_weight=aggr_weight)
    r_heat = _v_aggregate(r_heat, aggr_weight=aggr_weight)
    # aggregation can push values past 1; clamp back into [0, 1]
    t_heat[t_heat > 1] = 1
    l_heat[l_heat > 1] = 1
    b_heat[b_heat > 1] = 1
    r_heat[r_heat > 1] = 1
    ct_heat = torch.sigmoid(ct_heat)
    t_hm = debugger.gen_colormap(t_heat[k].cpu().data.numpy())
    l_hm = debugger.gen_colormap(l_heat[k].cpu().data.numpy())
    b_hm = debugger.gen_colormap(b_heat[k].cpu().data.numpy())
    r_hm = debugger.gen_colormap(r_heat[k].cpu().data.numpy())
    ct_hm = debugger.gen_colormap(ct_heat[k].cpu().data.numpy())
    hms = np.maximum(np.maximum(t_hm, l_hm), np.maximum(b_hm, r_hm))
    if image is not None:
        mean = np.array([0.40789654, 0.44719302, 0.47026115],
                        dtype=np.float32).reshape(3, 1, 1)
        std = np.array([0.28863828, 0.27408164, 0.27809835],
                       dtype=np.float32).reshape(3, 1, 1)
        img = (image[k].cpu().data.numpy() * std + mean) * 255
        img = img.astype(np.uint8).transpose(1, 2, 0)
        debugger.add_img(img, 'img')
        debugger.add_blend_img(img, t_hm, 't_hm')
        debugger.add_blend_img(img, l_hm, 'l_hm')
        debugger.add_blend_img(img, b_hm, 'b_hm')
        debugger.add_blend_img(img, r_hm, 'r_hm')
        debugger.add_blend_img(img, hms, 'extreme')
        debugger.add_blend_img(img, ct_hm, 'center')
        debugger.show_all_imgs(pause=False)
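# ---------------------------------------------------------------------------
# All of the debug()/demo variants in this file share one visualization idiom:
# denormalize the network input, render a heatmap with gen_colormap, and
# overlay it with add_blend_img. A condensed sketch of that shared core,
# assuming a Debugger instance with the methods already used throughout this
# file (the helper name `blend_heatmap` is hypothetical):
import numpy as np


def blend_heatmap(debugger, img_chw, hm, std, mean, tag='hm'):
    # undo the input normalization: CHW float -> HWC uint8
    img = img_chw.transpose(1, 2, 0)
    img = np.clip((img * std + mean) * 255., 0, 255).astype(np.uint8)
    colormap = debugger.gen_colormap(hm)  # heatmap channels -> color image
    debugger.add_blend_img(img, colormap, tag)
    return img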
def debug(self, batch, output, iter_id):
    opt = self.opt
    detections = self.decode(output['hm_t'], output['hm_l'], output['hm_b'],
                             output['hm_r'],
                             output['hm_c']).detach().cpu().numpy()
    detections[:, :, :4] *= opt.input_res / opt.output_res
    for i in range(1):
        dataset = opt.dataset
        if opt.dataset == 'yolo':
            dataset = opt.names
        debugger = Debugger(dataset=dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        pred_hm = np.zeros((opt.input_res, opt.input_res, 3), dtype=np.uint8)
        gt_hm = np.zeros((opt.input_res, opt.input_res, 3), dtype=np.uint8)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = ((img * self.opt.std + self.opt.mean) * 255.).astype(np.uint8)
        for p in self.parts:
            tag = 'hm_{}'.format(p)
            pred = debugger.gen_colormap(output[tag][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch[tag][i].detach().cpu().numpy())
            if p != 'c':
                pred_hm = np.maximum(pred_hm, pred)
                gt_hm = np.maximum(gt_hm, gt)
            if p == 'c' or opt.debug > 2:
                debugger.add_blend_img(img, pred, 'pred_{}'.format(p))
                debugger.add_blend_img(img, gt, 'gt_{}'.format(p))
        debugger.add_blend_img(img, pred_hm, 'pred')
        debugger.add_blend_img(img, gt_hm, 'gt')
        debugger.add_img(img, img_id='out')
        for k in range(len(detections[i])):
            if detections[i, k, 4] > 0.1:
                debugger.add_coco_bbox(detections[i, k, :4],
                                       detections[i, k, -1],
                                       detections[i, k, 4], img_id='out')
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)
def show_results(self, image, gts, dets, save_dir, img_name):
    debugger = Debugger(dataset='dota', ipynb=(self.opt.debug == 3),
                        theme='white')
    debugger.add_img(image, img_name)
    for j in dets:
        for bbox in dets[j]:
            if bbox[5] > 0.01:
                debugger.add_rbbox(bbox[:5], j - 1, bbox[5], img_id=img_name)
    for ann in gts:
        bbox = ann['rbbox']
        cat_id = ann['category_id']
        debugger.add_rbbox(bbox, cat_id - 1, 1, img_id=img_name, gt=True)
    save_dir = os.path.join(save_dir, 'voc_results')
    debugger.save_all_imgs(save_dir)
def show_results(self, save_dir):
    with open(os.path.join(save_dir, "results.json")) as f:
        data = json.load(f)
    debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug == 3),
                        theme=self.opt.debugger_theme,
                        class_names=self.opt.class_names)
    for i, img_details in enumerate(data):
        if img_details['score'] > self.opt.vis_thresh:
            img_path = os.path.join("../data/coco/test2019",
                                    str(img_details["image_id"]) + ".jpg")  # TODO
            image = cv2.imread(img_path)
            debugger.add_img(image, img_id='ctdet')
            bbox = img_details['bbox']
            debugger.add_coco_bbox(bbox, img_details['category_id'] - 1,
                                   img_details['score'], img_id='ctdet')
            debugger.show_all_imgs(pause=True)
def debug(self, batch, output, iter_id):
    opt = self.opt
    reg = output['reg'] if opt.reg_offset else None
    dets = ctdet_decode(output['hm'], output['wh'], reg=reg,
                        cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= opt.down_ratio
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= opt.down_ratio
    for i in range(1):
        debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        debugger.add_img(img, img_id='out_pred')
        for k in range(len(dets[i])):
            if dets[i, k, 4] > opt.center_thresh:
                debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
                                       dets[i, k, 4], img_id='out_pred')
        debugger.add_img(img, img_id='out_gt')
        for k in range(len(dets_gt[i])):
            if dets_gt[i, k, 4] > opt.center_thresh:
                debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
                                       dets_gt[i, k, 4], img_id='out_gt')
def debug_for_polygon(self, batch, output, iter_id):
    opt = self.opt
    output = output[0]
    batch = batch[0]
    for i in range(1):
        debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        debugger.add_img(img, img_id='out_pred')
        debugger.add_img(img, img_id='out_gt')
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)
def debug(self, batch, output, iter_id, dataset):
    opt = self.opt
    if "pre_hm" in batch:
        output.update({"pre_hm": batch["pre_hm"]})
    dets = generic_decode(output, K=opt.K, opt=opt)
    for k in dets:
        dets[k] = dets[k].detach().cpu().numpy()
    dets_gt = batch["meta"]["gt_det"]
    for i in range(1):
        debugger = Debugger(opt=opt, dataset=dataset)
        img = batch["image"][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * dataset.std + dataset.mean) * 255.0), 0,
                      255).astype(np.uint8)
        pred = debugger.gen_colormap(output["hm"][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch["hm"][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, "pred_hm")
        debugger.add_blend_img(img, gt, "gt_hm")
        if "pre_img" in batch:
            pre_img = batch["pre_img"][i].detach().cpu().numpy().transpose(1, 2, 0)
            pre_img = np.clip(((pre_img * dataset.std + dataset.mean) * 255), 0,
                              255).astype(np.uint8)
            debugger.add_img(pre_img, "pre_img_pred")
            debugger.add_img(pre_img, "pre_img_gt")
            if "pre_hm" in batch:
                pre_hm = debugger.gen_colormap(
                    batch["pre_hm"][i].detach().cpu().numpy())
                debugger.add_blend_img(pre_img, pre_hm, "pre_hm")
        debugger.add_img(img, img_id="out_pred")
        if "ltrb_amodal" in opt.heads:
            debugger.add_img(img, img_id="out_pred_amodal")
            debugger.add_img(img, img_id="out_gt_amodal")
        # Predictions
        for k in range(len(dets["scores"][i])):
            if dets["scores"][i, k] > opt.vis_thresh:
                debugger.add_coco_bbox(dets["bboxes"][i, k] * opt.down_ratio,
                                       dets["clses"][i, k],
                                       dets["scores"][i, k],
                                       img_id="out_pred")
                if "ltrb_amodal" in opt.heads:
                    debugger.add_coco_bbox(
                        dets["bboxes_amodal"][i, k] * opt.down_ratio,
                        dets["clses"][i, k], dets["scores"][i, k],
                        img_id="out_pred_amodal")
                if "hps" in opt.heads and int(dets["clses"][i, k]) == 0:
                    debugger.add_coco_hp(dets["hps"][i, k] * opt.down_ratio,
                                         img_id="out_pred")
                if "tracking" in opt.heads:
                    debugger.add_arrow(dets["cts"][i][k] * opt.down_ratio,
                                       dets["tracking"][i][k] * opt.down_ratio,
                                       img_id="out_pred")
                    debugger.add_arrow(dets["cts"][i][k] * opt.down_ratio,
                                       dets["tracking"][i][k] * opt.down_ratio,
                                       img_id="pre_img_pred")
        # Ground truth
        debugger.add_img(img, img_id="out_gt")
        for k in range(len(dets_gt["scores"][i])):
            if dets_gt["scores"][i][k] > opt.vis_thresh:
                debugger.add_coco_bbox(dets_gt["bboxes"][i][k] * opt.down_ratio,
                                       dets_gt["clses"][i][k],
                                       dets_gt["scores"][i][k],
                                       img_id="out_gt")
                if "ltrb_amodal" in opt.heads:
                    debugger.add_coco_bbox(
                        dets_gt["bboxes_amodal"][i, k] * opt.down_ratio,
                        dets_gt["clses"][i, k], dets_gt["scores"][i, k],
                        img_id="out_gt_amodal")
                if "hps" in opt.heads and (int(dets["clses"][i, k]) == 0):
                    debugger.add_coco_hp(dets_gt["hps"][i][k] * opt.down_ratio,
                                         img_id="out_gt")
                if "tracking" in opt.heads:
                    debugger.add_arrow(dets_gt["cts"][i][k] * opt.down_ratio,
                                       dets_gt["tracking"][i][k] * opt.down_ratio,
                                       img_id="out_gt")
                    debugger.add_arrow(dets_gt["cts"][i][k] * opt.down_ratio,
                                       dets_gt["tracking"][i][k] * opt.down_ratio,
                                       img_id="pre_img_gt")
        if "hm_hp" in opt.heads:
            pred = debugger.gen_colormap_hp(
                output["hm_hp"][i].detach().cpu().numpy())
            gt = debugger.gen_colormap_hp(
                batch["hm_hp"][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, "pred_hmhp")
            debugger.add_blend_img(img, gt, "gt_hmhp")
        if "rot" in opt.heads and "dim" in opt.heads and "dep" in opt.heads:
            dets_gt = {k: dets_gt[k].cpu().numpy() for k in dets_gt}
            calib = (batch["meta"]["calib"].detach().numpy()
                     if "calib" in batch["meta"] else None)
            det_pred = generic_post_process(
                opt, dets, batch["meta"]["c"].cpu().numpy(),
                batch["meta"]["s"].cpu().numpy(), output["hm"].shape[2],
                output["hm"].shape[3], self.opt.num_classes, calib)
            det_gt = generic_post_process(
                opt, dets_gt, batch["meta"]["c"].cpu().numpy(),
                batch["meta"]["s"].cpu().numpy(), output["hm"].shape[2],
                output["hm"].shape[3], self.opt.num_classes, calib)
            debugger.add_3d_detection(batch["meta"]["img_path"][i],
                                      batch["meta"]["flipped"][i], det_pred[i],
                                      calib[i], vis_thresh=opt.vis_thresh,
                                      img_id="add_pred")
            debugger.add_3d_detection(batch["meta"]["img_path"][i],
                                      batch["meta"]["flipped"][i], det_gt[i],
                                      calib[i], vis_thresh=opt.vis_thresh,
                                      img_id="add_gt")
            debugger.add_bird_views(det_pred[i], det_gt[i],
                                    vis_thresh=opt.vis_thresh,
                                    img_id="bird_pred_gt")
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix="{}".format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)
def step(split, epoch, opt, data_loader, model, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()
    # crit = torch.nn.MSELoss()
    # crit_3d = FusionLoss(opt.device, opt.weight_3d, opt.weight_var)
    # crit_ocv = nn.BCEWithLogitsLoss()
    crit_ocv = nn.CrossEntropyLoss()
    # these dataset attributes are required by the opt.debug >= 2 branch below
    acc_idxs = data_loader.dataset.acc_idxs
    edges = data_loader.dataset.edges
    edges_3d = data_loader.dataset.edges_3d
    shuffle_ref = data_loader.dataset.shuffle_ref
    mean = data_loader.dataset.mean
    std = data_loader.dataset.std
    convert_eval_format = data_loader.dataset.convert_eval_format
    # Loss, Loss3D = AverageMeter(), AverageMeter()
    # Acc, MPJPE = AverageMeter(), AverageMeter()
    Loss_ocv, Acc_ocv = AverageMeter(), AverageMeter()
    data_time, batch_time = AverageMeter(), AverageMeter()
    preds = []
    time_str = ''
    nIters = len(data_loader)
    if opt.train_half:
        nIters = nIters // 2  # integer division so Bar gets an int max
    bar = Bar('{}'.format(opt.exp_id), max=nIters)
    end = time.time()
    for i, batch in enumerate(data_loader):
        if i >= nIters:
            break
        data_time.update(time.time() - end)
        img, ocv_gt, info = batch
        if i == 0:
            np.savez(split + '_debug.npz', img=img.numpy(),
                     ocv_gt=ocv_gt.numpy(), info=info)
        img = img.cuda(device=opt.device, non_blocking=True)
        ocv_gt = ocv_gt.cuda(device=opt.device, non_blocking=True)
        output = model(img)
        # CrossEntropyLoss wants class indices, not one-hot targets
        loss = crit_ocv(output, torch.argmax(ocv_gt, 1))
        preds = torch.argmax(output, 1)
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        acc = accuracy_ocv(preds, torch.argmax(ocv_gt, 1))
        Loss_ocv.update(loss.item(), img.size(0))
        Acc_ocv.update(acc, img.size(0))
        batch_time.update(time.time() - end)
        end = time.time()
        if not opt.hide_data_time:
            time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                       ' |Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
        Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:} ' \
                     '|Loss_ocv {loss.avg:.5f}' \
                     '|Acc_ocv {Acc.avg:.4f}' \
                     '|loss_batch {loss_batch:.4f}' \
                     '|acc_batch {acc_batch:.4f}' \
                     '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td,
                                         eta=bar.eta_td, loss=Loss_ocv,
                                         Acc=Acc_ocv, loss_batch=loss.item(),
                                         acc_batch=acc, split=split,
                                         time_str=time_str)
        if opt.print_iter > 0:
            if i % opt.print_iter == 0:
                print('{}| {}'.format(opt.exp_id, Bar.suffix))
        else:
            bar.next()
        if opt.debug >= 2:
            # NOTE: this branch still assumes the 3D-pose data loader (dict
            # batches with 'target'/'meta') and stacked 'hm'/'depth' outputs;
            # it does not match the (img, ocv_gt, info) batches unpacked above.
            gt = get_preds(batch['target'].cpu().numpy()) * 4
            pred = get_preds(output[-1]['hm'].detach().cpu().numpy()) * 4
            debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)
            img = (batch['input'][0].cpu().numpy().transpose(1, 2, 0)
                   * std + mean) * 256
            img = img.astype(np.uint8).copy()
            debugger.add_img(img)
            debugger.add_mask(
                cv2.resize(batch['target'][0].cpu().numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'target')
            debugger.add_mask(
                cv2.resize(output[-1]['hm'][0].detach().cpu().numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'pred')
            debugger.add_point_2d(gt[0], (0, 0, 255))
            debugger.add_point_2d(pred[0], (255, 0, 0))
            debugger.add_point_3d(batch['meta']['gt_3d'].detach().numpy()[0],
                                  'r', edges=edges_3d)
            pred_3d = get_preds_3d(output[-1]['hm'].detach().cpu().numpy(),
                                   output[-1]['depth'].detach().cpu().numpy())
            debugger.add_point_3d(convert_eval_format(pred_3d[0]), 'b',
                                  edges=edges_3d)
            debugger.show_all_imgs(pause=False)
            debugger.show_3d()
    bar.finish()
    return {
        'loss': Loss_ocv.avg,
        'acc': Acc_ocv.avg,
        'time': bar.elapsed_td.total_seconds() / 60.
    }, preds
def run(self, image_or_path_or_tensor, meta=None):
    load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
    merge_time, tot_time = 0, 0
    debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug == 3),
                        theme=self.opt.debugger_theme)
    start_time = time.time()
    pre_processed = False
    if isinstance(image_or_path_or_tensor, np.ndarray):
        image = image_or_path_or_tensor
    elif type(image_or_path_or_tensor) == type(''):
        image = cv2.imread(image_or_path_or_tensor)
    else:
        image = image_or_path_or_tensor['image'][0].numpy()
        pre_processed_images = image_or_path_or_tensor
        pre_processed = True
    loaded_time = time.time()
    load_time += (loaded_time - start_time)
    detections = []
    for scale in self.scales:
        scale_start_time = time.time()
        if not pre_processed:
            images, meta = self.pre_process(image, scale, meta)
        else:
            # import pdb; pdb.set_trace()
            images = pre_processed_images['images'][scale][0]
            meta = pre_processed_images['meta'][scale]
            meta = {k: v.numpy()[0] for k, v in meta.items()}
        images = images.to(self.opt.device)
        torch.cuda.synchronize()
        pre_process_time = time.time()
        pre_time += pre_process_time - scale_start_time
        output, dets, forward_time = self.process(images, return_time=True)
        torch.cuda.synchronize()
        net_time += forward_time - pre_process_time
        decode_time = time.time()
        dec_time += decode_time - forward_time
        if self.opt.debug >= 2:
            self.debug(debugger, images, dets, output, scale)
        dets = self.post_process(dets, meta, scale)
        torch.cuda.synchronize()
        post_process_time = time.time()
        post_time += post_process_time - decode_time
        detections.append(dets)
    results = self.merge_outputs(detections)
    torch.cuda.synchronize()
    end_time = time.time()
    merge_time += end_time - post_process_time
    tot_time += end_time - start_time
    # initialized here so the return below works even when debug < 1
    faces_boxes = []
    person_boxes = []
    img_ = None
    if self.opt.debug >= 1:
        debugger.add_img(image, img_id='multi_pose')
        # ------------------------------------------------------------ NMS
        nms_dets_ = []
        for bbox in results[1]:
            if bbox[4] > self.opt.vis_thresh:
                nms_dets_.append((bbox[0], bbox[1], bbox[2], bbox[3], bbox[4]))
        keep_ = []  # guard the empty case so the loop below stays safe
        if len(nms_dets_) > 0:
            keep_ = py_cpu_nms(np.array(nms_dets_), thresh=0.35)
        # ----------------------------------------------------------------
        idx = 0
        for bbox in results[1]:
            if bbox[4] > self.opt.vis_thresh:
                idx += 1
                if (idx - 1) not in keep_:
                    continue
                # draw the detected person
                debugger.add_coco_bbox(bbox[:4], 0, bbox[4],
                                       img_id='multi_pose')
                face_pts = debugger.add_coco_hp(bbox[5:39],
                                                img_id='multi_pose')
                if len(face_pts) == 5:
                    person_boxes.append([int(bbox[0]), int(bbox[1]),
                                         int(bbox[2]), int(bbox[3]), bbox[4]])
                    # derive a face box from the five facial keypoints
                    x_min = min([pt[0] for pt in face_pts])
                    y_min = min([pt[1] for pt in face_pts])
                    x_max = max([pt[0] for pt in face_pts])
                    y_max = max([pt[1] for pt in face_pts])
                    edge = abs(x_max - x_min)
                    bbox_x1 = int(max(0, (x_min - edge * 0.05)))
                    bbox_x2 = int(min(image.shape[1] - 1, (x_max + edge * 0.05)))
                    bbox_y1 = int(max(0, (y_min - edge * 0.32)))
                    bbox_y2 = int(min(image.shape[0] - 1, (y_max + edge * 0.55)))
                    faces_boxes.append([bbox_x1, bbox_y1, bbox_x2, bbox_y2, 1.])
        img_ = debugger.show_all_imgs(pause=self.pause)
    return img_, {
        'results': results, 'tot': tot_time, 'load': load_time,
        'pre': pre_time, 'net': net_time, 'dec': dec_time,
        'post': post_time, 'merge': merge_time
    }, faces_boxes, person_boxes
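# ---------------------------------------------------------------------------
# py_cpu_nms is called above but not defined in this file. A standard greedy
# IoU-based NMS over (x1, y1, x2, y2, score) rows, matching the call site
# here, looks like the sketch below (not necessarily the exact helper the
# original repository ships):
import numpy as np


def py_cpu_nms(dets, thresh):
    """Return indices of boxes to keep; dets is an (N, 5) float array."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the current top box with the remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only boxes whose overlap with the top box is small enough
        order = order[1:][iou <= thresh]
    return keep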
def debug(self, batch, output, iter_id, dataset):
    opt = self.opt
    if 'pre_hm' in batch:
        output.update({'pre_hm': batch['pre_hm']})
    dets = fusion_decode(output, K=opt.K, opt=opt)
    for k in dets:
        dets[k] = dets[k].detach().cpu().numpy()
    dets_gt = batch['meta']['gt_det']
    for i in range(1):
        debugger = Debugger(opt=opt, dataset=dataset)
        img = batch['image'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * dataset.std + dataset.mean) * 255.), 0,
                      255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm',
                               trans=self.opt.hm_transparency)
        debugger.add_blend_img(img, gt, 'gt_hm',
                               trans=self.opt.hm_transparency)
        debugger.add_img(img, img_id='img')
        # show point clouds
        if opt.pointcloud:
            pc_2d = batch['pc_2d'][i].detach().cpu().numpy()
            pc_3d = None
            pc_N = batch['pc_N'][i].detach().cpu().numpy()
            debugger.add_img(img, img_id='pc')
            debugger.add_pointcloud(pc_2d, pc_N, img_id='pc')
            if 'pc_hm' in opt.pc_feat_lvl:
                channel = opt.pc_feat_channels['pc_hm']
                pc_hm = debugger.gen_colormap(
                    batch['pc_hm'][i][channel].unsqueeze(0).detach().cpu().numpy())
                debugger.add_blend_img(img, pc_hm, 'pc_hm',
                                       trans=self.opt.hm_transparency)
            if 'pc_dep' in opt.pc_feat_lvl:
                channel = opt.pc_feat_channels['pc_dep']
                pc_hm = batch['pc_hm'][i][channel].unsqueeze(
                    0).detach().cpu().numpy()
                pc_dep = debugger.add_overlay_img(img, pc_hm, 'pc_dep')
        if 'pre_img' in batch:
            pre_img = batch['pre_img'][i].detach().cpu().numpy().transpose(1, 2, 0)
            pre_img = np.clip(((pre_img * dataset.std + dataset.mean) * 255), 0,
                              255).astype(np.uint8)
            debugger.add_img(pre_img, 'pre_img_pred')
            debugger.add_img(pre_img, 'pre_img_gt')
            if 'pre_hm' in batch:
                pre_hm = debugger.gen_colormap(
                    batch['pre_hm'][i].detach().cpu().numpy())
                debugger.add_blend_img(pre_img, pre_hm, 'pre_hm',
                                       trans=self.opt.hm_transparency)
        debugger.add_img(img, img_id='out_pred')
        if 'ltrb_amodal' in opt.heads:
            debugger.add_img(img, img_id='out_pred_amodal')
            debugger.add_img(img, img_id='out_gt_amodal')
        # Predictions
        for k in range(len(dets['scores'][i])):
            if dets['scores'][i, k] > opt.vis_thresh:
                debugger.add_coco_bbox(dets['bboxes'][i, k] * opt.down_ratio,
                                       dets['clses'][i, k],
                                       dets['scores'][i, k],
                                       img_id='out_pred')
                if 'ltrb_amodal' in opt.heads:
                    debugger.add_coco_bbox(
                        dets['bboxes_amodal'][i, k] * opt.down_ratio,
                        dets['clses'][i, k], dets['scores'][i, k],
                        img_id='out_pred_amodal')
                if 'hps' in opt.heads and int(dets['clses'][i, k]) == 0:
                    debugger.add_coco_hp(dets['hps'][i, k] * opt.down_ratio,
                                         img_id='out_pred')
                if 'tracking' in opt.heads:
                    debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio,
                                       dets['tracking'][i][k] * opt.down_ratio,
                                       img_id='out_pred')
                    debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio,
                                       dets['tracking'][i][k] * opt.down_ratio,
                                       img_id='pre_img_pred')
        # Ground truth
        debugger.add_img(img, img_id='out_gt')
        for k in range(len(dets_gt['scores'][i])):
            if dets_gt['scores'][i][k] > opt.vis_thresh:
                if 'dep' in dets_gt.keys():
                    dist = dets_gt['dep'][i][k]
                    if len(dist) > 1:
                        dist = dist[0]
                else:
                    dist = -1
                debugger.add_coco_bbox(dets_gt['bboxes'][i][k] * opt.down_ratio,
                                       dets_gt['clses'][i][k],
                                       dets_gt['scores'][i][k],
                                       img_id='out_gt', dist=dist)
                if 'ltrb_amodal' in opt.heads:
                    debugger.add_coco_bbox(
                        dets_gt['bboxes_amodal'][i, k] * opt.down_ratio,
                        dets_gt['clses'][i, k], dets_gt['scores'][i, k],
                        img_id='out_gt_amodal')
                if 'hps' in opt.heads and (int(dets['clses'][i, k]) == 0):
                    debugger.add_coco_hp(dets_gt['hps'][i][k] * opt.down_ratio,
                                         img_id='out_gt')
                if 'tracking' in opt.heads:
                    debugger.add_arrow(dets_gt['cts'][i][k] * opt.down_ratio,
                                       dets_gt['tracking'][i][k] * opt.down_ratio,
                                       img_id='out_gt')
                    debugger.add_arrow(dets_gt['cts'][i][k] * opt.down_ratio,
                                       dets_gt['tracking'][i][k] * opt.down_ratio,
                                       img_id='pre_img_gt')
        if 'hm_hp' in opt.heads:
            pred = debugger.gen_colormap_hp(
                output['hm_hp'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap_hp(
                batch['hm_hp'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hmhp',
                                   trans=self.opt.hm_transparency)
            debugger.add_blend_img(img, gt, 'gt_hmhp',
                                   trans=self.opt.hm_transparency)
        if 'rot' in opt.heads and 'dim' in opt.heads and 'dep' in opt.heads:
            dets_gt = {k: dets_gt[k].cpu().numpy() for k in dets_gt}
            calib = batch['meta']['calib'].detach().numpy() \
                if 'calib' in batch['meta'] else None
            det_pred = generic_post_process(
                opt, dets, batch['meta']['c'].cpu().numpy(),
                batch['meta']['s'].cpu().numpy(), output['hm'].shape[2],
                output['hm'].shape[3], self.opt.num_classes, calib)
            det_gt = generic_post_process(
                opt, dets_gt, batch['meta']['c'].cpu().numpy(),
                batch['meta']['s'].cpu().numpy(), output['hm'].shape[2],
                output['hm'].shape[3], self.opt.num_classes, calib, is_gt=True)
            debugger.add_3d_detection(batch['meta']['img_path'][i],
                                      batch['meta']['flipped'][i], det_pred[i],
                                      calib[i], vis_thresh=opt.vis_thresh,
                                      img_id='add_pred')
            debugger.add_3d_detection(batch['meta']['img_path'][i],
                                      batch['meta']['flipped'][i], det_gt[i],
                                      calib[i], vis_thresh=opt.vis_thresh,
                                      img_id='add_gt')
            pc_3d = None
            if opt.pointcloud:
                pc_3d = batch['pc_3d'].cpu().numpy()
            debugger.add_bird_views(det_pred[i], det_gt[i],
                                    vis_thresh=opt.vis_thresh,
                                    img_id='bird_pred_gt', pc_3d=pc_3d,
                                    show_velocity=opt.show_velocity)
            debugger.add_bird_views([], det_gt[i], vis_thresh=opt.vis_thresh,
                                    img_id='bird_gt', pc_3d=pc_3d,
                                    show_velocity=opt.show_velocity)
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)
def run(self, image_or_path_or_tensor, meta=None):
    load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
    merge_time, tot_time = 0, 0
    debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug == 3),
                        theme=self.opt.debugger_theme)
    start_time = time.time()
    pre_processed = False
    if isinstance(image_or_path_or_tensor, np.ndarray):
        image = image_or_path_or_tensor
    elif type(image_or_path_or_tensor) == type(''):
        image = cv2.imread(image_or_path_or_tensor)
    else:
        image = image_or_path_or_tensor['image'][0].numpy()
        pre_processed_images = image_or_path_or_tensor
        pre_processed = True
    loaded_time = time.time()
    load_time += (loaded_time - start_time)
    detections = []
    for scale in self.scales:
        scale_start_time = time.time()
        if not pre_processed:
            images, meta = self.pre_process(image, scale, meta)
        else:
            # import pdb; pdb.set_trace()
            images = pre_processed_images['images'][scale][0]
            meta = pre_processed_images['meta'][scale]
            meta = {k: v.numpy()[0] for k, v in meta.items()}
        images = images.to(self.opt.device)
        torch.cuda.synchronize()
        pre_process_time = time.time()
        pre_time += pre_process_time - scale_start_time
        output, dets, forward_time = self.process(images, return_time=True)
        torch.cuda.synchronize()
        net_time += forward_time - pre_process_time
        decode_time = time.time()
        dec_time += decode_time - forward_time
        if self.opt.debug >= 2:
            self.debug(debugger, images, dets, output, scale)
        dets = self.post_process(dets, meta, scale)
        torch.cuda.synchronize()
        post_process_time = time.time()
        post_time += post_process_time - decode_time
        detections.append(dets)
    results = self.merge_outputs(detections)
    torch.cuda.synchronize()
    end_time = time.time()
    merge_time += end_time - post_process_time
    tot_time += end_time - start_time
    if self.opt.debug >= 1:
        debugger.add_img(image, img_id='multi_pose')
        for bbox in results[1]:
            if bbox[4] > self.opt.vis_thresh:
                # draw the detected object and its keypoints
                debugger.add_coco_bbox(bbox[:4], 0, bbox[4],
                                       img_id='multi_pose')
                debugger.add_coco_hp(bbox[5:39], img_id='multi_pose')
    img_ = debugger.show_all_imgs(pause=self.pause)
    return img_, {'results': results, 'tot': tot_time, 'load': load_time,
                  'pre': pre_time, 'net': net_time, 'dec': dec_time,
                  'post': post_time, 'merge': merge_time}
def debug(self, batch, output, iter_id, dataset):
    opt = self.opt
    if 'pre_hm' in batch:
        output.update({'pre_hm': batch['pre_hm']})
    dets = generic_decode(output, K=opt.K, opt=opt)
    for k in dets:
        dets[k] = dets[k].detach().cpu().numpy()
    dets_gt = batch['meta']['gt_det']
    for i in range(1):
        debugger = Debugger(opt=opt, dataset=dataset)
        img = batch['image'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * dataset.std + dataset.mean) * 255.), 0,
                      255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        if 'pre_img' in batch:
            pre_img = batch['pre_img'][i].detach().cpu().numpy().transpose(1, 2, 0)
            pre_img = np.clip(((pre_img * dataset.std + dataset.mean) * 255), 0,
                              255).astype(np.uint8)
            debugger.add_img(pre_img, 'pre_img_pred')
            debugger.add_img(pre_img, 'pre_img_gt')
            if 'pre_hm' in batch:
                pre_hm = debugger.gen_colormap(
                    batch['pre_hm'][i].detach().cpu().numpy())
                debugger.add_blend_img(pre_img, pre_hm, 'pre_hm')
        debugger.add_img(img, img_id='out_pred')
        if 'ltrb_amodal' in opt.heads:
            debugger.add_img(img, img_id='out_pred_amodal')
            debugger.add_img(img, img_id='out_gt_amodal')
        # Predictions
        for k in range(len(dets['scores'][i])):
            if dets['scores'][i, k] > opt.vis_thresh:
                debugger.add_coco_bbox(dets['bboxes'][i, k] * opt.down_ratio,
                                       dets['clses'][i, k],
                                       dets['scores'][i, k],
                                       img_id='out_pred')
                if 'ltrb_amodal' in opt.heads:
                    debugger.add_coco_bbox(
                        dets['bboxes_amodal'][i, k] * opt.down_ratio,
                        dets['clses'][i, k], dets['scores'][i, k],
                        img_id='out_pred_amodal')
                if 'hps' in opt.heads and int(dets['clses'][i, k]) == 0:
                    debugger.add_coco_hp(dets['hps'][i, k] * opt.down_ratio,
                                         img_id='out_pred')
                if 'tracking' in opt.heads:
                    debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio,
                                       dets['tracking'][i][k] * opt.down_ratio,
                                       img_id='out_pred')
                    debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio,
                                       dets['tracking'][i][k] * opt.down_ratio,
                                       img_id='pre_img_pred')
        # Ground truth
        debugger.add_img(img, img_id='out_gt')
        for k in range(len(dets_gt['scores'][i])):
            if dets_gt['scores'][i][k] > opt.vis_thresh:
                debugger.add_coco_bbox(dets_gt['bboxes'][i][k] * opt.down_ratio,
                                       dets_gt['clses'][i][k],
                                       dets_gt['scores'][i][k],
                                       img_id='out_gt')
                if 'ltrb_amodal' in opt.heads:
                    debugger.add_coco_bbox(
                        dets_gt['bboxes_amodal'][i, k] * opt.down_ratio,
                        dets_gt['clses'][i, k], dets_gt['scores'][i, k],
                        img_id='out_gt_amodal')
                if 'hps' in opt.heads and (int(dets['clses'][i, k]) == 0):
                    debugger.add_coco_hp(dets_gt['hps'][i][k] * opt.down_ratio,
                                         img_id='out_gt')
                if 'tracking' in opt.heads:
                    debugger.add_arrow(dets_gt['cts'][i][k] * opt.down_ratio,
                                       dets_gt['tracking'][i][k] * opt.down_ratio,
                                       img_id='out_gt')
                    debugger.add_arrow(dets_gt['cts'][i][k] * opt.down_ratio,
                                       dets_gt['tracking'][i][k] * opt.down_ratio,
                                       img_id='pre_img_gt')
        if 'hm_hp' in opt.heads:
            pred = debugger.gen_colormap_hp(
                output['hm_hp'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap_hp(
                batch['hm_hp'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hmhp')
            debugger.add_blend_img(img, gt, 'gt_hmhp')
        if 'rot' in opt.heads and 'dim' in opt.heads and 'dep' in opt.heads:
            dets_gt = {k: dets_gt[k].cpu().numpy() for k in dets_gt}
            calib = batch['meta']['calib'].detach().numpy() \
                if 'calib' in batch['meta'] else None
            det_pred = generic_post_process(
                opt, dets, batch['meta']['c'].cpu().numpy(),
                batch['meta']['s'].cpu().numpy(), output['hm'].shape[2],
                output['hm'].shape[3], self.opt.num_classes, calib)
            det_gt = generic_post_process(
                opt, dets_gt, batch['meta']['c'].cpu().numpy(),
                batch['meta']['s'].cpu().numpy(), output['hm'].shape[2],
                output['hm'].shape[3], self.opt.num_classes, calib)
            debugger.add_3d_detection(batch['meta']['img_path'][i],
                                      batch['meta']['flipped'][i], det_pred[i],
                                      calib[i], vis_thresh=opt.vis_thresh,
                                      img_id='add_pred')
            debugger.add_3d_detection(batch['meta']['img_path'][i],
                                      batch['meta']['flipped'][i], det_gt[i],
                                      calib[i], vis_thresh=opt.vis_thresh,
                                      img_id='add_gt')
            debugger.add_bird_views(det_pred[i], det_gt[i],
                                    vis_thresh=opt.vis_thresh,
                                    img_id='bird_pred_gt')
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)
def run_one_scn(demo_scn, demo_dir, opt):
    Detector = detector_factory[opt.task]
    detector = Detector(opt)
    basename = os.path.basename(demo_scn)
    basename = basename.replace('.scn', '')
    basename = basename.replace('.svs', '')
    # basename = basename.replace(' ', '-')
    working_dir = os.path.join(demo_dir, basename)
    xml_file = os.path.join(working_dir, '%s.xml' % (basename))
    if os.path.exists(xml_file):
        return
    patch_2d_dir, simg_big, simg = scn_to_patchs(demo_scn, working_dir, opt)
    if os.path.isdir(patch_2d_dir):
        image_names = []
        ls = os.listdir(patch_2d_dir)
        for file_name in sorted(ls):
            ext = file_name[file_name.rfind('.') + 1:].lower()
            if ext in image_ext:
                image_names.append(os.path.join(patch_2d_dir, file_name))
    else:
        image_names = [patch_2d_dir]
    detect_all = None
    count = 1
    for image_name in image_names:
        ret = detector.run(image_name)
        results = ret['results']
        res_strs = os.path.basename(image_name).replace('.png', '').split('-x-')
        lv_str = res_strs[0]
        patch_start_x = int(res_strs[3])
        patch_start_y = int(res_strs[4])
        if opt.filter_boarder:
            output_h = opt.input_h  # hard coded
            output_w = opt.input_w  # hard coded
            # zero out circles whose radius crosses the patch border
            for j in range(1, opt.num_classes + 1):
                for i in range(len(results[j])):
                    cp = [results[j][i][0], results[j][i][1]]
                    cr = results[j][i][2]
                    if cp[0] - cr < 0 or cp[0] + cr > output_w:
                        results[j][i][3] = 0
                        continue
                    if cp[1] - cr < 0 or cp[1] + cr > output_h:
                        results[j][i][3] = 0
                        continue
        for j in range(1, opt.num_classes + 1):
            for circle in results[j]:
                if circle[3] > opt.vis_thresh:
                    # shift the circle into whole-slide coordinates; use an
                    # explicit copy so the patch-local circle is not mutated
                    circle_out = np.array(circle, dtype=np.float64)
                    circle_out[0] = circle[0] + patch_start_x
                    circle_out[1] = circle[1] + patch_start_y
                    if detect_all is None:
                        detect_all = [circle_out]
                    else:
                        detect_all = np.append(detect_all, [circle_out], axis=0)
        time_str = ''
        for stat in time_stats:
            time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
        print(' %d/%d %s' % (count, len(image_names), time_str))
        count = count + 1
    num_classes = 1
    scales = 1
    max_per_image = 2000
    run_nms = True
    results2 = merge_outputs(num_classes, max_per_image, run_nms, detect_all)
    detect_all = results2[1]
    if simg_big is not None:
        debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        debugger.add_img(simg_big, img_id='')
        # save the original image
        debugger.save_all_imgs(working_dir, prefix='%s' % (basename))
        json_file = os.path.join(working_dir, '%s.json' % (basename))
        debugger.save_detect_all_to_json(simg_big, detect_all, json_file,
                                         opt, simg)
        for circle in detect_all:
            debugger.add_coco_circle(circle[:3], circle[-1], circle[3],
                                     img_id='')
        # save the overlay image
        debugger.save_all_imgs(working_dir, prefix='%s_overlay' % (basename))
    try:
        start_x = int(simg.properties['openslide.bounds-x'])
        start_y = int(simg.properties['openslide.bounds-y'])
        width_x = int(simg.properties['openslide.bounds-width'])
        height_y = int(simg.properties['openslide.bounds-height'])
    except KeyError:
        # slides without openslide bounds (e.g. Aperio) fall back here
        start_x = 0
        start_y = 0
        width_x = int(simg.properties['aperio.OriginalWidth'])
        height_y = int(simg.properties['aperio.OriginalHeight'])
    down_rate = simg.level_downsamples[opt.lv]
    detect_json = []
    doc_out = {}
    doc_out['Annotations'] = {}
    doc_out['Annotations']['@MicronsPerPixel'] = simg.properties[
        'openslide.mpp-x']
    doc_out['Annotations']['@Level'] = opt.lv
    doc_out['Annotations']['@DownRate'] = down_rate
    doc_out['Annotations']['@start_x'] = start_x
    doc_out['Annotations']['@start_y'] = start_y
    doc_out['Annotations']['@width_x'] = width_x
    doc_out['Annotations']['@height_y'] = height_y
    if 'leica.device-model' in simg.properties:
        doc_out['Annotations']['@Device'] = 'leica.device-model'
    else:
        doc_out['Annotations']['@Device'] = 'aperio.Filename'
    annotation = {}
    doc_out['Annotations']['Annotation'] = annotation
    annotation['@Id'] = '1'
    annotation['@Name'] = ''
    annotation['@ReadOnly'] = '0'
    annotation['@LineColorReadOnly'] = '0'
    annotation['@Incremental'] = '0'
    annotation['@Type'] = '4'
    annotation['@LineColor'] = '65280'
    annotation['@Visible'] = '1'
    annotation['@Selected'] = '1'
    annotation['@MarkupImagePath'] = ''
    annotation['@MacroName'] = ''
    annotation['Attributes'] = {}
    annotation['Attributes']['Attribute'] = {}
    annotation['Attributes']['Attribute']['@Name'] = 'glomerulus'
    annotation['Attributes']['Attribute']['@Id'] = '0'
    annotation['Attributes']['Attribute']['@Value'] = ''
    annotation['Plots'] = None
    annotation['Regions'] = {}
    annotation['Regions']['RegionAttributeHeaders'] = {}
    annotation['Regions']['AttributeHeader'] = []
    annotation['Regions']['Region'] = []
    for di in range(len(detect_all)):
        detect_one = detect_all[di]
        detect_dict = {}
        detect_dict['@Id'] = str(di + 1)
        detect_dict['@Type'] = '2'
        detect_dict['@Zoom'] = '0.5'
        detect_dict['@ImageLocation'] = ''
        detect_dict['@ImageFocus'] = '-1'
        detect_dict['@Length'] = '2909.1'
        detect_dict['@Area'] = '673460.1'
        detect_dict['@LengthMicrons'] = '727.3'
        detect_dict['@AreaMicrons'] = '42091.3'
        detect_dict['@Text'] = ('%.3f' % detect_one[3])
        detect_dict['@NegativeROA'] = '0'
        detect_dict['@InputRegionId'] = '0'
        detect_dict['@Analyze'] = '0'
        detect_dict['@DisplayId'] = str(di + 1)
        detect_dict['Attributes'] = None
        detect_dict['Vertices'] = {}
        detect_dict['Vertices']['Vertex'] = []
        if 'leica.device-model' in simg.properties:
            # Leica: axes are swapped and X is measured from the far edge
            coord1 = {}
            coord1['@X'] = str(height_y
                               - (detect_one[1] - detect_one[2]) * down_rate)
            coord1['@Y'] = str((detect_one[0] - detect_one[2]) * down_rate)
            coord1['@Z'] = '0'
            coord2 = {}
            coord2['@X'] = str(height_y
                               - (detect_one[1] + detect_one[2]) * down_rate)  # left-right
            coord2['@Y'] = str((detect_one[0] + detect_one[2]) * down_rate)  # top-bottom
            coord2['@Z'] = '0'
            detect_dict['Vertices']['Vertex'].append(coord1)
            detect_dict['Vertices']['Vertex'].append(coord2)
        elif 'aperio.Filename' in simg.properties:
            coord1 = {}
            coord1['@X'] = str((detect_one[0] - detect_one[2]) * down_rate)
            coord1['@Y'] = str((detect_one[1] - detect_one[2]) * down_rate)
            coord1['@Z'] = '0'
            coord2 = {}
            coord2['@X'] = str((detect_one[0] + detect_one[2]) * down_rate)  # left-right
            coord2['@Y'] = str((detect_one[1] + detect_one[2]) * down_rate)  # top-bottom
            coord2['@Z'] = '0'
            detect_dict['Vertices']['Vertex'].append(coord1)
            detect_dict['Vertices']['Vertex'].append(coord2)
        annotation['Regions']['Region'].append(detect_dict)
    out = xmltodict.unparse(doc_out, pretty=True)
    with open(xml_file, 'wb') as file:
        file.write(out.encode('utf-8'))
    os.system('rm -r "%s"' % (os.path.join(working_dir, '2d_patch')))
    return
def demo(opt):
    class_map = {1: 1, 2: 2}  # class ids used for the bounding-box colors
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    # opt.debug = max(opt.debug, 1)
    Detector = detector_factory[opt.task]
    detector = Detector(opt)
    assert os.path.isdir(opt.demo), 'Need path to videos directory.'
    video_paths = [
        os.path.join(opt.demo, video_name)
        for video_name in os.listdir(opt.demo)
        if video_name.endswith('.mp4')
    ]
    # video_paths = [os.path.join(opt.demo, 'cam_2.mp4')]
    debugger = Debugger(dataset=opt.dataset, theme=opt.debugger_theme)
    for video_path in sorted(video_paths):
        print('video_name = ', video_path)
        bboxes = []
        video = cv2.VideoCapture(video_path)
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Read the ROI polygon for this camera.
        pts = []
        arr_name = os.path.basename(video_path).split('.')[0].split('_')
        cam_name = arr_name[0] + '_' + arr_name[1]
        print('cam_name = ', cam_name)
        with open('../ROIs/{}.txt'.format(cam_name)) as f:
            for line in f:
                pts.append([int(x) for x in line.split(',')])
        pts = np.array(pts)
        # Build the binary ROI mask from the polygon.
        mask = np.zeros((height, width), np.uint8)
        cv2.drawContours(mask, [pts], -1, (255, 255, 255), -1, cv2.LINE_AA)
        bbox_video = cv2.VideoWriter(
            filename='/home/leducthinh0409/centernet_visualize_{}/'.format(
                opt.arch) + os.path.basename(video_path),
            fourcc=cv2.VideoWriter_fourcc(*'mp4v'),
            fps=float(30),
            frameSize=(width, height),
            isColor=True)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        for i in tqdm(range(num_frames)):
            ok, img_pre = video.read()
            if not ok:
                break
            # Keep only the ROI pixels ...
            dst = cv2.bitwise_and(img_pre, img_pre, mask=mask)
            # ... and paint everything outside the ROI white.
            bg = np.ones_like(img_pre, np.uint8) * 255
            cv2.bitwise_not(bg, bg, mask=mask)
            img = bg + dst
            ret = detector.run(img)
            bboxes.append(ret['results'])
            debugger.add_img(img, img_id='default')
            for class_id in class_map.keys():
                for bbox in ret['results'][class_id]:
                    if bbox[4] > opt.vis_thresh:
                        debugger.add_coco_bbox(bbox[:4], class_map[class_id],
                                               bbox[4], img_id='default')
            bbox_img = debugger.imgs['default']
            bbox_video.write(bbox_img)
        with open('/home/leducthinh0409/bboxes_{}/'.format(opt.arch)
                  + os.path.basename(video_path) + '.pkl', 'wb') as f:
            pickle.dump(bboxes, f)
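# Helper sketch for the ROI handling above: the files are assumed to hold
# one "x,y" pair per line. `load_roi_mask` is a hypothetical standalone
# version of the polygon-to-mask step, for reuse outside demo().
def load_roi_mask(roi_txt, height, width):
    import cv2
    import numpy as np
    pts = []
    with open(roi_txt) as f:
        for line in f:
            pts.append([int(x) for x in line.split(',')])
    mask = np.zeros((height, width), np.uint8)
    # Filled polygon: 255 inside the ROI, 0 outside. int32 is the point
    # type OpenCV expects.
    cv2.drawContours(mask, [np.array(pts, dtype=np.int32)], -1,
                     (255, 255, 255), -1, cv2.LINE_AA)
    return mask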
def step(split, epoch, opt, data_loader, model, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()
    crit = torch.nn.MSELoss()
    acc_idxs = data_loader.dataset.acc_idxs
    edges = data_loader.dataset.edges
    shuffle_ref = data_loader.dataset.shuffle_ref
    mean = data_loader.dataset.mean
    std = data_loader.dataset.std
    convert_eval_format = data_loader.dataset.convert_eval_format
    Loss, Acc = AverageMeter(), AverageMeter()
    data_time, batch_time = AverageMeter(), AverageMeter()
    preds = []
    nIters = len(data_loader)
    bar = Bar('{}'.format(opt.exp_id), max=nIters)
    end = time.time()
    for i, batch in enumerate(data_loader):
        data_time.update(time.time() - end)
        input, target, meta = batch['input'], batch['target'], batch['meta']
        input_var = input.cuda(device=opt.device, non_blocking=True)
        target_var = target.cuda(device=opt.device, non_blocking=True)
        output = model(input_var)
        loss = crit(output[-1]['hm'], target_var)
        for k in range(opt.num_stacks - 1):
            # Intermediate stacks are supervised on their heatmaps as well.
            loss += crit(output[k]['hm'], target_var)
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            # Flip test: run the horizontally flipped input and average its
            # un-flipped, left/right-swapped heatmaps with the original ones.
            input_ = input.cpu().numpy().copy()
            input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
            input_flip_var = torch.from_numpy(input_).cuda(
                device=opt.device, non_blocking=True)
            output_flip = model(input_flip_var)
            output_flip = shuffle_lr(
                flip(output_flip[-1]['hm'].detach().cpu().numpy()[0]),
                shuffle_ref)
            output_flip = output_flip.reshape(
                1, opt.num_output, opt.output_h, opt.output_w)
            # output_ = (output[-1].detach().cpu().numpy() + output_flip) / 2
            output_flip = torch.from_numpy(output_flip).cuda(
                device=opt.device, non_blocking=True)
            output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2
            pred, conf = get_preds(
                output[-1]['hm'].detach().cpu().numpy(), True)
            preds.append(convert_eval_format(pred, conf, meta)[0])
        Loss.update(loss.detach().item(), input.size(0))
        Acc.update(accuracy(output[-1]['hm'].detach().cpu().numpy(),
                            target_var.detach().cpu().numpy(), acc_idxs))
        batch_time.update(time.time() - end)
        end = time.time()
        if not opt.hide_data_time:
            time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                       ' |Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
        else:
            time_str = ''
        Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:}' \
                     '|Loss {loss.avg:.5f} |Acc {Acc.avg:.4f}' \
                     '{time_str}'.format(
                         epoch, i, nIters, total=bar.elapsed_td,
                         eta=bar.eta_td, loss=Loss, Acc=Acc, split=split,
                         time_str=time_str)
        if opt.print_iter > 0:
            if i % opt.print_iter == 0:
                print('{}| {}'.format(opt.exp_id, Bar.suffix))
        else:
            bar.next()
        if opt.debug >= 2:
            gt, amb_idx = get_preds(target.cpu().numpy())
            gt *= 4
            pred, amb_idx = get_preds(output[-1]['hm'].detach().cpu().numpy())
            pred *= 4
            debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)
            img = (input[0].numpy().transpose(1, 2, 0) * std + mean) * 256
            img = img.astype(np.uint8).copy()
            debugger.add_img(img)
            debugger.add_mask(
                cv2.resize(target[0].numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'target')
            debugger.add_mask(
                cv2.resize(output[-1]['hm'][0].detach().cpu().numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'pred')
            debugger.add_point_2d(pred[0], (255, 0, 0))
            debugger.add_point_2d(gt[0], (0, 0, 255))
            debugger.show_all_imgs(pause=True)
    bar.finish()
    return {'loss': Loss.avg, 'acc': Acc.avg,
            'time': bar.elapsed_td.total_seconds() / 60.}, preds
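# The eval branch above averages predictions over the original and a
# horizontally flipped input ("flip test"). A minimal numpy sketch of the
# merge, assuming heatmaps shaped (N, C, H, W) and `shuffle_ref` being a
# list of (left, right) joint-index pairs, as in the dataset classes here:
def flip_test_merge(hm, hm_flip, shuffle_ref):
    hm_flip = hm_flip[:, :, :, ::-1].copy()   # un-mirror the width axis
    for l, r in shuffle_ref:                  # swap paired joint channels
        hm_flip[:, [l, r]] = hm_flip[:, [r, l]]
    return (hm + hm_flip) / 2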
def step(split, epoch, opt, data_loader, model, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()
    crit = torch.nn.MSELoss()
    crit_3d = FusionLoss(opt.device, opt.weight_3d, opt.weight_var)
    acc_idxs = data_loader.dataset.acc_idxs
    edges = data_loader.dataset.edges
    edges_3d = data_loader.dataset.edges_3d
    shuffle_ref = data_loader.dataset.shuffle_ref
    mean = data_loader.dataset.mean
    std = data_loader.dataset.std
    convert_eval_format = data_loader.dataset.convert_eval_format
    Loss, Loss3D = AverageMeter(), AverageMeter()
    Acc, MPJPE = AverageMeter(), AverageMeter()
    data_time, batch_time = AverageMeter(), AverageMeter()
    preds = []
    time_str = ''
    nIters = len(data_loader)
    bar = Bar('{}'.format(opt.exp_id), max=nIters)
    end = time.time()
    for i, batch in enumerate(data_loader):
        data_time.update(time.time() - end)
        for k in batch:
            if k != 'meta':
                batch[k] = batch[k].cuda(device=opt.device, non_blocking=True)
        gt_2d = batch['meta']['pts_crop'].cuda(
            device=opt.device, non_blocking=True).float() / opt.output_h
        output = model(batch['input'])
        loss = crit(output[-1]['hm'], batch['target'])
        loss_3d = crit_3d(output[-1]['depth'], batch['reg_mask'],
                          batch['reg_ind'], batch['reg_target'], gt_2d)
        for k in range(opt.num_stacks - 1):
            # Supervise the intermediate stacks on both heads and accumulate.
            loss += crit(output[k]['hm'], batch['target'])
            loss_3d += crit_3d(output[k]['depth'], batch['reg_mask'],
                               batch['reg_ind'], batch['reg_target'], gt_2d)
        loss += loss_3d
        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            # Flip test for both the heatmap and the depth head.
            input_ = batch['input'].cpu().numpy().copy()
            input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
            input_flip_var = torch.from_numpy(input_).cuda(
                device=opt.device, non_blocking=True)
            output_flip_ = model(input_flip_var)
            output_flip = shuffle_lr(
                flip(output_flip_[-1]['hm'].detach().cpu().numpy()[0]),
                shuffle_ref)
            output_flip = output_flip.reshape(
                1, opt.num_output, opt.output_h, opt.output_w)
            output_depth_flip = shuffle_lr(
                flip(output_flip_[-1]['depth'].detach().cpu().numpy()[0]),
                shuffle_ref)
            output_depth_flip = output_depth_flip.reshape(
                1, opt.num_output, opt.output_h, opt.output_w)
            output_flip = torch.from_numpy(output_flip).cuda(
                device=opt.device, non_blocking=True)
            output_depth_flip = torch.from_numpy(output_depth_flip).cuda(
                device=opt.device, non_blocking=True)
            output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2
            output[-1]['depth'] = (output[-1]['depth'] + output_depth_flip) / 2
            # pred = get_preds(output[-1]['hm'].detach().cpu().numpy())
            # preds.append(convert_eval_format(pred, conf, meta)[0])
        Loss.update(loss.item(), batch['input'].size(0))
        Loss3D.update(loss_3d.item(), batch['input'].size(0))
        Acc.update(accuracy(output[-1]['hm'].detach().cpu().numpy(),
                            batch['target'].detach().cpu().numpy(), acc_idxs))
        mpjpe_batch, mpjpe_cnt = mpjpe(
            output[-1]['hm'].detach().cpu().numpy(),
            output[-1]['depth'].detach().cpu().numpy(),
            batch['meta']['gt_3d'].detach().numpy(),
            convert_func=convert_eval_format)
        MPJPE.update(mpjpe_batch, mpjpe_cnt)
        batch_time.update(time.time() - end)
        end = time.time()
        if not opt.hide_data_time:
            time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                       ' |Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
        Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:} ' \
                     '|Loss {loss.avg:.5f} |Loss3D {loss_3d.avg:.5f}' \
                     '|Acc {Acc.avg:.4f} |MPJPE {MPJPE.avg:.2f}' \
                     '{time_str}'.format(
                         epoch, i, nIters, total=bar.elapsed_td,
                         eta=bar.eta_td, loss=Loss, Acc=Acc, split=split,
                         time_str=time_str, MPJPE=MPJPE, loss_3d=Loss3D)
        if opt.print_iter > 0:
            if i % opt.print_iter == 0:
                print('{}| {}'.format(opt.exp_id, Bar.suffix))
        else:
            bar.next()
        if opt.debug >= 2:
            gt = get_preds(batch['target'].cpu().numpy()) * 4
            pred = get_preds(output[-1]['hm'].detach().cpu().numpy()) * 4
            debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)
            img = (batch['input'][0].cpu().numpy().transpose(1, 2, 0) * std
                   + mean) * 256
            img = img.astype(np.uint8).copy()
            debugger.add_img(img)
            debugger.add_mask(
                cv2.resize(batch['target'][0].cpu().numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'target')
            debugger.add_mask(
                cv2.resize(output[-1]['hm'][0].detach().cpu().numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'pred')
            debugger.add_point_2d(gt[0], (0, 0, 255))
            debugger.add_point_2d(pred[0], (255, 0, 0))
            debugger.add_point_3d(
                batch['meta']['gt_3d'].detach().numpy()[0], 'r', edges=edges_3d)
            pred_3d = get_preds_3d(output[-1]['hm'].detach().cpu().numpy(),
                                   output[-1]['depth'].detach().cpu().numpy())
            debugger.add_point_3d(convert_eval_format(pred_3d[0]), 'b',
                                  edges=edges_3d)
            debugger.show_all_imgs(pause=False)
            debugger.show_3d()
    bar.finish()
    return {'loss': Loss.avg, 'acc': Acc.avg, 'mpjpe': MPJPE.avg,
            'time': bar.elapsed_td.total_seconds() / 60.}, preds
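# The MPJPE metric tracked above ("mean per joint position error") is the
# average L2 distance between predicted and ground-truth 3D joints. A
# hedged single-sample sketch with (J, 3) numpy arrays, independent of the
# project's batched mpjpe() helper:
def mpjpe_single(pred_3d, gt_3d):
    import numpy as np
    return float(np.linalg.norm(pred_3d - gt_3d, axis=1).mean())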
def train(self, cfg):
    # Set up the GPU environment, covering single- and multi-GPU cases.
    gpus_str = ''
    if isinstance(cfg.gpus, (list, tuple)):
        cfg.gpus = [int(i) for i in cfg.gpus]
        for s in cfg.gpus:
            gpus_str += str(s) + ','
        gpus_str = gpus_str[:-1]
    else:
        gpus_str = str(int(cfg.gpus))
        cfg.gpus = [int(cfg.gpus)]
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus_str
    cfg.gpus = [i for i in range(len(cfg.gpus))] if cfg.gpus[0] >= 0 else [-1]
    # Set up logging.
    model_dir = os.path.join(cfg.save_dir, cfg.id)
    debug_dir = os.path.join(model_dir, 'debug')
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)
    logger = setup_logger(cfg.id, os.path.join(model_dir, 'log'))
    if USE_TENSORBOARD:
        writer = tensorboardX.SummaryWriter(
            log_dir=os.path.join(model_dir, 'log'))
    logger.info(cfg)
    gpus = cfg.gpus
    device = torch.device('cpu' if gpus[0] < 0 else 'cuda')
    lr = cfg.lr
    lr_step = cfg.lr_step
    num_epochs = cfg.num_epochs
    val_step = cfg.val_step
    sample_size = cfg.sample_size
    # Set up the dataset.
    dataset = YOLO(cfg.data_dir, cfg.hflip, cfg.vflip, cfg.rotation,
                   cfg.scale, cfg.shear, opt=cfg, split='train')
    names = dataset.class_name
    std = dataset.std
    mean = dataset.mean
    # Configure the prediction heads with the dataset's number of classes.
    cfg.setup_head(dataset)
    trainloader = DataLoader(dataset, batch_size=cfg.batch_size, shuffle=True,
                             num_workers=cfg.num_workers, pin_memory=True,
                             drop_last=True)
    valid_file = cfg.val_dir if not cfg.val_dir == '' else os.path.join(
        cfg.data_dir, 'valid.txt')
    with open(valid_file, 'r') as f:
        val_list = [l.rstrip() for l in f.readlines()]
    net = create_model(cfg.arch, cfg.heads, cfg.head_conv, cfg.down_ratio,
                       cfg.filters)
    optimizer = optim.Adam(net.parameters(), lr=lr)
    start_epoch = 0
    if cfg.resume:
        pretrain = os.path.join(model_dir, 'model_last.pth')
        if os.path.exists(pretrain):
            print('resume model from %s' % pretrain)
            try:
                net, optimizer, start_epoch = load_model(
                    net, pretrain, optimizer, True, lr, lr_step)
            except Exception:
                print('\t... loading model error: the checkpoint may not be compatible')
    # ModleWithLoss is the upstream class name, kept as-is.
    model = ModleWithLoss(net, CtdetLoss(cfg))
    if len(gpus) > 1:
        model = nn.DataParallel(model, device_ids=gpus).to(device)
    else:
        model = model.to(device)
    step = 0
    best = 1e10
    log_loss_stats = ['loss', 'hm_loss', 'wh_loss']
    if cfg.reg_offset:
        log_loss_stats += ['off_loss']
    if cfg.reg_obj:
        log_loss_stats += ['obj_loss']
    for epoch in range(start_epoch + 1, num_epochs + 1):
        avg_loss_stats = {l: AverageMeter() for l in log_loss_stats}
        model.train()
        with tqdm(trainloader) as loader:
            for _, batch in enumerate(loader):
                for k in batch:
                    if k != 'meta':
                        batch[k] = batch[k].to(device=device,
                                               non_blocking=True)
                output, loss, loss_stats = model(batch)
                loss = loss.mean()
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # Update the tqdm progress display.
                lr = optimizer.param_groups[0]['lr']
                poststr = ''
                for l in avg_loss_stats:
                    avg_loss_stats[l].update(loss_stats[l].mean().item(),
                                             batch['input'].size(0))
                    poststr += '{}: {:.4f}; '.format(l, avg_loss_stats[l].avg)
                loader.set_description('Epoch %d' % (epoch))
                poststr += 'lr: {:.4f}'.format(lr)
                loader.set_postfix_str(poststr)
                step += 1
                # self.lossSignal.emit(loss.item(), step)
                del output, loss, loss_stats
                # Validate on a random sample every val_step iterations.
                if step % val_step == 0:
                    if len(cfg.gpus) > 1:
                        val_model = model.module
                    else:
                        val_model = model
                    val_model.eval()
                    torch.cuda.empty_cache()
                    # Randomly sample validation images.
                    idx = np.arange(len(val_list))
                    idx = np.random.permutation(idx)[:sample_size]
                    for j, id in enumerate(idx):
                        image = cv2.imread(val_list[id])
                        image = self.preprocess(image, cfg.input_h,
                                                cfg.input_w, mean, std)
                        image = image.to(device)
                        with torch.no_grad():
                            output = val_model.model(image)[-1]
                        # Decode, draw, and save the predictions.
                        debugger = Debugger(dataset=names,
                                            down_ratio=cfg.down_ratio)
                        reg = output['reg'] if cfg.reg_offset else None
                        obj = output['obj'] if cfg.reg_obj else None
                        dets = ctdet_decode(output['hm'].sigmoid_(),
                                            output['wh'], reg=reg, obj=obj,
                                            cat_spec_wh=cfg.cat_spec_wh,
                                            K=cfg.K)
                        dets = dets.detach().cpu().numpy().reshape(
                            -1, dets.shape[2])
                        dets[:, :4] *= cfg.down_ratio
                        image = image[0].detach().cpu().numpy().transpose(
                            1, 2, 0)
                        image = np.clip(((image * std + mean) * 255.), 0,
                                        255).astype(np.uint8)
                        pred = debugger.gen_colormap(
                            output['hm'][0].detach().cpu().numpy())
                        debugger.add_blend_img(image, pred, 'pred_hm')
                        debugger.add_img(image, img_id='out_pred')
                        for k in range(len(dets)):
                            if dets[k, 4] > cfg.vis_thresh:
                                debugger.add_coco_bbox(dets[k, :4],
                                                       dets[k, -1],
                                                       dets[k, 4],
                                                       img_id='out_pred')
                        debugger.save_all_imgs(
                            debug_dir, prefix='{}.{}_'.format(step, j))
                        del output, image, dets
                    # Save a snapshot of the weights after each validation pass.
                    save_model(os.path.join(model_dir, 'model_best.pth'),
                               epoch, net)
                    model.train()
        logstr = 'epoch {}'.format(epoch)
        for k, v in avg_loss_stats.items():
            logstr += ' {}: {:.4f};'.format(k, v.avg)
            if USE_TENSORBOARD:
                writer.add_scalar('train_{}'.format(k), v.avg, epoch)
        logger.info(logstr)
        # NOTE: a full per-epoch validation pass over a separate valloader
        # (tracking val loss, dumping gt/pred heatmaps, and updating
        # model_best.pth only when the loss improves on `best`) is disabled
        # in this version.
        save_model(os.path.join(model_dir, 'model_last.pth'), epoch, net,
                   optimizer)
        if epoch in cfg.lr_step:
            save_model(os.path.join(model_dir, 'model_{}.pth'.format(epoch)),
                       epoch, net, optimizer)
            lr = cfg.lr * (0.1 ** (cfg.lr_step.index(epoch) + 1))
            logger.info('Drop LR to {}'.format(lr))
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
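# The LR schedule at the end of train() multiplies the base rate by 0.1
# for every milestone passed. An equivalent standalone helper (a sketch;
# it generalizes the milestone-only update above to arbitrary epochs):
def step_lr(base_lr, epoch, lr_step, gamma=0.1):
    passed = sum(1 for e in lr_step if epoch >= e)
    return base_lr * (gamma ** passed)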
def test_loader(cfg):
    debugger = Debugger((cfg.DEBUG == 3), theme=cfg.DEBUG_THEME,
                        num_classes=cfg.MODEL.NUM_CLASSES,
                        dataset=cfg.SAMPLE_METHOD,
                        down_ratio=cfg.MODEL.DOWN_RATIO)
    Dataset = get_dataset(cfg.SAMPLE_METHOD, cfg.TASK)
    val_dataset = Dataset(cfg, 'val')
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1,
                                             shuffle=False, num_workers=1,
                                             pin_memory=True)
    for i, batch_data in enumerate(val_loader):
        input_image = batch_data['input']
        heat = batch_data['hm']
        reg = batch_data['reg']
        reg_mask = batch_data['reg_mask']
        ind = batch_data['ind']
        wh = batch_data['wh']
        kps = batch_data['hps']
        hps_mask = batch_data['hps_mask']
        seg_feat = batch_data['seg']
        hm_hp = batch_data['hm_hp']
        hp_offset = batch_data['hp_offset']
        hp_ind = batch_data['hp_ind']
        hp_mask = batch_data['hp_mask']
        meta = batch_data['meta']
        # Dump the batch contents for inspection.
        for k, v in batch_data.items():
            if isinstance(v, dict):
                for k1, v1 in v.items():
                    print(k1)
                    print(v1)
            else:
                print(k)
                print(v.shape)
        print(input_image.shape)
        print(hm_hp.shape)
        # Undo the input normalization for visualization.
        input_image = input_image[0].numpy().transpose(1, 2, 0)
        input_image = (input_image * STD) + MEAN
        input_image = input_image * 255
        input_image = input_image.astype(np.uint8)
        heat = heat.sigmoid_()
        hm_hp = hm_hp.sigmoid_()
        num_joints = 17
        K = cfg.TEST.TOPK
        # Perform NMS on the heatmaps and keep the top-K centers.
        batch, cat, height, width = heat.size()
        heat = _nms(heat)
        scores, inds, clses, ys, xs = _topk(heat, K=K)
        kps = kps.view(batch, K, num_joints * 2)
        kps[..., ::2] += xs.view(batch, K, 1).expand(batch, K, num_joints)
        kps[..., 1::2] += ys.view(batch, K, 1).expand(batch, K, num_joints)
        xs = xs.view(batch, K, 1) + reg[:, :, 0:1]
        ys = ys.view(batch, K, 1) + reg[:, :, 1:2]
        wh = wh.view(batch, K, 2)
        # weight = _transpose_and_gather_feat(seg, inds)
        # You could branch on weight.size(1) != seg_feat.size(1) here to
        # select a 3x3 vs. 1x1 seg conv; for the 3x3 case:
        # weight = weight.view([weight.size(1), -1, 3, 3])
        pred_seg = seg_feat
        clses = clses.view(batch, K, 1).float()
        scores = scores.view(batch, K, 1)
        bboxes = torch.cat([xs - wh[..., 0:1] / 2,
                            ys - wh[..., 1:2] / 2,
                            xs + wh[..., 0:1] / 2,
                            ys + wh[..., 1:2] / 2], dim=2)
        if hm_hp is not None:
            hm_hp = _nms(hm_hp)
            thresh = 0.1
            kps = kps.view(batch, K, num_joints, 2).permute(
                0, 2, 1, 3).contiguous()  # b x J x K x 2
            reg_kps = kps.unsqueeze(3).expand(batch, num_joints, K, K, 2)
            hm_score, hm_inds, hm_ys, hm_xs = _topk_channel(hm_hp, K=K)  # b x J x K
            hp_offset = hp_offset.view(batch, num_joints, K, 2)
            hm_xs = hm_xs + hp_offset[:, :, :, 0]
            hm_ys = hm_ys + hp_offset[:, :, :, 1]
            # Suppress low-confidence keypoint candidates.
            mask = (hm_score > thresh).float()
            hm_score = (1 - mask) * -1 + mask * hm_score
            hm_ys = (1 - mask) * (-10000) + mask * hm_ys
            hm_xs = (1 - mask) * (-10000) + mask * hm_xs
            hm_kps = torch.stack([hm_xs, hm_ys], dim=-1).unsqueeze(2).expand(
                batch, num_joints, K, K, 2)
            # Assign each regressed joint to its nearest detected keypoint.
            dist = (((reg_kps - hm_kps) ** 2).sum(dim=4) ** 0.5)
            min_dist, min_ind = dist.min(dim=3)  # b x J x K
            hm_score = hm_score.gather(2, min_ind).unsqueeze(-1)  # b x J x K x 1
            min_dist = min_dist.unsqueeze(-1)
            min_ind = min_ind.view(batch, num_joints, K, 1, 1).expand(
                batch, num_joints, K, 1, 2)
            hm_kps = hm_kps.gather(3, min_ind)
            hm_kps = hm_kps.view(batch, num_joints, K, 2)
            l = bboxes[:, :, 0].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
            t = bboxes[:, :, 1].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
            r = bboxes[:, :, 2].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
            b = bboxes[:, :, 3].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
            # Fall back to the regressed joints when the matched keypoint is
            # outside the box, low-scoring, or too far away.
            mask = (hm_kps[..., 0:1] < l) + (hm_kps[..., 0:1] > r) + \
                   (hm_kps[..., 1:2] < t) + (hm_kps[..., 1:2] > b) + \
                   (hm_score < thresh) + \
                   (min_dist > (torch.max(b - t, r - l) * 0.3))
            mask = (mask > 0).float().expand(batch, num_joints, K, 2)
            kps = (1 - mask) * hm_kps + mask * kps
            kps = kps.permute(0, 2, 1, 3).contiguous().view(
                batch, K, num_joints * 2)
            dets = torch.cat([bboxes, scores, kps,
                              torch.transpose(hm_score.squeeze(dim=3), 1, 2)],
                             dim=2)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
        dets, inds = whole_body_post_process(dets.copy(), [meta['c'].numpy()],
                                             [meta['s'].numpy()], 128, 128, 1)
        for j in range(1, cfg.MODEL.NUM_CLASSES + 1):
            dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 56)
            dets[0][j][:, :4] /= 1.     # scale factor is 1 here
            dets[0][j][:, 5:39] /= 1.
        print(pred_seg.shape)
        seg = pred_seg[0]
        # meta values come batched from the loader, so unbatch and convert.
        trans = get_affine_transform(meta['c'][0].numpy(),
                                     meta['s'][0].numpy(), 0,
                                     (int(meta['out_width']),
                                      int(meta['out_height'])), inv=1)
        debugger.add_img(input_image, img_id='multi_pose')
        for j in range(1, cfg.MODEL.NUM_CLASSES + 1):
            for b_id, detection in enumerate(dets[0][j]):
                bbox = detection[:4]
                bbox_prob = detection[4]
                keypoints = detection[5:39]
                keypoints_prob = detection[39:]
                if bbox_prob > cfg.TEST.VIS_THRESH:
                    debugger.add_coco_bbox(bbox, 0, bbox_prob,
                                           img_id='multi_pose')
                    segment = seg[b_id].detach().cpu().numpy()
                    segment = cv2.warpAffine(segment, trans,
                                             (input_image.shape[1],
                                              input_image.shape[0]),
                                             flags=cv2.INTER_CUBIC)
                    # Zero the segment outside a padded box around the person.
                    w, h = bbox[2:4] - bbox[:2]
                    ct = np.array([(bbox[0] + bbox[2]) / 2,
                                   (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                    segment_mask = np.zeros_like(segment)
                    pad_rate = 0.3
                    x = np.clip([ct[0] - (1 + pad_rate) * w / 2,
                                 ct[0] + (1 + pad_rate) * w / 2],
                                0, segment.shape[1] - 1).astype(int)
                    y = np.clip([ct[1] - (1 + pad_rate) * h / 2,
                                 ct[1] + (1 + pad_rate) * h / 2],
                                0, segment.shape[0] - 1).astype(int)
                    segment_mask[y[0]:y[1], x[0]:x[1]] = 1
                    segment = segment_mask * segment
                    debugger.add_coco_seg(segment, img_id='multi_pose')
                    debugger.add_coco_hp(keypoints, keypoints_prob,
                                         img_id='multi_pose')
        debugger.show_all_imgs(pause=True)  # no detector instance here, so always pause
        save_path = os.path.join(SAVE_DIR, '{}.png'.format(i))
        cv2.imwrite(save_path, input_image)
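# `_nms` above is CenterNet-style keypoint NMS: a peak survives only if it
# equals the maximum of its local neighbourhood. A sketch of the usual
# max-pool implementation (the project's own _nms may use a different
# kernel size):
import torch.nn.functional as F

def _nms_sketch(heat, kernel=3):
    pad = (kernel - 1) // 2
    hmax = F.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad)
    keep = (hmax == heat).float()   # 1 at local maxima, 0 elsewhere
    return heat * keep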
def error_bound_saliency(opt, img_id, loc=None, error_bound=0.1):
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    Dataset = dataset_factory[opt.dataset]
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    Detector = detector_factory[opt.task]
    ### Simply run the detector and save the objectness heatmap and the
    ### detection results.
    split = 'val' if not opt.trainval else 'test'
    dataset = Dataset(opt, split)
    detector = Detector(opt)
    # Use the FeatureExtractor to register a hook that captures the target
    # activation; to find the name of target_layers, see model.named_modules().
    feature_extractor = FeatureExtractor(detector.model, target_layers='hm')
    detector.model = feature_extractor
    feature_extractor.eval()
    img_info = dataset.coco.loadImgs(ids=[img_id])[0]
    img_path = os.path.join(dataset.img_dir, img_info['file_name'])
    detector.run(img_path)
    ### Get the saliency mask.
    ### Note: because the input image is usually resized and padded, we
    ### compute the mask on the resized image.
    ### The error is the mean squared difference to the original activation.
    ## Gradually grow a rect around the target location in image coordinates
    ## until the error falls below the bound.
    debug_dir = detector.opt.debug_dir
    scale = 1.0
    debugger = Debugger(dataset=detector.opt.dataset,
                        ipynb=(detector.opt.debug == 3),
                        theme=detector.opt.debugger_theme)
    image_org = cv2.imread(img_path)
    image, meta, resized_img = pre_process(detector, image_org, scale,
                                           mask=None, return_resized_img=True)
    _, _, h, w = image.size()
    down_sample_rate = h / feature_extractor.target_val.size(2)
    # Get loc [center_h, center_w] on the resized image and the
    # corresponding [fh, fw] on the feature map.
    if loc is None:
        # If loc is not specified, use the location of the max activation.
        ind = torch.argmax(feature_extractor.target_val[0].sum(dim=0))
        fh = ind // feature_extractor.target_val.size(3)
        fw = ind % feature_extractor.target_val.size(3)
        center_h = fh * down_sample_rate
        center_w = fw * down_sample_rate
        val = feature_extractor.target_val[0, :, fh, fw]
        print([center_h, center_w])
    else:
        center_h, center_w = loc
        fh = int(center_h / down_sample_rate)
        fw = int(center_w / down_sample_rate)
        val = feature_extractor.target_val[0, :, fh, fw]
    loss_fn = lambda x: torch.mean(torch.pow((x - val), 2))
    area_increment = np.prod(image.size()) / 1000.0
    area = 0
    ratio = 1.0  # w/h ratio of the growing rect
    error = 1e10
    mask = np.zeros([h, w])  # [H, W]
    while error > error_bound:
        print("it:{} error:{}".format(area // area_increment, error))
        area += area_increment
        bh = np.sqrt(area / ratio)
        bw = area / bh
        mask = np.zeros([h, w])
        hmin, hmax = max(int(center_h - bh / 2), 0), \
            min(int(center_h + bh / 2) + 1, h - 1)
        wmin, wmax = max(int(center_w - bw / 2), 0), \
            min(int(center_w + bw / 2) + 1, w - 1)
        mask[hmin:hmax, wmin:wmax] = 1
        image_masked, _ = pre_process(detector, image_org, 1.0, mask)
        image_masked = image_masked.to(opt.device)
        with torch.no_grad():
            feature_extractor(image_masked)
        error = loss_fn(feature_extractor.target_val[0, :, fh, fw])
    print("it:{} error:{}".format(area // area_increment, error))
    # Draw the rect mask on the resized image and save it.
    rect_mask_img_save_name = 'rect_mask_{:.1f}'.format(scale)
    debugger.add_blend_img(resized_img,
                           debugger.gen_colormap(mask[np.newaxis, :, :]),
                           rect_mask_img_save_name)
    kernel_hmin, kernel_hmax = max(int(center_h - down_sample_rate / 2), 0), \
        min(int(center_h + down_sample_rate / 2) + 1, h - 1)
    kernel_wmin, kernel_wmax = max(int(center_w - down_sample_rate / 2), 0), \
        min(int(center_w + down_sample_rate / 2) + 1, w - 1)
    # Mark the kernel cell (blue in BGR).
    debugger.imgs[rect_mask_img_save_name][kernel_hmin:kernel_hmax,
                                           kernel_wmin:kernel_wmax] = [255, 0, 0]
    ## Get the saliency superpixels.
    rect_img = resized_img[hmin:hmax, wmin:wmax]
    segments = slic(rect_img, n_segments=30)
    un_removed_superpixel = list(np.unique(segments))
    rect_segment_mask = np.ones_like(segments)
    while error < error_bound:
        # Find the superpixel whose removal leads to the lowest error.
        lowest_error = 1e10
        lowest_error_ind = -1
        for i in un_removed_superpixel:
            mask = np.zeros([h, w])
            mask[hmin:hmax, wmin:wmax] = rect_segment_mask * (segments != i)
            image_masked, _ = pre_process(detector, image_org, 1.0, mask)
            image_masked = image_masked.to(opt.device)
            with torch.no_grad():
                feature_extractor(image_masked)
            cur_error = loss_fn(feature_extractor.target_val[0, :, fh, fw])
            if cur_error < lowest_error:
                lowest_error = cur_error
                lowest_error_ind = i
        if not lowest_error < error_bound:
            break
        un_removed_superpixel.remove(lowest_error_ind)
        error = lowest_error
        rect_segment_mask = rect_segment_mask * (segments != lowest_error_ind)
        print("error={} remaining superpixels:{}".format(
            error, len(un_removed_superpixel)))
    # Draw the segmentation saliency mask on the resized image and save it.
    mask = np.zeros([h, w])
    mask[hmin:hmax, wmin:wmax] = rect_segment_mask
    inp_image = resized_img * mask[:, :, np.newaxis].astype(np.uint8)
    debugger.add_img(inp_image, 'masked_img')
    mask_img_save_name = 'mask_{:.1f}'.format(scale)
    debugger.add_blend_img(resized_img,
                           debugger.gen_colormap(mask[np.newaxis, :, :]),
                           mask_img_save_name)
    debugger.imgs[mask_img_save_name][kernel_hmin:kernel_hmax,
                                      kernel_wmin:kernel_wmax] = [255, 0, 0]  # blue (BGR)
    debugger.save_all_imgs(debug_dir, prefix='{}'.format(opt.img_id))
    opt.prefix = '{}masked'.format(opt.img_id)
    detector.run(inp_image)
    return
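# The saliency search above has two phases: grow a rectangle around the
# target location until the masked image reproduces the activation within
# `error_bound`, then greedily remove SLIC superpixels while the error
# stays under the bound. A condensed, model-agnostic sketch of phase one;
# `forward_error` is a hypothetical stand-in for the masked forward pass
# plus loss_fn used above:
def grow_rect(forward_error, center, shape, error_bound, step):
    h, w = shape
    cy, cx = center
    area = 0.0
    while True:
        area += step
        side = area ** 0.5   # square rect, i.e. ratio = 1.0
        y0, y1 = max(int(cy - side / 2), 0), min(int(cy + side / 2) + 1, h - 1)
        x0, x1 = max(int(cx - side / 2), 0), min(int(cx + side / 2) + 1, w - 1)
        if forward_error(y0, y1, x0, x1) <= error_bound:
            return y0, y1, x0, x1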