def debug(self, batch, output, iter_id):
    opt = self.opt
    reg = output['reg'] if opt.reg_offset else None
    dets = ctdet_decode(
        output['hm'], output['wh'], reg=reg,
        cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= opt.down_ratio
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= opt.down_ratio
    for i in range(1):
        debugger = Debugger(
            dataset=opt.dataset, ipynb=(opt.debug == 3),
            theme=opt.debugger_theme, class_names=opt.class_names)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        debugger.add_img(img, img_id='out_pred')
        for k in range(len(dets[i])):
            if dets[i, k, 4] > opt.center_thresh:
                debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
                                       dets[i, k, 4], img_id='out_pred')
        debugger.add_img(img, img_id='out_gt')
        for k in range(len(dets_gt[i])):
            if dets_gt[i, k, 4] > opt.center_thresh:
                debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
                                       dets_gt[i, k, 4], img_id='out_gt')
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)
def _debug(image, t_heat, l_heat, b_heat, r_heat, ct_heat):
    debugger = Debugger(num_classes=1)
    k = 0

    t_heat = torch.sigmoid(t_heat)
    l_heat = torch.sigmoid(l_heat)
    b_heat = torch.sigmoid(b_heat)
    r_heat = torch.sigmoid(r_heat)

    aggr_weight = 0.1
    t_heat = _h_aggregate(t_heat, aggr_weight=aggr_weight)
    l_heat = _v_aggregate(l_heat, aggr_weight=aggr_weight)
    b_heat = _h_aggregate(b_heat, aggr_weight=aggr_weight)
    r_heat = _v_aggregate(r_heat, aggr_weight=aggr_weight)
    t_heat[t_heat > 1] = 1
    l_heat[l_heat > 1] = 1
    b_heat[b_heat > 1] = 1
    r_heat[r_heat > 1] = 1

    ct_heat = torch.sigmoid(ct_heat)

    t_hm = debugger.gen_colormap(t_heat[k].cpu().data.numpy())
    l_hm = debugger.gen_colormap(l_heat[k].cpu().data.numpy())
    b_hm = debugger.gen_colormap(b_heat[k].cpu().data.numpy())
    r_hm = debugger.gen_colormap(r_heat[k].cpu().data.numpy())
    ct_hm = debugger.gen_colormap(ct_heat[k].cpu().data.numpy())

    hms = np.maximum(np.maximum(t_hm, l_hm), np.maximum(b_hm, r_hm))
    if image is not None:
        mean = np.array([0.40789654, 0.44719302, 0.47026115],
                        dtype=np.float32).reshape(3, 1, 1)
        std = np.array([0.28863828, 0.27408164, 0.27809835],
                       dtype=np.float32).reshape(3, 1, 1)
        img = (image[k].cpu().data.numpy() * std + mean) * 255
        img = img.astype(np.uint8).transpose(1, 2, 0)
        debugger.add_img(img, 'img')
        debugger.add_blend_img(img, t_hm, 't_hm')
        debugger.add_blend_img(img, l_hm, 'l_hm')
        debugger.add_blend_img(img, b_hm, 'b_hm')
        debugger.add_blend_img(img, r_hm, 'r_hm')
        debugger.add_blend_img(img, hms, 'extreme')
        debugger.add_blend_img(img, ct_hm, 'center')
        debugger.show_all_imgs(pause=False)
def debug_for_polygon(self, batch, output, iter_id):
    opt = self.opt
    output = output[0]
    batch = batch[0]
    for i in range(1):
        debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        debugger.add_img(img, img_id='out_pred')
        debugger.add_img(img, img_id='out_gt')
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)
def debug(self, batch, output, iter_id): opt = self.opt detections = self.decode(output['hm_t'], output['hm_l'], output['hm_b'], output['hm_r'], output['hm_c']).detach().cpu().numpy() detections[:, :, :4] *= opt.input_res / opt.output_res for i in range(1): dataset = opt.dataset if opt.dataset == 'yolo': dataset = opt.names debugger = Debugger(dataset=dataset, ipynb=(opt.debug == 3), theme=opt.debugger_theme) pred_hm = np.zeros((opt.input_res, opt.input_res, 3), dtype=np.uint8) gt_hm = np.zeros((opt.input_res, opt.input_res, 3), dtype=np.uint8) img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) img = ((img * self.opt.std + self.opt.mean) * 255.).astype( np.uint8) for p in self.parts: tag = 'hm_{}'.format(p) pred = debugger.gen_colormap( output[tag][i].detach().cpu().numpy()) gt = debugger.gen_colormap( batch[tag][i].detach().cpu().numpy()) if p != 'c': pred_hm = np.maximum(pred_hm, pred) gt_hm = np.maximum(gt_hm, gt) if p == 'c' or opt.debug > 2: debugger.add_blend_img(img, pred, 'pred_{}'.format(p)) debugger.add_blend_img(img, gt, 'gt_{}'.format(p)) debugger.add_blend_img(img, pred_hm, 'pred') debugger.add_blend_img(img, gt_hm, 'gt') debugger.add_img(img, img_id='out') for k in range(len(detections[i])): if detections[i, k, 4] > 0.1: debugger.add_coco_bbox(detections[i, k, :4], detections[i, k, -1], detections[i, k, 4], img_id='out') if opt.debug == 4: debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id)) else: debugger.show_all_imgs(pause=True)
def debug(self, batch, output, iter_id): opt = self.opt reg = output['reg'] if opt.reg_offset else None dets = ctdet_decode(output['hm'], output['wh'], reg=reg, cat_spec_wh=opt.cat_spec_wh, K=opt.K) dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) dets[:, :, :4] *= opt.down_ratio dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2]) dets_gt[:, :, :4] *= opt.down_ratio if opt.task == 'ctdet_semseg': seg_gt = batch['seg'][0][0].cpu().numpy() seg_pred = output['seg'].max(1)[1].squeeze_(1).squeeze_( 0).cpu().numpy() for i in range(1): debugger = Debugger(opt, dataset=opt.dataset, ipynb=(opt.debug == 3), theme=opt.debugger_theme) img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8) debugger.add_img(img, img_id='out_pred') for k in range(len(dets[i])): if dets[i, k, 4] > opt.vis_thresh: debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1], dets[i, k, 4], img_id='out_pred') debugger.add_img(img, img_id='out_gt') for k in range(len(dets_gt[i])): if dets_gt[i, k, 4] > opt.vis_thresh: debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1], dets_gt[i, k, 4], img_id='out_gt') if opt.save_video and opt.debug <= 1: # only save the predicted and gt images return debugger.imgs['out_pred'], debugger.imgs[ 'out_gt'] # , debugger.imgs['pred_hm'], debugger.imgs['gt_hm'] pred = debugger.gen_colormap( output['hm'][i].detach().cpu().numpy()) gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) debugger.add_blend_img(img, pred, 'pred_hm') debugger.add_blend_img(img, gt, 'gt_hm') if opt.task == 'ctdet_semseg': debugger.visualize_masks(seg_gt, img_id='out_mask_gt') debugger.visualize_masks(seg_pred, img_id='out_mask_pred') if opt.debug == 4: debugger.save_all_imgs(opt.debug_dir, prefix=iter_id) import pdb pdb.set_trace() if opt.save_video: return debugger.imgs['out_pred'], debugger.imgs['out_gt']
def debug(self, batch, output, iter_id, dataset): opt = self.opt if 'pre_hm' in batch: output.update({'pre_hm': batch['pre_hm']}) dets = fusion_decode(output, K=opt.K, opt=opt) for k in dets: dets[k] = dets[k].detach().cpu().numpy() dets_gt = batch['meta']['gt_det'] for i in range(1): debugger = Debugger(opt=opt, dataset=dataset) img = batch['image'][i].detach().cpu().numpy().transpose(1, 2, 0) img = np.clip(((img * dataset.std + dataset.mean) * 255.), 0, 255).astype(np.uint8) pred = debugger.gen_colormap( output['hm'][i].detach().cpu().numpy()) gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) debugger.add_blend_img(img, pred, 'pred_hm', trans=self.opt.hm_transparency) debugger.add_blend_img(img, gt, 'gt_hm', trans=self.opt.hm_transparency) debugger.add_img(img, img_id='img') # show point clouds if opt.pointcloud: pc_2d = batch['pc_2d'][i].detach().cpu().numpy() pc_3d = None pc_N = batch['pc_N'][i].detach().cpu().numpy() debugger.add_img(img, img_id='pc') debugger.add_pointcloud(pc_2d, pc_N, img_id='pc') if 'pc_hm' in opt.pc_feat_lvl: channel = opt.pc_feat_channels['pc_hm'] pc_hm = debugger.gen_colormap( batch['pc_hm'][i][channel].unsqueeze( 0).detach().cpu().numpy()) debugger.add_blend_img(img, pc_hm, 'pc_hm', trans=self.opt.hm_transparency) if 'pc_dep' in opt.pc_feat_lvl: channel = opt.pc_feat_channels['pc_dep'] pc_hm = batch['pc_hm'][i][channel].unsqueeze( 0).detach().cpu().numpy() pc_dep = debugger.add_overlay_img(img, pc_hm, 'pc_dep') if 'pre_img' in batch: pre_img = batch['pre_img'][i].detach().cpu().numpy().transpose( 1, 2, 0) pre_img = np.clip( ((pre_img * dataset.std + dataset.mean) * 255), 0, 255).astype(np.uint8) debugger.add_img(pre_img, 'pre_img_pred') debugger.add_img(pre_img, 'pre_img_gt') if 'pre_hm' in batch: pre_hm = debugger.gen_colormap( batch['pre_hm'][i].detach().cpu().numpy()) debugger.add_blend_img(pre_img, pre_hm, 'pre_hm', trans=self.opt.hm_transparency) debugger.add_img(img, img_id='out_pred') if 'ltrb_amodal' in opt.heads: debugger.add_img(img, img_id='out_pred_amodal') debugger.add_img(img, img_id='out_gt_amodal') # Predictions for k in range(len(dets['scores'][i])): if dets['scores'][i, k] > opt.vis_thresh: debugger.add_coco_bbox(dets['bboxes'][i, k] * opt.down_ratio, dets['clses'][i, k], dets['scores'][i, k], img_id='out_pred') if 'ltrb_amodal' in opt.heads: debugger.add_coco_bbox(dets['bboxes_amodal'][i, k] * opt.down_ratio, dets['clses'][i, k], dets['scores'][i, k], img_id='out_pred_amodal') if 'hps' in opt.heads and int(dets['clses'][i, k]) == 0: debugger.add_coco_hp(dets['hps'][i, k] * opt.down_ratio, img_id='out_pred') if 'tracking' in opt.heads: debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio, dets['tracking'][i][k] * opt.down_ratio, img_id='out_pred') debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio, dets['tracking'][i][k] * opt.down_ratio, img_id='pre_img_pred') # Ground truth debugger.add_img(img, img_id='out_gt') for k in range(len(dets_gt['scores'][i])): if dets_gt['scores'][i][k] > opt.vis_thresh: if 'dep' in dets_gt.keys(): dist = dets_gt['dep'][i][k] if len(dist) > 1: dist = dist[0] else: dist = -1 debugger.add_coco_bbox(dets_gt['bboxes'][i][k] * opt.down_ratio, dets_gt['clses'][i][k], dets_gt['scores'][i][k], img_id='out_gt', dist=dist) if 'ltrb_amodal' in opt.heads: debugger.add_coco_bbox(dets_gt['bboxes_amodal'][i, k] * opt.down_ratio, dets_gt['clses'][i, k], dets_gt['scores'][i, k], img_id='out_gt_amodal') if 'hps' in opt.heads and \ (int(dets['clses'][i, k]) == 0): debugger.add_coco_hp(dets_gt['hps'][i][k] * 
opt.down_ratio, img_id='out_gt') if 'tracking' in opt.heads: debugger.add_arrow( dets_gt['cts'][i][k] * opt.down_ratio, dets_gt['tracking'][i][k] * opt.down_ratio, img_id='out_gt') debugger.add_arrow( dets_gt['cts'][i][k] * opt.down_ratio, dets_gt['tracking'][i][k] * opt.down_ratio, img_id='pre_img_gt') if 'hm_hp' in opt.heads: pred = debugger.gen_colormap_hp( output['hm_hp'][i].detach().cpu().numpy()) gt = debugger.gen_colormap_hp( batch['hm_hp'][i].detach().cpu().numpy()) debugger.add_blend_img(img, pred, 'pred_hmhp', trans=self.opt.hm_transparency) debugger.add_blend_img(img, gt, 'gt_hmhp', trans=self.opt.hm_transparency) if 'rot' in opt.heads and 'dim' in opt.heads and 'dep' in opt.heads: dets_gt = {k: dets_gt[k].cpu().numpy() for k in dets_gt} calib = batch['meta']['calib'].detach().numpy() \ if 'calib' in batch['meta'] else None det_pred = generic_post_process( opt, dets, batch['meta']['c'].cpu().numpy(), batch['meta']['s'].cpu().numpy(), output['hm'].shape[2], output['hm'].shape[3], self.opt.num_classes, calib) det_gt = generic_post_process(opt, dets_gt, batch['meta']['c'].cpu().numpy(), batch['meta']['s'].cpu().numpy(), output['hm'].shape[2], output['hm'].shape[3], self.opt.num_classes, calib, is_gt=True) debugger.add_3d_detection(batch['meta']['img_path'][i], batch['meta']['flipped'][i], det_pred[i], calib[i], vis_thresh=opt.vis_thresh, img_id='add_pred') debugger.add_3d_detection(batch['meta']['img_path'][i], batch['meta']['flipped'][i], det_gt[i], calib[i], vis_thresh=opt.vis_thresh, img_id='add_gt') pc_3d = None if opt.pointcloud: pc_3d = batch['pc_3d'].cpu().numpy() debugger.add_bird_views(det_pred[i], det_gt[i], vis_thresh=opt.vis_thresh, img_id='bird_pred_gt', pc_3d=pc_3d, show_velocity=opt.show_velocity) debugger.add_bird_views([], det_gt[i], vis_thresh=opt.vis_thresh, img_id='bird_gt', pc_3d=pc_3d, show_velocity=opt.show_velocity) if opt.debug == 4: debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id)) else: debugger.show_all_imgs(pause=True)
def debug(self, batch, output, iter_id, dataset): opt = self.opt if 'pre_hm' in batch: output.update({'pre_hm': batch['pre_hm']}) dets = generic_decode(output, K=opt.K, opt=opt) for k in dets: dets[k] = dets[k].detach().cpu().numpy() dets_gt = batch['meta']['gt_det'] for i in range(1): debugger = Debugger(opt=opt, dataset=dataset) img = batch['image'][i].detach().cpu().numpy().transpose(1, 2, 0) img = np.clip(((img * dataset.std + dataset.mean) * 255.), 0, 255).astype(np.uint8) pred = debugger.gen_colormap( output['hm'][i].detach().cpu().numpy()) gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) debugger.add_blend_img(img, pred, 'pred_hm') debugger.add_blend_img(img, gt, 'gt_hm') if 'pre_img' in batch: pre_img = batch['pre_img'][i].detach().cpu().numpy().transpose( 1, 2, 0) pre_img = np.clip( ((pre_img * dataset.std + dataset.mean) * 255), 0, 255).astype(np.uint8) debugger.add_img(pre_img, 'pre_img_pred') debugger.add_img(pre_img, 'pre_img_gt') if 'pre_hm' in batch: pre_hm = debugger.gen_colormap( batch['pre_hm'][i].detach().cpu().numpy()) debugger.add_blend_img(pre_img, pre_hm, 'pre_hm') debugger.add_img(img, img_id='out_pred') if 'ltrb_amodal' in opt.heads: debugger.add_img(img, img_id='out_pred_amodal') debugger.add_img(img, img_id='out_gt_amodal') # Predictions for k in range(len(dets['scores'][i])): if dets['scores'][i, k] > opt.vis_thresh: debugger.add_coco_bbox(dets['bboxes'][i, k] * opt.down_ratio, dets['clses'][i, k], dets['scores'][i, k], img_id='out_pred') if 'ltrb_amodal' in opt.heads: debugger.add_coco_bbox(dets['bboxes_amodal'][i, k] * opt.down_ratio, dets['clses'][i, k], dets['scores'][i, k], img_id='out_pred_amodal') if 'hps' in opt.heads and int(dets['clses'][i, k]) == 0: debugger.add_coco_hp(dets['hps'][i, k] * opt.down_ratio, img_id='out_pred') if 'tracking' in opt.heads: debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio, dets['tracking'][i][k] * opt.down_ratio, img_id='out_pred') debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio, dets['tracking'][i][k] * opt.down_ratio, img_id='pre_img_pred') # Ground truth debugger.add_img(img, img_id='out_gt') for k in range(len(dets_gt['scores'][i])): if dets_gt['scores'][i][k] > opt.vis_thresh: debugger.add_coco_bbox(dets_gt['bboxes'][i][k] * opt.down_ratio, dets_gt['clses'][i][k], dets_gt['scores'][i][k], img_id='out_gt') if 'ltrb_amodal' in opt.heads: debugger.add_coco_bbox(dets_gt['bboxes_amodal'][i, k] * opt.down_ratio, dets_gt['clses'][i, k], dets_gt['scores'][i, k], img_id='out_gt_amodal') if 'hps' in opt.heads and \ (int(dets['clses'][i, k]) == 0): debugger.add_coco_hp(dets_gt['hps'][i][k] * opt.down_ratio, img_id='out_gt') if 'tracking' in opt.heads: debugger.add_arrow( dets_gt['cts'][i][k] * opt.down_ratio, dets_gt['tracking'][i][k] * opt.down_ratio, img_id='out_gt') debugger.add_arrow( dets_gt['cts'][i][k] * opt.down_ratio, dets_gt['tracking'][i][k] * opt.down_ratio, img_id='pre_img_gt') if 'hm_hp' in opt.heads: pred = debugger.gen_colormap_hp( output['hm_hp'][i].detach().cpu().numpy()) gt = debugger.gen_colormap_hp( batch['hm_hp'][i].detach().cpu().numpy()) debugger.add_blend_img(img, pred, 'pred_hmhp') debugger.add_blend_img(img, gt, 'gt_hmhp') if 'rot' in opt.heads and 'dim' in opt.heads and 'dep' in opt.heads: dets_gt = {k: dets_gt[k].cpu().numpy() for k in dets_gt} calib = batch['meta']['calib'].detach().numpy() \ if 'calib' in batch['meta'] else None det_pred = generic_post_process( opt, dets, batch['meta']['c'].cpu().numpy(), batch['meta']['s'].cpu().numpy(), output['hm'].shape[2], 
output['hm'].shape[3], self.opt.num_classes, calib) det_gt = generic_post_process(opt, dets_gt, batch['meta']['c'].cpu().numpy(), batch['meta']['s'].cpu().numpy(), output['hm'].shape[2], output['hm'].shape[3], self.opt.num_classes, calib) debugger.add_3d_detection(batch['meta']['img_path'][i], batch['meta']['flipped'][i], det_pred[i], calib[i], vis_thresh=opt.vis_thresh, img_id='add_pred') debugger.add_3d_detection(batch['meta']['img_path'][i], batch['meta']['flipped'][i], det_gt[i], calib[i], vis_thresh=opt.vis_thresh, img_id='add_gt') debugger.add_bird_views(det_pred[i], det_gt[i], vis_thresh=opt.vis_thresh, img_id='bird_pred_gt') if opt.debug == 4: debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id)) else: debugger.show_all_imgs(pause=True)
def kp_detection(db, k_ind, data_aug, debug): data_rng = system_configs.data_rng batch_size = system_configs.batch_size categories = db.configs["categories"] input_size = db.configs["input_size"] output_size = db.configs["output_sizes"][0] border = db.configs["border"] lighting = db.configs["lighting"] rand_crop = db.configs["rand_crop"] rand_color = db.configs["rand_color"] rand_scales = db.configs["rand_scales"] gaussian_bump = db.configs["gaussian_bump"] gaussian_iou = db.configs["gaussian_iou"] gaussian_rad = db.configs["gaussian_radius"] max_tag_len = 128 # allocating memory images = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32) t_heatmaps = np.zeros( (batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) l_heatmaps = np.zeros( (batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) b_heatmaps = np.zeros( (batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) r_heatmaps = np.zeros( (batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) ct_heatmaps = np.zeros( (batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) t_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) l_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) b_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) r_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) t_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64) l_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64) b_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64) r_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64) ct_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64) tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8) tag_lens = np.zeros((batch_size, ), dtype=np.int32) db_size = db.db_inds.size for b_ind in range(batch_size): if not debug and k_ind == 0: db.shuffle_inds() db_ind = db.db_inds[k_ind] k_ind = (k_ind + 1) % db_size # reading image image_file = db.image_file(db_ind) image = cv2.imread(image_file) # reading detections detections, extreme_pts = db.detections(db_ind) # cropping an image randomly if rand_crop: image, detections, extreme_pts = random_crop_pts(image, detections, extreme_pts, rand_scales, input_size, border=border) else: assert 0 # image, detections = _full_image_crop(image, detections) image, detections, extreme_pts = _resize_image_pts( image, detections, extreme_pts, input_size) detections, extreme_pts = _clip_detections_pts(image, detections, extreme_pts) width_ratio = output_size[1] / input_size[1] height_ratio = output_size[0] / input_size[0] # flipping an image randomly if np.random.uniform() > 0.5: image[:] = image[:, ::-1, :] width = image.shape[1] detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1 extreme_pts[:, :, 0] = width - extreme_pts[:, :, 0] - 1 extreme_pts[:, 1, :], extreme_pts[:, 3, :] = \ extreme_pts[:, 3, :].copy(), extreme_pts[:, 1, :].copy() image = image.astype(np.float32) / 255. 
if not debug: if rand_color: color_jittering_(data_rng, image) if lighting: lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec) normalize_(image, db.mean, db.std) images[b_ind] = image.transpose((2, 0, 1)) for ind, detection in enumerate(detections): category = int(detection[-1]) - 1 extreme_pt = extreme_pts[ind] xt, yt = extreme_pt[0, 0], extreme_pt[0, 1] xl, yl = extreme_pt[1, 0], extreme_pt[1, 1] xb, yb = extreme_pt[2, 0], extreme_pt[2, 1] xr, yr = extreme_pt[3, 0], extreme_pt[3, 1] xct = (xl + xr) / 2 yct = (yt + yb) / 2 fxt = (xt * width_ratio) fyt = (yt * height_ratio) fxl = (xl * width_ratio) fyl = (yl * height_ratio) fxb = (xb * width_ratio) fyb = (yb * height_ratio) fxr = (xr * width_ratio) fyr = (yr * height_ratio) fxct = (xct * width_ratio) fyct = (yct * height_ratio) xt = int(fxt) yt = int(fyt) xl = int(fxl) yl = int(fyl) xb = int(fxb) yb = int(fyb) xr = int(fxr) yr = int(fyr) xct = int(fxct) yct = int(fyct) if gaussian_bump: width = detection[2] - detection[0] height = detection[3] - detection[1] width = math.ceil(width * width_ratio) height = math.ceil(height * height_ratio) if gaussian_rad == -1: radius = gaussian_radius((height, width), gaussian_iou) radius = max(0, int(radius)) else: radius = gaussian_rad draw_gaussian(t_heatmaps[b_ind, category], [xt, yt], radius) draw_gaussian(l_heatmaps[b_ind, category], [xl, yl], radius) draw_gaussian(b_heatmaps[b_ind, category], [xb, yb], radius) draw_gaussian(r_heatmaps[b_ind, category], [xr, yr], radius) draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct], radius) else: t_heatmaps[b_ind, category, yt, xt] = 1 l_heatmaps[b_ind, category, yl, xl] = 1 b_heatmaps[b_ind, category, yb, xb] = 1 r_heatmaps[b_ind, category, yr, xr] = 1 tag_ind = tag_lens[b_ind] t_regrs[b_ind, tag_ind, :] = [fxt - xt, fyt - yt] l_regrs[b_ind, tag_ind, :] = [fxl - xl, fyl - yl] b_regrs[b_ind, tag_ind, :] = [fxb - xb, fyb - yb] r_regrs[b_ind, tag_ind, :] = [fxr - xr, fyr - yr] t_tags[b_ind, tag_ind] = yt * output_size[1] + xt l_tags[b_ind, tag_ind] = yl * output_size[1] + xl b_tags[b_ind, tag_ind] = yb * output_size[1] + xb r_tags[b_ind, tag_ind] = yr * output_size[1] + xr ct_tags[b_ind, tag_ind] = yct * output_size[1] + xct tag_lens[b_ind] += 1 for b_ind in range(batch_size): tag_len = tag_lens[b_ind] tag_masks[b_ind, :tag_len] = 1 if debug: debugger = Debugger(num_classes=80) t_hm = debugger.gen_colormap(t_heatmaps[0]) l_hm = debugger.gen_colormap(l_heatmaps[0]) b_hm = debugger.gen_colormap(b_heatmaps[0]) r_hm = debugger.gen_colormap(r_heatmaps[0]) ct_hm = debugger.gen_colormap(ct_heatmaps[0]) img = images[0] * db.std.reshape(3, 1, 1) + db.mean.reshape(3, 1, 1) img = (img * 255).astype(np.uint8).transpose(1, 2, 0) debugger.add_blend_img(img, t_hm, 't_hm') debugger.add_blend_img(img, l_hm, 'l_hm') debugger.add_blend_img(img, b_hm, 'b_hm') debugger.add_blend_img(img, r_hm, 'r_hm') debugger.add_blend_img( img, np.maximum(np.maximum(t_hm, l_hm), np.maximum(b_hm, r_hm)), 'extreme') debugger.add_blend_img(img, ct_hm, 'center') debugger.show_all_imgs(pause=True) images = torch.from_numpy(images) t_heatmaps = torch.from_numpy(t_heatmaps) l_heatmaps = torch.from_numpy(l_heatmaps) b_heatmaps = torch.from_numpy(b_heatmaps) r_heatmaps = torch.from_numpy(r_heatmaps) ct_heatmaps = torch.from_numpy(ct_heatmaps) t_regrs = torch.from_numpy(t_regrs) l_regrs = torch.from_numpy(l_regrs) b_regrs = torch.from_numpy(b_regrs) r_regrs = torch.from_numpy(r_regrs) t_tags = torch.from_numpy(t_tags) l_tags = torch.from_numpy(l_tags) b_tags = torch.from_numpy(b_tags) r_tags = 
torch.from_numpy(r_tags) ct_tags = torch.from_numpy(ct_tags) tag_masks = torch.from_numpy(tag_masks) return { "xs": [images, t_tags, l_tags, b_tags, r_tags, ct_tags], "ys": [ t_heatmaps, l_heatmaps, b_heatmaps, r_heatmaps, ct_heatmaps, tag_masks, t_regrs, l_regrs, b_regrs, r_regrs ] }, k_ind
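# Minimal usage sketch of the sampler above (hypothetical, not part of the
# original code): it assumes `db` is an already-constructed dataset object that
# exposes the config fields and methods kp_detection() reads, and that
# system_configs has been initialized.
batch, next_k_ind = kp_detection(db, k_ind=0, data_aug=True, debug=False)
images = batch["xs"][0]        # (batch_size, 3, input_h, input_w) float tensor
t_heatmaps = batch["ys"][0]    # (batch_size, categories, output_h, output_w)
print(images.shape, t_heatmaps.shape, next_k_ind)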
def train(self, cfg):
    # set up the GPU environment, handling both single- and multi-GPU cases
    gpus_str = ''
    if isinstance(cfg.gpus, (list, tuple)):
        cfg.gpus = [int(i) for i in cfg.gpus]
        for s in cfg.gpus:
            gpus_str += str(s) + ','
        gpus_str = gpus_str[:-1]
    else:
        gpus_str = str(int(cfg.gpus))
        cfg.gpus = [int(cfg.gpus)]
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus_str
    cfg.gpus = [i for i in range(len(cfg.gpus))] if cfg.gpus[0] >= 0 else [-1]

    # set up logging
    model_dir = os.path.join(cfg.save_dir, cfg.id)
    debug_dir = os.path.join(model_dir, 'debug')
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)
    logger = setup_logger(cfg.id, os.path.join(model_dir, 'log'))
    if USE_TENSORBOARD:
        writer = tensorboardX.SummaryWriter(log_dir=os.path.join(model_dir, 'log'))
    logger.info(cfg)

    gpus = cfg.gpus
    device = torch.device('cpu' if gpus[0] < 0 else 'cuda')
    lr = cfg.lr
    lr_step = cfg.lr_step
    num_epochs = cfg.num_epochs
    val_step = cfg.val_step
    sample_size = cfg.sample_size

    # set up the dataset
    dataset = YOLO(cfg.data_dir, cfg.hflip, cfg.vflip, cfg.rotation, cfg.scale,
                   cfg.shear, opt=cfg, split='train')
    names = dataset.class_name
    std = dataset.std
    mean = dataset.mean
    # configure the prediction heads from the dataset's number of classes
    cfg.setup_head(dataset)
    trainloader = DataLoader(dataset, batch_size=cfg.batch_size, shuffle=True,
                             num_workers=cfg.num_workers, pin_memory=True,
                             drop_last=True)
    # val_dataset = YOLO(cfg.data_dir, cfg.hflip, cfg.vflip, cfg.rotation, cfg.scale, cfg.shear, opt=cfg, split='val')
    # valloader = DataLoader(val_dataset, batch_size=1, shuffle=True, num_workers=1, pin_memory=True)
    valid_file = cfg.val_dir if not cfg.val_dir == '' else os.path.join(
        cfg.data_dir, 'valid.txt')
    with open(valid_file, 'r') as f:
        val_list = [l.rstrip() for l in f.readlines()]

    net = create_model(cfg.arch, cfg.heads, cfg.head_conv, cfg.down_ratio,
                       cfg.filters)
    optimizer = optim.Adam(net.parameters(), lr=lr)
    start_epoch = 0
    if cfg.resume:
        pretrain = os.path.join(model_dir, 'model_last.pth')
        if os.path.exists(pretrain):
            print('resume model from %s' % pretrain)
            try:
                net, optimizer, start_epoch = load_model(
                    net, pretrain, optimizer, True, lr, lr_step)
            except:
                print('\t... loading model error: ckpt may not be compatible')
    model = ModleWithLoss(net, CtdetLoss(cfg))
    if len(gpus) > 1:
        model = nn.DataParallel(model, device_ids=gpus).to(device)
    else:
        model = model.to(device)

    step = 0
    best = 1e10
    log_loss_stats = ['loss', 'hm_loss', 'wh_loss']
    if cfg.reg_offset:
        log_loss_stats += ['off_loss']
    if cfg.reg_obj:
        log_loss_stats += ['obj_loss']
    for epoch in range(start_epoch + 1, num_epochs + 1):
        avg_loss_stats = {l: AverageMeter() for l in log_loss_stats}
        model.train()
        with tqdm(trainloader) as loader:
            for _, batch in enumerate(loader):
                for k in batch:
                    if k != 'meta':
                        batch[k] = batch[k].to(device=device, non_blocking=True)
                output, loss, loss_stats = model(batch)
                loss = loss.mean()
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # update the tqdm progress display
                lr = optimizer.param_groups[0]['lr']
                poststr = ''
                for l in avg_loss_stats:
                    avg_loss_stats[l].update(loss_stats[l].mean().item(),
                                             batch['input'].size(0))
                    poststr += '{}: {:.4f}; '.format(l, avg_loss_stats[l].avg)
                loader.set_description('Epoch %d' % (epoch))
                poststr += 'lr: {:.4f}'.format(lr)
                loader.set_postfix_str(poststr)
                step += 1
                # self.lossSignal.emit(loss.item(), step)
                del output, loss, loss_stats

                # valid
                if step % val_step == 0:
                    if len(cfg.gpus) > 1:
                        val_model = model.module
                    else:
                        val_model = model
                    val_model.eval()
                    torch.cuda.empty_cache()
                    # randomly sample images from the validation list
                    idx = np.arange(len(val_list))
                    idx = np.random.permutation(idx)[:sample_size]
                    for j, id in enumerate(idx):
                        image = cv2.imread(val_list[id])
                        image = self.preprocess(image, cfg.input_h, cfg.input_w,
                                                mean, std)
                        image = image.to(device)
                        with torch.no_grad():
                            output = val_model.model(image)[-1]
                        # draw the predictions and save them
                        debugger = Debugger(dataset=names,
                                            down_ratio=cfg.down_ratio)
                        reg = output['reg'] if cfg.reg_offset else None
                        obj = output['obj'] if cfg.reg_obj else None
                        dets = ctdet_decode(output['hm'].sigmoid_(), output['wh'],
                                            reg=reg, obj=obj,
                                            cat_spec_wh=cfg.cat_spec_wh, K=cfg.K)
                        dets = dets.detach().cpu().numpy().reshape(-1, dets.shape[2])
                        dets[:, :4] *= cfg.down_ratio
                        image = image[0].detach().cpu().numpy().transpose(1, 2, 0)
                        image = np.clip(((image * std + mean) * 255.), 0,
                                        255).astype(np.uint8)
                        pred = debugger.gen_colormap(
                            output['hm'][0].detach().cpu().numpy())
                        debugger.add_blend_img(image, pred, 'pred_hm')
                        debugger.add_img(image, img_id='out_pred')
                        for k in range(len(dets)):
                            if dets[k, 4] > cfg.vis_thresh:
                                debugger.add_coco_bbox(dets[k, :4], dets[k, -1],
                                                       dets[k, 4],
                                                       img_id='out_pred')
                        debugger.save_all_imgs(debug_dir,
                                               prefix='{}.{}_'.format(step, j))
                        del output, image, dets
                    # save model weights
                    save_model(os.path.join(model_dir, 'model_best.pth'), epoch, net)
                    model.train()

        logstr = 'epoch {}'.format(epoch)
        for k, v in avg_loss_stats.items():
            logstr += ' {}: {:.4f};'.format(k, v.avg)
            if USE_TENSORBOARD:
                writer.add_scalar('train_{}'.format(k), v.avg, epoch)
        logger.info(logstr)

        # if epoch % val_step == 0:
        #     if len(cfg.gpus) > 1:
        #         val_model = model.module
        #     else:
        #         val_model = model
        #     val_model.eval()
        #     torch.cuda.empty_cache()
        #
        #     val_loss_stats = {l: AverageMeter() for l in log_loss_stats}
        #
        #     with tqdm(valloader) as loader:
        #         for j, batch in enumerate(loader):
        #             for k in batch:
        #                 if k != 'meta':
        #                     batch[k] = batch[k].to(device=device, non_blocking=True)
        #             with torch.no_grad():
        #                 output, loss, loss_stats = val_model(batch)
        #
        #             poststr = ''
        #             for l in val_loss_stats:
        #                 val_loss_stats[l].update(
        #                     loss_stats[l].mean().item(), batch['input'].size(0))
        #                 poststr += '{}: {:.4f}; '.format(l, val_loss_stats[l].avg)
        #             loader.set_description('Epoch %d valid' % (epoch))
        #             poststr += 'lr: {:.4f}'.format(lr)
        #             loader.set_postfix_str(poststr)
        #
        #             if j < sample_size:
        #                 # draw the predictions and save them as jpg images
        #                 debugger = Debugger(dataset=names, down_ratio=cfg.down_ratio)
        #                 reg = output['reg'] if cfg.reg_offset else None
        #                 obj = output['obj'] if cfg.reg_obj else None
        #                 dets = ctdet_decode(
        #                     output['hm'], output['wh'], reg=reg, obj=obj,
        #                     cat_spec_wh=cfg.cat_spec_wh, K=cfg.K)
        #                 dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
        #                 dets[:, :, :4] *= cfg.down_ratio
        #                 dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
        #                 dets_gt[:, :, :4] *= cfg.down_ratio
        #                 for i in range(1):
        #                     img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        #                     img = np.clip(((img * std + mean) * 255.), 0, 255).astype(np.uint8)
        #                     pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        #                     gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        #                     debugger.add_blend_img(img, pred, 'pred_hm')
        #                     debugger.add_blend_img(img, gt, 'gt_hm')
        #                     debugger.add_img(img, img_id='out_pred')
        #                     for k in range(len(dets[i])):
        #                         if dets[i, k, 4] > cfg.vis_thresh:
        #                             debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
        #                                                    dets[i, k, 4], img_id='out_pred')
        #
        #                     debugger.add_img(img, img_id='out_gt')
        #                     for k in range(len(dets_gt[i])):
        #                         if dets_gt[i, k, 4] > cfg.vis_thresh:
        #                             debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
        #                                                    dets_gt[i, k, 4], img_id='out_gt')
        #
        #                 debugger.save_all_imgs(debug_dir, prefix='{}.{}_'.format(epoch, j))
        #             del output, loss, loss_stats
        #     model.train()
        #     logstr = 'epoch {} valid'.format(epoch)
        #     for k, v in val_loss_stats.items():
        #         logstr += ' {}: {:.4f};'.format(k, v.avg)
        #         if USE_TENSORBOARD:
        #             writer.add_scalar('val_{}'.format(k), v.avg, epoch)
        #     logger.info(logstr)
        #     if val_loss_stats['loss'].avg < best:
        #         best = val_loss_stats['loss'].avg
        #         save_model(os.path.join(model_dir, 'model_best.pth'), epoch, net)

        save_model(os.path.join(model_dir, 'model_last.pth'), epoch, net, optimizer)
        if epoch in cfg.lr_step:
            save_model(os.path.join(model_dir, 'model_{}.pth'.format(epoch)),
                       epoch, net, optimizer)
            lr = cfg.lr * (0.1 ** (cfg.lr_step.index(epoch) + 1))
            logger.info('Drop LR to {}'.format(lr))
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
def debug(self, batch, output, iter_id): cfg = self.cfg reg = output[3] if cfg.LOSS.REG_OFFSET else None hm_hp = output[4] if cfg.LOSS.HM_HP else None hp_offset = output[5] if cfg.LOSS.REG_HP_OFFSET else None dets = multi_pose_decode(output[0], output[1], output[2], reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=cfg.TEST.TOPK) dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) dets[:, :, :4] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES dets[:, :, 5:39] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2]) dets_gt[:, :, :4] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES dets_gt[:, :, 5:39] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES for i in range(1): debugger = Debugger(dataset=cfg.SAMPLE_METHOD, ipynb=(cfg.DEBUG == 3), theme=cfg.DEBUG_THEME) img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) img = np.clip(((img * np.array(cfg.DATASET.STD).reshape( 1, 1, 3).astype(np.float32) + cfg.DATASET.MEAN) * 255.), 0, 255).astype(np.uint8) pred = debugger.gen_colormap(output[0][i].detach().cpu().numpy()) gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) debugger.add_blend_img(img, pred, 'pred_hm') debugger.add_blend_img(img, gt, 'gt_hm') debugger.add_img(img, img_id='out_pred') for k in range(len(dets[i])): if dets[i, k, 4] > cfg.MODEL.CENTER_THRESH: debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1], dets[i, k, 4], img_id='out_pred') debugger.add_coco_hp(dets[i, k, 5:39], img_id='out_pred') debugger.add_img(img, img_id='out_gt') for k in range(len(dets_gt[i])): if dets_gt[i, k, 4] > cfg.MODEL.CENTER_THRESH: debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1], dets_gt[i, k, 4], img_id='out_gt') debugger.add_coco_hp(dets_gt[i, k, 5:39], img_id='out_gt') if cfg.LOSS.HM_HP: pred = debugger.gen_colormap_hp( output[4][i].detach().cpu().numpy()) gt = debugger.gen_colormap_hp( batch['hm_hp'][i].detach().cpu().numpy()) debugger.add_blend_img(img, pred, 'pred_hmhp') debugger.add_blend_img(img, gt, 'gt_hmhp') if cfg.DEBUG == 4: debugger.save_all_imgs(cfg.LOG_DIR, prefix='{}'.format(iter_id)) else: debugger.show_all_imgs(pause=True)
def debug(self, batch, output, iter_id): opt = self.opt reg = output['reg'] if opt.reg_offset else None # print(output) dets = circledet_decode(output['hm'], output['cl'], reg=reg, cat_spec_wh=opt.cat_spec_wh, K=opt.K) # print(dets) if opt.filter_boarder: output_h = self.opt.default_resolution[ 0] // self.opt.down_ratio #hard coded output_w = self.opt.default_resolution[ 1] // self.opt.down_ratio #hard coded for i in range(dets.shape[1]): cp = [0, 0] cp[0] = dets[0, i, 0] cp[1] = dets[0, i, 1] cr = dets[0, i, 2] if cp[0] - cr < 0 or cp[0] + cr > output_w: dets[0, i, 3] = 0 continue if cp[1] - cr < 0 or cp[1] + cr > output_h: dets[0, i, 3] = 0 continue dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) dets[:, :, :3] *= opt.down_ratio dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2]) dets_gt[:, :, :3] *= opt.down_ratio for i in range(1): debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3), theme=opt.debugger_theme) img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8) pred = debugger.gen_colormap( output['hm'][i].detach().cpu().numpy()) gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) debugger.add_blend_img(img, pred, 'pred_hm') debugger.add_blend_img(img, gt, 'gt_hm') debugger.add_img(img, img_id='out_pred') for k in range(len(dets[i])): # print('risk = %f' % dets[i, k, 3]) if dets[i, k, 3] > opt.center_thresh: debugger.add_coco_circle(dets[i, k, :3], dets[i, k, -1], dets[i, k, 3], img_id='out_pred') debugger.add_img(img, img_id='out_gt') for k in range(len(dets_gt[i])): if dets_gt[i, k, 3] > opt.center_thresh: debugger.add_coco_circle(dets_gt[i, k, :3], dets_gt[i, k, -1], dets_gt[i, k, 3], img_id='out_gt') if opt.debug == 4: debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id)) else: debugger.show_all_imgs(pause=True)
def detect(opt): os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str split = 'val' if not opt.trainval else 'test' dataset = YOLO(opt.data_dir, opt.flip, opt.vflip, opt.rotate, opt.scale, opt.shear, opt, split) opt = opts().update_dataset_info_and_set_heads(opt, dataset) print(opt) # log = Logger(opt) Detector = detector_factory[opt.task] detector = Detector(opt) debugger = Debugger(dataset=opt.names) dir_path = os.path.join(opt.save_dir, 'detect') if not os.path.exists(dir_path): os.mkdir(dir_path) images = [] if os.path.isfile(opt.image): if os.path.splitext(opt.image)[1] == '.txt': name = os.path.splitext(os.path.basename(opt.image))[0] dir_path = os.path.join(dir_path, name) if not os.path.exists(dir_path): os.mkdir(dir_path) with open(opt.image, 'r') as f: images.extend([l.rstrip().replace('.txt', '.jpg') for l in f.readlines()]) elif os.path.splitext(opt.image)[1] in ['.jpg', '.png', '.bmp']: images.append(opt.image) else: raise Exception('NOT SUPPORT FILE TYPE!!!') else: for file in os.listdir(opt.image): if os.path.splitext(file)[1] in ['.jpg', '.png', '.bmp']: images.append(os.path.join(opt.image, file)) num_iters = len(images) bar = Bar('{}'.format(opt.exp_id), max=num_iters) time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'] avg_time_stats = {t: AverageMeter() for t in time_stats} for ind in range(num_iters): img_id = images[ind] ret = detector.run(img_id) Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format( ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td) for t in avg_time_stats: avg_time_stats[t].update(ret[t]) Bar.suffix = Bar.suffix + '|{} {tm.val:.3f}s ({tm.avg:.3f}s) '.format( t, tm=avg_time_stats[t]) bar.next() img_name = os.path.splitext(os.path.basename(img_id))[0] img = cv2.imread(img_id) h, w = img.shape[:2] pred = debugger.gen_colormap(ret['output']['hm'][0].detach().cpu().numpy()) debugger.add_blend_img(img, pred, img_name+'pred_hm') debugger.add_img(img, img_id=img_name) gt = np.loadtxt(img_id.replace('.jpg', '.txt')).reshape(-1, 5) if gt.size: x1 = w * (gt[:, 1] - gt[:, 3] / 2) y1 = h * (gt[:, 2] - gt[:, 4] / 2) x2 = w * (gt[:, 1] + gt[:, 3] / 2) y2 = h * (gt[:, 2] + gt[:, 4] / 2) gt[:, 1] = x1 gt[:, 2] = y1 gt[:, 3] = x2 gt[:, 4] = y2 for g in gt: debugger.add_gt_bbox(g, img_id=img_name) path = os.path.join(dir_path, os.path.basename(img_id).replace('.jpg', '.txt')) dets = np.zeros((0, 6), dtype=np.float32) for cls, det in ret['results'].items(): cls_id = np.ones((len(det), 1), dtype=np.float32) * (cls - 1) dets = np.append(dets, np.hstack((det, cls_id)), 0) for d in det: if d[-1] >= opt.vis_thresh: debugger.add_coco_bbox(d[:4], cls-1, d[-1], img_id=img_name) np.savetxt(path, dets) bar.finish() debugger.save_all_imgs(path=dir_path)
def debug(self, batch, output, iter_id): opt = self.opt # reg = output['reg'] if opt.reg_offset else None reg = output['reg'][0:1] if opt.reg_offset else None # dets = ctdet_decode( # output['hm'], output['wh'], reg=reg, # cat_spec_wh=opt.cat_spec_wh, K=opt.K) dets = ctdet_decode(output['hm'][0:1], output['wh'][0:1], reg=reg, cat_spec_wh=opt.cat_spec_wh, K=opt.K) dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) dets[:, :, :4] *= opt.down_ratio # FIXME: change from tensor to list and then reshape # dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2]) # batch['meta_gt_det'] = [128, 128, 6] gt_det = batch['meta_gt_det'][0:1] gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \ np.zeros((1, 6), dtype=np.float32) dets_gt = gt_det.reshape(1, -1, dets.shape[2]) # print(batch['meta_img_id'][0:1]) dets_gt[:, :, :4] *= opt.down_ratio for i in range(1): debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3), theme=opt.debugger_theme) img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8) pred = debugger.gen_colormap( output['hm'][i].detach().cpu().numpy()) gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) debugger.add_blend_img(img, pred, 'pred_hm') debugger.add_blend_img(img, gt, 'gt_hm') debugger.add_img(img, img_id='out_pred') for k in range(len(dets[i])): if dets[i, k, 4] > opt.center_thresh: debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1], dets[i, k, 4], img_id='out_pred') debugger.add_img(img, img_id='out_gt') for k in range(len(dets_gt[i])): if dets_gt[i, k, 4] > opt.center_thresh: debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1], dets_gt[i, k, 4], img_id='out_gt') if opt.debug == 4: debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id)) elif opt.debug == 5: debugger.show_all_imgs(pause=opt.pause, logger=self.logger, step=iter_id) else: debugger.show_all_imgs(pause=opt.pause, step=iter_id)
def debug(self, batch, output, iter_id, dataset): opt = self.opt if "pre_hm" in batch: output.update({"pre_hm": batch["pre_hm"]}) dets = generic_decode(output, K=opt.K, opt=opt) for k in dets: dets[k] = dets[k].detach().cpu().numpy() dets_gt = batch["meta"]["gt_det"] for i in range(1): debugger = Debugger(opt=opt, dataset=dataset) img = batch["image"][i].detach().cpu().numpy().transpose(1, 2, 0) img = np.clip(((img * dataset.std + dataset.mean) * 255.0), 0, 255).astype(np.uint8) pred = debugger.gen_colormap( output["hm"][i].detach().cpu().numpy()) gt = debugger.gen_colormap(batch["hm"][i].detach().cpu().numpy()) debugger.add_blend_img(img, pred, "pred_hm") debugger.add_blend_img(img, gt, "gt_hm") if "pre_img" in batch: pre_img = batch["pre_img"][i].detach().cpu().numpy().transpose( 1, 2, 0) pre_img = np.clip( ((pre_img * dataset.std + dataset.mean) * 255), 0, 255).astype(np.uint8) debugger.add_img(pre_img, "pre_img_pred") debugger.add_img(pre_img, "pre_img_gt") if "pre_hm" in batch: pre_hm = debugger.gen_colormap( batch["pre_hm"][i].detach().cpu().numpy()) debugger.add_blend_img(pre_img, pre_hm, "pre_hm") debugger.add_img(img, img_id="out_pred") if "ltrb_amodal" in opt.heads: debugger.add_img(img, img_id="out_pred_amodal") debugger.add_img(img, img_id="out_gt_amodal") # Predictions for k in range(len(dets["scores"][i])): if dets["scores"][i, k] > opt.vis_thresh: debugger.add_coco_bbox( dets["bboxes"][i, k] * opt.down_ratio, dets["clses"][i, k], dets["scores"][i, k], img_id="out_pred", ) if "ltrb_amodal" in opt.heads: debugger.add_coco_bbox( dets["bboxes_amodal"][i, k] * opt.down_ratio, dets["clses"][i, k], dets["scores"][i, k], img_id="out_pred_amodal", ) if "hps" in opt.heads and int(dets["clses"][i, k]) == 0: debugger.add_coco_hp(dets["hps"][i, k] * opt.down_ratio, img_id="out_pred") if "tracking" in opt.heads: debugger.add_arrow( dets["cts"][i][k] * opt.down_ratio, dets["tracking"][i][k] * opt.down_ratio, img_id="out_pred", ) debugger.add_arrow( dets["cts"][i][k] * opt.down_ratio, dets["tracking"][i][k] * opt.down_ratio, img_id="pre_img_pred", ) # Ground truth debugger.add_img(img, img_id="out_gt") for k in range(len(dets_gt["scores"][i])): if dets_gt["scores"][i][k] > opt.vis_thresh: debugger.add_coco_bbox( dets_gt["bboxes"][i][k] * opt.down_ratio, dets_gt["clses"][i][k], dets_gt["scores"][i][k], img_id="out_gt", ) if "ltrb_amodal" in opt.heads: debugger.add_coco_bbox( dets_gt["bboxes_amodal"][i, k] * opt.down_ratio, dets_gt["clses"][i, k], dets_gt["scores"][i, k], img_id="out_gt_amodal", ) if "hps" in opt.heads and (int(dets["clses"][i, k]) == 0): debugger.add_coco_hp(dets_gt["hps"][i][k] * opt.down_ratio, img_id="out_gt") if "tracking" in opt.heads: debugger.add_arrow( dets_gt["cts"][i][k] * opt.down_ratio, dets_gt["tracking"][i][k] * opt.down_ratio, img_id="out_gt", ) debugger.add_arrow( dets_gt["cts"][i][k] * opt.down_ratio, dets_gt["tracking"][i][k] * opt.down_ratio, img_id="pre_img_gt", ) if "hm_hp" in opt.heads: pred = debugger.gen_colormap_hp( output["hm_hp"][i].detach().cpu().numpy()) gt = debugger.gen_colormap_hp( batch["hm_hp"][i].detach().cpu().numpy()) debugger.add_blend_img(img, pred, "pred_hmhp") debugger.add_blend_img(img, gt, "gt_hmhp") if "rot" in opt.heads and "dim" in opt.heads and "dep" in opt.heads: dets_gt = {k: dets_gt[k].cpu().numpy() for k in dets_gt} calib = (batch["meta"]["calib"].detach().numpy() if "calib" in batch["meta"] else None) det_pred = generic_post_process( opt, dets, batch["meta"]["c"].cpu().numpy(), batch["meta"]["s"].cpu().numpy(), 
output["hm"].shape[2], output["hm"].shape[3], self.opt.num_classes, calib, ) det_gt = generic_post_process( opt, dets_gt, batch["meta"]["c"].cpu().numpy(), batch["meta"]["s"].cpu().numpy(), output["hm"].shape[2], output["hm"].shape[3], self.opt.num_classes, calib, ) debugger.add_3d_detection( batch["meta"]["img_path"][i], batch["meta"]["flipped"][i], det_pred[i], calib[i], vis_thresh=opt.vis_thresh, img_id="add_pred", ) debugger.add_3d_detection( batch["meta"]["img_path"][i], batch["meta"]["flipped"][i], det_gt[i], calib[i], vis_thresh=opt.vis_thresh, img_id="add_gt", ) debugger.add_bird_views( det_pred[i], det_gt[i], vis_thresh=opt.vis_thresh, img_id="bird_pred_gt", ) if opt.debug == 4: debugger.save_all_imgs(opt.debug_dir, prefix="{}".format(iter_id)) else: debugger.show_all_imgs(pause=True)
import numpy as np

from opts import opts
from datasets.dataset.yolo import YOLO
from utils.debugger import Debugger

if __name__ == '__main__':
    opt = opts().parse()
    dataset = YOLO(opt.data_dir, opt.flip, opt.vflip, opt.rotate, opt.scale,
                   opt.shear, opt, 'train')
    opt = opts().update_dataset_info_and_set_heads(opt, dataset)
    for i in range(len(dataset)):
        debugger = Debugger(dataset=opt.names)
        data = dataset[i]
        img = data['input'].transpose(1, 2, 0)
        hm = data['hm']
        dets_gt = data['meta']['gt_det']
        dets_gt[:, :4] *= opt.down_ratio
        img = np.clip(((img * dataset.std + dataset.mean) * 255.), 0,
                      255).astype(np.uint8)
        pred = debugger.gen_colormap(hm)
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_img(img, img_id='out_pred')
        for k in range(len(dets_gt)):
            debugger.add_coco_bbox(dets_gt[k, :4], dets_gt[k, -1],
                                   dets_gt[k, 4], img_id='out_pred')
        debugger.show_all_imgs(pause=True)
def debug(self, batch, output, iter_id): opt = self.opt wh = output['wh'] if opt.reg_bbox else None reg = output['reg'] if opt.reg_offset else None dets = ctadd_decode(output['hm'], output['rot'], output['dep'], output['dim'], wh=wh, reg=reg, K=opt.K) # x, y, score, r1-r8, depth, dim1-dim3, cls dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) calib = batch['meta']['calib'].detach().numpy() # x, y, score, rot, depth, dim1, dim2, dim3 # if opt.dataset == 'gta': # dets[:, 12:15] /= 3 dets_pred = ctadd_post_process(dets.copy(), batch['meta']['c'].detach().numpy(), batch['meta']['s'].detach().numpy(), calib, opt) dets_gt = ctadd_post_process( batch['meta']['gt_det'].detach().numpy().copy(), batch['meta']['c'].detach().numpy(), batch['meta']['s'].detach().numpy(), calib, opt) #for i in range(input.size(0)): for i in range(1): debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3), theme=opt.debugger_theme) img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) img = ((img * self.opt.std + self.opt.mean) * 255.).astype( np.uint8) pred = debugger.gen_colormap( output['hm'][i].detach().cpu().numpy()) gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) debugger.add_blend_img(img, pred, 'hm_pred') debugger.add_blend_img(img, gt, 'hm_gt') # decode debugger.add_ct_detection(img, dets[i], show_box=opt.reg_bbox, center_thresh=opt.center_thresh, img_id='det_pred') debugger.add_ct_detection( img, batch['meta']['gt_det'][i].cpu().numpy().copy(), show_box=opt.reg_bbox, img_id='det_gt') debugger.add_3d_detection(batch['meta']['image_path'][i], dets_pred[i], calib[i], center_thresh=opt.center_thresh, img_id='add_pred') debugger.add_3d_detection(batch['meta']['image_path'][i], dets_gt[i], calib[i], center_thresh=opt.center_thresh, img_id='add_gt') # debugger.add_bird_view( # dets_pred[i], center_thresh=opt.center_thresh, img_id='bird_pred') # debugger.add_bird_view(dets_gt[i], img_id='bird_gt') debugger.add_bird_views(dets_pred[i], dets_gt[i], center_thresh=opt.center_thresh, img_id='bird_pred_gt') # debugger.add_blend_img(img, pred, 'out', white=True) debugger.compose_vis_add(batch['meta']['image_path'][i], dets_pred[i], calib[i], opt.center_thresh, pred, 'bird_pred_gt', img_id='out') # debugger.add_img(img, img_id='out') if opt.debug == 4: debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id)) else: debugger.show_all_imgs(pause=True)
def error_bound_saliency(opt, img_id, loc=None, error_bound=0.1):
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    Dataset = dataset_factory[opt.dataset]
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    Detector = detector_factory[opt.task]

    ### simply run the detector and save the objectness heat map and the detection results
    split = 'val' if not opt.trainval else 'test'
    dataset = Dataset(opt, split)
    detector = Detector(opt)
    # use the FeatureExtractor to register the hook that captures activation values;
    # to find the name of target_layers, see model.named_modules()
    feature_extractor = FeatureExtractor(detector.model, target_layers='hm')
    detector.model = feature_extractor
    feature_extractor.eval()
    img_info = dataset.coco.loadImgs(ids=[img_id])[0]
    img_path = os.path.join(dataset.img_dir, img_info['file_name'])
    detector.run(img_path)

    ### get the saliency mask
    ### Note: because the input image is usually resized and padded, we compute the mask
    ### on the resized image. The error is the mean squared difference to the reference
    ### activation (see loss_fn below).
    ## gradually grow a rectangle centered at the target location until the error drops below the bound
    debug_dir = detector.opt.debug_dir
    scale = 1.0
    debugger = Debugger(dataset=detector.opt.dataset,
                        ipynb=(detector.opt.debug == 3),
                        theme=detector.opt.debugger_theme)
    image_org = cv2.imread(img_path)
    image, meta, resized_img = pre_process(detector, image_org, scale, mask=None,
                                           return_resized_img=True)
    _, _, h, w = image.size()
    down_sample_rate = h / feature_extractor.target_val.size(2)
    # get loc [center_h, center_w] on the resized image and the corresponding [fh, fw] on the feature map
    if loc is None:
        # if loc [center_h, center_w] is not specified, use the location of the max value
        ind = torch.argmax(feature_extractor.target_val[0].sum(dim=0))
        fh = ind // feature_extractor.target_val.size(3)
        fw = ind % feature_extractor.target_val.size(3)
        center_h = fh * down_sample_rate
        center_w = fw * down_sample_rate
        val = feature_extractor.target_val[0, :, fh, fw]
        print([center_h, center_w])
    else:
        center_h, center_w = loc
        fh = int(center_h / down_sample_rate)
        fw = int(center_w / down_sample_rate)
        val = feature_extractor.target_val[0, :, fh, fw]
    loss_fn = lambda x: torch.mean(torch.pow((x - val), 2))
    area_increment = np.prod(image.size()) / 1000.0
    area = 0
    ratio = 1.0  # aspect ratio (w/h) of the grown rectangle
    error = 1e10
    mask = np.zeros([h, w])  # [H, W]
    while (error > error_bound):
        print("it:{} error:{}".format(area // area_increment, error))
        area += area_increment
        bh = np.sqrt(area / ratio)
        bw = area / bh
        mask = np.zeros([h, w])
        hmin, hmax = max(int(center_h - bh / 2), 0), min(int(center_h + bh / 2) + 1, h - 1)
        wmin, wmax = max(int(center_w - bw / 2), 0), min(int(center_w + bw / 2) + 1, w - 1)
        mask[hmin:hmax, wmin:wmax] = 1
        image_masked, _ = pre_process(detector, image_org, 1.0, mask)
        image_masked = image_masked.to(opt.device)
        with torch.no_grad():
            feature_extractor(image_masked)
        error = loss_fn(feature_extractor.target_val[0, :, fh, fw])
    print("it:{} error:{}".format(area // area_increment, error))

    # draw the rect mask on the resized image and save it
    rect_mask_img_save_name = 'rect_mask_{:.1f}'.format(scale)
    debugger.add_blend_img(resized_img,
                           debugger.gen_colormap(mask[np.newaxis, :, :]),
                           rect_mask_img_save_name)
    kernel_hmin, kernel_hmax = max(int(center_h - down_sample_rate / 2), 0), \
        min(int(center_h + down_sample_rate / 2) + 1, h - 1)
    kernel_wmin, kernel_wmax = max(int(center_w - down_sample_rate / 2), 0), \
        min(int(center_w + down_sample_rate / 2) + 1, w - 1)
    debugger.imgs[rect_mask_img_save_name][kernel_hmin:kernel_hmax,
                                           kernel_wmin:kernel_wmax] = [255, 0, 0]  # mark the target cell (blue in BGR)

    ## get saliency superpixels
    rect_img = resized_img[hmin:hmax, wmin:wmax]
    segments = slic(rect_img, n_segments=30)  # [hmin:hmax, wmin:wmax]
    un_removed_superpixel = list(np.unique(segments))
    rect_segment_mask = np.ones_like(segments)
    while (error < error_bound):
        # find the superpixel whose removal leads to the lowest error
        lowest_error = 1e10
        lowest_error_ind = -1
        for i in un_removed_superpixel:
            mask = np.zeros([h, w])
            mask[hmin:hmax, wmin:wmax] = rect_segment_mask * (segments != i)
            image_masked, _ = pre_process(detector, image_org, 1.0, mask)
            image_masked = image_masked.to(opt.device)
            with torch.no_grad():
                feature_extractor(image_masked)
            cur_error = loss_fn(feature_extractor.target_val[0, :, fh, fw])
            if cur_error < lowest_error:
                lowest_error = cur_error
                lowest_error_ind = i
        if not lowest_error < error_bound:
            break
        else:
            un_removed_superpixel.remove(lowest_error_ind)
            error = lowest_error
            rect_segment_mask = rect_segment_mask * (segments != lowest_error_ind)
            print("error={} remaining super pixel:{}".format(
                error, len(un_removed_superpixel)))

    # draw the segmentation saliency mask on the resized image and save it
    mask = np.zeros([h, w])
    mask[hmin:hmax, wmin:wmax] = rect_segment_mask
    inp_image = resized_img * mask[:, :, np.newaxis].astype(np.uint8)
    debugger.add_img(inp_image, 'masked_img')
    mask_img_save_name = 'mask_{:.1f}'.format(scale)
    debugger.add_blend_img(resized_img,
                           debugger.gen_colormap(mask[np.newaxis, :, :]),
                           mask_img_save_name)
    debugger.imgs[mask_img_save_name][kernel_hmin:kernel_hmax,
                                      kernel_wmin:kernel_wmax] = [255, 0, 0]  # blue
    debugger.save_all_imgs(debug_dir, prefix='{}'.format(opt.img_id))
    opt.prefix = '{}masked'.format(opt.img_id)
    detector.run(inp_image)
    return
def debug(self, batch, output, iter_id):
    opt = self.opt
    reg = output['reg'] if opt.reg_offset else None
    hm_hp = output['hm_hp'] if opt.hm_hp else None
    hp_offset = output['hp_offset'] if opt.reg_hp_offset else None
    # use a multi_pose_decode-style decode to obtain the detections
    dets = kps_decode(output['hm'], output['wh'], output['hps'], reg=reg,
                      hm_hp=hm_hp, hp_offset=hp_offset, K=opt.K)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= opt.down_ratio    # opt.input_res / opt.output_res
    dets[:, :, 5:17] *= opt.down_ratio  # opt.input_res / opt.output_res
    print('dets shape[2] is : ', dets.shape[2])
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= opt.down_ratio    # opt.input_res / opt.output_res
    dets_gt[:, :, 5:17] *= opt.down_ratio  # opt.input_res / opt.output_res
    for i in range(1):
        debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hm')
        debugger.add_blend_img(img, gt, 'gt_hm')
        debugger.add_img(img, img_id='out_pred')
        for k in range(len(dets[i])):
            if dets[i, k, 4] > opt.center_thresh:
                debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
                                       dets[i, k, 4], img_id='out_pred')
                debugger.add_KPS_hp(dets[i, k, 5:17], img_id='out_pred')
        debugger.add_img(img, img_id='out_gt')
        for k in range(len(dets_gt[i])):
            if dets_gt[i, k, 4] > opt.center_thresh:
                debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
                                       dets_gt[i, k, 4], img_id='out_gt')
                debugger.add_KPS_hp(dets_gt[i, k, 5:17], img_id='out_gt')
        if opt.hm_hp:
            pred = debugger.gen_colormap_hp(output['hm_hp'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap_hp(batch['hm_hp'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hmhp')
            debugger.add_blend_img(img, gt, 'gt_hmhp')
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)
def debug(self, batch, output, iter_id):
    opt = self.opt
    reg = output['reg'] if opt.reg_offset else None
    dets = rodet_decode(output['hm'], output['wh'], output['angle'], reg=reg,
                        cat_spec_wh=opt.cat_spec_wh,
                        cat_spec_angle=opt.cat_spec_angle, K=opt.K)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    dets[:, :, :4] *= opt.down_ratio
    # dets_gt_dense = rodet_decode(
    #     batch['hm'], batch['dense_wh'], batch['dense_angle'], reg=reg,
    #     cat_spec_wh=opt.cat_spec_wh, cat_spec_angle=opt.cat_spec_angle, K=opt.K)
    # dets_gt_dense = dets_gt_dense.detach().cpu().numpy().reshape(1, -1, dets_gt_dense.shape[2])
    # dets_gt_dense[:, :, :4] *= opt.down_ratio
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    img_name = batch['meta']['img_name']
    dets_gt[:, :, :4] *= opt.down_ratio
    for i in range(1):
        debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
        img = np.clip(((img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
        pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
        # gt_ang_mask = debugger.gen_colormap(batch['dense_angle_mask'][i].detach().cpu().numpy())
        # gt_ang = debugger.gen_colormap(batch['dense_angle'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, '{}_pred_hm'.format(img_name))
        # debugger.add_blend_img(img, gt_ang_mask, 'gt_angle_mask')
        # debugger.add_blend_img(img, gt_ang, 'gt_angle')
        debugger.add_blend_img(img, gt, '{}_gt_hm'.format(img_name))
        debugger.add_img(img, img_id='{}_out_pred'.format(img_name))
        for k in range(len(dets[i])):
            if dets[i, k, 5] > opt.center_thresh:
                # print("pred dets add_rbbox=======================")
                debugger.add_rbbox(dets[i, k, :5], dets[i, k, -1], dets[i, k, 5],
                                   show_txt=False,
                                   img_id='{}_out_pred'.format(img_name))
        # debugger.add_img(img, img_id='{}_dets_gt_dense'.format(img_name))
        # for k in range(len(dets_gt_dense[i])):
        #     if dets_gt_dense[i, k, 5] > opt.center_thresh:
        #         # print("pred dets add_rbbox=======================")
        #         debugger.add_rbbox(dets_gt_dense[i, k, :5], dets_gt_dense[i, k, -1],
        #                            dets_gt_dense[i, k, 5], show_txt=False,
        #                            img_id='{}_dets_gt_dense'.format(img_name))
        debugger.add_img(img, img_id='{}_out_gt'.format(img_name))
        for k in range(len(dets_gt[i])):
            if dets_gt[i, k, 5] > opt.center_thresh:
                # print("GT add_rbbox=======================")
                # signature: add_rbbox(self, rbbox, cat, conf=1, show_txt=True, img_id='default')
                # gt format: gt_det.append([ct[0], ct[1], w, h, a, 1, cls_id])
                debugger.add_rbbox(dets_gt[i, k, :5], dets_gt[i, k, -1],
                                   dets_gt[i, k, 5], show_txt=False,
                                   img_id='{}_out_gt'.format(img_name))
        if opt.debug == 4:
            debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
        else:
            debugger.show_all_imgs(pause=True)