Example #1
def demo_image(image, model, opt):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image,
                         trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]  # 'hm': (1, 16, 64, 64), 'depth': (1, 16, 64, 64)
    preds, amb_idx = get_preds(out['hm'].detach().cpu().numpy())
    pred = preds[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d, ignore_idx = get_preds_3d(out['hm'].detach().cpu().numpy(),
                                       out['depth'].detach().cpu().numpy(),
                                       amb_idx)
    pred_3d = pred_3d[0]
    ignore_idx = ignore_idx[0]

    debugger = Debugger()
    debugger.add_img(image)  # copy the image into the debugger
    debugger.add_point_2d(pred, (255, 0, 0))
    debugger.add_point_3d(pred_3d, 'b', ignore_idx=ignore_idx)
    debugger.show_all_imgs(pause=False)
    debugger.show_3d()

    print("Done")
Example #2
def demo_image(image, image_name, model, opt):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image,
                         trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]

    path = "D:\\CV-Project\\pytorch-pose-hg-3d\\images\\last_save\\"
    _, image_name = os.path.split(image_name)
    image_name = image_name[:-4]  # drop the 4-character extension (e.g. ".jpg")

    debugger = Debugger()
    debugger.add_img(image, image_name)
    debugger.add_point_2d(pred, (255, 0, 0), image_name)
    debugger.add_point_3d(pred_3d, 'b')
    debugger.show_all_imgs(pause=False)
    debugger.show_3d(image_name, path)
    debugger.save_img(image_name, path)
Example #3
    def display_map(self, batch, tracks, idx):
        opt = self.opt
        for i in range(1):
            debugger = Debugger(opt,
                                dataset=opt.dataset,
                                ipynb=(opt.debug == 3),
                                theme=opt.debugger_theme)
            img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = np.clip(((img * opt.std + opt.mean) * 255.), 0,
                          255).astype(np.uint8)

            debugger.add_img(img, img_id='track')
            for t in range(len(tracks)):
                dets = tracks[t].pred
                bbox = dets[:4] * self.opt.down_ratio
                w, h = bbox[2], bbox[3]
                bbox = np.array([
                    bbox[0] - w / 2, bbox[1] - h / 2, bbox[0] + w / 2,
                    bbox[1] + h / 2
                ])
                debugger.add_coco_bbox(bbox,
                                       int(dets[-1]),
                                       tracks[t].track_id,
                                       img_id='track',
                                       tracking=True)

            debugger.save_all_imgs(opt.debug_dir, prefix=f'{idx}')
Example #4
def demo_image(image, model, opt, save_path=None):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image,
                         trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]

    debugger = Debugger()
    debugger.add_img(image)
    debugger.add_point_2d(pred, (255, 0, 0))
    debugger.add_point_3d(pred_3d, 'b')
    debugger.show_all_imgs(pause=True)
    debugger.show_3d()
    if save_path:
        debugger.save_3d(save_path)
Example #5
    def debug(self, batch, output, iter_id):
        opt = self.opt
        reg = output['reg'] if opt.reg_offset else None
        dets = ctdet_decode(output['hm'],
                            output['wh'],
                            reg=reg,
                            cat_spec_wh=opt.cat_spec_wh,
                            opt=opt)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
        dets[:, :, :4] *= opt.down_ratio
        dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
        dets_gt[:, :, :4] *= opt.down_ratio
        for i in range(1):
            debugger = Debugger(dataset=opt.dataset,
                                ipynb=(opt.debug == 3),
                                theme=opt.debugger_theme)
            img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = np.clip(((img * opt.std + opt.mean) * 255.), 0,
                          255).astype(np.uint8)
            pred = debugger.gen_colormap(
                output['hm'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hm')
            debugger.add_blend_img(img, gt, 'gt_hm')
            debugger.add_img(img, img_id='out_pred')
            if opt.edge_hm:
                edge_hm = output['edge_hm'][i].detach().cpu().numpy()
                edge_hm = edge_hm.reshape(4 * opt.num_edge_hm, -1,
                                          edge_hm.shape[1], edge_hm.shape[2])
                edge_hm = edge_hm.sum(axis=0)
                edge_hm = debugger.gen_colormap(edge_hm)
                debugger.add_blend_img(img, edge_hm, 'edge_hm')
                gt_edge_hm = batch['edge_hm'][i].detach().cpu().numpy()
                gt_edge_hm = gt_edge_hm.reshape(4 * opt.num_edge_hm, -1,
                                                gt_edge_hm.shape[1],
                                                gt_edge_hm.shape[2])
                gt_edge_hm = gt_edge_hm.sum(axis=0)
                gt_edge_hm = debugger.gen_colormap(gt_edge_hm)
                debugger.add_blend_img(img, gt_edge_hm, 'gt_edge_hm')
            for k in range(len(dets[i])):
                if dets[i, k, 4] > opt.center_thresh:
                    debugger.add_coco_bbox(dets[i, k, :4],
                                           dets[i, k, -1],
                                           dets[i, k, 4],
                                           img_id='out_pred')

            debugger.add_img(img, img_id='out_gt')
            for k in range(len(dets_gt[i])):
                if dets_gt[i, k, 4] > opt.center_thresh:
                    debugger.add_coco_bbox(dets_gt[i, k, :4],
                                           dets_gt[i, k, -1],
                                           dets_gt[i, k, 4],
                                           img_id='out_gt')

            if opt.debug == 4:
                debugger.save_all_imgs(opt.debug_dir,
                                       prefix='{}'.format(iter_id))
            else:
                debugger.show_all_imgs(pause=True)
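The thresholding loops in these debug methods index each detection row positionally. A self-contained sketch of that layout with a dummy array; the [x1, y1, x2, y2, score, class] ordering is inferred from the slicing above rather than taken from ctdet_decode's documentation:

import numpy as np

# Dummy stand-in for decoded detections after reshape: (1, K, 6), where each
# row is assumed to be [x1, y1, x2, y2, score, class_id].
dets = np.array([[[10., 20., 50., 80., 0.9, 2.],
                  [5., 5., 15., 15., 0.1, 0.]]], dtype=np.float32)
center_thresh = 0.3  # stand-in for opt.center_thresh

for k in range(len(dets[0])):
    if dets[0, k, 4] > center_thresh:   # index 4 holds the score
        bbox = dets[0, k, :4]           # corner coordinates
        cls_id = int(dets[0, k, -1])    # last column holds the class id
        print('keep class', cls_id, 'bbox', bbox.tolist())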
Example #6
    def debug(self, batch, output, iter_id):
        opt = self.opt
        reg = output['reg'] if opt.reg_offset else None

        directs = output['direct']
        dets = ctdet_line_decode(output['hm'],
                                 output['wh'],
                                 reg=reg,
                                 directs=directs,
                                 cat_spec_wh=opt.cat_spec_wh,
                                 K=opt.K,
                                 direct_loss=opt.direct_loss)

        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
        dets[:, :, :4] *= opt.down_ratio

        if not self.opt.temporal_model and not opt.no_reconstruct_loss:
            dets_gt = batch['meta']['gt_line'].numpy().reshape(
                1, -1, dets.shape[2] - 1 + 1)  # +1 for direction
            dets_gt[:, :, :4] *= opt.down_ratio
            for i in range(1):
                debugger = Debugger(dataset=opt.dataset,
                                    ipynb=(opt.debug == 3),
                                    theme=opt.debugger_theme)
                img = batch['input'][i].detach().cpu().numpy().transpose(
                    1, 2, 0)
                img = np.clip(((img * opt.std + opt.mean) * 255.), 0,
                              255).astype(np.uint8)
                pred = debugger.gen_colormap(
                    output['hm'][i].detach().cpu().numpy())
                gt = debugger.gen_colormap(
                    batch['hm'][i].detach().cpu().numpy())
                debugger.add_blend_img(img, pred, 'pred_hm')
                debugger.add_blend_img(img, gt, 'gt_hm')
                debugger.add_img(img, img_id='out_pred')
                for k in range(len(dets[i])):
                    if dets[i, k, 4] > opt.center_thresh:
                        debugger.add_bbox_line(dets[i, k, :4],
                                               dets[i, k, -1],
                                               dets[i, k, 4],
                                               img_id='out_pred',
                                               direct=dets[i, k, 5])

                debugger.add_img(img, img_id='out_gt')
                for k in range(len(dets_gt[i])):
                    if dets_gt[i, k, 4] > opt.center_thresh:
                        debugger.add_bbox_line_gt(
                            dets_gt[i, k, :4],
                            dets_gt[i, k, 5],
                            dets_gt[i, k, 4],
                            img_id='out_gt',
                            direct=dets_gt[i, k,
                                           -1])  # direct=directs_gt[i, k])

                if opt.debug == 4:
                    debugger.save_all_imgs(opt.debug_dir,
                                           prefix='{}'.format(iter_id))
                else:
                    debugger.show_all_imgs(pause=True)
Example #7
    def debug(self, batch, output, iter_id):
        opt = self.opt
        reg = output['reg'] if opt.reg_offset else None
        dets = ctdet_decode(output['hm'],
                            output['wh'],
                            reg=reg,
                            cat_spec_wh=opt.cat_spec_wh,
                            K=opt.K)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
        dets[:, :, :4] *= opt.down_ratio
        dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
        dets_gt[:, :, :4] *= opt.down_ratio
        if opt.task == 'ctdet_semseg':
            seg_gt = batch['seg'][0][0].cpu().numpy()
            seg_pred = output['seg'].max(1)[1].squeeze_(1).squeeze_(
                0).cpu().numpy()

        for i in range(1):
            debugger = Debugger(opt,
                                dataset=opt.dataset,
                                ipynb=(opt.debug == 3),
                                theme=opt.debugger_theme)
            img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = np.clip(((img * opt.std + opt.mean) * 255.), 0,
                          255).astype(np.uint8)

            debugger.add_img(img, img_id='out_pred')
            for k in range(len(dets[i])):
                if dets[i, k, 4] > opt.vis_thresh:
                    debugger.add_coco_bbox(dets[i, k, :4],
                                           dets[i, k, -1],
                                           dets[i, k, 4],
                                           img_id='out_pred')

            debugger.add_img(img, img_id='out_gt')
            for k in range(len(dets_gt[i])):
                if dets_gt[i, k, 4] > opt.vis_thresh:
                    debugger.add_coco_bbox(dets_gt[i, k, :4],
                                           dets_gt[i, k, -1],
                                           dets_gt[i, k, 4],
                                           img_id='out_gt')
            if opt.save_video:  # only save the predicted and gt images
                return debugger.imgs['out_pred'], debugger.imgs['out_gt']

            pred = debugger.gen_colormap(
                output['hm'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hm')
            debugger.add_blend_img(img, gt, 'gt_hm')

            if opt.task == 'ctdet_semseg':
                debugger.visualize_masks(seg_gt, img_id='out_mask_gt')
                debugger.visualize_masks(seg_pred, img_id='out_mask_pred')

            if opt.debug == 4:
                debugger.save_all_imgs(opt.debug_dir, prefix=iter_id)
Example #8
    def debug(self, batch, output, iter_id):
        opt = self.opt
        reg = output['reg'] if opt.reg_offset else None
        angle = output['angle']
        dets = ctdet_angle_decode(output['hm'],
                                  output['wh'],
                                  reg=reg,
                                  angle=angle,
                                  cat_spec_wh=opt.cat_spec_wh,
                                  K=opt.K)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
        dets[:, :, :4] *= opt.down_ratio
        dets_gt = batch['meta']['gt_det_angle'].numpy().reshape(
            1, -1, dets.shape[2])
        dets_gt[:, :, :4] *= opt.down_ratio
        for i in range(1):
            debugger = Debugger(dataset=opt.dataset,
                                ipynb=(opt.debug == 3),
                                theme=opt.debugger_theme)
            img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = np.clip(((img * opt.std + opt.mean) * 255.), 0,
                          255).astype(np.uint8)
            pred = debugger.gen_colormap(
                output['hm'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hm')
            debugger.add_blend_img(img, gt, 'gt_hm')
            debugger.add_img(img, img_id='out_pred')
            for k in range(len(dets[i])):
                # center predict scores
                if dets[i, k, 5] > opt.center_thresh:
                    debugger.add_rotation_bbox(dets[i, k, :5],
                                               dets[i, k, -1],
                                               dets[i, k, 5],
                                               img_id='out_pred',
                                               show_txt=False)

            debugger.add_img(img, img_id='out_gt')
            for k in range(len(dets_gt[i])):
                if dets_gt[i, k, 5] > opt.center_thresh:
                    debugger.add_rotation_bbox(dets_gt[i, k, :5],
                                               dets_gt[i, k, -1],
                                               dets_gt[i, k, 5],
                                               img_id='out_gt',
                                               show_txt=False)

            if opt.debug == 4:
                print(batch['meta']['img_id'][i])
                debugger.save_all_imgs(opt.debug_dir,
                                       prefix='{}'.format(iter_id),
                                       img_id=batch['meta']['img_id'])
            else:
                debugger.show_all_imgs(pause=True)
Example #9
  def debug(self, batch, output, iter_id):
    opt = self.opt
    # whether to regress coordinate offsets
    reg = output['reg'] if opt.reg_offset else None
    # decode the output heatmaps into detections: [bboxes, scores, clses]
    dets = ctdet_decode(
      output['hm'], output['wh'], reg=reg,
      cat_spec_wh=opt.cat_spec_wh, K=opt.K)
    # detach dets into a gradient-free array of shape (1, batch*K, 6)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
    # rescale the predicted coordinates by the down ratio (default 4)
    dets[:, :, :4] *= opt.down_ratio
    # dets_gt holds the ground-truth box info, reshaped to (1, batch*K, 6)
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    # rescale the ground-truth coordinates by the down ratio (default 4)
    dets_gt[:, :, :4] *= opt.down_ratio
    for i in range(1):
      debugger = Debugger(
        dataset=opt.dataset, ipynb=(opt.debug==3), theme=opt.debugger_theme)
      # move the input image to a gradient-free HxWxC array on the CPU
      img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
      # undo the normalization: multiply by the std and add the mean
      img = np.clip(((
        img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
      # gen_colormap renders a heatmap as a color image (output -> pred, batch -> gt)
      pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
      gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
      # add_blend_img overlays the colormap on the input image
      debugger.add_blend_img(img, pred, 'pred_hm')
      debugger.add_blend_img(img, gt, 'gt_hm')
      debugger.add_img(img, img_id='out_pred')
      # here len(dets[i]) == batch*K
      for k in range(len(dets[i])):
        # i.e. this detection's score exceeds the threshold
        if dets[i, k, 4] > opt.center_thresh:
          # draw the detected box with its coordinates, score, and class
          debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
                                 dets[i, k, 4], img_id='out_pred')

      debugger.add_img(img, img_id='out_gt')
      # len(dets_gt[i]) is batch*K
      for k in range(len(dets_gt[i])):
        if dets_gt[i, k, 4] > opt.center_thresh:
          # draw the ground-truth box
          debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
                                 dets_gt[i, k, 4], img_id='out_gt')

      if opt.debug == 4:
        debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
      else:
        debugger.show_all_imgs(pause=True)
Example #10
  def debug(self, batch, output, iter_id):
    opt = self.opt
    reg = output['reg'] if opt.reg_offset else None
    hm_hp = output['hm_hp'] if opt.hm_hp else None
    hp_offset = output['hp_offset'] if opt.reg_hp_offset else None
    dets = multi_pose_decode(
      output['hm'], output['wh'], output['hps'], 
      reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=opt.K)
    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])

    dets[:, :, :4] *= opt.input_res / opt.output_res
    dets[:, :, 5:39] *= opt.input_res / opt.output_res
    dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
    dets_gt[:, :, :4] *= opt.input_res / opt.output_res
    dets_gt[:, :, 5:39] *= opt.input_res / opt.output_res
    for i in range(1):
      debugger = Debugger(
        dataset=opt.dataset, ipynb=(opt.debug==3), theme=opt.debugger_theme)
      img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
      img = np.clip(((
        img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
      pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
      gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
      debugger.add_blend_img(img, pred, 'pred_hm')
      debugger.add_blend_img(img, gt, 'gt_hm')

      debugger.add_img(img, img_id='out_pred')
      for k in range(len(dets[i])):
        if dets[i, k, 4] > opt.center_thresh:
          debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
                                 dets[i, k, 4], img_id='out_pred')
          debugger.add_coco_hp(dets[i, k, 5:39], img_id='out_pred')

      debugger.add_img(img, img_id='out_gt')
      for k in range(len(dets_gt[i])):
        if dets_gt[i, k, 4] > opt.center_thresh:
          debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
                                 dets_gt[i, k, 4], img_id='out_gt')
          debugger.add_coco_hp(dets_gt[i, k, 5:39], img_id='out_gt')

      if opt.hm_hp:
        pred = debugger.gen_colormap_hp(output['hm_hp'][i].detach().cpu().numpy())
        gt = debugger.gen_colormap_hp(batch['hm_hp'][i].detach().cpu().numpy())
        debugger.add_blend_img(img, pred, 'pred_hmhp')
        debugger.add_blend_img(img, gt, 'gt_hmhp')

      if opt.debug == 4:
        debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id))
      else:
        debugger.show_all_imgs(pause=True)
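The 5:39 slice in Examples #10 and #11 spans 34 values, which matches 17 (x, y) pairs, the COCO keypoint count. A small sketch of unpacking that slice; the exact row layout is an inference from the slicing above, not from the decoder's documentation:

import numpy as np

# Assumed multi-pose row layout: bbox (4) + score (1) + 34 keypoint values +
# class (1) = 40 columns, so det[5:39] holds 17 (x, y) joints.
det = np.zeros(40, dtype=np.float32)
kps = det[5:39].reshape(17, 2)
print(kps.shape)  # (17, 2)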
Example #11
    def debug(self, batch, output, iter_id):
        cfg = self.cfg
        reg = output[3] if cfg.LOSS.REG_OFFSET else None
        hm_hp = output[4] if cfg.LOSS.HM_HP else None
        hp_offset = output[5] if cfg.LOSS.REG_HP_OFFSET else None
        dets = multi_pose_decode(
          output[0], output[1], output[2], 
          reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=cfg.TEST.TOPK)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])

        dets[:, :, :4] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
        dets[:, :, 5:39] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
        dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
        dets_gt[:, :, :4] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
        dets_gt[:, :, 5:39] *= cfg.MODEL.INPUT_RES / cfg.MODEL.OUTPUT_RES
        for i in range(1):
            debugger = Debugger(
                dataset=cfg.SAMPLE_METHOD, ipynb=(cfg.DEBUG == 3),
                theme=cfg.DEBUG_THEME)
            img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = np.clip(
                ((img * np.array(cfg.DATASET.STD).reshape(1, 1, 3).astype(np.float32)
                  + cfg.DATASET.MEAN) * 255.), 0, 255).astype(np.uint8)
            pred = debugger.gen_colormap(output[0][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hm')
            debugger.add_blend_img(img, gt, 'gt_hm')

            debugger.add_img(img, img_id='out_pred')
            for k in range(len(dets[i])):
                if dets[i, k, 4] > cfg.MODEL.CENTER_THRESH:
                    debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
                                         dets[i, k, 4], img_id='out_pred')
                    debugger.add_coco_hp(dets[i, k, 5:39], img_id='out_pred')

            debugger.add_img(img, img_id='out_gt')
            for k in range(len(dets_gt[i])):
                if dets_gt[i, k, 4] > cfg.MODEL.CENTER_THRESH:
                    debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
                                 dets_gt[i, k, 4], img_id='out_gt')
                    debugger.add_coco_hp(dets_gt[i, k, 5:39], img_id='out_gt')

            if cfg.LOSS.HM_HP:
                pred = debugger.gen_colormap_hp(output[4][i].detach().cpu().numpy())
                gt = debugger.gen_colormap_hp(batch['hm_hp'][i].detach().cpu().numpy())
                debugger.add_blend_img(img, pred, 'pred_hmhp')
                debugger.add_blend_img(img, gt, 'gt_hmhp')

            if cfg.DEBUG == 4:
                debugger.save_all_imgs(cfg.LOG_DIR, prefix='{}'.format(iter_id))
            else:
                debugger.show_all_imgs(pause=True)
Example #12
def _debug(image, t_heat, l_heat, b_heat, r_heat, ct_heat):

    debugger = Debugger(num_classes=1)
    k = 0

    t_heat = torch.sigmoid(t_heat)
    l_heat = torch.sigmoid(l_heat)
    b_heat = torch.sigmoid(b_heat)
    r_heat = torch.sigmoid(r_heat)

    aggr_weight = 0.1
    t_heat = _h_aggregate(t_heat, aggr_weight=aggr_weight)

    l_heat = _v_aggregate(l_heat, aggr_weight=aggr_weight)
    b_heat = _h_aggregate(b_heat, aggr_weight=aggr_weight)
    r_heat = _v_aggregate(r_heat, aggr_weight=aggr_weight)
    t_heat[t_heat > 1] = 1
    l_heat[l_heat > 1] = 1
    b_heat[b_heat > 1] = 1
    r_heat[r_heat > 1] = 1

    ct_heat = torch.sigmoid(ct_heat)

    t_hm = debugger.gen_colormap(t_heat[k].cpu().data.numpy())
    l_hm = debugger.gen_colormap(l_heat[k].cpu().data.numpy())
    b_hm = debugger.gen_colormap(b_heat[k].cpu().data.numpy())
    r_hm = debugger.gen_colormap(r_heat[k].cpu().data.numpy())
    ct_hm = debugger.gen_colormap(ct_heat[k].cpu().data.numpy())

    hms = np.maximum(np.maximum(t_hm, l_hm), np.maximum(b_hm, r_hm))

    if image is not None:
        mean = np.array([0.40789654, 0.44719302, 0.47026115],
                        dtype=np.float32).reshape(3, 1, 1)
        std = np.array([0.28863828, 0.27408164, 0.27809835],
                       dtype=np.float32).reshape(3, 1, 1)
        img = (image[k].cpu().data.numpy() * std + mean) * 255
        img = img.astype(np.uint8).transpose(1, 2, 0)

        debugger.add_img(img, 'img')

        debugger.add_blend_img(img, t_hm, 't_hm')
        debugger.add_blend_img(img, l_hm, 'l_hm')
        debugger.add_blend_img(img, b_hm, 'b_hm')
        debugger.add_blend_img(img, r_hm, 'r_hm')
        debugger.add_blend_img(img, hms, 'extreme')
        debugger.add_blend_img(img, ct_hm, 'center')
    debugger.show_all_imgs(pause=False)
Example #13
 def debug(self, batch, output, iter_id):
     opt = self.opt
     detections = self.decode(output['hm_t'], output['hm_l'],
                              output['hm_b'], output['hm_r'],
                              output['hm_c']).detach().cpu().numpy()
     detections[:, :, :4] *= opt.input_res / opt.output_res
     for i in range(1):
         dataset = opt.dataset
         if opt.dataset == 'yolo':
             dataset = opt.names
         debugger = Debugger(dataset=dataset,
                             ipynb=(opt.debug == 3),
                             theme=opt.debugger_theme)
         pred_hm = np.zeros((opt.input_res, opt.input_res, 3),
                            dtype=np.uint8)
         gt_hm = np.zeros((opt.input_res, opt.input_res, 3), dtype=np.uint8)
         img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
         img = ((img * self.opt.std + self.opt.mean) * 255.).astype(
             np.uint8)
         for p in self.parts:
             tag = 'hm_{}'.format(p)
             pred = debugger.gen_colormap(
                 output[tag][i].detach().cpu().numpy())
             gt = debugger.gen_colormap(
                 batch[tag][i].detach().cpu().numpy())
             if p != 'c':
                 pred_hm = np.maximum(pred_hm, pred)
                 gt_hm = np.maximum(gt_hm, gt)
             if p == 'c' or opt.debug > 2:
                 debugger.add_blend_img(img, pred, 'pred_{}'.format(p))
                 debugger.add_blend_img(img, gt, 'gt_{}'.format(p))
         debugger.add_blend_img(img, pred_hm, 'pred')
         debugger.add_blend_img(img, gt_hm, 'gt')
         debugger.add_img(img, img_id='out')
         for k in range(len(detections[i])):
             if detections[i, k, 4] > 0.1:
                 debugger.add_coco_bbox(detections[i, k, :4],
                                        detections[i, k, -1],
                                        detections[i, k, 4],
                                        img_id='out')
         if opt.debug == 4:
             debugger.save_all_imgs(opt.debug_dir,
                                    prefix='{}'.format(iter_id))
         else:
             debugger.show_all_imgs(pause=True)
Example #14
 def show_results(self, image, gts, dets, save_dir, img_name):
     debugger = Debugger(dataset='dota',
                         ipynb=(self.opt.debug == 3),
                         theme='white')
     debugger.add_img(image, img_name)
     for j in dets:
         for bbox in dets[j]:
             if bbox[5] > 0.01:
                 debugger.add_rbbox(bbox[:5],
                                    j - 1,
                                    bbox[5],
                                    img_id=img_name)
     for ann in gts:
         bbox = ann['rbbox']
         cat_id = ann['category_id']
         debugger.add_rbbox(bbox, cat_id - 1, 1, img_id=img_name, gt=True)
     save_dir = os.path.join(save_dir, 'voc_results')
     debugger.save_all_imgs(save_dir)
Example #15
 def show_results(self, save_dir):
     with open(os.path.join(save_dir, "results.json")) as f:
         data = json.load(f)
         debugger = Debugger(dataset=self.opt.dataset,
                             ipynb=(self.opt.debug == 3),
                             theme=self.opt.debugger_theme,
                             class_names=self.opt.class_names)
         for i, img_details in enumerate(data):
             if img_details['score'] > self.opt.vis_thresh:
                 img_path = os.path.join("../data/coco/test2019",
                                         str(img_details["image_id"]) +
                                         ".jpg")  # TODO
                 image = cv2.imread(img_path)
                 debugger.add_img(image, img_id='ctdet')
                 bbox = img_details['bbox']
                 debugger.add_coco_bbox(bbox,
                                        img_details['category_id'] - 1,
                                        img_details['score'],
                                        img_id='ctdet')
                 debugger.show_all_imgs(pause=True)
Example #16
    def debug(self, batch, output, iter_id):
        opt = self.opt
        reg = output['reg'] if opt.reg_offset else None
        dets = ctdet_decode(output['hm'],
                            output['wh'],
                            reg=reg,
                            cat_spec_wh=opt.cat_spec_wh,
                            K=opt.K)
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
        dets[:, :, :4] *= opt.down_ratio
        dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
        dets_gt[:, :, :4] *= opt.down_ratio
        for i in range(1):
            debugger = Debugger(dataset=opt.dataset,
                                ipynb=(opt.debug == 3),
                                theme=opt.debugger_theme)
            img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = np.clip(((img * opt.std + opt.mean) * 255.), 0,
                          255).astype(np.uint8)
            pred = debugger.gen_colormap(
                output['hm'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hm')
            debugger.add_blend_img(img, gt, 'gt_hm')
            debugger.add_img(img, img_id='out_pred')
            for k in range(len(dets[i])):
                if dets[i, k, 4] > opt.center_thresh:
                    debugger.add_coco_bbox(dets[i, k, :4],
                                           dets[i, k, -1],
                                           dets[i, k, 4],
                                           img_id='out_pred')

            debugger.add_img(img, img_id='out_gt')
            for k in range(len(dets_gt[i])):
                if dets_gt[i, k, 4] > opt.center_thresh:
                    debugger.add_coco_bbox(dets_gt[i, k, :4],
                                           dets_gt[i, k, -1],
                                           dets_gt[i, k, 4],
                                           img_id='out_gt')
Example #17
 def debug_for_polygon(self, batch, output, iter_id):
     opt = self.opt
     output = output[0]
     batch = batch[0]
     for i in range(1):
         debugger = Debugger(dataset=opt.dataset,
                             ipynb=(opt.debug == 3),
                             theme=opt.debugger_theme)
         img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
         img = np.clip(((img * opt.std + opt.mean) * 255.), 0,
                       255).astype(np.uint8)
         pred = debugger.gen_colormap(
             output['hm'][i].detach().cpu().numpy())
         gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
         debugger.add_blend_img(img, pred, 'pred_hm')
         debugger.add_blend_img(img, gt, 'gt_hm')
         debugger.add_img(img, img_id='out_pred')
         debugger.add_img(img, img_id='out_gt')
         if opt.debug == 4:
             debugger.save_all_imgs(opt.debug_dir,
                                    prefix='{}'.format(iter_id))
         else:
             debugger.show_all_imgs(pause=True)
Example #18
    def debug(self, batch, output, iter_id, dataset):
        opt = self.opt
        if "pre_hm" in batch:
            output.update({"pre_hm": batch["pre_hm"]})
        dets = generic_decode(output, K=opt.K, opt=opt)
        for k in dets:
            dets[k] = dets[k].detach().cpu().numpy()
        dets_gt = batch["meta"]["gt_det"]
        for i in range(1):
            debugger = Debugger(opt=opt, dataset=dataset)
            img = batch["image"][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = np.clip(((img * dataset.std + dataset.mean) * 255.0), 0,
                          255).astype(np.uint8)
            pred = debugger.gen_colormap(
                output["hm"][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch["hm"][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, "pred_hm")
            debugger.add_blend_img(img, gt, "gt_hm")

            if "pre_img" in batch:
                pre_img = batch["pre_img"][i].detach().cpu().numpy().transpose(
                    1, 2, 0)
                pre_img = np.clip(
                    ((pre_img * dataset.std + dataset.mean) * 255), 0,
                    255).astype(np.uint8)
                debugger.add_img(pre_img, "pre_img_pred")
                debugger.add_img(pre_img, "pre_img_gt")
                if "pre_hm" in batch:
                    pre_hm = debugger.gen_colormap(
                        batch["pre_hm"][i].detach().cpu().numpy())
                    debugger.add_blend_img(pre_img, pre_hm, "pre_hm")

            debugger.add_img(img, img_id="out_pred")
            if "ltrb_amodal" in opt.heads:
                debugger.add_img(img, img_id="out_pred_amodal")
                debugger.add_img(img, img_id="out_gt_amodal")

            # Predictions
            for k in range(len(dets["scores"][i])):
                if dets["scores"][i, k] > opt.vis_thresh:
                    debugger.add_coco_bbox(
                        dets["bboxes"][i, k] * opt.down_ratio,
                        dets["clses"][i, k],
                        dets["scores"][i, k],
                        img_id="out_pred",
                    )

                    if "ltrb_amodal" in opt.heads:
                        debugger.add_coco_bbox(
                            dets["bboxes_amodal"][i, k] * opt.down_ratio,
                            dets["clses"][i, k],
                            dets["scores"][i, k],
                            img_id="out_pred_amodal",
                        )

                    if "hps" in opt.heads and int(dets["clses"][i, k]) == 0:
                        debugger.add_coco_hp(dets["hps"][i, k] *
                                             opt.down_ratio,
                                             img_id="out_pred")

                    if "tracking" in opt.heads:
                        debugger.add_arrow(
                            dets["cts"][i][k] * opt.down_ratio,
                            dets["tracking"][i][k] * opt.down_ratio,
                            img_id="out_pred",
                        )
                        debugger.add_arrow(
                            dets["cts"][i][k] * opt.down_ratio,
                            dets["tracking"][i][k] * opt.down_ratio,
                            img_id="pre_img_pred",
                        )

            # Ground truth
            debugger.add_img(img, img_id="out_gt")
            for k in range(len(dets_gt["scores"][i])):
                if dets_gt["scores"][i][k] > opt.vis_thresh:
                    debugger.add_coco_bbox(
                        dets_gt["bboxes"][i][k] * opt.down_ratio,
                        dets_gt["clses"][i][k],
                        dets_gt["scores"][i][k],
                        img_id="out_gt",
                    )

                    if "ltrb_amodal" in opt.heads:
                        debugger.add_coco_bbox(
                            dets_gt["bboxes_amodal"][i, k] * opt.down_ratio,
                            dets_gt["clses"][i, k],
                            dets_gt["scores"][i, k],
                            img_id="out_gt_amodal",
                        )

                    if "hps" in opt.heads and (int(dets["clses"][i, k]) == 0):
                        debugger.add_coco_hp(dets_gt["hps"][i][k] *
                                             opt.down_ratio,
                                             img_id="out_gt")

                    if "tracking" in opt.heads:
                        debugger.add_arrow(
                            dets_gt["cts"][i][k] * opt.down_ratio,
                            dets_gt["tracking"][i][k] * opt.down_ratio,
                            img_id="out_gt",
                        )
                        debugger.add_arrow(
                            dets_gt["cts"][i][k] * opt.down_ratio,
                            dets_gt["tracking"][i][k] * opt.down_ratio,
                            img_id="pre_img_gt",
                        )

            if "hm_hp" in opt.heads:
                pred = debugger.gen_colormap_hp(
                    output["hm_hp"][i].detach().cpu().numpy())
                gt = debugger.gen_colormap_hp(
                    batch["hm_hp"][i].detach().cpu().numpy())
                debugger.add_blend_img(img, pred, "pred_hmhp")
                debugger.add_blend_img(img, gt, "gt_hmhp")

            if "rot" in opt.heads and "dim" in opt.heads and "dep" in opt.heads:
                dets_gt = {k: dets_gt[k].cpu().numpy() for k in dets_gt}
                calib = (batch["meta"]["calib"].detach().numpy()
                         if "calib" in batch["meta"] else None)
                det_pred = generic_post_process(
                    opt,
                    dets,
                    batch["meta"]["c"].cpu().numpy(),
                    batch["meta"]["s"].cpu().numpy(),
                    output["hm"].shape[2],
                    output["hm"].shape[3],
                    self.opt.num_classes,
                    calib,
                )
                det_gt = generic_post_process(
                    opt,
                    dets_gt,
                    batch["meta"]["c"].cpu().numpy(),
                    batch["meta"]["s"].cpu().numpy(),
                    output["hm"].shape[2],
                    output["hm"].shape[3],
                    self.opt.num_classes,
                    calib,
                )

                debugger.add_3d_detection(
                    batch["meta"]["img_path"][i],
                    batch["meta"]["flipped"][i],
                    det_pred[i],
                    calib[i],
                    vis_thresh=opt.vis_thresh,
                    img_id="add_pred",
                )
                debugger.add_3d_detection(
                    batch["meta"]["img_path"][i],
                    batch["meta"]["flipped"][i],
                    det_gt[i],
                    calib[i],
                    vis_thresh=opt.vis_thresh,
                    img_id="add_gt",
                )
                debugger.add_bird_views(
                    det_pred[i],
                    det_gt[i],
                    vis_thresh=opt.vis_thresh,
                    img_id="bird_pred_gt",
                )

            if opt.debug == 4:
                debugger.save_all_imgs(opt.debug_dir,
                                       prefix="{}".format(iter_id))
            else:
                debugger.show_all_imgs(pause=True)
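Unlike the packed-array decoders above, generic_decode (and fusion_decode in Example #21) returns a dict of arrays. A dummy-dict sketch of the keys the loops above rely on; the key set is inferred from this code, not from the library:

import numpy as np

# Shapes are (batch, K) for scalars and (batch, K, d) for vectors.
dets = {
    'scores': np.array([[0.8, 0.2]], dtype=np.float32),
    'clses': np.array([[0., 1.]], dtype=np.float32),
    'bboxes': np.zeros((1, 2, 4), dtype=np.float32),
    'cts': np.zeros((1, 2, 2), dtype=np.float32),  # object centers
}
vis_thresh, down_ratio = 0.3, 4
for k in range(len(dets['scores'][0])):
    if dets['scores'][0, k] > vis_thresh:
        print('class', int(dets['clses'][0, k]),
              'bbox', (dets['bboxes'][0, k] * down_ratio).tolist())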
Example #19
def step(split, epoch, opt, data_loader, model, optimizer=None):
    if split == 'train':
        model.train()
    else:
        model.eval()

    # crit = torch.nn.MSELoss()
    # crit_3d = FusionLoss(opt.device, opt.weight_3d, opt.weight_var)

    # crit_ocv = nn.BCEWithLogitsLoss()
    crit_ocv = nn.CrossEntropyLoss()

    # acc_idxs = data_loader.dataset.acc_idxs
    # shuffle_ref = data_loader.dataset.shuffle_ref
    # The attributes below are needed by the opt.debug >= 2 branch further down.
    edges = data_loader.dataset.edges
    edges_3d = data_loader.dataset.edges_3d
    mean = data_loader.dataset.mean
    std = data_loader.dataset.std
    convert_eval_format = data_loader.dataset.convert_eval_format

    # Loss, Loss3D = AverageMeter(), AverageMeter()
    # Acc, MPJPE = AverageMeter(), AverageMeter()

    Loss_ocv, Acc_ocv = AverageMeter(), AverageMeter()

    data_time, batch_time = AverageMeter(), AverageMeter()
    preds = []
    time_str = ''

    nIters = len(data_loader)
    if opt.train_half:
        nIters = nIters // 2  # keep the iteration count an integer
    bar = Bar('{}'.format(opt.exp_id), max=nIters)

    end = time.time()
    for i, batch in enumerate(data_loader):
        if i >= nIters:
            break

        data_time.update(time.time() - end)
        # for k in batch:
        #   if k != 'meta':
        #     batch[k] = batch[k].cuda(device=opt.device, non_blocking=True)
        # gt_2d = batch['meta']['pts_crop'].cuda(
        #   device=opt.device, non_blocking=True).float() / opt.output_h

        img, ocv_gt, info = batch

        if i == 0:
            np.savez(split + '_debug.npz',
                     img=img.numpy(),
                     ocv_gt=ocv_gt.numpy(),
                     info=info)

        img = img.cuda(device=opt.device, non_blocking=True)
        ocv_gt = ocv_gt.cuda(device=opt.device, non_blocking=True)
        output = model(img)

        # loss = crit(output[-1]['hm'], batch['target'])
        # loss_3d = crit_3d(
        #   output[-1]['depth'], batch['reg_mask'], batch['reg_ind'],
        #   batch['reg_target'],gt_2d)
        # for k in range(opt.num_stacks - 1):
        #   loss += crit(output[k], batch['target'])
        #   loss_3d = crit_3d(
        #     output[-1]['depth'], batch['reg_mask'], batch['reg_ind'],
        #     batch['reg_target'], gt_2d)
        # loss += loss_3d
        # loss = crit_ocv(output, ocv_gt)
        loss = crit_ocv(output, torch.argmax(ocv_gt, 1))
        preds = torch.argmax(output, 1)

        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # else:
        #   input_ = batch['input'].cpu().numpy().copy()
        #   input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
        #   input_flip_var = torch.from_numpy(input_).cuda(
        #     device=opt.device, non_blocking=True)
        #   output_flip_ = model(input_flip_var)
        #   output_flip = shuffle_lr(
        #     flip(output_flip_[-1]['hm'].detach().cpu().numpy()[0]), shuffle_ref)
        #   output_flip = output_flip.reshape(
        #     1, opt.num_output, opt.output_h, opt.output_w)
        #   output_depth_flip = shuffle_lr(
        #     flip(output_flip_[-1]['depth'].detach().cpu().numpy()[0]), shuffle_ref)
        #   output_depth_flip = output_depth_flip.reshape(
        #     1, opt.num_output, opt.output_h, opt.output_w)
        #   output_flip = torch.from_numpy(output_flip).cuda(
        #     device=opt.device, non_blocking=True)
        #   output_depth_flip = torch.from_numpy(output_depth_flip).cuda(
        #     device=opt.device, non_blocking=True)
        #   output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2
        #   output[-1]['depth'] = (output[-1]['depth'] + output_depth_flip) / 2
        # pred = get_preds(output[-1]['hm'].detach().cpu().numpy())
        # preds.append(convert_eval_format(pred, conf, meta)[0])

        acc = accuracy_ocv(preds, torch.argmax(ocv_gt, 1))
        Loss_ocv.update(loss.item(), img.size(0))
        Acc_ocv.update(acc, img.size(0))
        # Loss.update(loss.item(), batch['input'].size(0))
        # Loss3D.update(loss_3d.item(), batch['input'].size(0))
        # Acc.update(accuracy(output[-1]['hm'].detach().cpu().numpy(),
        #                     batch['target'].detach().cpu().numpy(), acc_idxs))
        # mpeje_batch, mpjpe_cnt = mpjpe(output[-1]['hm'].detach().cpu().numpy(),
        #                                output[-1]['depth'].detach().cpu().numpy(),
        #                                batch['meta']['gt_3d'].detach().numpy(),
        #                                convert_func=convert_eval_format)
        # MPJPE.update(mpeje_batch, mpjpe_cnt)

        batch_time.update(time.time() - end)
        end = time.time()
        if not opt.hide_data_time:
            time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                       ' |Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)

        # Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:} '\
        #              '|Loss {loss.avg:.5f} |Loss3D {loss_3d.avg:.5f}'\
        #              '|Acc {Acc.avg:.4f} |MPJPE {MPJPE.avg:.2f}'\
        #              '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td,
        #                                  eta=bar.eta_td, loss=Loss, Acc=Acc,
        #                                  split=split, time_str=time_str,
        # MPJPE=MPJPE, loss_3d=Loss3D)

        Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:} '\
                     '|Loss_ocv {loss.avg:.5f}'\
                     '|Acc_ocv {Acc.avg:.4f}'\
                     '|loss_batch {loss_batch:.4f}'\
                     '|acc_batch {acc_batch:.4f}'\
                     '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td,
                                         eta=bar.eta_td, loss=Loss_ocv, Acc=Acc_ocv,
                                         loss_batch=loss.item(), acc_batch=acc,
                                         split=split, time_str=time_str)

        if opt.print_iter > 0:
            if i % opt.print_iter == 0:
                print('{}| {}'.format(opt.exp_id, Bar.suffix))
        else:
            bar.next()
        if opt.debug >= 2:
            # NOTE: this branch assumes the dict-style batch ('input', 'target',
            # 'meta') of the original pose code, not the (img, ocv_gt, info)
            # tuple unpacked above.
            gt = get_preds(batch['target'].cpu().numpy()) * 4
            pred = get_preds(output[-1]['hm'].detach().cpu().numpy()) * 4
            debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)
            img = (batch['input'][0].cpu().numpy().transpose(1, 2, 0) * std +
                   mean) * 256
            img = img.astype(np.uint8).copy()
            debugger.add_img(img)
            debugger.add_mask(
                cv2.resize(batch['target'][0].cpu().numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'target')
            debugger.add_mask(
                cv2.resize(
                    output[-1]['hm'][0].detach().cpu().numpy().max(axis=0),
                    (opt.input_w, opt.input_h)), img, 'pred')
            debugger.add_point_2d(gt[0], (0, 0, 255))
            debugger.add_point_2d(pred[0], (255, 0, 0))
            debugger.add_point_3d(batch['meta']['gt_3d'].detach().numpy()[0],
                                  'r',
                                  edges=edges_3d)
            pred_3d = get_preds_3d(output[-1]['hm'].detach().cpu().numpy(),
                                   output[-1]['depth'].detach().cpu().numpy())
            debugger.add_point_3d(convert_eval_format(pred_3d[0]),
                                  'b',
                                  edges=edges_3d)
            debugger.show_all_imgs(pause=False)
            debugger.show_3d()

    bar.finish()
    # return {'loss': Loss.avg,
    #         'acc': Acc.avg,
    #         'mpjpe': MPJPE.avg,
    #         'time': bar.elapsed_td.total_seconds() / 60.}, preds
    return {
        'loss': Loss_ocv.avg,
        'acc': Acc_ocv.avg,
        'time': bar.elapsed_td.total_seconds() / 60.
    }, preds
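step() leans on an AverageMeter helper that the snippet does not show. A minimal sketch of the conventional implementation (as in the PyTorch ImageNet example); the class actually used in this repo may differ:

class AverageMeter:
    """Tracks the latest value and running average of a scalar metric."""

    def __init__(self):
        self.val, self.avg, self.sum, self.count = 0.0, 0.0, 0.0, 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count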
Example #20
    def run(self, image_or_path_or_tensor, meta=None):
        load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
        merge_time, tot_time = 0, 0
        debugger = Debugger(dataset=self.opt.dataset,
                            ipynb=(self.opt.debug == 3),
                            theme=self.opt.debugger_theme)
        start_time = time.time()
        pre_processed = False
        if isinstance(image_or_path_or_tensor, np.ndarray):
            image = image_or_path_or_tensor
        elif isinstance(image_or_path_or_tensor, str):
            image = cv2.imread(image_or_path_or_tensor)
        else:
            image = image_or_path_or_tensor['image'][0].numpy()
            pre_processed_images = image_or_path_or_tensor
            pre_processed = True

        loaded_time = time.time()
        load_time += (loaded_time - start_time)

        detections = []
        for scale in self.scales:
            scale_start_time = time.time()
            if not pre_processed:
                images, meta = self.pre_process(image, scale, meta)
            else:
                images = pre_processed_images['images'][scale][0]
                meta = pre_processed_images['meta'][scale]
                meta = {k: v.numpy()[0] for k, v in meta.items()}
            images = images.to(self.opt.device)
            torch.cuda.synchronize()
            pre_process_time = time.time()
            pre_time += pre_process_time - scale_start_time

            output, dets, forward_time = self.process(images, return_time=True)

            torch.cuda.synchronize()
            net_time += forward_time - pre_process_time
            decode_time = time.time()
            dec_time += decode_time - forward_time

            if self.opt.debug >= 2:
                self.debug(debugger, images, dets, output, scale)

            dets = self.post_process(dets, meta, scale)
            torch.cuda.synchronize()
            post_process_time = time.time()
            post_time += post_process_time - decode_time

            detections.append(dets)

        results = self.merge_outputs(detections)
        torch.cuda.synchronize()
        end_time = time.time()
        merge_time += end_time - post_process_time
        tot_time += end_time - start_time

        # Defaults so the values returned below are defined even when debug < 1.
        img_ = None
        faces_boxes, person_boxes = [], []
        if self.opt.debug >= 1:
            debugger.add_img(image, img_id='multi_pose')
            #---------------------------------------------------------------- NMS
            nms_dets_ = []
            keep_ = []
            for bbox in results[1]:
                if bbox[4] > self.opt.vis_thresh:
                    nms_dets_.append(
                        (bbox[0], bbox[1], bbox[2], bbox[3], bbox[4]))
            if len(nms_dets_) > 0:
                keep_ = py_cpu_nms(np.array(nms_dets_), thresh=0.35)
            #----------------------------------------------------------------
            idx = 0
            for bbox in results[1]:
                if bbox[4] > self.opt.vis_thresh:
                    idx += 1
                    if (idx - 1) not in keep_:
                        continue
                    # draw the detected object box
                    debugger.add_coco_bbox(bbox[:4],
                                           0,
                                           bbox[4],
                                           img_id='multi_pose')
                    face_pts = debugger.add_coco_hp(bbox[5:39],
                                                    img_id='multi_pose')
                    if len(face_pts) == 5:
                        person_boxes.append([
                            int(bbox[0]),
                            int(bbox[1]),
                            int(bbox[2]),
                            int(bbox[3]), bbox[4]
                        ])

                        x_min = min(p[0] for p in face_pts)
                        y_min = min(p[1] for p in face_pts)
                        x_max = max(p[0] for p in face_pts)
                        y_max = max(p[1] for p in face_pts)

                        edge = abs(x_max - x_min)
                        # expand the facial keypoints into a face box with margins
                        bbox_x1 = int(max(0, (x_min - edge * 0.05)))
                        bbox_x2 = int(
                            min(image.shape[1] - 1, (x_max + edge * 0.05)))
                        bbox_y1 = int(max(0, (y_min - edge * 0.32)))
                        bbox_y2 = int(
                            min(image.shape[0] - 1, (y_max + edge * 0.55)))
                        faces_boxes.append(
                            [bbox_x1, bbox_y1, bbox_x2, bbox_y2, 1.])
            img_ = debugger.show_all_imgs(pause=self.pause)

        return img_, {
            'results': results,
            'tot': tot_time,
            'load': load_time,
            'pre': pre_time,
            'net': net_time,
            'dec': dec_time,
            'post': post_time,
            'merge': merge_time
        }, faces_boxes, person_boxes
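Example #20 filters overlapping boxes with py_cpu_nms before drawing. A minimal sketch of the classic CPU NMS routine that name usually refers to (the standard Fast R-CNN implementation; the repo's version may differ):

import numpy as np

def py_cpu_nms(dets, thresh):
    """Greedy NMS over [x1, y1, x2, y2, score] rows; returns kept indices."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        # intersection of the top box with the remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[np.where(ovr <= thresh)[0] + 1]  # drop high-overlap boxes
    return keep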
Example #21
    def debug(self, batch, output, iter_id, dataset):
        opt = self.opt
        if 'pre_hm' in batch:
            output.update({'pre_hm': batch['pre_hm']})
        dets = fusion_decode(output, K=opt.K, opt=opt)
        for k in dets:
            dets[k] = dets[k].detach().cpu().numpy()
        dets_gt = batch['meta']['gt_det']
        for i in range(1):
            debugger = Debugger(opt=opt, dataset=dataset)
            img = batch['image'][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = np.clip(((img * dataset.std + dataset.mean) * 255.), 0,
                          255).astype(np.uint8)
            pred = debugger.gen_colormap(
                output['hm'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img,
                                   pred,
                                   'pred_hm',
                                   trans=self.opt.hm_transparency)
            debugger.add_blend_img(img,
                                   gt,
                                   'gt_hm',
                                   trans=self.opt.hm_transparency)

            debugger.add_img(img, img_id='img')

            # show point clouds
            if opt.pointcloud:
                pc_2d = batch['pc_2d'][i].detach().cpu().numpy()
                pc_3d = None
                pc_N = batch['pc_N'][i].detach().cpu().numpy()
                debugger.add_img(img, img_id='pc')
                debugger.add_pointcloud(pc_2d, pc_N, img_id='pc')

                if 'pc_hm' in opt.pc_feat_lvl:
                    channel = opt.pc_feat_channels['pc_hm']
                    pc_hm = debugger.gen_colormap(
                        batch['pc_hm'][i][channel].unsqueeze(
                            0).detach().cpu().numpy())
                    debugger.add_blend_img(img,
                                           pc_hm,
                                           'pc_hm',
                                           trans=self.opt.hm_transparency)
                if 'pc_dep' in opt.pc_feat_lvl:
                    channel = opt.pc_feat_channels['pc_dep']
                    pc_hm = batch['pc_hm'][i][channel].unsqueeze(
                        0).detach().cpu().numpy()
                    pc_dep = debugger.add_overlay_img(img, pc_hm, 'pc_dep')

            if 'pre_img' in batch:
                pre_img = batch['pre_img'][i].detach().cpu().numpy().transpose(
                    1, 2, 0)
                pre_img = np.clip(
                    ((pre_img * dataset.std + dataset.mean) * 255), 0,
                    255).astype(np.uint8)
                debugger.add_img(pre_img, 'pre_img_pred')
                debugger.add_img(pre_img, 'pre_img_gt')
                if 'pre_hm' in batch:
                    pre_hm = debugger.gen_colormap(
                        batch['pre_hm'][i].detach().cpu().numpy())
                    debugger.add_blend_img(pre_img,
                                           pre_hm,
                                           'pre_hm',
                                           trans=self.opt.hm_transparency)

            debugger.add_img(img, img_id='out_pred')
            if 'ltrb_amodal' in opt.heads:
                debugger.add_img(img, img_id='out_pred_amodal')
                debugger.add_img(img, img_id='out_gt_amodal')

            # Predictions
            for k in range(len(dets['scores'][i])):
                if dets['scores'][i, k] > opt.vis_thresh:
                    debugger.add_coco_bbox(dets['bboxes'][i, k] *
                                           opt.down_ratio,
                                           dets['clses'][i, k],
                                           dets['scores'][i, k],
                                           img_id='out_pred')

                    if 'ltrb_amodal' in opt.heads:
                        debugger.add_coco_bbox(dets['bboxes_amodal'][i, k] *
                                               opt.down_ratio,
                                               dets['clses'][i, k],
                                               dets['scores'][i, k],
                                               img_id='out_pred_amodal')

                    if 'hps' in opt.heads and int(dets['clses'][i, k]) == 0:
                        debugger.add_coco_hp(dets['hps'][i, k] *
                                             opt.down_ratio,
                                             img_id='out_pred')

                    if 'tracking' in opt.heads:
                        debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio,
                                           dets['tracking'][i][k] *
                                           opt.down_ratio,
                                           img_id='out_pred')
                        debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio,
                                           dets['tracking'][i][k] *
                                           opt.down_ratio,
                                           img_id='pre_img_pred')

            # Ground truth
            debugger.add_img(img, img_id='out_gt')
            for k in range(len(dets_gt['scores'][i])):
                if dets_gt['scores'][i][k] > opt.vis_thresh:
                    if 'dep' in dets_gt.keys():
                        dist = dets_gt['dep'][i][k]
                        if len(dist) > 1:
                            dist = dist[0]
                    else:
                        dist = -1
                    debugger.add_coco_bbox(dets_gt['bboxes'][i][k] *
                                           opt.down_ratio,
                                           dets_gt['clses'][i][k],
                                           dets_gt['scores'][i][k],
                                           img_id='out_gt',
                                           dist=dist)

                    if 'ltrb_amodal' in opt.heads:
                        debugger.add_coco_bbox(dets_gt['bboxes_amodal'][i, k] *
                                               opt.down_ratio,
                                               dets_gt['clses'][i, k],
                                               dets_gt['scores'][i, k],
                                               img_id='out_gt_amodal')

                    if 'hps' in opt.heads and \
                            (int(dets_gt['clses'][i][k]) == 0):
                        debugger.add_coco_hp(dets_gt['hps'][i][k] *
                                             opt.down_ratio,
                                             img_id='out_gt')

                    if 'tracking' in opt.heads:
                        debugger.add_arrow(
                            dets_gt['cts'][i][k] * opt.down_ratio,
                            dets_gt['tracking'][i][k] * opt.down_ratio,
                            img_id='out_gt')
                        debugger.add_arrow(
                            dets_gt['cts'][i][k] * opt.down_ratio,
                            dets_gt['tracking'][i][k] * opt.down_ratio,
                            img_id='pre_img_gt')

            if 'hm_hp' in opt.heads:
                pred = debugger.gen_colormap_hp(
                    output['hm_hp'][i].detach().cpu().numpy())
                gt = debugger.gen_colormap_hp(
                    batch['hm_hp'][i].detach().cpu().numpy())
                debugger.add_blend_img(img,
                                       pred,
                                       'pred_hmhp',
                                       trans=self.opt.hm_transparency)
                debugger.add_blend_img(img,
                                       gt,
                                       'gt_hmhp',
                                       trans=self.opt.hm_transparency)

            if 'rot' in opt.heads and 'dim' in opt.heads and 'dep' in opt.heads:
                dets_gt = {k: dets_gt[k].cpu().numpy() for k in dets_gt}
                calib = batch['meta']['calib'].detach().numpy() \
                    if 'calib' in batch['meta'] else None
                det_pred = generic_post_process(
                    opt, dets, batch['meta']['c'].cpu().numpy(),
                    batch['meta']['s'].cpu().numpy(), output['hm'].shape[2],
                    output['hm'].shape[3], self.opt.num_classes, calib)
                det_gt = generic_post_process(opt,
                                              dets_gt,
                                              batch['meta']['c'].cpu().numpy(),
                                              batch['meta']['s'].cpu().numpy(),
                                              output['hm'].shape[2],
                                              output['hm'].shape[3],
                                              self.opt.num_classes,
                                              calib,
                                              is_gt=True)

                debugger.add_3d_detection(batch['meta']['img_path'][i],
                                          batch['meta']['flipped'][i],
                                          det_pred[i],
                                          calib[i],
                                          vis_thresh=opt.vis_thresh,
                                          img_id='add_pred')
                debugger.add_3d_detection(batch['meta']['img_path'][i],
                                          batch['meta']['flipped'][i],
                                          det_gt[i],
                                          calib[i],
                                          vis_thresh=opt.vis_thresh,
                                          img_id='add_gt')

                pc_3d = None
                if opt.pointcloud:
                    pc_3d = batch['pc_3d'].cpu().numpy()

                debugger.add_bird_views(det_pred[i],
                                        det_gt[i],
                                        vis_thresh=opt.vis_thresh,
                                        img_id='bird_pred_gt',
                                        pc_3d=pc_3d,
                                        show_velocity=opt.show_velocity)
                debugger.add_bird_views([],
                                        det_gt[i],
                                        vis_thresh=opt.vis_thresh,
                                        img_id='bird_gt',
                                        pc_3d=pc_3d,
                                        show_velocity=opt.show_velocity)

            if opt.debug == 4:
                debugger.save_all_imgs(opt.debug_dir,
                                       prefix='{}'.format(iter_id))
            else:
                debugger.show_all_imgs(pause=True)
Beispiel #22
0
  def run(self, image_or_path_or_tensor, meta=None):
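    """Detect on a single image (ndarray, file path, or pre-processed batch):
    pre-process per scale, forward the network, decode, post-process, and
    merge detections across scales, timing each stage."""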
    load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
    merge_time, tot_time = 0, 0
    debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug==3),
                        theme=self.opt.debugger_theme)
    start_time = time.time()
    pre_processed = False
    if isinstance(image_or_path_or_tensor, np.ndarray):
      image = image_or_path_or_tensor
    elif isinstance(image_or_path_or_tensor, str):
      image = cv2.imread(image_or_path_or_tensor)
    else:
      image = image_or_path_or_tensor['image'][0].numpy()
      pre_processed_images = image_or_path_or_tensor
      pre_processed = True

    loaded_time = time.time()
    load_time += (loaded_time - start_time)

    detections = []
    for scale in self.scales:
      scale_start_time = time.time()
      if not pre_processed:
        images, meta = self.pre_process(image, scale, meta)
      else:
        images = pre_processed_images['images'][scale][0]
        meta = pre_processed_images['meta'][scale]
        meta = {k: v.numpy()[0] for k, v in meta.items()}
      images = images.to(self.opt.device)
      torch.cuda.synchronize()
      pre_process_time = time.time()
      pre_time += pre_process_time - scale_start_time

      output, dets, forward_time = self.process(images, return_time=True)

      torch.cuda.synchronize()
      net_time += forward_time - pre_process_time
      decode_time = time.time()
      dec_time += decode_time - forward_time

      if self.opt.debug >= 2:
        self.debug(debugger, images, dets, output, scale)

      dets = self.post_process(dets, meta, scale)
      torch.cuda.synchronize()
      post_process_time = time.time()
      post_time += post_process_time - decode_time

      detections.append(dets)

    results = self.merge_outputs(detections)
    torch.cuda.synchronize()
    end_time = time.time()
    merge_time += end_time - post_process_time
    tot_time += end_time - start_time

    img_ = None
    if self.opt.debug >= 1:
      debugger.add_img(image, img_id='multi_pose')
      for bbox in results[1]:
        if bbox[4] > self.opt.vis_thresh:
          # draw the detected person box and its keypoints
          debugger.add_coco_bbox(bbox[:4], 0, bbox[4], img_id='multi_pose')
          debugger.add_coco_hp(bbox[5:39], img_id='multi_pose')
      img_ = debugger.show_all_imgs(pause=self.pause)

    return img_, {'results': results, 'tot': tot_time, 'load': load_time,
            'pre': pre_time, 'net': net_time, 'dec': dec_time,
            'post': post_time, 'merge': merge_time}
Beispiel #23
0
    def debug(self, batch, output, iter_id, dataset):
        opt = self.opt
        if 'pre_hm' in batch:
            output.update({'pre_hm': batch['pre_hm']})
        dets = generic_decode(output, K=opt.K, opt=opt)
        for k in dets:
            dets[k] = dets[k].detach().cpu().numpy()
        dets_gt = batch['meta']['gt_det']
        for i in range(1):
            debugger = Debugger(opt=opt, dataset=dataset)
            img = batch['image'][i].detach().cpu().numpy().transpose(1, 2, 0)
            img = np.clip(((img * dataset.std + dataset.mean) * 255.), 0,
                          255).astype(np.uint8)
            pred = debugger.gen_colormap(
                output['hm'][i].detach().cpu().numpy())
            gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
            debugger.add_blend_img(img, pred, 'pred_hm')
            debugger.add_blend_img(img, gt, 'gt_hm')

            if 'pre_img' in batch:
                pre_img = batch['pre_img'][i].detach().cpu().numpy().transpose(
                    1, 2, 0)
                pre_img = np.clip(
                    ((pre_img * dataset.std + dataset.mean) * 255), 0,
                    255).astype(np.uint8)
                debugger.add_img(pre_img, 'pre_img_pred')
                debugger.add_img(pre_img, 'pre_img_gt')
                if 'pre_hm' in batch:
                    pre_hm = debugger.gen_colormap(
                        batch['pre_hm'][i].detach().cpu().numpy())
                    debugger.add_blend_img(pre_img, pre_hm, 'pre_hm')

            debugger.add_img(img, img_id='out_pred')
            if 'ltrb_amodal' in opt.heads:
                debugger.add_img(img, img_id='out_pred_amodal')
                debugger.add_img(img, img_id='out_gt_amodal')

            # Predictions
            for k in range(len(dets['scores'][i])):
                if dets['scores'][i, k] > opt.vis_thresh:
                    debugger.add_coco_bbox(dets['bboxes'][i, k] *
                                           opt.down_ratio,
                                           dets['clses'][i, k],
                                           dets['scores'][i, k],
                                           img_id='out_pred')

                    if 'ltrb_amodal' in opt.heads:
                        debugger.add_coco_bbox(dets['bboxes_amodal'][i, k] *
                                               opt.down_ratio,
                                               dets['clses'][i, k],
                                               dets['scores'][i, k],
                                               img_id='out_pred_amodal')

                    if 'hps' in opt.heads and int(dets['clses'][i, k]) == 0:
                        debugger.add_coco_hp(dets['hps'][i, k] *
                                             opt.down_ratio,
                                             img_id='out_pred')

                    if 'tracking' in opt.heads:
                        debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio,
                                           dets['tracking'][i][k] *
                                           opt.down_ratio,
                                           img_id='out_pred')
                        debugger.add_arrow(dets['cts'][i][k] * opt.down_ratio,
                                           dets['tracking'][i][k] *
                                           opt.down_ratio,
                                           img_id='pre_img_pred')

            # Ground truth
            debugger.add_img(img, img_id='out_gt')
            for k in range(len(dets_gt['scores'][i])):
                if dets_gt['scores'][i][k] > opt.vis_thresh:
                    debugger.add_coco_bbox(dets_gt['bboxes'][i][k] *
                                           opt.down_ratio,
                                           dets_gt['clses'][i][k],
                                           dets_gt['scores'][i][k],
                                           img_id='out_gt')

                    if 'ltrb_amodal' in opt.heads:
                        debugger.add_coco_bbox(dets_gt['bboxes_amodal'][i, k] *
                                               opt.down_ratio,
                                               dets_gt['clses'][i, k],
                                               dets_gt['scores'][i, k],
                                               img_id='out_gt_amodal')

                    if 'hps' in opt.heads and \
                      (int(dets_gt['clses'][i][k]) == 0):
                        debugger.add_coco_hp(dets_gt['hps'][i][k] *
                                             opt.down_ratio,
                                             img_id='out_gt')

                    if 'tracking' in opt.heads:
                        debugger.add_arrow(
                            dets_gt['cts'][i][k] * opt.down_ratio,
                            dets_gt['tracking'][i][k] * opt.down_ratio,
                            img_id='out_gt')
                        debugger.add_arrow(
                            dets_gt['cts'][i][k] * opt.down_ratio,
                            dets_gt['tracking'][i][k] * opt.down_ratio,
                            img_id='pre_img_gt')

            if 'hm_hp' in opt.heads:
                pred = debugger.gen_colormap_hp(
                    output['hm_hp'][i].detach().cpu().numpy())
                gt = debugger.gen_colormap_hp(
                    batch['hm_hp'][i].detach().cpu().numpy())
                debugger.add_blend_img(img, pred, 'pred_hmhp')
                debugger.add_blend_img(img, gt, 'gt_hmhp')

            if 'rot' in opt.heads and 'dim' in opt.heads and 'dep' in opt.heads:
                dets_gt = {k: dets_gt[k].cpu().numpy() for k in dets_gt}
                calib = batch['meta']['calib'].detach().numpy() \
                        if 'calib' in batch['meta'] else None
                det_pred = generic_post_process(
                    opt, dets, batch['meta']['c'].cpu().numpy(),
                    batch['meta']['s'].cpu().numpy(), output['hm'].shape[2],
                    output['hm'].shape[3], self.opt.num_classes, calib)
                det_gt = generic_post_process(opt, dets_gt,
                                              batch['meta']['c'].cpu().numpy(),
                                              batch['meta']['s'].cpu().numpy(),
                                              output['hm'].shape[2],
                                              output['hm'].shape[3],
                                              self.opt.num_classes, calib)

                debugger.add_3d_detection(batch['meta']['img_path'][i],
                                          batch['meta']['flipped'][i],
                                          det_pred[i],
                                          calib[i],
                                          vis_thresh=opt.vis_thresh,
                                          img_id='add_pred')
                debugger.add_3d_detection(batch['meta']['img_path'][i],
                                          batch['meta']['flipped'][i],
                                          det_gt[i],
                                          calib[i],
                                          vis_thresh=opt.vis_thresh,
                                          img_id='add_gt')
                debugger.add_bird_views(det_pred[i],
                                        det_gt[i],
                                        vis_thresh=opt.vis_thresh,
                                        img_id='bird_pred_gt')

            if opt.debug == 4:
                debugger.save_all_imgs(opt.debug_dir,
                                       prefix='{}'.format(iter_id))
            else:
                debugger.show_all_imgs(pause=True)
Beispiel #24
0
def run_one_scn(demo_scn, demo_dir, opt):
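    """Tile a whole-slide image (.scn/.svs) into patches, detect circles in
    each patch, merge detections back into slide coordinates, save overlay
    images and JSON, and export an ImageScope-style XML annotation file."""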
    Detector = detector_factory[opt.task]
    detector = Detector(opt)

    basename = os.path.basename(demo_scn)
    basename = basename.replace('.scn', '')
    basename = basename.replace('.svs', '')
    # basename = basename.replace(' ', '-')
    working_dir = os.path.join(demo_dir, basename)

    xml_file = os.path.join(working_dir, '%s.xml' % (basename))
    if os.path.exists(xml_file):
        return

    patch_2d_dir, simg_big, simg = scn_to_patchs(demo_scn, working_dir, opt)

    if os.path.isdir(patch_2d_dir):
        image_names = []
        ls = os.listdir(patch_2d_dir)
        for file_name in sorted(ls):
            ext = file_name[file_name.rfind('.') + 1:].lower()
            if ext in image_ext:
                image_names.append(os.path.join(patch_2d_dir, file_name))
    else:
        image_names = [patch_2d_dir]

    detect_all = None
    count = 1
    for image_name in image_names:
        ret = detector.run(image_name)
        results = ret['results']
        res_strs = os.path.basename(image_name).replace('.png',
                                                        '').split('-x-')
        lv_str = res_strs[0]
        patch_start_x = int(res_strs[3])
        patch_start_y = int(res_strs[4])

        if opt.filter_boarder:
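            # zero the score of circles whose radius crosses the patch
            # border; detections clipped at patch edges are unreliable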
            output_h = opt.input_h  # hard coded
            output_w = opt.input_w  # hard coded
            for j in range(1, opt.num_classes + 1):
                for i in range(len(results[j])):
                    cp = [0, 0]
                    cp[0] = results[j][i][0]
                    cp[1] = results[j][i][1]
                    cr = results[j][i][2]
                    if cp[0] - cr < 0 or cp[0] + cr > output_w:
                        results[j][i][3] = 0
                        continue
                    if cp[1] - cr < 0 or cp[1] + cr > output_h:
                        results[j][i][3] = 0
                        continue

        for j in range(1, opt.num_classes + 1):
            for circle in results[j]:
                if circle[3] > opt.vis_thresh:
                    # copy before shifting into slide coordinates so the
                    # patch-local result stays untouched
                    circle_out = circle.copy()
                    circle_out[0] = circle[0] + patch_start_x
                    circle_out[1] = circle[1] + patch_start_y
                    if detect_all is None:
                        detect_all = [circle_out]
                    else:
                        detect_all = np.append(detect_all, [circle_out],
                                               axis=0)

        time_str = ''
        for stat in time_stats:
            time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
        print(' %d/%d %s' % (count, len(image_names), time_str))
        count = count + 1

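    # merge per-patch detections into slide-level results with a second
    # NMS pass, keeping at most max_per_image circles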
    num_classes = 1
    scales = 1
    max_per_image = 2000
    run_nms = True
    results2 = merge_outputs(num_classes, max_per_image, run_nms, detect_all)
    detect_all = results2[1]

    # detections = []
    # det_clss = {}
    # det_clss[1] = detect_all
    # detections.append(det_clss)
    # detect_all = merge_outputs(opt, detections)

    if simg_big is not None:
        debugger = Debugger(dataset=opt.dataset,
                            ipynb=(opt.debug == 3),
                            theme=opt.debugger_theme)
        debugger.add_img(simg_big, img_id='')
        debugger.save_all_imgs(working_dir,
                               prefix='%s' % (basename))  # save original image
        json_file = os.path.join(working_dir, '%s.json' % (basename))
        debugger.save_detect_all_to_json(simg_big, detect_all, json_file, opt,
                                         simg)

        for circle in detect_all:
            debugger.add_coco_circle(circle[:3],
                                     circle[-1],
                                     circle[3],
                                     img_id='')
        debugger.save_all_imgs(working_dir, prefix='%s_overlay' %
                               (basename))  # save original overlay


    try:
        start_x = int(simg.properties['openslide.bounds-x'])
        start_y = int(simg.properties['openslide.bounds-y'])
        width_x = int(simg.properties['openslide.bounds-width'])
        height_y = int(simg.properties['openslide.bounds-height'])
    except KeyError:
        start_x = 0
        start_y = 0
        width_x = int(simg.properties['aperio.OriginalWidth'])
        height_y = int(simg.properties['aperio.OriginalHeight'])
    down_rate = simg.level_downsamples[opt.lv]

    detect_json = []
    doc_out = {}
    doc_out['Annotations'] = {}
    doc_out['Annotations']['@MicronsPerPixel'] = simg.properties[
        'openslide.mpp-x']
    doc_out['Annotations']['@Level'] = opt.lv
    doc_out['Annotations']['@DownRate'] = down_rate
    doc_out['Annotations']['@start_x'] = start_x
    doc_out['Annotations']['@start_y'] = start_y
    doc_out['Annotations']['@width_x'] = width_x
    doc_out['Annotations']['@height_y'] = height_y
    if 'leica.device-model' in simg.properties:
        doc_out['Annotations']['@Device'] = 'leica.device-model'
    else:
        doc_out['Annotations']['@Device'] = 'aperio.Filename'
    doc_out['Annotations']['Annotation'] = {}
    doc_out['Annotations']['Annotation']['@Id'] = '1'
    doc_out['Annotations']['Annotation']['@Name'] = ''
    doc_out['Annotations']['Annotation']['@ReadOnly'] = '0'
    doc_out['Annotations']['Annotation']['@LineColorReadOnly'] = '0'
    doc_out['Annotations']['Annotation']['@Incremental'] = '0'
    doc_out['Annotations']['Annotation']['@Type'] = '4'
    doc_out['Annotations']['Annotation']['@LineColor'] = '65280'
    doc_out['Annotations']['Annotation']['@Visible'] = '1'
    doc_out['Annotations']['Annotation']['@Selected'] = '1'
    doc_out['Annotations']['Annotation']['@MarkupImagePath'] = ''
    doc_out['Annotations']['Annotation']['@MacroName'] = ''
    doc_out['Annotations']['Annotation']['Attributes'] = {}
    doc_out['Annotations']['Annotation']['Attributes']['Attribute'] = {}
    doc_out['Annotations']['Annotation']['Attributes']['Attribute'][
        '@Name'] = 'glomerulus'
    doc_out['Annotations']['Annotation']['Attributes']['Attribute'][
        '@Id'] = '0'
    doc_out['Annotations']['Annotation']['Attributes']['Attribute'][
        '@Value'] = ''
    doc_out['Annotations']['Annotation']['Plots'] = None
    doc_out['Annotations']['Annotation']['Regions'] = {}
    doc_out['Annotations']['Annotation']['Regions'][
        'RegionAttributeHeaders'] = {}
    doc_out['Annotations']['Annotation']['Regions']['AttributeHeader'] = []
    doc_out['Annotations']['Annotation']['Regions']['Region'] = []

    for di in range(len(detect_all)):
        detect_one = detect_all[di]
        detect_dict = {}
        detect_dict['@Id'] = str(di + 1)
        detect_dict['@Type'] = '2'
        detect_dict['@Zoom'] = '0.5'
        detect_dict['@ImageLocation'] = ''
        detect_dict['@ImageFocus'] = '-1'
        detect_dict['@Length'] = '2909.1'
        detect_dict['@Area'] = '673460.1'
        detect_dict['@LengthMicrons'] = '727.3'
        detect_dict['@AreaMicrons'] = '42091.3'
        detect_dict['@Text'] = ('%.3f' % detect_one[3])
        detect_dict['@NegativeROA'] = '0'
        detect_dict['@InputRegionId'] = '0'
        detect_dict['@Analyze'] = '0'
        detect_dict['@DisplayId'] = str(di + 1)
        detect_dict['Attributes'] = None
        detect_dict['Vertices'] = '0'
        detect_dict['Vertices'] = {}
        detect_dict['Vertices']['Vertex'] = []

        if 'leica.device-model' in simg.properties:  #leica
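            # Leica slides are stored rotated 90 degrees relative to Aperio,
            # so X/Y are swapped and X is mirrored against the slide height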
            coord1 = {}
            coord1['@X'] = str(height_y -
                               (detect_one[1] - detect_one[2]) * down_rate)
            coord1['@Y'] = str((detect_one[0] - detect_one[2]) * down_rate)
            coord1['@Z'] = '0'
            coord2 = {}
            coord2['@X'] = str(
                height_y - (detect_one[1] + detect_one[2]) * down_rate)  # left/right
            coord2['@Y'] = str(
                (detect_one[0] + detect_one[2]) * down_rate)  # top/bottom
            coord2['@Z'] = '0'
            detect_dict['Vertices']['Vertex'].append(coord1)
            detect_dict['Vertices']['Vertex'].append(coord2)
        elif 'aperio.Filename' in simg.properties:
            coord1 = {}
            coord1['@X'] = str((detect_one[0] - detect_one[2]) * down_rate)
            coord1['@Y'] = str((detect_one[1] - detect_one[2]) * down_rate)
            coord1['@Z'] = '0'
            coord2 = {}
            coord2['@X'] = str(
                (detect_one[0] + detect_one[2]) * down_rate)  # left/right
            coord2['@Y'] = str(
                (detect_one[1] + detect_one[2]) * down_rate)  # top/bottom
            coord2['@Z'] = '0'
            detect_dict['Vertices']['Vertex'].append(coord1)
            detect_dict['Vertices']['Vertex'].append(coord2)
        doc_out['Annotations']['Annotation']['Regions']['Region'].append(
            detect_dict)

    out = xmltodict.unparse(doc_out, pretty=True)
    with open(xml_file, 'wb') as file:
        file.write(out.encode('utf-8'))

    os.system('rm -r "%s"' % (os.path.join(working_dir, '2d_patch')))

    return
Beispiel #25
0
def demo(opt):
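    """Run the detector over every .mp4 in opt.demo, masking frames to a
    per-camera ROI polygon, and write an annotated video plus pickled
    per-frame results for each input video."""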
    class_map = {1: 1, 2: 2}  # color for boundingbox
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    # opt.debug = max(opt.debug, 1)
    Detector = detector_factory[opt.task]
    detector = Detector(opt)

    assert os.path.isdir(opt.demo), 'Need path to videos directory.'
    video_paths = [
        os.path.join(opt.demo, video_name)
        for video_name in os.listdir(opt.demo)
        if video_name.split('.')[-1] == 'mp4'
    ]
    # video_paths = [
    #     os.path.join(opt.demo, 'cam_2.mp4')
    # ]
    debugger = Debugger(dataset=opt.dataset, theme=opt.debugger_theme)

    for video_path in sorted(video_paths):
        print('video_name = ', video_path)

        bboxes = []
        video = cv2.VideoCapture(video_path)
        width, height = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(
            video.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # pointer
        pts = []
        arr_name = os.path.basename(video_path).split('.')[0].split('_')
        cam_name = arr_name[0] + '_' + arr_name[1]
        print('cam_name = ', cam_name)
        with open('../ROIs/{}.txt'.format(cam_name)) as f:
            for line in f:
                pts.append([int(x) for x in line.split(',')])
        pts = np.array(pts)

        # make mask
        mask = np.zeros((height, width), np.uint8)
        cv2.drawContours(mask, [pts], -1, (255, 255, 255), -1, cv2.LINE_AA)

        bbox_video = cv2.VideoWriter(
            filename='/home/leducthinh0409/centernet_visualize_{}/'.format(
                opt.arch) + os.path.basename(video_path),
            fourcc=cv2.VideoWriter_fourcc(*'mp4v'),
            fps=float(30),
            frameSize=(width, height),
            isColor=True)

        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        for i in tqdm(range(num_frames)):
            _, img_pre = video.read()

            ## do bit-op
            dst = cv2.bitwise_and(img_pre, img_pre, mask=mask)
            ## add the white background
            bg = np.ones_like(img_pre, np.uint8) * 255
            cv2.bitwise_not(bg, bg, mask=mask)
            img = bg + dst

            ret = detector.run(img)
            bboxes.append(ret['results'])
            debugger.add_img(img, img_id='default')
            for class_id in class_map.keys():
                for bbox in ret['results'][class_id]:
                    if bbox[4] > opt.vis_thresh:
                        debugger.add_coco_bbox(bbox[:4],
                                               class_map[class_id],
                                               bbox[4],
                                               img_id='default')
            bbox_img = debugger.imgs['default']
            bbox_video.write(bbox_img)

        with open(
                '/home/leducthinh0409/bboxes_{}/'.format(opt.arch) +
                os.path.basename(video_path) + '.pkl', 'wb') as f:
            pickle.dump(bboxes, f)
Beispiel #26
0
def step(split, epoch, opt, data_loader, model, optimizer=None):
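    """One training/validation epoch for 2D heatmap pose estimation. In
    validation mode, predictions are averaged with a horizontally flipped
    forward pass before decoding."""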
    if split == 'train':
        model.train()
    else:
        model.eval()

    crit = torch.nn.MSELoss()

    acc_idxs = data_loader.dataset.acc_idxs
    edges = data_loader.dataset.edges
    shuffle_ref = data_loader.dataset.shuffle_ref
    mean = data_loader.dataset.mean
    std = data_loader.dataset.std
    convert_eval_format = data_loader.dataset.convert_eval_format

    Loss, Acc = AverageMeter(), AverageMeter()
    data_time, batch_time = AverageMeter(), AverageMeter()
    preds = []

    nIters = len(data_loader)
    bar = Bar('{}'.format(opt.exp_id), max=nIters)

    end = time.time()
    for i, batch in enumerate(data_loader):
        data_time.update(time.time() - end)
        input, target, meta = batch['input'], batch['target'], batch['meta']
        input_var = input.cuda(device=opt.device, non_blocking=True)
        target_var = target.cuda(device=opt.device, non_blocking=True)

        output = model(input_var)

        loss = crit(output[-1]['hm'], target_var)
        for k in range(opt.num_stacks - 1):
            loss += crit(output[k]['hm'], target_var)

        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
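            # flip test: forward the horizontally flipped input, flip the
            # output heatmap back (swapping left/right joint channels), and
            # average it with the original prediction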
            input_ = input.cpu().numpy().copy()
            input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
            input_flip_var = torch.from_numpy(input_).cuda(
                device=opt.device, non_blocking=True)
            output_flip = model(input_flip_var)
            output_flip = shuffle_lr(
                flip(output_flip[-1]['hm'].detach().cpu().numpy()[0]), shuffle_ref)
            output_flip = output_flip.reshape(
                1, opt.num_output, opt.output_h, opt.output_w)
            # output_ = (output[-1].detach().cpu().numpy() + output_flip) / 2
            output_flip = torch.from_numpy(output_flip).cuda(
                device=opt.device, non_blocking=True)
            output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2
            pred, conf = get_preds(
                output[-1]['hm'].detach().cpu().numpy(), True)
            preds.append(convert_eval_format(pred, conf, meta)[0])

        Loss.update(loss.detach().item(), input.size(0))
        Acc.update(accuracy(output[-1]['hm'].detach().cpu().numpy(),
                            target_var.detach().cpu().numpy(), acc_idxs))

        batch_time.update(time.time() - end)
        end = time.time()
        if not opt.hide_data_time:
            time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                       ' |Net {bt.avg:.3f}s'.format(dt=data_time,
                                                    bt=batch_time)
        else:
            time_str = ''
        Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:}' \
                     '|Loss {loss.avg:.5f} |Acc {Acc.avg:.4f}'\
                     '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td,
                                         eta=bar.eta_td, loss=Loss, Acc=Acc,
                                         split=split, time_str=time_str)
        if opt.print_iter > 0:
            if i % opt.print_iter == 0:
                print('{}| {}'.format(opt.exp_id, Bar.suffix))
        else:
            bar.next()
        if opt.debug >= 2:
            gt, amb_idx = get_preds(target.cpu().numpy())
            gt *= 4
            pred, amb_idx = get_preds(output[-1]['hm'].detach().cpu().numpy())
            pred *= 4
            debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)
            img = (input[0].numpy().transpose(1, 2, 0) * std + mean) * 256
            img = img.astype(np.uint8).copy()
            debugger.add_img(img)
            debugger.add_mask(
                cv2.resize(target[0].numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'target')
            debugger.add_mask(
                cv2.resize(output[-1]['hm'][0].detach().cpu().numpy().max(axis=0),
                           (opt.input_w, opt.input_h)), img, 'pred')
            debugger.add_point_2d(pred[0], (255, 0, 0))
            debugger.add_point_2d(gt[0], (0, 0, 255))
            debugger.show_all_imgs(pause=True)

    bar.finish()
    return {'loss': Loss.avg,
            'acc': Acc.avg,
            'time': bar.elapsed_td.total_seconds() / 60.}, preds
Beispiel #27
0
def step(split, epoch, opt, data_loader, model, optimizer=None):
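  """One training/validation epoch for joint 2D heatmap + per-joint depth
  estimation; tracks heatmap loss, 3D fusion loss, keypoint accuracy, and
  MPJPE."""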
  if split == 'train':
    model.train()
  else:
    model.eval()
  
  crit = torch.nn.MSELoss()
  crit_3d = FusionLoss(opt.device, opt.weight_3d, opt.weight_var)

  acc_idxs = data_loader.dataset.acc_idxs
  edges = data_loader.dataset.edges
  edges_3d = data_loader.dataset.edges_3d
  shuffle_ref = data_loader.dataset.shuffle_ref
  mean = data_loader.dataset.mean
  std = data_loader.dataset.std
  convert_eval_format = data_loader.dataset.convert_eval_format

  Loss, Loss3D = AverageMeter(), AverageMeter()
  Acc, MPJPE = AverageMeter(), AverageMeter()
  data_time, batch_time = AverageMeter(), AverageMeter()
  preds = []
  time_str = ''

  nIters = len(data_loader)
  bar = Bar('{}'.format(opt.exp_id), max=nIters)
  
  end = time.time()
  for i, batch in enumerate(data_loader):
    data_time.update(time.time() - end)
    for k in batch:
      if k != 'meta':
        batch[k] = batch[k].cuda(device=opt.device, non_blocking=True)
    gt_2d = batch['meta']['pts_crop'].cuda(
      device=opt.device, non_blocking=True).float() / opt.output_h
    output = model(batch['input'])

    loss = crit(output[-1]['hm'], batch['target'])
    loss_3d = crit_3d(
      output[-1]['depth'], batch['reg_mask'], batch['reg_ind'], 
      batch['reg_target'],gt_2d)
    for k in range(opt.num_stacks - 1):
      # intermediate supervision: add the earlier stacks' losses as well
      loss += crit(output[k]['hm'], batch['target'])
      loss_3d += crit_3d(
        output[k]['depth'], batch['reg_mask'], batch['reg_ind'],
        batch['reg_target'], gt_2d)
    loss += loss_3d

    if split == 'train':
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    else:
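      # same flip-test averaging as the 2D case, applied to both the
      # heatmap and the depth outputs before evaluation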
      input_ = batch['input'].cpu().numpy().copy()
      input_[0] = flip(input_[0]).copy()[np.newaxis, ...]
      input_flip_var = torch.from_numpy(input_).cuda(
        device=opt.device, non_blocking=True)
      output_flip_ = model(input_flip_var)
      output_flip = shuffle_lr(
        flip(output_flip_[-1]['hm'].detach().cpu().numpy()[0]), shuffle_ref)
      output_flip = output_flip.reshape(
        1, opt.num_output, opt.output_h, opt.output_w)
      output_depth_flip = shuffle_lr(
        flip(output_flip_[-1]['depth'].detach().cpu().numpy()[0]), shuffle_ref)
      output_depth_flip = output_depth_flip.reshape(
        1, opt.num_output, opt.output_h, opt.output_w)
      output_flip = torch.from_numpy(output_flip).cuda(
        device=opt.device, non_blocking=True)
      output_depth_flip = torch.from_numpy(output_depth_flip).cuda(
        device=opt.device, non_blocking=True)
      output[-1]['hm'] = (output[-1]['hm'] + output_flip) / 2
      output[-1]['depth'] = (output[-1]['depth'] + output_depth_flip) / 2
      # pred = get_preds(output[-1]['hm'].detach().cpu().numpy())
      # preds.append(convert_eval_format(pred, conf, meta)[0])
    
    Loss.update(loss.item(), batch['input'].size(0))
    Loss3D.update(loss_3d.item(), batch['input'].size(0))
    Acc.update(accuracy(output[-1]['hm'].detach().cpu().numpy(), 
                        batch['target'].detach().cpu().numpy(), acc_idxs))
    mpjpe_batch, mpjpe_cnt = mpjpe(output[-1]['hm'].detach().cpu().numpy(),
                                   output[-1]['depth'].detach().cpu().numpy(),
                                   batch['meta']['gt_3d'].detach().numpy(),
                                   convert_func=convert_eval_format)
    MPJPE.update(mpjpe_batch, mpjpe_cnt)
   
    batch_time.update(time.time() - end)
    end = time.time()
    if not opt.hide_data_time:
      time_str = ' |Data {dt.avg:.3f}s({dt.val:.3f}s)' \
                 ' |Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
      
    Bar.suffix = '{split}: [{0}][{1}/{2}] |Total {total:} |ETA {eta:} '\
                 '|Loss {loss.avg:.5f} |Loss3D {loss_3d.avg:.5f}'\
                 '|Acc {Acc.avg:.4f} |MPJPE {MPJPE.avg:.2f}'\
                 '{time_str}'.format(epoch, i, nIters, total=bar.elapsed_td, 
                                     eta=bar.eta_td, loss=Loss, Acc=Acc, 
                                     split=split, time_str=time_str,
                                     MPJPE=MPJPE, loss_3d=Loss3D)
    if opt.print_iter > 0:
      if i % opt.print_iter == 0:
        print('{}| {}'.format(opt.exp_id, Bar.suffix))
    else:
      bar.next()
    if opt.debug >= 2:
      gt = get_preds(batch['target'].cpu().numpy()) * 4
      pred = get_preds(output[-1]['hm'].detach().cpu().numpy()) * 4
      debugger = Debugger(ipynb=opt.print_iter > 0, edges=edges)
      img = (
        batch['input'][0].cpu().numpy().transpose(1, 2, 0) * std + mean) * 256
      img = img.astype(np.uint8).copy()
      debugger.add_img(img)
      debugger.add_mask(
        cv2.resize(batch['target'][0].cpu().numpy().max(axis=0), 
                   (opt.input_w, opt.input_h)), img, 'target')
      debugger.add_mask(
        cv2.resize(output[-1]['hm'][0].detach().cpu().numpy().max(axis=0), 
                   (opt.input_w, opt.input_h)), img, 'pred')
      debugger.add_point_2d(gt[0], (0, 0, 255))
      debugger.add_point_2d(pred[0], (255, 0, 0))
      debugger.add_point_3d(
        batch['meta']['gt_3d'].detach().numpy()[0], 'r', edges=edges_3d)
      pred_3d = get_preds_3d(output[-1]['hm'].detach().cpu().numpy(), 
                             output[-1]['depth'].detach().cpu().numpy())
      debugger.add_point_3d(convert_eval_format(pred_3d[0]), 'b',edges=edges_3d)
      debugger.show_all_imgs(pause=False)
      debugger.show_3d()

  bar.finish()
  return {'loss': Loss.avg, 
          'acc': Acc.avg, 
          'mpjpe': MPJPE.avg,
          'time': bar.elapsed_td.total_seconds() / 60.}, preds
Beispiel #28
0
    def train(self, cfg):
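        """Full training loop: build the YOLO-format dataset, train with
        CtdetLoss and Adam, periodically run visual validation on sampled
        images, and checkpoint the model."""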
        # set up the GPU environment, handling both single- and multi-GPU setups
        gpus_str = ''
        if isinstance(cfg.gpus, (list, tuple)):
            cfg.gpus = [int(i) for i in cfg.gpus]
            for s in cfg.gpus:
                gpus_str += str(s) + ','
            gpus_str = gpus_str[:-1]
        else:
            gpus_str = str(int(cfg.gpus))
            cfg.gpus = [int(cfg.gpus)]
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus_str
        cfg.gpus = [i for i in range(len(cfg.gpus))
                    ] if cfg.gpus[0] >= 0 else [-1]

        # set up logging
        model_dir = os.path.join(cfg.save_dir, cfg.id)
        debug_dir = os.path.join(model_dir, 'debug')
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        if not os.path.exists(debug_dir):
            os.makedirs(debug_dir)
        logger = setup_logger(cfg.id, os.path.join(model_dir, 'log'))
        if USE_TENSORBOARD:
            writer = tensorboardX.SummaryWriter(
                log_dir=os.path.join(model_dir, 'log'))
        logger.info(cfg)

        gpus = cfg.gpus
        device = torch.device('cpu' if gpus[0] < 0 else 'cuda')
        lr = cfg.lr
        lr_step = cfg.lr_step
        num_epochs = cfg.num_epochs
        val_step = cfg.val_step
        sample_size = cfg.sample_size

        # set up the dataset
        dataset = YOLO(cfg.data_dir,
                       cfg.hflip,
                       cfg.vflip,
                       cfg.rotation,
                       cfg.scale,
                       cfg.shear,
                       opt=cfg,
                       split='train')
        names = dataset.class_name
        std = dataset.std
        mean = dataset.mean
        # configure the prediction heads from the dataset's number of classes
        cfg.setup_head(dataset)
        trainloader = DataLoader(dataset,
                                 batch_size=cfg.batch_size,
                                 shuffle=True,
                                 num_workers=cfg.num_workers,
                                 pin_memory=True,
                                 drop_last=True)

        # val_dataset = YOLO(cfg.data_dir, cfg.hflip, cfg.vflip, cfg.rotation, cfg.scale, cfg.shear, opt=cfg, split='val')
        # valloader = DataLoader(val_dataset, batch_size=1, shuffle=True, num_workers=1, pin_memory=True)
        valid_file = cfg.val_dir if not cfg.val_dir == '' else os.path.join(
            cfg.data_dir, 'valid.txt')
        with open(valid_file, 'r') as f:
            val_list = [l.rstrip() for l in f.readlines()]

        net = create_model(cfg.arch, cfg.heads, cfg.head_conv, cfg.down_ratio,
                           cfg.filters)
        optimizer = optim.Adam(net.parameters(), lr=lr)
        start_epoch = 0

        if cfg.resume:
            pretrain = os.path.join(model_dir, 'model_last.pth')
            if os.path.exists(pretrain):
                print('resume model from %s' % pretrain)
                try:
                    net, optimizer, start_epoch = load_model(
                        net, pretrain, optimizer, True, lr, lr_step)
                except Exception:
                    print('\t... loading model error: ckpt may not be compatible')
        model = ModleWithLoss(net, CtdetLoss(cfg))
        if len(gpus) > 1:
            model = nn.DataParallel(model, device_ids=gpus).to(device)
        else:
            model = model.to(device)

        step = 0
        best = 1e10
        log_loss_stats = ['loss', 'hm_loss', 'wh_loss']
        if cfg.reg_offset:
            log_loss_stats += ['off_loss']
        if cfg.reg_obj:
            log_loss_stats += ['obj_loss']
        for epoch in range(start_epoch + 1, num_epochs + 1):
            avg_loss_stats = {l: AverageMeter() for l in log_loss_stats}
            model.train()
            with tqdm(trainloader) as loader:
                for _, batch in enumerate(loader):
                    for k in batch:
                        if k != 'meta':
                            batch[k] = batch[k].to(device=device,
                                                   non_blocking=True)
                    output, loss, loss_stats = model(batch)
                    loss = loss.mean()
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    # update the tqdm progress display
                    lr = optimizer.param_groups[0]['lr']
                    poststr = ''
                    for l in avg_loss_stats:
                        avg_loss_stats[l].update(loss_stats[l].mean().item(),
                                                 batch['input'].size(0))
                        poststr += '{}: {:.4f}; '.format(
                            l, avg_loss_stats[l].avg)
                    loader.set_description('Epoch %d' % (epoch))
                    poststr += 'lr: {:.4f}'.format(lr)
                    loader.set_postfix_str(poststr)

                    step += 1
                    # self.lossSignal.emit(loss.item(), step)
                    del output, loss, loss_stats

                    # valid
                    if step % val_step == 0:
                        if len(cfg.gpus) > 1:
                            val_model = model.module
                        else:
                            val_model = model
                        val_model.eval()
                        torch.cuda.empty_cache()

                        # randomly sample validation images
                        idx = np.arange(len(val_list))
                        idx = np.random.permutation(idx)[:sample_size]

                        for j, id in enumerate(idx):
                            image = cv2.imread(val_list[id])
                            image = self.preprocess(image, cfg.input_h,
                                                    cfg.input_w, mean, std)
                            image = image.to(device)

                            with torch.no_grad():
                                output = val_model.model(image)[-1]

                            # draw the predictions and save them
                            debugger = Debugger(dataset=names,
                                                down_ratio=cfg.down_ratio)
                            reg = output['reg'] if cfg.reg_offset else None
                            obj = output['obj'] if cfg.reg_obj else None
                            dets = ctdet_decode(output['hm'].sigmoid_(),
                                                output['wh'],
                                                reg=reg,
                                                obj=obj,
                                                cat_spec_wh=cfg.cat_spec_wh,
                                                K=cfg.K)
                            dets = dets.detach().cpu().numpy().reshape(
                                -1, dets.shape[2])
                            dets[:, :4] *= cfg.down_ratio
                            image = image[0].detach().cpu().numpy().transpose(
                                1, 2, 0)
                            image = np.clip(((image * std + mean) * 255.), 0,
                                            255).astype(np.uint8)
                            pred = debugger.gen_colormap(
                                output['hm'][0].detach().cpu().numpy())
                            debugger.add_blend_img(image, pred, 'pred_hm')
                            debugger.add_img(image, img_id='out_pred')
                            for k in range(len(dets)):
                                if dets[k, 4] > cfg.vis_thresh:
                                    debugger.add_coco_bbox(dets[k, :4],
                                                           dets[k, -1],
                                                           dets[k, 4],
                                                           img_id='out_pred')

                            debugger.save_all_imgs(debug_dir,
                                                   prefix='{}.{}_'.format(
                                                       step, j))
                            del output, image, dets
                        # save the model weights
                        save_model(os.path.join(model_dir, 'model_best.pth'),
                                   epoch, net)
                        model.train()

            logstr = 'epoch {}'.format(epoch)
            for k, v in avg_loss_stats.items():
                logstr += ' {}: {:.4f};'.format(k, v.avg)
                if USE_TENSORBOARD:
                    writer.add_scalar('train_{}'.format(k), v.avg, epoch)
            logger.info(logstr)

            # if epoch % val_step == 0:
            #     if len(cfg.gpus) > 1:
            #         val_model = model.module
            #     else:
            #         val_model = model
            #     val_model.eval()
            #     torch.cuda.empty_cache()
            #
            #     val_loss_stats = {l: AverageMeter() for l in log_loss_stats}
            #
            #     with tqdm(valloader) as loader:
            #         for j, batch in enumerate(loader):
            #             for k in batch:
            #                 if k != 'meta':
            #                     batch[k] = batch[k].to(device=device, non_blocking=True)
            #             with torch.no_grad():
            #                 output, loss, loss_stats = val_model(batch)
            #
            #             poststr = ''
            #             for l in val_loss_stats:
            #                 val_loss_stats[l].update(
            #                     loss_stats[l].mean().item(), batch['input'].size(0))
            #                 poststr += '{}: {:.4f}; '.format(l, val_loss_stats[l].avg)
            #             loader.set_description('Epoch %d valid' % (epoch))
            #             poststr += 'lr: {:.4f}'.format(lr)
            #             loader.set_postfix_str(poststr)
            #
            #             if j < sample_size:
            #                 # draw the predictions and save them as jpg images
            #                 debugger = Debugger(dataset=names, down_ratio=cfg.down_ratio)
            #                 reg = output['reg'] if cfg.reg_offset else None
            #                 obj = output['obj'] if cfg.reg_obj else None
            #                 dets = ctdet_decode(
            #                     output['hm'], output['wh'], reg=reg, obj=obj,
            #                     cat_spec_wh=cfg.cat_spec_wh, K=cfg.K)
            #                 dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
            #                 dets[:, :, :4] *= cfg.down_ratio
            #                 dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
            #                 dets_gt[:, :, :4] *= cfg.down_ratio
            #                 for i in range(1):
            #                     img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
            #                     img = np.clip(((img * std + mean) * 255.), 0, 255).astype(np.uint8)
            #                     pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
            #                     gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
            #                     debugger.add_blend_img(img, pred, 'pred_hm')
            #                     debugger.add_blend_img(img, gt, 'gt_hm')
            #                     debugger.add_img(img, img_id='out_pred')
            #                     for k in range(len(dets[i])):
            #                         if dets[i, k, 4] > cfg.vis_thresh:
            #                             debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
            #                                                    dets[i, k, 4], img_id='out_pred')
            #
            #                     debugger.add_img(img, img_id='out_gt')
            #                     for k in range(len(dets_gt[i])):
            #                         if dets_gt[i, k, 4] > cfg.vis_thresh:
            #                             debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
            #                                                    dets_gt[i, k, 4], img_id='out_gt')
            #
            #                     debugger.save_all_imgs(debug_dir, prefix='{}.{}_'.format(epoch, j))
            #             del output, loss, loss_stats
            #     model.train()
            #     logstr = 'epoch {} valid'.format(epoch)
            #     for k, v in val_loss_stats.items():
            #         logstr += ' {}: {:.4f};'.format(k, v.avg)
            #         if USE_TENSORBOARD:
            #             writer.add_scalar('val_{}'.format(k), v.avg, epoch)
            #     logger.info(logstr)
            #     if val_loss_stats['loss'].avg < best:
            #         best = val_loss_stats['loss'].avg
            #         save_model(os.path.join(model_dir, 'model_best.pth'), epoch, net)
            save_model(os.path.join(model_dir, 'model_last.pth'), epoch, net,
                       optimizer)
            if epoch in cfg.lr_step:
                save_model(
                    os.path.join(model_dir, 'model_{}.pth'.format(epoch)),
                    epoch, net, optimizer)
                lr = cfg.lr * (0.1**(cfg.lr_step.index(epoch) + 1))
                logger.info('Drop LR to {}'.format(lr))
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
Beispiel #29
0
def test_loader(cfg):
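    """Sanity-check the val dataloader: print tensor shapes, then run the
    ground-truth targets through the multi-pose decode path (top-K peaks,
    keypoint snapping, post-processing) as if they were network outputs."""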

    debugger = Debugger((cfg.DEBUG == 3),
                        theme=cfg.DEBUG_THEME,
                        num_classes=cfg.MODEL.NUM_CLASSES,
                        dataset=cfg.SAMPLE_METHOD,
                        down_ratio=cfg.MODEL.DOWN_RATIO)

    Dataset = get_dataset(cfg.SAMPLE_METHOD, cfg.TASK)
    val_dataset = Dataset(cfg, 'val')
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             pin_memory=True)
    for i, batch_data in enumerate(val_loader):
        input_image = batch_data['input']
        heat = batch_data['hm']
        reg = batch_data['reg']
        reg_mask = batch_data['reg_mask']
        ind = batch_data['ind']
        wh = batch_data['wh']
        kps = batch_data['hps']
        hps_mask = batch_data['hps_mask']
        seg_feat = batch_data['seg']
        hm_hp = batch_data['hm_hp']
        hp_offset = batch_data['hp_offset']
        hp_ind = batch_data['hp_ind']
        hp_mask = batch_data['hp_mask']
        meta = batch_data['meta']

        for k, v in batch_data.items():
            if isinstance(v, dict):
                for k1, v1 in v.items():
                    print(k1)
                    print(v1)
            else:
                print(k)
                print(v.shape)
        print(input_image.shape)
        print(hm_hp.shape)
        #handle image
        input_image = input_image[0].numpy().transpose(1, 2, 0)
        input_image = (input_image * STD) + MEAN
        input_image = input_image * 255
        input_image = input_image.astype(np.uint8)

        heat = heat.sigmoid_()
        hm_hp = hm_hp.sigmoid_()

        num_joints = 17

        K = cfg.TEST.TOPK
        # perform nms on heatmaps
        batch, cat, height, width = heat.size()
        heat = _nms(heat)
        scores, inds, clses, ys, xs = _topk(heat, K=K)

        kps = kps.view(batch, K, num_joints * 2)
        kps[..., ::2] += xs.view(batch, K, 1).expand(batch, K, num_joints)
        kps[..., 1::2] += ys.view(batch, K, 1).expand(batch, K, num_joints)

        xs = xs.view(batch, K, 1) + reg[:, :, 0:1]
        ys = ys.view(batch, K, 1) + reg[:, :, 1:2]

        wh = wh.view(batch, K, 2)

        #weight = _transpose_and_gather_feat(seg, inds)
        ## you can branch on weight.size(1) != seg_feat.size(1) here to choose between a 3x3 and a 1x1 seg conv.
        ## for 3x3
        #weight = weight.view([weight.size(1), -1, 3, 3])
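        ## a hedged sketch of the branch described above (assumes CondInst-style
        ## dynamic convolutions; exact shapes depend on how `weight` is packed,
        ## F = torch.nn.functional; one output mask channel per detection):
        # if weight.size(1) != seg_feat.size(1):  # weights pack 3x3 kernels
        #     weight = weight.view([weight.size(1), -1, 3, 3])
        #     pred_seg = F.conv2d(seg_feat, weight, padding=1)
        # else:                                   # weights pack 1x1 kernels
        #     weight = weight.view([weight.size(1), -1, 1, 1])
        #     pred_seg = F.conv2d(seg_feat, weight)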
        pred_seg = seg_feat

        clses = clses.view(batch, K, 1).float()
        scores = scores.view(batch, K, 1)

        bboxes = torch.cat([
            xs - wh[..., 0:1] / 2, ys - wh[..., 1:2] / 2,
            xs + wh[..., 0:1] / 2, ys + wh[..., 1:2] / 2
        ],
                           dim=2)
        if hm_hp is not None:
            hm_hp = _nms(hm_hp)
            thresh = 0.1
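            # keypoint refinement (CenterNet multi_pose decode): snap each
            # regressed joint to the nearest keypoint-heatmap peak, but only
            # if that peak scores above `thresh` and lies within a margin of
            # the detection box; otherwise keep the regressed location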
            kps = kps.view(batch, K, num_joints,
                           2).permute(0, 2, 1, 3).contiguous()  # b x J x K x 2
            reg_kps = kps.unsqueeze(3).expand(batch, num_joints, K, K, 2)
            hm_score, hm_inds, hm_ys, hm_xs = _topk_channel(hm_hp,
                                                            K=K)  # b x J x K

            hp_offset = hp_offset.view(batch, num_joints, K, 2)
            hm_xs = hm_xs + hp_offset[:, :, :, 0]
            hm_ys = hm_ys + hp_offset[:, :, :, 1]

            mask = (hm_score > thresh).float()
            hm_score = (1 - mask) * -1 + mask * hm_score
            hm_ys = (1 - mask) * (-10000) + mask * hm_ys
            hm_xs = (1 - mask) * (-10000) + mask * hm_xs
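            # peaks below `thresh` now carry score -1 and coordinates -10000,
            # so they can never win the nearest-peak match below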
            hm_kps = torch.stack([hm_xs, hm_ys], dim=-1).unsqueeze(2).expand(
                batch, num_joints, K, K, 2)
            dist = (((reg_kps - hm_kps)**2).sum(dim=4)**0.5)
            min_dist, min_ind = dist.min(dim=3)  # b x J x K
            hm_score = hm_score.gather(2,
                                       min_ind).unsqueeze(-1)  # b x J x K x 1
            min_dist = min_dist.unsqueeze(-1)
            min_ind = min_ind.view(batch, num_joints, K, 1,
                                   1).expand(batch, num_joints, K, 1, 2)
            hm_kps = hm_kps.gather(3, min_ind)
            hm_kps = hm_kps.view(batch, num_joints, K, 2)
            l = bboxes[:, :, 0].view(batch, 1, K,
                                     1).expand(batch, num_joints, K, 1)
            t = bboxes[:, :, 1].view(batch, 1, K,
                                     1).expand(batch, num_joints, K, 1)
            r = bboxes[:, :, 2].view(batch, 1, K,
                                     1).expand(batch, num_joints, K, 1)
            b = bboxes[:, :, 3].view(batch, 1, K,
                                     1).expand(batch, num_joints, K, 1)
            mask = (hm_kps[..., 0:1] < l) + (hm_kps[..., 0:1] > r) + \
                 (hm_kps[..., 1:2] < t) + (hm_kps[..., 1:2] > b) + \
                 (hm_score < thresh) + (min_dist > (torch.max(b - t, r - l) * 0.3))
            mask = (mask > 0).float().expand(batch, num_joints, K, 2)
            kps = (1 - mask) * hm_kps + mask * kps
            kps = kps.permute(0, 2, 1,
                              3).contiguous().view(batch, K, num_joints * 2)
        dets = torch.cat([
            bboxes, scores, kps,
            torch.transpose(hm_score.squeeze(dim=3), 1, 2)
        ],
                         dim=2)
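        # dets: b x K x 56 = 4 (bbox) + 1 (score) + 34 (17 joints x 2) +
        # 17 (per-joint heatmap scores)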
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])

        dets, inds = whole_body_post_process(dets.copy(), [meta['c'].numpy()],
                                             [meta['s'].numpy()], 128, 128, 1)
        for j in range(1, cfg.MODEL.NUM_CLASSES + 1):
            dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 56)
            dets[0][j][:, :4] /= 1.    # scale == 1, so these divides are no-ops,
            dets[0][j][:, 5:39] /= 1.  # kept for parity with test-time scaling

        print(pred_seg.shape)
        seg = pred_seg[0]
        # collate added a batch dim to meta; strip it for get_affine_transform
        trans = get_affine_transform(meta['c'][0].numpy(),
                                     float(meta['s']),
                                     0,
                                     (int(meta['out_width']),
                                      int(meta['out_height'])),
                                     inv=1)
        debugger.add_img(input_image, img_id='multi_pose')
        for j in range(1, cfg.MODEL.NUM_CLASSES + 1):
            for b_id, detection in enumerate(dets[0][j]):
                bbox = detection[:4]
                bbox_prob = detection[4]
                keypoints = detection[5:39]
                keypoints_prob = detection[39:]
                if bbox_prob > cfg.TEST.VIS_THRESH:
                    debugger.add_coco_bbox(bbox,
                                           0,
                                           bbox_prob,
                                           img_id='multi_pose')
                    segment = seg[b_id].detach().cpu().numpy()

                    segment = cv2.warpAffine(segment,
                                             trans,
                                             (input_image.shape[1],
                                              input_image.shape[0]),
                                             flags=cv2.INTER_CUBIC)
                    w, h = bbox[2:4] - bbox[:2]
                    ct = np.array([(bbox[0] + bbox[2]) / 2,
                                   (bbox[1] + bbox[3]) / 2],
                                  dtype=np.float32)

                    segment_mask = np.zeros_like(segment)
                    pad_rate = 0.3
                    x, y = np.clip([ct[0] - (1 + pad_rate) * w / 2, ct[0] + (1 + pad_rate) * w / 2], 0,
                                   segment.shape[1] - 1).astype(int), \
                           np.clip([ct[1] - (1 + pad_rate) * h / 2, ct[1] + (1 + pad_rate) * h / 2], 0,
                                   segment.shape[0] - 1).astype(int)
                    segment_mask[y[0]:y[1], x[0]:x[1]] = 1
                    segment = segment_mask * segment
                    debugger.add_coco_seg(segment, img_id='multi_pose')
                    debugger.add_coco_hp(keypoints,
                                         keypoints_prob,
                                         img_id='multi_pose')

        debugger.show_all_imgs(pause=False)  # module-level function: no self.pause here; don't block the loop

        save_path = os.path.join(SAVE_DIR, '{}.png'.format(i))
        cv2.imwrite(save_path, input_image)
Beispiel #30
0
def error_bound_saliency(opt, img_id, loc=None, error_bound=0.1):

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str

    Dataset = dataset_factory[opt.dataset]
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    Detector = detector_factory[opt.task]

    ### simply run the detector and save the objectness heat map and the detection results
    split = 'val' if not opt.trainval else 'test'
    dataset = Dataset(opt, split)
    detector = Detector(opt)
    # use the FeatureExtractor to register a forward hook that captures the
    # target layer's activations; to find candidate names for target_layers,
    # inspect model.named_modules()
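    # a minimal sketch of what such a FeatureExtractor could look like
    # (hypothetical; the real class is assumed to ship with this code base):
    # class FeatureExtractor(torch.nn.Module):
    #     def __init__(self, model, target_layers):
    #         super().__init__()
    #         self.model = model
    #         layer = dict(model.named_modules())[target_layers]
    #         layer.register_forward_hook(
    #             lambda m, inp, out: setattr(self, 'target_val', out))
    #     def forward(self, x):
    #         return self.model(x)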
    feature_extractor = FeatureExtractor(detector.model, target_layers='hm')
    detector.model = feature_extractor
    feature_extractor.eval()
    img_info = dataset.coco.loadImgs(ids=[img_id])[0]
    img_path = os.path.join(dataset.img_dir, img_info['file_name'])
    detector.run(img_path)

    ### get saliency mask
    ### Note: because the input image is usually resized and padded, we compute the mask on the resized image.
    ### For the error we use a mean-squared-error (MSE) loss (see loss_fn below).

    ## gradually grow a rect around the chosen center, in image coordinates, until the error drops below the bound
    debug_dir = detector.opt.debug_dir
    scale = 1.0
    debugger = Debugger(dataset=detector.opt.dataset,
                        ipynb=(detector.opt.debug == 3),
                        theme=detector.opt.debugger_theme)

    image_org = cv2.imread(img_path)
    image, meta, resized_img = pre_process(detector,
                                           image_org,
                                           scale,
                                           mask=None,
                                           return_resized_img=True)
    _, _, h, w = image.size()
    down_sample_rate = h / feature_extractor.target_val.size(2)
    # get the loc [center_h, center_w] on the resized image and the corresponding [fh, fw] on the feature map
    if loc is None:  # if loc [center_h,center_w] is not specified, use the location of the max value
        ind = torch.argmax(feature_extractor.target_val[0].sum(dim=0))
        fh = ind // feature_extractor.target_val.size(3)
        fw = ind % feature_extractor.target_val.size(3)
        center_h = fh * down_sample_rate
        center_w = fw * down_sample_rate
        val = feature_extractor.target_val[0, :, fh, fw]
        print([center_h, center_w])
    else:
        center_h, center_w = loc
        fh = int(center_h / down_sample_rate)
        fw = int(center_w / down_sample_rate)
        val = feature_extractor.target_val[0, :, fh, fw]

    # MSE between the target layer's activation at (fh, fw) under the masked
    # input and the original activation vector `val`
    loss_fn = lambda x: torch.mean(torch.pow(x - val, 2))
    area_increment = np.prod(image.size()) / 1000.0  # step size: 0.1% of the input tensor's element count
    area = 0
    ratio = 1.0  # aspect ratio (w/h) of the grown rect
    error = 1e10
    mask = np.zeros([h, w])  # [H,W]
    while error > error_bound:
        print("it:{} error:{}".format(area // area_increment, error))
        area += area_increment
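        # rectangle with the target area and aspect ratio w/h == ratio,
        # centered on (center_h, center_w)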
        bh = np.sqrt(area / ratio)
        bw = area / bh
        mask = np.zeros([h, w])
        hmin, hmax = max(int(center_h - bh / 2),
                         0), min(int(center_h + bh / 2) + 1, h - 1)
        wmin, wmax = max(int(center_w - bw / 2),
                         0), min(int(center_w + bw / 2) + 1, w - 1)
        mask[hmin:hmax, wmin:wmax] = 1
        image_masked, _ = pre_process(detector, image_org, 1.0, mask)
        image_masked = image_masked.to(opt.device)
        with torch.no_grad():
            feature_extractor(image_masked)
        error = loss_fn(feature_extractor.target_val[0, :, fh, fw])
    print("it:{} error:{}".format(area // area_increment, error))
    # draw the rect mask on resized_image and save
    rect_mask_img_save_name = 'rect_mask_{:.1f}'.format(scale)
    debugger.add_blend_img(resized_img,
                           debugger.gen_colormap(mask[np.newaxis, :, :]),
                           rect_mask_img_save_name)
    kernel_hmin, kernel_hmax = max(
        int(center_h - down_sample_rate / 2),
        0), min(int(center_h + down_sample_rate / 2) + 1, h - 1)
    kernel_wmin, kernel_wmax = max(
        int(center_w - down_sample_rate / 2),
        0), min(int(center_w + down_sample_rate / 2) + 1, w - 1)
    debugger.imgs[rect_mask_img_save_name][kernel_hmin:kernel_hmax,
                                           kernel_wmin:kernel_wmax] = [
                                               255, 0, 0
                                           ]  # blue (BGR)

    ## get saliency superpixel
    rect_img = resized_img[hmin:hmax, wmin:wmax]
    segments = slic(rect_img, n_segments=30)  # superpixel labels, in rect-local coordinates
    un_removed_superpixel = list(np.unique(segments))
    rect_segment_mask = np.ones_like(segments)
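    # greedy pruning: repeatedly drop the superpixel whose removal keeps the
    # activation error lowest, for as long as the error stays below the bound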
    while error < error_bound:
        # find the superpixel whose removal leads to the lowest error
        lowest_error = 1e10
        lowest_error_ind = -1
        for i in un_removed_superpixel:
            mask = np.zeros([h, w])
            mask[hmin:hmax, wmin:wmax] = rect_segment_mask * (segments != i)
            image_masked, _ = pre_process(detector, image_org, 1.0, mask)
            image_masked = image_masked.to(opt.device)
            with torch.no_grad():
                feature_extractor(image_masked)
            cur_error = loss_fn(feature_extractor.target_val[0, :, fh, fw])
            if cur_error < lowest_error:
                lowest_error = cur_error
                lowest_error_ind = i
        if not lowest_error < error_bound:
            break
        else:
            un_removed_superpixel.remove(lowest_error_ind)
            error = lowest_error
            rect_segment_mask = rect_segment_mask * (segments !=
                                                     lowest_error_ind)
            print("error={} remaining super pixel:{}".format(
                error, len(un_removed_superpixel)))

    # draw the segmentation saliency mask on resized_image and save
    mask = np.zeros([h, w])
    mask[hmin:hmax, wmin:wmax] = rect_segment_mask

    inp_image = resized_img * mask[:, :, np.newaxis].astype(np.uint8)
    debugger.add_img(inp_image, 'masked_img')
    mask_img_save_name = 'mask_{:.1f}'.format(scale)
    debugger.add_blend_img(resized_img,
                           debugger.gen_colormap(mask[np.newaxis, :, :]),
                           mask_img_save_name)
    debugger.imgs[mask_img_save_name][kernel_hmin:kernel_hmax,
                                      kernel_wmin:kernel_wmax] = [255, 0,
                                                                  0]  # blue
    debugger.save_all_imgs(debug_dir, prefix='{}'.format(opt.img_id))

    opt.prefix = '{}masked'.format(opt.img_id)
    detector.run(inp_image)
    return