Example #1
    def post_process(self, dets, meta, output_size):
        dets = dets.data.cpu().numpy()
        dets = dets.reshape(-1, dets.shape[2])
        c = meta['c']
        s = meta['s']
        h, w = output_size

        top_preds = {}
        dets[:, :2] = transform_preds(dets[:, 0:2], c, s, (w, h))
        dets[:, 2:4] = transform_preds(dets[:, 2:4], c, s, (w, h))
        classes = dets[:, -1]
        scores = dets[:, 4]
        '''
        if len(scores) > self._max_per_image:
            kth = len(scores) - self._max_per_image
            thresh = np.partition(scores, kth)[kth]
        '''

        for j in range(self._num_classes):
            inds = np.logical_and(classes == j, scores >= self._threshold)
            top_preds[j + 1] = np.concatenate(
                [dets[inds, :4].astype(np.float32),
                 scores[inds].reshape(-1, 1).astype(np.float32)],
                axis=1)
        return top_preds
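Note: every example on this page relies on transform_preds to map points from the network's output feature map back into original-image coordinates, using the crop center c and scale s recorded during preprocessing. The sketch below is a simplified, self-contained re-implementation of that mapping under the assumptions of a CenterNet-style crop (no rotation, no shift, uniform scaling derived from the crop width); the real helper typically builds the inverse affine with get_affine_transform(..., inv=1). The name transform_preds_sketch is introduced here only for illustration.

import numpy as np

def transform_preds_sketch(coords, center, scale, output_size):
    # coords: (N, 2) points on the output map; output_size: (w, h) of that map.
    # center/scale describe the square (or [w, h]) crop taken at preprocessing.
    scale = np.array([scale, scale], dtype=np.float32) if np.isscalar(scale) \
        else np.asarray(scale, dtype=np.float32)
    w, h = output_size
    ratio = scale[0] / w                      # uniform scale, from the crop width
    out = np.asarray(coords, dtype=np.float32).copy()
    out[:, 0] = (out[:, 0] - w / 2.) * ratio + center[0]
    out[:, 1] = (out[:, 1] - h / 2.) * ratio + center[1]
    return out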
Example #2
    def val_map(epoch):
        print('\n Val@Epoch: %d' % epoch)
        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        with torch.no_grad():
            for inputs in val_loader:
                img_id, inputs = inputs[0]

                detections = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(
                        cfg.device)
                    output = model(inputs[scale]['image'])[-1]

                    dets = ctdet_decode(*output, K=cfg.test_topk)
                    dets = dets.detach().cpu().numpy().reshape(
                        1, -1, dets.shape[2])[0]

                    top_preds = {}
                    dets[:, :2] = transform_preds(
                        dets[:, 0:2], inputs[scale]['center'],
                        inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    dets[:, 2:4] = transform_preds(
                        dets[:, 2:4], inputs[scale]['center'],
                        inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    clses = dets[:, -1]
                    for j in range(val_dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                        top_preds[j + 1][:, :4] /= scale

                    detections.append(top_preds)

                bbox_and_scores = {
                    j: np.concatenate([d[j] for d in detections], axis=0)
                    for j in range(1, val_dataset.num_classes + 1)
                }
                scores = np.hstack([
                    bbox_and_scores[j][:, 4]
                    for j in range(1, val_dataset.num_classes + 1)
                ])
                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, val_dataset.num_classes + 1):
                        keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                        bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

                results[img_id] = bbox_and_scores

        eval_results = val_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        print(eval_results)
        summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)
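Note: the max_per_image cap in val_map() (and in several later examples) keeps only the N highest-scoring detections across all classes. np.partition finds the cutoff score in average linear time, without fully sorting. A minimal, self-contained illustration with made-up scores:

import numpy as np

scores = np.random.rand(250)                  # fabricated detection scores
max_per_image = 100
if len(scores) > max_per_image:
    kth = len(scores) - max_per_image
    thresh = np.partition(scores, kth)[kth]   # the Nth-highest score overall
    scores = scores[scores >= thresh]         # at most N survive (barring ties)
print(len(scores))                            # typically 100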
Example #3
    def post_process(self, dets, meta, scale=1):
        out_width, out_height = meta['out_width'], meta['out_height']
        dets = dets.detach().cpu().numpy().reshape(2, -1, 14)
        # second row holds the flip-test pass: mirror its x1/x2 back
        dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
        dets = dets.reshape(1, -1, 14)
        dets[0, :, 0:2] = transform_preds(dets[0, :, 0:2], meta['c'],
                                          meta['s'], (out_width, out_height))
        dets[0, :, 2:4] = transform_preds(dets[0, :, 2:4], meta['c'],
                                          meta['s'], (out_width, out_height))
        dets[:, :, 0:4] /= scale
        return dets[0]
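Note: this post_process variant assumes flip-test augmentation: the decoded detections hold the normal pass in row 0 and the horizontally flipped pass in row 1, whose x-coordinates are mirrored back with x' = out_width - x (x1 and x2 swapped so that x1 <= x2 still holds). A quick numeric check of that mirroring with made-up values:

import numpy as np

out_width = 128
flipped = np.array([[20., 0., 50., 0.]])               # x1, y1, x2, y2 from the flipped pass
flipped[:, [0, 2]] = out_width - flipped[:, [2, 0]]    # same trick as above
print(flipped)                                          # [[ 78.   0. 108.   0.]]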
Example #4
    def multi_pose_post_process(self, dets, c, s, h, w):
        # dets: batch x max_dets x 40
        # return list of 39 in image coord
        ret = []
        for i in range(dets.shape[0]):
            bbox = transform_preds(dets[i, :, :4].reshape(-1, 2), c[i], s[i], (w, h))
            pts = transform_preds(dets[i, :, 5:15].reshape(-1, 2), c[i], s[i], (w, h))
            top_preds = np.concatenate(
                [bbox.reshape(-1, 4), dets[i, :, 4:5],
                 pts.reshape(-1, 10), dets[i, :, 15:20]],
                axis=1).astype(np.float32).tolist()
            ret.append({np.ones(1, dtype=np.int32)[0]: top_preds})
        return ret
Example #5
def demo_image(image, image_name, model, opt):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image,
                         trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]

    path = "D:\\CV-Project\\pytorch-pose-hg-3d\\images\\last_save\\"
    _, image_name = os.path.split(image_name)
    image_name = image_name[:-4]

    debugger = Debugger()
    debugger.add_img(image, image_name)
    debugger.add_point_2d(pred, (255, 0, 0), image_name)
    debugger.add_point_3d(pred_3d, 'b')
    debugger.show_all_imgs(pause=False)
    debugger.show_3d(image_name, path)
    debugger.save_img(image_name, path)
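Note: the demo_image examples share the same preprocessing: warp a square crop of side max(H, W), centered on the image, into the network input size, normalize with the dataset mean/std, and lay the result out as a 1 x C x H x W float tensor. The sketch below rebuilds that pipeline without get_affine_transform, constructing the rotation-free, shift-free affine matrix directly; under those arguments it should mirror what get_affine_transform(c, s, 0, [input_w, input_h]) produces. mean and std are assumed to be per-channel arrays of shape (3,).

import cv2
import numpy as np
import torch

def preprocess_sketch(image, input_w, input_h, mean, std, device='cpu'):
    h, w = image.shape[:2]
    s = max(h, w) * 1.0                       # square crop side
    cx, cy = w / 2., h / 2.                   # crop center = image center
    r = input_w / s                           # uniform scale, from the output width
    M = np.array([[r, 0., input_w / 2. - r * cx],
                  [0., r, input_h / 2. - r * cy]], dtype=np.float32)
    inp = cv2.warpAffine(image, M, (input_w, input_h), flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std           # broadcast over the channel axis
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    return torch.from_numpy(inp).to(device), np.array([cx, cy], np.float32), s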
Example #6
def demo_image(image, model, opt):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image,
                         trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]  # 'hm': (1, 16, 64, 64), 'depth': (1, 16, 64, 64)
    preds, amb_idx = get_preds(out['hm'].detach().cpu().numpy())
    pred = preds[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d, ignore_idx = get_preds_3d(out['hm'].detach().cpu().numpy(),
                                       out['depth'].detach().cpu().numpy(),
                                       amb_idx)
    pred_3d = pred_3d[0]
    ignore_idx = ignore_idx[0]

    debugger = Debugger()
    debugger.add_img(image)  # copy the image into the debugger class
    debugger.add_point_2d(pred, (255, 0, 0))
    debugger.add_point_3d(pred_3d, 'b', ignore_idx=ignore_idx)
    debugger.show_all_imgs(pause=False)
    debugger.show_3d()

    print("Done")
Example #7
def demo_image(image, model, opt, timestep):
    inps = []
    s = None
    c = None
    hidden = None
    for t in range(timestep):
        s = max(image[t].shape[0], image[t].shape[1]) * 1.0
        c = np.array([image[t].shape[1] / 2., image[t].shape[0] / 2.],
                     dtype=np.float32)
        trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
        inp = cv2.warpAffine(image[t],
                             trans_input, (opt.input_w, opt.input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp / 255. - mean) / std
        inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
        inp = torch.from_numpy(inp).to(opt.device)
        inps.append(inp)

    if opt.task == "conv3d":
        outs = model(inps)
    else:
        outs, hidden = model(inps, hidden)
    out = outs[-1]
    preds, amb_idx = get_preds(out[-1]['hm'].detach().cpu().numpy())
    pred = preds[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d, ignore_idx = get_preds_3d(out[-1]['hm'].detach().cpu().numpy(),
                                       out[-1]['depth'].detach().cpu().numpy(),
                                       amb_idx)

    pred_3d = pred_3d[0]
    ignore_idx = ignore_idx[0]

    return image[-1], pred, pred_3d, ignore_idx
Example #8
def demo_image(image, model, opt, save_path=None):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image,
                         trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]

    debugger = Debugger()
    debugger.add_img(image)
    debugger.add_point_2d(pred, (255, 0, 0))
    debugger.add_point_3d(pred_3d, 'b')
    #  import pdb;pdb.set_trace()
    debugger.show_all_imgs(pause=True)
    debugger.show_3d()
    if save_path:
        debugger.save_3d(save_path)
Example #9
    def convert_eval_format(self, pred, conf, meta):
        ret = np.zeros((pred.shape[0], pred.shape[1], 2))
        for i in range(pred.shape[0]):
            ret[i] = transform_preds(
                pred[i], meta['center'][i].numpy(), meta['scale'][i].numpy(),
                [self.opt.output_h, self.opt.output_w])
        return ret
Example #10
def demo_image(image, model, opt):
    # image name added as an input, so that the individual output files
    # can be ascribed to the respective image
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image,
                         trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    # this step readjusts the 2D skeleton to the input image with center and scale
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    # uses heatmap and depthmap to create 3D pose
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]
    #pred_3d = transform_preds(pred_3d, c, s, (opt.output_w, opt.output_h,10))  #this step readjusts the 3D skeleton to the input image with center and scale
    '''
  debugger = Debugger()
  debugger.add_img(image)                   #adds image
  debugger.add_point_2d(pred, (255, 0, 0))  #adds 2D graph of joints to image
  debugger.add_point_3d(pred_3d, 'b')       #plots the 3D joint locations into the plot
  debugger.show_all_imgs(pause=False)       #this command enables showing the images
  debugger.show_3d()                        #this command shows the figures
  '''

    return pred_3d.flatten()
Example #11
def ctdet_post_process(dets, c, s, h, w, num_classes):
    # dets: batch x max_dets x dim
    # return 1-based class det dict
    ret = []
    for i in range(dets.shape[0]):
        top_preds = {}
        dets[i, :, :2] = transform_preds(dets[i, :, 0:2], c[i], s[i], (w, h))
        dets[i, :, 2:4] = transform_preds(dets[i, :, 2:4], c[i], s[i], (w, h))
        classes = dets[i, :, -1]
        for j in range(num_classes):
            inds = (classes == j)
            top_preds[j + 1] = np.concatenate(
                [dets[i, inds, :4].astype(np.float32),
                 dets[i, inds, 4:5].astype(np.float32)],
                axis=1).tolist()
        ret.append(top_preds)
    return ret
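Note: a function like ctdet_post_process above is normally fed the output of ctdet_decode: a (batch, K, 6) array of [x1, y1, x2, y2, score, class] rows in feature-map coordinates, plus the per-image crop center/scale and the feature-map size. A toy call with fabricated detections, assuming transform_preds is importable from the same codebase, just to show the expected shapes:

import numpy as np

dets = np.array([[[10., 10., 40., 40., 0.9, 0.],
                  [12., 15., 50., 60., 0.7, 1.],
                  [ 5.,  5., 20., 20., 0.2, 0.]]])     # (batch=1, K=3, 6)
c = [np.array([320., 240.], dtype=np.float32)]         # crop centers, one per image
s = [np.array([640., 480.], dtype=np.float32)]         # crop scales, one per image
h, w = 120, 160                                        # feature-map height/width
results = ctdet_post_process(dets.copy(), c, s, h, w, num_classes=2)
print(results[0].keys())                               # dict_keys([1, 2])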
Example #12
    def estimate(self, image):
        if isinstance(image, str):
            image = cv2.imread(image)
        inp, c, s = self.processImage(image)
        inp = torch.from_numpy(inp).to(self.device)
        out = self.model(inp)[-1]
        pred = get_preds(out['hm'].detach().cpu().numpy())[0]
        pred = transform_preds(pred, c, s, (self.output_w, self.output_h))
        pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                               out['depth'].detach().cpu().numpy())[0]
        return pred, pred_3d
Example #13
def demo_image(image, model, opt, name):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image,
                         trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]

    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    # pred 2d range (176, 256)
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]
    pred_3d_real_size = pred_3d * 4
    pred_3d_real_size[:, 0] = pred_3d_real_size[:, 0] - 40
    # print(pred_3d)
    # pdb.set_trace()
    pred_3d_ordered = np.zeros([15, 3])
    # the last one as mid hip for spline compute
    for i in range(16):
        pred_3d_ordered[corres[i]] = pred_3d_real_size[i]

    pred_3d_ordered[1] = (pred_3d_ordered[2] + pred_3d_ordered[5]) / 2
    pred_3d_ordered[14] = (pred_3d_ordered[8] + pred_3d_ordered[11]) / 2

    pred_3d_ordered[0] = -1
    pred_3d_ordered[9:11] = -1
    pred_3d_ordered[12:14] = -1

    from good_order_cood_angle_convert import absolute_angles, anglelimbtoxyz2
    # bias
    # neck as the offset
    # if pred_3d[8,:][0] != 0 or pred_3d[8,:][1] != 0:

    # bias = np.array([pred[8,0], pred[8,1]])
    absolute_angles, limbs, offset = absolute_angles(pred_3d_ordered)
    # pdb.set_trace()
    # rev = anglelimbtoxyz2(offset, absolute_angles, limbs)
    pred_2d = pred_3d_ordered[:, :2]

    dic = {
        'absolute_angles': absolute_angles,
        'limbs': limbs,
        'offset': offset
    }
    # pdb.set_trace()
    np.save(name, dic)
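Note: np.save(name, dic) pickles the Python dict into '<name>.npy' (the extension is appended automatically if missing). Reading it back requires allow_pickle=True and .item() to unwrap the 0-d object array; the same applies to the np.save call in Example #18. The file name below is only a placeholder:

import numpy as np

data = np.load('some_saved_output.npy', allow_pickle=True).item()
print(data['absolute_angles'], data['limbs'], data['offset'])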
Example #14
def show_det(dets, image, det_size, debugger, opt, pause=False, name=None):
    dets = dets.reshape(1, -1, dets.shape[2])
    h, w = image.shape[0:2]
    debugger.add_img(image, img_id='ctdet')

    c = np.array([w / 2, h / 2], dtype=np.float32)
    s = np.array([w, h], dtype=np.float32)
    dets[0, :, :2] = transform_preds(dets[0, :, 0:2], c, s, det_size)
    dets[0, :, 2:4] = transform_preds(dets[0, :, 2:4], c, s, det_size)
    classes = dets[0, :, -1]

    for j in range(opt.num_classes):
        inds = (classes == j)
        top_preds = dets[0, inds, :5].astype(np.float32)
        for bbox in top_preds:
            if bbox[4] > opt.vis_thresh:
                debugger.add_coco_bbox(bbox[:4], j, bbox[4], img_id='ctdet')

    if name:
        print('detecting:', name)
        debugger.save_all_imgs(path='./', prefix=name)
    else:
        debugger.show_all_imgs(pause=pause)
Example #15
    def convert_eval_format(self, pred, conf, meta):
        preds = np.zeros((pred.shape[0], pred.shape[1], 2))
        for i in range(pred.shape[0]):
            preds[i] = transform_preds(pred[i], meta['center'][i].numpy(),
                                       meta['scale'][i].numpy(),
                                       [self.opt.output_h, self.opt.output_w])

        ret = []
        for i in range(pred.shape[0]):
            kpts = np.concatenate([preds[i], conf[i]], axis=1).astype(
                np.int32).reshape(self.num_joints * 3).tolist()
            score = int(meta['score'][i])
            ret.append({'category_id': 1, 'image_id': int(meta['image_id'].numpy()), \
                        'keypoints': kpts, 'score': score})
        return ret
Example #16
def demo_image(image, model, opt):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(
        c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]

    return image, pred, pred_3d
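Note: this variant returns the raw results instead of drawing them: the image, the 2D joints pred (already in image coordinates) and the 3D joints pred_3d. A hypothetical consumer using plain matplotlib is sketched below; edges is assumed to be a list of (joint_i, joint_j) index pairs such as mpii_edges in this codebase.

import matplotlib.pyplot as plt

def show_skeleton(image, pred, edges):
    plt.imshow(image[..., ::-1])              # OpenCV BGR -> RGB
    plt.scatter(pred[:, 0], pred[:, 1], c='r', s=10)
    for i, j in edges:
        plt.plot([pred[i, 0], pred[j, 0]], [pred[i, 1], pred[j, 1]], 'b-')
    plt.axis('off')
    plt.show()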
Example #17
    def predict(self, image):
        s = max(image.shape[0], image.shape[1]) * 1.0
        c = np.array([image.shape[1] / 2., image.shape[0] / 2.],
                     dtype=np.float32)
        trans_input = get_affine_transform(
            c, s, 0, [self.opt.input_w, self.opt.input_h])
        inp = cv2.warpAffine(image,
                             trans_input, (self.opt.input_w, self.opt.input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp / 255. - mean) / std
        inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
        inp = torch.from_numpy(inp).to(self.opt.device)
        out = self.model(inp)[-1]
        pred = get_preds(out['hm'].detach().cpu().numpy())[0]
        pred = transform_preds(pred, c, s,
                               (self.opt.output_w, self.opt.output_h))
        # pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(), out['depth'].detach().cpu().numpy())[0]

        # Overlay points on top of image
        #return show_2d(image, pred, (255, 0, 0), mpii_edges)

        return image, pred, mpii_edges
Example #18
def demo_image(image, model, opt, name):
  s = max(image.shape[0], image.shape[1]) * 1.0
  c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
  trans_input = get_affine_transform(
      c, s, 0, [opt.input_w, opt.input_h])
  inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
  inp = (inp / 255. - mean) / std
  inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
  inp = torch.from_numpy(inp).to(opt.device)
  out = model(inp)[-1]

  pred = get_preds(out['hm'].detach().cpu().numpy())[0]
  pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
  pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(), 
                         out['depth'].detach().cpu().numpy())[0]
  
  if pred_3d[6,:][0] != 0 or pred_3d[6,:][1] != 0:
    print("A different bias!!")
    pdb.set_trace()

  bias = np.array([pred[6,0], pred[6,1]])
  dic = {'pred_3d': pred_3d, 'bias':bias}
  np.save(name, dic)
Example #19
    def val_map(epoch):
        print_log('\n Val@Epoch: %d' % epoch)
        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        speed_list = []
        with torch.no_grad():
            for inputs in val_loader:
                img_id, inputs = inputs[0]
                start_image_time = time.time()
                segmentations = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(
                        cfg.device)
                    # dict_tensor = torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device, non_blocking=True)
                    # dict_tensor.requires_grad = False

                    # hmap, regs, w_h_, _, _, codes, offsets = model(inputs[scale]['image'])[-1]
                    hmap, regs, w_h_, codes, offsets = model(
                        inputs[scale]['image'])[-1]
                    output = [hmap, regs, w_h_, codes, offsets]

                    segms = ctsegm_inmodal_code_decode(
                        *output,
                        torch.from_numpy(dictionary.astype(np.float32)).to(
                            cfg.device),
                        K=cfg.test_topk)

                    segms = segms.detach().cpu().numpy().reshape(
                        1, -1, segms.shape[2])[0]

                    top_preds = {}
                    for j in range(cfg.n_vertices):
                        segms[:, 2 * j:2 * j + 2] = transform_preds(
                            segms[:, 2 * j:2 * j + 2], inputs[scale]['center'],
                            inputs[scale]['scale'],
                            (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    segms[:, cfg.n_vertices * 2:cfg.n_vertices * 2 + 2] = \
                        transform_preds(
                            segms[:, cfg.n_vertices * 2:cfg.n_vertices * 2 + 2],
                            inputs[scale]['center'], inputs[scale]['scale'],
                            (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    segms[:, cfg.n_vertices * 2 + 2:cfg.n_vertices * 2 + 4] = \
                        transform_preds(
                            segms[:, cfg.n_vertices * 2 + 2:cfg.n_vertices * 2 + 4],
                            inputs[scale]['center'], inputs[scale]['scale'],
                            (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))

                    clses = segms[:, -1]
                    for j in range(val_dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = segms[inds, :cfg.n_vertices * 2 +
                                                 5].astype(np.float32)
                        top_preds[j + 1][:, :cfg.n_vertices * 2 + 4] /= scale

                    segmentations.append(top_preds)

                end_image_time = time.time()
                segms_and_scores = {
                    j: np.concatenate([d[j] for d in segmentations], axis=0)
                    for j in range(1, val_dataset.num_classes + 1)
                }
                scores = np.hstack([
                    segms_and_scores[j][:, cfg.n_vertices * 2 + 4]
                    for j in range(1, val_dataset.num_classes + 1)
                ])
                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, val_dataset.num_classes + 1):
                        keep_inds = (
                            segms_and_scores[j][:, cfg.n_vertices * 2 + 4] >=
                            thresh)
                        segms_and_scores[j] = segms_and_scores[j][keep_inds]

                results[img_id] = segms_and_scores
                speed_list.append(end_image_time - start_image_time)

        eval_results = val_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        print_log(eval_results)
        summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)
        print_log('Average speed on val set:{:.2f}'.format(
            1. / np.mean(speed_list)))

        return eval_results[0]
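Note: the slicing in this val_map() implies a fixed row layout for the decoded segmentations: 2 * n_vertices contour coordinates, then a 4-number box, then the confidence score, then the class id. A small helper that unpacks one row under that (inferred) layout:

import numpy as np

def split_segm_row(row, n_vertices):
    # row layout inferred from the indexing above: [contour | bbox | score | class]
    contour = np.asarray(row[:2 * n_vertices]).reshape(-1, 2)
    bbox = row[2 * n_vertices:2 * n_vertices + 4]
    score = row[2 * n_vertices + 4]
    cls = row[-1]
    return contour, bbox, score, cls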
Example #20
def main():
    # Create Test set labels for DETRAC
    detrac_root = cfg.label_dir
    dataType = 'Test'
    test_images = list()
    test_objects = list()

    annotation_folder = 'DETRAC-{}-Annotations-XML'.format(dataType)
    annotation_path = os.path.join(detrac_root, annotation_folder)
    if not os.path.exists(annotation_path):
        print('annotation_path does not exist')
        raise FileNotFoundError

    label_file = os.path.join(annotation_path, cfg.video_name + '.xml')
    tree = ET.parse(label_file)
    root = tree.getroot()
    object_list = list()

    Box_dict = {}
    for obj in root.iter('frame'):
        boxes = list()
        frame_num = int(obj.attrib['num'])
        target_list = obj.find('target_list')
        for target in target_list:
            bbox = target.find('box').attrib
            left = float(bbox['left'])
            top = float(bbox['top'])
            width = float(bbox['width'])
            height = float(bbox['height'])
            boxes.append([left, top, left + width,
                          top + height])  # x1, y1, x2, y2

        Box_dict[frame_num] = boxes

    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 150
    num_classes = 80 if cfg.dataset == 'coco' else 4

    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    # Set up parameters for outputting video
    width = cfg.video_width
    height = cfg.video_height
    fps = cfg.video_fps  # output video configuration
    video_out = cv2.VideoWriter(
        os.path.join(cfg.root_dir, cfg.video_name + '_compare.mkv'),
        cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'), fps, (width, height))

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5,
                     nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading images
    speed_list = []
    frame_list = sorted(os.listdir(os.path.join(cfg.img_dir, cfg.video_name)))
    n_frames = len(frame_list)

    for frame_id in range(n_frames):
        frame_n = frame_id + 1
        frame_name = frame_list[frame_id]
        image_path = os.path.join(cfg.img_dir, cfg.video_name, frame_name)

        image = cv2.imread(image_path)
        original_image = image.copy()
        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.],
                                  dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size],
                                       dtype=np.float32)
            else:
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2],
                                  dtype=np.float32)
                scaled_size = np.array([img_width, img_height],
                                       dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0,
                                             [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(
                COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                            dtype=np.float32)[None, None, :]
            img = img.transpose(
                2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            # if cfg.test_flip:
            #     img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

            imgs[scale] = {
                'image': torch.from_numpy(img).float(),
                'center': np.array(center),
                'scale': np.array(scaled_size),
                'fmap_h': np.array(img_height // 4),
                'fmap_w': np.array(img_width // 4)
            }

        with torch.no_grad():
            detections = []
            start_time = time.time()
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)

                output = model(imgs[scale]['image'])[-1]
                dets = ctdet_decode(*output, K=cfg.test_topk)
                dets = dets.detach().cpu().numpy().reshape(
                    1, -1, dets.shape[2])[0]

                top_preds = {}
                dets[:, :2] = transform_preds(
                    dets[:, 0:2], imgs[scale]['center'], imgs[scale]['scale'],
                    (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                dets[:, 2:4] = transform_preds(
                    dets[:, 2:4], imgs[scale]['center'], imgs[scale]['scale'],
                    (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                cls = dets[:, -1]
                for j in range(num_classes):
                    inds = (cls == j)
                    top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                    top_preds[j + 1][:, :4] /= scale

                detections.append(top_preds)

            bbox_and_scores = {}
            for j in range(1, num_classes + 1):
                bbox_and_scores[j] = np.concatenate([d[j] for d in detections],
                                                    axis=0)
                if len(cfg.test_scales) > 1:
                    soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
            scores = np.hstack(
                [bbox_and_scores[j][:, 4] for j in range(1, num_classes + 1)])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            # Use opencv functions to output a video
            speed_list.append(time.time() - start_time)
            output_image = original_image

            # Plot the GT boxes
            gt_bboxes = Box_dict[frame_n]
            for rect in gt_bboxes:
                x1, y1, x2, y2 = float(rect[0]), float(rect[1]), float(
                    rect[2]), float(rect[3])
                cv2.rectangle(output_image,
                              pt1=(int(x1), int(y1)),
                              pt2=(int(x2), int(y2)),
                              color=(0, 255, 0),
                              thickness=2)

            counter = 1
            for lab in bbox_and_scores:
                if cfg.dataset == 'coco':
                    if names[lab] not in DETRAC_compatible_names:
                        continue
                for boxes in bbox_and_scores[lab]:
                    x1, y1, x2, y2, score = boxes
                    if score > cfg.detect_thres:
                        text = names[lab] + '%.2f' % score
                        label_size = cv2.getTextSize(text,
                                                     cv2.FONT_HERSHEY_COMPLEX,
                                                     0.3, 1)
                        text_location = [
                            x1 + 2, y1 + 2, x1 + 2 + label_size[0][0],
                            y1 + 2 + label_size[0][1]
                        ]
                        cv2.rectangle(output_image,
                                      pt1=(int(x1), int(y1)),
                                      pt2=(int(x2), int(y2)),
                                      color=(0, 0, 255),
                                      thickness=2)
                        # cv2.putText(output_image, text, org=(int(text_location[0]), int(text_location[3])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=1, fontScale=0.3,
                        #             color=(0, 0, 255))

            cv2.imshow('Frames', output_image)
            video_out.write(output_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    print('Test frame rate:', 1. / np.mean(speed_list))
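Note: the (new_height | padding) + 1 sizing used above (padding = 127 for hourglass, 31 otherwise) bitwise-ORs the low bits and adds one, which rounds the side up to a multiple of padding + 1 (128 or 32) and always adds at least one pixel, so an exact multiple is bumped to the next one. A quick check:

for x in (1, 511, 512, 513):
    print(x, (x | 127) + 1)   # -> 128, 512, 640, 640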
Example #21
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100

    image = cv2.imread(cfg.img_dir)
    # orig_image = image
    height, width = image.shape[0:2]
    padding = 127 if 'hourglass' in cfg.arch else 31
    imgs = {}
    for scale in cfg.test_scales:
        new_height = int(height * scale)
        new_width = int(width * scale)

        if cfg.img_size > 0:
            img_height, img_width = cfg.img_size, cfg.img_size
            center = np.array([new_width / 2., new_height / 2.],
                              dtype=np.float32)
            scaled_size = max(height, width) * 1.0
            scaled_size = np.array([scaled_size, scaled_size],
                                   dtype=np.float32)
        else:
            img_height = (new_height | padding) + 1
            img_width = (new_width | padding) + 1
            center = np.array([new_width // 2, new_height // 2],
                              dtype=np.float32)
            scaled_size = np.array([img_width, img_height], dtype=np.float32)

        img = cv2.resize(image, (new_width, new_height))
        trans_img = get_affine_transform(center, scaled_size, 0,
                                         [img_width, img_height])
        img = cv2.warpAffine(img, trans_img, (img_width, img_height))

        img = img.astype(np.float32) / 255.
        img -= np.array(COCO_MEAN if cfg.dataset == 'coco' else VOC_MEAN,
                        dtype=np.float32)[None, None, :]
        img /= np.array(COCO_STD if cfg.dataset == 'coco' else VOC_STD,
                        dtype=np.float32)[None, None, :]
        img = img.transpose(2, 0,
                            1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

        if cfg.test_flip:
            img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

        imgs[scale] = {
            'image': torch.from_numpy(img).float(),
            'center': np.array(center),
            'scale': np.array(scaled_size),
            'fmap_h': np.array(img_height // 4),
            'fmap_w': np.array(img_width // 4)
        }

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=80 if cfg.dataset == 'coco' else 20)
    else:
        raise NotImplementedError

    model = load_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    with torch.no_grad():
        detections = []
        for scale in imgs:
            imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)

            output = model(imgs[scale]['image'])[-1]
            dets = ctdet_decode(*output, K=cfg.test_topk)
            dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

            top_preds = {}
            dets[:, :2] = transform_preds(
                dets[:, 0:2], imgs[scale]['center'], imgs[scale]['scale'],
                (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
            dets[:, 2:4] = transform_preds(
                dets[:, 2:4], imgs[scale]['center'], imgs[scale]['scale'],
                (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
            cls = dets[:, -1]
            for j in range(80):
                inds = (cls == j)
                top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                top_preds[j + 1][:, :4] /= scale

            detections.append(top_preds)

        bbox_and_scores = {}
        for j in range(1, 81 if cfg.dataset == 'coco' else 21):
            bbox_and_scores[j] = np.concatenate([d[j] for d in detections],
                                                axis=0)
            if len(cfg.test_scales) > 1:
                soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
        scores = np.hstack([
            bbox_and_scores[j][:, 4]
            for j in range(1, 81 if cfg.dataset == 'coco' else 21)
        ])

        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, 81 if cfg.dataset == 'coco' else 21):
                keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

        # plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # plt.show()
        fig = plt.figure(0)
        colors = COCO_COLORS if cfg.dataset == 'coco' else VOC_COLORS
        names = COCO_NAMES if cfg.dataset == 'coco' else VOC_NAMES
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        for lab in bbox_and_scores:
            for boxes in bbox_and_scores[lab]:
                x1, y1, x2, y2, score = boxes
                if score > 0.3:
                    plt.gca().add_patch(
                        Rectangle((x1, y1),
                                  x2 - x1,
                                  y2 - y1,
                                  linewidth=2,
                                  edgecolor=colors[lab],
                                  facecolor='none'))
                    plt.text(x1 + 3,
                             y1 + 3,
                             names[lab] + '%.2f' % score,
                             bbox=dict(facecolor=colors[lab], alpha=0.5),
                             fontsize=7,
                             color='k')

        fig.patch.set_visible(False)
        plt.axis('off')
        plt.savefig('data/demo_results.png', dpi=300, transparent=True)
        plt.show()
Example #22
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    num_classes = 80 if cfg.dataset == 'coco' else 4
    dictionary = np.load(cfg.dictionary_file)

    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5,
                     nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    else:
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=80)
        # raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading COCO validation images
    annotation_file = '{}/annotations/instances_{}.json'.format(
        cfg.data_dir, cfg.data_type)
    coco = COCO(annotation_file)

    # Load all annotations
    cats = coco.loadCats(coco.getCatIds())
    nms = [cat['name'] for cat in cats]
    catIds = coco.getCatIds(catNms=nms)
    # imgIds = coco.getImgIds(catIds=catIds)
    imgIds = coco.getImgIds()
    # annIds = coco.getAnnIds(catIds=catIds)
    # all_anns = coco.loadAnns(ids=annIds)
    # print(len(imgIds), imgIds)

    for id in imgIds:
        annt_ids = coco.getAnnIds(imgIds=[id])
        annotations_per_img = coco.loadAnns(ids=annt_ids)
        # print('All annots: ', len(annotations_per_img), annotations_per_img)
        img = coco.loadImgs(id)[0]
        image_path = '%s/images/%s/%s' % (cfg.data_dir, cfg.data_type,
                                          img['file_name'])
        w_img = int(img['width'])
        h_img = int(img['height'])
        if w_img < 1 or h_img < 1:
            continue

        img_original = cv2.imread(image_path)
        img_connect = cv2.imread(image_path)
        img_recon = cv2.imread(image_path)
        print('Image id: ', id)

        for annt in annotations_per_img:
            if annt['iscrowd'] == 1 or type(annt['segmentation']) != list:
                continue

            polygons = get_connected_polygon_using_mask(
                annt['segmentation'], (h_img, w_img),
                n_vertices=cfg.num_vertices,
                closing_max_kernel=60)
            gt_bbox = annt['bbox']
            gt_x1, gt_y1, gt_w, gt_h = gt_bbox
            contour = np.array(polygons).reshape((-1, 2))

            # Downsample the contour to fix number of vertices
            if len(contour) > cfg.num_vertices:
                resampled_contour = resample(contour, num=cfg.num_vertices)
            else:
                resampled_contour = turning_angle_resample(
                    contour, cfg.num_vertices)

            resampled_contour[:, 0] = np.clip(resampled_contour[:, 0], gt_x1,
                                              gt_x1 + gt_w)
            resampled_contour[:, 1] = np.clip(resampled_contour[:, 1], gt_y1,
                                              gt_y1 + gt_h)

            clockwise_flag = check_clockwise_polygon(resampled_contour)
            if not clockwise_flag:
                fixed_contour = np.flip(resampled_contour, axis=0)
            else:
                fixed_contour = resampled_contour.copy()

            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate(
                (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            x1, y1, x2, y2 = gt_x1, gt_y1, gt_x1 + gt_w, gt_y1 + gt_h

            # bbox_width, bbox_height = x2 - x1, y2 - y1
            # bbox = [x1, y1, bbox_width, bbox_height]
            # bbox_center = np.array([(x1 + x2) / 2., (y1 + y2) / 2.])
            bbox_center = np.mean(indexed_shape, axis=0)

            centered_shape = indexed_shape - bbox_center

            # visualize resampled points with multiple parts in image side by side
            for cnt in range(len(annt['segmentation'])):
                polys = np.array(annt['segmentation'][cnt]).reshape((-1, 2))
                cv2.polylines(img_original, [polys.astype(np.int32)],
                              True, (10, 10, 255),
                              thickness=2)
                # cv2.drawContours(img_original, [polys.astype(np.int32)], contourIdx=-1, color=(10, 10, 255), thickness=-1)

            cv2.polylines(img_connect, [indexed_shape.astype(np.int32)],
                          True, (10, 10, 255),
                          thickness=2)
            # cv2.drawContours(img_connect, [indexed_shape.astype(np.int32)], contourIdx=-1, color=(10, 10, 255), thickness=-1)

            learned_val_codes, _ = fast_ista(centered_shape.reshape((1, -1)),
                                             dictionary,
                                             lmbda=0.1,
                                             max_iter=60)
            recon_contour = np.matmul(learned_val_codes, dictionary).reshape(
                (-1, 2))
            recon_contour = recon_contour + bbox_center
            cv2.polylines(img_recon, [recon_contour.astype(np.int32)],
                          True, (10, 10, 255),
                          thickness=2)
            # cv2.drawContours(img_recon, [recon_contour.astype(np.int32)], contourIdx=-1, color=(10, 10, 255), thickness=-1)

            # plot gt mean and std
            # image = cv2.imread(image_path)
            # # cv2.ellipse(image, center=(int(contour_mean[0]), int(contour_mean[1])),
            # #             axes=(int(contour_std[0]), int(contour_std[1])),
            # #             angle=0, startAngle=0, endAngle=360, color=(0, 255, 0),
            # #             thickness=2)
            # cv2.rectangle(image, pt1=(int(contour_mean[0] - contour_std[0] / 2.), int(contour_mean[1] - contour_std[1] / 2.)),
            #               pt2=(int(contour_mean[0] + contour_std[0] / 2.), int(contour_mean[1] + contour_std[1] / 2.)),
            #               color=(0, 255, 0), thickness=2)
            # cv2.polylines(image, [fixed_contour.astype(np.int32)], True, (0, 0, 255))
            # cv2.rectangle(image, pt1=(int(min(fixed_contour[:, 0])), int(min(fixed_contour[:, 1]))),
            #               pt2=(int(max(fixed_contour[:, 0])), int(max(fixed_contour[:, 1]))),
            #               color=(255, 0, 0), thickness=2)
            # cv2.imshow('GT segments', image)
            # if cv2.waitKey() & 0xFF == ord('q'):
            #     break

        image = cv2.imread(image_path)
        original_image = image.copy()
        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.],
                                  dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size],
                                       dtype=np.float32)
            else:
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2],
                                  dtype=np.float32)
                scaled_size = np.array([img_width, img_height],
                                       dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0,
                                             [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(
                COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                            dtype=np.float32)[None, None, :]
            img = img.transpose(
                2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            # if cfg.test_flip:
            #     img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

            imgs[scale] = {
                'image': torch.from_numpy(img).float(),
                'center': np.array(center),
                'scale': np.array(scaled_size),
                'fmap_h': np.array(img_height // 4),
                'fmap_w': np.array(img_width // 4)
            }

        with torch.no_grad():
            segmentations = []
            predicted_codes = []
            start_time = time.time()
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)

                output = model(imgs[scale]['image'])[-1]
                # segms, codes_ = ctsegm_scaled_decode_debug(*output, torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device),
                #                             K=cfg.test_topk)
                segms = ctsegm_code_n_offset_decode(
                    *output,
                    torch.from_numpy(dictionary.astype(np.float32)).to(
                        cfg.device),
                    K=cfg.test_topk)
                segms = segms.detach().cpu().numpy().reshape(
                    1, -1, segms.shape[2])[0]
                # codes_ = codes_.detach().cpu().numpy().reshape(1, -1, codes_.shape[2])[0]

                top_preds = {}
                code_preds = {}
                for j in range(cfg.num_vertices):
                    segms[:, 2 * j:2 * j + 2] = transform_preds(
                        segms[:, 2 * j:2 * j + 2], imgs[scale]['center'],
                        imgs[scale]['scale'],
                        (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2:cfg.num_vertices * 2 + 2] = \
                    transform_preds(
                        segms[:, cfg.num_vertices * 2:cfg.num_vertices * 2 + 2],
                        imgs[scale]['center'], imgs[scale]['scale'],
                        (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2 + 2:cfg.num_vertices * 2 + 4] = \
                    transform_preds(
                        segms[:, cfg.num_vertices * 2 + 2:cfg.num_vertices * 2 + 4],
                        imgs[scale]['center'], imgs[scale]['scale'],
                        (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))

                clses = segms[:, -1]
                for j in range(num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = segms[inds, :cfg.num_vertices * 2 +
                                             5].astype(np.float32)
                    top_preds[j + 1][:, :cfg.num_vertices * 2 + 4] /= scale
                    # code_preds[j + 1] = codes_[inds, :]

                segmentations.append(top_preds)
                predicted_codes.append(code_preds)

            segms_and_scores = {
                j: np.concatenate([d[j] for d in segmentations], axis=0)
                for j in range(1, num_classes + 1)
            }  # a Dict label: segments
            # codes_and_scores = {j: np.concatenate([d[j] for d in predicted_codes], axis=0)
            #                     for j in range(1, num_classes + 1)}  # a Dict label: segments
            scores = np.hstack([
                segms_and_scores[j][:, cfg.num_vertices * 2 + 4]
                for j in range(1, num_classes + 1)
            ])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (segms_and_scores[j][:, cfg.num_vertices * 2 +
                                                     4] >= thresh)
                    segms_and_scores[j] = segms_and_scores[j][keep_inds]
                    # codes_and_scores[j] = codes_and_scores[j][keep_inds]

            # Use opencv functions to output a video
            output_image = original_image
            blend_mask = np.zeros(shape=output_image.shape, dtype=np.uint8)
            # print(blend_mask.shape)

            for lab in segms_and_scores:
                for idx in range(len(segms_and_scores[lab])):
                    res = segms_and_scores[lab][idx]
                    # c_ = codes_and_scores[lab][idx]
                    # for res in segms_and_scores[lab]:
                    contour, bbox, score = res[:-5], res[-5:-1], res[-1]
                    bbox[0] = np.clip(bbox[0], 0, w_img)
                    bbox[1] = np.clip(bbox[1], 0, h_img)
                    bbox[2] = np.clip(bbox[2], 0, w_img)
                    bbox[3] = np.clip(bbox[3], 0, h_img)
                    if score > cfg.detect_thres:
                        text = names[lab]  # + ' %.2f' % score
                        # label_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_COMPLEX, thickness=2, fontScale=0.5)
                        polygon = contour.reshape((-1, 2))
                        # print('Shape: Poly -- ', polygon.shape)
                        # print(polygon)
                        polygon[:, 0] = np.clip(polygon[:, 0], 0, w_img - 1)
                        polygon[:, 1] = np.clip(polygon[:, 1], 0, h_img - 1)

                        # use bb tools to draw predictions
                        color = random.choice(COLOR_WORLD)
                        bb.add(output_image, bbox[0], bbox[1], bbox[2],
                               bbox[3], text, color)
                        cv2.polylines(output_image, [polygon.astype(np.int32)],
                                      True,
                                      RGB_DICT[color],
                                      thickness=1)
                        cv2.drawContours(blend_mask,
                                         [polygon.astype(np.int32)],
                                         contourIdx=-1,
                                         color=RGB_DICT[color],
                                         thickness=-1)

                        # color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
                        # contour_mean = np.mean(polygon, axis=0)
                        # contour_std = np.std(polygon, axis=0)
                        # center_x, center_y = np.mean(polygon, axis=0).astype(np.int32)
                        # text_location = [bbox[0] + 1, bbox[1] + 1,
                        #                  bbox[1] + label_size[0][0] + 1,
                        #                  bbox[0] + label_size[0][1] + 1]
                        # cv2.rectangle(output_image, pt1=(int(bbox[0]), int(bbox[1])),
                        #               pt2=(int(bbox[2]), int(bbox[3])),
                        #               color=color, thickness=1)
                        # cv2.rectangle(output_image, pt1=(int(np.min(polygon[:, 0])), int(np.min(polygon[:, 1]))),
                        #               pt2=(int(np.max(polygon[:, 0])), int(np.max(polygon[:, 1]))),
                        #               color=(0, 255, 0), thickness=1)
                        # cv2.polylines(output_image, [polygon.astype(np.int32)], True, color, thickness=2)
                        # cv2.putText(output_image, text, org=(int(text_location[0]), int(text_location[3])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=2, fontScale=0.5,
                        #             color=(255, 0, 0))
                        # cv2.putText(output_image, text, org=(int(bbox[0]), int(bbox[1])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=1, fontScale=0.5,
                        #             color=color)

                        # show the histgram for predicted codes
                        # fig = plt.figure()
                        # plt.plot(np.arange(cfg.n_codes), c_.reshape((-1,)), color='green',
                        #          marker='o', linestyle='dashed', linewidth=2, markersize=6)
                        # plt.ylabel('Value of each coefficient')
                        # plt.xlabel('All predicted {} coefficients'.format(cfg.n_codes))
                        # plt.title('Distribution of the predicted coefficients for {}'.format(text))
                        # plt.show()

            value = [255, 255, 255]
            dst_img = cv2.addWeighted(output_image, 0.5, blend_mask, 0.5, 0)
            dst_img[blend_mask == 0] = output_image[blend_mask == 0]
            img_original = cv2.copyMakeBorder(img_original, 0, 0, 0, 10,
                                              cv2.BORDER_CONSTANT, None, value)
            img_connect = cv2.copyMakeBorder(img_connect, 0, 0, 10, 10,
                                             cv2.BORDER_CONSTANT, None, value)
            img_recon = cv2.copyMakeBorder(img_recon, 0, 0, 10, 10,
                                           cv2.BORDER_CONSTANT, None, value)
            dst_img = cv2.copyMakeBorder(dst_img, 0, 0, 10, 0,
                                         cv2.BORDER_CONSTANT, None, value)
            im_cat = np.concatenate(
                (img_original, img_connect, img_recon, dst_img), axis=1)
            # im_cat = np.concatenate((img_original, img_connect, img_recon), axis=1)
            cv2.imshow('GT:Resample:Recons:Predict', im_cat)
            if cv2.waitKey() & 0xFF == ord('q'):
                break
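Note: in this example the object contour is encoded as a code vector over a learned dictionary (fast_ista) and reconstructed with a single matrix product, recon = codes @ dictionary, then shifted back by the contour's mean. The sketch below reproduces that round trip with ordinary least squares standing in for fast_ista and a random matrix standing in for the learned dictionary, purely to show the shapes involved:

import numpy as np

num_vertices, n_codes = 32, 64
dictionary = np.random.randn(n_codes, 2 * num_vertices)    # stand-in for the learned atoms
contour = np.random.randn(num_vertices, 2)                  # an already-centered contour
flat = contour.reshape(1, -1)                                # (1, 2 * num_vertices)
codes, *_ = np.linalg.lstsq(dictionary.T, flat.T, rcond=None)
recon = np.matmul(codes.T, dictionary).reshape(-1, 2)        # same product as in the example
print(np.allclose(recon, contour))                           # True: 64 atoms span the 64-dim space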
Example #23
def main():
    cfg = get_cfg()
    max_per_image = 100
    num_classes = cfg.num_classes

    print('Loading model...')
    model_name = '%s_hc%s' % (cfg.arch, cfg.head_conv)
    model, shift_buffer = load_network_arch(cfg.arch,
                                            cfg.num_classes,
                                            cfg.head_conv,
                                            pretrained=False)
    model = load_model(model,
                       cfg.model_path,
                       is_nested=False,
                       map_location='cpu')

    model = model.to(cfg.device)
    model.eval()

    debugger = Debugger(dataset=cfg.dataset, ipynb=False, theme='black')

    all_inputs = [load_and_transform_image(cfg.fn_image, cfg.img_size)]

    results = {}
    with torch.no_grad():
        img_id, inputs = all_inputs[0]

        detections = []
        for scale in [1.]:
            img_numpy = inputs[scale]['image']
            img = torch.from_numpy(img_numpy).to(cfg.device)
            output = model(img)[-1]  # array of 3
            dets = ctdet_decode(*output,
                                K=cfg.test_topk)  # torch.Size([1, 100, 6])
            dets = dets.detach().cpu().numpy().reshape(
                1, -1, dets.shape[2])[0]  # (100,6)
            # debug img uses dets prior to post_process
            add_debug_image(debugger, img_numpy, dets, output, scale)

            # print( 'meta: ', inputs[scale]['center'], inputs[scale]['scale'], inputs[scale]['fmap_w'], inputs[scale]['fmap_h'] )

            dets[:, :2] = transform_preds(
                dets[:, 0:2], inputs[scale]['center'], inputs[scale]['scale'],
                (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
            dets[:, 2:4] = transform_preds(
                dets[:, 2:4], inputs[scale]['center'], inputs[scale]['scale'],
                (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))

            # print( 'dets post_proc: ', dets )
            # MNV3:     [[117.8218   132.52121  227.10435  351.23346    0.854211  14.      ]]
            # resnet18: [[115.41386, 133.93118, 230.14862, 356.79816, 0.90593797]]

            cls = dets[:, -1]  # (100,)
            top_preds = {}
            for j in range(num_classes):
                inds = (cls == j)
                top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                top_preds[j + 1][:, :4] /= scale

            detections.append(top_preds)

        bbox_and_scores = {}
        for j in range(1, num_classes + 1):
            bbox_and_scores[j] = np.concatenate([d[j] for d in detections],
                                                axis=0)
            # if len(dataset.test_scales) > 1:
            # soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
        scores = np.hstack(
            [bbox_and_scores[j][:, 4] for j in range(1, num_classes + 1)])

        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, num_classes + 1):
                keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

        results[img_id] = bbox_and_scores
        # print( 'bbox_and_scores: ', bbox_and_scores )

        # show_results(debugger, image, results)
        debugger.show_all_imgs(pause=True)
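The per-image cap (max_per_image) used in the example above, and repeated in the following ones, can be factored into a single helper. A sketch, assuming each class entry of bbox_and_scores is an (N, 5) float array with the score in column 4; cap_detections is a hypothetical name.

import numpy as np

def cap_detections(bbox_and_scores, max_per_image=100):
    # Gather all scores across classes, find the K-th largest with np.partition (O(n)),
    # and drop every detection whose score falls below that threshold
    scores = np.hstack([bbox_and_scores[j][:, 4] for j in bbox_and_scores])
    if len(scores) > max_per_image:
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in bbox_and_scores:
            keep_inds = bbox_and_scores[j][:, 4] >= thresh
            bbox_and_scores[j] = bbox_and_scores[j][keep_inds]
    return bbox_and_scores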
Example n. 24
0
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    if cfg.dataset == 'coco':
        num_classes = 80
        colors = COCO_COLORS
        names = COCO_NAMES
    elif cfg.dataset == 'DETRAC':
        num_classes = 3
        colors = DETRAC_COLORS
        names = DETRAC_NAMES
    elif cfg.dataset == 'kins':
        num_classes = 7
        colors = KINS_COLORS
        names = KINS_NAMES
    else:
        print('Please specify a valid dataset name.')
        raise NotImplementedError

    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    # Set up parameters for outputting the video
    output_folder = os.path.join(cfg.root_dir, 'demo')
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    width = cfg.video_width
    height = cfg.video_height
    fps = cfg.video_fps  # output video configuration
    video_out = cv2.VideoWriter(
        os.path.join(output_folder, cfg.output_video_file),
        cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'), fps, (width, height))
    text_out = open(os.path.join(output_folder, cfg.output_text_file), 'w')
    dictionary = np.load(cfg.dictionary_file)

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5,
                     nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    elif 'resdcn' in cfg.arch:
        model = get_pose_resdcn(num_layers=int(cfg.arch.split('_')[-1]),
                                head_conv=64,
                                num_classes=num_classes,
                                num_codes=cfg.n_codes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading images
    speed_list = []
    frame_list = sorted(os.listdir(cfg.img_dir))
    n_frames = len(frame_list)

    for frame_id in range(n_frames):
        frame_name = frame_list[frame_id]
        image_path = os.path.join(cfg.img_dir, frame_name)

        image = cv2.imread(image_path)
        original_image = image.copy()
        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size[0] > 0 and cfg.img_size[1] > 0:
                img_height, img_width = cfg.img_size[0], cfg.img_size[1]
                center = np.array([new_width / 2., new_height / 2.],
                                  dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size],
                                       dtype=np.float32)
            else:
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2],
                                  dtype=np.float32)
                scaled_size = np.array([img_width, img_height],
                                       dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0,
                                             [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(
                COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                            dtype=np.float32)[None, None, :]
            img = img.transpose(
                2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            imgs[scale] = {
                'image': torch.from_numpy(img).float(),
                'center': np.array(center),
                'scale': np.array(scaled_size),
                'fmap_h': np.array(img_height // 4),
                'fmap_w': np.array(img_width // 4)
            }

        with torch.no_grad():
            segmentations = []
            predicted_codes = []
            start_time = time.time()
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)
                hmap, regs, w_h_, offsets, _, _, codes = model(
                    imgs[scale]['image'])[-1]
                output = [hmap, regs, w_h_, codes, offsets]

                segms = ctsegm_scale_decode(
                    *output,
                    torch.from_numpy(dictionary.astype(np.float32)).to(
                        cfg.device),
                    K=cfg.test_topk)
                segms = segms.detach().cpu().numpy().reshape(
                    1, -1, segms.shape[2])[0]

                top_preds = {}
                code_preds = {}
                for j in range(cfg.num_vertices):
                    segms[:, 2 * j:2 * j + 2] = transform_preds(
                        segms[:, 2 * j:2 * j + 2], imgs[scale]['center'],
                        imgs[scale]['scale'],
                        (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2:cfg.num_vertices * 2 +
                      2] = transform_preds(
                          segms[:,
                                cfg.num_vertices * 2:cfg.num_vertices * 2 + 2],
                          imgs[scale]['center'], imgs[scale]['scale'],
                          (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2 + 2:cfg.num_vertices * 2 +
                      4] = transform_preds(
                          segms[:, cfg.num_vertices * 2 +
                                2:cfg.num_vertices * 2 + 4],
                          imgs[scale]['center'], imgs[scale]['scale'],
                          (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))

                clses = segms[:, -1]
                for j in range(num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = segms[inds, :cfg.num_vertices * 2 +
                                             5].astype(np.float32)
                    top_preds[j + 1][:, :cfg.num_vertices * 2 + 4] /= scale

                segmentations.append(top_preds)
                predicted_codes.append(code_preds)

            segms_and_scores = {
                j: np.concatenate([d[j] for d in segmentations], axis=0)
                for j in range(1, num_classes + 1)
            }  # a Dict label: segments
            scores = np.hstack([
                segms_and_scores[j][:, cfg.num_vertices * 2 + 4]
                for j in range(1, num_classes + 1)
            ])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (segms_and_scores[j][:, cfg.num_vertices * 2 +
                                                     4] >= thresh)
                    segms_and_scores[j] = segms_and_scores[j][keep_inds]
                    # codes_and_scores[j] = codes_and_scores[j][keep_inds]

            # Record the per-frame processing time so the frame-rate report at the end is valid
            speed_list.append(time.time() - start_time)

            # Use opencv functions to output a video
            output_image = original_image
            blend_mask = np.zeros(shape=output_image.shape, dtype=np.uint8)

            counter = 1
            for lab in segms_and_scores:
                if cfg.dataset == 'coco':
                    if names[lab] not in display_cat and cfg.dataset != 'kins':
                        continue
                for idx in range(len(segms_and_scores[lab])):
                    res = segms_and_scores[lab][idx]
                    contour, bbox, score = res[:-5], res[-5:-1], res[-1]
                    bbox[0] = np.clip(bbox[0], 0, width - 1)
                    bbox[1] = np.clip(bbox[1], 0, height - 1)
                    bbox[2] = np.clip(bbox[2], 0, width - 1)
                    bbox[3] = np.clip(bbox[3], 0, height - 1)

                    polygon = contour.reshape((-1, 2))
                    polygon[:, 0] = np.clip(polygon[:, 0], 0, width - 1)
                    polygon[:, 1] = np.clip(polygon[:, 1], 0, height - 1)
                    if score > cfg.detect_thres:
                        text = names[lab] + ' %.2f' % score
                        label_size = cv2.getTextSize(text,
                                                     cv2.FONT_HERSHEY_COMPLEX,
                                                     0.3, 1)
                        text_location = [
                            int(bbox[0]) + 2,
                            int(bbox[1]) + 2,
                            int(bbox[0]) + 2 + label_size[0][0],
                            int(bbox[1]) + 2 + label_size[0][1]
                        ]
                        # cv2.rectangle(output_image, pt1=(int(bbox[0]), int(bbox[1])),
                        #               pt2=(int(bbox[2]), int(bbox[3])),
                        #               color=colors[lab], thickness=2)
                        # cv2.rectangle(output_image, pt1=(int(bbox[0]), int(bbox[1])),
                        #               pt2=(int(bbox[2]), int(bbox[3])),
                        #               color=nice_colors[names[lab]], thickness=2)
                        # cv2.putText(output_image, text, org=(int(text_location[0]), int(text_location[3])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=1, fontScale=0.3,
                        #             color=nice_colors[names[lab]])

                        cv2.polylines(output_image, [polygon.astype(np.int32)],
                                      True,
                                      color=nice_colors[names[lab]],
                                      thickness=2)
                        cv2.drawContours(blend_mask,
                                         [polygon.astype(np.int32)],
                                         contourIdx=-1,
                                         color=nice_colors[names[lab]],
                                         thickness=-1)

                        # add to text file
                        new_line = '{0},{1},{2:.3f},{3:.3f},{4:.3f},{5:.3f},{6:.4f}\n'.format(
                            str(frame_id + 1), counter, int(bbox[0]),
                            int(bbox[1]),
                            int(bbox[2]) - int(bbox[0]),
                            int(bbox[3]) - int(bbox[1]), score)
                        counter += 1
                        text_out.write(new_line)

            dst_img = cv2.addWeighted(output_image, 0.4, blend_mask, 0.6, 0)
            dst_img[blend_mask == 0] = output_image[blend_mask == 0]
            output_image = dst_img

            cv2.imshow('Frames', output_image)
            video_out.write(output_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    print('Test frame rate:', 1. / np.mean(speed_list))
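The `(new_height | padding) + 1` expression above relies on padding being of the form 2**k - 1 (31 for ResNet-style backbones, 127 for the hourglass): OR-ing with padding sets the low k bits, so adding 1 gives the smallest multiple of padding + 1 strictly greater than the input. A small sketch of the idea (round_up_to_stride is a hypothetical name):

def round_up_to_stride(n, padding=31):
    # padding must be 2**k - 1; the result is the next multiple of padding + 1 above n
    return (n | padding) + 1

assert round_up_to_stride(500, 31) == 512
assert round_up_to_stride(600, 127) == 640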
Example n. 25
0
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    num_classes = 80 if cfg.dataset == 'coco' else 4

    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    # Set up parameters for outputting the video
    output_name = 'demo/'
    width = cfg.video_width
    height = cfg.video_height
    fps = cfg.video_fps  # output video configuration
    video_out = cv2.VideoWriter(cfg.output_video_dir,
                                cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'), fps, (width, height))
    text_out = open(cfg.output_text_dir, 'w')

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5, nstack=2, dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4], num_classes=num_classes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading images
    speed_list = []
    frame_list = sorted(os.listdir(cfg.img_dir))
    n_frames = len(frame_list)

    for frame_id in range(n_frames):
        frame_name = frame_list[frame_id]
        image_path = os.path.join(cfg.img_dir, frame_name)

        image = cv2.imread(image_path)
        original_image = image.copy()
        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size], dtype=np.float32)
            else:
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2], dtype=np.float32)
                scaled_size = np.array([img_width, img_height], dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0, [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN, dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD, dtype=np.float32)[None, None, :]
            img = img.transpose(2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            # if cfg.test_flip:
            #     img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

            imgs[scale] = {'image': torch.from_numpy(img).float(),
                           'center': np.array(center),
                           'scale': np.array(scaled_size),
                           'fmap_h': np.array(img_height // 4),
                           'fmap_w': np.array(img_width // 4)}

        with torch.no_grad():
            detections = []
            start_time = time.time()
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)

                output = model(imgs[scale]['image'])[-1]
                dets = ctdet_decode(*output, K=cfg.test_topk)
                dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

                top_preds = {}
                dets[:, :2] = transform_preds(dets[:, 0:2],
                                              imgs[scale]['center'],
                                              imgs[scale]['scale'],
                                              (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                               imgs[scale]['center'],
                                               imgs[scale]['scale'],
                                               (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                cls = dets[:, -1]
                for j in range(num_classes):
                    inds = (cls == j)
                    top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                    top_preds[j + 1][:, :4] /= scale

                detections.append(top_preds)

            bbox_and_scores = {}
            for j in range(1, num_classes + 1):
                bbox_and_scores[j] = np.concatenate([d[j] for d in detections], axis=0)
                if len(cfg.test_scales) > 1:
                    soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
            scores = np.hstack([bbox_and_scores[j][:, 4] for j in range(1, num_classes + 1)])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            # Use opencv functions to output a video
            # output_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
            speed_list.append(time.time() - start_time)
            output_image = original_image

            counter = 1
            for lab in bbox_and_scores:
                if cfg.dataset == 'coco':
                    if names[lab] not in DETRAC_compatible_names:
                        continue
                for boxes in bbox_and_scores[lab]:
                    x1, y1, x2, y2, score = boxes
                    if score > cfg.detect_thres:
                        text = names[lab] + '%.2f' % score
                        label_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_COMPLEX, 0.3, 1)
                        text_location = [x1 + 2, y1 + 2,
                                         x1 + 2 + label_size[0][0],
                                         y1 + 2 + label_size[0][1]]
                        # cv2.rectangle(output_image, pt1=(int(x1), int(y1)),
                        #               pt2=(int(x2), int(y2)),
                        #               color=colors[lab], thickness=2)
                        cv2.rectangle(output_image, pt1=(int(x1), int(y1)),
                                      pt2=(int(x2), int(y2)),
                                      color=(0, 255, 0), thickness=2)
                        # cv2.putText(output_image, text, org=(int(text_location[0]), int(text_location[3])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=1, fontScale=0.3,
                        #             color=(0, 0, 255))

                        # add to text file
                        new_line = '{0},{1},{2:.3f},{3:.3f},{4:.3f},{5:.3f},{6:.4f}\n'.format(str(frame_id + 1),
                                                                                              counter,
                                                                                              x1, y1, x2 - x1, y2 - y1,
                                                                                              score)
                        counter += 1
                        text_out.write(new_line)

            cv2.imshow('Frames', output_image)
            video_out.write(output_image)
            if cv2.waitKey(5) & 0xFF == ord('q'):
                break

    print('Test frame rate:', 1. / np.mean(speed_list))
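Both demo scripts above write one comma-separated line per kept detection to a text file. A sketch of the same layout as a helper, assuming a 1-based frame id and a top-left-corner plus width/height layout; format_det_line is a hypothetical name.

def format_det_line(frame_id, counter, x1, y1, x2, y2, score):
    # frame, running object counter, x, y, width, height, confidence
    return '{0},{1},{2:.3f},{3:.3f},{4:.3f},{5:.3f},{6:.4f}\n'.format(
        frame_id + 1, counter, x1, y1, x2 - x1, y2 - y1, score)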
Example n. 26
0
    def Evaluate(epoch, model):
        print('\n Evaluate@Epoch: %d' % epoch)

        start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
        print('Start time %s Seconds' % start_time)

        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        with torch.no_grad():
            for inputs in data_loader:
                img_id, inputs, img_path = inputs[0]

                detections = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(
                        cfg.device)  # (1,3)
                    output = model(
                        inputs[scale]['image'])[-1]  # hmap, regs, pxpy
                    dets = ctdet_decode(
                        *output, K=cfg.test_topk
                    )  # torch.cat([bboxes, scores, clses], dim=2)
                    dets = dets.detach().cpu().numpy().reshape(
                        1, -1, dets.shape[2])[0]

                    top_preds = {}
                    dets[:, :2] = transform_preds(
                        dets[:, 0:2], inputs[scale]['center'],
                        inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    dets[:, 2:4] = transform_preds(
                        dets[:, 2:4], inputs[scale]['center'],
                        inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    clses = dets[:, -1]
                    for j in range(dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                        top_preds[j + 1][:, :4] /= scale

                    detections.append(top_preds)

                bbox_and_scores = {
                    j: np.concatenate([d[j] for d in detections], axis=0)
                    for j in range(1, dataset.num_classes + 1)
                }
                scores = np.hstack([
                    bbox_and_scores[j][:, 4]
                    for j in range(1, dataset.num_classes + 1)
                ])
                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, dataset.num_classes + 1):
                        keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                        bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

                results[img_id] = bbox_and_scores

        end_time = time.perf_counter()

        eval_results = dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        print(eval_results)

        print('End time %s Seconds' % end_time)
        run_time = end_time - start_time
        fps = len(results) / run_time  # number of evaluated images per second
        print('FPS %s ' % fps)

        #summary_writer.add_scalar('Evaluate_mAP/mAP', eval_results[0], epoch)
        return eval_results[0]
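The wall-clock throughput measured in Evaluate above (with time.perf_counter, since time.clock was removed in Python 3.8) can be wrapped in one small helper. A sketch under that assumption; measure_fps and run_eval_fn are hypothetical names.

import time

def measure_fps(run_eval_fn, num_images):
    # Wall-clock throughput: images evaluated per second
    start = time.perf_counter()
    run_eval_fn()
    elapsed = time.perf_counter() - start
    return num_images / elapsed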
Example n. 27
0
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    num_classes = 80 if cfg.dataset == 'coco' else 4
    dictionary = np.load(cfg.dictionary_file)

    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5, nstack=2, dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4], num_classes=num_classes)
    elif 'resdcn' in cfg.arch:
        model = get_pose_resdcn(num_layers=int(cfg.arch.split('_')[-1]), head_conv=64,
                                num_classes=num_classes, num_codes=cfg.n_codes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading COCO validation images
    if 'train' in cfg.data_type:
        annotation_file = '{}/annotations/instances_train2017.json'.format(cfg.data_dir)
        cfg.data_type = 'train2017'
    elif 'test' in cfg.data_type:
        annotation_file = '{}/annotations/image_info_test-dev2017.json'.format(cfg.data_dir)
        cfg.data_type = 'test2017'
    else:
        annotation_file = '{}/annotations/instances_val2017.json'.format(cfg.data_dir)
        cfg.data_type = 'val2017'
    coco = COCO(annotation_file)

    # Load all annotations
    # cats = coco.loadCats(coco.getCatIds())
    # nms = [cat['name'] for cat in cats]
    # catIds = coco.getCatIds(catNms=nms)
    # imgIds = np.sort(coco.getImgIds()).tolist()
    imgIds = coco.getImgIds()
    # annIds = coco.getAnnIds(catIds=catIds)
    # all_anns = coco.loadAnns(ids=annIds)

    for img_id in imgIds:
        img = coco.loadImgs(img_id)[0]
        image_path = '%s/coco/%s/%s' % (cfg.data_dir, cfg.data_type, img['file_name'])
        w_img = int(img['width'])
        h_img = int(img['height'])
        if w_img < 1 or h_img < 1:
            continue

        ann_ids = coco.getAnnIds(imgIds=img_id)
        gt_anns = coco.loadAnns(ids=ann_ids)

        # plot gt mean and std
        # image = cv2.imread(image_path)
        # # cv2.ellipse(image, center=(int(contour_mean[0]), int(contour_mean[1])),
        # #             axes=(int(contour_std[0]), int(contour_std[1])),
        # #             angle=0, startAngle=0, endAngle=360, color=(0, 255, 0),
        # #             thickness=2)
        # cv2.rectangle(image, pt1=(int(contour_mean[0] - contour_std[0] / 2.), int(contour_mean[1] - contour_std[1] / 2.)),
        #               pt2=(int(contour_mean[0] + contour_std[0] / 2.), int(contour_mean[1] + contour_std[1] / 2.)),
        #               color=(0, 255, 0), thickness=2)
        # cv2.polylines(image, [fixed_contour.astype(np.int32)], True, (0, 0, 255))
        # cv2.rectangle(image, pt1=(int(min(fixed_contour[:, 0])), int(min(fixed_contour[:, 1]))),
        #               pt2=(int(max(fixed_contour[:, 0])), int(max(fixed_contour[:, 1]))),
        #               color=(255, 0, 0), thickness=2)
        # cv2.imshow('GT segments', image)
        # if cv2.waitKey() & 0xFF == ord('q'):
        #     break

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            continue
        print('Loading image of id:', img_id)

        # plotting the groundtruth
        gt_image = image.copy()
        gt_blend_mask = np.zeros(shape=gt_image.shape, dtype=np.uint8)
        for ann_ in gt_anns:
            if ann_['iscrowd'] == 1:
                continue
            polygons_ = ann_['segmentation']
            use_color_key = COLOR_WORLD[random.randint(1, len(COLOR_WORLD)) - 1]
            for poly in polygons_:
                poly = np.array(poly).reshape((-1, 2))
                cv2.polylines(gt_image, [poly.astype(np.int32)], True,
                              color=switch_tuple(RGB_DICT[use_color_key]),
                              thickness=2)
                cv2.drawContours(gt_blend_mask, [poly.astype(np.int32)], contourIdx=-1,
                                 color=switch_tuple(RGB_DICT[use_color_key]),
                                 thickness=-1)

        original_image = image.copy()
        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size], dtype=np.float32)
            else:
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2], dtype=np.float32)
                scaled_size = np.array([img_width, img_height], dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0, [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN, dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD, dtype=np.float32)[None, None, :]
            img = img.transpose(2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            # if cfg.test_flip:
            #     img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

            imgs[scale] = {'image': torch.from_numpy(img).float(),
                           'center': np.array(center),
                           'scale': np.array(scaled_size),
                           'fmap_h': np.array(img_height // 4),
                           'fmap_w': np.array(img_width // 4)}

        with torch.no_grad():
            segmentations = []
            predicted_codes = []
            start_time = time.time()
            print('Start running model ......')
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)
                hmap, regs, w_h_, _, _, codes, offsets = model(imgs[scale]['image'])[-1]
                output = [hmap, regs, w_h_, codes, offsets]

                segms = ctsegm_scale_decode(*output,
                                            torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device),
                                            K=cfg.test_topk)
                segms = segms.detach().cpu().numpy().reshape(1, -1, segms.shape[2])[0]

                top_preds = {}
                code_preds = {}
                for j in range(cfg.num_vertices):
                    segms[:, 2 * j:2 * j + 2] = transform_preds(segms[:, 2 * j:2 * j + 2],
                                                                imgs[scale]['center'],
                                                                imgs[scale]['scale'],
                                                                (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2:cfg.num_vertices * 2 + 2] = transform_preds(
                    segms[:, cfg.num_vertices * 2:cfg.num_vertices * 2 + 2],
                    imgs[scale]['center'],
                    imgs[scale]['scale'],
                    (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2 + 2:cfg.num_vertices * 2 + 4] = transform_preds(
                    segms[:, cfg.num_vertices * 2 + 2:cfg.num_vertices * 2 + 4],
                    imgs[scale]['center'],
                    imgs[scale]['scale'],
                    (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))

                clses = segms[:, -1]
                for j in range(num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = segms[inds, :cfg.num_vertices * 2 + 5].astype(np.float32)
                    top_preds[j + 1][:, :cfg.num_vertices * 2 + 4] /= scale

                segmentations.append(top_preds)
                predicted_codes.append(code_preds)

            segms_and_scores = {j: np.concatenate([d[j] for d in segmentations], axis=0)
                                for j in range(1, num_classes + 1)}  # a Dict label: segments
            scores = np.hstack(
                [segms_and_scores[j][:, cfg.num_vertices * 2 + 4] for j in range(1, num_classes + 1)])

            print('Image processing time {:.4f} sec, preparing output image ......'.format(time.time() - start_time))

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (segms_and_scores[j][:, cfg.num_vertices * 2 + 4] >= thresh)
                    segms_and_scores[j] = segms_and_scores[j][keep_inds]

            # Use opencv functions to output
            output_image = original_image
            blend_mask = np.zeros(shape=output_image.shape, dtype=np.uint8)

            counter = 1
            for lab in segms_and_scores:
                # if cfg.dataset == 'coco':
                #     if names[lab] not in display_cat and cfg.dataset != 'kins':
                #         continue
                for idx in range(len(segms_and_scores[lab])):
                    res = segms_and_scores[lab][idx]
                    contour, bbox, score = res[:-5], res[-5:-1], res[-1]
                    bbox[0] = np.clip(bbox[0], 0, width - 1)
                    bbox[1] = np.clip(bbox[1], 0, height - 1)
                    bbox[2] = np.clip(bbox[2], 0, width - 1)
                    bbox[3] = np.clip(bbox[3], 0, height - 1)

                    polygon = contour.reshape((-1, 2))
                    polygon[:, 0] = np.clip(polygon[:, 0], 0, width - 1)
                    polygon[:, 1] = np.clip(polygon[:, 1], 0, height - 1)
                    if score > cfg.detect_thres:
                        # text = names[lab] + ' %.2f' % score
                        # label_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_COMPLEX, 0.3, 1)
                        # text_location = [int(bbox[0]) + 2, int(bbox[1]) + 2,
                        #                  int(bbox[0]) + 2 + label_size[0][0],
                        #                  int(bbox[1]) + 2 + label_size[0][1]]
                        # cv2.rectangle(output_image, pt1=(int(bbox[0]), int(bbox[1])),
                        #               pt2=(int(bbox[2]), int(bbox[3])),
                        #               color=colors[lab], thickness=2)
                        # cv2.rectangle(output_image, pt1=(int(bbox[0]), int(bbox[1])),
                        #               pt2=(int(bbox[2]), int(bbox[3])),
                        #               color=nice_colors[names[lab]], thickness=2)
                        # cv2.putText(output_image, text, org=(int(text_location[0]), int(text_location[3])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=1, fontScale=0.3,
                        #             color=nice_colors[names[lab]])

                        use_color_key = COLOR_WORLD[random.randint(1, len(COLOR_WORLD)) - 1]
                        cv2.polylines(output_image, [polygon.astype(np.int32)], True,
                                      color=switch_tuple(RGB_DICT[use_color_key]),
                                      thickness=2)
                        cv2.drawContours(blend_mask, [polygon.astype(np.int32)], contourIdx=-1,
                                         color=switch_tuple(RGB_DICT[use_color_key]),
                                         thickness=-1)

                        counter += 1

            dst_img = cv2.addWeighted(output_image, 0.4, blend_mask, 0.6, 0)
            dst_img[blend_mask == 0] = output_image[blend_mask == 0]

            gt_dst_img = cv2.addWeighted(gt_image, 0.4, gt_blend_mask, 0.6, 0)
            gt_dst_img[gt_blend_mask == 0] = gt_image[gt_blend_mask == 0]

            cat_image = np.concatenate([dst_img, gt_dst_img], axis=1)

            cv2.imshow('Frames', cat_image)

            if cv2.waitKey() & 0xFF == ord('q'):
                break
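The predicted contours and boxes above are clipped to the image bounds before drawing. A sketch of that step in isolation, assuming polygon is an (N, 2) array of x/y coordinates and bbox is [x1, y1, x2, y2]; clip_to_image is a hypothetical name.

import numpy as np

def clip_to_image(polygon, bbox, width, height):
    # Clip every vertex and every box corner into [0, width-1] x [0, height-1]
    polygon = polygon.copy()
    polygon[:, 0] = np.clip(polygon[:, 0], 0, width - 1)
    polygon[:, 1] = np.clip(polygon[:, 1], 0, height - 1)
    bbox = np.clip(np.asarray(bbox, dtype=np.float32),
                   0, [width - 1, height - 1, width - 1, height - 1])
    return polygon, bbox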
Example n. 28
0
def main():
  logger = create_logger(save_dir=cfg.log_dir)
  print = logger.info
  print(cfg)

  cfg.device = torch.device('cuda')
  torch.backends.cudnn.benchmark = False

  max_per_image = 100
  
  Dataset_eval = Damage_eval  # your own dataset class

  # Crack RE Spalling
  dataset = Dataset_eval(cfg.data_dir, split='val', test_scales=cfg.test_scales, test_flip=cfg.test_flip) # split test
  
  data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False,
                                            num_workers=1, pin_memory=True,
                                            collate_fn=dataset.collate_fn)
                                            
  print('Creating model...')
  if 'hourglass' in cfg.arch:
    model = get_hourglass[cfg.arch]
  elif 'resdcn' in cfg.arch:
    model = get_pose_net_resdcn(num_layers=18, head_conv=64, num_classes=3)
  elif cfg.arch == 'resnet':
    model = get_pose_net(num_layers=18, head_conv=64, num_classes=3) 
  elif cfg.arch == 'res_CBAM':
    model = get_pose_net_resnet_CBAM(num_layers=18, head_conv=64, num_classes=3)
  elif cfg.arch == 'resnet_PAM':
    model = get_pose_net_resnet_PAM(num_layers=18, head_conv=64, num_classes=3)
  elif cfg.arch == 'resnet_SE':
    model = get_pose_net_resnet_SE(num_layers=18, head_conv=64, num_classes=3)

  model = load_model(model, cfg.pretrain_dir)
  model = model.to(cfg.device)
  model.eval()

  results = {}
  with torch.no_grad():
    for inputs in tqdm(data_loader):
      img_id, inputs, img_path = inputs[0]
      print('id %s', img_id)
      
      detections = []
      for scale in inputs:
        inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)

        output = model(inputs[scale]['image'])[-1]
        dets = ctdet_decode(*output, K=cfg.test_topk) 
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

        top_preds = {}
        dets[:, :2] = transform_preds(dets[:, 0:2],  
                                      inputs[scale]['center'],
                                      inputs[scale]['scale'],
                                      (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
        dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                       inputs[scale]['center'],
                                       inputs[scale]['scale'],
                                       (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
        cls = dets[:, -1]
        for j in range(dataset.num_classes):
          inds = (cls == j)
          top_preds[j + 1] = dets[inds, :5].astype(np.float32) 
          top_preds[j + 1][:, :4] /= scale
        
        detections.append(top_preds)

      bbox_and_scores = {}
      for j in range(1, dataset.num_classes + 1):
        bbox_and_scores[j] = np.concatenate([d[j] for d in detections], axis=0)
        if len(dataset.test_scales) > 1:
          soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
      scores = np.hstack([bbox_and_scores[j][:, 4] for j in range(1, dataset.num_classes + 1)])

      if len(scores) > max_per_image: 
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, dataset.num_classes + 1):
          keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
          bbox_and_scores[j] = bbox_and_scores[j][keep_inds] 

      images_test = cv2.imread(img_path)
      fig = plt.figure(0) 
      colors = COCO_COLORS
      names = COCO_NAMES
      #cv2.imwrite('E:/test1.png',images_test)
      
      plt.imshow(cv2.cvtColor(images_test, cv2.COLOR_BGR2RGB))
      for lab in bbox_and_scores: 
        for boxes in bbox_and_scores[lab]: 
          x1, y1, x2, y2, score = boxes
          if (x1 < 0):
            x1 = 0
          if (y1 < 0):
            y1 = 0
          if (x2 > 511):
            x2 = 511
          if (y2 > 511):
            y2 = 511
          
          if score > 0.2:
            plt.gca().add_patch(Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor=colors[lab], facecolor='none'))
            plt.text(x1 - 12, y1 - 12, names[lab], bbox=dict(facecolor=colors[lab], alpha=0.5), fontsize=7, color='k')
      
      fig.patch.set_visible(False)
      Save_dir = 'data/damage/Predict_images' # save images
      Image_name = img_path[-10:] 
      Save_dir = os.path.join(Save_dir, Image_name)
      plt.axis('off')
      plt.savefig(Save_dir, dpi=400, transparent=True, bbox_inches="tight", pad_inches=0.1)  # save the figure
      plt.close(0) 

      results[img_id] = bbox_and_scores 

  eval_results = dataset.run_eval(results, cfg.ckpt_dir)
  print(eval_results)
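The matplotlib-based saving of predictions above can be condensed into one helper. A sketch under the assumption that the image has already been converted to RGB, detections is an iterable of (x1, y1, x2, y2, score, label) tuples, and names/colors map each label to a class name and a matplotlib colour; save_detections is a hypothetical name.

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

def save_detections(image_rgb, detections, names, colors, out_path, thresh=0.2):
    fig = plt.figure(0)
    plt.imshow(image_rgb)
    for x1, y1, x2, y2, score, lab in detections:
        if score > thresh:
            # Draw the box outline and a label tag above the top-left corner
            plt.gca().add_patch(Rectangle((x1, y1), x2 - x1, y2 - y1,
                                          linewidth=2, edgecolor=colors[lab],
                                          facecolor='none'))
            plt.text(x1 - 12, y1 - 12, names[lab],
                     bbox=dict(facecolor=colors[lab], alpha=0.5),
                     fontsize=7, color='k')
    plt.axis('off')
    fig.patch.set_visible(False)
    plt.savefig(out_path, dpi=400, transparent=True, bbox_inches='tight', pad_inches=0.1)
    plt.close(0)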
Example n. 29
0
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    num_classes = 80 if cfg.dataset == 'coco' else 4
    dictionary = np.load(cfg.dictionary_file)

    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5,
                     nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading COCO validation images
    annotation_file = '{}/annotations/instances_{}.json'.format(
        cfg.data_dir, cfg.data_type)
    coco = COCO(annotation_file)

    # Load all annotations
    imgIds = coco.getImgIds()

    det_results = []
    seg_results = []

    for img_id in imgIds:
        img = coco.loadImgs(img_id)[0]
        image_path = '%s/images/%s/%s' % (cfg.data_dir, cfg.data_type,
                                          img['file_name'])
        w_img = int(img['width'])
        h_img = int(img['height'])
        if w_img < 1 or h_img < 1:
            continue

        image = cv2.imread(image_path)
        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.],
                                  dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size],
                                       dtype=np.float32)
            else:
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2],
                                  dtype=np.float32)
                scaled_size = np.array([img_width, img_height],
                                       dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0,
                                             [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(
                COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                            dtype=np.float32)[None, None, :]
            img = img.transpose(
                2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            # if cfg.test_flip:
            #     img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

            imgs[scale] = {
                'image': torch.from_numpy(img).float(),
                'center': np.array(center),
                'scale': np.array(scaled_size),
                'fmap_h': np.array(img_height // 4),
                'fmap_w': np.array(img_width // 4)
            }

        with torch.no_grad():
            # print('In with no_grads()')
            segmentations = []
            start_time = time.time()
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)

                output = model(imgs[scale]['image'])[-1]
                segms = ctsegm_decode(*output,
                                      torch.from_numpy(
                                          dictionary.astype(np.float32)).to(
                                              cfg.device),
                                      K=cfg.test_topk)
                segms = segms.detach().cpu().numpy().reshape(
                    1, -1, segms.shape[2])[0]

                top_preds = {}
                for j in range(cfg.num_vertices):
                    segms[:, 2 * j:2 * j + 2] = transform_preds(
                        segms[:, 2 * j:2 * j + 2], imgs[scale]['center'],
                        imgs[scale]['scale'],
                        (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))

                clses = segms[:, -1]
                for j in range(num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = segms[inds, :cfg.num_vertices * 2 +
                                             1].astype(np.float32)
                    top_preds[j + 1][:, :cfg.num_vertices * 2] /= scale

                segmentations.append(top_preds)

            segms_and_scores = {
                j: np.concatenate([d[j] for d in segmentations], axis=0)
                for j in range(1, num_classes + 1)
            }  # a Dict label: segments
            scores = np.hstack([
                segms_and_scores[j][:, cfg.num_vertices * 2]
                for j in range(1, num_classes + 1)
            ])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (segms_and_scores[j][:, cfg.num_vertices * 2]
                                 >= thresh)
                    segms_and_scores[j] = segms_and_scores[j][keep_inds]

            # generate coco results for server eval
            # print('generate coco results for server eval ...')
            for lab in segms_and_scores:
                for res in segms_and_scores[lab]:
                    poly, score = res[:-1], res[-1]
                    recon_contour = poly.reshape((-1, 2))
                    recon_contour[:, 0] = np.clip(recon_contour[:, 0], 0,
                                                  img_width - 1)
                    recon_contour[:, 1] = np.clip(recon_contour[:, 1], 0,
                                                  img_height - 1)
                    category_id = int(COCO_IDS[lab - 1])
                    if score > cfg.detect_thres:
                        x1, y1, x2, y2 = int(min(recon_contour[:, 0])), int(min(recon_contour[:, 1])), \
                                         int(max(recon_contour[:, 0])), int(max(recon_contour[:, 1]))
                        bbox = [x1, y1, x2 - x1, y2 - y1]
                        det = {
                            'image_id': int(img_id),
                            'category_id': int(category_id),
                            'score': float("{:.2f}".format(score)),
                            'bbox': bbox
                        }
                        det_results.append(det)

                        # convert polygons to rle masks
                        poly = np.ndarray.flatten(
                            recon_contour,
                            order='C').tolist()  # row major flatten
                        rles = cocomask.frPyObjects([poly], img_height,
                                                    img_width)
                        rle = cocomask.merge(rles)
                        m = cocomask.decode(rle)
                        rle_new = encode_mask(m.astype(np.uint8))

                        seg = {
                            'image_id': int(img_id),
                            'category_id': int(category_id),
                            'score': float("{:.2f}".format(score)),
                            'segmentation': rle_new
                        }
                        seg_results.append(seg)

    with open(
            '{}/coco_result/{}_det_results_v{}.json'.format(
                cfg.root_dir, cfg.data_type, cfg.num_vertices), 'w') as f_det:
        json.dump(det_results, f_det)
    with open(
            '{}/coco_result/{}_seg_results_v{}.json'.format(
                cfg.root_dir, cfg.data_type, cfg.num_vertices), 'w') as f_seg:
        json.dump(seg_results, f_seg)

    # run COCO detection evaluation
    print('Running COCO detection val17 evaluation ...')
    coco_pred = coco.loadRes('{}/coco_result/{}_det_results_v{}.json'.format(
        cfg.root_dir, cfg.data_type, cfg.num_vertices))
    imgIds = sorted(coco.getImgIds())
    coco_eval = COCOeval(coco, coco_pred, 'bbox')
    coco_eval.params.imgIds = imgIds
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    print(
        '---------------------------------------------------------------------------------'
    )
    print('Running COCO segmentation val17 evaluation ...')
    coco_pred = coco.loadRes('{}/coco_result/{}_seg_results_v{}.json'.format(
        cfg.root_dir, cfg.data_type, cfg.num_vertices))
    coco_eval = COCOeval(coco, coco_pred, 'segm')
    coco_eval.params.imgIds = imgIds
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
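For the segmentation JSON above, each predicted contour is converted to a COCO run-length encoding with pycocotools; encode_mask is the project's own serialisation helper. A minimal sketch of the standard conversion, stopping at the merged RLE (polygon_to_rle is a hypothetical name):

import numpy as np
from pycocotools import mask as cocomask

def polygon_to_rle(contour_xy, img_height, img_width):
    # Flatten the (N, 2) contour row-major into [x1, y1, x2, y2, ...] and let
    # pycocotools rasterise and merge it into a single RLE
    poly = np.asarray(contour_xy, dtype=np.float64).flatten(order='C').tolist()
    rles = cocomask.frPyObjects([poly], img_height, img_width)
    return cocomask.merge(rles)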
Example n. 30
0
    def val_map(epoch):
        print('\n Val@Epoch: %d' % epoch)
        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        input_scales = {}
        speed_list = []
        with torch.no_grad():
            for inputs in val_loader:
                img_id, inputs = inputs[0]
                start_image_time = time.time()
                segmentations = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)
                    if scale == 1. and img_id not in input_scales.keys():  # keep track of the input image sizes
                        _, _, input_h, input_w = inputs[scale]['image'].shape
                        input_scales[img_id] = {'h': input_h, 'w': input_w}

                    output = model(inputs[scale]['image'])[-1]

                    segms = ctsegm_decode(*output, torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device),
                                          K=cfg.test_topk)
                    segms = segms.detach().cpu().numpy().reshape(1, -1, segms.shape[2])[0]

                    top_preds = {}
                    for j in range(cfg.n_vertices):
                        segms[:, 2 * j:2 * j + 2] = transform_preds(segms[:, 2 * j:2 * j + 2],
                                                                    inputs[scale]['center'],
                                                                    inputs[scale]['scale'],
                                                                    (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))

                    clses = segms[:, -1]
                    for j in range(val_dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = segms[inds, :cfg.n_vertices * 2 + 1].astype(np.float32)
                        top_preds[j + 1][:, :cfg.n_vertices * 2] /= scale

                    segmentations.append(top_preds)

                end_image_time = time.time()
                segms_and_scores = {j: np.concatenate([d[j] for d in segmentations], axis=0)
                                    for j in range(1, val_dataset.num_classes + 1)}
                scores = np.hstack([segms_and_scores[j][:, cfg.n_vertices * 2] for j in range(1, val_dataset.num_classes + 1)])
                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, val_dataset.num_classes + 1):
                        keep_inds = (segms_and_scores[j][:, cfg.n_vertices * 2] >= thresh)
                        segms_and_scores[j] = segms_and_scores[j][keep_inds]

                results[img_id] = segms_and_scores
                # print(segms_and_scores)
                # exit()
                speed_list.append(end_image_time - start_image_time)

        eval_results = val_dataset.run_eval(results, input_scales, save_dir=cfg.ckpt_dir)
        print(eval_results)
        summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)
        print('Average speed on val set:{:.2f}'.format(1. / np.mean(speed_list)))
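The "Average speed on val set" printed above is images per second, i.e. the reciprocal of the mean per-image wall-clock time collected in speed_list. As a tiny sketch (average_images_per_second is a hypothetical name):

import numpy as np

def average_images_per_second(per_image_seconds):
    return 1. / np.mean(per_image_seconds)

# e.g. average_images_per_second([0.05, 0.07, 0.06]) is roughly 16.7 images/s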