Example #1
    def post_processing(self, output, padding_info, resize):
        """
        返回值:[batch, n, 6]
        格式:x1,y1,x2,y2,confidence,class_id
        """
        dets = ctdet_decode(*output)
        res = []

        for det in dets:
            labels = det[:, -1]
            conf = torch.sigmoid(det[:, 4])
            box_array = det[:, :4] * cfg.down_ratio  # rescale to the network input image size

            labels = labels.detach().cpu().numpy()
            conf = conf.detach().cpu().numpy()
            box_array = box_array.detach().cpu().numpy()

            box_array = (box_array - padding_info) / resize  # rescale to the original image size
            bboxes = np.zeros((0, 6))
            for class_index in range(1, len(class_names)):
                cls_argwhere = labels == class_index
                ll_max_id = labels[cls_argwhere].reshape(-1, 1)
                scores = conf[cls_argwhere].reshape(-1, 1)
                box_array_new = box_array[cls_argwhere, :]
                bboxes = np.vstack(
                    (bboxes, np.hstack((box_array_new, scores, ll_max_id))))
            res.append(bboxes)
        return res
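
The two rescaling steps in post_processing undo the preprocessing pipeline: boxes are decoded on the stride-down_ratio feature map, so multiplying by cfg.down_ratio maps them to network-input coordinates, and subtracting the padding offsets before dividing by the resize factor maps them back to the original image. A minimal numeric sketch of the same arithmetic; the values of down_ratio, padding_info, and resize, and the [left, top, left, top] padding layout, are illustrative assumptions:

import numpy as np

down_ratio = 4                                  # feature-map stride (assumed)
padding_info = np.array([8., 0., 8., 0.])       # [left, top, left, top] padding in input pixels (assumed layout)
resize = 0.5                                    # original -> network-input scale factor (assumed)

box_fmap = np.array([[10., 20., 30., 40.]])     # a box decoded on the feature map
box_input = box_fmap * down_ratio               # network-input coords: [[40, 80, 120, 160]]
box_orig = (box_input - padding_info) / resize  # original-image coords: [[64, 160, 224, 320]]
print(box_orig)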
Example #2
    def val_map(epoch):
        print('\n Val@Epoch: %d' % epoch)
        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        with torch.no_grad():
            for inputs in val_loader:
                img_id, inputs = inputs[0]

                detections = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(
                        cfg.device)
                    output = model(inputs[scale]['image'])[-1]

                    dets = ctdet_decode(*output, K=cfg.test_topk)
                    dets = dets.detach().cpu().numpy().reshape(
                        1, -1, dets.shape[2])[0]

                    top_preds = {}
                    dets[:, :2] = transform_preds(
                        dets[:, 0:2], inputs[scale]['center'],
                        inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    dets[:, 2:4] = transform_preds(
                        dets[:, 2:4], inputs[scale]['center'],
                        inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    clses = dets[:, -1]
                    for j in range(val_dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                        top_preds[j + 1][:, :4] /= scale

                    detections.append(top_preds)

                bbox_and_scores = {
                    j: np.concatenate([d[j] for d in detections], axis=0)
                    for j in range(1, val_dataset.num_classes + 1)
                }
                scores = np.hstack([
                    bbox_and_scores[j][:, 4]
                    for j in range(1, val_dataset.num_classes + 1)
                ])
                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, val_dataset.num_classes + 1):
                        keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                        bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

                results[img_id] = bbox_and_scores

        eval_results = val_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        print(eval_results)
        summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)
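
The np.partition block above caps the output at max_per_image detections per image: with kth = len(scores) - max_per_image, np.partition places the max_per_image-th highest score at index kth, which then serves as the keep threshold for every class. A small self-contained sketch of the same pattern:

import numpy as np

scores = np.array([0.9, 0.1, 0.8, 0.3, 0.7, 0.2])
max_per_image = 3

if len(scores) > max_per_image:
    kth = len(scores) - max_per_image        # index of the cutoff score after partitioning
    thresh = np.partition(scores, kth)[kth]  # 3rd-highest score, here 0.7
    print(scores[scores >= thresh])          # [0.9 0.8 0.7]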
Example #3
    def Evaluate(epoch, model):
        print('\n Evaluate@Epoch: %d' % epoch)

        start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
        print('Start time %s Seconds' % start_time)

        model.eval()
        torch.cuda.empty_cache()
        max_per_image = 100

        results = {}
        with torch.no_grad():
            for inputs in data_loader:
                img_id, inputs, img_path = inputs[0]

                detections = []
                for scale in inputs:
                    inputs[scale]['image'] = inputs[scale]['image'].to(
                        cfg.device)  # (1,3)
                    output = model(
                        inputs[scale]['image'])[-1]  # hmap, regs, pxpy
                    dets = ctdet_decode(
                        *output, K=cfg.test_topk
                    )  # torch.cat([bboxes, scores, clses], dim=2)
                    dets = dets.detach().cpu().numpy().reshape(
                        1, -1, dets.shape[2])[0]

                    top_preds = {}
                    dets[:, :2] = transform_preds(
                        dets[:, 0:2], inputs[scale]['center'],
                        inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    dets[:, 2:4] = transform_preds(
                        dets[:, 2:4], inputs[scale]['center'],
                        inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                    clses = dets[:, -1]
                    for j in range(dataset.num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                        top_preds[j + 1][:, :4] /= scale

                    detections.append(top_preds)

                bbox_and_scores = {
                    j: np.concatenate([d[j] for d in detections], axis=0)
                    for j in range(1, dataset.num_classes + 1)
                }
                scores = np.hstack([
                    bbox_and_scores[j][:, 4]
                    for j in range(1, dataset.num_classes + 1)
                ])
                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, dataset.num_classes + 1):
                        keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                        bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

                results[img_id] = bbox_and_scores

        end_time = time.perf_counter()

        eval_results = dataset.run_eval(results, save_dir=cfg.ckpt_dir)
        print(eval_results)

        print('End time %s Seconds' % end_time)
        run_time = end_time - start_time
        fps = len(results) / run_time  # images evaluated per second
        print('FPS %s ' % fps)

        #summary_writer.add_scalar('Evaluate_mAP/mAP', eval_results[0], epoch)
        return eval_results[0]
Example #4
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    num_classes = 80 if cfg.dataset == 'coco' else 4

    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    # Set up parameters for outputting video
    output_name = 'demo/'
    width = cfg.video_width
    height = cfg.video_height
    fps = cfg.video_fps  # output video configuration
    video_out = cv2.VideoWriter(cfg.output_video_dir,
                                cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'), fps, (width, height))
    text_out = open(cfg.output_text_dir, 'w')

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5, nstack=2, dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4], num_classes=num_classes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading images
    speed_list = []
    frame_list = sorted(os.listdir(cfg.img_dir))
    n_frames = len(frame_list)

    for frame_id in range(n_frames):
        frame_name = frame_list[frame_id]
        image_path = os.path.join(cfg.img_dir, frame_name)

        image = cv2.imread(image_path)
        original_image = image.copy()
        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size], dtype=np.float32)
            else:
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2], dtype=np.float32)
                scaled_size = np.array([img_width, img_height], dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0, [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN, dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD, dtype=np.float32)[None, None, :]
            img = img.transpose(2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            # if cfg.test_flip:
            #     img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

            imgs[scale] = {'image': torch.from_numpy(img).float(),
                           'center': np.array(center),
                           'scale': np.array(scaled_size),
                           'fmap_h': np.array(img_height // 4),
                           'fmap_w': np.array(img_width // 4)}

        with torch.no_grad():
            detections = []
            start_time = time.time()
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)

                output = model(imgs[scale]['image'])[-1]
                dets = ctdet_decode(*output, K=cfg.test_topk)
                dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

                top_preds = {}
                dets[:, :2] = transform_preds(dets[:, 0:2],
                                              imgs[scale]['center'],
                                              imgs[scale]['scale'],
                                              (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                               imgs[scale]['center'],
                                               imgs[scale]['scale'],
                                               (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                cls = dets[:, -1]
                for j in range(num_classes):
                    inds = (cls == j)
                    top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                    top_preds[j + 1][:, :4] /= scale

                detections.append(top_preds)

            bbox_and_scores = {}
            for j in range(1, num_classes + 1):
                bbox_and_scores[j] = np.concatenate([d[j] for d in detections], axis=0)
                if len(cfg.test_scales) > 1:
                    soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
            scores = np.hstack([bbox_and_scores[j][:, 4] for j in range(1, num_classes + 1)])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            # Use opencv functions to output a video
            # output_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
            speed_list.append(time.time() - start_time)
            output_image = original_image

            counter = 1
            for lab in bbox_and_scores:
                if cfg.dataset == 'coco':
                    if names[lab] not in DETRAC_compatible_names:
                        continue
                for boxes in bbox_and_scores[lab]:
                    x1, y1, x2, y2, score = boxes
                    if score > cfg.detect_thres:
                        text = names[lab] + '%.2f' % score
                        label_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_COMPLEX, 0.3, 1)
                        text_location = [x1 + 2, y1 + 2,
                                         x1 + 2 + label_size[0][0],
                                         y1 + 2 + label_size[0][1]]
                        # cv2.rectangle(output_image, pt1=(int(x1), int(y1)),
                        #               pt2=(int(x2), int(y2)),
                        #               color=colors[lab], thickness=2)
                        cv2.rectangle(output_image, pt1=(int(x1), int(y1)),
                                      pt2=(int(x2), int(y2)),
                                      color=(0, 255, 0), thickness=2)
                        # cv2.putText(output_image, text, org=(int(text_location[0]), int(text_location[3])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=1, fontScale=0.3,
                        #             color=(0, 0, 255))

                        # add to text file
                        new_line = '{0},{1},{2:.3f},{3:.3f},{4:.3f},{5:.3f},{6:.4f}\n'.format(str(frame_id + 1),
                                                                                              counter,
                                                                                              x1, y1, x2 - x1, y2 - y1,
                                                                                              score)
                        counter += 1
                        text_out.write(new_line)

            cv2.imshow('Frames', output_image)
            video_out.write(output_image)
            if cv2.waitKey(5) & 0xFF == ord('q'):
                break

    print('Test frame rate:', 1. / np.mean(speed_list))
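
The (new_height | padding) + 1 expression in the preprocessing above rounds a dimension up to a multiple of a power of two: OR-ing with padding = 127 sets the low seven bits, so adding 1 lands on the next multiple of 128 (padding = 31 likewise yields multiples of 32), keeping inputs compatible with the network's downsampling stages. Note it always bumps to the next strictly larger multiple, even when the dimension is already aligned. A quick check:

for padding in (127, 31):
    for n in (480, 500, 512):
        print(padding, n, '->', (n | padding) + 1)
# padding=127: 480 -> 512, 500 -> 512, 512 -> 640
# padding=31:  480 -> 512, 500 -> 512, 512 -> 544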
Example #5
def main():
  logger = create_logger(save_dir=cfg.log_dir)
  print = logger.info
  print(cfg)

  cfg.device = torch.device('cuda')
  torch.backends.cudnn.benchmark = False

  max_per_image = 100
  
  Dataset_eval = Damage_eval  # your own dataset class

  # Crack RE Spalling
  dataset = Dataset_eval(cfg.data_dir, split='val', test_scales=cfg.test_scales, test_flip=cfg.test_flip) # split test
  
  data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False,
                                            num_workers=1, pin_memory=True,
                                            collate_fn=dataset.collate_fn)
                                            
  print('Creating model...')
  if 'hourglass' in cfg.arch:
    model = get_hourglass[cfg.arch]
  elif 'resdcn' in cfg.arch:
    model = get_pose_net_resdcn(num_layers=18, head_conv=64, num_classes=3)
  elif cfg.arch == 'resnet':
    model = get_pose_net(num_layers=18, head_conv=64, num_classes=3) 
  elif cfg.arch == 'res_CBAM':
    model = get_pose_net_resnet_CBAM(num_layers=18, head_conv=64, num_classes=3)
  elif cfg.arch == 'resnet_PAM':
    model = get_pose_net_resnet_PAM(num_layers=18, head_conv=64, num_classes=3)
  elif cfg.arch == 'resnet_SE':
    model = get_pose_net_resnet_SE(num_layers=18, head_conv=64, num_classes=3)

  model = load_model(model, cfg.pretrain_dir)
  model = model.to(cfg.device)
  model.eval()

  results = {}
  with torch.no_grad():
    for inputs in tqdm(data_loader):
      img_id, inputs, img_path = inputs[0]
      print('img_id: %s', img_id)
      
      detections = []
      for scale in inputs:
        inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)

        output = model(inputs[scale]['image'])[-1]
        dets = ctdet_decode(*output, K=cfg.test_topk) 
        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

        top_preds = {}
        dets[:, :2] = transform_preds(dets[:, 0:2],  
                                      inputs[scale]['center'],
                                      inputs[scale]['scale'],
                                      (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
        dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                       inputs[scale]['center'],
                                       inputs[scale]['scale'],
                                       (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
        cls = dets[:, -1]
        for j in range(dataset.num_classes):
          inds = (cls == j)
          top_preds[j + 1] = dets[inds, :5].astype(np.float32) 
          top_preds[j + 1][:, :4] /= scale
        
        detections.append(top_preds)

      bbox_and_scores = {}
      for j in range(1, dataset.num_classes + 1):
        bbox_and_scores[j] = np.concatenate([d[j] for d in detections], axis=0)
        if len(dataset.test_scales) > 1:
          soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
      scores = np.hstack([bbox_and_scores[j][:, 4] for j in range(1, dataset.num_classes + 1)])

      if len(scores) > max_per_image: 
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, dataset.num_classes + 1):
          keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
          bbox_and_scores[j] = bbox_and_scores[j][keep_inds] 

      images_test = cv2.imread(img_path)
      fig = plt.figure(0) 
      colors = COCO_COLORS
      names = COCO_NAMES
      #cv2.imwrite('E:/test1.png',images_test)
      
      plt.imshow(cv2.cvtColor(images_test, cv2.COLOR_BGR2RGB))
      for lab in bbox_and_scores: 
        for boxes in bbox_and_scores[lab]: 
          x1, y1, x2, y2, score = boxes
          # clamp boxes to the 512x512 image bounds
          x1 = max(x1, 0)
          y1 = max(y1, 0)
          x2 = min(x2, 511)
          y2 = min(y2, 511)
          
          if score > 0.2:
            plt.gca().add_patch(Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor=colors[lab], facecolor='none'))
            plt.text(x1 - 12, y1 - 12, names[lab], bbox=dict(facecolor=colors[lab], alpha=0.5), fontsize=7, color='k')
      
      fig.patch.set_visible(False)
      Save_dir = 'data/damage/Predict_images' # save images
      Image_name = img_path[-10:] 
      Save_dir = os.path.join(Save_dir, Image_name)
      plt.axis('off')
      plt.savefig(Save_dir, dpi=400, transparent=True, bbox_inches="tight", pad_inches=0.1)  # save the figure
      plt.close(0) 

      results[img_id] = bbox_and_scores 

  eval_results = dataset.run_eval(results, cfg.ckpt_dir)
  print(eval_results)
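
The per-coordinate clamping in the plotting loop above assumes 512x512 inputs. With the boxes held as an (N, 4) array, the same clamp can be written as one vectorized call; a sketch, with the image size passed in as an assumed parameter:

import numpy as np

def clip_boxes(boxes, img_w=512, img_h=512):
    """Clamp [x1, y1, x2, y2] rows in place to the image bounds."""
    boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, img_w - 1)  # x1, x2
    boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, img_h - 1)  # y1, y2
    return boxes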
Example #6
def main():
    cfg = get_cfg()
    max_per_image = 100
    num_classes = cfg.num_classes

    print('Loading model...')
    model_name = '%s_hc%s' % (cfg.arch, cfg.head_conv)
    model, shift_buffer = load_network_arch(cfg.arch,
                                            cfg.num_classes,
                                            cfg.head_conv,
                                            pretrained=False)
    model = load_model(model,
                       cfg.model_path,
                       is_nested=False,
                       map_location='cpu')

    model = model.to(cfg.device)
    model.eval()

    debugger = Debugger(dataset=cfg.dataset, ipynb=False, theme='black')

    all_inputs = [load_and_transform_image(cfg.fn_image, cfg.img_size)]

    results = {}
    with torch.no_grad():
        img_id, inputs = all_inputs[0]

        detections = []
        for scale in [1.]:
            img_numpy = inputs[scale]['image']
            img = torch.from_numpy(img_numpy).to(cfg.device)
            output = model(img)[-1]  # array of 3
            dets = ctdet_decode(*output,
                                K=cfg.test_topk)  # torch.Size([1, 100, 6])
            dets = dets.detach().cpu().numpy().reshape(
                1, -1, dets.shape[2])[0]  # (100,6)
            # debug img uses dets prior to post_process
            add_debug_image(debugger, img_numpy, dets, output, scale)

            # print( 'meta: ', inputs[scale]['center'], inputs[scale]['scale'], inputs[scale]['fmap_w'], inputs[scale]['fmap_h'] )

            dets[:, :2] = transform_preds(
                dets[:, 0:2], inputs[scale]['center'], inputs[scale]['scale'],
                (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
            dets[:, 2:4] = transform_preds(
                dets[:, 2:4], inputs[scale]['center'], inputs[scale]['scale'],
                (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))

            # print( 'dets post_proc: ', dets )
            # MNV3:     [[117.8218   132.52121  227.10435  351.23346    0.854211  14.      ]]
            # resnet18: [[115.41386, 133.93118, 230.14862, 356.79816, 0.90593797]]

            cls = dets[:, -1]  # (100,)
            top_preds = {}
            for j in range(num_classes):
                inds = (cls == j)
                top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                top_preds[j + 1][:, :4] /= scale

            detections.append(top_preds)

        bbox_and_scores = {}
        for j in range(1, num_classes + 1):
            bbox_and_scores[j] = np.concatenate([d[j] for d in detections],
                                                axis=0)
            # if len(dataset.test_scales) > 1:
            #     soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
        scores = np.hstack(
            [bbox_and_scores[j][:, 4] for j in range(1, num_classes + 1)])

        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, num_classes + 1):
                keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

        results[img_id] = bbox_and_scores
        # print( 'bbox_and_scores: ', bbox_and_scores )

        # show_results(debugger, image, results)
        debugger.show_all_imgs(pause=True)
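
transform_preds (defined elsewhere in these CenterNet-style codebases) maps decoded feature-map coordinates back to original-image coordinates by applying the inverse of the preprocessing affine transform. A minimal sketch of the underlying operation, applying a 2x3 affine matrix to an (N, 2) array of points; the matrix values here are illustrative, not the real inverse transform:

import numpy as np

def apply_affine(points, trans):
    """points: (N, 2); trans: (2, 3) affine matrix; returns (N, 2)."""
    pts_h = np.concatenate([points, np.ones((points.shape[0], 1))], axis=1)  # homogeneous coords
    return pts_h @ trans.T

# e.g. undo a 4x downsample plus an (8, 16) shift
trans_inv = np.array([[4., 0., 8.],
                      [0., 4., 16.]])
print(apply_affine(np.array([[10., 20.]]), trans_inv))  # [[48. 96.]]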
Example #7
class CenterNet(object):
    def __init__(self, cfg):
        self.cfg = cfg
        if cfg.arch == 'resnet50':
            self.model = get_pose_net(50, 64, cfg.num_classes)
        else:
            self.model = get_hourglass(cfg.arch, num_classes=cfg.num_classes)

        if cfg.pretrained_weights is not None:
            weight_file = os.path.join(cfg.save_folder, cfg.pretrained_weights)
            load_model(self.model, weight_file)
            print("load pretrain mode:{}".format(weight_file))

        if cfg.num_gpu > 1 and torch.cuda.is_available():
            self.model = torch.nn.DataParallel(self.model).cuda()
        else:
            self.model = self.model.to(cfg.device)
        self.save_folder = cfg.ckpt_dir
        self.optim = optim.Adam(self.model.parameters(), lr=cfg.lr)
        self.scheduler = optim.lr_scheduler.ExponentialLR(self.optim, gamma=0.99)

    def train(self, data_counts, data_loader, eval_loader, n_epochs):
        max_map = 0.28
        for epoch in range(n_epochs):
            evaluator = self.train_epoch(data_counts, data_loader, eval_loader, epoch, n_epochs)
            stats = evaluator.coco_eval['bbox'].stats
            eval_map = stats[0]
            if eval_map > max_map:
                max_map = eval_map
                ckpt_path = os.path.join(self.save_folder, 'centernet_Epoch{0}_map{1}.pth'.format(epoch, max_map))
                torch.save(self.model.state_dict(), ckpt_path)
                print('weights {0} saved success!'.format(ckpt_path))
            self.scheduler.step()

    def train_epoch(self, data_counts, data_loader, eval_loader, epoch, n_epochs):
        with tqdm.tqdm(total=data_counts, desc=f'Epoch {epoch}/{n_epochs}', unit='img', ncols=150) as pbar:
            step = 0
            for batch in data_loader:
                step += 1
                load_t0 = time.time()
                for k in batch:
                    batch[k] = batch[k].to(device=self.cfg.device, non_blocking=True)

                outputs = self.model(batch['image'])
                hmap, regs, w_h_ = zip(*outputs)
                regs = [_tranpose_and_gather_feature(r, batch['inds']) for r in regs]
                w_h_ = [_tranpose_and_gather_feature(r, batch['inds']) for r in w_h_]

                hmap_loss = _neg_loss(hmap, batch['hmap'])
                reg_loss = _reg_loss(regs, batch['regs'], batch['ind_masks'])
                w_h_loss = _reg_loss(w_h_, batch['w_h_'], batch['ind_masks'])
                loss = hmap_loss + 1 * reg_loss + 0.1 * w_h_loss

                self.model.zero_grad()
                loss.backward()
                self.optim.step()

                load_t1 = time.time()
                batch_time = load_t1 - load_t0
                pbar.set_postfix(**{'hmap_loss': hmap_loss.item(),
                                    'reg_loss': reg_loss.item(),
                                    'w_h_loss': w_h_loss.item(),
                                    'LR': self.optim.param_groups[0]['lr'],
                                    'Batchtime': batch_time})
                pbar.update(batch['image'].shape[0])

        cons_acc = self._evaluate(eval_loader)
        return cons_acc

    @torch.no_grad()
    def _evaluate(self, data_loader):
        coco = convert_to_coco_api(data_loader.dataset, bbox_fmt='coco')
        coco_evaluator = CocoEvaluator(coco, iou_types=["bbox"], bbox_fmt='coco')

        if self.cfg.arch == 'resnet50':
            eval_net = get_pose_net(50, 64, self.cfg.num_classes)
        else:
            eval_net = get_hourglass(self.cfg.arch, num_classes=self.cfg.num_classes, is_training=False)
            
        if self.cfg.num_gpu > 1 and torch.cuda.is_available():
            eval_net = torch.nn.DataParallel(eval_net).cuda()
        else:
            eval_net = eval_net.to(self.cfg.device)
        eval_net.load_state_dict(self.model.state_dict())
        eval_net = eval_net.to(self.cfg.device)
        eval_net.eval()

        for inputs, targets in data_loader:
            targets = [{k: v.to(self.cfg.device) for k, v in t.items()} for t in targets]
            model_input = torch.stack(inputs, 0)
            model_input = model_input.to(self.cfg.device)
            output = eval_net(model_input)[-1]
            dets = ctdet_decode(*output, K=self.cfg.test_topk)
            # dets = dets.detach().cpu().numpy()
            res = {}
            for target, det in zip(targets, dets):
                labels = det[:, -1]
                scores = det[:, 4]
                boxes = det[:, :4]
                boxes[..., 2:] = boxes[..., 2:] - boxes[..., :2]  # Transform [x1, y1, x2, y2] to [x1, y1, w, h]
                boxes = boxes.reshape((boxes.shape[0], 1, 4))
                res[target["image_id"].item()] = {
                    "boxes": boxes,
                    "scores": scores,
                    "labels": labels,
                }
                coco_evaluator.update(res)

        coco_evaluator.synchronize_between_processes()
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
        del eval_net
        return coco_evaluator
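
The _neg_loss used in train_epoch is CenterNet's penalty-reduced pixelwise focal loss on the center heatmap. A sketch of the standard formulation from the CenterNet paper, assuming pred holds post-sigmoid probabilities; the repo's own _neg_loss may differ in details such as averaging over hourglass stacks:

import torch

def neg_loss_sketch(pred, gt, alpha=2, beta=4, eps=1e-12):
    """pred, gt: (B, C, H, W); gt is 1 at object centers, Gaussian-decayed nearby."""
    pos = gt.eq(1).float()
    neg = gt.lt(1).float()
    neg_weights = torch.pow(1 - gt, beta)  # down-weight negatives near a center

    pos_loss = torch.log(pred + eps) * torch.pow(1 - pred, alpha) * pos
    neg_loss = torch.log(1 - pred + eps) * torch.pow(pred, alpha) * neg_weights * neg

    num_pos = pos.sum()
    loss = -(pos_loss.sum() + neg_loss.sum())
    return loss / num_pos if num_pos > 0 else loss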
Example #8
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100

    image = cv2.imread(cfg.img_dir)
    # orig_image = image
    height, width = image.shape[0:2]
    padding = 127 if 'hourglass' in cfg.arch else 31
    imgs = {}
    for scale in cfg.test_scales:
        new_height = int(height * scale)
        new_width = int(width * scale)

        if cfg.img_size > 0:
            img_height, img_width = cfg.img_size, cfg.img_size
            center = np.array([new_width / 2., new_height / 2.],
                              dtype=np.float32)
            scaled_size = max(height, width) * 1.0
            scaled_size = np.array([scaled_size, scaled_size],
                                   dtype=np.float32)
        else:
            img_height = (new_height | padding) + 1
            img_width = (new_width | padding) + 1
            center = np.array([new_width // 2, new_height // 2],
                              dtype=np.float32)
            scaled_size = np.array([img_width, img_height], dtype=np.float32)

        img = cv2.resize(image, (new_width, new_height))
        trans_img = get_affine_transform(center, scaled_size, 0,
                                         [img_width, img_height])
        img = cv2.warpAffine(img, trans_img, (img_width, img_height))

        img = img.astype(np.float32) / 255.
        img -= np.array(COCO_MEAN if cfg.dataset == 'coco' else VOC_MEAN,
                        dtype=np.float32)[None, None, :]
        img /= np.array(COCO_STD if cfg.dataset == 'coco' else VOC_STD,
                        dtype=np.float32)[None, None, :]
        img = img.transpose(2, 0,
                            1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

        if cfg.test_flip:
            img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

        imgs[scale] = {
            'image': torch.from_numpy(img).float(),
            'center': np.array(center),
            'scale': np.array(scaled_size),
            'fmap_h': np.array(img_height // 4),
            'fmap_w': np.array(img_width // 4)
        }

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=80 if cfg.dataset == 'coco' else 20)
    else:
        raise NotImplementedError

    model = load_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    with torch.no_grad():
        detections = []
        for scale in imgs:
            imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)

            output = model(imgs[scale]['image'])[-1]
            dets = ctdet_decode(*output, K=cfg.test_topk)
            dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

            top_preds = {}
            dets[:, :2] = transform_preds(
                dets[:, 0:2], imgs[scale]['center'], imgs[scale]['scale'],
                (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
            dets[:, 2:4] = transform_preds(
                dets[:, 2:4], imgs[scale]['center'], imgs[scale]['scale'],
                (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
            cls = dets[:, -1]
            for j in range(80 if cfg.dataset == 'coco' else 20):
                inds = (cls == j)
                top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                top_preds[j + 1][:, :4] /= scale

            detections.append(top_preds)

        bbox_and_scores = {}
        for j in range(1, 81 if cfg.dataset == 'coco' else 21):
            bbox_and_scores[j] = np.concatenate([d[j] for d in detections],
                                                axis=0)
            if len(cfg.test_scales) > 1:
                soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
        scores = np.hstack([
            bbox_and_scores[j][:, 4]
            for j in range(1, 81 if cfg.dataset == 'coco' else 21)
        ])

        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, 81 if cfg.dataset == 'coco' else 21):
                keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

        # plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # plt.show()
        fig = plt.figure(0)
        colors = COCO_COLORS if cfg.dataset == 'coco' else VOC_COLORS
        names = COCO_NAMES if cfg.dataset == 'coco' else VOC_NAMES
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        for lab in bbox_and_scores:
            for boxes in bbox_and_scores[lab]:
                x1, y1, x2, y2, score = boxes
                if score > 0.3:
                    plt.gca().add_patch(
                        Rectangle((x1, y1),
                                  x2 - x1,
                                  y2 - y1,
                                  linewidth=2,
                                  edgecolor=colors[lab],
                                  facecolor='none'))
                    plt.text(x1 + 3,
                             y1 + 3,
                             names[lab] + '%.2f' % score,
                             bbox=dict(facecolor=colors[lab], alpha=0.5),
                             fontsize=7,
                             color='k')

        fig.patch.set_visible(False)
        plt.axis('off')
        plt.savefig('data/demo_results.png', dpi=300, transparent=True)
        plt.show()
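
When more than one test scale is used, per-class detections from different scales overlap, so the loop above rescores them with soft_nms before the top-k cut; method=2 selects the Gaussian variant. The soft_nms imported in these snippets is typically the Cython implementation from the Soft-NMS authors and rescores in place; a pure-NumPy sketch of the Gaussian rescoring, with sigma=0.5 as an assumed default:

import numpy as np

def soft_nms_gaussian(dets, sigma=0.5, score_thresh=0.001):
    """dets: (N, 5) array of [x1, y1, x2, y2, score] rows.
    Returns the rescored detections above score_thresh."""
    dets = dets.copy()
    keep = []
    while dets.shape[0] > 0:
        i = dets[:, 4].argmax()            # pick the highest-scoring box
        best = dets[i].copy()
        keep.append(best)
        dets = np.delete(dets, i, axis=0)
        if dets.shape[0] == 0:
            break
        # IoU of the remaining boxes with the picked one
        xx1 = np.maximum(best[0], dets[:, 0])
        yy1 = np.maximum(best[1], dets[:, 1])
        xx2 = np.minimum(best[2], dets[:, 2])
        yy2 = np.minimum(best[3], dets[:, 3])
        inter = np.maximum(0., xx2 - xx1) * np.maximum(0., yy2 - yy1)
        area_best = (best[2] - best[0]) * (best[3] - best[1])
        areas = (dets[:, 2] - dets[:, 0]) * (dets[:, 3] - dets[:, 1])
        iou = inter / (area_best + areas - inter)
        # Gaussian decay instead of hard suppression
        dets[:, 4] *= np.exp(-(iou ** 2) / sigma)
        dets = dets[dets[:, 4] > score_thresh]
    return np.stack(keep) if keep else np.zeros((0, 5))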
Example #9
def main():
    # Create Test set labels for DETRAC
    detrac_root = cfg.label_dir
    dataType = 'Test'
    test_images = list()
    test_objects = list()

    annotation_folder = 'DETRAC-{}-Annotations-XML'.format(dataType)
    annotation_path = os.path.join(detrac_root, annotation_folder)
    if not os.path.exists(annotation_path):
        print('annotation_path does not exist')
        raise FileNotFoundError(annotation_path)

    label_file = os.path.join(annotation_path, cfg.video_name + '.xml')
    tree = ET.parse(label_file)
    root = tree.getroot()
    object_list = list()

    Box_dict = {}
    for obj in root.iter('frame'):
        boxes = list()
        frame_num = int(obj.attrib['num'])
        target_list = obj.find('target_list')
        for target in target_list:
            bbox = target.find('box').attrib
            left = float(bbox['left'])
            top = float(bbox['top'])
            width = float(bbox['width'])
            height = float(bbox['height'])
            boxes.append([left, top, left + width,
                          top + height])  # x1, y1, x2, y2

        Box_dict[frame_num] = boxes

    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 150
    num_classes = 80 if cfg.dataset == 'coco' else 4

    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    # Set up parameters for outputting video
    width = cfg.video_width
    height = cfg.video_height
    fps = cfg.video_fps  # output video configuration
    video_out = cv2.VideoWriter(
        os.path.join(cfg.root_dir, cfg.video_name + '_compare.mkv'),
        cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'), fps, (width, height))

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5,
                     nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading images
    speed_list = []
    frame_list = sorted(os.listdir(os.path.join(cfg.img_dir, cfg.video_name)))
    n_frames = len(frame_list)

    for frame_id in range(n_frames):
        frame_n = frame_id + 1
        frame_name = frame_list[frame_id]
        image_path = os.path.join(cfg.img_dir, cfg.video_name, frame_name)

        image = cv2.imread(image_path)
        original_image = image.copy()
        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.],
                                  dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size],
                                       dtype=np.float32)
            else:
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2],
                                  dtype=np.float32)
                scaled_size = np.array([img_width, img_height],
                                       dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0,
                                             [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(
                COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                            dtype=np.float32)[None, None, :]
            img = img.transpose(
                2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            # if cfg.test_flip:
            #     img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

            imgs[scale] = {
                'image': torch.from_numpy(img).float(),
                'center': np.array(center),
                'scale': np.array(scaled_size),
                'fmap_h': np.array(img_height // 4),
                'fmap_w': np.array(img_width // 4)
            }

        with torch.no_grad():
            detections = []
            start_time = time.time()
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)

                output = model(imgs[scale]['image'])[-1]
                dets = ctdet_decode(*output, K=cfg.test_topk)
                dets = dets.detach().cpu().numpy().reshape(
                    1, -1, dets.shape[2])[0]

                top_preds = {}
                dets[:, :2] = transform_preds(
                    dets[:, 0:2], imgs[scale]['center'], imgs[scale]['scale'],
                    (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                dets[:, 2:4] = transform_preds(
                    dets[:, 2:4], imgs[scale]['center'], imgs[scale]['scale'],
                    (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                cls = dets[:, -1]
                for j in range(num_classes):
                    inds = (cls == j)
                    top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                    top_preds[j + 1][:, :4] /= scale

                detections.append(top_preds)

            bbox_and_scores = {}
            for j in range(1, num_classes + 1):
                bbox_and_scores[j] = np.concatenate([d[j] for d in detections],
                                                    axis=0)
                if len(cfg.test_scales) > 1:
                    soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
            scores = np.hstack(
                [bbox_and_scores[j][:, 4] for j in range(1, num_classes + 1)])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            # Use opencv functions to output a video
            speed_list.append(time.time() - start_time)
            output_image = original_image

            # Plot the GT boxes
            gt_bboxes = Box_dict[frame_n]
            for rect in gt_bboxes:
                x1, y1, x2, y2 = float(rect[0]), float(rect[1]), float(
                    rect[2]), float(rect[3])
                cv2.rectangle(output_image,
                              pt1=(int(x1), int(y1)),
                              pt2=(int(x2), int(y2)),
                              color=(0, 255, 0),
                              thickness=2)

            counter = 1
            for lab in bbox_and_scores:
                if cfg.dataset == 'coco':
                    if names[lab] not in DETRAC_compatible_names:
                        continue
                for boxes in bbox_and_scores[lab]:
                    x1, y1, x2, y2, score = boxes
                    if score > cfg.detect_thres:
                        text = names[lab] + '%.2f' % score
                        label_size = cv2.getTextSize(text,
                                                     cv2.FONT_HERSHEY_COMPLEX,
                                                     0.3, 1)
                        text_location = [
                            x1 + 2, y1 + 2, x1 + 2 + label_size[0][0],
                            y1 + 2 + label_size[0][1]
                        ]
                        cv2.rectangle(output_image,
                                      pt1=(int(x1), int(y1)),
                                      pt2=(int(x2), int(y2)),
                                      color=(0, 0, 255),
                                      thickness=2)
                        # cv2.putText(output_image, text, org=(int(text_location[0]), int(text_location[3])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=1, fontScale=0.3,
                        #             color=(0, 0, 255))

            cv2.imshow('Frames', output_image)
            video_out.write(output_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    print('Test frame rate:', 1. / np.mean(speed_list))
Example #10
def main():
    logger = create_logger(save_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100

    Dataset_eval = COCO_eval if cfg.dataset == 'coco' else PascalVOC_eval
    dataset = Dataset_eval(cfg.data_dir,
                           split='val',
                           img_size=cfg.img_size,
                           test_scales=cfg.test_scales,
                           test_flip=cfg.test_flip)
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=1,
                                              pin_memory=False,
                                              collate_fn=dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=dataset.num_classes)
    else:
        raise NotImplementedError

    model = load_model(model, cfg.pretrain_dir)
    model = model.to(cfg.device)
    model.eval()

    results = {}
    with torch.no_grad():
        for inputs in data_loader:
            img_id, inputs = inputs[0]

            detections = []
            for scale in inputs:
                inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)

                output = model(inputs[scale]['image'])[-1]
                dets = ctdet_decode(*output, K=cfg.test_topk)
                dets = dets.detach().cpu().numpy().reshape(
                    1, -1, dets.shape[2])[0]

                top_preds = {}
                dets[:, :2] = transform_preds(
                    dets[:, 0:2], inputs[scale]['center'],
                    inputs[scale]['scale'],
                    (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                dets[:, 2:4] = transform_preds(
                    dets[:, 2:4], inputs[scale]['center'],
                    inputs[scale]['scale'],
                    (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                cls = dets[:, -1]
                for j in range(dataset.num_classes):
                    inds = (cls == j)
                    top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                    top_preds[j + 1][:, :4] /= scale

                detections.append(top_preds)

            bbox_and_scores = {}
            for j in range(1, dataset.num_classes + 1):
                bbox_and_scores[j] = np.concatenate([d[j] for d in detections],
                                                    axis=0)
                if len(dataset.test_scales) > 1:
                    soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
            scores = np.hstack([
                bbox_and_scores[j][:, 4]
                for j in range(1, dataset.num_classes + 1)
            ])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, dataset.num_classes + 1):
                    keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            results[img_id] = bbox_and_scores

    eval_results = dataset.run_eval(results, cfg.ckpt_dir)
    print(eval_results)