Example #1
def main():
    args = parse_args()
    Messages = post_messages(args.msgrest)

    cfg = mmcv.Config.fromfile(args.cfg)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
    # build the dataloader
    cfg.pred_dir = args.image_dir
    cfg.data.pred.img_prefix = args.image_dir
    cfg.data.pred.ann_file = os.path.join(args.image_dir,
                                          'ImageSets/Main/test.txt')
    dataset = build_dataset(cfg.data.pred)
    data_loader = build_dataloader(dataset,
                                   imgs_per_gpu=cfg.data.imgs_per_gpu,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=distributed,
                                   shuffle=False)
    model = build_detector(cfg.model,
                           train_cfg=cfg.train_cfg,
                           test_cfg=cfg.test_cfg)
    model.eval()
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    if 'CLASSES' in checkpoint['meta']:
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES
    outputs = []
    pred = []
    model = MMDataParallel(model, device_ids=[args.gpu])
    prog_bar = mmcv.ProgressBar(len(dataset))
    with torch.no_grad():
        for i, data in enumerate(data_loader):
            IDs = []
            file_list = []
            output_tmp = model(return_loss=False, **data)
            outputs += [output_tmp]
            for j in range(len(data['img_meta'].data[0])):
                IDs += [data['img_meta'].data[0][j]['IDs']]
                file_list += [data['img_meta'].data[0][j]['filename']]
            pred = align_box_and_type_color(data, output_tmp, IDs, file_list,
                                            pred)
            for _ in range(2):
                prog_bar.update()
    if args.output_dir is not None:
        write_dir = args.output_dir
    else:
        write_dir = os.path.join('./work_dirs', 'pred_type_color.pth')
    with open(write_dir, 'w') as f:
        json.dump(pred, f)
    print('file saved at: ', write_dir)
    if args.msgrest:
        print("send http message")
        msg = {}
        msg['type'] = "15"  # distinguish picture
        msg['taskid'] = args.taskid
        msg['index'] = '1'
        msg['label_info'] = pred
        msg['total'] = '1'
        data_json = json.dumps(msg)
        Messages.post(data_json)
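
All of these snippets share the same mmcv.ProgressBar pattern: create the bar with the total number of items, then call update() once per processed item. A minimal, self-contained sketch of just that pattern:

import time

import mmcv

items = list(range(10))
prog_bar = mmcv.ProgressBar(len(items))  # total number of expected updates
for _ in items:
    time.sleep(0.1)    # stand-in for real work
    prog_bar.update()  # advance the bar by one item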
Example #2
def make_lmdb(data_path,
              lmdb_path,
              img_path_list,
              keys,
              batch=5000,
              compress_level=1,
              multiprocessing_read=False,
              n_thread=40):
    """Make lmdb.

    Contents of lmdb. The file structure is:
    example.lmdb
    ├── data.mdb
    ├── lock.mdb
    ├── meta_info.txt

    The data.mdb and lock.mdb are standard lmdb files and you can refer to
    https://lmdb.readthedocs.io/en/release/ for more details.

    The meta_info.txt is a specified txt file to record the meta information
    of our datasets. It will be automatically created when preparing
    datasets by our provided dataset tools.
    Each line in the txt file records 1) image name (with extension),
    2) image shape, and 3) compression level, separated by a white space.

    For example, the meta information could be:
    `000_00000000.png (720,1280,3) 1`, which means:
    1) image name (with extension): 000_00000000.png;
    2) image shape: (720,1280,3);
    3) compression level: 1

    We use the image name without extension as the lmdb key.

    If `multiprocessing_read` is True, it will read all the images to memory
    using multiprocessing. Thus, your server needs to have enough memory.

    Args:
        data_path (str): Data path for reading images.
        lmdb_path (str): Lmdb save path.
        img_path_list (list[str]): Image path list.
        keys (list[str]): Used for lmdb keys.
        batch (int): After processing batch images, lmdb commits.
            Default: 5000.
        compress_level (int): Compress level when encoding images. Default: 1.
        multiprocessing_read (bool): Whether to use multiprocessing to read
            all the images to memory. Default: False.
        n_thread (int): Thread number for multiprocessing read. Default: 40.
    """
    assert len(img_path_list) == len(keys), (
        'img_path_list and keys should have the same length, '
        f'but got {len(img_path_list)} and {len(keys)}')
    print(f'Create lmdb for {data_path}, save to {lmdb_path}...')
    print(f'Total images: {len(img_path_list)}')
    if not lmdb_path.endswith('.lmdb'):
        raise ValueError("lmdb_path must end with '.lmdb'.")
    if osp.exists(lmdb_path):
        print(f'Folder {lmdb_path} already exists. Exit.')
        sys.exit(1)

    if multiprocessing_read:
        # read all the images to memory (multiprocessing)
        dataset = {}  # use dict to keep the order for multiprocessing
        shapes = {}
        print(f'Read images with multiprocessing, #thread: {n_thread} ...')
        prog_bar = mmcv.ProgressBar(len(img_path_list))

        def callback(arg):
            """get the image data and update prog_bar."""
            key, dataset[key], shapes[key] = arg
            prog_bar.update()

        pool = Pool(n_thread)
        for path, key in zip(img_path_list, keys):
            pool.apply_async(read_img_worker,
                             args=(osp.join(data_path,
                                            path), key, compress_level),
                             callback=callback)
        pool.close()
        pool.join()
        print(f'Finish reading {len(img_path_list)} images.')

    # create lmdb environment
    # obtain data size for one image
    img = mmcv.imread(osp.join(data_path, img_path_list[0]), flag='unchanged')
    _, img_byte = cv2.imencode('.png', img,
                               [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
    data_size_per_img = img_byte.nbytes
    print('Data size per image is: ', data_size_per_img)
    data_size = data_size_per_img * len(img_path_list)
    env = lmdb.open(lmdb_path, map_size=data_size * 10)

    # write data to lmdb
    prog_bar = mmcv.ProgressBar(len(img_path_list))
    txn = env.begin(write=True)
    txt_file = open(osp.join(lmdb_path, 'meta_info.txt'), 'w')
    for idx, (path, key) in enumerate(zip(img_path_list, keys)):
        prog_bar.update()
        key_byte = key.encode('ascii')
        if multiprocessing_read:
            img_byte = dataset[key]
            h, w, c = shapes[key]
        else:
            _, img_byte, img_shape = read_img_worker(osp.join(data_path, path),
                                                     key, compress_level)
            h, w, c = img_shape

        txn.put(key_byte, img_byte)
        # write meta information
        txt_file.write(f'{key}.png ({h},{w},{c}) {compress_level}\n')
        if idx % batch == 0:
            txn.commit()
            txn = env.begin(write=True)
    txn.commit()
    env.close()
    txt_file.close()
    print('\nFinish writing lmdb.')
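
As described in the docstring above, the lmdb key is the image name without extension and the value is a PNG-encoded byte string. A minimal sketch of reading one image back; the lmdb path and key here are placeholders:

import cv2
import lmdb
import numpy as np

env = lmdb.open('example.lmdb', readonly=True, lock=False)
with env.begin(write=False) as txn:
    img_byte = txn.get('000_00000000'.encode('ascii'))  # key = name w/o extension
img = cv2.imdecode(np.frombuffer(img_byte, np.uint8), cv2.IMREAD_UNCHANGED)
print(img.shape)
env.close()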
def extract_inception_features(dataloader,
                               inception,
                               num_samples,
                               inception_style='pytorch'):
    """Extract inception features for FID metric.

    Args:
        dataloader (:obj:`DataLoader`): Dataloader for images.
        inception (nn.Module): Inception network.
        num_samples (int): The number of samples to be extracted.
        inception_style (str): The style of Inception network, "pytorch" or
            "stylegan". Defaults to "pytorch".

    Returns:
        torch.Tensor: Inception features.
    """
    batch_size = dataloader.batch_size
    num_iters = num_samples // batch_size
    if num_iters * batch_size < num_samples:
        num_iters += 1
    # define mmcv progress bar
    pbar = mmcv.ProgressBar(num_iters)

    feature_list = []
    curr_iter = 1
    for data in dataloader:
        # a dirty workaround to support multiple datasets (mainly the
        # unconditional dataset and the conditional dataset). In our
        # implementation, the unconditional dataset returns real images with
        # the key "real_img", while the conditional dataset uses the key
        # "img" for the real images.
        if 'real_img' in data:
            # Mainly for the unconditional dataset in our MMGeneration
            img = data['real_img']
        else:
            # Mainly for conditional dataset in MMClassification
            img = data['img']
        pbar.update()

        # the inception network is not wrapped with module wrapper.
        if not is_module_wrapper(inception):
            # put the img to the module device
            img = img.to(get_module_device(inception))

        if inception_style == 'stylegan':
            img = (img * 127.5 + 128).clamp(0, 255).to(torch.uint8)
            feature = inception(img, return_features=True)
        else:
            feature = inception(img)[0].view(img.shape[0], -1)
        feature_list.append(feature.to('cpu'))

        if curr_iter >= num_iters:
            break
        curr_iter += 1

    # Attention: more features than num_samples may have been collected,
    # so truncate below.
    features = torch.cat(feature_list, 0)

    assert features.shape[0] >= num_samples
    features = features[:num_samples]

    # to change the line after pbar
    sys.stdout.write('\n')
    return features
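
The features returned here are typically summarized by a mean and covariance per set and compared with the Fréchet distance. A rough sketch of that final step (not necessarily MMGeneration's exact implementation); the inputs are the numpy versions of the tensors returned above:

import numpy as np
from scipy import linalg


def frechet_distance(feat_real, feat_fake):
    """Compute the FID between two (N, D) feature arrays."""
    mu1, mu2 = feat_real.mean(axis=0), feat_fake.mean(axis=0)
    sigma1 = np.cov(feat_real, rowvar=False)
    sigma2 = np.cov(feat_fake, rowvar=False)
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if np.iscomplexobj(covmean):
        covmean = covmean.real  # drop tiny imaginary parts from sqrtm
    diff = mu1 - mu2
    return diff.dot(diff) + np.trace(sigma1 + sigma2 - 2 * covmean)

# e.g. fid = frechet_distance(real_feats.numpy(), fake_feats.numpy())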
Example #4
def single_test_json(model,
                     data_loader,
                     post_processor,
                     save_json_file,
                     show=True,
                     show_path=None,
                     debug_f=True,
                     gt_ann_path=None):
    """
    :param model: model
    :param data_loader: data_loader
    :param post_processor:  use this to generate bbox from mask
    :param save_json_file:  the path of json file.
    :return:
    """
    # first get the pred, then get the bboxes from masks
    # use post_processor to filter the bboxes and then save in json file.
    # masks are generated by a set of data augmentation functions.
    model.eval()
    # get the img_infos from dataset,
    dataset = data_loader.dataset
    img_prefix = dataset.img_prefix
    img_norm_cfg = dataset.img_norm_cfg
    prog_bar = mmcv.ProgressBar(len(dataset))
    if debug_f:
        assert osp.isfile(gt_ann_path)
        with open(gt_ann_path, 'r', encoding='utf-8') as f:
            eval_gt_annotations = json.loads(f.read(),
                                             object_pairs_hook=OrderedDict)
    imgs_bboxes_results = {}

    if show and (show_path is not None and not osp.isdir(show_path)):
        os.mkdir(show_path)

    for i, data in enumerate(data_loader):
        """ get the name, height, width from data['img_meta'] 
            for multi-scale test, img_meta contains several img_meta
        """
        with torch.no_grad():
            # can change this to True.
            result = model(return_loss=False, rescale=True, **data)
        # deal with the mask and post processing here.
        # as masks are fit to the original imgs, just reload the original imgs
        #
        if isinstance(result, tuple):
            bbox_result, segm_result = result
        else:
            bbox_result, segm_result = result, None
        img_tensor = data['img'][0]  # for aug test data['img'] is a list.
        img_metas = data['img_meta'][0].data[0]  # datacontainer, return ._data
        filename = img_metas[0]['filename']
        img_name = osp.splitext(filename)[0]
        # for eval.
        img_name = img_name.replace('gt_', 'res_')
        imgs = tensor2imgs(img_tensor, **img_norm_cfg)
        assert len(imgs) == len(img_metas)
        img_meta_0 = img_metas[0]
        vs_bbox_result = np.vstack(bbox_result)
        if segm_result is None:
            pred_bboxes, pred_bbox_scores = [], []
        else:
            if isinstance(segm_result, tuple):
                segm_scores = segm_result[-1]
                segms = mmcv.concat_list(segm_result[0])
            else:
                segm_scores = np.asarray(vs_bbox_result[:, -1])
                segms = mmcv.concat_list(segm_result)

            pred_bboxes, pred_bbox_scores = post_processor.process(
                segms,
                segm_scores,
                mask_shape=img_meta_0['ori_shape'],
                scale_factor=(1.0, 1.0))
        # save the results.
        single_pred_results = []
        for pred_bbox, pred_bbox_score in zip(pred_bboxes, pred_bbox_scores):
            pred_bbox = np.asarray(pred_bbox).reshape((-1, 2)).astype(np.int32)
            pred_bbox = pred_bbox.tolist()
            single_bbox_dict = {
                "points": pred_bbox,
                "confidence": float(pred_bbox_score)
            }
            single_pred_results.append(single_bbox_dict)
        imgs_bboxes_results[img_name] = single_pred_results

        if show:
            img = cv2.imread(osp.join(img_prefix, filename))
            for idx in range(len(single_pred_results)):
                bbox = np.asarray(single_pred_results[idx]["points"]).reshape(
                    -1, 2).astype(np.int64)
                cv2.drawContours(img, [bbox], -1, (0, 255, 0), 2)
            if debug_f and eval_gt_annotations is not None:
                gt_annos = eval_gt_annotations[img_name]
                for gt_idx in range(len(gt_annos)):
                    gt_bbox = np.asarray(gt_annos[gt_idx]["points"]).reshape(
                        -1, 2).astype(np.int64)
                    if gt_annos[gt_idx]["illegibility"]:
                        # if ignore red
                        color = (255, 0, 0)
                    else:  # if not ignore blue
                        color = (0, 0, 255)
                    cv2.drawContours(img, [gt_bbox], -1, color, 2)
            cv2.imwrite(osp.join(show_path, filename), img)

        batch_size = data['img'][0].size(0)
        for _ in range(batch_size):
            prog_bar.update()
        # print the postmodule pics.
    with open(save_json_file, 'w+', encoding='utf-8') as f:
        json.dump(imgs_bboxes_results, f)
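
For reference, the file written above maps each image key (with 'gt_' replaced by 'res_') to a list of polygon detections. A small sketch of consuming it, with a hypothetical file name:

import json

with open('pred_results.json', 'r', encoding='utf-8') as f:  # hypothetical path
    imgs_bboxes_results = json.load(f)

for img_name, detections in imgs_bboxes_results.items():
    # each detection is {'points': [[x, y], ...], 'confidence': float}
    print(img_name, len(detections), 'polygons')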
Example #5
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    model.eval()
    results = []
    dataset = data_loader.dataset
    PALETTE = getattr(dataset, 'PALETTE', None)
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        batch_size = len(result)
        if show or out_dir:
            if batch_size == 1 and isinstance(data['img'][0], torch.Tensor):
                img_tensor = data['img'][0]
            else:
                img_tensor = data['img'][0].data[0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for i, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(img_show,
                                         result[i],
                                         bbox_color=PALETTE,
                                         text_color=PALETTE,
                                         mask_color=PALETTE,
                                         show=show,
                                         out_file=out_file,
                                         score_thr=show_score_thr)

        # encode mask results
        if isinstance(result[0], tuple):
            result = [(bbox_results, encode_mask_results(mask_results))
                      for bbox_results, mask_results in result]
        # This logic is only used in panoptic segmentation test.
        elif isinstance(result[0], dict) and 'ins_results' in result[0]:
            for j in range(len(result)):
                bbox_results, mask_results = result[j]['ins_results']
                result[j]['ins_results'] = (bbox_results,
                                            encode_mask_results(mask_results))

        results.extend(result)

        for _ in range(batch_size):
            prog_bar.update()
    return results
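
A hedged sketch of how a single_gpu_test like this is usually driven in an mmdetection-style test script; the config and checkpoint paths are placeholders:

import mmcv
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint
from mmdet.datasets import build_dataloader, build_dataset
from mmdet.models import build_detector

cfg = mmcv.Config.fromfile('configs/some_config.py')  # placeholder
dataset = build_dataset(cfg.data.test)
data_loader = build_dataloader(dataset,
                               samples_per_gpu=1,
                               workers_per_gpu=2,
                               dist=False,
                               shuffle=False)
model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
load_checkpoint(model, 'checkpoint.pth', map_location='cpu')  # placeholder
model = MMDataParallel(model, device_ids=[0])
results = single_gpu_test(model, data_loader, show=False, out_dir=None)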
Example #6
def my_make_lmdb(mode, data_path, lmdb_path, batch=5000, compress_level=1):
    """Create lmdb for the Vimeo90K dataset.(do not need training list)

    Contents of lmdb. The file structure is:
    example.lmdb
    ├── data.mdb
    ├── lock.mdb
    ├── meta_info.txt

    The data.mdb and lock.mdb are standard lmdb files and you can refer to
    https://lmdb.readthedocs.io/en/release/ for more details.

    The meta_info.txt is a specified txt file to record the meta information
    of our datasets. It will be automatically created when preparing
    datasets by our provided dataset tools.
    Each line in the txt file records 1) image name (with extension),
    2) image shape, and 3) compression level, separated by a white space.

    For example, the meta information could be:
    `000_00000000.png (720,1280,3) 1`, which means:
    1) image name (with extension): 000_00000000.png;
    2) image shape: (720,1280,3);
    3) compression level: 1

    We use the image name without extension as the lmdb key.

    Args:
        mode (str): Dataset mode. 'gt' or 'lq'.
        data_path (str): Data path for reading images.
        lmdb_path (str): Lmdb save path.
        batch (int): After processing batch images, lmdb commits.
            Default: 5000.
        compress_level (int): Compress level when encoding images. Default: 1.
    """

    print(f'Create lmdb for {data_path}, save to {lmdb_path}...')
    if mode == 'gt':
        h_dst, w_dst = 256, 448
    else:
        h_dst, w_dst = 64, 112

    if osp.exists(lmdb_path):
        print(f'Folder {lmdb_path} already exists. Exit.')
        sys.exit(1)

    print('Reading image path list ...')
    train_list = []
    for filedir in os.listdir(data_path):
        train_list.append(filedir)

    all_img_list = []
    keys = []
    for line in train_list:
        folder, sub_folder = line.split('_')
        for j in range(0, 7):
            all_img_list.append(osp.join(data_path, line,
                                         f'frame_0000{j}.png'))
            keys.append('{}_{}_{}'.format(folder, sub_folder, j + 1))
    all_img_list = sorted(all_img_list)
    keys = sorted(keys)

    if mode == 'gt':  # only read the 4th frame for the gt mode
        print('Only keep the 4th frame for gt mode.')
        all_img_list = [v for v in all_img_list if v.endswith('3.png')]
        keys = [v for v in keys if v.endswith('_4')]

    # create lmdb environment
    # obtain data size for one image
    img = mmcv.imread(osp.join(data_path, all_img_list[0]), flag='unchanged')
    _, img_byte = cv2.imencode('.png', img,
                               [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
    data_size_per_img = img_byte.nbytes
    print('Data size per image is: ', data_size_per_img)
    data_size = data_size_per_img * len(all_img_list)
    env = lmdb.open(lmdb_path, map_size=data_size * 10)

    # write data to lmdb
    pbar = mmcv.ProgressBar(len(all_img_list))
    txn = env.begin(write=True)
    txt_file = open(osp.join(lmdb_path, 'meta_info.txt'), 'w')
    for idx, (path, key) in enumerate(zip(all_img_list, keys)):
        pbar.update()
        key_byte = key.encode('ascii')
        img = mmcv.imread(osp.join(data_path, path), flag='unchanged')
        h, w, c = img.shape
        _, img_byte = cv2.imencode(
            '.png', img, [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
        assert h == h_dst and w == w_dst and c == 3, (
            f'Wrong shape ({h},{w},{c}), should be ({h_dst},{w_dst},3).')
        txn.put(key_byte, img_byte)
        # write meta information
        txt_file.write(f'{key}.png ({h},{w},{c}) {compress_level}\n')
        if idx % batch == 0:
            txn.commit()
            txn = env.begin(write=True)
    txn.commit()
    env.close()
    txt_file.close()
    print('\nFinish writing lmdb.')
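
A small sketch of parsing the meta_info.txt lines that both lmdb makers above write, e.g. '000_00000000.png (720,1280,3) 1':

def parse_meta_info(path):
    """Parse meta_info.txt into (name, (h, w, c), compress_level) tuples."""
    entries = []
    with open(path, 'r') as f:
        for line in f:
            name, shape, level = line.strip().rsplit(' ', 2)
            h, w, c = (int(v) for v in shape.strip('()').split(','))
            entries.append((name, (h, w, c), int(level)))
    return entries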
Example #7
    def after_train_iter(self, runner):
        """The behavior after each train iteration.

        Args:
            runner (``mmcv.runner.BaseRunner``): The runner.
        """
        interval = self.get_current_interval(runner)
        if not self.every_n_iters(runner, interval):
            return

        runner.model.eval()
        source_domain = runner.model.module.get_other_domains(
            self.target_domain)[0]
        # feed real images
        max_num_images = max(metric.num_images for metric in self.metrics)
        for metric in self.metrics:
            if metric.num_real_feeded >= metric.num_real_need:
                continue
            mmcv.print_log(f'Feed reals to {metric.name} metric.', 'mmgen')
            # feed in real images
            for data in self.dataloader:
                # key for translation model
                if f'img_{self.target_domain}' in data:
                    reals = data[f'img_{self.target_domain}']
                # key for conditional GAN
                else:
                    raise KeyError(
                        'Cannot find key for images in data_dict.')
                num_feed = metric.feed(reals, 'reals')
                if num_feed <= 0:
                    break

        mmcv.print_log(f'Sample {max_num_images} fake images for evaluation',
                       'mmgen')

        rank, ws = get_dist_info()

        # define mmcv progress bar
        if rank == 0:
            pbar = mmcv.ProgressBar(max_num_images)

        # feed in fake images
        for data in self.dataloader:
            # key for translation model
            if f'img_{source_domain}' in data:
                with torch.no_grad():
                    output_dict = runner.model(
                        data[f'img_{source_domain}'],
                        test_mode=True,
                        target_domain=self.target_domain,
                        **self.sample_kwargs)
                fakes = output_dict['target']
            # key Error
            else:
                raise KeyError('Cannot find key for images in data_dict.')
            # sampling fake images and directly send them to metrics
            # pbar update number for one proc
            num_update = 0
            for metric in self.metrics:
                if metric.num_fake_feeded >= metric.num_fake_need:
                    continue
                num_feed = metric.feed(fakes, 'fakes')
                num_update = max(num_update, num_feed)
                if num_feed <= 0:
                    break

            if rank == 0:
                if num_update > 0:
                    pbar.update(num_update * ws)

        runner.log_buffer.clear()
        # a dirty workaround to start a new line at the end of pbar
        if rank == 0:
            sys.stdout.write('\n')
            for metric in self.metrics:
                with torch.no_grad():
                    metric.summary()
                for name, val in metric._result_dict.items():
                    runner.log_buffer.output[name] = val

                    # record best metric and save the best ckpt
                    if self.save_best_ckpt and name in self.best_metric:
                        self._save_best_ckpt(runner, val, name)

            runner.log_buffer.ready = True
        runner.model.train()

        # clear all current states for next evaluation
        for metric in self.metrics:
            metric.clear()
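
The hook above only assumes a small interface on each metric (attributes like num_images, num_real_need and num_real_feeded, plus feed(), summary(), _result_dict and clear()). A hypothetical minimal metric illustrating that inferred interface, not an actual mmgen class:

class DummyMetric:
    """Hypothetical metric exposing the attributes/methods the hook calls."""

    name = 'dummy'
    num_images = 100      # total samples the metric wants
    num_real_need = 100
    num_fake_need = 100

    def __init__(self):
        self.num_real_feeded = 0
        self.num_fake_feeded = 0
        self._result_dict = {}

    def feed(self, batch, mode):
        # return the number of samples consumed; <= 0 means "stop feeding"
        if mode == 'reals':
            num = min(len(batch), self.num_real_need - self.num_real_feeded)
            self.num_real_feeded += num
        else:
            num = min(len(batch), self.num_fake_need - self.num_fake_feeded)
            self.num_fake_feeded += num
        return num

    def summary(self):
        self._result_dict = {'dummy_score': 0.0}

    def clear(self):
        self.num_real_feeded = 0
        self.num_fake_feeded = 0
        self._result_dict = {}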
Example #8
        sequence_info = sequence.sequence_info
        min_frame_idx = sequence.min_frame_idx
        max_frame_idx = sequence.max_frame_idx
        image_size = sequence.image_size
        print(f'[{i + 1}/{num_seqs}] Processing sequence {sequence_name} ...')
        if args.save_video:
            # create video writer
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            save_video_path = os.path.join(cfg.output_dir,
                                           f'{sequence_name}.avi')
            writer = cv2.VideoWriter(save_video_path, fourcc, 20,
                                     (image_size[1], image_size[0]))

        videotracker.init_tracker()

        prog_bar = mmcv.ProgressBar(max_frame_idx)
        for frame_idx in range(min_frame_idx, max_frame_idx + 1):
            frame_info = sequence_info[frame_idx]
            videotracker.step(frame_info)
            prog_bar.update()

        results = videotracker.get_results()

        print('\n')
        output_file = os.path.join(cfg.output_dir, f'{sequence_name}.txt')
        with open(output_file, 'w') as f:
            for frame_idx, frame_result in results.items():
                if args.save_video:
                    img = cv2.imread(sequence_info[frame_idx].filename)
                for row in frame_result:
                    print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' %
Example #9
def main():
    args = parse_args()
    config_file1 = './swa/swa_cascade_rcnn_r50_rfp_sac_iou_alldata-v3_e15/swa_cascade_rcnn_r50_rfp_sac_iou_alldata-v3_e15.py'  #
    checkpoint_file1 = './swa/swa_cascade_rcnn_r50_rfp_sac_iou_alldata-v3_e15/swa_model_12.pth'
    config_file2 = './swa/cascade_rcnn_r50_rfp_sac_iou_ls_alldata-v3_e15.py'
    checkpoint_file2 = './swa/epoch_15.pth'

    device = 'cuda:0'
    cfg1 = Config.fromfile(config_file1)
    cfg2 = Config.fromfile(config_file2)
    # build model
    # model1
    model1 = build_detector(cfg1.model, test_cfg=cfg1.get('test_cfg'))
    load_checkpoint(model1, checkpoint_file1, map_location=device)
    # model2
    model2 = build_detector(cfg2.model, test_cfg=cfg2.get('test_cfg'))
    load_checkpoint(model2, checkpoint_file2, map_location=device)

    test_json_raw = json.load(open(cfg1.data.test.ann_file))
    imgid2name = {}
    for imageinfo in test_json_raw['images']:
        imgid = imageinfo['id']
        imgid2name[imageinfo['file_name']] = imgid
    wrap_fp16_model(model1)  # use fp16 to speed up inference
    wrap_fp16_model(model2)

    # build the dataloader
    # aug_test does not support batch_size > 1
    samples_per_gpu = cfg1.data.test.pop('samples_per_gpu', 1)
    dataset = build_dataset(cfg1.data.test)
    data_loader = build_dataloader(dataset,
                                   samples_per_gpu=samples_per_gpu,
                                   workers_per_gpu=4,
                                   dist=False,
                                   shuffle=False)
    model1 = MMDataParallel(model1, device_ids=[0])  # why is this needed? (it errors without it)
    model2 = MMDataParallel(model2, device_ids=[0])
    model1.eval()
    model2.eval()

    json_results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result1 = model1(return_loss=False, rescale=True, **data)
            result2 = model2(return_loss=False, rescale=True, **data)
        batch_size = len(result1)
        assert len(result1) == len(result2)

        result1 = result1[0]  # only one image is fed per iteration
        result2 = result2[0]
        img_metas = data['img_metas'][0].data[0]
        img_shape = img_metas[0]['ori_shape']
        bboxes, scores, labels = post_predictions(result1, img_shape)
        e_bboxes, e_scores, e_labels = post_predictions(result2, img_shape)
        bboxes_list = [bboxes, e_bboxes]
        scores_list = [scores, e_scores]
        labels_list = [labels, e_labels]
        bboxes, scores, labels = weighted_boxes_fusion(bboxes_list,
                                                       scores_list,
                                                       labels_list,
                                                       weights=[1, 1],
                                                       iou_thr=0.6,
                                                       skip_box_thr=0.0001,
                                                       conf_type='max')
        # basename = img_metas[0]['ori_filename']
        # image = cv2.imread(os.path.join(cfg.data.test.img_prefix, basename))
        for (box, score, label) in zip(bboxes, scores, labels):
            xmin, ymin, xmax, ymax = box.tolist()
            xmin = round(float(xmin) * img_shape[1], 2)
            ymin = round(float(ymin) * img_shape[0], 2)
            xmax = round(float(xmax) * img_shape[1], 2)
            ymax = round(float(ymax) * img_shape[0], 2)
            data = dict()
            data['image_id'] = imgid2name[img_metas[0]['ori_filename']]
            data['bbox'] = [xmin, ymin, xmax - xmin, ymax - ymin]
            data['score'] = float(score)
            data['category_id'] = label + 1
            json_results.append(data)
        for _ in range(batch_size):
            prog_bar.update()
    mmcv.dump(json_results, args.jsonfile)
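
weighted_boxes_fusion (from the ensemble-boxes package) expects box coordinates normalized to [0, 1], which is why the fused boxes are multiplied back by the image shape above. A hypothetical sketch of what post_predictions could look like under that assumption:

import numpy as np


def post_predictions(result, img_shape):
    """Hypothetical helper: flatten an mmdet per-class result list into
    normalized boxes, scores and labels for weighted_boxes_fusion."""
    h, w = img_shape[0], img_shape[1]
    bboxes, scores, labels = [], [], []
    for cls_id, cls_dets in enumerate(result):
        for x1, y1, x2, y2, score in cls_dets:
            bboxes.append([x1 / w, y1 / h, x2 / w, y2 / h])
            scores.append(float(score))
            labels.append(cls_id)
    return np.array(bboxes), np.array(scores), np.array(labels)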
Example #10
def multi_gpu_test(model,
                   data_loader,
                   tmpdir=None,
                   gpu_collect=False,
                   bbox_head=None):
    """Test model with multiple gpus.

    This method tests model with multiple gpus and collects the results
    under two different modes: gpu and cpu modes. By setting
    ``gpu_collect=True``, it encodes results to gpu tensors and uses gpu
    communication for results collection. In cpu mode it saves the results on
    different gpus to ``tmpdir`` and collects them by the rank 0 worker.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        tmpdir (str): Path of directory to save the temporary results from
            different gpus under cpu mode.
        gpu_collect (bool): Option to use either gpu or cpu to collect results.
        bbox_head (nn.Module, optional): Bbox head of the model, used here to
            decide how the raw results are post-processed. Default: None.

    Returns:
        list: The prediction results.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    rank, world_size = get_dist_info()
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
    time.sleep(2)  # This line can prevent deadlock problem in some cases.
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)
            if bbox_head is not None and bbox_head.type == 'LSHead':
                if bbox_head.task == 'bbox':
                    extremes = result.pop(-1)
                    result = result[0]
                elif bbox_head.task == 'segm':
                    bbox_results, poly_results = result
                    img_metas = data['img_metas'][0].data[0]
                    ori_h, ori_w = img_metas[0]['ori_shape'][:-1]
                    encoded_poly_results = encode_poly_results(
                        poly_results, ori_h, ori_w)
                    result = bbox_results, encoded_poly_results
            elif isinstance(result, tuple):
                bbox_results, mask_results = result
                encoded_mask_results = encode_mask_results(mask_results)
                result = bbox_results, encoded_mask_results
        results.append(result)

        if rank == 0:
            batch_size = len(data['img_metas'][0].data)
            for _ in range(batch_size * world_size):
                prog_bar.update()

    # collect results from all ranks
    if gpu_collect:
        results = collect_results_gpu(results, len(dataset))
    else:
        results = collect_results_cpu(results, len(dataset), tmpdir)
    return results
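
A hedged sketch of driving multi_gpu_test: the process group is initialized first (e.g. when the script is started by a torch.distributed launcher) and the model is wrapped with MMDistributedDataParallel; model and data_loader are assumed to be built as in the single-GPU sketch earlier, and the tmpdir is a placeholder:

import torch
from mmcv.parallel import MMDistributedDataParallel
from mmcv.runner import init_dist

init_dist('pytorch')  # relies on env vars set by the distributed launcher
model = MMDistributedDataParallel(
    model.cuda(),
    device_ids=[torch.cuda.current_device()],
    broadcast_buffers=False)
results = multi_gpu_test(model, data_loader, tmpdir='./tmp', gpu_collect=False)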
Example #11
    def __init__(self,
                 release_version,
                 imageset,
                 save_vis=False,
                 show_pseudomask=False,
                 encode='centerness',
                 heatmap_rate=0.5,
                 factor=4,
                 method='min_area',
                 multi_processing=False,
                 num_processor=4):
        self.release_version = release_version
        self.imageset = imageset
        self.encode = encode
        self.factor = factor
        self.method = method

        self.save_dir_names = {
            'centerness': 'reverse_centerness_seg',
            'gaussian': 'reverse_gaussian_seg',
            'ellipse': 'reverse_ellipse_seg'
        }

        self.imgDir = './data/{}/{}/coco/{}/'.format(core_dataset,
                                                     self.release_version,
                                                     self.imageset)
        self.annFile = './data/{}/{}/coco/annotations/{}.json'.format(
            core_dataset, self.release_version, "_".join(ann_file_name))

        self.save_vis = save_vis
        self.show_pseudomask = show_pseudomask

        self.save_path = './data/{}/{}/{}/{}'.format(
            core_dataset, self.release_version, self.imageset,
            self.save_dir_names[self.encode])
        self.save_vis_path = './data/{}/{}/{}/pseudomask_vis'.format(
            core_dataset, self.release_version, self.imageset)

        mmcv.mkdir_or_exist(self.save_path)
        mmcv.mkdir_or_exist(self.save_vis_path)

        self.heatmap_rate = heatmap_rate

        self.gaussian_image = 255 - generate_gaussian_image(
            512, 512, 2.5, threshold=int(self.heatmap_rate * 255))
        self.centerness_image = 255 - generate_centerness_image(
            512,
            512,
            factor=self.factor,
            threshold=int(self.heatmap_rate * 255))
        self.ellipse_image = 255 - generate_ellipse_image(
            512, 512, threshold=int(self.heatmap_rate * 255))

        self.anchor_image = {
            'centerness': self.centerness_image,
            'gaussian': self.gaussian_image,
            'ellipse': self.ellipse_image
        }

        self.coco = COCO(self.annFile)
        self.catIds = self.coco.getCatIds(catNms=[''])
        self.imgIds = self.coco.getImgIds(catIds=self.catIds)
        self.progress_bar = mmcv.ProgressBar(len(self.imgIds))
        self.multi_processing = multi_processing
        self.pool = Pool(num_processor)
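
The helpers generate_gaussian_image / generate_centerness_image / generate_ellipse_image are not shown in this snippet; based on how their outputs are thresholded and inverted above, a hypothetical stand-in for the Gaussian variant might look like this (the meaning of the third argument is an assumption):

import numpy as np


def generate_gaussian_image(height, width, sigma_factor, threshold=128):
    """Hypothetical: centred 2D Gaussian heatmap in [0, 255], zeroed below
    ``threshold``."""
    ys, xs = np.mgrid[0:height, 0:width]
    cy, cx = (height - 1) / 2.0, (width - 1) / 2.0
    sigma_y = height / (2.0 * sigma_factor)
    sigma_x = width / (2.0 * sigma_factor)
    heatmap = np.exp(-0.5 * (((xs - cx) / sigma_x) ** 2 +
                             ((ys - cy) / sigma_y) ** 2))
    heatmap = (heatmap * 255).astype(np.uint8)
    heatmap[heatmap < threshold] = 0
    return heatmap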
Example #12
def single_gpu_test(model,
                    data_loader,
                    bbox_head=None,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False,
                           rescale=True,
                           show=show,
                           out_dir=out_dir,
                           **data)

        if show or out_dir:
            img_tensor = data['img'][0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for img, img_meta in zip(imgs, img_metas):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(img_show,
                                         result,
                                         show=show,
                                         out_file=out_file,
                                         score_thr=show_score_thr)

        if bbox_head is not None and bbox_head.type == 'LSHead':
            if bbox_head.task == 'bbox':
                extremes = result.pop(-1)
                result = result[0]
            elif bbox_head.task == 'segm':
                bbox_results, poly_results = result
                img_metas = data['img_metas'][0].data[0]
                ori_h, ori_w = img_metas[0]['ori_shape'][:-1]
                encoded_poly_results = encode_poly_results(
                    poly_results, ori_h, ori_w)
                result = bbox_results, encoded_poly_results
        elif isinstance(result, tuple):
            bbox_results, mask_results = result
            encoded_mask_results = encode_mask_results(mask_results)
            result = bbox_results, encoded_mask_results
        results.append(result)

        batch_size = len(data['img_metas'][0].data)
        for _ in range(batch_size):
            prog_bar.update()
    return results
Example #13
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        if show or out_dir:
            img_tensor = data['img'][0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for img, img_meta in zip(imgs, img_metas):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(img_show,
                                         result,
                                         show=show,
                                         out_file=out_file,
                                         score_thr=show_score_thr)

        # encode mask results
        if isinstance(result, tuple) and len(result) == 2:
            # Mask R-CNN
            bbox_results, mask_results = result
            encoded_mask_results = encode_mask_results(mask_results)
            result = bbox_results, encoded_mask_results
        elif isinstance(result, tuple) and len(result) == 3:
            # Mask R-CNN + Offset
            bbox_results, mask_results, offset_results = result
            if mask_results is not None:
                encoded_mask_results = encode_mask_results(mask_results)
                result = bbox_results, encoded_mask_results, offset_results
            else:
                # only pred offset
                result = bbox_results, offset_results
        elif isinstance(result, tuple) and len(result) == 4:
            # Mask R-CNN + Offset + Height
            bbox_results, mask_results, offset_results, height_results = result
            encoded_mask_results = encode_mask_results(mask_results)
            result = bbox_results, encoded_mask_results, offset_results, height_results

        results.append(result)

        batch_size = len(data['img_metas'][0].data)
        for _ in range(batch_size):
            prog_bar.update()
    return results
Example #14
def update_bn_stats(model, data_loader, num_iters=200, logger=None):
    """Recompute and update the batch norm stats to make them more precise.

    During training, both the BN stats and the weights change after every
    iteration, so the running average cannot precisely reflect the actual
    stats of the current model.
    In this function, the BN stats are recomputed with fixed weights, to make
    the running average more precise. Specifically, it computes the true
    average of per-batch mean/variance instead of the running average.

    Args:
        model (nn.Module): The model whose bn stats will be recomputed.
        data_loader (iterator): The DataLoader iterator.
        num_iters (int): number of iterations to compute the stats.
        logger (:obj:`logging.Logger` | None): Logger for logging.
            Default: None.
    """

    model.train()

    assert len(data_loader) >= num_iters, (
        f'length of dataloader {len(data_loader)} must be at least the '
        f'iteration number {num_iters}')

    if is_parallel_module(model):
        parallel_module = model
        model = model.module
    else:
        parallel_module = model
    # Finds all the bn layers with training=True.
    bn_layers = [
        m for m in model.modules() if m.training and isinstance(m, _BatchNorm)
    ]

    if len(bn_layers) == 0:
        print_log('No BN found in model', logger=logger, level=logging.WARNING)
        return
    print_log(f'{len(bn_layers)} BN found', logger=logger)

    # Finds all the other norm layers with training=True.
    for m in model.modules():
        if m.training and isinstance(m, (_InstanceNorm, GroupNorm)):
            print_log('IN/GN stats will be updated like training.',
                      logger=logger,
                      level=logging.WARNING)

    # In order to make the running stats only reflect the current batch, the
    # momentum is disabled.
    # bn.running_mean = (1 - momentum) * bn.running_mean + momentum *
    # batch_mean
    # Setting the momentum to 1.0 to compute the stats without momentum.
    momentum_actual = [bn.momentum for bn in bn_layers]  # pyre-ignore
    for bn in bn_layers:
        bn.momentum = 1.0

    # Note that running_var actually means "running average of variance"
    running_mean = [torch.zeros_like(bn.running_mean) for bn in bn_layers]
    running_var = [torch.zeros_like(bn.running_var) for bn in bn_layers]

    finish_before_loader = False
    prog_bar = mmcv.ProgressBar(len(data_loader))
    for ind, data in enumerate(data_loader):
        with torch.no_grad():
            parallel_module(**data, return_loss=False)
        prog_bar.update()
        for i, bn in enumerate(bn_layers):
            # Accumulates the bn stats.
            running_mean[i] += (bn.running_mean - running_mean[i]) / (ind + 1)
            # running_var is actually the running average of per-batch variance
            running_var[i] += (bn.running_var - running_var[i]) / (ind + 1)

        if (ind + 1) >= num_iters:
            finish_before_loader = True
            break
    assert finish_before_loader, 'Dataloader stopped before ' \
                                 f'iteration {num_iters}'

    for i, bn in enumerate(bn_layers):
        # Sets the precise bn stats.
        bn.running_mean = running_mean[i]
        bn.running_var = running_var[i]
        bn.momentum = momentum_actual[i]
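
Typical usage is to run this once with frozen weights just before evaluation; a short sketch, assuming model, train_loader and val_loader come from the surrounding training script and a test helper like the single_gpu_test examples in this listing:

update_bn_stats(model, train_loader, num_iters=200)
model.eval()
results = single_gpu_test(model, val_loader)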
Example #15
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    fps=3,
                    show_score_thr=0.3):
    """Test model with single gpu.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        show (bool, optional): If True, visualize the prediction results.
            Defaults to False.
        out_dir (str, optional): Path of directory to save the
            visualization results. Defaults to None.
        fps (int, optional): FPS of the output video.
            Defaults to 3.
        show_score_thr (float, optional): The score threshold of visualization
            (Only used in VID for now). Defaults to 0.3.

    Returns:
        dict[str, list]: The prediction results.
    """
    model.eval()
    results = defaultdict(list)
    dataset = data_loader.dataset
    prev_img_meta = None
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        batch_size = data['img'][0].size(0)
        if show or out_dir:
            assert batch_size == 1, 'Only support batch_size=1 when testing.'
            img_tensor = data['img'][0]
            img_meta = data['img_metas'][0].data[0][0]
            img = tensor2imgs(img_tensor, **img_meta['img_norm_cfg'])[0]

            h, w, _ = img_meta['img_shape']
            img_show = img[:h, :w, :]

            ori_h, ori_w = img_meta['ori_shape'][:-1]
            img_show = mmcv.imresize(img_show, (ori_w, ori_h))

            if out_dir:
                out_file = osp.join(out_dir, img_meta['ori_filename'])
            else:
                out_file = None

            model.module.show_result(img_show,
                                     result,
                                     show=show,
                                     out_file=out_file,
                                     score_thr=show_score_thr)

            # Whether need to generate a video from images.
            # The frame_id == 0 means the model starts processing
            # a new video, therefore we can write the previous video.
            # There are two corner cases.
            # Case 1: prev_img_meta == None means there is no previous video.
            # Case 2: i == len(dataset) means processing the last video
            need_write_video = (prev_img_meta is not None
                                and img_meta['frame_id'] == 0
                                or i == len(dataset))
            if out_dir and need_write_video:
                prev_img_prefix, prev_img_name = prev_img_meta[
                    'ori_filename'].rsplit('/', 1)
                prev_img_idx, prev_img_type = prev_img_name.split('.')
                prev_filename_tmpl = '{:0' + str(
                    len(prev_img_idx)) + 'd}.' + prev_img_type
                prev_img_dirs = f'{out_dir}/{prev_img_prefix}'
                prev_img_names = sorted(os.listdir(prev_img_dirs))
                prev_start_frame_id = int(prev_img_names[0].split('.')[0])
                prev_end_frame_id = int(prev_img_names[-1].split('.')[0])

                mmcv.frames2video(prev_img_dirs,
                                  f'{prev_img_dirs}/out_video.mp4',
                                  fps=fps,
                                  fourcc='mp4v',
                                  filename_tmpl=prev_filename_tmpl,
                                  start=prev_start_frame_id,
                                  end=prev_end_frame_id,
                                  show_progress=False)

            prev_img_meta = img_meta

        for key in result:
            if 'mask' in key:
                result[key] = encode_mask_results(result[key])

        for k, v in result.items():
            results[k].append(v)

        for _ in range(batch_size):
            prog_bar.update()

    return results
def single_gpu_test(model, data_loader, show=False):
    model.eval()
    results = []
    #pdb.set_trace()
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    elapsed_time = 0.0
    total_elapsed_time = 0.0
    training_examples = 0.0
    #pdb.set_trace()
    #load_trt.init()

    for i, data in enumerate(data_loader):
        #with torch.autograd.profiler.profile(use_cuda=True) as prof:
        with torch.no_grad():
            # synchronise torch: executes the commands in order
            #print("Data", len(data['img']))
            # b,c,h,w  = data['img'][0].shape
            # h1 = int((h*scale))
            # w1 = int((w*scale))
            # data['img'][0] = data['img'][0][:,:,:h1+1,:w1+1]
            t1_start = perf_counter()
            #pdb.set_trace()
            result = model(return_loss=False, rescale=not show, **data)

            torch.cuda.synchronize()
            t1_stop = perf_counter()
            elapsed_time += t1_stop - t1_start
            training_examples = training_examples + 1

            if (i % 100 == 0):
                print("Average Elapsed time", elapsed_time / 100.0)
                total_elapsed_time += elapsed_time
                elapsed_time = 0.0
            #print("-----------------------------------------------------------------------")
            #pdb.set_trace()
            #print("Result",len(result))

        results.append(result)

        if show:

            model.module.show_result(data, result)

        batch_size = data['img'][0].size(0)
        for _ in range(batch_size):

            prog_bar.update()
    #print(prof)
    from mmdet.models.detectors.single_stage import time_torch2trt
    init_time_single = time_torch2trt
    from mmdet.models.detectors.two_stage import time_torch2trt
    init_time_two = time_torch2trt
    print("\n Total ELAPSED TIME", total_elapsed_time)
    print("\n Training examples", training_examples)
    print("Time taken to initialize torch2trt",
          max(init_time_single, init_time_two))
    avg_elapsed_time = (
        (total_elapsed_time - max(init_time_single, init_time_two)) /
        training_examples)
    #avg_elapsed_time = ((total_elapsed_time)/training_examples)
    print("\n Average elapsed time", avg_elapsed_time)
    with open(
            "/home/nsathish/Efficient_object_detection/mmdetection/results/TensorRT_FP16-1080.txt",
            "a") as myfile:
        myfile.write("Average elapsed time:" + str(avg_elapsed_time) + "\n")
    return results
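
The timing pattern above (perf_counter plus torch.cuda.synchronize) is what makes the measured latency include the asynchronous GPU work. An isolated sketch of the same idea with warmup iterations, assuming model and data as in the loop above:

import torch
from time import perf_counter


def time_forward(model, data, warmup=5, iters=50):
    """Average per-forward latency in seconds, including GPU execution."""
    with torch.no_grad():
        for _ in range(warmup):
            model(return_loss=False, rescale=True, **data)
        torch.cuda.synchronize()
        start = perf_counter()
        for _ in range(iters):
            model(return_loss=False, rescale=True, **data)
        torch.cuda.synchronize()
    return (perf_counter() - start) / iters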
Example #17
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    efficient_test=False,
                    opacity=0.5):
    """Test with single GPU.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (utils.data.Dataloader): Pytorch data loader.
        show (bool): Whether show results during inference. Default: False.
        out_dir (str, optional): If specified, the visualization results will
            be dumped into this directory.
        efficient_test (bool): Whether to save the results as local numpy
            files to save CPU memory during evaluation. Default: False.
        opacity (float): Opacity of the painted segmentation map.
            Default: 0.5. Must be in the (0, 1] range.
    Returns:
        list: The prediction results.
    """

    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, **data)

        if show or out_dir:
            img_tensor = data['img'][0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for img, img_meta in zip(imgs, img_metas):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(img_show,
                                         result,
                                         palette=dataset.PALETTE,
                                         show=show,
                                         out_file=out_file,
                                         opacity=opacity)

        if isinstance(result, list):
            if efficient_test:
                result = [np2tmp(_) for _ in result]
            results.extend(result)
        else:
            if efficient_test:
                result = np2tmp(result)
            results.append(result)

        batch_size = len(result)
        for _ in range(batch_size):
            prog_bar.update()
    return results
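
The efficient_test branch relies on an np2tmp helper that swaps each result array for a temporary .npy file on disk; roughly, it does something like this (a sketch, not necessarily mmseg's exact code):

import tempfile

import numpy as np


def np2tmp(array, temp_file_name=None):
    """Save an ndarray to a temporary .npy file and return the file name."""
    if temp_file_name is None:
        temp_file_name = tempfile.NamedTemporaryFile(suffix='.npy',
                                                     delete=False).name
    np.save(temp_file_name, array)
    return temp_file_name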
def main():
    args = parse_args()

    frame_paths, original_frames = frame_extraction(args.video)
    num_frame = len(frame_paths)
    h, w, _ = original_frames[0].shape

    # resize frames to shortside 256
    new_w, new_h = mmcv.rescale_size((w, h), (256, np.Inf))
    frames = [mmcv.imresize(img, (new_w, new_h)) for img in original_frames]
    w_ratio, h_ratio = new_w / w, new_h / h

    # Get clip_len, frame_interval and calculate center index of each clip
    config = mmcv.Config.fromfile(args.config)
    config.merge_from_dict(args.cfg_options)
    val_pipeline = config.data.val.pipeline

    sampler = [x for x in val_pipeline if x['type'] == 'SampleAVAFrames'][0]
    clip_len, frame_interval = sampler['clip_len'], sampler['frame_interval']
    window_size = clip_len * frame_interval
    assert clip_len % 2 == 0, 'We would like to have an even clip_len'
    # Note that it's 1 based here
    timestamps = np.arange(window_size // 2, num_frame + 1 - window_size // 2,
                           args.predict_stepsize)

    # Load label_map
    label_map = load_label_map(args.label_map)
    try:
        if config['data']['train']['custom_classes'] is not None:
            label_map = {
                id + 1: label_map[cls]
                for id, cls in enumerate(config['data']['train']
                                         ['custom_classes'])
            }
    except KeyError:
        pass

    # Get Human detection results
    center_frames = [frame_paths[ind - 1] for ind in timestamps]
    human_detections = detection_inference(args, center_frames)
    for i in range(len(human_detections)):
        det = human_detections[i]
        det[:, 0:4:2] *= w_ratio
        det[:, 1:4:2] *= h_ratio
        human_detections[i] = torch.from_numpy(det[:, :4]).to(args.device)

    # Get img_norm_cfg
    img_norm_cfg = config['img_norm_cfg']
    if 'to_rgb' not in img_norm_cfg and 'to_bgr' in img_norm_cfg:
        to_bgr = img_norm_cfg.pop('to_bgr')
        img_norm_cfg['to_rgb'] = to_bgr
    img_norm_cfg['mean'] = np.array(img_norm_cfg['mean'])
    img_norm_cfg['std'] = np.array(img_norm_cfg['std'])

    # Build STDET model
    try:
        # In our spatiotemporal detection demo, different actions should have
        # the same number of bboxes.
        config['model']['test_cfg']['rcnn']['action_thr'] = .0
    except KeyError:
        pass

    config.model.backbone.pretrained = None
    model = build_detector(config.model, test_cfg=config.get('test_cfg'))

    load_checkpoint(model, args.checkpoint, map_location=args.device)
    model.to(args.device)
    model.eval()

    predictions = []

    print('Performing SpatioTemporal Action Detection for each clip')
    assert len(timestamps) == len(human_detections)
    prog_bar = mmcv.ProgressBar(len(timestamps))
    for timestamp, proposal in zip(timestamps, human_detections):
        if proposal.shape[0] == 0:
            predictions.append(None)
            continue

        start_frame = timestamp - (clip_len // 2 - 1) * frame_interval
        frame_inds = start_frame + np.arange(0, window_size, frame_interval)
        frame_inds = list(frame_inds - 1)
        imgs = [frames[ind].astype(np.float32) for ind in frame_inds]
        _ = [mmcv.imnormalize_(img, **img_norm_cfg) for img in imgs]
        # THWC -> CTHW -> 1CTHW
        input_array = np.stack(imgs).transpose((3, 0, 1, 2))[np.newaxis]
        input_tensor = torch.from_numpy(input_array).to(args.device)

        with torch.no_grad():
            result = model(
                return_loss=False,
                img=[input_tensor],
                img_metas=[[dict(img_shape=(new_h, new_w))]],
                proposals=[[proposal]])
            result = result[0]
            prediction = []
            # N proposals
            for i in range(proposal.shape[0]):
                prediction.append([])
            # Perform action score thr
            for i in range(len(result)):
                if i + 1 not in label_map:
                    continue
                for j in range(proposal.shape[0]):
                    if result[i][j, 4] > args.action_score_thr:
                        prediction[j].append(
                            (label_map[i + 1], result[i][j, 4]))
            predictions.append(prediction)
        prog_bar.update()

    results = []
    for human_detection, prediction in zip(human_detections, predictions):
        results.append(pack_result(human_detection, prediction, new_h, new_w))

    def dense_timestamps(timestamps, n):
        """Expand the timestamps so that frames become n times denser."""
        old_frame_interval = (timestamps[1] - timestamps[0])
        start = timestamps[0] - old_frame_interval / n * (n - 1) / 2
        new_frame_inds = np.arange(
            len(timestamps) * n) * old_frame_interval / n + start
        return new_frame_inds.astype(np.int64)

    dense_n = int(args.predict_stepsize / args.output_stepsize)
    frames = [
        cv2.imread(frame_paths[i - 1])
        for i in dense_timestamps(timestamps, dense_n)
    ]
    print('Performing visualization')
    vis_frames = visualize(frames, results)
    vid = mpy.ImageSequenceClip([x[:, :, ::-1] for x in vis_frames],
                                fps=args.output_fps)
    vid.write_videofile(args.out_filename)

    tmp_frame_dir = osp.dirname(frame_paths[0])
    shutil.rmtree(tmp_frame_dir)
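A small, self-contained sketch of the clip-window index arithmetic used above; the concrete values of clip_len, frame_interval and window_size are assumptions for illustration only (the real ones come from the parsed config):

import numpy as np

# assumed values, for illustration only
clip_len, frame_interval = 8, 8
window_size = clip_len * frame_interval  # 64
timestamp = 100  # a 1-based centre frame

start_frame = timestamp - (clip_len // 2 - 1) * frame_interval  # 100 - 24 = 76
frame_inds = start_frame + np.arange(0, window_size, frame_interval)
frame_inds = list(frame_inds - 1)  # to 0-based indices
print(frame_inds)  # [75, 83, 91, 99, 107, 115, 123, 131]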
Exemple #19
0
    def after_train_iter(self, runner):
        """The behavior after each train iteration.

        Args:
            runner (``mmcv.runner.BaseRunner``): The runner.
        """
        interval = self.get_current_interval(runner)
        if not self.every_n_iters(runner, interval):
            return

        runner.model.eval()

        batch_size = self.dataloader.batch_size
        rank, ws = get_dist_info()
        total_batch_size = batch_size * ws

        # sample real images
        max_real_num_images = max(metric.num_images - metric.num_real_feeded
                                  for metric in self.metrics)
        # define mmcv progress bar
        if rank == 0 and max_real_num_images > 0:
            mmcv.print_log(
                f'Sample {max_real_num_images} real images for evaluation',
                'mmgen')
            pbar = mmcv.ProgressBar(max_real_num_images)

        if max_real_num_images > 0:
            for data in self.dataloader:
                if 'real_img' in data:
                    reals = data['real_img']
                # key for conditional GAN
                elif 'img' in data:
                    reals = data['img']
                else:
                    raise KeyError('Cannot find a key for images in data_dict. '
                                   'Only `real_img` (unconditional datasets) '
                                   'and `img` (conditional datasets) are '
                                   'supported.')

                if reals.shape[1] not in [1, 3]:
                    raise RuntimeError('real images should have one or three '
                                       'channels in the channel dimension, '
                                       'not %d' % reals.shape[1])
                if reals.shape[1] == 1:
                    reals = reals.repeat(1, 3, 1, 1)

                num_feed = 0
                for metric in self.metrics:
                    num_feed_ = metric.feed(reals, 'reals')
                    num_feed = max(num_feed_, num_feed)

                if num_feed <= 0:
                    break

                if rank == 0:
                    pbar.update(num_feed)

        max_num_images = max(metric.num_images for metric in self.metrics)
        if rank == 0:
            mmcv.print_log(
                f'Sample {max_num_images} fake images for evaluation', 'mmgen')

        # define mmcv progress bar
        if rank == 0:
            pbar = mmcv.ProgressBar(max_num_images)

        # sampling fake images and directly send them to metrics
        for _ in range(0, max_num_images, total_batch_size):

            with torch.no_grad():
                fakes = runner.model(
                    None,
                    num_batches=batch_size,
                    return_loss=False,
                    **self.sample_kwargs)

                for metric in self.metrics:
                    # feed in fake images
                    metric.feed(fakes, 'fakes')

            if rank == 0:
                pbar.update(total_batch_size)

        runner.log_buffer.clear()
        # a dirty workaround to move to a new line at the end of the progress bar
        if rank == 0:
            sys.stdout.write('\n')
            for metric in self.metrics:
                with torch.no_grad():
                    metric.summary()
                for name, val in metric._result_dict.items():
                    runner.log_buffer.output[name] = val

                    # record best metric and save the best ckpt
                    if self.save_best_ckpt and name in self.best_metric:
                        self._save_best_ckpt(runner, val, name)

            runner.log_buffer.ready = True
        runner.model.train()

        # clear all current states for next evaluation
        for metric in self.metrics:
            metric.clear()
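For orientation, a minimal sketch of the metric interface that the evaluation hook above relies on (feed(), summary(), clear(), num_images, num_real_feeded, _result_dict); the names are inferred from the calls above and this is not the actual mmgen implementation:

class DummyMetric:
    """A toy stand-in illustrating the interface assumed by the hook above."""

    name = 'dummy'

    def __init__(self, num_images):
        self.num_images = num_images  # total images this metric needs
        self.num_real_feeded = 0      # real images consumed so far
        self.num_fake_feeded = 0      # fake images consumed so far
        self._result_dict = {}

    def feed(self, batch, mode):
        """Consume a batch of 'reals' or 'fakes'; return how many were used."""
        fed = self.num_real_feeded if mode == 'reals' else self.num_fake_feeded
        num_feed = min(batch.shape[0], self.num_images - fed)
        if mode == 'reals':
            self.num_real_feeded += num_feed
        else:
            self.num_fake_feeded += num_feed
        return num_feed

    def summary(self):
        """Compute and cache the final score(s)."""
        self._result_dict = {'dummy_score': 0.0}
        return self._result_dict

    def clear(self):
        """Reset all states before the next evaluation round."""
        self.num_real_feeded = 0
        self.num_fake_feeded = 0
        self._result_dict = {}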
Exemple #20
0
def single_selsa_gpu_test(model,
                          data_loader,
                          all_frame_interval=21,
                          show=False,
                          rank=0,
                          world_size=1):
    model.eval()
    results = []
    dataset = data_loader.dataset
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))

    num_images = dataset.size
    video_frames_strts = []
    for x in dataset.img_infos:
        if 'unique_ids' in x:
            video_frames_strts.append(x['unique_ids'])
        else:
            video_frames_strts.append(x['frame_id'])
    video_seg_lens = [x['frame_seg_len'] if 'frame_seg_len' in x else x['video_len'] \
                        for x in dataset.img_infos]
    batch_size = None
    video_idx = -1
    frame_idx = 0
    frame_ids = np.zeros(num_images, dtype=np.int64)
    all_bboxes = [None for _ in range(num_images)]

    t = time.time()
    for i, data in enumerate(data_loader):
        batch_size = batch_size if batch_size else len(
            data['img_meta'].data[0])
        img_meta = data['img_meta'].data[0][0]
        frame_offset = img_meta['frame_offset']
        key_frame_flag = dataset.key_frame_flag
        seg_len = img_meta['seg_len']

        t_data = time.time() - t
        t = time.time()

        #TODO:X An implementation in `selsa_rcnn` assembling `base.forward()` to extract backbone features
        #TODO:X An api feeding collated backbone features into selsa_bboxhead
        #TODO:X Collect the detection results as the original function does
        if key_frame_flag == 0:
            feat_list = deque(maxlen=all_frame_interval)
            frame_offset_list = deque(maxlen=all_frame_interval)
            img_meta_list = deque(maxlen=all_frame_interval)
            video_idx += 1
            with torch.no_grad():
                cur_feat = model(backbone_feat=True, **data)
            while len(feat_list) < int(all_frame_interval + 1) / 2:
                feat_list.append(cur_feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)

        elif key_frame_flag == 2:
            if len(feat_list) < all_frame_interval - 1:
                with torch.no_grad():
                    feat = model(backbone_feat=True, **data)
                feat_list.append(feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)
            else:
                with torch.no_grad():
                    feat = model(backbone_feat=True, **data)
                feat_list.append(feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)
                with torch.no_grad():
                    # c_img_meta = collate(img_meta_list, all_frame_interval)
                    result = model(x=feat_list,
                                   img=None,
                                   img_meta=img_meta_list,
                                   forward_feat=True,
                                   return_loss=False,
                                   rescale=not show)
                    # print(result)
                if dataset.video_shuffle:
                    if not isinstance(video_frames_strts[video_idx], int):
                        frame_ids[frame_idx] = video_frames_strts[video_idx][
                            frame_offset_list[int(
                                (all_frame_interval - 1) / 2)]]
                    else:
                        frame_ids[frame_idx] = video_frames_strts[video_idx] + \
                                frame_offset_list[int((all_frame_interval-1)/2)]
                else:
                    assert "Unshuffled video validation not implemented"
                all_bboxes[frame_ids[frame_idx] - 1] = result
                frame_idx += batch_size
                t_net = time.time() - t
                if rank == 0:
                    for _ in range(batch_size * world_size):
                        prog_bar.update()
        elif key_frame_flag == 1:
            end_counter = 0
            with torch.no_grad():
                feat = model(backbone_feat=True, **data)

            while len(feat_list) < all_frame_interval - 1:
                feat_list.append(feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)
            while end_counter < min(seg_len, int(all_frame_interval + 1) / 2):
                feat_list.append(feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)
                end_counter += 1
                with torch.no_grad():
                    # c_img_meta = collate(img_meta_list, all_frame_interval)
                    result = model(x=feat_list,
                                   img=None,
                                   img_meta=img_meta_list,
                                   forward_feat=True,
                                   return_loss=False,
                                   rescale=not show)

                if dataset.video_shuffle:
                    if not isinstance(video_frames_strts[video_idx], int):
                        frame_ids[frame_idx] = video_frames_strts[video_idx][
                            frame_offset_list[int(
                                (all_frame_interval - 1) / 2)]]
                    else:
                        frame_ids[frame_idx] = video_frames_strts[video_idx] + \
                                frame_offset_list[int((all_frame_interval-1)/2)]
                else:
                    assert "Unshuffled video validation not implemented"
                all_bboxes[frame_ids[frame_idx] - 1] = result
                frame_idx += batch_size
                t_net = time.time() - t
                if rank == 0:
                    for _ in range(batch_size * world_size):
                        prog_bar.update()

    return all_bboxes
Exemple #21
0
test = gradcheck(CARAFE(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4)
print(test)

print('Gradcheck for carafe naive...')
test = gradcheck(CARAFENAIVE(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4)
print(test)

feat = torch.randn(
    2, 1024, 100, 100, requires_grad=True, device='cuda:0').float()
mask = torch.randn(
    2, 25, 200, 200, requires_grad=True, device='cuda:0').sigmoid().float()
loop_num = 500

time_forward = 0
time_backward = 0
bar = mmcv.ProgressBar(loop_num)
timer = mmcv.Timer()
for i in range(loop_num):
    x = carafe(feat.clone(), mask.clone(), 5, 1, 2)
    torch.cuda.synchronize()
    time_forward += timer.since_last_check()
    x.sum().backward(retain_graph=True)
    torch.cuda.synchronize()
    time_backward += timer.since_last_check()
    bar.update()
print('\nCARAFE time forward: {} ms/iter | time backward: {} ms/iter'.format(
    (time_forward + 1e-3) * 1e3 / loop_num,
    (time_backward + 1e-3) * 1e3 / loop_num))

time_naive_forward = 0
time_naive_backward = 0
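The listing is cut off at this point. As a hedged sketch (not the original code), the benchmark presumably continues with a mirrored loop for the naive op; carafe_naive is an assumption standing in for whatever naive CARAFE function the truncated imports provide:

bar = mmcv.ProgressBar(loop_num)
timer = mmcv.Timer()
for i in range(loop_num):
    # `carafe_naive` is assumed; this mirrors the CARAFE timing loop above
    x = carafe_naive(feat.clone(), mask.clone(), 5, 1, 2)
    torch.cuda.synchronize()
    time_naive_forward += timer.since_last_check()
    x.sum().backward(retain_graph=True)
    torch.cuda.synchronize()
    time_naive_backward += timer.since_last_check()
    bar.update()
print('\nCARAFE naive time forward: {} ms/iter | time backward: {} ms/iter'.
      format((time_naive_forward + 1e-3) * 1e3 / loop_num,
             (time_naive_backward + 1e-3) * 1e3 / loop_num))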
Exemple #22
0
def multi_hnl_gpu_test(model,
                       data_loader,
                       all_frame_interval,
                       tmpdir=None,
                       gpu_collect=False,
                       show=False):
    model.eval()
    dataset = data_loader.dataset
    rank, world_size = get_dist_info()
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
        dataset.is_dataset_global = ['I am going to be mad!!!']

    # print("rank: {} message: {}".format(rank, hasattr(dataset, 'is_dataset_global')))

    num_images = dataset.local_frame_size_list[rank]
    # print(num_images)
    pos_pointer = [np.max(t) + 1 for t in dataset.local_video_list]
    video_frames_strts = []
    if rank == 0:
        rank_video_infos = dataset.img_infos[:dataset.
                                             global_video_size_list[rank]]
    else:
        rank_video_infos = dataset.img_infos[
            sum(dataset.global_video_size_list[:rank]
                ):sum(dataset.global_video_size_list[:rank + 1])]
    for i, x in enumerate(rank_video_infos):
        if i == 0:
            assert x['frame_id'] == 1, \
                'Wrong frame_id of the first video in local rank {}'.format(rank)
        if 'unique_ids' in x:
            video_frames_strts.append(x['unique_ids'])
        else:
            video_frames_strts.append(x['frame_id'])
    video_seg_lens = [x['frame_seg_len'] if 'frame_seg_len' in x else x['video_len'] \
                        for x in rank_video_infos]
    batch_size = None
    video_idx = -1
    frame_idx = 0
    frame_ids = np.zeros(num_images, dtype=np.int64)
    all_bboxes = [None for _ in range(num_images)]

    t = time.time()
    print("rank: {} len: {} size: {}".format(rank, len(data_loader),
                                             num_images))
    for i, data in enumerate(data_loader):
        batch_size = batch_size if batch_size else len(
            data['img_meta'].data[0])
        img_meta = data['img_meta'].data[0][0]
        frame_offset = img_meta['frame_offset']
        key_frame_flag = dataset.key_frame_flag
        seg_len = img_meta['seg_len']

        t_data = time.time() - t
        t = time.time()
        # print("i at rank {}: {}".format(rank, i))
        #TODO:X An implementation in `selsa_rcnn` assembling `base.forward()` to extract backbone features
        #TODO:X An api feeding collated backbone features into selsa_bboxhead
        #TODO:X Collect the detection results as the original function does
        if key_frame_flag == 0:
            feat_list = deque(maxlen=all_frame_interval)
            frame_offset_list = deque(maxlen=all_frame_interval)
            img_meta_list = deque(maxlen=all_frame_interval)
            video_idx += 1
            with torch.no_grad():
                cur_feat = model(backbone_feat=True, **data)
            while len(feat_list) < int(all_frame_interval + 1) / 2:
                feat_list.append(cur_feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)

        elif key_frame_flag == 2:
            if len(feat_list) < all_frame_interval - 1:
                with torch.no_grad():
                    feat = model(backbone_feat=True, **data)
                feat_list.append(feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)
            else:
                with torch.no_grad():
                    feat = model(backbone_feat=True, **data)
                feat_list.append(feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)
                with torch.no_grad():
                    # c_img_meta = collate(img_meta_list, all_frame_interval)
                    result = model(x=feat_list,
                                   img=None,
                                   img_meta=img_meta_list,
                                   forward_feat=True,
                                   return_loss=False,
                                   rescale=not show)
                    # print(result)
                if dataset.video_shuffle:
                    # print("video_indx: {}, len_video_frames_strts: {}".format(video_idx, len(video_frames_strts)))
                    if not isinstance(video_frames_strts[video_idx], int):
                        frame_ids[frame_idx] = video_frames_strts[video_idx][
                            frame_offset_list[int(
                                (all_frame_interval - 1) / 2)]]
                    else:
                        frame_ids[frame_idx] = video_frames_strts[video_idx] + \
                                frame_offset_list[int((all_frame_interval-1)/2)]
                else:
                    assert "Unshuffled video validation not implemented"
                try:
                    all_bboxes[frame_ids[frame_idx] - 1] = result
                except:
                    print("rank: {}, frame_idx: {}, frame_ids[frame_idx]:{}".
                          format(rank, frame_idx, frame_ids[frame_idx]))
                frame_idx += batch_size
                t_net = time.time() - t
                if rank == 0:
                    for _ in range(batch_size * world_size):
                        prog_bar.update()
        elif key_frame_flag == 1:
            end_counter = 0
            with torch.no_grad():
                feat = model(backbone_feat=True, **data)

            while len(feat_list) < all_frame_interval - 1:
                feat_list.append(feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)
            while end_counter < min(seg_len, int(all_frame_interval + 1) / 2):
                feat_list.append(feat[0])
                frame_offset_list.append(frame_offset)
                img_meta_list.append(img_meta)
                end_counter += 1
                with torch.no_grad():
                    # c_img_meta = collate(img_meta_list, all_frame_interval)
                    result = model(x=feat_list,
                                   img=None,
                                   img_meta=img_meta_list,
                                   forward_feat=True,
                                   return_loss=False,
                                   rescale=not show)

                if dataset.video_shuffle:
                    if not isinstance(video_frames_strts[video_idx], int):
                        frame_ids[frame_idx] = video_frames_strts[video_idx][
                            frame_offset_list[int(
                                (all_frame_interval - 1) / 2)]]
                    else:
                        frame_ids[frame_idx] = video_frames_strts[video_idx] + \
                                frame_offset_list[int((all_frame_interval-1)/2)]
                else:
                    assert "Unshuffled video validation not implemented"
                try:
                    all_bboxes[frame_ids[frame_idx] - 1] = result
                except:
                    print(
                        "rank: {}, frame_idx: {}, frame_ids[frame_idx]:{}, end_counter"
                    )
                frame_idx += batch_size
                t_net = time.time() - t
                if rank == 0:
                    for _ in range(batch_size * world_size):
                        prog_bar.update()
        # if rank == 3:
        #     print(i)

    # print("results at rank {}: {}".format(rank, len(all_bboxes)))
    # print(all_bboxes)
    if gpu_collect:
        raise NotImplementedError('gpu_collect is not implemented yet')
    else:
        # print(all_bboxes)
        results = collect_selsa_results_cpu(all_bboxes, len(dataset), tmpdir)
    return results
Exemple #23
0
def show_results():
    cap = cv2.VideoCapture(video_path)
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    msg = 'Preparing action recognition ...'
    text_info = {}
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    frame_size = (frame_width, frame_height)

    ind = 0
    video_writer = cv2.VideoWriter(out_file, fourcc, fps, frame_size)
    prog_bar = mmcv.ProgressBar(num_frames)
    backup_frames = []

    while ind < num_frames:
        ind += 1
        prog_bar.update()
        ret, frame = cap.read()
        if frame is None:
            # skip frames that fail to decode
            continue
        backup_frames.append(np.array(frame)[:, :, ::-1])
        if ind == sample_length:
            # provide a quick show at the beginning
            frame_queue.extend(backup_frames)
            backup_frames = []
        elif ((len(backup_frames) == input_step and ind > sample_length)
              or ind == num_frames):
            # pick a frame from the backup
            # when the backup is full or we reach the last frame
            chosen_frame = random.choice(backup_frames)
            backup_frames = []
            frame_queue.append(chosen_frame)

        ret, scores = inference()

        if ret:
            num_selected_labels = min(len(label), 5)
            scores_tuples = tuple(zip(label, scores))
            scores_sorted = sorted(scores_tuples,
                                   key=itemgetter(1),
                                   reverse=True)
            results = scores_sorted[:num_selected_labels]
            result_queue.append(results)

        if len(result_queue) != 0:
            text_info = {}
            results = result_queue.popleft()
            for i, result in enumerate(results):
                selected_label, score = result
                if score < threshold:
                    break
                location = (0, 40 + i * 20)
                text = selected_label + ': ' + str(round(score, 2))
                text_info[location] = text
                cv2.putText(frame, text, location, FONTFACE, FONTSCALE,
                            FONTCOLOR, THICKNESS, LINETYPE)
        elif len(text_info):
            for location, text in text_info.items():
                cv2.putText(frame, text, location, FONTFACE, FONTSCALE,
                            FONTCOLOR, THICKNESS, LINETYPE)
        else:
            cv2.putText(frame, msg, (0, 40), FONTFACE, FONTSCALE, MSGCOLOR,
                        THICKNESS, LINETYPE)
        video_writer.write(frame)
    cap.release()
    cv2.destroyAllWindows()
Exemple #24
0
def main():
    parser = ArgumentParser()
    parser.add_argument('config', help='config file')
    parser.add_argument('--input', help='input video file or folder')
    parser.add_argument('--output',
                        help='output video file (mp4 format) or folder')
    parser.add_argument('--checkpoint', help='checkpoint file')
    parser.add_argument('--device',
                        default='cuda:0',
                        help='device used for inference')
    parser.add_argument('--show',
                        action='store_true',
                        help='whether show the results on the fly')
    parser.add_argument('--backend',
                        choices=['cv2', 'plt'],
                        default='cv2',
                        help='the backend to visualize the results')
    parser.add_argument('--fps', help='FPS of the output video')
    args = parser.parse_args()
    assert args.output or args.show
    # load images
    if osp.isdir(args.input):
        imgs = sorted(os.listdir(args.input))
        IN_VIDEO = False
    else:
        imgs = mmcv.VideoReader(args.input)
        IN_VIDEO = True
    # define output
    OUT_VIDEO = False
    if args.output is not None:
        if args.output.endswith('.mp4'):
            OUT_VIDEO = True
            out_dir = tempfile.TemporaryDirectory()
            out_path = out_dir.name
            _out = args.output.rsplit('/', 1)
            if len(_out) > 1:
                os.makedirs(_out[0], exist_ok=True)
        else:
            OUT_VIDEO = False
            out_path = args.output
            os.makedirs(out_path, exist_ok=True)

    fps = args.fps
    if args.show or OUT_VIDEO:
        if fps is None and IN_VIDEO:
            fps = imgs.fps
        if not fps:
            raise ValueError('Please set the FPS for the output video.')
        fps = int(fps)

    # build the model from a config file and a checkpoint file
    model = init_model(args.config, args.checkpoint, device=args.device)

    prog_bar = mmcv.ProgressBar(len(imgs))
    # test and show/save the images
    for i, img in enumerate(imgs):
        if isinstance(img, str):
            img = osp.join(args.input, img)
        result = inference_mot(model, img, frame_id=i)
        result = result['track_results']
        if args.output is not None:
            if IN_VIDEO or OUT_VIDEO:
                out_file = osp.join(out_path, f'{i:06d}.jpg')
            else:
                out_file = osp.join(out_path, img.rsplit('/', 1)[-1])
        else:
            out_file = None
        model.show_result(img,
                          result,
                          show=args.show,
                          wait_time=int(1000. / fps) if fps else 0,
                          out_file=out_file,
                          backend=args.backend)
        prog_bar.update()

    if OUT_VIDEO:
        print(f'making the output video at {args.output} with an FPS of {fps}')
        mmcv.frames2video(out_path, args.output, fps=fps)
        out_dir.cleanup()
Exemple #25
0
import numpy as np
import pandas as pd
import mmcv
gt_dir = './to_lmdb/val_label_new.txt'
pred_dir = './history/test_out_lr_0.00010_batchSize_1_time_0310105604_/epoch_0_step_0_data.txt'
stat_csv = './False_record/stat.csv'
f1 = open(gt_dir, 'r')
f2 = open(pred_dir, 'r')
d = {}
prog_bar = mmcv.ProgressBar(31402)
for i in range(31402):
    pred = f2.readline().split(' ')[1].strip()
    gt = f1.readline().split(' ')[1].strip()
    for p in gt:
        if p not in pred:
            if p in d.keys():
                d[p] += 1
            else:
                d[p] = 1
    prog_bar.update()
f1.close()
f2.close()
stat_df = pd.DataFrame(columns=['word', 'num'])
for key in d.keys():
    stat_df = pd.concat(
        [stat_df, pd.DataFrame([[key, d[key]]], columns=['word', 'num'])],
        ignore_index=True)
print(stat_df)
ind = np.argsort(stat_df['num'].values)
stat_df = stat_df.loc[ind[::-1], :]
stat_df = stat_df.reset_index(drop=True)
print(stat_df)
stat_df.to_csv(stat_csv)
Exemple #26
0
    def after_train_iter(self, runner):
        """The behavior after each train iteration.

        Args:
            runner (``mmcv.runner.BaseRunner``): The runner.
        """
        if not self.every_n_iters(runner, self.interval):
            return

        runner.model.eval()

        # sample fake images
        max_num_images = max(metric.num_images for metric in self.metrics)
        for metric in self.metrics:
            mmcv.print_log(f'Feed reals to {metric.name} metric.', 'mmgen')
            # feed in real images
            for data in self.dataloader:
                reals = data['real_img']
                num_feed = metric.feed(reals, 'reals')
                if num_feed <= 0:
                    break

        mmcv.print_log(f'Sample {max_num_images} fake images for evaluation',
                       'mmgen')
        batch_size = self.dataloader.batch_size

        rank, ws = get_dist_info()
        total_batch_size = batch_size * ws

        # define mmcv progress bar
        if rank == 0:
            pbar = mmcv.ProgressBar(max_num_images)

        # sampling fake images and directly send them to metrics
        for _ in range(0, max_num_images, total_batch_size):

            with torch.no_grad():
                fakes = runner.model(None,
                                     num_batches=batch_size,
                                     return_loss=False,
                                     **self.sample_kwargs)

            for metric in self.metrics:
                # feed in fake images
                num_left = metric.feed(fakes, 'fakes')
                if num_left <= 0:
                    break

            if rank == 0:
                pbar.update(total_batch_size)

        runner.log_buffer.clear()
        # a dirty workaround to move to a new line at the end of the progress bar
        if rank == 0:
            sys.stdout.write('\n')
            for metric in self.metrics:
                metric.summary()
                for name, val in metric._result_dict.items():
                    runner.log_buffer.output[name] = val

            runner.log_buffer.ready = True
        runner.model.train()

        # clear all current states for next evaluation
        for metric in self.metrics:
            metric.clear()
Exemple #27
0
    def after_train_epoch(self, runner):
        if not self.every_n_epochs(runner, self.interval):
            return

        runner.logger.info("Start evaluation on {} dataset ({} images).".format(
            self.dataset.name, len(self.dataset)))
        runner.model.eval()

        # get prog bar
        if runner.rank == 0:
            prog_bar = mmcv.ProgressBar(len(self.dataset))
        else:
            prog_bar = None

        results = [None for _ in range(len(self.dataset))]
        for idx in range(runner.rank, len(self.dataset), runner.world_size):
            data = self.dataset[idx]
            data_gpu = scatter(collate([data], samples_per_gpu=1),
                               [torch.cuda.current_device()])[0]

            # compute output
            with torch.no_grad():
                result, _ = runner.model(data_gpu)
                disps = result['disps']
                costs = result['costs']

                ori_size = data_gpu['original_size']
                disps = remove_padding(disps, ori_size)

                # process the ground truth disparity map
                data_gpu['leftDisp'] = data_gpu[
                    'leftDisp'] if 'leftDisp' in data_gpu else None
                if data_gpu['leftDisp'] is not None:
                    data_gpu['leftDisp'] = remove_padding(
                        data_gpu['leftDisp'], ori_size)
                data_gpu['rightDisp'] = data_gpu[
                    'rightDisp'] if 'rightDisp' in data_gpu else None
                if data_gpu['rightDisp'] is not None:
                    data_gpu['rightDisp'] = remove_padding(
                        data_gpu['rightDisp'], ori_size)

                # evaluation
                whole_error_dict = disp_evaluation(self.cfg, disps,
                                                   data_gpu['leftDisp'],
                                                   data_gpu['rightDisp'])

                result = {
                    'Disparity': disps,
                    'GroundTruth': data_gpu['leftDisp'],
                    'Error': whole_error_dict,
                }

                if self.cfg.model.eval.is_cost_return:
                    if self.cfg.model.eval.is_cost_to_cpu:
                        costs = [cost.cpu() for cost in costs]
                    result['Cost'] = costs

            # if the result contains images, CUDA memory usage grows quickly as
            # evaluation proceeds, so move everything to the CPU first
            result = to_cpu(result)

            filter_result = dict()
            filter_result['Error'] = result['Error']
            if 'Confidence' in result:
                filter_result['Confidence'] = self.process_conf(
                    result, bins_number=100)

            results[idx] = filter_result

            batch_size = runner.world_size

            if runner.rank == 0:
                for _ in range(batch_size):
                    prog_bar.update()

        if runner.rank == 0:
            print('\n')
            dist.barrier()
            for i in range(1, min(runner.world_size, len(self.dataset))):
                tmp_file = osp.join(runner.work_dir, "temp_{}.pkl".format(i))
                tmp_results = mmcv.load(tmp_file)
                for idx in range(i, len(results), runner.world_size):
                    results[idx] = tmp_results[idx]
                os.remove(tmp_file)
            self.evaluate(runner, results)
        else:
            tmp_file = osp.join(runner.work_dir,
                                "temp_{}.pkl".format(runner.rank))
            mmcv.dump(results, tmp_file)
            dist.barrier()
        dist.barrier()
        torch.cuda.empty_cache()
    def after_train_epoch(self, runner):
        if not self.every_n_epochs(runner, self.interval):
            return
        runner.model.eval()
        results = [None for _ in range(len(self.dataset))]
        if runner.rank == 0:
            prog_bar = mmcv.ProgressBar(len(self.dataset))
        for idx in range(runner.rank, len(self.dataset), runner.world_size):
            data = self.dataset[idx]
            data_gpu = scatter(collate([data], samples_per_gpu=1),
                               [torch.cuda.current_device()])[0]

            # compute output
            with torch.no_grad():
                result = runner.model(return_loss=False,
                                      rescale=True,
                                      **data_gpu)
            results[idx] = result
            """
            Yuan add following code for evaluating miss rate using matlab code.
            For each image, detection result will be written into it's corresponding text file.
            Matlab script will load those detection results and perform evaluation.
            It is finished with matlab engine on background.
            detection result -> text file -> matlab script
            """
            # image path
            img_path = self.dataset.img_infos[idx]['filename']
            res_path = img_path.replace('images', 'res')  # res : result
            # print()             # for debug
            # print(img_path)     # for debug
            # print(res_path)     # for debug
            if 'visible' in res_path:
                res_path = res_path.replace('/visible/', '/')
            res_path = res_path.replace('.jpg', '.txt')
            res_path = res_path.replace('.png', '.txt')
            # print(res_path)     # for debug
            if os.path.exists(res_path):
                os.remove(res_path)
            os.mknod(res_path)
            """
            For faster-rcnn, the result is a list, each element in list is result for a object class.
            In pedestrian detection, there is only one class.
            For RPN, the result is a numpy. The result of RPN is category-independent.
            """
            if isinstance(result, list):
                np.savetxt(res_path, result[0])
            else:
                np.savetxt(res_path, result)

            batch_size = runner.world_size
            for _ in range(batch_size):
                prog_bar.update()
        """
        yuan comment following code because of new evaluation method.
        """
        # if runner.rank == 0:
        #     print('\n')
        #     dist.barrier()
        #     for i in range(1, runner.world_size):
        #         tmp_file = osp.join(runner.work_dir, 'temp_{}.pkl'.format(i))
        #         tmp_results = mmcv.load(tmp_file)
        #         for idx in range(i, len(results), runner.world_size):
        #             results[idx] = tmp_results[idx]
        #         os.remove(tmp_file)
        #     self.evaluate(runner, results)
        # else:
        #     tmp_file = osp.join(runner.work_dir,
        #                         'temp_{}.pkl'.format(runner.rank))
        #     mmcv.dump(results, tmp_file)
        #     dist.barrier()
        """
        yuan add following line.
        """
        self.evaluate(runner, results)

        # dist.barrier()
        self._barrier(runner.rank, runner.world_size)
def show_results(model, data, label, args):
    frame_queue = deque(maxlen=args.sample_length)
    result_queue = deque(maxlen=1)

    cap = cv2.VideoCapture(args.video_path)
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    msg = 'Preparing action recognition ...'
    text_info = {}
    out_json = {}
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    frame_size = (frame_width, frame_height)

    ind = 0
    video_writer = None if args.out_file.endswith('.json') \
        else cv2.VideoWriter(args.out_file, fourcc, fps, frame_size)
    prog_bar = mmcv.ProgressBar(num_frames)
    backup_frames = []

    while ind < num_frames:
        ind += 1
        prog_bar.update()
        ret, frame = cap.read()
        if frame is None:
            # drop the frame when encountering None
            continue
        backup_frames.append(np.array(frame)[:, :, ::-1])
        if ind == args.sample_length:
            # provide a quick show at the beginning
            frame_queue.extend(backup_frames)
            backup_frames = []
        elif ((len(backup_frames) == args.input_step
               and ind > args.sample_length) or ind == num_frames):
            # pick a frame from the backup
            # when the backup is full or we reach the last frame
            chosen_frame = random.choice(backup_frames)
            backup_frames = []
            frame_queue.append(chosen_frame)

        ret, scores = inference(model, data, args, frame_queue)

        if ret:
            num_selected_labels = min(len(label), 5)
            scores_tuples = tuple(zip(label, scores))
            scores_sorted = sorted(scores_tuples,
                                   key=itemgetter(1),
                                   reverse=True)
            results = scores_sorted[:num_selected_labels]
            result_queue.append(results)

        if args.out_file.endswith('.json'):
            text_info, out_json = get_results_json(result_queue, text_info,
                                                   args.threshold, msg, ind,
                                                   out_json)
        else:
            text_info = show_results_video(result_queue, text_info,
                                           args.threshold, msg, frame,
                                           video_writer, args.label_color,
                                           args.msg_color)

    cap.release()
    cv2.destroyAllWindows()
    if args.out_file.endswith('.json'):
        with open(args.out_file, 'w') as js:
            json.dump(out_json, js)
def make_lmdb(mode, data_path, lmdb_path, batch=5000, compress_level=1):
    """Create lmdb for the REDS dataset.

    Contents of lmdb. The file structure is:
    example.lmdb
    ├── data.mdb
    ├── lock.mdb
    ├── meta_info.txt

    The data.mdb and lock.mdb are standard lmdb files and you can refer to
    https://lmdb.readthedocs.io/en/release/ for more details.

    The meta_info.txt is a specified txt file to record the meta information
    of our datasets. It will be automatically created when preparing
    datasets by our provided dataset tools.
    Each line in the txt file records 1)image name (with extension),
    2)image shape, and 3)compression level, separated by a white space.

    For example, the meta information could be:
    `000_00000000.png (720,1280,3) 1`, which means:
    1) image name (with extension): 000_00000000.png;
    2) image shape: (720,1280,3);
    3) compression level: 1

    We use the image name without extension as the lmdb key.

    Args:
        mode (str): REDS dataset mode. Choices: ['train_sharp', 'train_blur',
            'train_blur_comp', 'train_sharp_bicubic', 'train_blur_bicubic'].
            They are used to identify different reds dataset for different
            tasks. Specifically:
            'train_sharp': GT frames;
            'train_blur': Blur frames for deblur task.
            'train_blur_comp': Blur and compressed frames for deblur and
                compression task.
            'train_sharp_bicubic': Bicubic downsampled sharp frames for SR
                task.
            'train_blur_bicubic': Bicubic downsampled blur frames for SR task.
        data_path (str): Data path for reading images.
        lmdb_path (str): Lmdb save path.
        batch (int): After processing batch images, lmdb commits.
            Default: 5000.
        compress_level (int): Compress level when encoding images. Default: 1.
    """

    print(f'Create lmdb for {data_path}, save to {lmdb_path}...')
    if mode in ['train_sharp', 'train_blur', 'train_blur_comp']:
        h_dst, w_dst = 720, 1280
    else:
        h_dst, w_dst = 180, 320

    if osp.exists(lmdb_path):
        print(f'Folder {lmdb_path} already exists. Exit.')
        sys.exit(1)

    print('Reading image path list ...')
    img_path_list = sorted(
        list(mmcv.scandir(data_path, suffix='png', recursive=True)))
    keys = []
    for img_path in img_path_list:
        parts = img_path.split('/')
        folder = parts[-2]
        img_name = parts[-1].split('.png')[0]
        keys.append(folder + '_' + img_name)  # example: 000_00000000

    # create lmdb environment
    # obtain data size for one image
    img = mmcv.imread(osp.join(data_path, img_path_list[0]), flag='unchanged')
    _, img_byte = cv2.imencode('.png', img,
                               [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
    data_size_per_img = img_byte.nbytes
    print('Data size per image is: ', data_size_per_img)
    data_size = data_size_per_img * len(img_path_list)
    env = lmdb.open(lmdb_path, map_size=data_size * 10)

    # write data to lmdb
    pbar = mmcv.ProgressBar(len(img_path_list))
    txn = env.begin(write=True)
    txt_file = open(osp.join(lmdb_path, 'meta_info.txt'), 'w')
    for idx, (path, key) in enumerate(zip(img_path_list, keys)):
        pbar.update()
        key_byte = key.encode('ascii')
        img = mmcv.imread(osp.join(data_path, path), flag='unchanged')
        h, w, c = img.shape
        _, img_byte = cv2.imencode(
            '.png', img, [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
        assert h == h_dst and w == w_dst and c == 3, (
            f'Wrong shape ({h},{w},{c}), should be ({h_dst},{w_dst},3).')
        txn.put(key_byte, img_byte)
        # write meta information
        txt_file.write(f'{key}.png ({h},{w},{c}) {compress_level}\n')
        if idx % batch == 0:
            txn.commit()
            txn = env.begin(write=True)
    txn.commit()
    env.close()
    txt_file.close()
    print('\nFinish writing lmdb.')
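As a usage note, here is a minimal sketch (an assumption, not part of the original dataset tools) of how an entry written by make_lmdb above could be read back, using a key from meta_info.txt:

import cv2
import lmdb
import numpy as np

def read_lmdb_entry(lmdb_path, key):
    """Decode one image written by make_lmdb; keys look like '000_00000000'."""
    env = lmdb.open(lmdb_path, readonly=True, lock=False, readahead=False)
    with env.begin(write=False) as txn:
        img_byte = txn.get(key.encode('ascii'))
    env.close()
    # values are PNG-encoded bytes, so decode them back into an image array
    return cv2.imdecode(np.frombuffer(img_byte, np.uint8), cv2.IMREAD_UNCHANGED)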