Example #1
def __init__(self,
             datasets,
             separate_eval=True,
             pipeline=None,
             force_apply=False,
             **kwargs):
    new_datasets = []
    if pipeline is not None:
        assert isinstance(
            pipeline,
            list), 'pipeline must be list[dict] or list[list[dict]].'
        if is_type_list(pipeline, dict):
            self._apply_pipeline(datasets, pipeline, force_apply)
            new_datasets = datasets
        elif is_2dlist(pipeline):
            assert is_2dlist(datasets)
            assert len(datasets) == len(pipeline)
            for sub_datasets, tmp_pipeline in zip(datasets, pipeline):
                self._apply_pipeline(sub_datasets, tmp_pipeline,
                                     force_apply)
                new_datasets.extend(sub_datasets)
    else:
        if is_2dlist(datasets):
            for sub_datasets in datasets:
                new_datasets.extend(sub_datasets)
        else:
            new_datasets = datasets
    datasets = [build_dataset(c, kwargs) for c in new_datasets]
    super().__init__(datasets, separate_eval)
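For reference, the two pipeline shapes this constructor accepts could be built as follows (a minimal sketch; the transform names are illustrative, not taken from the examples above):

# A flat list[dict]: one shared pipeline applied to every dataset.
shared_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='ResizeOCR', height=32),
]

# A list[list[dict]]: one pipeline per group of datasets. In this case
# `datasets` must be a 2d list with the same outer length as `pipeline`.
per_group_pipelines = [
    [dict(type='LoadImageFromFile')],
    [dict(type='LoadImageFromNdarray')],
]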
Example #2
def show_pred_gt(preds,
                 gts,
                 show=False,
                 win_name='',
                 wait_time=0,
                 out_file=None):
    """Show detection and ground truth for one image.

    Args:
        preds (list[list[float]]): The detection boundary list.
        gts (list[list[float]]): The ground truth boundary list.
        show (bool): Whether to show the image.
        win_name (str): The window name.
        wait_time (int): The value of waitKey param.
        out_file (str): The filename of the output.
    """
    assert utils.is_2dlist(preds)
    assert utils.is_2dlist(gts)
    assert isinstance(show, bool)
    assert isinstance(win_name, str)
    assert isinstance(wait_time, int)
    assert utils.is_none_or_type(out_file, str)

    p_xy = [p for boundary in preds for p in boundary]
    gt_xy = [g for gt in gts for g in gt]

    max_xy = np.max(np.array(p_xy + gt_xy).reshape(-1, 2), axis=0)

    width = int(max_xy[0]) + 100
    height = int(max_xy[1]) + 100

    img = np.ones((height, width, 3), np.uint8) * 255  # uint8, not int8: 255 would overflow to -1
    pred_color = mmcv.color_val('red')
    gt_color = mmcv.color_val('blue')
    thickness = 1

    for boundary in preds:
        cv2.polylines(
            img, [np.array(boundary).astype(np.int32).reshape(-1, 1, 2)],
            True,
            color=pred_color,
            thickness=thickness)
    for gt in gts:
        cv2.polylines(
            img, [np.array(gt).astype(np.int32).reshape(-1, 1, 2)],
            True,
            color=gt_color,
            thickness=thickness)
    if show:
        mmcv.imshow(img, win_name, wait_time)
    if out_file is not None:
        mmcv.imwrite(img, out_file)

    return img
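A minimal usage sketch, assuming `show_pred_gt` is importable from the module above, that each boundary is a flat [x1, y1, x2, y2, ...] list, and that the output filename is arbitrary:

# One predicted quad (drawn in red) and one ground-truth quad (blue);
# the white canvas is sized automatically from the largest coordinate.
preds = [[10, 10, 110, 10, 110, 60, 10, 60]]
gts = [[12, 12, 108, 12, 108, 58, 12, 58]]
canvas = show_pred_gt(preds, gts, out_file='pred_vs_gt.png')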
Example #3
    def __init__(self,
                 datasets,
                 separate_eval=True,
                 show_mean_scores='auto',
                 pipeline=None,
                 force_apply=False,
                 **kwargs):
        new_datasets = []
        if pipeline is not None:
            assert isinstance(
                pipeline,
                list), 'pipeline must be list[dict] or list[list[dict]].'
            if is_type_list(pipeline, dict):
                self._apply_pipeline(datasets, pipeline, force_apply)
                new_datasets = datasets
            elif is_2dlist(pipeline):
                assert is_2dlist(datasets)
                assert len(datasets) == len(pipeline)
                for sub_datasets, tmp_pipeline in zip(datasets, pipeline):
                    self._apply_pipeline(sub_datasets, tmp_pipeline,
                                         force_apply)
                    new_datasets.extend(sub_datasets)
        else:
            if is_2dlist(datasets):
                for sub_datasets in datasets:
                    new_datasets.extend(sub_datasets)
            else:
                new_datasets = datasets
        datasets = [build_dataset(c, kwargs) for c in new_datasets]
        super().__init__(datasets, separate_eval)

        if not separate_eval:
            raise NotImplementedError(
                'Evaluating datasets as a whole is not'
                ' supported yet. Please use "separate_eval=True"')

        assert isinstance(show_mean_scores, bool) or show_mean_scores == 'auto'
        if show_mean_scores == 'auto':
            show_mean_scores = len(self.datasets) > 1
        self.show_mean_scores = show_mean_scores
        # 'auto' was resolved to a bool above, so a plain truthiness check
        # suffices here.
        if show_mean_scores:
            if len({type(ds) for ds in self.datasets}) != 1:
                raise NotImplementedError(
                    'To compute mean evaluation scores, all datasets '
                    'must have the same type')
Example #4
def text_model_inference(model, input_sentence):
    """Inference text(s) with the entity recognizer.

    Args:
        model (nn.Module): The loaded recognizer.
        input_sentence (str): A text entered by the user.

    Returns:
        result (dict): Predicted results.
    """

    assert isinstance(input_sentence, str)

    cfg = model.cfg
    if cfg.data.test.get('pipeline', None) is None:
        if is_2dlist(cfg.data.test.datasets):
            cfg.data.test.pipeline = cfg.data.test.datasets[0][0].pipeline
        else:
            cfg.data.test.pipeline = cfg.data.test.datasets[0].pipeline
    if is_2dlist(cfg.data.test.pipeline):
        cfg.data.test.pipeline = cfg.data.test.pipeline[0]
    test_pipeline = Compose(cfg.data.test.pipeline)
    data = {'text': input_sentence, 'label': {}}

    # build the data pipeline
    data = test_pipeline(data)
    if isinstance(data['img_metas'], dict):
        img_metas = data['img_metas']
    else:
        img_metas = data['img_metas'].data

    assert isinstance(img_metas, dict)
    img_metas = {
        'input_ids': img_metas['input_ids'].unsqueeze(0),
        'attention_masks': img_metas['attention_masks'].unsqueeze(0),
        'token_type_ids': img_metas['token_type_ids'].unsqueeze(0),
        'labels': img_metas['labels'].unsqueeze(0)
    }
    # forward the model
    with torch.no_grad():
        result = model(None, img_metas, return_loss=False)
    return result
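A hypothetical call, assuming `ner_model` was loaded elsewhere with a config that carries a test pipeline (checkpoint loading is outside this snippet):

# The sentence is arbitrary; the result dict holds the model's
# predictions for the tokenized inputs built above.
result = text_model_inference(ner_model, 'John works at Google in London.')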
Example #5
def disable_text_recog_aug_test(cfg, set_types=None):
    """Remove aug_test from test pipeline for text recognition.

    Args:
        cfg (mmcv.Config): Input config.
        set_types (list[str]): Type of dataset source. Should be
            None or a sublist of ['test', 'val'].

    Returns:
        cfg (mmcv.Config): Output config with aug_test disabled.
    """
    assert set_types is None or isinstance(set_types, list)
    if set_types is None:
        set_types = ['val', 'test']

    cfg = copy.deepcopy(cfg)
    warnings.simplefilter('once')
    for set_type in set_types:
        assert set_type in ['val', 'test']
        dataset_type = cfg.data[set_type].type
        if dataset_type not in [
                'ConcatDataset', 'UniformConcatDataset', 'OCRDataset',
                'OCRSegDataset'
        ]:
            continue

        uniform_pipeline = cfg.data[set_type].get('pipeline', None)
        if is_type_list(uniform_pipeline, dict):
            update_pipeline_recog(cfg.data[set_type])
        elif is_2dlist(uniform_pipeline):
            for idx, _ in enumerate(uniform_pipeline):
                update_pipeline_recog(cfg.data[set_type].pipeline, idx)

        for dataset in cfg.data[set_type].get('datasets', []):
            if isinstance(dataset, list):
                for each_dataset in dataset:
                    update_pipeline_recog(each_dataset)
            else:
                update_pipeline_recog(dataset)

    return cfg
Example #6
def extract_boundary(result):
    """Extract boundaries and their scores from result.

    Args:
        result (dict): The detection result with the key 'boundary_result'
            of one image.

    Returns:
        boundaries_with_scores (list[list[float]]): The boundary and score
            list.
        boundaries (list[list[float]]): The boundary list.
        scores (list[float]): The boundary score list.
    """
    assert isinstance(result, dict)
    assert 'boundary_result' in result.keys()

    boundaries_with_scores = result['boundary_result']
    assert utils.is_2dlist(boundaries_with_scores)

    boundaries = [b[:-1] for b in boundaries_with_scores]
    scores = [b[-1] for b in boundaries_with_scores]

    return (boundaries_with_scores, boundaries, scores)
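A toy call, relying on the convention visible above that each boundary is stored as [x1, y1, ..., xn, yn, score]:

result = {
    'boundary_result': [
        [0.0, 0.0, 50.0, 0.0, 50.0, 20.0, 0.0, 20.0, 0.98],
        [5.0, 30.0, 40.0, 30.0, 40.0, 45.0, 5.0, 45.0, 0.71],
    ]
}
with_scores, boundaries, scores = extract_boundary(result)
assert scores == [0.98, 0.71]
assert boundaries[0][-1] == 20.0  # trailing score stripped from each boundary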
Example #7
def replace_image_to_tensor(cfg, set_types=None):
    """Replace 'ImageToTensor' to 'DefaultFormatBundle'."""
    assert set_types is None or isinstance(set_types, list)
    if set_types is None:
        set_types = ['val', 'test']

    cfg = copy.deepcopy(cfg)
    for set_type in set_types:
        assert set_type in ['val', 'test']
        uniform_pipeline = cfg.data[set_type].get('pipeline', None)
        if is_type_list(uniform_pipeline, dict):
            update_pipeline(cfg.data[set_type])
        elif is_2dlist(uniform_pipeline):
            for idx, _ in enumerate(uniform_pipeline):
                update_pipeline(cfg.data[set_type], idx)

        for dataset in cfg.data[set_type].get('datasets', []):
            if isinstance(dataset, list):
                for each_dataset in dataset:
                    update_pipeline(each_dataset)
            else:
                update_pipeline(dataset)

    return cfg
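`update_pipeline` itself is not shown in these examples. Given the docstring and mmdet's `replace_ImageToTensor` helper (used in Example #12), a plausible sketch is the following; treat it as an assumption, not the library's actual code:

from mmdet.datasets import replace_ImageToTensor

def update_pipeline(cfg, idx=None):
    # Hypothetical helper: swap 'ImageToTensor' for 'DefaultFormatBundle',
    # either in a flat pipeline or in the idx-th sub-pipeline of a 2d one.
    if idx is None:
        if cfg.get('pipeline', None) is not None:
            cfg.pipeline = replace_ImageToTensor(cfg.pipeline)
    else:
        cfg.pipeline[idx] = replace_ImageToTensor(cfg.pipeline[idx])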
Example #8
def imshow_text_char_boundary(img,
                              text_quads,
                              boundaries,
                              char_quads,
                              chars,
                              show=False,
                              thickness=1,
                              font_scale=0.5,
                              win_name='',
                              wait_time=-1,
                              out_file=None):
    """Draw text boxes and char boxes on img.

    Args:
        img (str or ndarray): The img to be displayed.
        text_quads (list[list[int|float]]): The text boxes.
        boundaries (list[list[int|float]]): The boundary list.
        char_quads (list[list[list[int|float]]]): A 2d list of char boxes.
            char_quads[i] is for the ith text, and char_quads[i][j] is the jth
            char of the ith text.
        chars (list[list[str]]): The characters for each text box.
        show (bool): Whether to show the image.
        thickness (int): Thickness of lines.
        font_scale (float): Font scale of texts.
        win_name (str): The window name.
        wait_time (int): Value of waitKey param.
        out_file (str or None): The filename of the output.
    """
    assert isinstance(img, (np.ndarray, str))
    assert utils.is_2dlist(text_quads)
    assert utils.is_2dlist(boundaries)
    assert utils.is_3dlist(char_quads)
    assert utils.is_2dlist(chars)
    assert utils.equal_len(text_quads, char_quads, boundaries)

    img = mmcv.imread(img)
    char_color = [mmcv.color_val('blue'), mmcv.color_val('green')]
    text_color = mmcv.color_val('red')
    text_inx = 0
    for text_box, boundary, char_box, txt in zip(text_quads, boundaries,
                                                 char_quads, chars):
        text_box = np.array(text_box)
        boundary = np.array(boundary)

        text_box = text_box.reshape(-1, 2).astype(np.int32)
        cv2.polylines(img, [text_box.reshape(-1, 1, 2)],
                      True,
                      color=text_color,
                      thickness=thickness)
        if boundary.shape[0] > 0:
            cv2.polylines(img, [boundary.reshape(-1, 1, 2)],
                          True,
                          color=text_color,
                          thickness=thickness)

        for b in char_box:
            b = np.array(b)
            c = char_color[text_inx % 2]
            b = b.astype(np.int32)
            cv2.polylines(img, [b.reshape(-1, 1, 2)],
                          True,
                          color=c,
                          thickness=thickness)

        label_text = ''.join(txt)
        cv2.putText(img, label_text, (text_box[0, 0], text_box[0, 1] - 2),
                    cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color)
        text_inx = text_inx + 1

    if show:
        mmcv.imshow(img, win_name, wait_time)
    if out_file is not None:
        mmcv.imwrite(img, out_file)

    return img
Example #9
def imshow_pred_boundary(img,
                         boundaries_with_scores,
                         labels,
                         score_thr=0,
                         boundary_color='blue',
                         text_color='blue',
                         thickness=1,
                         font_scale=0.5,
                         show=True,
                         win_name='',
                         wait_time=0,
                         out_file=None,
                         show_score=False):
    """Draw boundaries and class labels (with scores) on an image.

    Args:
        img (str or ndarray): The image to be displayed.
        boundaries_with_scores (list[list[float]]): Boundaries with scores.
        labels (list[int]): Labels of boundaries.
        score_thr (float): Minimum score of boundaries to be shown.
        boundary_color (str or tuple or :obj:`Color`): Color of boundaries.
        text_color (str or tuple or :obj:`Color`): Color of texts.
        thickness (int): Thickness of lines.
        font_scale (float): Font scales of texts.
        show (bool): Whether to show the image.
        win_name (str): The window name.
        wait_time (int): Value of waitKey param.
        out_file (str or None): The filename of the output.
        show_score (bool): Whether to show text instance score.
    """
    assert isinstance(img, (str, np.ndarray))
    assert utils.is_2dlist(boundaries_with_scores)
    assert utils.is_type_list(labels, int)
    assert utils.equal_len(boundaries_with_scores, labels)
    if len(boundaries_with_scores) == 0:
        warnings.warn(f'0 text found in {out_file}')
        return

    utils.valid_boundary(boundaries_with_scores[0])
    img = mmcv.imread(img)

    scores = np.array([b[-1] for b in boundaries_with_scores])
    inds = scores > score_thr
    boundaries = [boundaries_with_scores[i][:-1] for i in np.where(inds)[0]]
    scores = [scores[i] for i in np.where(inds)[0]]
    labels = [labels[i] for i in np.where(inds)[0]]

    boundary_color = mmcv.color_val(boundary_color)
    text_color = mmcv.color_val(text_color)

    for boundary, score, label in zip(boundaries, scores, labels):
        boundary_int = np.array(boundary).astype(np.int32)

        cv2.polylines(img, [boundary_int.reshape(-1, 1, 2)],
                      True,
                      color=boundary_color,
                      thickness=thickness)

        if show_score:
            label_text = f'{score:.02f}'
            cv2.putText(img, label_text,
                        (boundary_int[0], boundary_int[1] - 2),
                        cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color)
    if show:
        mmcv.imshow(img, win_name, wait_time)
    if out_file is not None:
        mmcv.imwrite(img, out_file)

    return img
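A usage sketch, assuming 'demo.jpg' is any readable image (the path is illustrative) and that boundaries follow the [x1, y1, ..., xn, yn, score] convention:

boundaries_with_scores = [[0, 0, 60, 0, 60, 25, 0, 25, 0.9]]
labels = [0]
imshow_pred_boundary('demo.jpg', boundaries_with_scores, labels,
                     score_thr=0.5, show=False, out_file='vis.jpg')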
Example #10
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    setup_multi_processes(cfg)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.load_from is not None:
        cfg.load_from = args.load_from
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpus is not None:
        cfg.gpu_ids = range(1)
        warnings.warn('`--gpus` is deprecated because we only support '
                      'single GPU mode in non-distributed training. '
                      'Use `gpus=1` now.')
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids[0:1]
        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
                      'Because we only support single GPU mode in '
                      'non-distributed training. Use the first GPU '
                      'in `gpu_ids` now.')
    if args.gpus is None and args.gpu_ids is None:
        cfg.gpu_ids = [args.gpu_id]

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        # re-set gpu_ids with distributed training mode
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info
    meta['config'] = cfg.pretty_text
    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    # set random seeds
    seed = init_random_seed(args.seed)
    seed = seed + dist.get_rank() if args.diff_seed else seed
    logger.info(f'Set random seed to {seed}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(seed, deterministic=args.deterministic)
    cfg.seed = seed
    meta['seed'] = seed
    meta['exp_name'] = osp.basename(args.config)

    model = build_detector(cfg.model,
                           train_cfg=cfg.get('train_cfg'),
                           test_cfg=cfg.get('test_cfg'))
    model.init_weights()

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        if cfg.data.train.get('pipeline', None) is None:
            if is_2dlist(cfg.data.train.datasets):
                train_pipeline = cfg.data.train.datasets[0][0].pipeline
            else:
                train_pipeline = cfg.data.train.datasets[0].pipeline
        elif is_2dlist(cfg.data.train.pipeline):
            train_pipeline = cfg.data.train.pipeline[0]
        else:
            train_pipeline = cfg.data.train.pipeline

        if val_dataset['type'] in ['ConcatDataset', 'UniformConcatDataset']:
            for dataset in val_dataset['datasets']:
                dataset.pipeline = train_pipeline
        else:
            val_dataset.pipeline = train_pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmocr_version=__version__ +
                                          get_git_hash()[:7],
                                          CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector(model,
                   datasets,
                   cfg,
                   distributed=distributed,
                   validate=(not args.no_validate),
                   timestamp=timestamp,
                   meta=meta)
Example #11
def test_is_2dlist():

    assert utils.is_2dlist([])
    assert utils.is_2dlist([[]])
    assert utils.is_2dlist([[1]])
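The examples above lean heavily on `utils.is_2dlist`, and this test pins down its contract: an empty outer list and a list of (possibly empty) lists both qualify. A minimal implementation consistent with that contract could look like this (a sketch, not necessarily the library's code):

def is_2dlist(x):
    """Check whether x is a list whose elements are all lists."""
    if not isinstance(x, list):
        return False
    # all() over an empty iterable is True, so is_2dlist([]) holds.
    return all(isinstance(item, list) for item in x)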
Example #12
def model_inference(model,
                    imgs,
                    ann=None,
                    batch_mode=False,
                    return_data=False):
    """Inference image(s) with the detector.

    Args:
        model (nn.Module): The loaded detector.
        imgs (str/ndarray or list[str/ndarray] or tuple[str/ndarray]):
            Either image files or loaded images.
        ann (dict): Annotation info for key information extraction.
        batch_mode (bool): If True, use batch mode for inference.
        return_data (bool): If True, also return the postprocessed data.
    Returns:
        result (dict or list[dict]): Predicted results. If return_data is
            True, the postprocessed data is returned as well.
    """

    if isinstance(imgs, (list, tuple)):
        is_batch = True
        if len(imgs) == 0:
            raise Exception('empty imgs provided, please check and try again')
        if not isinstance(imgs[0], (np.ndarray, str)):
            raise AssertionError('imgs must be strings or numpy arrays')

    elif isinstance(imgs, (np.ndarray, str)):
        imgs = [imgs]
        is_batch = False
    else:
        raise AssertionError('imgs must be strings or numpy arrays')

    is_ndarray = isinstance(imgs[0], np.ndarray)

    cfg = model.cfg

    if batch_mode:
        cfg = disable_text_recog_aug_test(cfg, set_types=['test'])

    device = next(model.parameters()).device  # model device

    if cfg.data.test.get('pipeline', None) is None:
        if is_2dlist(cfg.data.test.datasets):
            cfg.data.test.pipeline = cfg.data.test.datasets[0][0].pipeline
        else:
            cfg.data.test.pipeline = cfg.data.test.datasets[0].pipeline
    if is_2dlist(cfg.data.test.pipeline):
        cfg.data.test.pipeline = cfg.data.test.pipeline[0]

    if is_ndarray:
        cfg = cfg.copy()
        # set loading pipeline type
        cfg.data.test.pipeline[0].type = 'LoadImageFromNdarray'

    cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
    test_pipeline = Compose(cfg.data.test.pipeline)

    datas = []
    for img in imgs:
        # prepare data
        if is_ndarray:
            # directly add img
            data = dict(img=img,
                        ann_info=ann,
                        img_info=dict(width=img.shape[1], height=img.shape[0]),
                        bbox_fields=[])
        else:
            # add information into dict
            data = dict(img_info=dict(filename=img),
                        img_prefix=None,
                        ann_info=ann,
                        bbox_fields=[])
        if ann is not None:
            data.update(dict(**ann))

        # build the data pipeline
        data = test_pipeline(data)
        # get tensor from list to stack for batch mode (text detection)
        if batch_mode:
            if cfg.data.test.pipeline[1].type == 'MultiScaleFlipAug':
                for key, value in data.items():
                    data[key] = value[0]
        datas.append(data)

    if isinstance(datas[0]['img'], list) and len(datas) > 1:
        raise Exception('aug test does not support '
                        f'inference with batch size '
                        f'{len(datas)}')

    data = collate(datas, samples_per_gpu=len(imgs))

    # process img_metas
    if isinstance(data['img_metas'], list):
        data['img_metas'] = [
            img_metas.data[0] for img_metas in data['img_metas']
        ]
    else:
        data['img_metas'] = data['img_metas'].data

    if isinstance(data['img'], list):
        data['img'] = [img.data for img in data['img']]
        if isinstance(data['img'][0], list):
            data['img'] = [img[0] for img in data['img']]
    else:
        data['img'] = data['img'].data

    # for KIE models
    if ann is not None:
        data['relations'] = data['relations'].data[0]
        data['gt_bboxes'] = data['gt_bboxes'].data[0]
        data['texts'] = data['texts'].data[0]
        data['img'] = data['img'][0]
        data['img_metas'] = data['img_metas'][0]

    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        for m in model.modules():
            assert not isinstance(
                m, RoIPool
            ), 'CPU inference with RoIPool is not supported currently.'

    # forward the model
    with torch.no_grad():
        results = model(return_loss=False, rescale=True, **data)

    if not is_batch:
        if not return_data:
            return results[0]
        return results[0], datas[0]
    else:
        if not return_data:
            return results
        return results, datas
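A usage sketch for the two modes, assuming `model` is an already-loaded detector (checkpoint loading is not shown and the file path is illustrative):

import numpy as np

# Single input: returns one result dict.
result = model_inference(model, 'demo/demo_text_det.jpg')

# Batch mode over loaded arrays: returns a list of result dicts.
imgs = [np.zeros((64, 64, 3), dtype=np.uint8) for _ in range(2)]
results = model_inference(model, imgs, batch_mode=True)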