def __init__(self,
             datasets,
             separate_eval=True,
             pipeline=None,
             force_apply=False,
             **kwargs):
    new_datasets = []
    if pipeline is not None:
        assert isinstance(
            pipeline,
            list), 'pipeline must be list[dict] or list[list[dict]].'
        if is_type_list(pipeline, dict):
            self._apply_pipeline(datasets, pipeline, force_apply)
            new_datasets = datasets
        elif is_2dlist(pipeline):
            assert is_2dlist(datasets)
            assert len(datasets) == len(pipeline)
            for sub_datasets, tmp_pipeline in zip(datasets, pipeline):
                self._apply_pipeline(sub_datasets, tmp_pipeline, force_apply)
                new_datasets.extend(sub_datasets)
    else:
        if is_2dlist(datasets):
            for sub_datasets in datasets:
                new_datasets.extend(sub_datasets)
        else:
            new_datasets = datasets
    datasets = [build_dataset(c, kwargs) for c in new_datasets]
    super().__init__(datasets, separate_eval)
def show_pred_gt(preds,
                 gts,
                 show=False,
                 win_name='',
                 wait_time=0,
                 out_file=None):
    """Show detection and ground truth for one image.

    Args:
        preds (list[list[float]]): The detection boundary list.
        gts (list[list[float]]): The ground truth boundary list.
        show (bool): Whether to show the image.
        win_name (str): The window name.
        wait_time (int): The value of waitKey param.
        out_file (str): The filename of the output.
    """
    assert utils.is_2dlist(preds)
    assert utils.is_2dlist(gts)
    assert isinstance(show, bool)
    assert isinstance(win_name, str)
    assert isinstance(wait_time, int)
    assert utils.is_none_or_type(out_file, str)

    p_xy = [p for boundary in preds for p in boundary]
    gt_xy = [g for gt in gts for g in gt]

    max_xy = np.max(np.array(p_xy + gt_xy).reshape(-1, 2), axis=0)

    width = int(max_xy[0]) + 100
    height = int(max_xy[1]) + 100

    # Use uint8 so the 255 fill yields a white canvas (int8 cannot hold 255).
    img = np.ones((height, width, 3), np.uint8) * 255
    pred_color = mmcv.color_val('red')
    gt_color = mmcv.color_val('blue')
    thickness = 1

    for boundary in preds:
        cv2.polylines(
            img, [np.array(boundary).astype(np.int32).reshape(-1, 1, 2)],
            True,
            color=pred_color,
            thickness=thickness)
    for gt in gts:
        cv2.polylines(
            img, [np.array(gt).astype(np.int32).reshape(-1, 1, 2)],
            True,
            color=gt_color,
            thickness=thickness)
    if show:
        mmcv.imshow(img, win_name, wait_time)
    if out_file is not None:
        mmcv.imwrite(img, out_file)

    return img
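# Usage sketch for show_pred_gt (not from the source): the quads below are
# made-up illustrative coordinates, not output of any real model or dataset.
preds = [[10, 10, 110, 10, 110, 40, 10, 40]]  # one predicted box as x, y pairs
gts = [[12, 12, 108, 12, 108, 42, 12, 42]]    # one ground-truth box
canvas = show_pred_gt(preds, gts, show=False, out_file='pred_vs_gt.png')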
def __init__(self,
             datasets,
             separate_eval=True,
             show_mean_scores='auto',
             pipeline=None,
             force_apply=False,
             **kwargs):
    new_datasets = []
    if pipeline is not None:
        assert isinstance(
            pipeline,
            list), 'pipeline must be list[dict] or list[list[dict]].'
        if is_type_list(pipeline, dict):
            self._apply_pipeline(datasets, pipeline, force_apply)
            new_datasets = datasets
        elif is_2dlist(pipeline):
            assert is_2dlist(datasets)
            assert len(datasets) == len(pipeline)
            for sub_datasets, tmp_pipeline in zip(datasets, pipeline):
                self._apply_pipeline(sub_datasets, tmp_pipeline, force_apply)
                new_datasets.extend(sub_datasets)
    else:
        if is_2dlist(datasets):
            for sub_datasets in datasets:
                new_datasets.extend(sub_datasets)
        else:
            new_datasets = datasets
    datasets = [build_dataset(c, kwargs) for c in new_datasets]
    super().__init__(datasets, separate_eval)

    if not separate_eval:
        raise NotImplementedError(
            'Evaluating datasets as a whole is not'
            ' supported yet. Please use "separate_eval=True"')

    assert isinstance(show_mean_scores, bool) or show_mean_scores == 'auto'
    if show_mean_scores == 'auto':
        show_mean_scores = len(self.datasets) > 1
    self.show_mean_scores = show_mean_scores
    # 'auto' has already been resolved to a bool above, so a plain truth
    # test is equivalent to the original compound condition.
    if show_mean_scores:
        if len(set([type(ds) for ds in self.datasets])) != 1:
            raise NotImplementedError(
                'To compute mean evaluation scores, all datasets '
                'must have the same type')
def text_model_inference(model, input_sentence):
    """Inference text(s) with the entity recognizer.

    Args:
        model (nn.Module): The loaded recognizer.
        input_sentence (str): A text entered by the user.

    Returns:
        result (dict): Predicted results.
    """

    assert isinstance(input_sentence, str)

    cfg = model.cfg
    if cfg.data.test.get('pipeline', None) is None:
        if is_2dlist(cfg.data.test.datasets):
            cfg.data.test.pipeline = cfg.data.test.datasets[0][0].pipeline
        else:
            cfg.data.test.pipeline = cfg.data.test.datasets[0].pipeline
    if is_2dlist(cfg.data.test.pipeline):
        cfg.data.test.pipeline = cfg.data.test.pipeline[0]
    test_pipeline = Compose(cfg.data.test.pipeline)
    data = {'text': input_sentence, 'label': {}}

    # build the data pipeline
    data = test_pipeline(data)
    if isinstance(data['img_metas'], dict):
        img_metas = data['img_metas']
    else:
        img_metas = data['img_metas'].data

    assert isinstance(img_metas, dict)
    img_metas = {
        'input_ids': img_metas['input_ids'].unsqueeze(0),
        'attention_masks': img_metas['attention_masks'].unsqueeze(0),
        'token_type_ids': img_metas['token_type_ids'].unsqueeze(0),
        'labels': img_metas['labels'].unsqueeze(0)
    }
    # forward the model
    with torch.no_grad():
        result = model(None, img_metas, return_loss=False)
    return result
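# Usage sketch for text_model_inference (paths are illustrative assumptions;
# point them at an actual MMOCR NER config/checkpoint pair on your machine).
from mmocr.apis import init_detector

ner_model = init_detector('configs/ner/your_ner_config.py',
                          'ner_checkpoint.pth', device='cpu')
print(text_model_inference(ner_model, 'Order 42 was shipped to Berlin today.'))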
def disable_text_recog_aug_test(cfg, set_types=None):
    """Remove aug_test from test pipeline for text recognition.

    Args:
        cfg (mmcv.Config): Input config.
        set_types (list[str]): Type of dataset source. Should be
            None or sublist of ['test', 'val'].
    """
    assert set_types is None or isinstance(set_types, list)
    if set_types is None:
        set_types = ['val', 'test']
    cfg = copy.deepcopy(cfg)
    warnings.simplefilter('once')
    for set_type in set_types:
        assert set_type in ['val', 'test']
        dataset_type = cfg.data[set_type].type
        if dataset_type not in [
                'ConcatDataset', 'UniformConcatDataset', 'OCRDataset',
                'OCRSegDataset'
        ]:
            continue

        uniform_pipeline = cfg.data[set_type].get('pipeline', None)
        if is_type_list(uniform_pipeline, dict):
            update_pipeline_recog(cfg.data[set_type])
        elif is_2dlist(uniform_pipeline):
            for idx, _ in enumerate(uniform_pipeline):
                update_pipeline_recog(cfg.data[set_type].pipeline, idx)

        for dataset in cfg.data[set_type].get('datasets', []):
            if isinstance(dataset, list):
                for each_dataset in dataset:
                    update_pipeline_recog(each_dataset)
            else:
                update_pipeline_recog(dataset)

    return cfg
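# Minimal sketch of how this helper is typically applied (model_inference
# below does the same when batch_mode=True); `recog_model` is assumed to be
# an already-loaded recognizer with a .cfg attribute.
cfg_no_aug = disable_text_recog_aug_test(recog_model.cfg, set_types=['test'])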
def extract_boundary(result):
    """Extract boundaries and their scores from result.

    Args:
        result (dict): The detection result with the key 'boundary_result'
            of one image.

    Returns:
        boundaries_with_scores (list[list[float]]): The boundary and score
            list.
        boundaries (list[list[float]]): The boundary list.
        scores (list[float]): The boundary score list.
    """
    assert isinstance(result, dict)
    assert 'boundary_result' in result.keys()

    boundaries_with_scores = result['boundary_result']
    assert utils.is_2dlist(boundaries_with_scores)

    boundaries = [b[:-1] for b in boundaries_with_scores]
    scores = [b[-1] for b in boundaries_with_scores]

    return (boundaries_with_scores, boundaries, scores)
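# Illustrative example for extract_boundary: a hand-made detection result in
# which each boundary is a flat polygon plus a trailing confidence score.
fake_result = {'boundary_result': [[10, 10, 60, 10, 60, 30, 10, 30, 0.98]]}
_, boundaries, scores = extract_boundary(fake_result)
# boundaries == [[10, 10, 60, 10, 60, 30, 10, 30]], scores == [0.98]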
def replace_image_to_tensor(cfg, set_types=None):
    """Replace 'ImageToTensor' with 'DefaultFormatBundle'."""
    assert set_types is None or isinstance(set_types, list)
    if set_types is None:
        set_types = ['val', 'test']
    cfg = copy.deepcopy(cfg)
    for set_type in set_types:
        assert set_type in ['val', 'test']
        uniform_pipeline = cfg.data[set_type].get('pipeline', None)
        if is_type_list(uniform_pipeline, dict):
            update_pipeline(cfg.data[set_type])
        elif is_2dlist(uniform_pipeline):
            for idx, _ in enumerate(uniform_pipeline):
                update_pipeline(cfg.data[set_type], idx)

        for dataset in cfg.data[set_type].get('datasets', []):
            if isinstance(dataset, list):
                for each_dataset in dataset:
                    update_pipeline(each_dataset)
            else:
                update_pipeline(dataset)

    return cfg
def imshow_text_char_boundary(img,
                              text_quads,
                              boundaries,
                              char_quads,
                              chars,
                              show=False,
                              thickness=1,
                              font_scale=0.5,
                              win_name='',
                              wait_time=-1,
                              out_file=None):
    """Draw text boxes and char boxes on img.

    Args:
        img (str or ndarray): The img to be displayed.
        text_quads (list[list[int|float]]): The text boxes.
        boundaries (list[list[int|float]]): The boundary list.
        char_quads (list[list[list[int|float]]]): A 2d list of char boxes.
            char_quads[i] is for the ith text, and char_quads[i][j] is the
            jth char of the ith text.
        chars (list[list[char]]): The string for each text box.
        thickness (int): Thickness of lines.
        font_scale (float): Font scales of texts.
        show (bool): Whether to show the image.
        win_name (str): The window name.
        wait_time (int): Value of waitKey param.
        out_file (str or None): The filename of the output.
    """
    assert isinstance(img, (np.ndarray, str))
    assert utils.is_2dlist(text_quads)
    assert utils.is_2dlist(boundaries)
    assert utils.is_3dlist(char_quads)
    assert utils.is_2dlist(chars)
    assert utils.equal_len(text_quads, char_quads, boundaries)

    img = mmcv.imread(img)
    char_color = [mmcv.color_val('blue'), mmcv.color_val('green')]
    text_color = mmcv.color_val('red')
    text_inx = 0
    for text_box, boundary, char_box, txt in zip(text_quads, boundaries,
                                                 char_quads, chars):
        text_box = np.array(text_box)
        boundary = np.array(boundary)
        text_box = text_box.reshape(-1, 2).astype(np.int32)
        cv2.polylines(
            img, [text_box.reshape(-1, 1, 2)],
            True,
            color=text_color,
            thickness=thickness)
        if boundary.shape[0] > 0:
            cv2.polylines(
                img, [boundary.reshape(-1, 1, 2)],
                True,
                color=text_color,
                thickness=thickness)

        for b in char_box:
            b = np.array(b)
            c = char_color[text_inx % 2]
            b = b.astype(np.int32)
            cv2.polylines(
                img, [b.reshape(-1, 1, 2)],
                True,
                color=c,
                thickness=thickness)

        label_text = ''.join(txt)
        cv2.putText(img, label_text, (text_box[0, 0], text_box[0, 1] - 2),
                    cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color)
        text_inx = text_inx + 1

    if show:
        mmcv.imshow(img, win_name, wait_time)
    if out_file is not None:
        mmcv.imwrite(img, out_file)

    return img
def imshow_pred_boundary(img,
                         boundaries_with_scores,
                         labels,
                         score_thr=0,
                         boundary_color='blue',
                         text_color='blue',
                         thickness=1,
                         font_scale=0.5,
                         show=True,
                         win_name='',
                         wait_time=0,
                         out_file=None,
                         show_score=False):
    """Draw boundaries and class labels (with scores) on an image.

    Args:
        img (str or ndarray): The image to be displayed.
        boundaries_with_scores (list[list[float]]): Boundaries with scores.
        labels (list[int]): Labels of boundaries.
        score_thr (float): Minimum score of boundaries to be shown.
        boundary_color (str or tuple or :obj:`Color`): Color of boundaries.
        text_color (str or tuple or :obj:`Color`): Color of texts.
        thickness (int): Thickness of lines.
        font_scale (float): Font scales of texts.
        show (bool): Whether to show the image.
        win_name (str): The window name.
        wait_time (int): Value of waitKey param.
        out_file (str or None): The filename of the output.
        show_score (bool): Whether to show text instance score.
    """
    assert isinstance(img, (str, np.ndarray))
    assert utils.is_2dlist(boundaries_with_scores)
    assert utils.is_type_list(labels, int)
    assert utils.equal_len(boundaries_with_scores, labels)
    if len(boundaries_with_scores) == 0:
        # Guard the message so a None out_file does not raise a TypeError.
        warnings.warn('0 text found in ' + (out_file or 'image'))
        return

    utils.valid_boundary(boundaries_with_scores[0])
    img = mmcv.imread(img)

    scores = np.array([b[-1] for b in boundaries_with_scores])
    inds = scores > score_thr
    boundaries = [boundaries_with_scores[i][:-1] for i in np.where(inds)[0]]
    scores = [scores[i] for i in np.where(inds)[0]]
    labels = [labels[i] for i in np.where(inds)[0]]

    boundary_color = mmcv.color_val(boundary_color)
    text_color = mmcv.color_val(text_color)

    for boundary, score, label in zip(boundaries, scores, labels):
        boundary_int = np.array(boundary).astype(np.int32)

        cv2.polylines(
            img, [boundary_int.reshape(-1, 1, 2)],
            True,
            color=boundary_color,
            thickness=thickness)

        if show_score:
            label_text = f'{score:.02f}'
            cv2.putText(img, label_text,
                        (boundary_int[0], boundary_int[1] - 2),
                        cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color)
    if show:
        mmcv.imshow(img, win_name, wait_time)
    if out_file is not None:
        mmcv.imwrite(img, out_file)

    return img
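# Usage sketch for imshow_pred_boundary: one made-up boundary (score appended
# as the last element); 'demo.jpg' is assumed to exist on disk.
boundaries_with_scores = [[20, 20, 120, 20, 120, 60, 20, 60, 0.9]]
imshow_pred_boundary(
    'demo.jpg',
    boundaries_with_scores,
    labels=[0],
    score_thr=0.5,
    show=False,
    out_file='det_vis.jpg',
    show_score=True)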
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    setup_multi_processes(cfg)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.load_from is not None:
        cfg.load_from = args.load_from
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpus is not None:
        cfg.gpu_ids = range(1)
        warnings.warn('`--gpus` is deprecated because we only support '
                      'single GPU mode in non-distributed training. '
                      'Use `gpus=1` now.')
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids[0:1]
        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
                      'Because we only support single GPU mode in '
                      'non-distributed training. Use the first GPU '
                      'in `gpu_ids` now.')
    if args.gpus is None and args.gpu_ids is None:
        cfg.gpu_ids = [args.gpu_id]

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        # re-set gpu_ids with distributed training mode
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info
    meta['config'] = cfg.pretty_text
    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    # set random seeds
    seed = init_random_seed(args.seed)
    seed = seed + dist.get_rank() if args.diff_seed else seed
    logger.info(f'Set random seed to {seed}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(seed, deterministic=args.deterministic)
    cfg.seed = seed
    meta['seed'] = seed
    meta['exp_name'] = osp.basename(args.config)

    model = build_detector(
        cfg.model,
        train_cfg=cfg.get('train_cfg'),
        test_cfg=cfg.get('test_cfg'))
    model.init_weights()

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        if cfg.data.train.get('pipeline', None) is None:
            if is_2dlist(cfg.data.train.datasets):
                train_pipeline = cfg.data.train.datasets[0][0].pipeline
            else:
                train_pipeline = cfg.data.train.datasets[0].pipeline
        elif is_2dlist(cfg.data.train.pipeline):
            train_pipeline = cfg.data.train.pipeline[0]
        else:
            train_pipeline = cfg.data.train.pipeline

        if val_dataset['type'] in ['ConcatDataset', 'UniformConcatDataset']:
            for dataset in val_dataset['datasets']:
                dataset.pipeline = train_pipeline
        else:
            val_dataset.pipeline = train_pipeline
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmocr version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmocr_version=__version__ + get_git_hash()[:7],
            CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    train_detector(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=(not args.no_validate),
        timestamp=timestamp,
        meta=meta)
def test_is_2dlist():

    assert utils.is_2dlist([])
    assert utils.is_2dlist([[]])
    assert utils.is_2dlist([[1]])
def model_inference(model,
                    imgs,
                    ann=None,
                    batch_mode=False,
                    return_data=False):
    """Inference image(s) with the detector.

    Args:
        model (nn.Module): The loaded detector.
        imgs (str/ndarray or list[str/ndarray] or tuple[str/ndarray]):
            Either image files or loaded images.
        batch_mode (bool): If True, use batch mode for inference.
        ann (dict): Annotation info for key information extraction.
        return_data (bool): Whether to return postprocessed data.

    Returns:
        result (dict): Predicted results.
    """
    if isinstance(imgs, (list, tuple)):
        is_batch = True
        if len(imgs) == 0:
            raise Exception('empty imgs provided, please check and try again')
        if not isinstance(imgs[0], (np.ndarray, str)):
            raise AssertionError('imgs must be strings or numpy arrays')
    elif isinstance(imgs, (np.ndarray, str)):
        imgs = [imgs]
        is_batch = False
    else:
        raise AssertionError('imgs must be strings or numpy arrays')

    is_ndarray = isinstance(imgs[0], np.ndarray)

    cfg = model.cfg

    if batch_mode:
        cfg = disable_text_recog_aug_test(cfg, set_types=['test'])

    device = next(model.parameters()).device  # model device

    if cfg.data.test.get('pipeline', None) is None:
        if is_2dlist(cfg.data.test.datasets):
            cfg.data.test.pipeline = cfg.data.test.datasets[0][0].pipeline
        else:
            cfg.data.test.pipeline = cfg.data.test.datasets[0].pipeline
    if is_2dlist(cfg.data.test.pipeline):
        cfg.data.test.pipeline = cfg.data.test.pipeline[0]

    if is_ndarray:
        cfg = cfg.copy()
        # set loading pipeline type
        cfg.data.test.pipeline[0].type = 'LoadImageFromNdarray'

    cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
    test_pipeline = Compose(cfg.data.test.pipeline)

    datas = []
    for img in imgs:
        # prepare data
        if is_ndarray:
            # directly add img
            data = dict(
                img=img,
                ann_info=ann,
                img_info=dict(width=img.shape[1], height=img.shape[0]),
                bbox_fields=[])
        else:
            # add information into dict
            data = dict(
                img_info=dict(filename=img),
                img_prefix=None,
                ann_info=ann,
                bbox_fields=[])
        if ann is not None:
            data.update(dict(**ann))

        # build the data pipeline
        data = test_pipeline(data)
        # get tensor from list to stack for batch mode (text detection)
        if batch_mode:
            if cfg.data.test.pipeline[1].type == 'MultiScaleFlipAug':
                for key, value in data.items():
                    data[key] = value[0]
        datas.append(data)

    if isinstance(datas[0]['img'], list) and len(datas) > 1:
        raise Exception('aug test does not support '
                        f'inference with batch size '
                        f'{len(datas)}')

    data = collate(datas, samples_per_gpu=len(imgs))

    # process img_metas
    if isinstance(data['img_metas'], list):
        data['img_metas'] = [
            img_metas.data[0] for img_metas in data['img_metas']
        ]
    else:
        data['img_metas'] = data['img_metas'].data

    if isinstance(data['img'], list):
        data['img'] = [img.data for img in data['img']]
        if isinstance(data['img'][0], list):
            data['img'] = [img[0] for img in data['img']]
    else:
        data['img'] = data['img'].data

    # for KIE models
    if ann is not None:
        data['relations'] = data['relations'].data[0]
        data['gt_bboxes'] = data['gt_bboxes'].data[0]
        data['texts'] = data['texts'].data[0]
        data['img'] = data['img'][0]
        data['img_metas'] = data['img_metas'][0]

    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        for m in model.modules():
            assert not isinstance(
                m, RoIPool
            ), 'CPU inference with RoIPool is not supported currently.'

    # forward the model
    with torch.no_grad():
        results = model(return_loss=False, rescale=True, **data)

    if not is_batch:
        if not return_data:
            return results[0]
        return results[0], datas[0]
    else:
        if not return_data:
            return results
        return results, datas
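# End-to-end usage sketch for model_inference (config/checkpoint/image paths
# are placeholders; substitute a real MMOCR detector config and weights).
from mmocr.apis import init_detector

det_model = init_detector('configs/textdet/your_det_config.py',
                          'det_checkpoint.pth', device='cpu')
det_result = model_inference(det_model, 'demo_text_det.jpg')
print(det_result['boundary_result'][:1])  # first polygon plus its score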