def det_and_recog_inference(args, det_model, recog_model):
    """Detect text regions in one image and recognize them one by one.

    Args:
        args: Options object; only ``args.img`` (the image path) is read.
        det_model: Detection model handed to ``model_inference``.
        recog_model: Recognition model handed to ``model_inference``.

    Returns:
        dict: ``{'filename': <image path>, 'result': [...]}``. Each entry of
        ``result`` carries ``box``, ``box_score``, ``text`` and
        ``text_score``.
    """
    image_path = args.img
    entries = []
    end2end_res = {'filename': image_path, 'result': entries}

    image = mmcv.imread(image_path)
    boundaries = model_inference(det_model, image)['boundary_result']

    for boundary in boundaries:
        # The last element is stored as the score; the rest as coordinates.
        entry = {
            'box': [round(coord) for coord in boundary[:-1]],
            'box_score': float(boundary[-1]),
        }

        if len(boundary) > 9:
            # More than eight coordinates: crop the axis-aligned rectangle
            # spanned by the even-indexed (x) and odd-indexed (y) values.
            xs = boundary[0:-1:2]
            ys = boundary[1:-1:2]
            left, right = min(xs), max(xs)
            top, bottom = min(ys), max(ys)
            quad = [left, top, right, top, right, bottom, left, bottom]
        else:
            quad = boundary[:8]

        recognized = model_inference(recog_model, crop_img(image, quad))
        text = recognized['text']
        score = recognized['score']
        if isinstance(score, list):
            # Collapse a list of scores into one value; the divisor is
            # floored at 1 so empty text cannot divide by zero.
            score = sum(score) / max(1, len(text))

        entry['text'] = text
        entry['text_score'] = score
        entries.append(entry)

    return end2end_res
def det_and_recog_inference(args, det_model, recog_model):
    """Detect text regions in one image, then recognize them.

    Recognition runs crop by crop, or in chunks when ``args.batch_mode`` is
    truthy.

    Args:
        args: Options object. ``args.img`` (image path) and
            ``args.batch_mode`` are always read; ``args.batch_size`` is read
            only in batch mode. A non-positive ``batch_size`` sends all
            crops to the recognizer in a single batch.
        det_model: Detection model handed to ``model_inference``.
        recog_model: Recognition model handed to ``model_inference``.

    Returns:
        dict: ``{'filename': <image path>, 'result': [...]}``. Each entry of
        ``result`` carries ``box``, ``box_score``, ``text`` and
        ``text_score``.
    """

    def _mean_score(text, score):
        # Collapse a list of scores into one value; the divisor is floored
        # at 1 so empty text cannot divide by zero.
        if isinstance(score, list):
            return sum(score) / max(1, len(text))
        return score

    image_path = args.img
    end2end_res = {'filename': image_path, 'result': []}

    image = mmcv.imread(image_path)
    det_result = model_inference(det_model, image)
    bboxes = det_result['boundary_result']

    box_imgs = []
    for bbox in bboxes:
        # The last element is stored as the score; the rest as coordinates.
        box_res = {
            'box': [round(x) for x in bbox[:-1]],
            'box_score': float(bbox[-1]),
        }

        box = bbox[:8]
        if len(bbox) > 9:
            # More than eight coordinates: crop the axis-aligned rectangle
            # spanned by the even-indexed (x) and odd-indexed (y) values.
            xs = bbox[0:-1:2]
            ys = bbox[1:-1:2]
            min_x, max_x = min(xs), max(xs)
            min_y, max_y = min(ys), max(ys)
            box = [min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y]
        box_img = crop_img(image, box)

        if args.batch_mode:
            # Recognition is deferred; text fields are filled in below.
            box_imgs.append(box_img)
        else:
            recog_result = model_inference(recog_model, box_img)
            box_res['text'] = recog_result['text']
            box_res['text_score'] = _mean_score(recog_result['text'],
                                                recog_result['score'])
        end2end_res['result'].append(box_res)

    if args.batch_mode and box_imgs:
        batch_size = args.batch_size
        # A step-based range never yields an empty chunk, so no skip logic
        # is needed. A non-positive batch size means "one batch" instead of
        # raising ZeroDivisionError or silently skipping recognition.
        step = batch_size if batch_size > 0 else len(box_imgs)
        for start_idx in range(0, len(box_imgs), step):
            chunk_box_imgs = box_imgs[start_idx:start_idx + step]
            recog_results = model_inference(
                recog_model, chunk_box_imgs, batch_mode=True)
            for i, recog_result in enumerate(recog_results):
                # Crops were appended in the same order as result entries,
                # so start_idx + i addresses the matching entry.
                box_res = end2end_res['result'][start_idx + i]
                box_res['text'] = recog_result['text']
                box_res['text_score'] = _mean_score(recog_result['text'],
                                                    recog_result['score'])

    return end2end_res
def test_model_batch_inference_empty_detection(cfg_file):
    """Batch inference on an empty image list must raise the expected error."""
    base_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    model = build_model(os.path.join(base_dir, cfg_file))

    no_images = []
    expected_msg = 'empty imgs provided, please check and try again'
    with pytest.raises(Exception, match=expected_msg):
        model_inference(model, no_images, batch_mode=True)
def test_model_batch_inference_det(cfg_file):
    """A one-image batch yields one result for both path and array input."""
    base_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    model = build_model(os.path.join(base_dir, cfg_file))
    image_path = os.path.join(base_dir, '../demo/demo_text_det.jpg')

    # Inference from a file path.
    from_path = model_inference(model, [image_path], batch_mode=True)
    assert len(from_path) == 1

    # Inference from an image already loaded with ``imread``.
    loaded = imread(image_path)
    from_array = model_inference(model, [loaded], batch_mode=True)
    assert len(from_array) == 1
def test_model_batch_inference_raises_exception_error_aug_test_recog(cfg_file):
    """Inference on two images must raise the "aug test" batch-size error.

    Checked for both file-path input and already-loaded image input.
    """
    tmp_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    config_file = os.path.join(tmp_dir, cfg_file)
    model = build_model(config_file)

    expected_msg = 'aug test does not support inference with batch size'

    # Setup stays outside the ``pytest.raises`` bodies so that only the call
    # under test can satisfy the expectation, and the second check does not
    # depend on a variable created inside the first ``with`` block.
    sample_img_path = os.path.join(tmp_dir, '../demo/demo_text_det.jpg')
    with pytest.raises(Exception, match=expected_msg):
        model_inference(model, [sample_img_path, sample_img_path])

    img = imread(sample_img_path)
    with pytest.raises(Exception, match=expected_msg):
        model_inference(model, [img, img])
def inference(m, a, **kwargs):
    """Dispatch one inference call to a Tesseract backend or the default.

    Args:
        m: Model forwarded to ``model_inference`` in the default case.
        a: Input forwarded to whichever backend is selected.
        **kwargs: Extra keyword arguments, forwarded only to
            ``model_inference``.

    Returns:
        Whatever the selected backend returns.
    """
    # NOTE(review): ``model`` and ``self`` are not parameters of this
    # function; presumably both are captured from an enclosing method's
    # scope -- confirm against the surrounding code.
    if model == 'Tesseract_det':
        return self.tesseract_det_inference(a)
    if model == 'Tesseract_recog':
        return self.tesseract_recog_inference(a)
    return model_inference(m, a, **kwargs)
def main():
    """Run inference on a single image and save the visualized result.

    Command-line arguments: ``img``, ``config``, ``checkpoint``,
    ``save_path``, plus the optional ``--device`` and ``--imshow``.
    """
    parser = ArgumentParser()
    positional = (
        ('img', 'Image file.'),
        ('config', 'Config file.'),
        ('checkpoint', 'Checkpoint file.'),
        ('save_path', 'Path to save visualized image.'),
    )
    for arg_name, arg_help in positional:
        parser.add_argument(arg_name, help=arg_help)
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for inference.')
    parser.add_argument(
        '--imshow',
        action='store_true',
        help='Whether show image with OpenCV.')
    args = parser.parse_args()

    # Build the model from a config file and a checkpoint file.
    model = init_detector(args.config, args.checkpoint, device=args.device)
    test_cfg = model.cfg.data.test
    if test_cfg['type'] == 'ConcatDataset':
        # Use the first wrapped dataset's pipeline as the test pipeline.
        model.cfg.data.test.pipeline = test_cfg['datasets'][0].pipeline

    # Run the model on the single input image.
    result = model_inference(model, args.img)
    print(f'result: {result}')

    # Render the prediction and write it to ``save_path``.
    rendered = model.show_result(
        args.img, result, out_file=None, show=False)
    mmcv.imwrite(rendered, args.save_path)
    if args.imshow:
        mmcv.imshow(rendered, 'predicted results')
def single_inference(self, model, arrays, batch_mode, batch_size=0):
    """Run ``model_inference`` over ``arrays``, optionally in batches.

    Args:
        model: Model forwarded to ``model_inference``.
        arrays: Sequence of inputs.
        batch_mode: When falsy, each input is inferred on its own.
        batch_size: Chunk length used in batch mode; ``0`` sends all of
            ``arrays`` in a single call.

    Returns:
        A list of per-input results, or, for batch mode with
        ``batch_size == 0``, whatever ``model_inference`` returns for the
        whole batch.
    """
    if not batch_mode:
        return [
            model_inference(model, item, batch_mode=False) for item in arrays
        ]

    if batch_size == 0:
        return model_inference(model, arrays, batch_mode=True)

    # Slice every chunk up front, then run and concatenate the results.
    chunks = [
        arrays[start:start + batch_size]
        for start in range(0, len(arrays), batch_size)
    ]
    collected = []
    for chunk in chunks:
        collected.extend(model_inference(model, chunk, batch_mode=True))
    return collected
def test_model_inference():
    """Smoke-test single-image inference with a SAR checkpoint.

    The checkpoint is downloaded first when it is not already on disk.
    """
    project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    print(project_dir)

    config_file = os.path.join(
        project_dir,
        '../configs/textrecog/sar/sar_r31_parallel_decoder_academic.py')
    checkpoint_file = os.path.join(
        project_dir,
        '../checkpoints/sar_r31_parallel_decoder_academic-dba3a4a3.pth')

    if os.path.exists(checkpoint_file):
        print(f'Using existing checkpoint {checkpoint_file}')
    else:
        url = ('https://download.openmmlab.com/mmocr'
               '/textrecog/sar/'
               'sar_r31_parallel_decoder_academic-dba3a4a3.pth')
        print(f'Downloading {url} ...')
        # ``urlretrieve`` saves to a temporary file; move it into place.
        downloaded_path, _ = urllib.request.urlretrieve(url)
        os.makedirs(os.path.dirname(checkpoint_file), exist_ok=True)
        shutil.move(downloaded_path, checkpoint_file)
        print(f'Saved as {checkpoint_file}')

    model = init_detector(
        config_file, checkpoint=checkpoint_file, device='cpu')
    test_cfg = model.cfg.data.test
    if test_cfg['type'] == 'ConcatDataset':
        # Use the first wrapped dataset's pipeline as the test pipeline.
        model.cfg.data.test.pipeline = test_cfg['datasets'][0].pipeline

    img = os.path.join(project_dir, '../demo/demo_text_recog.jpg')

    # An integer is not an accepted image input.
    with pytest.raises(AssertionError):
        model_inference(model, 1)

    model_inference(model, img)
def test_model_batch_inference_raises_exception_error_free_resize_recog(
        cfg_file):
    """Batch inference must raise the "free resize" error for this config.

    Checked for both file-path input and already-loaded image input.
    """
    tmp_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    config_file = os.path.join(tmp_dir, cfg_file)
    model = build_model(config_file)

    expected_msg = ('Free resize do not support batch mode '
                    'since the image width is not fixed, '
                    'for resize keeping aspect ratio and '
                    'max_width is not give.')

    # Setup stays outside the ``pytest.raises`` bodies so that only the call
    # under test can satisfy the expectation, and the second check does not
    # depend on a variable created inside the first ``with`` block.
    sample_img_path = os.path.join(tmp_dir, '../demo/demo_text_recog.jpg')
    with pytest.raises(Exception, match=expected_msg):
        model_inference(
            model, [sample_img_path, sample_img_path], batch_mode=True)

    img = imread(sample_img_path)
    with pytest.raises(Exception, match=expected_msg):
        model_inference(model, [img, img], batch_mode=True)
def test_model_inference(cfg_file):
    """Single-image inference accepts a path or loaded image, not an int."""
    base_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    model = build_model(os.path.join(base_dir, cfg_file))

    # An integer is not an accepted image input.
    with pytest.raises(AssertionError):
        model_inference(model, 1)

    # Inference from a file path.
    image_path = os.path.join(base_dir, '../demo/demo_text_det.jpg')
    model_inference(model, image_path)

    # Inference from an image already loaded with ``imread``.
    model_inference(model, imread(image_path))
def main():
    """Run inference on every image named in a list file and save results.

    The positional ``img`` argument is opened as a text file with one image
    name per line. Each name is prefixed with ``qtests/`` before inference,
    and the rendered result is written to
    ``<save_path>/<image name>.jpg``.
    """
    parser = ArgumentParser()
    positional = (
        ('img', 'Image file.'),
        ('config', 'Config file.'),
        ('checkpoint', 'Checkpoint file.'),
        ('save_path', 'Path to save visualized image.'),
    )
    for arg_name, arg_help in positional:
        parser.add_argument(arg_name, help=arg_help)
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for inference.')
    parser.add_argument(
        '--imshow',
        action='store_true',
        help='Whether show image with OpenCV.')
    args = parser.parse_args()

    # Build the model from a config file and a checkpoint file.
    model = init_detector(args.config, args.checkpoint, device=args.device)
    test_cfg = model.cfg.data.test
    if test_cfg['type'] == 'ConcatDataset':
        # Use the first wrapped dataset's pipeline as the test pipeline.
        model.cfg.data.test.pipeline = test_cfg['datasets'][0].pipeline

    print(args.img)
    started = time.time()
    with open(args.img, 'r') as list_file:
        entries = list_file.readlines()

    # NOTE(review): the per-image save/show steps are assumed to sit inside
    # this loop, since the output name is derived from each image path --
    # confirm against the original layout.
    for entry in entries:
        img_path = f"qtests/{entry.strip()}"
        img_name = img_path.split('/')[-1]
        print(img_path)

        result = model_inference(model, img_path)
        print(result)

        # Render the prediction for this image.
        rendered = model.show_result(
            img_path, result, out_file=None, show=False)
        # Elapsed time is measured from before the list file was read, so
        # it accumulates across images.
        print("time", time.time() - started)
        mmcv.imwrite(rendered, f"{args.save_path}/{img_name}.jpg")
        if args.imshow:
            mmcv.imshow(rendered, 'predicted results')
def main():
    """Run batch inference on several images and save each visualization.

    Results are written to ``<save_path>/result_<image stem>.png``.
    """
    parser = ArgumentParser()
    positional = (
        ('config', 'Config file.'),
        ('checkpoint', 'Checkpoint file.'),
        ('save_path', 'Folder to save visualized images.'),
    )
    for arg_name, arg_help in positional:
        parser.add_argument(arg_name, help=arg_help)
    parser.add_argument(
        '--images',
        nargs='+',
        help='Image files to be predicted with batch mode, '
        'separated by space, like "image_1.jpg image2.jpg".')
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for inference.')
    parser.add_argument(
        '--imshow',
        action='store_true',
        help='Whether show image with OpenCV.')
    args = parser.parse_args()

    # Build the model from a config file and a checkpoint file.
    model = init_detector(args.config, args.checkpoint, device=args.device)
    test_cfg = model.cfg.data.test
    if test_cfg['type'] == 'ConcatDataset':
        # Use the first wrapped dataset's pipeline as the test pipeline.
        model.cfg.data.test.pipeline = test_cfg['datasets'][0].pipeline

    # Run all images through the model in one batch.
    results = model_inference(model, args.images, batch_mode=True)
    print(f'results: {results}')

    out_dir = Path(args.save_path)
    for img_path, result in zip(args.images, results):
        target = out_dir / f'result_{Path(img_path).stem}.png'

        # Render the prediction; ``show_result`` is given the output path.
        rendered = model.show_result(
            img_path, result, out_file=str(target), show=False)
        if args.imshow:
            mmcv.imshow(rendered, f'predicted results ({img_path})')
def test_model_inference(cfg_file):
    """Inference on a checkpoint-less CPU model: path and array inputs run.

    An integer input must be rejected with ``AssertionError``.
    """
    base_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    config_path = os.path.join(base_dir, cfg_file)

    model = init_detector(config_path, checkpoint=None, device='cpu')
    test_cfg = model.cfg.data.test
    if test_cfg['type'] == 'ConcatDataset':
        # Use the first wrapped dataset's pipeline as the test pipeline.
        model.cfg.data.test.pipeline = test_cfg['datasets'][0].pipeline

    # An integer is not an accepted image input.
    with pytest.raises(AssertionError):
        model_inference(model, 1)

    # Inference from a file path.
    image_path = os.path.join(base_dir, '../demo/demo_text_det.jpg')
    model_inference(model, image_path)

    # Inference from an image already loaded with ``imread``.
    model_inference(model, imread(image_path))
def test_model_inference_numpy_ndarray_det(sample_det_img_path, psenet_model):
    """Inference runs on a detection image loaded via ``imread``."""
    model_inference(psenet_model, imread(sample_det_img_path))
def test_model_inference_numpy_ndarray(sample_img_path, sarnet_model):
    """Inference runs on an image loaded via ``imread``."""
    model_inference(sarnet_model, imread(sample_img_path))
def test_model_inference_image_path_det(sample_det_img_path, psenet_model):
    """Inference runs when the detection image is given as a file path."""
    model_inference(psenet_model, sample_det_img_path)
def test_model_inference_image_path(sample_img_path, sarnet_model):
    """Inference rejects an integer input and accepts an image path."""
    # An integer is not an accepted image input.
    with pytest.raises(AssertionError):
        model_inference(sarnet_model, 1)

    # A file path is accepted.
    model_inference(sarnet_model, sample_img_path)
def det_recog_kie_inference(self, det_model, recog_model, kie_model=None):
    """Run detection, recognition and optional KIE over every input image.

    Reads ``self.args.arrays``, ``filenames``, ``output``, ``batch_mode``,
    ``det_batch_size`` and ``merge``; in batch mode also
    ``recog_batch_size``; when merging, ``merge_xdist``; with a KIE model,
    ``imshow``.

    Args:
        det_model: Detection model passed to ``self.single_inference``.
        recog_model: Recognition model, or the string
            ``'Tesseract_recog'``, which is routed through
            ``self.single_inference`` in sequential mode.
        kie_model: Optional KIE model. When truthy, KIE inference runs per
            image, its output is visualized, and ``label`` /
            ``label_score`` are added to the result entries.

    Returns:
        list[dict]: One ``{'filename': ..., 'result': [...]}`` dict per
        image. Each result entry carries ``box``, ``box_score``, ``text``
        and ``text_score`` (plus the KIE labels when applicable).
    """
    end2end_res = []

    # Find bounding boxes in the images (text detection)
    det_result = self.single_inference(det_model, self.args.arrays,
                                       self.args.batch_mode,
                                       self.args.det_batch_size)
    bboxes_list = [res['boundary_result'] for res in det_result]

    if kie_model:
        kie_dataset = KIEDataset(
            dict_file=kie_model.cfg.data.test.dict_file)

    # For each bounding box, the image is cropped and
    # sent to the recognition model either one by one
    # or all together depending on the batch_mode
    for filename, arr, bboxes, out_file in zip(self.args.filenames,
                                               self.args.arrays,
                                               bboxes_list,
                                               self.args.output):
        img_e2e_res = {'filename': filename, 'result': []}
        box_imgs = []

        for bbox in bboxes:
            # The last element is stored as the score; the rest as
            # coordinates.
            box_res = {
                'box': [round(x) for x in bbox[:-1]],
                'box_score': float(bbox[-1]),
            }

            box = bbox[:8]
            if len(bbox) > 9:
                # More than eight coordinates: crop the axis-aligned
                # rectangle spanned by the x (even) and y (odd) values.
                min_x = min(bbox[0:-1:2])
                min_y = min(bbox[1:-1:2])
                max_x = max(bbox[0:-1:2])
                max_y = max(bbox[1:-1:2])
                box = [
                    min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y
                ]
            box_img = crop_img(arr, box)

            if self.args.batch_mode:
                # Recognition is deferred; text fields are filled below.
                box_imgs.append(box_img)
            else:
                if recog_model == 'Tesseract_recog':
                    recog_result = self.single_inference(
                        recog_model, box_img, batch_mode=True)
                else:
                    recog_result = model_inference(recog_model, box_img)
                text = recog_result['text']
                text_score = recog_result['score']
                # Accept tuples as well as lists so both recognition paths
                # reduce a score sequence to a single value the same way.
                if isinstance(text_score, (list, tuple)):
                    text_score = sum(text_score) / max(1, len(text))
                box_res['text'] = text
                box_res['text_score'] = text_score
            img_e2e_res['result'].append(box_res)

        if self.args.batch_mode:
            recog_results = self.single_inference(
                recog_model, box_imgs, True, self.args.recog_batch_size)
            # Crops were appended in the same order as the result entries,
            # so index i addresses the matching entry.
            for i, recog_result in enumerate(recog_results):
                text = recog_result['text']
                text_score = recog_result['score']
                if isinstance(text_score, (list, tuple)):
                    text_score = sum(text_score) / max(1, len(text))
                img_e2e_res['result'][i]['text'] = text
                img_e2e_res['result'][i]['text_score'] = text_score

        if self.args.merge:
            img_e2e_res['result'] = stitch_boxes_into_lines(
                img_e2e_res['result'], self.args.merge_xdist, 0.5)

        if kie_model:
            # Work on a copy so the boxes reported in the result keep their
            # original coordinates.
            annotations = copy.deepcopy(img_e2e_res['result'])
            # Customized for kie_dataset, which
            # assumes that boxes are represented by only 4 points
            for ann in annotations:
                min_x = min(ann['box'][::2])
                min_y = min(ann['box'][1::2])
                max_x = max(ann['box'][::2])
                max_y = max(ann['box'][1::2])
                ann['box'] = [
                    min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y
                ]
            ann_info = kie_dataset._parse_anno_info(annotations)
            # Fall back to 'bboxes' when the parser did not supply these.
            ann_info['ori_bboxes'] = ann_info.get('ori_bboxes',
                                                  ann_info['bboxes'])
            ann_info['gt_bboxes'] = ann_info.get('gt_bboxes',
                                                 ann_info['bboxes'])
            kie_result, data = model_inference(
                kie_model,
                arr,
                ann=ann_info,
                return_data=True,
                batch_mode=self.args.batch_mode)
            # visualize KIE results
            self.visualize_kie_output(
                kie_model,
                data,
                kie_result,
                out_file=out_file,
                show=self.args.imshow)
            gt_bboxes = data['gt_bboxes'].data.numpy().tolist()
            labels = self.generate_kie_labels(kie_result, gt_bboxes,
                                              kie_model.class_list)
            for i in range(len(gt_bboxes)):
                img_e2e_res['result'][i]['label'] = labels[i][0]
                img_e2e_res['result'][i]['label_score'] = labels[i][1]

        end2end_res.append(img_e2e_res)
    return end2end_res