def ocr(self, img, det=True, rec=True, cls=True):
    """
    ocr with paddleocr

    args:
        img: img for ocr, support ndarray, img_path and list of ndarray.
            A path starting with 'http' is first downloaded to 'tmp.jpg'.
        det: use text detection or not. If false, only rec will be exec. Default is True
        rec: use text recognition or not. If false, only det will be exec. Default is True
        cls: use angle classifier or not. Default is True. If true, the text with rotation
            of 180 degrees can be recognized. If no text is rotated by 180 degrees, use
            cls=False to get better performance. Text with rotation of 90 or 270 degrees
            can be recognized even if cls=False.

    returns:
        - det and rec: a list of ``[box, rec_result]`` pairs, each box converted
          with ``tolist()``.
        - det only: a list of boxes (``tolist()`` applied to each).
        - det disabled: the recognizer output; or the classifier output when rec
          is also disabled and the angle classifier actually ran.
        - None when the image cannot be loaded or no boxes are reported.

    note:
        Passing a list of images together with a truthy ``det`` logs an error and
        terminates the process through ``exit(0)``.
    """
    assert isinstance(img, (np.ndarray, list, str))
    if isinstance(img, list) and det:
        logger.error('When input a list of images, det must be false')
        exit(0)
    if cls and not self.use_angle_cls:
        logger.warning(
            'Since the angle classifier is not initialized, the angle classifier will not be used during the forward process'
        )

    if isinstance(img, str):
        # download net image
        if img.startswith('http'):
            download_with_progressbar(img, 'tmp.jpg')
            img = 'tmp.jpg'
        image_file = img
        img, flag = check_and_read_gif(image_file)
        if not flag:
            # The helper did not supply an image, so decode the raw file bytes.
            with open(image_file, 'rb') as f:
                np_arr = np.frombuffer(f.read(), dtype=np.uint8)
                img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
        if img is None:
            logger.error("error in loading image:{}".format(image_file))
            return None
    if isinstance(img, np.ndarray) and len(img.shape) == 2:
        # Two-dimensional arrays are expanded to three channels before inference.
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    if det and rec:
        dt_boxes, rec_res = self.__call__(img, cls)
        # Mirror the det-only branch: a missing detection result yields None
        # instead of failing inside zip().
        if dt_boxes is None or rec_res is None:
            return None
        return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
    elif det and not rec:
        dt_boxes, elapse = self.text_detector(img)
        if dt_boxes is None:
            return None
        return [box.tolist() for box in dt_boxes]
    else:
        if not isinstance(img, list):
            img = [img]
        if self.use_angle_cls and cls:
            img, cls_res, elapse = self.text_classifier(img)
            if not rec:
                return cls_res
        # Reached when rec is requested, and also when rec is disabled but the
        # classifier did not run.
        rec_res, elapse = self.text_recognizer(img)
        return rec_res
def __call__(self, img):
    """
    Load the input if it is given as a path/URL, then delegate to the parent
    class's ``__call__``.

    args:
        img: either a string (a local path, or a location starting with 'http'
            which is downloaded to 'tmp.jpg' first) or an already loaded image.

    returns:
        Whatever the parent ``__call__`` returns, or None when a string input
        could not be turned into an image.
    """
    if isinstance(img, str):
        source_path = img
        # download net image
        if source_path.startswith('http'):
            download_with_progressbar(source_path, 'tmp.jpg')
            source_path = 'tmp.jpg'

        img, loaded = check_and_read_gif(source_path)
        if not loaded:
            # The helper did not supply an image; decode the raw bytes instead.
            with open(source_path, 'rb') as stream:
                raw_bytes = stream.read()
            img = cv2.imdecode(
                np.frombuffer(raw_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)

        if img is None:
            logger.error("error in loading image:{}".format(source_path))
            return None

    is_two_dimensional = isinstance(img, np.ndarray) and len(img.shape) == 2
    if is_two_dimensional:
        # Expand a two-dimensional array to three channels.
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    return super().__call__(img)
def main():
    """
    Command-line entry point.

    Parses the arguments, collects the input images (a link is downloaded to
    'tmp.jpg'; anything else is expanded with ``get_image_file_list``), builds
    the engine selected by ``args.type`` and processes every image:

    - 'ocr': runs ``engine.ocr`` and logs each returned line.
    - 'structure': calls the engine, hands the result to ``save_structure_res``
      together with ``args.output`` and the image's base name, then logs every
      item after removing its 'img' entry.

    raises:
        NotImplementedError: if ``args.type`` is neither 'ocr' nor 'structure'.
    """
    # for cmd
    args = parse_args(mMain=True)
    image_dir = args.image_dir
    if is_link(image_dir):
        download_with_progressbar(image_dir, 'tmp.jpg')
        image_file_list = ['tmp.jpg']
    else:
        image_file_list = get_image_file_list(args.image_dir)
    if not image_file_list:
        logger.error('no images find in {}'.format(args.image_dir))
        return

    if args.type == 'ocr':
        engine = PaddleOCR(**(args.__dict__))
    elif args.type == 'structure':
        engine = PPStructure(**(args.__dict__))
    else:
        raise NotImplementedError

    for img_path in image_file_list:
        # Strip only the final extension so names such as 'scan.v1.png' and
        # 'scan.v2.png' stay distinct ('scan.v1' / 'scan.v2') instead of both
        # collapsing to 'scan'.
        img_name = os.path.splitext(os.path.basename(img_path))[0]
        logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
        if args.type == 'ocr':
            result = engine.ocr(img_path,
                                det=args.det,
                                rec=args.rec,
                                cls=args.use_angle_cls)
            if result is not None:
                for line in result:
                    logger.info(line)
        elif args.type == 'structure':
            result = engine(img_path)
            save_structure_res(result, args.output, img_name)

            for item in result:
                # Drop the 'img' entry before logging the item.
                item.pop('img')
                logger.info(item)