Ejemplo n.º 1
0
def read_image_with_pil(image_directory):
    """Open an image with PIL and return the resulting image object.

    Args:
        image_directory: Value passed unchanged to ``Image.open``.

    Returns:
        Whatever ``Image.open`` returns for ``image_directory``.

    Raises:
        IOError: If ``Image.open`` raises ``IOError``. The failure is logged
            and the original exception is re-raised, so its message and
            traceback are preserved.
    """
    try:
        return Image.open(image_directory)
    except IOError:
        orc_logger.error('PIL can not read a file, maybe it is not a image')
        # Bare ``raise`` re-raises the caught exception instead of replacing
        # it with a fresh, message-less IOError that hides the real cause.
        raise
Ejemplo n.º 2
0
    def _validate_file_extension():
        """Check that ``image_directory`` has an allowed file extension.

        ``image_directory`` comes from the enclosing scope. The extension is
        upper-cased only for the membership test against
        ``VALID_EXTEND_FILE``.

        Returns:
            The extension exactly as ``os.path.splitext`` produced it
            (original case, not upper-cased).

        Raises:
            ValueError: If the upper-cased extension is not in
                ``VALID_EXTEND_FILE``.
        """
        _, file_extension = os.path.splitext(image_directory)
        if file_extension.upper() in VALID_EXTEND_FILE:
            orc_logger.info('end validate input')
            return file_extension

        message = f"only allow file extend: {VALID_EXTEND_FILE}"
        orc_logger.error(message)
        # Carry the message on the exception so a caller that logs the
        # exception object does not end up with an empty string.
        raise ValueError(message)
Ejemplo n.º 3
0
def get_string(input, output, verbose):
    """Recognize the text in ``input``, log it, and return it.

    Args:
        input: Source file passed to ``validate_file`` and then to
            ``recognize_text_with_tesseract``.
        output: When truthy, passed to ``orc_logger.set_log_name`` before
            anything is logged.
        verbose: Accepted but not used by this function.

    Returns:
        The value returned by ``recognize_text_with_tesseract``, or ``None``
        if any step raised an exception.
    """
    text = None
    try:
        if output:
            orc_logger.set_log_name(output)

        orc_logger.info(f'start time: {datetime.datetime.now()}')
        orc_logger.info(f'receive input file: {input}')
        orc_logger.info(f'receive output file: {output}')
        type_file = validate_file(input)
        if type_file == VALID_EXTEND_FILE[1]:
            # NOTE(review): convert_pdf_to_img is called without the input
            # path; confirm how it locates the PDF to convert.
            input = convert_pdf_to_img()

        text = recognize_text_with_tesseract(input)
        orc_logger.info(f'result  = \n {text}')
    except Exception as e:
        # Top-level boundary: failures are logged rather than propagated,
        # and the caller receives None.
        orc_logger.error(e)
    finally:
        orc_logger.info(f'end time: {datetime.datetime.now()}')
    # Hand the recognized text back instead of only writing it to the log.
    return text
Ejemplo n.º 4
0
def recognize_text_with_tesseract(image_directory):
    """Preprocess an image with OpenCV and run Tesseract on the result.

    The image is read, converted to grayscale, dilated and eroded with a
    1x1 kernel, written to ``TEMP_IMG``, reopened with PIL and passed to
    ``pytesseract.image_to_string``.

    Args:
        image_directory: Value passed to ``cv2.imread``.

    Returns:
        The recognized text with surrounding whitespace stripped, or
        ``None`` if any step raised (the exception is logged, not
        propagated).
    """
    try:
        orc_logger.info('start recognize text with tesseract')

        orc_logger.info('read image and convert it to gray')
        img = cv2.imread(image_directory)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        orc_logger.info('remove noise in image')
        kernel = np.ones((1, 1), np.uint8)
        img = cv2.dilate(img, kernel, iterations=1)
        # Keep the eroded image: the return value was previously discarded,
        # so the erosion step never reached the image that gets written.
        img = cv2.erode(img, kernel, iterations=1)
        cv2.imwrite(TEMP_IMG, img)

        # Context manager closes the temp image file once OCR is done.
        with Image.open(TEMP_IMG) as image_detect:
            orc_logger.info('start recognize text with tesseract')
            result = pytesseract.image_to_string(image_detect)
        orc_logger.info('end recognize text with tesseract')

        return result.strip()
    except Exception as e:
        orc_logger.error(e)
        # Best-effort contract kept from the original: failures yield None.
        return None
Ejemplo n.º 5
0
 def _validate_file_exist():
     """Check that ``image_directory`` is an existing, readable file.

     ``image_directory`` comes from the enclosing scope.

     Raises:
         OSError: If ``os.path.isfile`` is false for the path or
             ``os.access(..., os.R_OK)`` reports it is not readable.
     """
     if not os.path.isfile(image_directory) or not os.access(
             image_directory, os.R_OK):
         message = 'file not exists or not readable'
         orc_logger.error(message)
         # Carry the message on the exception so a caller that logs the
         # exception object does not end up with an empty string.
         raise OSError(message)