コード例 #1
0
ファイル: tesseract.py プロジェクト: keyzf/Mayan-EDMS
    def execute(self, *args, **kwargs):
        """
        Execute the command line binary of tesseract on the current page.

        The page image obtained from ``self.converter.get_page()`` is copied
        into a temporary file, which is handed to the command through its
        ``_in`` keyword argument. The command's ``stdout`` is returned as
        text.

        Positional and keyword arguments are forwarded unchanged to the
        parent class ``execute``.

        Returns the command output converted with ``force_text``, or
        ``None`` when ``self.command_tesseract`` is not set.

        Raises ``OCRError`` when invoking the command or converting its
        output fails. The message additionally notes when the requested
        language is not listed in ``self.languages``.
        """
        super(Tesseract, self).execute(*args, **kwargs)

        if not self.command_tesseract:
            # No binary configured: nothing to run, no text to return.
            return None

        image = self.converter.get_page()

        # The context manager closes the temporary file on every exit path.
        # Unlike a ``finally: close()`` clause it never references the name
        # when ``TemporaryFile()`` itself fails, so the real error surfaces.
        with TemporaryFile() as temporary_image_file:
            shutil.copyfileobj(image, temporary_image_file)
            # Rewind so the command reads the image from the beginning.
            temporary_image_file.seek(0)

            # NOTE(review): '-' '-' presumably selects stdin/stdout as the
            # Tesseract input image and output base -- confirm against the
            # Tesseract command line documentation.
            arguments = ['-', '-']

            keyword_arguments = {
                '_in': temporary_image_file,
                '_timeout': self.command_timeout
            }

            if self.language:
                keyword_arguments['l'] = self.language

            # Layer the instance specific variables over a copy of the
            # process environment so os.environ itself is never mutated.
            environment = os.environ.copy()
            environment.update(self.environment)
            keyword_arguments['_env'] = environment

            try:
                result = self.command_tesseract(
                    *arguments, **keyword_arguments
                )
                return force_text(result.stdout)
            except Exception as exception:
                error_message = (
                    'Exception calling Tesseract with language option: {}; {}'
                ).format(self.language, exception)

                if self.language not in self.languages:
                    error_message = (
                        '{}\nThe requested OCR language "{}" is not '
                        'available and needs to be installed.\n').format(
                            error_message, self.language)

                logger.error(error_message)
                raise OCRError(error_message)