Esempio n. 1
0
 def getEnginesInSystem(self):
     existing_engines = []
     engines_paths = [(name, getExecPath(conf['engine_path']))\
                      for name, conf in PREDEFINED_ENGINES.items()]
     for name, path in engines_paths:
         if not path:
             continue
         engine_name = PREDEFINED_ENGINES[name].get('name', None)
         arguments = PREDEFINED_ENGINES[name].get('arguments', None)
         if not arguments or not engine_name:
             continue
         image_format = PREDEFINED_ENGINES[name].get('image_format', 'PPM')
         failure_string = PREDEFINED_ENGINES[name].get('failure_string', '')
         version = PREDEFINED_ENGINES[name].get('version', 0.0)
         language_argument = PREDEFINED_ENGINES[name].get('language_argument', '')
         languages = PREDEFINED_ENGINES[name].get('languages', '')
         favorite_language = PREDEFINED_ENGINES[name].get('favorite_language', '')
         engine = Engine(engine_name,
                         path,
                         arguments,
                         temporary_folder = self.TEMPORARY_FOLDER,
                         image_format = image_format,
                         failure_string = failure_string,
                         languages = languages,
                         language_argument = language_argument,
                         version = version)
         existing_engines.append(engine)
     return existing_engines
Esempio n. 2
0
 def getEnginesInSystem(self):
     existing_engines = []
     engines_paths = [(name, getExecPath(conf['engine_path']))\
                      for name, conf in list(PREDEFINED_ENGINES.items())]
     for name, path in engines_paths:
         if not path:
             continue
         engine_name = PREDEFINED_ENGINES[name].get('name', None)
         arguments = PREDEFINED_ENGINES[name].get('arguments', None)
         if not arguments or not engine_name:
             continue
         image_format = PREDEFINED_ENGINES[name].get('image_format', 'PPM')
         failure_string = PREDEFINED_ENGINES[name].get('failure_string', '')
         version = PREDEFINED_ENGINES[name].get('version', 0.0)
         language_argument = PREDEFINED_ENGINES[name].get(
             'language_argument', '')
         languages = PREDEFINED_ENGINES[name].get('languages', '')
         favorite_language = PREDEFINED_ENGINES[name].get(
             'favorite_language', '')
         engine = Engine(engine_name,
                         path,
                         arguments,
                         temporary_folder=self.TEMPORARY_FOLDER,
                         image_format=image_format,
                         failure_string=failure_string,
                         languages=languages,
                         language_argument=language_argument,
                         version=version)
         existing_engines.append(engine)
     return existing_engines
Esempio n. 3
0
class ConfigurationManager(object):

    TEXT_FILL = 'text_fill'
    IMAGE_FILL = 'image_fill'
    BOXES_STROKE = 'boxes_stroke'
    WINDOW_SIZE = 'window_size'
    UNPAPER = 'unpaper'
    UNPAPER_USE_BLACK_FILTER = 'unpaper_use_black_filter'
    UNPAPER_NOISE_FILTER_INTENSITY = 'unpaper_noise_filter_intensity'
    UNPAPER_GRAY_FILTER_SIZE = 'unpaper_gray_filter_size'
    UNPAPER_EXTRA_OPTIONS = 'unpaper_extra_options'
    UNPAPER_IMAGES_AFTER_ADDITION = 'unpaper_images_after_addition'
    FAVORITE_ENGINE = 'favorite_engine'
    IMPROVE_COLUMN_DETECTION = 'improve_column_detection'
    COLUMN_MIN_WIDTH = 'column_min_width'
    CLEAN_TEXT = 'clean_text'
    ADJUST_BOXES_BOUNDS = 'adjust_boxes_bounds'
    BOUNDS_ADJUSTMENT_SIZE = 'bounds_adjustment_size'
    DESKEW_IMAGES_AFTER_ADDITION = 'deskew_images_after_addition'
    LANGUAGE = 'language'

    TEMPORARY_FOLDER = tempfile.mkdtemp(prefix=OCRFEEDER_COMPACT_NAME + '_')

    DEFAULT_LOCALE = locale.getdefaultlocale()[0]

    DEFAULTS = {
        TEXT_FILL: (94, 156, 235, 150),
        BOXES_STROKE: (94, 156, 235, 250),
        IMAGE_FILL: (0, 183, 0, 150),
        WINDOW_SIZE: 'auto',
        UNPAPER: getExecPath('unpaper') or '',
        UNPAPER_USE_BLACK_FILTER: True,
        UNPAPER_NOISE_FILTER_INTENSITY: 'auto',
        UNPAPER_GRAY_FILTER_SIZE: 'auto',
        FAVORITE_ENGINE: 'ocrad',
        IMPROVE_COLUMN_DETECTION: True,
        COLUMN_MIN_WIDTH: 'auto',
        CLEAN_TEXT: True,
        ADJUST_BOXES_BOUNDS: True,
        BOUNDS_ADJUSTMENT_SIZE: 'auto',
        DESKEW_IMAGES_AFTER_ADDITION: False,
        UNPAPER_IMAGES_AFTER_ADDITION: False,
        UNPAPER_EXTRA_OPTIONS: '',
        LANGUAGE: DEFAULT_LOCALE.split('_')[0] if DEFAULT_LOCALE else '',
    }

    conf = dict(DEFAULTS)

    def __init__(self):
        self.user_configuration_folder = USER_CONFIG_DIR
        self.migrateOldConfigFolder()
        self.user_engines_folder = os.path.join(self.user_configuration_folder,
                                                'engines')
        self.makeUserConfigurationFolder()
        self.has_unpaper = self.getDefault(self.UNPAPER)

    def makeUserConfigurationFolder(self):
        if not os.path.exists(self.user_engines_folder):
            os.makedirs(self.user_engines_folder)
        if [file_name for file_name in os.listdir(self.user_engines_folder)\
            if file_name.endswith('.xml')]:
            return
        for engine in self.getEnginesInSystem():
            engine_file = os.path.join(self.user_engines_folder, engine.name)
            engine.saveToXml('%s.xml' % engine_file)

    def getEnginesInSystem(self):
        existing_engines = []
        engines_paths = [(name, getExecPath(conf['engine_path']))\
                         for name, conf in list(PREDEFINED_ENGINES.items())]
        for name, path in engines_paths:
            if not path:
                continue
            engine_name = PREDEFINED_ENGINES[name].get('name', None)
            arguments = PREDEFINED_ENGINES[name].get('arguments', None)
            if not arguments or not engine_name:
                continue
            image_format = PREDEFINED_ENGINES[name].get('image_format', 'PPM')
            failure_string = PREDEFINED_ENGINES[name].get('failure_string', '')
            version = PREDEFINED_ENGINES[name].get('version', 0.0)
            language_argument = PREDEFINED_ENGINES[name].get(
                'language_argument', '')
            languages = PREDEFINED_ENGINES[name].get('languages', '')
            favorite_language = PREDEFINED_ENGINES[name].get(
                'favorite_language', '')
            engine = Engine(engine_name,
                            path,
                            arguments,
                            temporary_folder=self.TEMPORARY_FOLDER,
                            image_format=image_format,
                            failure_string=failure_string,
                            languages=languages,
                            language_argument=language_argument,
                            version=version)
            existing_engines.append(engine)
        return existing_engines

    def setConf(self, conf_key, value):
        ConfigurationManager.conf[conf_key] = value

    def getConf(self, conf_key):
        return ConfigurationManager.conf[conf_key]

    def setFavoriteEngine(self, engine_name):
        self.setConf(self.FAVORITE_ENGINE, engine_name)

    def getFavoriteEngine(self):
        return self.getConf(self.FAVORITE_ENGINE)

    def __getColorFromString(self, color):
        if type(color) != str:
            return color
        color_list = [value.strip('()\ ') for value in color.split(',')]
        try:
            int_color_list = [int(value) for value in color_list]
        except ValueError as exception:
            return None
        return tuple(int_color_list)

    def setTextFill(self, color):
        self.setConf(self.TEXT_FILL, color)

    def setBoxesStroke(self, color):
        self.setConf(self.BOXES_STROKE, color)

    def setImageFill(self, color):
        self.setConf(self.IMAGE_FILL, color)

    def getTextFill(self):
        return self.__getColorFromString(self.getConf(self.TEXT_FILL))

    def getBoxesStroke(self):
        return self.__getColorFromString(self.getConf(self.BOXES_STROKE))

    def getImageFill(self):
        return self.__getColorFromString(self.getConf(self.IMAGE_FILL))

    def setWindowSize(self, window_size):
        self.setConf(self.WINDOW_SIZE, window_size)

    def getWindowSize(self):
        return self.getConf(self.WINDOW_SIZE)

    def setUnpaper(self, unpaper):
        self.setConf(self.UNPAPER, unpaper)

    def getUnpaper(self):
        return self.getConf(self.UNPAPER)

    def setUseBlackFilter(self, use_black_filter):
        self.setConf(self.UNPAPER_USE_BLACK_FILTER, use_black_filter)

    def getUseBlackFilter(self):
        use_black_filter = self.getConf(self.UNPAPER_USE_BLACK_FILTER)
        return self.__convertBoolSetting(use_black_filter)

    def setNoiseFilterIntensity(self, noise_filter_intensity):
        self.setConf(self.UNPAPER_NOISE_FILTER_INTENSITY,
                     noise_filter_intensity)

    def getNoiseFilterIntensity(self):
        noise_filter_intensity = \
            self.getConf(self.UNPAPER_NOISE_FILTER_INTENSITY)
        if noise_filter_intensity == 'auto' or noise_filter_intensity == 'none':
            return noise_filter_intensity
        try:
            noise_filter_intensity_int = int(noise_filter_intensity)
        except ValueError:
            return 'auto'
        return noise_filter_intensity_int

    def setGrayFilterSize(self, gray_filter_size):
        self.setConf(self.UNPAPER_GRAY_FILTER_SIZE, gray_filter_size)

    def getGrayFilterSize(self):
        gray_filter_size = self.getConf(self.UNPAPER_GRAY_FILTER_SIZE)
        if gray_filter_size == 'auto' or gray_filter_size == 'none':
            return gray_filter_size
        try:
            gray_filter_size_int = int(gray_filter_size)
        except ValueError:
            return 'auto'
        return gray_filter_size_int

    def setUnpaperExtraOptions(self, extra_options):
        self.setConf(self.UNPAPER_EXTRA_OPTIONS, extra_options)

    def getUnpaperExtraOptions(self):
        return self.getConf(self.UNPAPER_EXTRA_OPTIONS)

    def setUnpaperImagesAfterAddition(self, unpaper_images_after_addition):
        self.setConf(self.UNPAPER_IMAGES_AFTER_ADDITION,
                     unpaper_images_after_addition)

    def getUnpaperImagesAfterAddition(self):
        unpaper = self.getConf(self.UNPAPER_IMAGES_AFTER_ADDITION)
        return self.__convertBoolSetting(unpaper)

    def setImproveColumnDetection(self, improve_column_detection):
        self.setConf(self.IMPROVE_COLUMN_DETECTION, improve_column_detection)

    def getImproveColumnDetection(self):
        improve = self.getConf(self.IMPROVE_COLUMN_DETECTION)
        return self.__convertBoolSetting(improve)

    def setColumnMinWidth(self, column_min_width):
        self.setConf(self.COLUMN_MIN_WIDTH, column_min_width)

    def getColumnMinWidth(self):
        column_min_width = self.getConf(self.COLUMN_MIN_WIDTH)
        if column_min_width == 'auto':
            return column_min_width
        try:
            column_min_width_int = int(column_min_width)
        except ValueError:
            return 'auto'
        return column_min_width_int

    def getCleanText(self):
        clean_text = self.getConf(self.CLEAN_TEXT)
        return self.__convertBoolSetting(clean_text)

    def setCleanText(self, clean_text):
        self.setConf(self.CLEAN_TEXT, clean_text)

    def getLanguage(self):
        lang = self.getConf(self.LANGUAGE)
        return lang

    def setLanguage(self, language):
        self.setConf(self.LANGUAGE, language)

    def setAdjustBoxesBounds(self, adjust_boxes_bounds):
        self.setConf(self.ADJUST_BOXES_BOUNDS, adjust_boxes_bounds)

    def getAdjustBoxesBounds(self):
        adjust = self.getConf(self.ADJUST_BOXES_BOUNDS)
        return self.__convertBoolSetting(adjust)

    def setDeskewImagesAfterAddition(self, deskew_images_after_addition):
        self.setConf(self.DESKEW_IMAGES_AFTER_ADDITION,
                     deskew_images_after_addition)

    def getDeskewImagesAfterAddition(self):
        deskew = self.getConf(self.DESKEW_IMAGES_AFTER_ADDITION)
        return self.__convertBoolSetting(deskew)

    def setBoundsAdjustmentSize(self, adjustment_size):
        self.setConf(self.BOUNDS_ADJUSTMENT_SIZE, adjustment_size)

    def getBoundsAdjustmentSize(self):
        adjustment_size = self.getConf(self.BOUNDS_ADJUSTMENT_SIZE)
        if adjustment_size == 'auto':
            return adjustment_size
        try:
            adjustment_size_int = int(adjustment_size)
        except ValueError:
            return 'auto'
        return adjustment_size_int

    def __convertBoolSetting(self, setting):
        if type(setting) == str:
            if setting == 'True':
                setting = True
            else:
                setting = False
        return setting

    def setDefaults(self):
        ConfigurationManager.conf = dict(self.DEFAULTS)

    def getDefault(self, variable_name):
        if variable_name in list(self.DEFAULTS.keys()):
            return self.DEFAULTS[variable_name]
        else:
            return ''

    def getEngineDefaultConfiguration(self, engine_path):
        path = os.path.basename(engine_path)
        for name, conf in list(PREDEFINED_ENGINES.items()):
            if conf['engine_path'] == path:
                return conf
        return None

    def loadConfiguration(self):
        configuration_file = os.path.join(self.user_configuration_folder,
                                          'preferences.xml')
        if not os.path.isfile(configuration_file):
            return False
        document = minidom.parse(configuration_file)
        for key in list(self.DEFAULTS.keys()):
            nodeList = document.getElementsByTagName(key)
            if nodeList:
                for node in nodeList:
                    for child in node.childNodes:
                        if child.nodeType == child.TEXT_NODE:
                            ConfigurationManager.conf[key] = str(
                                child.nodeValue)
                            break
        return True

    def configurationToXml(self):
        configuration_file = os.path.join(self.user_configuration_folder,
                                          'preferences.xml')
        doc = minidom.Document()
        root_node = doc.createElement('ocrfeeder')
        for key, value in list(ConfigurationManager.conf.items()):
            new_node = doc.createElement(key)
            new_node.appendChild(doc.createTextNode(str(value)))
            root_node.appendChild(new_node)
        configuration = doc.toxml(encoding='utf-8').decode()
        configuration += '\n' + root_node.toxml(encoding='utf-8').decode()
        new_configuration_file = open(configuration_file, 'w')
        new_configuration_file.write(configuration)
        new_configuration_file.close()

    def removeTemporaryFolder(self):
        try:
            shutil.rmtree(self.TEMPORARY_FOLDER)
        except:
            debug('Error when removing the temporary folder: ' + \
                  self.TEMPORARY_FOLDER)

    def migrateOldConfigFolder(self):
        old_config_folder = os.path.expanduser('~/.ocrfeeder')
        if os.path.exists(old_config_folder) and \
           not os.path.exists(self.user_configuration_folder):
            shutil.copytree(old_config_folder, self.user_configuration_folder)
            debug('Migrated old configuration directory "%s" to the '
                  'new one: "%s"' %
                  (old_config_folder, self.user_configuration_folder))

    text_fill = property(getTextFill, setTextFill)
    image_fill = property(getImageFill, setImageFill)
    boxes_stroke = property(getBoxesStroke, setBoxesStroke)
    favorite_engine = property(getFavoriteEngine, setFavoriteEngine)
    window_size = property(getWindowSize, setWindowSize)
    unpaper = property(getUnpaper, setUnpaper)

    unpaper_use_black_filter = property(getUseBlackFilter, setUseBlackFilter)

    unpaper_gray_filter_size = property(getGrayFilterSize, setGrayFilterSize)

    unpaper_noise_filter_intensity = property(getNoiseFilterIntensity,
                                              setNoiseFilterIntensity)

    unpaper_images_after_addition = property(getUnpaperImagesAfterAddition,
                                             setUnpaperImagesAfterAddition)

    unpaper_extra_options = property(getUnpaperExtraOptions,
                                     setUnpaperExtraOptions)

    improve_column_detection = property(getImproveColumnDetection,
                                        setImproveColumnDetection)
    column_min_width = property(getColumnMinWidth, setColumnMinWidth)

    clean_text = property(getCleanText, setCleanText)

    adjust_boxes_bounds = property(getAdjustBoxesBounds, setAdjustBoxesBounds)

    bounds_adjustment_size = property(getBoundsAdjustmentSize,
                                      setBoundsAdjustmentSize)

    deskew_images_after_addition = property(getDeskewImagesAfterAddition,
                                            setDeskewImagesAfterAddition)

    language = property(getLanguage, setLanguage)
Esempio n. 4
0
class ConfigurationManager(object):

    TEXT_FILL = 'text_fill'
    IMAGE_FILL = 'image_fill'
    BOXES_STROKE = 'boxes_stroke'
    WINDOW_SIZE = 'window_size'
    UNPAPER = 'unpaper'
    UNPAPER_USE_BLACK_FILTER = 'unpaper_use_black_filter'
    UNPAPER_NOISE_FILTER_INTENSITY = 'unpaper_noise_filter_intensity'
    UNPAPER_GRAY_FILTER_SIZE = 'unpaper_gray_filter_size'
    UNPAPER_EXTRA_OPTIONS = 'unpaper_extra_options'
    UNPAPER_IMAGES_AFTER_ADDITION = 'unpaper_images_after_addition'
    FAVORITE_ENGINE = 'favorite_engine'
    IMPROVE_COLUMN_DETECTION = 'improve_column_detection'
    COLUMN_MIN_WIDTH = 'column_min_width'
    CLEAN_TEXT = 'clean_text'
    ADJUST_BOXES_BOUNDS = 'adjust_boxes_bounds'
    BOUNDS_ADJUSTMENT_SIZE = 'bounds_adjustment_size'
    DESKEW_IMAGES_AFTER_ADDITION = 'deskew_images_after_addition'
    LANGUAGE = 'language'

    TEMPORARY_FOLDER = tempfile.mkdtemp(prefix = OCRFEEDER_COMPACT_NAME + '_')

    DEFAULT_LOCALE = locale.getdefaultlocale()[0]

    DEFAULTS = {TEXT_FILL: (94, 156, 235, 150),
                BOXES_STROKE: (94, 156, 235, 250),
                IMAGE_FILL: (0, 183, 0, 150),
                WINDOW_SIZE: 'auto',
                UNPAPER: getExecPath('unpaper') or '',
                UNPAPER_USE_BLACK_FILTER: True,
                UNPAPER_NOISE_FILTER_INTENSITY: 'auto',
                UNPAPER_GRAY_FILTER_SIZE: 'auto',
                FAVORITE_ENGINE: 'ocrad',
                IMPROVE_COLUMN_DETECTION: True,
                COLUMN_MIN_WIDTH: 'auto',
                CLEAN_TEXT: True,
                ADJUST_BOXES_BOUNDS: True,
                BOUNDS_ADJUSTMENT_SIZE: 'auto',
                DESKEW_IMAGES_AFTER_ADDITION: False,
                UNPAPER_IMAGES_AFTER_ADDITION: False,
                UNPAPER_EXTRA_OPTIONS: '',
                LANGUAGE: DEFAULT_LOCALE.split('_')[0] if DEFAULT_LOCALE else '',
                }

    conf = dict(DEFAULTS)

    def __init__(self):
        self.user_configuration_folder = USER_CONFIG_DIR
        self.migrateOldConfigFolder()
        self.user_engines_folder = os.path.join(self.user_configuration_folder, 'engines')
        self.makeUserConfigurationFolder()
        self.has_unpaper = self.getDefault(self.UNPAPER)

    def makeUserConfigurationFolder(self):
        if not os.path.exists(self.user_engines_folder):
            os.makedirs(self.user_engines_folder)
        if [file_name for file_name in os.listdir(self.user_engines_folder)\
            if file_name.endswith('.xml')]:
            return
        for engine in self.getEnginesInSystem():
            engine_file = os.path.join(self.user_engines_folder, engine.name)
            engine.saveToXml('%s.xml' % engine_file)

    def getEnginesInSystem(self):
        existing_engines = []
        engines_paths = [(name, getExecPath(conf['engine_path']))\
                         for name, conf in PREDEFINED_ENGINES.items()]
        for name, path in engines_paths:
            if not path:
                continue
            engine_name = PREDEFINED_ENGINES[name].get('name', None)
            arguments = PREDEFINED_ENGINES[name].get('arguments', None)
            if not arguments or not engine_name:
                continue
            image_format = PREDEFINED_ENGINES[name].get('image_format', 'PPM')
            failure_string = PREDEFINED_ENGINES[name].get('failure_string', '')
            version = PREDEFINED_ENGINES[name].get('version', 0.0)
            language_argument = PREDEFINED_ENGINES[name].get('language_argument', '')
            languages = PREDEFINED_ENGINES[name].get('languages', '')
            favorite_language = PREDEFINED_ENGINES[name].get('favorite_language', '')
            engine = Engine(engine_name,
                            path,
                            arguments,
                            temporary_folder = self.TEMPORARY_FOLDER,
                            image_format = image_format,
                            failure_string = failure_string,
                            languages = languages,
                            language_argument = language_argument,
                            version = version)
            existing_engines.append(engine)
        return existing_engines

    def setConf(self, conf_key, value):
        ConfigurationManager.conf[conf_key] = value

    def getConf(self, conf_key):
        return ConfigurationManager.conf[conf_key]

    def setFavoriteEngine(self, engine_name):
        self.setConf(self.FAVORITE_ENGINE, engine_name)

    def getFavoriteEngine(self):
        return self.getConf(self.FAVORITE_ENGINE)

    def __getColorFromString(self, color):
        if type(color) != str:
            return color
        color_list = [value.strip('()\ ') for value in color.split(',')]
        try:
            int_color_list = [int(value) for value in color_list]
        except ValueError, exception:
            return None
        return tuple(int_color_list)