Example #1
0
 def addImages(self, image_path_list):
     """Add the images in image_path_list to the project.

     When neither deskewing nor unpapering after addition is enabled in
     the configuration manager, every image is added directly and a debug
     message is emitted for each one that fails to load.  Otherwise one
     AsyncItem per image is queued on a QueuedEventsProgressDialog, which
     is then run.
     """
     queued_items = []
     total = len(image_path_list)
     if not (self.configuration_manager.deskew_images_after_addition or
             self.configuration_manager.unpaper_images_after_addition):
         # No pre-processing requested: add the images right away.
         for position, path in enumerate(image_path_list):
             if not self.__addImage(path, position == 0):
                 debug('Failed to load image "%s"' % path)
         return
     progress = QueuedEventsProgressDialog(self.main_window.window)
     for position, path in enumerate(image_path_list):
         # Callback arguments: the dialog, an "is last image" flag and an
         # "is first image" flag.
         task = AsyncItem(
             self.__imagePreProcessing, (path, ),
             self.__imagePreProcessingFinishedCb,
             (progress, position == total - 1, position == 0))
         if total == 1:
             title = _('Preparing image')
         else:
             title = _('Preparing image %(current_index)s/%(total)s') % \
                 {'current_index': position + 1, 'total': total}
         queued_items.append(((title, _(u'Please wait…')), task))
     progress.setItemsList(queued_items)
     progress.run()
 def makeEnginesFromFolder(self, folder):
     """Reload the OCR engines from the XML files found in folder.

     Every file for which getEngineFromXml() yields a truthy engine is
     stored in self.ocr_engines as an (engine, xml_file) pair.  If none of
     them matches the configured favorite engine, the first loaded engine
     becomes the favorite.

     Returns a dict with the keys 'auto' and 'manual'.  Each value lists
     {'engine': ..., 'configuration': ...} dicts for the engines whose
     version is lower than the one in their default configuration: under
     'auto' when the engine's arguments equal one of the configuration's
     'old_arguments', under 'manual' otherwise.
     """
     self.ocr_engines = []
     favorite_found = False
     for xml_path in self.getXmlFilesInFolder(folder):
         loaded = self.getEngineFromXml(xml_path)
         if not loaded:
             continue
         self.ocr_engines.append((loaded, xml_path))
         if not favorite_found:
             favorite_found = \
                 self.configuration_manager.favorite_engine == loaded.name
     if not self.ocr_engines:
         lib.debug("Warning: no engines found!")
     elif not favorite_found:
         first_engine = self.ocr_engines[0][0]
         self.configuration_manager.favorite_engine = first_engine.name
     outdated = {'auto': [], 'manual': []}
     for entry in self.ocr_engines:
         engine = entry[0]
         # Default configurations are looked up by the engine's own path,
         # not by the XML file it was loaded from.
         defaults = self.configuration_manager.getEngineDefaultConfiguration(
             engine.engine_path)
         if defaults is None:
             continue
         if not float(engine.version) < float(defaults['version']):
             continue
         matches_old = any(engine.arguments == previous
                           for previous in defaults['old_arguments'])
         bucket = 'auto' if matches_old else 'manual'
         outdated[bucket].append({'engine': engine,
                                  'configuration': defaults})
     return outdated
Example #3
0
 def getAllBlocks(self):
     """Return the unified blocks, printing each one in debug mode.

     The blocks come from extendBlocksByBelongingSingles() and are then
     passed through unifyBlocks().
     """
     unified = self.unifyBlocks(self.extendBlocksByBelongingSingles())
     if not OCRFEEDER_DEBUG:
         return unified
     for block in unified:
         debug(block)
     return unified
Example #4
0
 def performOcrForDataBox(self, data_box, engine):
     """Run OCR on the region of the page image covered by data_box.

     The engine's language is set from the data box when the engine has
     languages.  The recognized text is stored in the data box, the
     clipboard and spell-checker menu entries are made sensitive and, when
     a text size can be obtained from the image, the box's font size is
     updated.
     """
     if engine.hasLanguages():
         engine.setLanguage(data_box.getLanguage())
     box_x = data_box.getX()
     box_y = data_box.getY()
     pixbuf_width = self.image_pixbuf.get_width()
     pixbuf_height = self.image_pixbuf.get_height()
     # The sub-region has to lie entirely inside the source pixbuf, so its
     # size is limited to what is left to the right of / below the box
     # origin rather than to the full image size.
     region_width = min(data_box.getWidth(), pixbuf_width - box_x)
     region_height = min(data_box.getHeight(), pixbuf_height - box_y)
     subpixbuf = self.image_pixbuf.new_subpixbuf(box_x, box_y,
                                                 region_width,
                                                 region_height)
     # Extra attributes attached to the sub-pixbuf before conversion.
     subpixbuf.x = box_x
     subpixbuf.y = box_y
     subpixbuf.width = pixbuf_width
     image = graphics.convertPixbufToImage(subpixbuf)
     layout_analysis = LayoutAnalysis(
         engine, clean_text=self.configuration_manager.clean_text)
     text = layout_analysis.readImage(image)
     data_box.setText(text)
     self.main_window.copy_to_clipboard_menu.set_sensitive(True)
     self.main_window.spellchecker_menu.set_sensitive(True)
     debug('Finished reading')
     text_size = layout_analysis.getTextSizeFromImage(
         image, self.page.resolution[1])
     if text_size:
         data_box.setFontSize(text_size)
Example #5
0
 def makeEnginesFromFolder(self, folder):
     """Rebuild self.ocr_engines from the XML files in folder.

     Engines are kept as (engine, xml_file) pairs; files whose engine is
     falsy are skipped.  When no loaded engine is the configured favorite,
     the first loaded engine is made the favorite.

     Returns {'auto': [...], 'manual': [...]} where each entry is a dict
     with the 'engine' and its default 'configuration', for every engine
     older than its default configuration.  'auto' is used when the
     engine's arguments equal one of the 'old_arguments' of that
     configuration, 'manual' otherwise.
     """
     self.ocr_engines = []
     has_favorite = False
     for xml_file in self.getXmlFilesInFolder(folder):
         candidate = self.getEngineFromXml(xml_file)
         if candidate:
             self.ocr_engines.append((candidate, xml_file))
             if not has_favorite:
                 has_favorite = (self.configuration_manager.favorite_engine
                                 == candidate.name)
     if self.ocr_engines:
         if not has_favorite:
             self.configuration_manager.favorite_engine = \
                 self.ocr_engines[0][0].name
     else:
         lib.debug("Warning: no engines found!")
     updates = {'auto': [], 'manual': []}
     for engine, xml_file in self.ocr_engines:
         # The default configuration is keyed by the engine's own path.
         defaults = self.configuration_manager.getEngineDefaultConfiguration(
             engine.engine_path)
         if defaults is None:
             continue
         if float(engine.version) < float(defaults['version']):
             for old_arguments in defaults['old_arguments']:
                 if engine.arguments == old_arguments:
                     kind = 'auto'
                     break
             else:
                 kind = 'manual'
             updates[kind].append({'engine': engine,
                                   'configuration': defaults})
     return updates
Example #6
0
 def addImages(self, image_path_list):
     """Add the images in image_path_list to the project.

     The list is first passed through graphics.convertMultiImagesInList()
     together with the configured temporary folder.  When neither
     deskewing nor unpapering after addition is enabled, every resulting
     image is added directly and failures are reported through debug().
     Otherwise one AsyncItem per image is queued on a
     QueuedEventsProgressDialog, which is then run.
     """
     queued = []
     temp_dir = self.configuration_manager.TEMPORARY_FOLDER
     image_path_list = graphics.convertMultiImagesInList(image_path_list,
                                                         temp_dir)
     count = len(image_path_list)
     if not (self.configuration_manager.deskew_images_after_addition or
             self.configuration_manager.unpaper_images_after_addition):
         # No pre-processing requested: add the images right away.
         for number, image_path in enumerate(image_path_list):
             if not self.__addImage(image_path, number == 0):
                 debug('Failed to load image "%s"' % image_path)
         return
     dialog = QueuedEventsProgressDialog(self.main_window)
     for number, image_path in enumerate(image_path_list):
         is_last = number == count - 1
         is_first = number == 0
         item = AsyncItem(self.__imagePreProcessing,
                          (image_path,),
                          self.__imagePreProcessingFinishedCb,
                          (dialog, is_last, is_first))
         if count == 1:
             heading = _('Preparing image')
         else:
             heading = _('Preparing image %(current_index)s/%(total)s') % \
                 {'current_index': number + 1, 'total': count}
         queued.append(((heading, _(u'Please wait…')), item))
     dialog.setItemsList(queued)
     dialog.run()
Example #7
0
 def getAllBlocks(self):
     """Build, unify and return the blocks; print them when debugging."""
     extended = self.extendBlocksByBelongingSingles()
     result = self.unifyBlocks(extended)
     if OCRFEEDER_DEBUG:
         # debug() is called once per block, in order.
         for current in result:
             debug(current)
     return result
Example #8
0
 def updateBackgroundImage(self, image_path):
     """Remember image_path and load it as the background pixbuf.

     The path is stored on both this object and its page.  Nothing else
     happens when the file does not exist.  A loading failure is reported
     through debug() and self.image_pixbuf is not assigned.
     """
     self.path_to_image = self.page.image_path = image_path
     if not os.path.exists(self.path_to_image):
         return
     try:
         self.image_pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.path_to_image)
     except Exception as exception:
         # "except ... as" works on Python 2.6+ and Python 3 alike.  Not
         # every exception has a .message attribute, so fall back to str().
         message = getattr(exception, 'message', None)
         if message is None:
             message = str(exception)
         debug(message)
         return
 def save(self):
     """Save the document and delete the temporary images.

     '.odt' is appended to the name unless it already ends with it
     (case-insensitively).  Removing the temporary images is best effort:
     a file that cannot be removed is reported through debug().
     """
     name = self.name
     if not name.lower().endswith('.odt'):
         name += '.odt'
     self.document.save(name)
     for image in self.temp_images:
         try:
             os.unlink(image)
         except OSError:
             # Only file-system failures are expected here; a bare except
             # would also swallow KeyboardInterrupt and SystemExit.
             debug('Error removing image: %s' % image)
Example #10
0
 def save(self):
     """Write the document to disk and remove the temporary images.

     The file name is self.name, with '.odt' appended when it does not
     already end with that extension (compared case-insensitively).
     Images that cannot be removed are reported through debug() and do
     not interrupt the clean-up.
     """
     name = self.name
     if not name.lower().endswith('.odt'):
         name += '.odt'
     self.document.save(name)
     for image in self.temp_images:
         try:
             os.unlink(image)
         except OSError:
             # Narrowed from a bare except so that KeyboardInterrupt and
             # SystemExit are no longer silently swallowed.
             debug('Error removing image: %s' % image)
Example #11
0
 def updateBackgroundImage(self, image_path):
     """Remember image_path and load it as the background pixbuf.

     The path is stored on both this object and its page.  Nothing else
     happens when the file does not exist.  A loading failure is reported
     through debug() and self.image_pixbuf is not assigned.
     """
     self.path_to_image = self.page.image_path = image_path
     if not os.path.exists(self.path_to_image):
         return
     try:
         self.image_pixbuf = gtk.gdk.pixbuf_new_from_file(
             self.path_to_image)
     except Exception as exception:
         # "except ... as" works on Python 2.6+ and Python 3 alike.  Not
         # every exception has a .message attribute, so fall back to str().
         message = getattr(exception, 'message', None)
         if message is None:
             message = str(exception)
         debug(message)
         return
Example #12
0
 def makeEnginesFromFolder(self, folder):
     """Reload self.ocr_engines from the XML files found in folder.

     Each successfully loaded engine is stored as an (engine, xml_file)
     pair.  A file whose engine raises WrongSettingsForEngine is reported
     through lib.debug() and skipped.  While loading, the method tracks
     whether one of the engines is the configured favorite.
     """
     self.ocr_engines = []
     favorite_engine_exists = False
     for xml_file in self.getXmlFilesInFolder(folder):
         # Only the call that can raise is kept inside the try block.
         try:
             engine = self.getEngineFromXml(xml_file)
         except WrongSettingsForEngine as we:
             lib.debug("Cannot load engine at %s: %s" % (xml_file, str(we)))
             continue
         self.ocr_engines.append((engine, xml_file))
         favorite_engine_exists = favorite_engine_exists or \
             self.configuration_manager.favorite_engine == engine.name
Example #13
0
 def makeEnginesFromFolder(self, folder):
     """Rebuild the engine list from the XML files in folder.

     Engines are appended to self.ocr_engines as (engine, xml_file)
     pairs.  When loading a file raises WrongSettingsForEngine, the error
     is reported through lib.debug() and the file is skipped.  The method
     also tracks whether the configured favorite engine was among the
     loaded ones.
     """
     self.ocr_engines = []
     favorite_engine_exists = False
     for xml_file in self.getXmlFilesInFolder(folder):
         try:
             engine = self.getEngineFromXml(xml_file)
         except WrongSettingsForEngine as we:
             # "except ... as" is valid on Python 2.6+ and Python 3.
             lib.debug("Cannot load engine at %s: %s" % (xml_file, str(we)))
             continue
         self.ocr_engines.append((engine, xml_file))
         if not favorite_engine_exists:
             favorite_engine_exists = \
                 self.configuration_manager.favorite_engine == engine.name
Example #14
0
    def getEngineFromXml(self, xml_file_name):
        """Build an Engine from the XML description in xml_file_name.

        Every child element of the document root becomes a keyword
        argument of the Engine constructor (element tag -> element text).

        Returns the Engine, or None when the constructor rejects the
        arguments with a TypeError.
        """
        root_node = ET.parse(xml_file_name).getroot()
        # Iterate the element directly: Element.getchildren() is deprecated
        # and missing from recent Python versions.
        arguments = dict((child.tag, child.text) for child in root_node)

        try:
            engine = Engine(**arguments)
        except TypeError as exception:
            # Format the message here; passing it as a second argument
            # left the '%s' placeholder unfilled.
            lib.debug('Error when unserializing engine: %s' % exception)
            engine = None
        # Without this return the caller always received None.
        return engine
    def getEngineFromXml(self, xml_file_name):
        """Create an Engine from the XML file xml_file_name.

        The tag and text of each child of the root element are passed to
        the Engine constructor as keyword arguments.

        Returns the new Engine, or None when the constructor raises a
        TypeError for those arguments.
        """
        document = ET.parse(xml_file_name)
        arguments = {}
        # Direct iteration replaces the deprecated Element.getchildren().
        for child in document.getroot():
            arguments[child.tag] = child.text

        try:
            engine = Engine(**arguments)
        except TypeError as exception:
            # The placeholder is filled in before the message is handed to
            # lib.debug().
            lib.debug('Error when unserializing engine: %s' % exception)
            engine = None
        # The engine was previously built but never handed to the caller.
        return engine
Example #16
0
 def choosePageSize(self):
     """Let the user pick a page size and apply it.

     A PageSizeDialog is opened with the current page's width and height.
     When the dialog is accepted, the chosen size is applied to all pages
     or only to the current one, depending on the dialog's "all pages"
     radio button.  The dialog is destroyed and the status bar updated in
     every case.
     """
     reviewer = self.__getCurrentReviewer()
     shown_page = reviewer.page
     dialog = PageSizeDialog((shown_page.width, shown_page.height))
     if dialog.run() == gtk.RESPONSE_ACCEPT:
         new_size = dialog.getSize()
         if dialog.all_pages_radio.get_active():
             targets = self.source_images_selector_widget.getAllPages()
         else:
             targets = [reviewer.page]
         for target in targets:
             target.setSize(new_size)
         debug('Page size: ', new_size)
     dialog.destroy()
     self.__updateStatusBar(reviewer)
Example #17
0
 def choosePageSize(self):
     """Ask for a page size through a PageSizeDialog and apply it.

     The dialog starts from the current page's size.  On acceptance the
     selected size is set on every page when the "all pages" radio button
     is active, otherwise only on the current reviewer's page.  Afterwards
     the dialog is destroyed and the status bar is updated.
     """
     current_reviewer = self.__getCurrentReviewer()
     page_now = current_reviewer.page
     initial_size = (page_now.width, page_now.height)
     size_dialog = PageSizeDialog(initial_size)
     answer = size_dialog.run()
     if answer == gtk.RESPONSE_ACCEPT:
         chosen = size_dialog.getSize()
         apply_to_all = size_dialog.all_pages_radio.get_active()
         if not apply_to_all:
             current_reviewer.page.setSize(chosen)
         else:
             for each_page in self.source_images_selector_widget.getAllPages():
                 each_page.setSize(chosen)
         debug('Page size: ', chosen)
     size_dialog.destroy()
     self.__updateStatusBar(current_reviewer)
Example #18
0
 def addText(self, data_box):
     """Add the text of data_box to the current page inside a frame.

     The frame is positioned and sized (in inches) from the data box's
     print bounds at the current page resolution.  When the text has a
     non-zero angle, a rotated frame style and a 'transform' attribute are
     used.  Each line of the text becomes its own paragraph in a TextBox.
     """
     def inches(value):
         return str(value) + 'in'

     content = data_box.getText()
     style = Style(name='FrameStyle', family='graphic')
     debug('Angle: ', data_box.text_data.angle)
     rotation = data_box.text_data.angle
     if rotation:
         style = Style(name='FrameStyleRotated', family='graphic')
     x, y, width, height = data_box.getBoundsPrintSize(
         self.current_page_resolution)
     frame = Frame(stylename=style,
                   width=inches(width),
                   height=inches(height),
                   x=inches(x),
                   y=inches(y),
                   anchortype='paragraph')
     if rotation:
         transform = 'rotate (%s) translate (%scm %scm)' % (
             abs(math.radians(rotation)), x, y)
         frame.addAttribute('transform', transform)
     self.current_page.addElement(frame)
     text_box = TextBox()
     frame.addElement(text_box)
     for line in content.split('\n'):
         # The paragraph style is requested again for every line.
         paragraph_style = self.__handleFrameStyle(data_box.text_data)
         text_box.addElement(P(stylename=paragraph_style, text=line))
Example #19
0
class OcrEnginesManager:
    """Keeps the list of loaded OCR engines.

    self.ocr_engines holds (engine, xml_file_path) pairs.
    """

    def __init__(self, configuration_manager):
        self.ocr_engines = []
        self.configuration_manager = configuration_manager

    def getEnginesNames(self):
        """Return the names of the loaded engines, in list order."""
        return [engine.name for engine, path in self.ocr_engines]

    def getEnginePath(self, engine):
        """Return the path stored alongside engine, or None if unknown."""
        for eng, path in self.ocr_engines:
            if eng == engine:
                return path
        return None

    def replaceEngine(self, engine, new_engine):
        """Replace engine by new_engine, keeping the stored path.

        engineToXml() is called with new_engine and that path before the
        list entry is swapped.

        Returns True when engine was found and replaced, False otherwise.
        """
        for index, (eng, path) in enumerate(self.ocr_engines):
            if eng == engine:
                self.engineToXml(new_engine, path)
                self.ocr_engines[index] = new_engine, path
                return True
        return False

    def makeEnginesFromFolder(self, folder):
        """Reload the engines from the XML files found in folder.

        A file whose engine raises WrongSettingsForEngine is reported
        through lib.debug() and skipped.  If none of the loaded engines is
        the configured favorite, the first loaded engine becomes the
        favorite.

        Returns a dict with the keys 'auto' and 'manual'; each value lists
        {'engine': ..., 'configuration': ...} dicts for the engines whose
        version is lower than the one in their default configuration.
        'auto' is used when the engine's arguments equal one of the
        configuration's 'old_arguments', 'manual' otherwise.
        """
        self.ocr_engines = []
        favorite_engine_exists = False
        for xml_file in self.getXmlFilesInFolder(folder):
            # Only the call that can raise is kept inside the try block.
            try:
                engine = self.getEngineFromXml(xml_file)
            except WrongSettingsForEngine as we:
                lib.debug("Cannot load engine at %s: %s" % (xml_file, str(we)))
                continue
            self.ocr_engines.append((engine, xml_file))
            favorite_engine_exists = favorite_engine_exists or \
                self.configuration_manager.favorite_engine == engine.name
        if not self.ocr_engines:
            lib.debug("Warning: no engines found!")
        elif not favorite_engine_exists:
            self.configuration_manager.favorite_engine = \
                self.ocr_engines[0][0].name
        engines_needing_update = {'auto': [], 'manual': []}
        for engine, xml_file in self.ocr_engines:
            # Default configurations are looked up by the engine's own
            # path, not by the XML file it was loaded from.
            default_conf = \
                self.configuration_manager.getEngineDefaultConfiguration(
                    engine.engine_path)
            if default_conf is None:
                continue
            if float(engine.version) < float(default_conf['version']):
                update_type = 'manual'
                for arguments in default_conf['old_arguments']:
                    if engine.arguments == arguments:
                        update_type = 'auto'
                        break
                engines_needing_update[update_type].append({
                    'engine': engine,
                    'configuration': default_conf,
                })
        return engines_needing_update
Example #20
0
 def addText(self, data_box):
     """Place the text of data_box on the current page.

     A Frame is created from the data box's print bounds (expressed in
     inches) and added to the current page; it uses a rotated style and
     receives a 'transform' attribute when the text angle is non-zero.
     The text goes into a TextBox inside the frame, one paragraph per
     line.
     """
     text = data_box.getText()
     frame_style = Style(name='FrameStyle', family='graphic')
     debug('Angle: ', data_box.text_data.angle)
     angle = data_box.text_data.angle
     rotated = bool(angle)
     if rotated:
         frame_style = Style(name='FrameStyleRotated', family='graphic')
     bounds = data_box.getBoundsPrintSize(self.current_page_resolution)
     x, y, width, height = bounds
     frame_width = str(width) + 'in'
     frame_height = str(height) + 'in'
     frame_x = str(x) + 'in'
     frame_y = str(y) + 'in'
     frame = Frame(stylename=frame_style,
                   width=frame_width, height=frame_height,
                   x=frame_x, y=frame_y,
                   anchortype='paragraph')
     if rotated:
         radians = abs(math.radians(angle))
         frame.addAttribute(
             'transform',
             'rotate (%s) translate (%scm %scm)' % (radians, x, y))
     self.current_page.addElement(frame)
     textbox = TextBox()
     frame.addElement(textbox)
     lines = text.split('\n')
     for current_line in lines:
         # The paragraph style is requested again for every line.
         line_style = self.__handleFrameStyle(data_box.text_data)
         textbox.addElement(P(stylename=line_style, text=current_line))
Example #21
0
 def choosePageSize(self):
     """Let the user pick a page size and apply it.

     A PageSizeDialog is opened for the main window with the current
     page's width and height.  When the dialog is accepted, the chosen
     size is applied to all pages of the icon view or only to the current
     page, depending on the dialog's "all pages" radio button.  The dialog
     is destroyed and the status bar updated in every case.
     """
     reviewer = self.__getCurrentReviewer()
     shown_page = reviewer.page
     dialog = PageSizeDialog(self.main_window,
                             (shown_page.width, shown_page.height))
     if dialog.run() == Gtk.ResponseType.ACCEPT:
         new_size = dialog.getSize()
         if dialog.all_pages_radio.get_active():
             targets = self.pages_icon_view.getAllPages()
         else:
             targets = [reviewer.page]
         for target in targets:
             target.setSize(new_size)
         debug('Page size: ', new_size)
     dialog.destroy()
     self.__updateStatusBar(reviewer)
Example #22
0
 def performOcrForDataBox(self, data_box, engine):
     """Run OCR on the region of the page image covered by data_box.

     The recognized text is stored in the data box, the clipboard and
     spell-checker menu entries are made sensitive and, when a text size
     can be obtained from the image, the box's font size is updated.
     """
     box_x = data_box.getX()
     box_y = data_box.getY()
     pixbuf_width = self.image_pixbuf.get_width()
     pixbuf_height = self.image_pixbuf.get_height()
     # The sub-region has to lie entirely inside the source pixbuf, so its
     # size is limited to what is left to the right of / below the box
     # origin rather than to the full image size.
     subpixbuf = self.image_pixbuf.subpixbuf(
         box_x, box_y,
         min(data_box.getWidth(), pixbuf_width - box_x),
         min(data_box.getHeight(), pixbuf_height - box_y))
     image = graphics.convertPixbufToImage(subpixbuf)
     layout_analysis = LayoutAnalysis(
         engine, clean_text=self.configuration_manager.clean_text)
     text = layout_analysis.readImage(image)
     data_box.setText(text)
     self.main_window.copy_to_clipboard_menu.set_sensitive(True)
     self.main_window.spellchecker_menu.set_sensitive(True)
     debug('Finished reading')
     text_size = layout_analysis.getTextSizeFromImage(
         image, self.page.resolution[1])
     if text_size:
         data_box.setFontSize(text_size)
Example #23
0
 def addText(self, box):
     """Draw the text of box on the canvas.

     The text origin is derived from the box's print bounds at the page
     resolution.  Character spacing, leading and the cursor offset come
     from the box's text data.  When the font cannot be set, the failure
     is reported through debug() and only the font size is applied.
     """
     x, y, width, height = box.getBoundsPrintSize(self.page_data.resolution)
     text = self.canvas.beginText()
     # Make the text transparent if we are not
     # creating a PDF from scratch
     if not self._from_scratch:
         text.setTextRenderMode(3)
     text.setTextOrigin(x * units.inch,
                        (self.page_data.height - y) * units.inch)
     text.setCharSpace(box.text_data.letter_space)
     text.setLeading(box.text_data.line_space + box.text_data.size)
     text.moveCursor(0, box.text_data.size)
     try:
         self.setFont(box.text_data.face,
                      box.text_data.size)
     except Exception:
         # Still a catch-all for font problems, but KeyboardInterrupt and
         # SystemExit are no longer swallowed as a bare except did.
         debug('Error setting font %s' % box.text_data.face)
         self.canvas.setFontSize(box.text_data.size)
     text.textLines(box.text)
     self.canvas.drawText(text)
Example #24
0
 def loadConfiguration(self, folder = None):
     """Load the pages described by the project.xml found in folder.

     folder defaults to self.configuration_dir.

     Returns a list with one PageData per entry produced from the
     document's 'PageData' nodes, or None when the folder contains no
     project.xml.
     """
     folder = folder or self.configuration_dir
     project_xml = os.path.join(folder, 'project.xml')
     # The joined path is never empty, so the check has to be made against
     # the file system.
     if not os.path.exists(project_xml):
         return None
     document = minidom.parse(project_xml)
     images_node = document.getElementsByTagName('image')
     images = self.__getImagesInfo(images_node)
     page_data_nodes = document.getElementsByTagName('PageData')
     pages = []
     for page_data in self.__getPageDataInfo(page_data_nodes):
         debug('Page Data:', page_data)
         data_boxes = []
         for data_box in page_data['data_boxes']:
             # The values come from the project file.  They are passed as
             # keyword arguments instead of being spliced into source code
             # and run through exec, which allowed code injection and also
             # left backslashes from re.escape() in string values.
             box_arguments = {}
             # The text is assigned after construction rather than passed
             # to the constructor.
             text = ''
             for var_name, value in data_box.items():
                 if var_name == 'text':
                     text = value
                     continue
                 try:
                     value = int(value)
                 except ValueError:
                     pass
                 box_arguments[str(var_name)] = value
             box = DataBox(**box_arguments)
             box.text = text
             data_boxes.append(box)
         image_path = page_data['image_path']
         if not os.path.exists(image_path):
             # Fall back to the copy kept in the project's images folder.
             image_path = os.path.join(self.configuration_dir, 'images',
                                       images[image_path])
         page = PageData(image_path, data_boxes)
         pages.append(page)
     return pages
Example #25
0
 def loadConfiguration(self, folder = None):
     """Read project.xml from folder and rebuild the project's pages.

     When folder is not given, self.configuration_dir is used.

     Returns the list of PageData objects built from the document's
     'PageData' nodes, or None when project.xml does not exist in the
     folder.
     """
     folder = folder or self.configuration_dir
     project_xml = os.path.join(folder, 'project.xml')
     # os.path.join() never returns an empty string; test for the file.
     if not os.path.exists(project_xml):
         return None
     document = minidom.parse(project_xml)
     images = self.__getImagesInfo(document.getElementsByTagName('image'))
     page_data_nodes = document.getElementsByTagName('PageData')
     pages = []
     for page_data in self.__getPageDataInfo(page_data_nodes):
         debug('Page Data:', page_data)
         data_boxes = []
         for data_box in page_data['data_boxes']:
             # Build the constructor arguments as a dict.  Running exec on
             # source assembled from project-file data allowed arbitrary
             # code to be injected through a crafted project.
             keyword_arguments = {}
             # The text is set as an attribute once the box exists.
             text = ''
             for var_name, value in data_box.items():
                 if var_name == 'text':
                     text = value
                     continue
                 try:
                     real_value = int(value)
                 except ValueError:
                     real_value = value
                 keyword_arguments[str(var_name)] = real_value
             box = DataBox(**keyword_arguments)
             box.text = text
             data_boxes.append(box)
         image_path = page_data['image_path']
         if not os.path.exists(image_path):
             # Fall back to the copy kept in the project's images folder.
             image_path = os.path.join(self.configuration_dir, 'images',
                                       images[image_path])
         pages.append(PageData(image_path, data_boxes))
     return pages
 def __init__(self, path_to_image,
              window_size = None, contrast_tolerance = 120):
     """Open path_to_image and keep a grayscale ('L' mode) copy of it.

     When window_size is not given, it is set to the image height divided
     by 60.

     Raises ImageManipulationError when path_to_image is not a file or
     when the image cannot be opened or converted.
     """
     self.window_size = window_size
     self.contrast_tolerance = contrast_tolerance
     error_message = _("A problem occurred while trying to open the image:\n %s\n"
                       "Ensure the image exists or try converting it to another format.") % path_to_image
     if not os.path.isfile(path_to_image):
         # No exception is active here, so sys.exc_info() would only log
         # (None, None, None); log the offending path instead.
         debug('Image file not found: ', path_to_image)
         raise ImageManipulationError(error_message)
     try:
         self.original_image = Image.open(path_to_image)
         self.black_n_white_image = self.original_image.convert('L')
         if not self.window_size:
             self.window_size = self.original_image.size[1] / 60.
         debug('Window Size: ', self.window_size)
     except Exception:
         # KeyboardInterrupt and SystemExit are left alone; any other
         # failure is reported as an ImageManipulationError.
         debug(sys.exc_info())
         raise ImageManipulationError(error_message)
     self.bg_color = 255
Example #27
0
 def __pressedAngleDetectionButton(self, widget):
     """Detect the text angle of the box editor's image and apply it.

     The widget argument is not used.
     """
     editor_image = self.box_editor.getImage()
     detected_angle = graphics.getHorizontalAngleForText(
         graphics.convertPixbufToImage(editor_image))
     debug('ANGLE: ', detected_angle)
     self.box_editor.setAngle(detected_angle)
class OcrEnginesManager:
    """Keeps the list of loaded OCR engines.

    self.ocr_engines holds (engine, xml_file_path) pairs.
    """

    def __init__(self, configuration_manager):
        self.ocr_engines = []
        self.configuration_manager = configuration_manager

    def getEnginesNames(self):
        """Return the names of the loaded engines, in list order."""
        return [engine.name for engine, path in self.ocr_engines]

    def getEnginePath(self, engine):
        """Return the path stored alongside engine, or None if unknown."""
        for eng, path in self.ocr_engines:
            if eng == engine:
                return path
        return None

    def replaceEngine(self, engine, new_engine):
        """Replace engine by new_engine, keeping the stored path.

        engineToXml() is called with new_engine and that path before the
        list entry is swapped.

        Returns True when engine was found and replaced, False otherwise.
        """
        for index, (eng, path) in enumerate(self.ocr_engines):
            if eng == engine:
                self.engineToXml(new_engine, path)
                self.ocr_engines[index] = new_engine, path
                return True
        return False

    def makeEnginesFromFolder(self, folder):
        """Reload the engines from the XML files found in folder.

        Files for which getEngineFromXml() gives no engine are skipped.
        If none of the loaded engines is the configured favorite, the
        first loaded engine becomes the favorite.

        Returns a dict with the keys 'auto' and 'manual'; each value lists
        {'engine': ..., 'configuration': ...} dicts for the engines whose
        version is lower than the one in their default configuration.
        'auto' is used when the engine's arguments equal one of the
        configuration's 'old_arguments', 'manual' otherwise.
        """
        self.ocr_engines = []
        favorite_engine_exists = False
        for xml_file in self.getXmlFilesInFolder(folder):
            engine = self.getEngineFromXml(xml_file)
            if engine:
                self.ocr_engines.append((engine, xml_file))
                favorite_engine_exists = favorite_engine_exists or \
                    self.configuration_manager.favorite_engine == engine.name
        if not self.ocr_engines:
            lib.debug("Warning: no engines found!")
        elif not favorite_engine_exists:
            self.configuration_manager.favorite_engine = \
                self.ocr_engines[0][0].name
        engines_needing_update = {'auto': [], 'manual': []}
        for engine, xml_file in self.ocr_engines:
            # Default configurations are looked up by the engine's own
            # path, not by the XML file it was loaded from.
            default_conf = \
                self.configuration_manager.getEngineDefaultConfiguration(
                    engine.engine_path)
            if default_conf is None:
                continue
            if float(engine.version) < float(default_conf['version']):
                update_type = 'manual'
                for arguments in default_conf['old_arguments']:
                    if engine.arguments == arguments:
                        update_type = 'auto'
                        break
                engines_needing_update[update_type].append({
                    'engine': engine,
                    'configuration': default_conf,
                })
        return engines_needing_update

    def migrateEngine(self, engine, configuration, only_version=False):
        """Bring engine up to date with configuration and store it again.

        Unless only_version is true, the arguments, language argument and
        languages are taken from the configuration; the version always is.
        """
        if not only_version:
            engine.arguments = configuration['arguments']
            engine.language_argument = configuration['language_argument']
            engine.setLanguages(configuration['languages'])
        engine.version = configuration['version']
        self.replaceEngine(engine, engine)

    def getEngineFromXml(self, xml_file_name):
        """Build an Engine from the XML description in xml_file_name.

        Every child element of the document root becomes a keyword
        argument of the Engine constructor (element tag -> element text).

        Returns the Engine, or None when the constructor raises TypeError
        or WrongSettingsForEngine; both are reported through lib.debug().
        """
        root_node = ET.parse(xml_file_name).getroot()
        # Iterate the element directly: Element.getchildren() is deprecated
        # and missing from recent Python versions.
        arguments = dict((child.tag, child.text) for child in root_node)

        try:
            engine = Engine(**arguments)
        except TypeError as exception:
            # Format the message here; passing it as a second argument
            # left the '%s' placeholder unfilled.
            lib.debug('Error when unserializing engine: %s' % exception)
            engine = None
        except WrongSettingsForEngine as we:
            lib.debug("Cannot load engine at %s: %s" %
                      (xml_file_name, str(we)))
            engine = None
        # makeEnginesFromFolder() relies on this value; without the return
        # every file looked like a failed engine.
        return engine
Example #29
0
 def removeTemporaryFolder(self):
     """Delete the temporary folder and everything inside it.

     A failure to remove it is reported through debug() and not raised.
     """
     try:
         shutil.rmtree(self.TEMPORARY_FOLDER)
     except OSError:
         # Only file-system failures are expected here; a bare except
         # would also swallow KeyboardInterrupt and SystemExit.
         debug('Error when removing the temporary folder: ' + \
               self.TEMPORARY_FOLDER)
Example #30
0
 def __pressedAngleDetectionButton(self, widget):
     """Compute the horizontal text angle of the edited image and set it.

     The image comes from the box editor, which also receives the angle.
     The widget argument is ignored.
     """
     pixbuf = self.box_editor.getImage()
     converted = graphics.convertPixbufToImage(pixbuf)
     text_angle = graphics.getHorizontalAngleForText(converted)
     debug('ANGLE: ', text_angle)
     self.box_editor.setAngle(text_angle)
 def removeTemporaryFolder(self):
     """Remove self.TEMPORARY_FOLDER recursively.

     When the removal fails, the problem is reported through debug() and
     the method returns normally.
     """
     try:
         shutil.rmtree(self.TEMPORARY_FOLDER)
     except OSError:
         # Narrowed from a bare except so that KeyboardInterrupt and
         # SystemExit are no longer silently swallowed.
         debug('Error when removing the temporary folder: ' + \
               self.TEMPORARY_FOLDER)