Esempio n. 1
0
def getTextSizeFromImage(image):
    """Estimate the height, in pixel rows, of the text found in ``image``.

    The image is scanned row by row, starting at the row returned by
    ``getTextBeginHeight(image)``.  Every third pixel of a row is compared
    with the background colour through ``colorsContrast``; a row with at
    least one contrasting pixel counts as a text row.  Consecutive text
    rows form a run.

    Returns the length of the longest run of text rows, or 0 when no text
    row was found.  The value is also reported through ``debug``.
    """
    width, height = image.size
    # getcolors() yields (count, colour) pairs, so the greatest pair is the
    # most frequent colour.  Only the last channel of that colour is used;
    # 255 is the fallback when no colour list is available.
    background_color = 255
    colors = image.getcolors(width * height)
    if colors:
        background_color = list(max(colors)[1]).pop()
    run_lengths = [0]
    for row in xrange(getTextBeginHeight(image), height):
        # One-pixel-high strip whose lower edge is ``row``.
        current_line = image.crop((0, row - 1, width, row))
        # The strip is intentionally left in its own mode: the pixel is
        # unpacked with list(color) below, which needs a multi-channel
        # value.  (convert() returns a new image, so calling it without
        # using the result would have no effect anyway.)
        has_text = any(
            colorsContrast(list(current_line.getpixel((column, 0))).pop(),
                           background_color)
            for column in range(0, current_line.size[0], 3))
        if has_text:
            run_lengths[-1] += 1
        elif run_lengths[-1]:
            # A blank row closes the current run; open a new one.
            run_lengths.append(0)
    # run_lengths always holds at least one non-negative entry, so max()
    # is 0 exactly when no text row was seen.
    text_size = max(run_lengths)
    debug('Text Size: ', text_size)
    return text_size
Esempio n. 2
0
def getTextSizeFromImage(image):
    """Estimate the height, in pixel rows, of the text found in ``image``.

    The image is scanned row by row, starting at the row returned by
    ``getTextBeginHeight(image)``.  Every third pixel of a row is compared
    with the background colour through ``colorsContrast``; a row with at
    least one contrasting pixel counts as a text row.  Consecutive text
    rows form a run.

    Returns the length of the longest run of text rows, or 0 when no text
    row was found.  The value is also reported through ``debug``.
    """
    width, height = image.size
    # getcolors() yields (count, colour) pairs, so the greatest pair is the
    # most frequent colour.  Only the last channel of that colour is used;
    # 255 is the fallback when no colour list is available.
    background_color = 255
    colors = image.getcolors(width * height)
    if colors:
        background_color = list(max(colors)[1]).pop()
    run_lengths = [0]
    for row in xrange(getTextBeginHeight(image), height):
        # One-pixel-high strip whose lower edge is ``row``.
        current_line = image.crop((0, row - 1, width, row))
        # The strip is intentionally left in its own mode: the pixel is
        # unpacked with list(color) below, which needs a multi-channel
        # value.  (convert() returns a new image, so calling it without
        # using the result would have no effect anyway.)
        has_text = any(
            colorsContrast(list(current_line.getpixel((column, 0))).pop(),
                           background_color)
            for column in range(0, current_line.size[0], 3))
        if has_text:
            run_lengths[-1] += 1
        elif run_lengths[-1]:
            # A blank row closes the current run; open a new one.
            run_lengths.append(0)
    # run_lengths always holds at least one non-negative entry, so max()
    # is 0 exactly when no text row was seen.
    text_size = max(run_lengths)
    debug('Text Size: ', text_size)
    return text_size
Esempio n. 3
0
 def getAllBlocks(self):
     """Return the blocks after extending them with singles and unifying.

     The result of ``extendBlocksByBelongingSingles`` is passed through
     ``unifyBlocks``.  When ``OCRFEEDER_DEBUG`` is set, every resulting
     block is also sent to ``debug``.
     """
     unified = self.unifyBlocks(self.extendBlocksByBelongingSingles())
     if not OCRFEEDER_DEBUG:
         return unified
     for entry in unified:
         debug(entry)
     return unified
 def addText(self, data_box):
     """Add the text of ``data_box`` to the current page inside a frame.

     The frame is positioned and sized with the values returned by
     ``data_box.getBoundsPrintSize`` for the current page resolution.
     When the box's text data has a non-zero angle, a rotated frame style
     and a ``transform`` attribute are used.  Each line of the text
     becomes one paragraph in the frame's text box.
     """
     content = data_box.getText()
     style = Style(name='FrameStyle', family='graphic')
     debug('Angle: ', data_box.text_data.angle)
     rotation = data_box.text_data.angle
     if rotation:
         style = Style(name='FrameStyleRotated', family='graphic')
     bounds = data_box.getBoundsPrintSize(self.current_page_resolution)
     left, top, box_width, box_height = bounds

     def inches(value):
         # Frame geometry is expressed as "<number>in".
         return str(value) + 'in'

     frame = Frame(stylename=style, width=inches(box_width),
                   height=inches(box_height), x=inches(left),
                   y=inches(top), anchortype='paragraph')
     if rotation:
         # NOTE(review): left/top are the same values used with the "in"
         # suffix above but are written with "cm" here -- confirm the
         # intended unit.
         transform = 'rotate (%s) translate (%scm %scm)' % (
             abs(math.radians(rotation)), left, top)
         frame.addAttribute('transform', transform)
     self.current_page.addElement(frame)
     textbox = TextBox()
     frame.addElement(textbox)
     for paragraph_text in content.split('\n'):
         paragraph_style = self.__handleFrameStyle(data_box.text_data)
         textbox.addElement(P(stylename=paragraph_style,
                              text=paragraph_text))
Esempio n. 5
0
 def getAllBlocks(self):
     """Return the blocks after extending them with singles and unifying.

     The result of ``extendBlocksByBelongingSingles`` is passed through
     ``unifyBlocks``.  When ``OCRFEEDER_DEBUG`` is set, every resulting
     block is also sent to ``debug``.
     """
     unified = self.unifyBlocks(self.extendBlocksByBelongingSingles())
     if not OCRFEEDER_DEBUG:
         return unified
     for entry in unified:
         debug(entry)
     return unified
 def save(self):
     """Save the document and delete the temporary images.

     ``self.name`` is used as the file name, with ``.odt`` appended unless
     it already ends with that extension (compared case-insensitively).

     Removing the temporary images is best effort: a file that cannot be
     deleted is reported through ``debug`` and the remaining files are
     still processed.
     """
     name = self.name
     if not name.lower().endswith(".odt"):
         name += ".odt"
     self.document.save(name)
     for image in self.temp_images:
         try:
             os.unlink(image)
         except OSError:
             # Only filesystem failures are tolerated here; a bare except
             # would also swallow KeyboardInterrupt and SystemExit.
             debug("Error removing image: %s" % image)
 def save(self):
     """Save the document and delete the temporary images.

     ``self.name`` is used as the file name, with ``.odt`` appended unless
     it already ends with that extension (compared case-insensitively).

     Removing the temporary images is best effort: a file that cannot be
     deleted is reported through ``debug`` and the remaining files are
     still processed.
     """
     name = self.name
     if not name.lower().endswith('.odt'):
         name += '.odt'
     self.document.save(name)
     for image in self.temp_images:
         try:
             os.unlink(image)
         except OSError:
             # Only filesystem failures are tolerated here; a bare except
             # would also swallow KeyboardInterrupt and SystemExit.
             debug('Error removing image: %s' % image)
Esempio n. 8
0
 def __init__(self, path_to_image, window_size=None):
     """Open the image at ``path_to_image`` and keep a greyscale copy.

     path_to_image -- path of the image file to open.
     window_size -- value stored as ``self.window_size``; when it is
         falsy, the image height divided by 60 is used instead.

     Raises IOError when ``path_to_image`` is not an existing file, and
     ImageManipulationError when the image cannot be opened or converted.
     """
     self.window_size = window_size
     if not os.path.isfile(path_to_image):
         raise IOError('Image file not found: %r' % (path_to_image,))
     # Keep the guarded section down to the image operations, and catch
     # Exception rather than everything so that KeyboardInterrupt and
     # SystemExit are not turned into ImageManipulationError.
     try:
         self.original_image = Image.open(path_to_image)
         self.black_n_white_image = self.original_image.convert('L')
     except Exception:
         raise ImageManipulationError
     if not self.window_size:
         self.window_size = self.original_image.size[1] / 60.
     debug('Window Size: ', self.window_size)
Esempio n. 9
0
 def choosePageSize(self):
     """Ask for a page size and apply it to the current page or all pages.

     A ``PageSizeDialog`` is opened with the current page's width and
     height.  When it is accepted, the chosen size is set either on every
     reviewer's page (if the dialog's "all pages" radio is active) or on
     the current reviewer's page only.
     """
     current_reviewer = self.__getCurrentReviewer()
     current_page = current_reviewer.page
     page_size_dialog = PageSizeDialog((current_page.width, current_page.height))
     # try/finally guarantees the dialog is destroyed even if applying the
     # size raises.
     try:
         response = page_size_dialog.run()
         if response == gtk.RESPONSE_ACCEPT:
             size = page_size_dialog.getSize()
             if page_size_dialog.all_pages_radio.get_active():
                 for reviewer in self.image_reviewer_dict.values():
                     reviewer.page.setSize(size)
             else:
                 current_reviewer.page.setSize(size)
             debug('Page size: ', size)
     finally:
         page_size_dialog.destroy()
Esempio n. 10
0
 def choosePageSize(self):
     """Ask for a page size and apply it to the current page or all pages.

     A ``PageSizeDialog`` is opened with the current page's width and
     height.  When it is accepted, the chosen size is set either on every
     reviewer's page (if the dialog's "all pages" radio is active) or on
     the current reviewer's page only.
     """
     current_reviewer = self.__getCurrentReviewer()
     current_page = current_reviewer.page
     page_size_dialog = PageSizeDialog(
         (current_page.width, current_page.height))
     # try/finally guarantees the dialog is destroyed even if applying the
     # size raises.
     try:
         response = page_size_dialog.run()
         if response == gtk.RESPONSE_ACCEPT:
             size = page_size_dialog.getSize()
             if page_size_dialog.all_pages_radio.get_active():
                 for reviewer in self.image_reviewer_dict.values():
                     reviewer.page.setSize(size)
             else:
                 current_reviewer.page.setSize(size)
             debug('Page size: ', size)
     finally:
         page_size_dialog.destroy()
Esempio n. 11
0
 def loadConfiguration(self, folder=None):
     """Load the pages described by ``project.xml`` in ``folder``.

     folder -- directory containing ``project.xml``; defaults to
         ``self.configuration_dir``.

     Returns a list with one PageData per page entry, or None when
     ``project.xml`` does not exist in ``folder``.
     """
     folder = folder or self.configuration_dir
     project_xml = os.path.join(folder, 'project.xml')
     # os.path.join() never returns an empty string, so the file itself
     # has to be checked for the "nothing to load" case.
     if not os.path.isfile(project_xml):
         return None
     document = minidom.parse(project_xml)
     images = self.__getImagesInfo(document.getElementsByTagName('image'))
     page_data_nodes = document.getElementsByTagName('PageData')
     pages = []
     for page_data in self.__getPageDataInfo(page_data_nodes):
         debug('Page Data:', page_data)
         data_boxes = []
         for data_box in page_data['data_boxes']:
             # The attributes are passed to DataBox as keyword arguments.
             # Building source code from file contents and exec()-ing it
             # would let a crafted project file run arbitrary code.
             box_args = {}
             # text is assigned after construction, not passed as an
             # argument.
             text = ''
             for var_name, value in data_box.items():
                 if var_name == 'text':
                     text = value
                     continue
                 # Integer-looking values are passed as int, anything
                 # else stays a string.
                 try:
                     value = int(value)
                 except ValueError:
                     pass
                 box_args[str(var_name)] = value
             box = DataBox(**box_args)
             box.text = text
             data_boxes.append(box)
         image_path = page_data['image_path']
         if not os.path.exists(image_path):
             # NOTE(review): the fallback is resolved against
             # self.configuration_dir even when ``folder`` was given --
             # confirm this is intended.
             image_path = os.path.join(self.configuration_dir, 'images', images[image_path])
         pages.append(PageData(image_path, data_boxes))
     return pages
Esempio n. 12
0
 def loadConfiguration(self, folder=None):
     """Load the pages described by ``project.xml`` in ``folder``.

     folder -- directory containing ``project.xml``; defaults to
         ``self.configuration_dir``.

     Returns a list with one PageData per page entry, or None when
     ``project.xml`` does not exist in ``folder``.
     """
     folder = folder or self.configuration_dir
     project_xml = os.path.join(folder, "project.xml")
     # os.path.join() never returns an empty string, so the file itself
     # has to be checked for the "nothing to load" case.
     if not os.path.isfile(project_xml):
         return None
     document = minidom.parse(project_xml)
     images = self.__getImagesInfo(document.getElementsByTagName("image"))
     page_data_nodes = document.getElementsByTagName("PageData")
     pages = []
     for page_data in self.__getPageDataInfo(page_data_nodes):
         debug("Page Data:", page_data)
         data_boxes = []
         for data_box in page_data["data_boxes"]:
             # The attributes are passed to DataBox as keyword arguments.
             # Building source code from file contents and exec()-ing it
             # would let a crafted project file run arbitrary code.
             box_args = {}
             # text is assigned after construction, not passed as an
             # argument.
             text = ""
             for var_name, value in data_box.items():
                 if var_name == "text":
                     text = value
                     continue
                 # Integer-looking values are passed as int, anything
                 # else stays a string.
                 try:
                     value = int(value)
                 except ValueError:
                     pass
                 box_args[str(var_name)] = value
             box = DataBox(**box_args)
             box.text = text
             data_boxes.append(box)
         image_path = page_data["image_path"]
         if not os.path.exists(image_path):
             # NOTE(review): the fallback is resolved against
             # self.configuration_dir even when ``folder`` was given --
             # confirm this is intended.
             image_path = os.path.join(self.configuration_dir, "images", images[image_path])
         pages.append(PageData(image_path, data_boxes))
     return pages
Esempio n. 13
0
 def addText(self, data_box):
     """Add the text of ``data_box`` to the current page inside a frame.

     The frame is positioned and sized with the values returned by
     ``data_box.getBoundsPrintSize`` for the current page resolution.
     When the box's text data has a non-zero angle, a rotated frame style
     and a ``transform`` attribute are used.  Each line of the text
     becomes one paragraph in the frame's text box.
     """
     content = data_box.getText()
     style = Style(name="FrameStyle", family="graphic")
     debug("Angle: ", data_box.text_data.angle)
     rotation = data_box.text_data.angle
     if rotation:
         style = Style(name="FrameStyleRotated", family="graphic")
     bounds = data_box.getBoundsPrintSize(self.current_page_resolution)
     left, top, box_width, box_height = bounds

     def inches(value):
         # Frame geometry is expressed as "<number>in".
         return str(value) + "in"

     frame = Frame(
         stylename=style,
         width=inches(box_width),
         height=inches(box_height),
         x=inches(left),
         y=inches(top),
         anchortype="paragraph",
     )
     if rotation:
         # NOTE(review): left/top are the same values used with the "in"
         # suffix above but are written with "cm" here -- confirm the
         # intended unit.
         transform = "rotate (%s) translate (%scm %scm)" % (abs(math.radians(rotation)), left, top)
         frame.addAttribute("transform", transform)
     self.current_page.addElement(frame)
     textbox = TextBox()
     frame.addElement(textbox)
     for paragraph_text in content.split("\n"):
         paragraph_style = self.__handleFrameStyle(data_box.text_data)
         textbox.addElement(P(stylename=paragraph_style, text=paragraph_text))
Esempio n. 14
0
def OLDgetTextSizeFromImage(image):
    """Legacy estimate of the text height, in pixel rows, of ``image``.

    Rows are scanned from the row returned by ``getTextBeginHeight(image)``
    to the bottom.  For each row the colour that sorts last in
    ``getcolors()`` (the highest count) is taken as that row's background,
    and the row counts as a text row when any of its colours contrasts
    with it according to ``colorsContrast``.  Rows for which ``getcolors()``
    returns nothing are ignored entirely.

    Side effects: logs the module-level ``count``, saves ``image`` to
    ``/tmp/img<count>.png`` and increments ``count``.

    Returns the length of the longest run of consecutive text rows, or 0
    when there is none.  The value is also reported through ``debug``.
    """
    global count
    width, height = image.size
    debug('Image #%s' % count)
    image.save('/tmp/img%s.png' % count, 'PNG')
    count += 1
    run_lengths = [0]
    for row in xrange(getTextBeginHeight(image), height):
        # One-pixel-high strip whose lower edge is ``row``.  It is left in
        # its own mode: colours are unpacked with list(...) below, which
        # needs multi-channel values.
        current_line = image.crop((0, row - 1, width, row))
        colors = current_line.getcolors()
        if not colors:
            # No colour list for this row: it neither extends nor closes
            # the current run.
            continue
        colors.sort()
        background_color = list(colors[-1][1]).pop()
        has_text = any(
            colorsContrast(list(color[1]).pop(), background_color)
            for color in colors)
        if has_text:
            run_lengths[-1] += 1
        elif run_lengths[-1]:
            # A blank row closes the current run; open a new one.
            run_lengths.append(0)
    # run_lengths always holds at least one non-negative entry, so max()
    # is 0 exactly when no text row was seen.
    text_size = max(run_lengths)
    debug('Text Size: ', text_size)
    return text_size
Esempio n. 15
0
def OLDgetTextSizeFromImage(image):
    """Legacy estimate of the text height, in pixel rows, of ``image``.

    Rows are scanned from the row returned by ``getTextBeginHeight(image)``
    to the bottom.  For each row the colour that sorts last in
    ``getcolors()`` (the highest count) is taken as that row's background,
    and the row counts as a text row when any of its colours contrasts
    with it according to ``colorsContrast``.  Rows for which ``getcolors()``
    returns nothing are ignored entirely.

    Side effects: logs the module-level ``count``, saves ``image`` to
    ``/tmp/img<count>.png`` and increments ``count``.

    Returns the length of the longest run of consecutive text rows, or 0
    when there is none.  The value is also reported through ``debug``.
    """
    global count
    width, height = image.size
    debug('Image #%s' % count)
    image.save('/tmp/img%s.png' % count, 'PNG')
    count += 1
    run_lengths = [0]
    for row in xrange(getTextBeginHeight(image), height):
        # One-pixel-high strip whose lower edge is ``row``.  It is left in
        # its own mode: colours are unpacked with list(...) below, which
        # needs multi-channel values.
        current_line = image.crop((0, row - 1, width, row))
        colors = current_line.getcolors()
        if not colors:
            # No colour list for this row: it neither extends nor closes
            # the current run.
            continue
        colors.sort()
        background_color = list(colors[-1][1]).pop()
        has_text = any(
            colorsContrast(list(color[1]).pop(), background_color)
            for color in colors)
        if has_text:
            run_lengths[-1] += 1
        elif run_lengths[-1]:
            # A blank row closes the current run; open a new one.
            run_lengths.append(0)
    # run_lengths always holds at least one non-negative entry, so max()
    # is 0 exactly when no text row was seen.
    text_size = max(run_lengths)
    debug('Text Size: ', text_size)
    return text_size
Esempio n. 16
0
 def performOcr(self, engine_name=None):
     """Run OCR on the box editor's image and update its text and font size.

     engine_name -- optional engine name.  When it matches the name of an
         entry in ``self.ocr_engines`` that engine is selected; otherwise
         the engine currently selected in the box editor is kept.

     The image is rotated by the editor's angle when one is set.  If an
     engine is selected, the recognised text is written to the box
     editor.  The font size is set whenever a text size could be measured
     on the image.
     """
     selected_engine_index = self.box_editor.getSelectedOcrEngine()
     if engine_name:
         for index, engine_entry in enumerate(self.ocr_engines):
             if engine_entry[0].name == engine_name:
                 selected_engine_index = index
                 break
     self.box_editor.selectOcrEngine(selected_engine_index)
     image = graphics.convertPixbufToImage(self.box_editor.getImage())
     angle = self.box_editor.getAngle()
     if angle:
         image = graphics.getImageRotated(image, angle)
     # Index 0 is a valid selection, so test for None by identity and not
     # by truthiness.
     if selected_engine_index is not None:
         engine = self.ocr_engines[selected_engine_index][0]
         engine.setImage(image)
         self.box_editor.setText(engine.read())
         debug('Finished reading')
     text_size = graphics.getTextSizeFromImage(image)
     if text_size:
         # Presumably converts pixel rows to points (72 per inch) using
         # the page's vertical resolution -- confirm the resolution unit.
         y_resolution = float(self.reviewer.page.resolution[1])
         font_size = text_size / y_resolution * 72.0
         self.box_editor.setFontSize(math.floor(font_size))
Esempio n. 17
0
 def performOcr(self, engine_name=None):
     """Run OCR on the box editor's image and update its text and font size.

     engine_name -- optional engine name.  When it matches the name of an
         entry in ``self.ocr_engines`` that engine is selected; otherwise
         the engine currently selected in the box editor is kept.

     The image is rotated by the editor's angle when one is set.  If an
     engine is selected, the recognised text is written to the box
     editor.  The font size is set whenever a text size could be measured
     on the image.
     """
     selected_engine_index = self.box_editor.getSelectedOcrEngine()
     if engine_name:
         for index, engine_entry in enumerate(self.ocr_engines):
             if engine_entry[0].name == engine_name:
                 selected_engine_index = index
                 break
     self.box_editor.selectOcrEngine(selected_engine_index)
     image = graphics.convertPixbufToImage(self.box_editor.getImage())
     angle = self.box_editor.getAngle()
     if angle:
         image = graphics.getImageRotated(image, angle)
     # Index 0 is a valid selection, so test for None by identity and not
     # by truthiness.
     if selected_engine_index is not None:
         engine = self.ocr_engines[selected_engine_index][0]
         engine.setImage(image)
         self.box_editor.setText(engine.read())
         debug('Finished reading')
     text_size = graphics.getTextSizeFromImage(image)
     if text_size:
         # Presumably converts pixel rows to points (72 per inch) using
         # the page's vertical resolution -- confirm the resolution unit.
         y_resolution = float(self.reviewer.page.resolution[1])
         font_size = text_size / y_resolution * 72.0
         self.box_editor.setFontSize(math.floor(font_size))
Esempio n. 18
0
 def __pressedAngleDetectionButton(self, widget):
     """Detect the text angle of the box editor's image and apply it.

     widget -- the widget that triggered the call; not used.
     """
     pixbuf = self.box_editor.getImage()
     detected_angle = graphics.getHorizontalAngleForText(
         graphics.convertPixbufToImage(pixbuf))
     debug('ANGLE: ', detected_angle)
     self.box_editor.setAngle(detected_angle)
Esempio n. 19
0
 def __pressedAngleDetectionButton(self, widget):
     """Detect the text angle of the box editor's image and apply it.

     widget -- the widget that triggered the call; not used.
     """
     pixbuf = self.box_editor.getImage()
     detected_angle = graphics.getHorizontalAngleForText(
         graphics.convertPixbufToImage(pixbuf))
     debug('ANGLE: ', detected_angle)
     self.box_editor.setAngle(detected_angle)