예제 #1
0
 def __init__(self, bbox):
     """Create an empty container whose bounding box is a copy of *bbox*.

     The glyph and line lists start out empty and every statistic
     starts at zero.
     """
     self.bbox = core.Rect(bbox)
     self.glyphs = []
     self.lines = []
     # Statistics, all initialised to zero.
     self.avg_glyph_area = self.avg_glyph_height = self.avg_glyph_width = 0
     # NOTE(review): "agv_line_width" looks like a typo for "avg_line_width";
     # the spelling is kept because other code may read this attribute.
     self.avg_line_height = self.agv_line_width = 0
예제 #2
0
 def display_lines(self, clear=1):
     """Draw a box around every line that has been found.

     A display is opened for the image if it does not have one yet.
     If *clear* is true, any boxes already on the display are
     removed before the new ones are added.
     """
     if self.image._display is None:
         self.image.display()
     viewer = self.image._display
     if clear:
         viewer.clear_all_boxes()
     for section in self.sections:
         for text_line in section.lines:
             bounds = text_line.bbox
             origin = core.Point(bounds.ul_x, bounds.ul_y)
             extent = core.Dim(bounds.ncols, bounds.nrows)
             viewer.add_box(core.Rect(origin, extent))
예제 #3
0
    def display_sections(self, clear=1):
        """Draw a box around every section that has been found.

        A display is opened for the image if it does not have one yet.
        If *clear* is true, any boxes already on the display are
        removed before the new ones are added.
        """
        sections = self.sections

        if self.image._display is None:
            self.image.display()
        viewer = self.image._display
        if clear:
            viewer.clear_all_boxes()
        for section in sections:
            bounds = section.bbox
            origin = core.Point(bounds.ul_x, bounds.ul_y)
            extent = core.Dim(bounds.ncols, bounds.nrows)
            viewer.add_box(core.Rect(origin, extent))
예제 #4
0
 def __init__(self, glyph):
     """Start a new object seeded with a single *glyph*.

     The bounding box is initialised to the glyph's position and
     dimensions, and the glyph is then registered through
     ``add_glyph``.
     """
     self.center = 0
     origin = core.Point(glyph.ul_x, glyph.ul_y)
     extent = core.Dim(glyph.ncols, glyph.nrows)
     self.bbox = core.Rect(origin, extent)
     self.glyphs = []
     self.add_glyph(glyph)
예제 #5
0
 def calculate_bbox(self):
     """Recompute ``self.bbox`` as the union of all glyphs held here.

     At least one glyph must be present; the first one seeds the
     rectangle and every glyph is then merged into it.
     """
     members = self.glyphs
     assert len(members) >= 1
     self.bbox = core.Rect(members[0])
     for member in members:
         self.bbox.union(member)
예제 #6
0
    def find_sections(self):
        """Find the sections within an image.

        This finds large blocks of text, making it possible to find the
        lines within complex text layouts. The steps are:

        1. Drop glyphs that fail the section size test (optionally
           filling them white when ``self.__fill`` is set).
        2. Grow each remaining glyph's rectangle by a fudge factor.
        3. Repeatedly merge rectangles that intersect until no more
           merges are possible.
        4. Wrap each merged rectangle in a ``Section``, assign the
           original glyphs to the first section they intersect, and
           recompute each section's bounding box.

        The result is stored in ``self.sections``.
        """
        fudge = self.__avg_glyph_size(self.glyphs) * self.section_search_size

        # Remove noise and large objects. The thresholds are stored on the
        # instance, presumably for use by the section size test.
        self.__noise_size = fudge
        self.__large_size = fudge * 20
        glyphs = []
        for g in self.glyphs:
            if self.__section_size_test(g):
                glyphs.append(g)
            elif self.__fill:
                g.fill_white()

        # Order the glyphs top-to-bottom, then left-to-right. A single
        # stable key sort gives the same order as the former pair of
        # cmp-based sorts, and also works on Python 3 where ``cmp`` and
        # comparison-function sorts no longer exist.
        glyphs.sort(key=lambda g: (g.ul_y, g.ul_x))

        # Create rectangles for each glyph that are bigger by the fudge
        # factor, clipped to the image.
        image_lr_y = self.image.lr_y
        image_lr_x = self.image.lr_x
        big_rects = []
        for g in glyphs:
            ul_y = int(max(0, g.ul_y - fudge))
            ul_x = int(max(0, g.ul_x - fudge))
            lr_y = min(image_lr_y, g.lr_y + fudge)
            lr_x = min(image_lr_x, g.lr_x + fudge)
            nrows = int(lr_y - ul_y + 1)
            ncols = int(lr_x - ul_x + 1)
            big_rects.append(
                core.Rect(core.Point(ul_x, ul_y), core.Dim(ncols, nrows)))

        # Search for intersecting rectangles and merge them. This is
        # harder than it seems at first because we want everything
        # to merge together that intersects regardless of the order
        # in the list. It ends up being similar to connected-component
        # labeling. This is prone to be kind of slow.
        #
        # The bounds check is done up front so that nothing is searched
        # when every glyph was filtered out and the list is empty.
        current = 0
        rects = big_rects
        while current < len(rects):
            # Find the indexes of any rects that intersect with current.
            inter = self.__find_intersecting_rects(rects, current)
            if len(inter):
                # Merge the intersecting rectangles into the current one,
                # remove them from the list, and start the whole process
                # over to make certain that everything that should be
                # merged is.
                # Assumes the helper returns integer indexes, as the
                # comment above says; a set keeps the membership test cheap.
                hits = set(inter)
                merged = rects[current]
                survivors = [merged]
                for i, rect in enumerate(rects):
                    if i == current:
                        continue
                    if i in hits:
                        merged.union(rect)
                    else:
                        survivors.append(rect)
                rects = survivors
                current = 0
            else:
                # Nothing intersected; move on to the next rectangle.
                current += 1

        # Create the sections.
        sections = [Section(rect) for rect in rects]

        # Place the original (small) glyphs into the sections. The
        # original list is walked (not the sorted copy) so glyphs are
        # added in their original order; each goes to the first section
        # it intersects.
        for glyph in self.glyphs:
            if self.__section_size_test(glyph):
                for s in sections:
                    if s.bbox.intersects(glyph):
                        s.add_glyph(glyph)
                        break

        # Fix up the bounding boxes.
        for s in sections:
            s.calculate_bbox()

        self.sections = sections