def __init__(self, bbox):
    """Create an empty container covering the region *bbox*.

    *bbox* is wrapped in a new ``core.Rect``.  The line and glyph lists
    start out empty and every statistic starts at zero.
    """
    self.bbox = core.Rect(bbox)
    self.lines, self.glyphs = [], []

    # Statistics; all start at zero.
    # NOTE(review): 'agv_line_width' looks like a typo for
    # 'avg_line_width'.  The name is kept as-is because code outside this
    # block may read it -- confirm before renaming.
    self.avg_glyph_area = self.avg_glyph_height = self.avg_glyph_width = 0
    self.avg_line_height = self.agv_line_width = 0
def display_lines(self, clear=1):
    """Outline every line found with a box in the image's display.

    If ``self.image._display`` is None, ``self.image.display()`` is called
    first.  When *clear* is true, any boxes already in the display are
    removed before the new ones are added.
    """
    image = self.image
    if image._display is None:
        image.display()
    viewer = image._display

    if clear:
        viewer.clear_all_boxes()

    # Walk the lines section by section, in order.
    line_boxes = (line.bbox
                  for section in self.sections
                  for line in section.lines)
    for box in line_boxes:
        origin = core.Point(box.ul_x, box.ul_y)
        size = core.Dim(box.ncols, box.nrows)
        viewer.add_box(core.Rect(origin, size))
def display_sections(self, clear=1):
    """Outline every section found with a box in the image's display.

    If ``self.image._display`` is None, ``self.image.display()`` is called
    first.  When *clear* is true, any boxes already in the display are
    removed before the new ones are added.
    """
    sections = self.sections

    image = self.image
    if image._display is None:
        image.display()
    viewer = image._display

    if clear:
        viewer.clear_all_boxes()

    # One box per section, drawn from the section's bounding box.
    for section in sections:
        box = section.bbox
        origin = core.Point(box.ul_x, box.ul_y)
        size = core.Dim(box.ncols, box.nrows)
        viewer.add_box(core.Rect(origin, size))
def __init__(self, glyph):
    """Start a new glyph group seeded with *glyph*.

    The bounding box is initialised to the glyph's own extent and the
    glyph is then registered through ``add_glyph``.
    """
    self.center = 0

    # The initial bounding box is exactly the seed glyph's rectangle.
    origin = core.Point(glyph.ul_x, glyph.ul_y)
    size = core.Dim(glyph.ncols, glyph.nrows)
    self.bbox = core.Rect(origin, size)

    self.glyphs = []
    self.add_glyph(glyph)
def calculate_bbox(self):
    """Recompute ``self.bbox`` as the union of all glyphs held.

    At least one glyph must be present; this is enforced with an assert.
    """
    members = self.glyphs
    assert (len(members) >= 1)

    # Seed the box from the first glyph, then grow it over every glyph.
    self.bbox = core.Rect(members[0])
    for member in members:
        self.bbox.union(member)
def find_sections(self):
    """Find the sections within an image.

    This finds large blocks of text, making it possible to find the lines
    within complex text layouts.

    The method works in five steps:

    1. Glyphs failing the section size test are dropped (and filled with
       white when the fill option is set).
    2. The remaining glyphs are sorted top-to-bottom, then left-to-right.
    3. Each glyph gets a rectangle enlarged by a margin (``FUDGE``) on
       every side, clipped to the image.
    4. Intersecting rectangles are merged repeatedly until none intersect.
    5. A ``Section`` is created per merged rectangle, the original glyphs
       are placed into the first section they intersect, and each
       section's bounding box is recalculated.

    The result is stored in ``self.sections``; nothing is returned.  As a
    side effect the private noise/large size thresholds are updated.
    """
    glyphs = self.glyphs
    FUDGE = self.__avg_glyph_size(glyphs) * self.section_search_size

    # Remove noise and large objects.  The thresholds are stored on self
    # before the size test is called.
    self.__noise_size = FUDGE
    self.__large_size = FUDGE * 20
    new_glyphs = []
    for g in glyphs:
        if self.__section_size_test(g):
            new_glyphs.append(g)
        elif self.__fill:
            g.fill_white()
    glyphs = new_glyphs

    # Sort the glyphs top-to-bottom, ties broken left-to-right.  One stable
    # sort on (ul_y, ul_x) gives the same order as sorting by ul_x and then
    # by ul_y.  A key function is used because comparator-style sorting
    # (list.sort(func) / cmp) does not exist in Python 3.
    glyphs.sort(key=lambda g: (g.ul_y, g.ul_x))

    # Create rectangles for each glyph that are bigger by FUDGE, clipped
    # to the image bounds.
    big_rects = []
    for g in glyphs:
        ul_y = int(max(0, g.ul_y - FUDGE))
        ul_x = int(max(0, g.ul_x - FUDGE))
        lr_y = min(self.image.lr_y, g.lr_y + FUDGE)
        lr_x = min(self.image.lr_x, g.lr_x + FUDGE)
        nrows = int(lr_y - ul_y + 1)
        ncols = int(lr_x - ul_x + 1)
        big_rects.append(
            core.Rect(core.Point(ul_x, ul_y), core.Dim(ncols, nrows)))

    # Search for intersecting rectangles and merge them.  This is harder
    # than it seems at first because we want everything that intersects
    # to merge together regardless of the order in the list.  It ends up
    # being similar to connected-component labeling, and is prone to be
    # kind of slow.
    #
    # The loop condition is checked up front so that an empty rectangle
    # list never reaches the intersection helper with an invalid index.
    rects = big_rects
    current = 0
    while current < len(rects):
        # Find the indexes of any rects that intersect with current.
        inter = self.__find_intersecting_rects(rects, current)
        if len(inter):
            # Merge the intersecting rectangles into the current one, drop
            # them from the list, and start the whole process over to make
            # certain that everything that should be merged is.
            g = rects[current]
            new_rects = [g]
            for i, other in enumerate(rects):
                if i == current:
                    continue
                if i in inter:
                    g.union(other)
                else:
                    new_rects.append(other)
            rects = new_rects
            current = 0
        else:
            # Nothing intersected; move on to the next rectangle.
            current += 1

    # Create the sections.
    sections = [Section(rect) for rect in rects]

    # Place the original (small) glyphs into the sections.  Each glyph
    # goes into the first section whose bounding box it intersects.
    for glyph in self.glyphs:
        if self.__section_size_test(glyph):
            for s in sections:
                if s.bbox.intersects(glyph):
                    s.add_glyph(glyph)
                    break

    # Fix up the bounding boxes so they fit the glyphs actually placed.
    for s in sections:
        s.calculate_bbox()

    self.sections = sections