def get_coverage(self, x, y, width, height):
    """Return the coverage of the given rectangle on this object's surface.

    The work is delegated to ``image.get_coverage``, which is handed
    ``self.obj.surface.surface`` and ``self.matrix`` together with the
    rectangle ``(x, y, width, height)``.

    An assertion guards against calling this while ``self.obj.ignored``
    is set.
    """
    target = self.obj
    assert not target.ignored
    measure = image.get_coverage
    return measure(target.surface.surface, self.matrix, x, y, width, height)
def recognize(self):
    """Look for writing inside the box and record the result on ``self.obj.data``.

    The box area is probed with small test rectangles.  Every rectangle
    whose coverage (as reported by ``image.get_coverage``) is above
    ``defs.textbox_scan_coverage`` is merged into one bounding box.

    If the merged bounding box is wider than
    ``defs.textbox_minimum_writing_width`` or taller than
    ``defs.textbox_minimum_writing_height``, ``self.obj.data.state`` is set
    to True and the data geometry becomes the bounding box grown by the
    scan padding plus the extra padding on every side.  Otherwise
    ``state`` is set to False and the data geometry is reset to the
    geometry of ``self.obj``.

    If the page image or its ``recognize.matrix`` is not available,
    ``self.obj.sheet.valid`` is set to 0 and nothing else is changed.
    """

    class Quadrilateral(object):
        """Iterate over small test rectangles inside a quadrilateral.

        This is useful because some scanners have trapezoidal distortions.

        The corners are assumed to be given in the order top left,
        top right, bottom right, bottom left.
        """

        def __init__(self, p0, p1, p2, p3):
            self.x0 = p0[0]
            self.y0 = p0[1]
            self.x1 = p1[0]
            self.y1 = p1[1]
            self.x2 = p2[0]
            self.y2 = p2[1]
            self.x3 = p3[0]
            self.y3 = p3[1]

            # Edge slopes.  m0 (top, 0 -> 1) and m2 (bottom, 2 -> 3) are
            # dy/dx; m1 (right, 1 -> 2) and m3 (left, 3 -> 0) are dx/dy.
            # Coinciding coordinates (e.g. x0 == x1) raise
            # ZeroDivisionError here.
            self.m0 = (self.y1 - self.y0) / (self.x1 - self.x0)
            self.m1 = (self.x2 - self.x1) / (self.y2 - self.y1)
            self.m2 = (self.y3 - self.y2) / (self.x3 - self.x2)
            self.m3 = (self.x0 - self.x3) / (self.y0 - self.y3)

            # Axis aligned bounding box of the quadrilateral.
            self.top = min(self.y0, self.y1)
            self.bottom = max(self.y2, self.y3)
            self.left = min(self.x0, self.x3)
            self.right = max(self.x1, self.x2)

        @staticmethod
        def _walk_segment(start, dest, stride):
            """Yield points from ``start`` towards ``dest``, then ``dest`` itself.

            The points are spaced ``stride`` apart along the straight line.
            """
            x, y = start
            dest_x, dest_y = dest
            dist_x = dest_x - x
            dist_y = dest_y - y
            length = math.sqrt(dist_x ** 2 + dist_y ** 2)
            steps = length / stride
            # range (not xrange) so that this also runs on Python 3.
            # A segment shorter than one stride skips the loop entirely,
            # so ``steps`` is never used as a zero divisor.
            for step in range(int(steps)):
                yield x + dist_x * step / steps, y + dist_y * step / steps
            yield dest_x, dest_y

        def iterate_bb(self, step_x, step_y, test_width, test_height, padding):
            """Yield a grid of test rectangle origins covering the bounding box.

            ``padding`` is accepted for symmetry with the other iterators
            but is not used here.
            """
            y = self.top
            while y + test_height < self.bottom:
                x = self.left
                while x + test_width < self.right:
                    yield x, y
                    x += step_x
                y += step_y

        def iterate_outline(self, step_x, step_y, test_width, test_height, padding):
            """Yield test rectangle origins along the four edges.

            The origins are chosen so that the test rectangle stays
            ``padding`` away from the corners it is anchored to.  The edges
            are walked in the order top, bottom, left, right.
            """
            top_left = (self.x0 + padding, self.y0 + padding)
            top_right = (self.x1 - padding - test_width, self.y1 + padding)
            bottom_left = (self.x3 + padding, self.y3 - padding - test_height)
            bottom_right = (self.x2 - padding - test_width,
                            self.y2 - padding - test_height)

            edges = (
                (top_left, top_right, step_x),        # Top
                (bottom_left, bottom_right, step_x),  # Bottom
                (top_left, bottom_left, step_y),      # Left
                (top_right, bottom_right, step_y),    # Right
            )
            for start, dest, stride in edges:
                for point in self._walk_segment(start, dest, stride):
                    yield point

        def iterate(self, step_x, step_y, test_width, test_height, padding):
            """Yield all test rectangle origins for this quadrilateral.

            First the grid positions whose test rectangle lies inside all
            four edges (with ``padding`` distance) are yielded, then the
            positions along the outline.
            """
            for x, y in self.iterate_bb(step_x, step_y, test_width, test_height, padding):
                # Top edge must be above the test rectangle.
                ly = self.y0 + self.m0 * (x - self.x0)
                if not ly + padding < y:
                    continue

                # Bottom edge must be below the test rectangle.
                ly = self.y2 + self.m2 * (x - self.x2)
                if not ly - padding > y + test_height:
                    continue

                # Right edge must be right of the test rectangle.
                lx = self.x1 + self.m1 * (y - self.y1)
                if not lx - padding > x + test_width:
                    continue

                # Left edge must be left of the test rectangle.
                lx = self.x3 + self.m3 * (y - self.y3)
                if not lx + padding < x:
                    continue

                yield x, y

            for point in self.iterate_outline(step_x, step_y, test_width, test_height, padding):
                yield point

    img = self.obj.sheet.get_page_image(self.obj.page_number)
    if img is None or img.recognize.matrix is None:
        self.obj.sheet.valid = 0
        return

    x = self.obj.x
    y = self.obj.y
    width = self.obj.width
    height = self.obj.height

    # Scanning area and stepping
    # NOTE(review): step_y is read from textbox_scan_step_x, exactly as
    # before -- confirm whether a separate vertical step was intended.
    step_x = defs.textbox_scan_step_x
    step_y = defs.textbox_scan_step_x
    test_width = defs.textbox_scan_width
    test_height = defs.textbox_scan_height

    # extra_padding is always added to the box side at the end.
    extra_padding = defs.textbox_extra_padding
    scan_padding = defs.textbox_scan_uncorrected_padding

    # Start with the rectangle described by self.obj; it stays in use if
    # the corner search below fails with an AssertionError.
    quad = Quadrilateral((x, y), (x + width, y), (x + width, y + height), (x, y + height))
    try:
        quad = Quadrilateral(*img.recognize.find_box_corners(x, y, width, height))
        # Lower padding, as we found the corners and are therefore more accurate
        scan_padding = defs.textbox_scan_padding
    except AssertionError:
        pass

    surface = img.surface.surface
    matrix = img.recognize.matrix
    min_coverage = defs.textbox_scan_coverage

    # Union of all test rectangles with enough coverage as
    # [x, y, width, height]; None until the first hit.
    bbox = None
    for scan_x, scan_y in quad.iterate(step_x, step_y, test_width, test_height, scan_padding):
        # Use the image module directly as we are calling in *a lot*
        coverage = image.get_coverage(surface, matrix, scan_x, scan_y, test_width, test_height)
        if coverage > min_coverage:
            if bbox is None:
                bbox = [scan_x, scan_y, test_width, test_height]
            else:
                bbox_x = min(bbox[0], scan_x)
                bbox_y = min(bbox[1], scan_y)
                bbox[2] = max(bbox[0] + bbox[2], scan_x + test_width) - bbox_x
                bbox[3] = max(bbox[1] + bbox[3], scan_y + test_height) - bbox_y
                bbox[0] = bbox_x
                bbox[1] = bbox_y

    data = self.obj.data
    if bbox is not None and (bbox[2] > defs.textbox_minimum_writing_width or
                             bbox[3] > defs.textbox_minimum_writing_height):
        # Do not accept very small bounding boxes.
        total_padding = scan_padding + extra_padding
        data.state = True
        data.x = bbox[0] - total_padding
        data.y = bbox[1] - total_padding
        data.width = bbox[2] + 2 * total_padding
        data.height = bbox[3] + 2 * total_padding
    else:
        data.state = False
        data.x = self.obj.x
        data.y = self.obj.y
        data.width = self.obj.width
        data.height = self.obj.height
def recognize(self):
    """Look for writing inside the box and record the result on ``self.obj.data``.

    The box area is probed with small test rectangles.  Every rectangle
    whose coverage (as reported by ``image.get_coverage``) is above
    ``defs.textbox_scan_coverage`` is merged into one bounding box.

    If the merged bounding box is wider than
    ``defs.textbox_minimum_writing_width`` or taller than
    ``defs.textbox_minimum_writing_height``, ``self.obj.data.state`` is set
    to True and the data geometry becomes the bounding box grown by the
    scan padding plus the extra padding on every side.  Otherwise
    ``state`` is set to False and the data geometry is reset to the
    geometry of ``self.obj``.

    If the page image or its ``recognize.matrix`` is not available,
    ``self.obj.sheet.valid`` is set to 0 and nothing else is changed.
    """

    class Quadrilateral(object):
        """Iterate over small test rectangles inside a quadrilateral.

        This is useful because some scanners have trapezoidal distortions.

        The corners are assumed to be given in the order top left,
        top right, bottom right, bottom left.
        """

        def __init__(self, p0, p1, p2, p3):
            self.x0 = p0[0]
            self.y0 = p0[1]
            self.x1 = p1[0]
            self.y1 = p1[1]
            self.x2 = p2[0]
            self.y2 = p2[1]
            self.x3 = p3[0]
            self.y3 = p3[1]

            # Edge slopes.  m0 (top, 0 -> 1) and m2 (bottom, 2 -> 3) are
            # dy/dx; m1 (right, 1 -> 2) and m3 (left, 3 -> 0) are dx/dy.
            # Coinciding coordinates (e.g. x0 == x1) raise
            # ZeroDivisionError here.
            self.m0 = (self.y1 - self.y0) / (self.x1 - self.x0)
            self.m1 = (self.x2 - self.x1) / (self.y2 - self.y1)
            self.m2 = (self.y3 - self.y2) / (self.x3 - self.x2)
            self.m3 = (self.x0 - self.x3) / (self.y0 - self.y3)

            # Axis aligned bounding box of the quadrilateral.
            self.top = min(self.y0, self.y1)
            self.bottom = max(self.y2, self.y3)
            self.left = min(self.x0, self.x3)
            self.right = max(self.x1, self.x2)

        @staticmethod
        def _walk_segment(start, dest, stride):
            """Yield points from ``start`` towards ``dest``, then ``dest`` itself.

            The points are spaced ``stride`` apart along the straight line.
            """
            x, y = start
            dest_x, dest_y = dest
            dist_x = dest_x - x
            dist_y = dest_y - y
            length = math.sqrt(dist_x ** 2 + dist_y ** 2)
            steps = length / stride
            # range (not xrange) so that this also runs on Python 3.
            # A segment shorter than one stride skips the loop entirely,
            # so ``steps`` is never used as a zero divisor.
            for step in range(int(steps)):
                yield x + dist_x * step / steps, y + dist_y * step / steps
            yield dest_x, dest_y

        def iterate_bb(self, step_x, step_y, test_width, test_height, padding):
            """Yield a grid of test rectangle origins covering the bounding box.

            ``padding`` is accepted for symmetry with the other iterators
            but is not used here.
            """
            y = self.top
            while y + test_height < self.bottom:
                x = self.left
                while x + test_width < self.right:
                    yield x, y
                    x += step_x
                y += step_y

        def iterate_outline(self, step_x, step_y, test_width, test_height, padding):
            """Yield test rectangle origins along the four edges.

            The origins are chosen so that the test rectangle stays
            ``padding`` away from the corners it is anchored to.  The edges
            are walked in the order top, bottom, left, right.
            """
            top_left = (self.x0 + padding, self.y0 + padding)
            top_right = (self.x1 - padding - test_width, self.y1 + padding)
            bottom_left = (self.x3 + padding, self.y3 - padding - test_height)
            bottom_right = (self.x2 - padding - test_width,
                            self.y2 - padding - test_height)

            edges = (
                (top_left, top_right, step_x),        # Top
                (bottom_left, bottom_right, step_x),  # Bottom
                (top_left, bottom_left, step_y),      # Left
                (top_right, bottom_right, step_y),    # Right
            )
            for start, dest, stride in edges:
                for point in self._walk_segment(start, dest, stride):
                    yield point

        def iterate(self, step_x, step_y, test_width, test_height, padding):
            """Yield all test rectangle origins for this quadrilateral.

            First the grid positions whose test rectangle lies inside all
            four edges (with ``padding`` distance) are yielded, then the
            positions along the outline.
            """
            for x, y in self.iterate_bb(step_x, step_y, test_width, test_height, padding):
                # Top edge must be above the test rectangle.
                ly = self.y0 + self.m0 * (x - self.x0)
                if not ly + padding < y:
                    continue

                # Bottom edge must be below the test rectangle.
                ly = self.y2 + self.m2 * (x - self.x2)
                if not ly - padding > y + test_height:
                    continue

                # Right edge must be right of the test rectangle.
                lx = self.x1 + self.m1 * (y - self.y1)
                if not lx - padding > x + test_width:
                    continue

                # Left edge must be left of the test rectangle.
                lx = self.x3 + self.m3 * (y - self.y3)
                if not lx + padding < x:
                    continue

                yield x, y

            for point in self.iterate_outline(step_x, step_y, test_width, test_height, padding):
                yield point

    img = self.obj.sheet.get_page_image(self.obj.page_number)
    if img is None or img.recognize.matrix is None:
        self.obj.sheet.valid = 0
        return

    x = self.obj.x
    y = self.obj.y
    width = self.obj.width
    height = self.obj.height

    # Scanning area and stepping
    # NOTE(review): step_y is read from textbox_scan_step_x, exactly as
    # before -- confirm whether a separate vertical step was intended.
    step_x = defs.textbox_scan_step_x
    step_y = defs.textbox_scan_step_x
    test_width = defs.textbox_scan_width
    test_height = defs.textbox_scan_height

    # extra_padding is always added to the box side at the end.
    extra_padding = defs.textbox_extra_padding
    scan_padding = defs.textbox_scan_uncorrected_padding

    # Start with the rectangle described by self.obj; it stays in use if
    # the corner search below fails with an AssertionError.
    quad = Quadrilateral((x, y), (x + width, y), (x + width, y + height), (x, y + height))
    try:
        quad = Quadrilateral(*img.recognize.find_box_corners(x, y, width, height))
        # Lower padding, as we found the corners and are therefore more accurate
        scan_padding = defs.textbox_scan_padding
    except AssertionError:
        pass

    surface = img.surface.surface
    matrix = img.recognize.matrix
    min_coverage = defs.textbox_scan_coverage

    # Union of all test rectangles with enough coverage as
    # [x, y, width, height]; None until the first hit.
    bbox = None
    for scan_x, scan_y in quad.iterate(step_x, step_y, test_width, test_height, scan_padding):
        # Use the image module directly as we are calling in *a lot*
        coverage = image.get_coverage(surface, matrix, scan_x, scan_y, test_width, test_height)
        if coverage > min_coverage:
            if bbox is None:
                bbox = [scan_x, scan_y, test_width, test_height]
            else:
                bbox_x = min(bbox[0], scan_x)
                bbox_y = min(bbox[1], scan_y)
                bbox[2] = max(bbox[0] + bbox[2], scan_x + test_width) - bbox_x
                bbox[3] = max(bbox[1] + bbox[3], scan_y + test_height) - bbox_y
                bbox[0] = bbox_x
                bbox[1] = bbox_y

    data = self.obj.data
    if bbox is not None and (bbox[2] > defs.textbox_minimum_writing_width or
                             bbox[3] > defs.textbox_minimum_writing_height):
        # Do not accept very small bounding boxes.
        total_padding = scan_padding + extra_padding
        data.state = True
        data.x = bbox[0] - total_padding
        data.y = bbox[1] - total_padding
        data.width = bbox[2] + 2 * total_padding
        data.height = bbox[3] + 2 * total_padding
    else:
        data.state = False
        data.x = self.obj.x
        data.y = self.obj.y
        data.width = self.obj.width
        data.height = self.obj.height
def get_coverage(self, x, y, width, height):
    """Return the coverage of the given rectangle on this object's surface.

    The work is delegated to ``image.get_coverage``, which is handed
    ``self.obj.surface.surface`` and ``self.matrix`` together with the
    rectangle ``(x, y, width, height)``.
    """
    measure = image.get_coverage
    return measure(self.obj.surface.surface, self.matrix, x, y, width, height)