def crop(self):
    """Return the bounding Crop covering this object's letters.

    When ``self.underlines`` is non-empty, the result is additionally
    unioned with the combined crop of every underline.
    """
    has_underlines = bool(self.underlines)
    letters_box = Crop.union_all([letter.crop() for letter in self.letters])
    if not has_underlines:
        return letters_box

    underlines_box = Crop.union_all(
        [underline.crop() for underline in self.underlines]
    )
    return Crop.union(letters_box, underlines_box)
def process_image(original, dpi=None):
    """Rotate, crop, deskew/dewarp and binarize a scanned image.

    Args:
        original: Input image; its first two ``shape`` entries are read as
            (height, width) and it is passed to ``np.rot90``.
        dpi: Resolution to report. When falsy, it is estimated from the
            rotated image's pixel height.

    Returns:
        A ``(dpi, out_images)`` tuple, where ``out_images`` holds one
        binarized image per non-empty cropped page.

    Reads ``args.rotate`` and ``args.dewarp`` from the enclosing scope and
    pushes/pops entries on ``lib.debug_prefix`` while it runs.
    """
    original_rot90 = original

    # Floor division: ``args.rotate / 90`` yields a float on Python 3,
    # which ``range()`` rejects with TypeError.
    quarter_turns = int(args.rotate // 90)
    for _ in range(quarter_turns):
        original_rot90 = np.rot90(original_rot90)

    im_h, im_w = original_rot90.shape[:2]

    # Estimate dpi from the pixel height, rounded to the nearest 100.
    # NOTE(review): the original comment said the page is "about 10 inches"
    # tall, but the 1100.0 divisor corresponds to 11 inches -- confirm.
    if not dpi:
        dpi = int(round(im_h / 1100.0) * 100)
        print('detected dpi:', dpi)

    # A landscape image is treated as a two-page spread.
    split = im_w > im_h

    cropped_images = []
    if args.dewarp:
        lib.debug_prefix.append('dewarp')
        dewarped_images = dewarp.kim2014(original_rot90)
        for im in dewarped_images:
            bw = binarize.binarize(im, algorithm=binarize.sauvola, resize=1.0)
            lib.debug_prefix.append('crop')
            _, [lines] = crop(im, bw, split=False)
            lib.debug_prefix.pop()

            # The detected lines only decide whether the page has content;
            # the crop actually applied is derived from whitespace in ``bw``.
            c = Crop.from_lines(lines)
            if c.nonempty():
                cropped_images.append(Crop.from_whitespace(bw).apply(im))
        lib.debug_prefix.pop()
    else:
        bw = binarize.binarize(original_rot90,
                               algorithm=binarize.adaptive_otsu, resize=1.0)
        debug_imwrite('thresholded.png', bw)
        AH, line_sets = crop(original_rot90, bw, split=split)

        for lines in line_sets:
            c = Crop.from_lines(lines)
            if not c.nonempty():
                continue

            # NOTE(review): this switches debugging off globally and it is
            # never restored inside this function -- confirm it is intended.
            lib.debug = False
            bw_cropped = c.apply(bw)
            orig_cropped = c.apply(original_rot90)
            angle = algorithm.skew_angle(bw_cropped, original_rot90, AH, lines)
            if not np.isfinite(angle):
                # Fall back to no rotation when the skew estimate is NaN/inf.
                angle = 0.

            rotated = algorithm.safe_rotate(orig_cropped, angle)
            rotated_bw = binarize.binarize(rotated,
                                           algorithm=binarize.adaptive_otsu)
            # Re-detect lines on the deskewed page to get the final crop.
            # (A fine-dewarp step used to sit here but is disabled.)
            _, [new_lines] = crop(rotated, rotated_bw, split=False)
            new_crop = Crop.union_all([line.crop() for line in new_lines])

            if new_crop.nonempty():
                cropped_images.append(new_crop.apply(rotated))

    out_images = []
    lib.debug_prefix.append('binarize')
    for i, cropped in enumerate(cropped_images):
        lib.debug_prefix.append('page{}'.format(i))
        if lib.is_bw(original_rot90):
            out_images.append(binarize.otsu(cropped))
        else:
            out_images.append(
                binarize.ng2014_fallback(binarize.grayscale(cropped))
            )
        lib.debug_prefix.pop()
    lib.debug_prefix.pop()

    return dpi, out_images