Esempio n. 1
0
 def crop(self):
     if self.underlines:
         return Crop.union(
             Crop.union_all([l.crop() for l in self.letters]),
             Crop.union_all([u.crop() for u in self.underlines]),
         )
     else:
         return Crop.union_all([l.crop() for l in self.letters])
Esempio n. 2
0
def process_image(original, dpi=None):
    original_rot90 = original

    for i in range(args.rotate / 90):
        original_rot90 = np.rot90(original_rot90)

    # original_rot90 = cv2.resize(original_rot90, (0, 0), None, 1.5, 1.5)
    im_h, im_w = original_rot90.shape[:2]
    # image height should be about 10 inches. round to 100
    if not dpi:
        dpi = int(round(im_h / 1100.0) * 100)
        print('detected dpi:', dpi)

    split = im_w > im_h # two pages

    cropped_images = []
    if args.dewarp:
        lib.debug_prefix.append('dewarp')
        dewarped_images = dewarp.kim2014(original_rot90)
        for im in dewarped_images:
            bw = binarize.binarize(im, algorithm=binarize.sauvola, resize=1.0)
            lib.debug_prefix.append('crop')
            _, [lines] = crop(im, bw, split=False)
            lib.debug_prefix.pop()
            c = Crop.from_lines(lines)
            if c.nonempty():
                cropped_images.append(Crop.from_whitespace(bw).apply(im))
        lib.debug_prefix.pop()
    else:
        bw = binarize.binarize(original_rot90, algorithm=binarize.adaptive_otsu, resize=1.0)
        debug_imwrite('thresholded.png', bw)
        AH, line_sets = crop(original_rot90, bw, split=split)

        for lines in line_sets:
            c = Crop.from_lines(lines)
            if c.nonempty():
                lib.debug = False
                bw_cropped = c.apply(bw)
                orig_cropped = c.apply(original_rot90)
                angle = algorithm.skew_angle(bw_cropped, original_rot90, AH, lines)
                if not np.isfinite(angle): angle = 0.
                rotated = algorithm.safe_rotate(orig_cropped, angle)

                rotated_bw = binarize.binarize(rotated, algorithm=binarize.adaptive_otsu)
                _, [new_lines] = crop(rotated, rotated_bw, split=False)

                # dewarped = algorithm.fine_dewarp(rotated, new_lines)
                # _, [new_lines] = crop(rotated, rotated_bw, split=False)
                new_crop = Crop.union_all([line.crop() for line in new_lines])

                if new_crop.nonempty():
                    # cropped = new_crop.apply(dewarped)
                    cropped = new_crop.apply(rotated)
                    cropped_images.append(cropped)

    out_images = []
    lib.debug_prefix.append('binarize')
    for i, cropped in enumerate(cropped_images):
        lib.debug_prefix.append('page{}'.format(i))
        if lib.is_bw(original_rot90):
            out_images.append(binarize.otsu(cropped))
        else:
            out_images.append(
                binarize.ng2014_fallback(binarize.grayscale(cropped))
            )
        lib.debug_prefix.pop()
    lib.debug_prefix.pop()

    return dpi, out_images