예제 #1
0
    def run(self, fname, i):
        """Crop one page image and return the name of the cropped output.

        Args:
            fname: Path of the input image; converted with ``str()``.
            i: Zero-based position of the file; only used in the progress
                message, where it is printed as ``i + 1``.

        Returns:
            The string ``'<base>.pf.png'``, where ``base`` is the first item
            returned by ``ocrolib.allsplitext(fname)``.
        """
        fname = str(fname)
        print("Process file: ", fname, i + 1)
        base, _ = ocrolib.allsplitext(fname)
        binImg = ocrolib.read_image_binary(fname)

        lineDetectH = []
        lineDetectV = []
        fpath = self.remove_rular(fname, base)
        textarea, rgb, height, width = self.detect_textarea(fpath)

        # The configured value is multiplied by the page width to obtain an
        # integer for this page.  Previously the product was stored back
        # permanently, so a second call multiplied the already-scaled value
        # by the width again.  Keep the configured value and restore it once
        # this page is done.
        # NOTE(review): presumably the helpers below read
        # self.param['colSeparator'] -- confirm.
        col_sep_fraction = self.param['colSeparator']
        self.param['colSeparator'] = int(width * col_sep_fraction)
        try:
            if len(textarea) > 1:
                textarea = self.crop_area(textarea, binImg, rgb, base)
                if len(textarea) == 0:
                    self.select_borderLine(fpath, base, lineDetectH,
                                           lineDetectV)
            elif len(textarea) == 1 and (
                    height * width * 0.5 <
                    (abs(textarea[0][2] - textarea[0][0]) *
                     abs(textarea[0][3] - textarea[0][1]))):
                # A single detected area covering more than half of the page:
                # pad it by 20 px horizontally and 40 px vertically, clamped
                # to the page borders.
                x1, y1, x2, y2 = textarea[0]
                x1 = x1 - 20 if x1 > 20 else 0
                x2 = x2 + 20 if x2 < width - 20 else width
                y1 = y1 - 40 if y1 > 40 else 0
                y2 = y2 + 40 if y2 < height - 40 else height

                self.save_pf(base, [x1, y1, x2, y2])
            else:
                self.select_borderLine(fpath, base, lineDetectH, lineDetectV)
        finally:
            self.param['colSeparator'] = col_sep_fraction

        return '%s.pf.png' % base
예제 #2
0
def extract(image):
    """Segment a page image into text lines and write the results.

    Reads ``image`` as a binary image, inverts it, computes a page
    segmentation, orders the detected lines with ``psegutils``, renumbers the
    segmentation labels and writes the page segmentation plus one binary
    image per line.

    Args:
        image: Value passed to ``ocrolib.read_image_binary``.

    Returns:
        None.  Any ``Exception`` raised on the way is caught and reported on
        stdout.
    """
    try:
        binary = ocrolib.read_image_binary(image)
        binary = 1 - binary

        scale = psegutils.estimate_scale(binary)
        segmentation = compute_segmentation(binary, scale)

        # compute the reading order
        lines = psegutils.compute_lines(segmentation, scale)
        order = psegutils.reading_order([l.bounds for l in lines])
        lsort = psegutils.topsort(order)

        # renumber the labels so that they conform to the specs
        # (the label count comes from the segmentation array, not from the
        # compute_segmentation function object)
        nlabels = amax(segmentation) + 1
        renumber = zeros(nlabels, 'i')
        for i, v in enumerate(lsort):
            renumber[lines[v].label] = 0x010000 + (i + 1)
        segmentation = renumber[segmentation]

        # NOTE(review): this is a URL, yet it is used below as a file-system
        # prefix for the output files -- confirm the intended output location.
        outputdir = "http://127.0.0.1:5000/uploads/"

        lines = [lines[i] for i in lsort]
        ocrolib.write_page_segmentation("%s.pseg.png" % outputdir,
                                        segmentation)

        # NOTE(review): `args` is not defined in this function; it must be
        # provided at module level -- confirm.
        cleaned = ocrolib.remove_noise(binary, args.noise)
        for i, l in enumerate(lines):
            binline = psegutils.extract_masked(1 - cleaned,
                                               l,
                                               pad=args.pad,
                                               expand=args.expand)
            ocrolib.write_image_binary(
                "%s/01%04x.bin.png" % (outputdir, i + 1), binline)
    except Exception as exc:
        # Still best-effort, but say what went wrong instead of a bare
        # 'error', and let KeyboardInterrupt/SystemExit propagate.
        print('error: %s' % exc)
    def textimageseg(self, imf):
        """Split a binarized page into a text image and a non-text image.

        Args:
            imf: Path of the binarized input image.

        Returns:
            A two-item list ``[base + ".ts.png", base + ".nts.png"]`` naming
            the written text and non-text images, where ``base`` comes from
            ``ocrolib.allsplitext(imf)``.
        """
        page = ocrolib.read_image_binary(imf)
        # Scale by the maximum value and invert.
        page = 1 - page / page.max()
        n_rows, n_cols = page.shape

        # Mask and seed images, then their seed fill.
        fill_mask, fill_seed = self.pixMorphSequence_mask_seed_fill_holes(page)
        seedfill = self.pixSeedfillBinary(fill_mask, fill_seed)

        # Dilate with a 3x3 structuring element of ones.
        seedfill = ndimage.binary_dilation(seedfill, ones((3, 3)))

        # Bring the seed fill to the size of the input page.
        seedfill = self.expansion(seedfill, (n_rows, n_cols))

        non_text = array((1 - page * seedfill), dtype=int)
        non_text[0, 0] = 0  # only for visualisation purposes
        text = array((1 - page * (1 - seedfill)), dtype=int)
        text[0, 0] = 0  # only for visualisation purposes

        base, _ = ocrolib.allsplitext(imf)
        text_path = base + ".ts.png"
        non_text_path = base + ".nts.png"
        ocrolib.write_image_binary(text_path, text)
        ocrolib.write_image_binary(non_text_path, non_text)
        return [text_path, non_text_path]
예제 #4
0
    def process(self):
        """Segment every input file into text lines with ocropy.

        For each entry of ``self.input_files`` the file is downloaded and
        parsed, the referenced page image is read and inverted, and a page
        segmentation is computed.  One "dummy" text region spanning the whole
        page is added, with one text line per extracted region, and the
        result is registered in the workspace as XML.
        """
        for n, input_file in enumerate(self.input_files):
            log.info("INPUT FILE %i / %s", n, input_file)
            downloaded_file = self.workspace.download_file(input_file)
            log.info("downloaded_file %s", downloaded_file)

            pcgts = page_from_file(downloaded_file)
            page = pcgts.get_Page()
            page_width = page.get_imageWidth()
            page_height = page.get_imageHeight()
            # TODO binarized variant from get_AlternativeImage()
            image_url = page.imageFilename
            log.info("pcgts %s", pcgts)

            image_path = self.workspace.download_url(image_url)
            binary = 1 - ocrolib.read_image_binary(image_path)

            # A configured scale of 0 means "estimate it from the image".
            scale = self.parameter['scale']
            if scale == 0:
                scale = psegutils.estimate_scale(binary)
            log.debug(binary)

            pseg = self.compute_segmentation(binary, scale)
            log.debug("pseg=%s", pseg)

            # TODO reading order / renumbering of the lines

            regions = ocrolib.RegionExtractor()
            regions.setPageLines(pseg)

            # Single region whose polygon is the full page rectangle.
            page_polygon = "0,0 %s,0 %s,%s 0,%s" % (
                page_width, page_width, page_height, page_height)
            dummyRegion = TextRegionType(
                id="dummy", Coords=CoordsType(points=page_polygon))
            page.add_TextRegion(dummyRegion)

            # Region indices start at 1 here; index 0 is not visited.
            for lineno in range(1, regions.length()):
                bbox = regions.bbox(lineno)
                log.debug("id=%s bbox=%s", regions.id(lineno), bbox)
                line_coords = CoordsType(
                    points=points_from_y0x0y1x1(regions.bbox(lineno)))
                dummyRegion.add_TextLine(
                    TextLineType(id=concat_padded("line", lineno),
                                 Coords=line_coords))

            ID = concat_padded(self.output_file_grp, n)
            local_filename = "%s/%s.xml" % (self.output_file_grp, ID)
            self.workspace.add_file(ID=ID,
                                    file_grp=self.output_file_grp,
                                    mimetype=MIMETYPE_PAGE,
                                    local_filename=local_filename,
                                    content=to_xml(pcgts))
예제 #5
0
def apply_mask(binary, colseps):
    """Merge an optional mask image into the column separators.

    The mask is read from the fixed path ``./ocropy_test.mask.png``.  If
    reading raises ``IOError`` both inputs are returned unchanged.

    Args:
        binary: Page image array.
        colseps: Column separator array.

    Returns:
        ``(binary, separators)``: the element-wise minimum of ``binary`` and
        ``1 - separators``, and the element-wise maximum of ``colseps`` and
        the mask.
    """
    try:
        page_mask = ocrolib.read_image_binary("./ocropy_test.mask.png")
    except IOError:
        # No readable mask: nothing to apply.
        return binary, colseps

    combined_seps = np.maximum(colseps, page_mask)
    masked_binary = np.minimum(binary, 1 - combined_seps)
    DSAVE("masked_seps", combined_seps)
    return masked_binary, combined_seps
예제 #6
0
def apply_mask(binary, colseps):
    """Merge the page's mask image into the column separators.

    The mask is read from ``base + ".mask.png"``; ``base`` is not defined in
    this function and must exist at module level.  An ``IOError`` from
    reading the mask propagates to the caller.  The old handler re-raised it
    unchanged, which made the ``return binary, colseps`` fallback after the
    ``raise`` unreachable.

    Args:
        binary: Page image array.
        colseps: Column separator array.

    Returns:
        ``(binary, masked_seps)``: the element-wise minimum of ``binary`` and
        ``1 - masked_seps``, and the element-wise maximum of ``colseps`` and
        the mask.
    """
    # NOTE(review): if a missing mask should be tolerated, catch IOError here
    # and return (binary, colseps) -- confirm the intended behavior.
    mask = ocrolib.read_image_binary(base + ".mask.png")
    masked_seps = np.maximum(colseps, mask)
    binary = np.minimum(binary, 1 - masked_seps)
    return binary, masked_seps
예제 #7
0
def extract2(image):
    """Read ``image`` as a binary image and return its inversion.

    Args:
        image: Value passed to ``ocrolib.read_image_binary``.

    Returns:
        ``1 - binary`` for the array that was read.
    """
    # Indentation is spaces only; the body previously mixed spaces and tabs,
    # which the interpreter rejects as an indentation error.
    binary = ocrolib.read_image_binary(image)
    binary = 1 - binary
    return binary
예제 #8
0
def analyze_page_layout(binary, gray, rgb=None):
    """Segment a binarized page into text lines in reading order.

    The page is inverted, segmented with fixed parameters, and the resulting
    line labels are renumbered to ``0x010000 + position`` following the
    topologically sorted reading order.  Three debugging branches (loading
    test images, drawing bounding boxes, writing output files) are switched
    off by local flags.

    Args:
        binary: Binarized page image array.
        gray: Grayscale page image; only used by the disabled output branch.
        rgb: Optional colour image; only used by the disabled drawing branch.

    Returns:
        Nothing in the code visible here.
    """
    # Debug switches; all off.
    use_test_images = False
    show_bounding_boxes = False
    write_outputs = False

    # Parameters forwarded to compute_segmentation().
    hscale = 1.0  # Non-standard scaling of horizontal parameters.
    vscale = 1.0  # Non-standard scaling of vertical parameters.
    threshold = 0.2  # Baseline threshold.
    usegauss = True  # Use gaussian instead of uniform.
    maxseps = 0  # Maximum black column separators.
    sepwiden = 10  # Widen black separators (to account for warping).
    blackseps = True
    maxcolseps = 3  # Maximum number of whitespace column separators.
    csminheight = 10  # Minimum column height (units=scale).

    # Parameters of the (disabled) output branch.
    noise = 8  # Noise threshold for removing small components from lines.
    gray_output = True  # Also write lines cut from the grayscale page.
    pad = 3  # Padding for extracted lines.
    expand = 3  # Expand mask for grayscale extraction.

    if use_test_images:
        binary = ocrolib.read_image_binary('./ocropy_test.bin.png')
        gray = ocrolib.read_image_gray('./ocropy_test.nrm.png')

    binary = 1 - binary  # Invert.

    scale = psegutils.estimate_scale(binary)
    segmentation = compute_segmentation(
        binary, scale, blackseps, maxseps, maxcolseps, csminheight, sepwiden,
        usegauss, vscale, hscale, threshold, quiet=True)

    found_lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([ln.bounds for ln in found_lines])
    lsort = psegutils.topsort(order)

    # Renumber the labels so that they conform to the specs.
    renumber = np.zeros(np.amax(segmentation) + 1, 'i')
    for position, line_index in enumerate(lsort, start=1):
        renumber[found_lines[line_index].label] = 0x010000 + position
    segmentation = renumber[segmentation]  # Image.

    lines = [found_lines[line_index] for line_index in lsort]

    # Visualize bounding boxes.
    if show_bounding_boxes and rgb is not None:
        # REF [function] >> extract_masked() in ${OCROPY_HOME}/ocrolib/psegutils.py.
        for ln in lines:
            row_span, col_span = ln.bounds[0], ln.bounds[1]
            y0, x0 = int(row_span.start), int(col_span.start)
            y1, x1 = int(row_span.stop), int(col_span.stop)
            cv2.rectangle(rgb, (x0, y0), (x1, y1), (0, 0, 255), 1,
                          cv2.LINE_AA)
        cv2.imshow('Image', rgb)
        cv2.waitKey(0)

    # Output everything.
    if write_outputs:
        if not os.path.exists(outputdir):
            os.mkdir(outputdir)

        ocrolib.write_page_segmentation("%s.pseg.png" % outputdir,
                                        segmentation)
        cleaned = ocrolib.remove_noise(binary, noise)
        for number, ln in enumerate(lines, start=1):
            binline = psegutils.extract_masked(
                1 - cleaned, ln, pad=pad, expand=expand)  # Image.
            ocrolib.write_image_binary(
                "%s/01%04x.bin.png" % (outputdir, number), binline)
            if gray_output:
                grayline = psegutils.extract_masked(
                    gray, ln, pad=pad, expand=expand)  # Image.
                ocrolib.write_image_gray(
                    "%s/01%04x.nrm.png" % (outputdir, number), grayline)
예제 #9
0
def processPngFile(outRoot, origFile, fileNum):
    """Binarize and segment one PNG page and save a copy with line boxes.

    The input is copied into ``<outRoot>/<name>.<fileNum>/``, binarized and
    segmented into text lines.  The line images and the page segmentation are
    written next to the copy, and the copy is saved twice with a rectangle
    drawn around every region (``<inBase>.out.png`` and
    ``<outRoot>.out/<file name>``).

    Args:
        outRoot: Root directory for the per-file working directories.
        origFile: Path of the PNG file to process.
        fileNum: Number used (zero-padded to three digits) in the working
            directory name.

    Returns:
        True on success.  False when binarization fails, ``check_page``
        rejects the page, the estimated scale is NaN or above 1000, or more
        than ``maxlines`` lines are found.

    Notes:
        ``maxlines``, ``noise``, ``pad`` and ``expand`` are not defined in
        this function and must exist at module level.
    """
    baseName = os.path.basename(origFile)
    baseBase, _ = os.path.splitext(baseName)
    outDir = os.path.join(outRoot, "%s.%03d" % (baseBase, fileNum))
    inFile = os.path.join(outDir, baseName)

    os.makedirs(outDir, exist_ok=True)
    shutil.copy(origFile, inFile)

    inBase, _ = ocrolib.allsplitext(inFile)
    print("**  inBase=%s" % inBase)

    fname = inFile
    outputdir = inBase
    binFile = inBase + ".bin.png"
    outFile = inBase + ".out.png"
    outRoot2, outDir2 = os.path.split(outRoot)
    outFile2 = os.path.join(outRoot2, "%s.out" % outDir2, baseName)
    print("outFile2=%s" % outFile2)
    grayFile = inBase + ".nrm.png"
    psegFile = inBase + ".pseg.png"
    print("  inFile=%s" % inFile)
    print(" binFile=%s" % binFile)
    print("grayFile=%s" % grayFile)
    print(" outFile=%s" % outFile)
    assert inFile and binFile
    assert outFile != inFile
    assert outFile != binFile

    if not binarize(inFile, binFile, grayFile):
        binExists = os.path.exists(binFile)
        print("Couldn't binarize inFile=%s binFile=%s exists=%s" %
              (inFile, binFile, binExists))
        return False

    binary = ocrolib.read_image_binary(binFile)
    print("$$ %s=%s" % (binFile, desc(binary)))
    checktype(binary, ABINARY2)
    check = check_page(np.amax(binary) - binary)
    if check is not None:
        print("%s SKIPPED %s (use -n to disable this check)" % (inFile, check))
        return False

    binary = 1 - binary  # invert

    scale = psegutils.estimate_scale(binary)
    print("scale %f" % scale)
    if np.isnan(scale) or scale > 1000.0:
        print("%s: bad scale (%g); skipping\n" % (fname, scale))
        return False

    # find columns and text lines
    print("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if np.amax(segmentation) > maxlines:
        print("%s: too many lines %g" % (fname, np.amax(segmentation)))
        return False

    print("segmentation=%s" % desc(segmentation))
    print("number of lines %g" % np.amax(segmentation))

    # compute the reading order
    print("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)
    print("$$ lsort = %d = %s...%s" % (len(lsort), lsort[:10], lsort[-10:]))

    # renumber the labels so that they conform to the specs
    nlabels = np.amax(segmentation) + 1
    renumber = np.zeros(nlabels, 'i')
    for i, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (i + 1)
    segmentation = renumber[segmentation]

    # finally, output everything
    print("writing lines")
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(outputdir, exist_ok=True)
    lines = [lines[i] for i in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, noise)
    for i, l in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned,
                                           l,
                                           pad=pad,
                                           expand=expand)
        ocrolib.write_image_binary("%s/01%04x.bin.png" % (outputdir, i + 1),
                                   binline)
    # Report the index of the last line without relying on the loop
    # variable, which is unbound when no lines were found.
    print("%6d  %s %4.1f %d" % (len(lines) - 1, fname, scale, len(lines)))

    # to proceed, we need a pseg file and a subdirectory containing text lines
    assert os.path.exists(psegFile), "%s: no such file" % psegFile
    assert os.path.isdir(inBase), "%s: no such directory" % inBase

    # iterate through the text lines, based on the page segmentation file
    pseg = ocrolib.read_page_segmentation(psegFile)
    print("$$ %s=%s" % (psegFile, desc(pseg)))

    regions = ocrolib.RegionExtractor()
    print("$$ regions=%s" % regions)
    regions.setPageLines(pseg)

    im = Image.open(inFile)
    print("~~%s %s" % (inFile, im.size))
    print("$$ regions=%s=%s" % (regions, sorted(regions.__dict__)))
    print("$$ regions.length=%s" % regions.length())

    # One drawing context serves all rectangles.  Region indices start at 1
    # here; index 0 is not visited.
    draw = ImageDraw.Draw(im)
    for region_index in range(1, regions.length()):
        y0, x0, y1, x1 = regions.bbox(region_index)
        draw.rectangle((x0, y0, x1, y1), outline=(255, 0, 0), width=3)
        draw.rectangle((x0, y0, x1, y1), outline=(0, 0, 255), width=0)
    del draw

    # write output files
    print("outFile=%s" % outFile)
    im.save(outFile, "PNG")
    print("outFile2=%s" % outFile2)
    outDir2 = os.path.dirname(outFile2)
    os.makedirs(outDir2, exist_ok=True)
    im.save(outFile2, "PNG")
    assert os.path.exists(outFile2)
    return True
    def textline(self, arg):
        """Cut the text image of a page into individual text-line images.

        For every block listed in ``<base>/sorted_cuts.dat`` (created with a
        single full-page block when ``<base>/lines`` does not exist yet),
        ``<base>.ts.png`` is pasted centred onto a white canvas of the size
        of ``arg``, the external ``anyBaseOCR-gpageseg.py`` script is run on
        it, the line bounding boxes are written to
        ``<base>/sorted_lines.dat`` and the produced line images are
        converted into ``<base>/lines/``.

        Args:
            arg: Path of the binarized page image.

        Returns:
            List of the line image paths written to ``<base>/lines/``.
        """
        image = ocrolib.read_image_binary(arg)
        height, width = image.shape
        H = height
        W = width
        base, _ = ocrolib.allsplitext(arg)

        # NOTE(review): the os.system() calls below build shell command lines
        # from `base` by string formatting; paths containing spaces or shell
        # metacharacters will break or be interpreted by the shell.

        lines_dir = "%s/lines" % base
        if not os.path.exists(lines_dir):
            os.makedirs(lines_dir)
            # Default cut list: one block covering the whole page.
            with open('%s/sorted_cuts.dat' % base, 'w') as cuts_file:
                cuts_file.write("0 0 " + str(int(width)) + " " +
                                str(int(height)) + " 0 0 0 0\n")

        blockarray = []
        cuts_path = base + "/sorted_cuts.dat"
        if os.path.exists(cuts_path):
            with open(cuts_path, "r") as blocks:
                for block_index, block in enumerate(blocks):
                    words = block.split()
                    blockarray.append((int(words[0]), -int(words[1]),
                                       int(words[2]), int(words[3]),
                                       block_index))
        else:
            blockarray.append((0, 0, width, height, 0))

        j = 0
        lines = []
        for block in blockarray:
            # Only the block index is used; the line coordinates come from
            # the segmentation result below.
            i = block[4]
            os.system("convert %s.ts.png %s/temp.png" % (base, base))
            img = Image.open("%s.ts.png" % base, 'r')
            img_w, img_h = img.size
            background = Image.new('RGBA', (W, H), (255, 255, 255, 255))
            bg_w, bg_h = background.size
            offX = (bg_w - img_w) // 2
            offY = (bg_h - img_h) // 2
            offset = (offX, offY)
            background.paste(img, offset)
            background.save("%s/temp.png" % base)
            command = "python " + self.param[
                'libpath'] + "/cli/anyBaseOCR-gpageseg.py %s/temp.png -n --minscale %f --maxlines %f --scale %f --hscale %f --vscale %f --threshold %f --noise %d --maxseps %d --sepwiden %d --maxcolseps %d --csminaspect %f --csminheight %f -p %d -e %d -Q %d" % (
                    base, self.param['minscale'], self.param['maxlines'], self.
                    param['scale'], self.param['hscale'], self.param['vscale'],
                    self.param['threshold'], self.param['noise'],
                    self.param['maxseps'], self.param['sepwiden'],
                    self.param['maxcolseps'], self.param['csminaspect'],
                    self.param['csminheight'], self.param['pad'],
                    self.param['expand'], self.param['parallel'])
            if (self.param['blackseps']):
                command = command + " -b"
            if (self.param['usegauss']):
                command = command + " --usegauss"
            os.system(command)
            pseg = ocrolib.read_page_segmentation("%s/temp.pseg.png" % base)
            regions = ocrolib.RegionExtractor()
            regions.setPageLines(pseg)
            # The file is rewritten for every block; `with` closes (and
            # thereby flushes) it, which the previous code never did.
            with open('%s/sorted_lines.dat' % base, 'w') as lines_file:
                for h in range(1, regions.length()):
                    y0, x0, y1, x1 = regions.bbox(h)
                    # Remove the canvas offset and flip the vertical axis
                    # relative to the pasted image height.
                    lines_file.write(
                        str(int(x0 - offX)) + " " +
                        str(int(img_h - (y1 - offY))) + " " +
                        str(int(x1 - offX)) + " " +
                        str(int(img_h - (y0 - offY))) + " 0 0 0 0\n")
            filelist = glob.glob("%s/temp/*" % base)
            for infile in sorted(filelist):
                os.system("convert %s %s/lines/01%02x%02x.bin.png" %
                          (infile, base, i + 1, j + 1))
                lines.append("%s/lines/01%02x%02x.bin.png" %
                             (base, i + 1, j + 1))
                j += 1
            os.system("rm -r %s/temp/" % base)
            os.system("rm %s/temp.png %s/temp.pseg.png" % (base, base))
        return lines
예제 #11
0
 def test_read_image_binary(self):
     """Reading ``img_bin_disk`` and ``img_bin_mem`` must give equal arrays."""
     from_disk = ocrolib.read_image_binary(img_bin_disk)
     from_mem = ocrolib.read_image_binary(img_bin_mem)
     arrays_match = numpy.array_equal(from_disk, from_mem)
     self.assertTrue(arrays_match)
예제 #12
0
def process(job):
    """Segment one page image into text lines and write them to disk.

    Args:
        job: Pair ``(imagepath, i)``; ``i`` is only echoed in the final
            summary message when no line was written.

    Returns:
        The output directory (``imagepath`` without its extensions) on
        success, otherwise ``None`` after reporting the reason with
        ``print_error``.

    Notes:
        Sets the module-level ``base`` and reads options from the
        module-level ``args`` mapping.
    """
    global base

    def progress(message):
        # Progress messages are suppressed by the 'quiet' option.
        if not args['quiet']:
            print_info(message)

    imagepath, i = job
    base, _ = ocrolib.allsplitext(imagepath)
    outputdir = base
    imagename_base = os.path.basename(os.path.normpath(base))

    try:
        binary = ocrolib.read_image_binary(imagepath)
    except IOError:
        if ocrolib.trace:
            traceback.print_exc()
        print_error("cannot open either %s.bin.png or %s" % (base, imagepath))
        return

    checktype(binary, ABINARY2)

    if not args['nocheck']:
        check = check_page(amax(binary) - binary)
        if check is not None:
            print_error("%s SKIPPED %s (use -n to disable this check)" %
                        (imagepath, check))
            return

    binary = 1 - binary  # invert

    # A configured scale of 0 means "estimate it from the image".
    scale = args['scale']
    if scale == 0:
        scale = psegutils.estimate_scale(binary)
    print_info("scale %f" % (scale))
    if isnan(scale) or scale > 1000.0:
        print_error("%s: bad scale (%g); skipping\n" % (imagepath, scale))
        return
    if scale < args['minscale']:
        print_error("%s: scale (%g) less than --minscale; skipping\n" %
                    (imagepath, scale))
        return

    # Columns and text lines.
    progress("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if amax(segmentation) > args['maxlines']:
        print_error("%s: too many lines %g" % (imagepath, amax(segmentation)))
        return
    progress("number of lines %g" % amax(segmentation))

    # Reading order.
    progress("finding reading order")
    found_lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([ln.bounds for ln in found_lines])
    lsort = psegutils.topsort(order)

    # Renumber the labels so that they conform to the specs.
    renumber = zeros(amax(segmentation) + 1, 'i')
    for position, line_index in enumerate(lsort, start=1):
        renumber[found_lines[line_index].label] = 0x010000 + position
    segmentation = renumber[segmentation]

    # Write the page segmentation and one image per line.
    progress("writing lines")
    if not os.path.exists(outputdir):
        os.mkdir(outputdir)
    lines = [found_lines[line_index] for line_index in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, args['noise'])
    # `i` is deliberately reused here: the summary below reports the index of
    # the last written line, or the job's own `i` when there was none.
    for i, line in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned,
                                           line,
                                           pad=args['pad'],
                                           expand=args['expand'])
        line_path = "%s/%s_01%04x.bin.png" % (outputdir, imagename_base, i + 1)
        ocrolib.write_image_binary(line_path, binline)
    print_info("%6d  %s %4.1f %d" % (i, imagepath, scale, len(lines)))
    return outputdir
예제 #13
0
def process1(job):
    """Segment one page into text lines and write them to disk.

    The binarized page is read from ``<base>.bin.png`` and, if that fails,
    from ``fname`` itself.  With ``args.gray`` set, the normalized grayscale
    page ``<base>.nrm.png`` is read as well and grayscale line images are
    written next to the binary ones.

    Args:
        job: Pair ``(fname, i)``; ``i`` is only echoed in the final summary
            line when no line was written.

    Returns:
        None.  The function returns early, after printing the reason, when
        an input cannot be read, ``check_page`` rejects the page, the scale
        is unusable, or more than ``args.maxlines`` lines are found.

    Notes:
        Sets the module-level ``base`` and reads options from the
        module-level ``args``.
    """
    fname, i = job
    global base
    base, _ = ocrolib.allsplitext(fname)
    outputdir = base

    try:
        binary = ocrolib.read_image_binary(base + ".bin.png")
    except IOError:
        try:
            binary = ocrolib.read_image_binary(fname)
        except IOError:
            if ocrolib.trace:
                traceback.print_exc()
            print("cannot open either", base + ".bin.png", "or", fname)
            return

    checktype(binary, ABINARY2)

    if not args.nocheck:
        check = check_page(amax(binary) - binary)
        if check is not None:
            print(fname, "SKIPPED", check, "(use -n to disable this check)")
            return

    if args.gray:
        if os.path.exists(base + ".nrm.png"):
            gray = ocrolib.read_image_gray(base + ".nrm.png")
            checktype(gray, GRAYSCALE)
        else:
            # Without this branch `gray` stayed unbound and the type check
            # failed with a NameError instead of a usable message.
            sys.stderr.write(
                "Grayscale version %s.nrm.png not found; skipping\n" % base)
            return

    binary = 1 - binary  # invert

    if args.scale == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = args.scale
    print("scale", scale)
    if isnan(scale) or scale > 1000.0:
        sys.stderr.write("%s: bad scale (%g); skipping\n" % (fname, scale))
        return
    if scale < args.minscale:
        sys.stderr.write("%s: scale (%g) less than --minscale; skipping\n" %
                         (fname, scale))
        return

    # find columns and text lines

    if not args.quiet:
        print("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if amax(segmentation) > args.maxlines:
        print(fname, ": too many lines", amax(segmentation))
        return
    if not args.quiet:
        print("number of lines", amax(segmentation))

    # compute the reading order

    if not args.quiet:
        print("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs

    nlabels = amax(segmentation) + 1
    renumber = zeros(nlabels, 'i')
    for i, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (i + 1)
    segmentation = renumber[segmentation]

    # finally, output everything

    if not args.quiet:
        print("writing lines")
    if not os.path.exists(outputdir):
        os.mkdir(outputdir)
    lines = [lines[i] for i in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, args.noise)
    for i, l in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned,
                                           l,
                                           pad=args.pad,
                                           expand=args.expand)
        ocrolib.write_image_binary("%s/01%04x.bin.png" % (outputdir, i + 1),
                                   binline)
        if args.gray:
            grayline = psegutils.extract_masked(gray,
                                                l,
                                                pad=args.pad,
                                                expand=args.expand)
            ocrolib.write_image_gray("%s/01%04x.nrm.png" % (outputdir, i + 1),
                                     grayline)
    print("%6d" % i, fname, "%4.1f" % scale, len(lines))
예제 #14
0
# Mandatory parameter check: the METS file, both file groups and the working
# directory must all be given; otherwise show the usage and stop.
if not (args.mets and args.Input and args.Output and args.work):
    parser.print_help()
    print("Example: python ocrd-anyBaseOCR-cropping.py -m (mets input file path) -I (input-file-grp name) -O (output-file-grp name) -w (Working directory)")
    sys.exit(0)

# Create the working directory on first use.
if args.work and not os.path.exists(args.work):
    os.mkdir(args.work)

files = parseXML(args.mets)
fname = []
for i, f in enumerate(files):
	print "Process file: ", str(f) , i+1
	base,_ = ocrolib.allsplitext(str(f))
	binImg = ocrolib.read_image_binary(str(f))

	lineDetectH=[]; lineDetectV=[]
	fpath = remove_rular(str(f), base)
	textarea, rgb, height, width = detect_textarea(fpath)
	args.colSeparator = int(width * args.colSeparator)

	if len(textarea)>1:
		textarea = crop_area(textarea, binImg, rgb, base)
		if len(textarea)==0:
			select_borderLine(fpath, base)
	elif len(textarea)==1 and (height*width*0.5 <  (abs(textarea[0][2]-textarea[0][0]) * abs(textarea[0][3]-textarea[0][1]))):
		x1,y1,x2,y2 = textarea[0]		
		x1 = x1-20 if x1>20 else 0
		x2 = x2+20 if x2<width-20 else width
		y1 = y1-40 if y1>40 else 0