Beispiel #1
0
def deskew(fpath, job):
    """Deskew one page image, binarize it and write ``<base>.ds.png``.

    The page is read as grayscale, rotated by the skew angle estimated on
    its central region when ``args.maxskew`` is positive, rescaled between
    the ``args.lo`` and ``args.hi`` percentiles and finally thresholded at
    ``args.threshold``.

    Args:
        fpath: path of the input image.
        job: job number; only used in the progress message.

    Returns:
        Path of the binary image that was written.
    """
    verbose = args.parallel < 2
    out_base, _ = ocrolib.allsplitext(fpath)
    short_name = ocrolib.allsplitext(os.path.basename(fpath))[0]

    if verbose:
        print_info("=== %s %-3d" % (fpath, job))
    page = ocrolib.read_image_gray(fpath)

    # Skew estimation and rotation (skipped when maxskew is not positive).
    angle = 0
    if args.maxskew > 0:
        if verbose:
            print_info("estimating skew angle")
        rows, cols = page.shape
        skip_r = int(args.bignore * rows)
        skip_c = int(args.bignore * cols)
        # The image is inverted for estimation/rotation and inverted back
        # once the rotation is done.
        page = amax(page) - page
        page -= amin(page)
        core = page[skip_r:rows - skip_r, skip_c:cols - skip_c]
        limit = args.maxskew
        steps = int(2 * args.maxskew * args.skewsteps)
        angle = estimate_skew_angle(core, linspace(-limit, limit, steps + 1))
        page = interpolation.rotate(page, angle, mode='constant', reshape=0)
        page = amax(page) - page

    # Low/high threshold estimation on the page without its outer margin.
    if verbose:
        print_info("estimating thresholds")
    rows, cols = page.shape
    skip_r = int(args.bignore * rows)
    skip_c = int(args.bignore * cols)
    sample = page[skip_r:rows - skip_r, skip_c:cols - skip_c]
    if args.escale > 0:
        # Only regions with significant local variance take part in the
        # percentile estimate; this makes lo/hi more reliable.
        sigma = args.escale * 20.0
        spread = sample - filters.gaussian_filter(sample, sigma)
        spread = filters.gaussian_filter(spread ** 2, sigma) ** 0.5
        busy = (spread > 0.3 * amax(spread))
        span = int(args.escale * 50)
        busy = morphology.binary_dilation(busy, structure=ones((span, 1)))
        busy = morphology.binary_dilation(busy, structure=ones((1, span)))
        if args.debug > 0:
            imshow(busy)
            ginput(1, args.debug)
        sample = sample[busy]
    values = sample.ravel()
    lo = stats.scoreatpercentile(values, args.lo)
    hi = stats.scoreatpercentile(values, args.hi)

    # Rescale to [0, 1] and threshold.
    if verbose:
        print_info("rescaling")
    page -= lo
    page /= (hi - lo)
    page = clip(page, 0, 1)
    if args.debug > 0:
        imshow(page, vmin=0, vmax=1)
        ginput(1, args.debug)
    binarized = 1 * (page > args.threshold)

    # Report and write the thresholded image.
    print_info("%s lo-hi (%.2f %.2f) angle %4.1f" % (short_name, lo, hi, angle))
    if verbose:
        print_info("writing")
    target = out_base + ".ds.png"
    ocrolib.write_image_binary(target, binarized)
    return target
Beispiel #2
0
def write_to_xml(fpath):
    """Add a ``mets:fileGrp`` for the given images and save the METS file.

    The METS document at ``args.mets`` is parsed, a new file group whose
    ``USE`` attribute is ``args.Output`` is appended to the first
    ``mets:fileSec`` element, and the result is written into ``args.work``.
    The output is named after ``args.OutputMets`` (with ``.xml`` appended
    when missing) or, when that option is empty, after the input METS file.

    Args:
        fpath: iterable of image paths; one ``mets:file`` entry is created
            per path.
    """
    xmldoc = minidom.parse(args.mets)
    subRoot = xmldoc.createElement('mets:fileGrp')
    subRoot.setAttribute('USE', args.Output)

    for f in fpath:
        basefile = ocrolib.allsplitext(os.path.basename(f))[0]
        child = xmldoc.createElement('mets:file')
        child.setAttribute('ID', 'BIN_' + basefile)
        child.setAttribute('GROUPID', 'P_' + basefile)
        child.setAttribute('MIMETYPE', "image/png")

        subChild = xmldoc.createElement('mets:FLocat')
        subChild.setAttribute('LOCTYPE', "URL")
        subChild.setAttribute('xlink:href', f)

        subRoot.appendChild(child)
        child.appendChild(subChild)

    xmldoc.getElementsByTagName('mets:fileSec')[0].appendChild(subRoot)

    if not args.OutputMets:
        out_name = os.path.basename(args.mets)
    elif args.OutputMets.endswith(".xml"):
        out_name = args.OutputMets
    else:
        out_name = args.OutputMets + '.xml'

    # The context manager guarantees the file is flushed and closed; the
    # previous version left the handle open.
    with open(os.path.join(args.work, out_name), "w") as metsFileSave:
        metsFileSave.write(xmldoc.toxml())
Beispiel #3
0
def select_borderLine(arg, base):
	"""Derive a crop box from detected border lines and store it via save_pf.

	Lines are detected in the image at ``arg``; ``BorderLine`` is invoked once
	per page side, and every intersection of the entries in the module-level
	``lineDetectH`` / ``lineDetectV`` lists then tightens the crop box.

	Returns the box as ``[Xstart, Ystart, Xend, Yend]``.
	"""
	basefile = ocrolib.allsplitext(os.path.basename(arg))[0]
	img, imgHeight, imgWidth, Hlines, Vlines = detect_lines(arg)

	# One BorderLine call per side: top, left, bottom, right.
	for limit, candidates, axis, side in (
		(imgHeight*0.25, Hlines, 1, "top"),
		(imgWidth*0.4, Vlines, 0, "left"),
		(imgHeight*0.75, Hlines, 1, "bottom"),
		(imgWidth*0.6, Vlines, 0, "right"),
	):
		BorderLine(limit, candidates, index=axis, flag=side)

	# Intersect every horizontal candidate with every vertical one.
	crossings = []
	for hline in lineDetectH:
		for vline in lineDetectV:
			px, py = get_intersect(
				(hline[0], hline[1]), (hline[2], hline[3]),
				(vline[0], vline[1]), (vline[2], vline[3]))
			crossings.append([px, py])

	# Start from the full page and shrink towards the intersections.
	Xstart, Ystart = 0, 0
	Xend, Yend = imgWidth, imgHeight
	for px, py in crossings:
		if px < imgWidth*0.4:
			left = int(px) + 10
		else:
			left = 10
		Xstart = max(Xstart, left)

		if px > imgWidth*0.6:
			right = int(px) - 10
		else:
			right = int(imgWidth) - 10
		Xend = min(Xend, right)

		if py < imgHeight*0.25:
			top = int(py) + 10
		else:
			top = 10
		Ystart = max(Ystart, top)

		if py > imgHeight*0.75:
			bottom = int(py) - 15
		else:
			bottom = int(imgHeight) - 15
		Yend = min(Yend, bottom)

	if Xend < 0:
		Xend = 10
	if Yend < 0:
		Yend = 15

	box = [Xstart, Ystart, Xend, Yend]
	save_pf(base, [Xstart, Ystart, Xend, Yend])
	return box
Beispiel #4
0
    def run(self, fname, i):
        """Crop one page image and return the path ``<base>.pf.png``.

        Args:
            fname: path of the binarized page (converted with ``str``).
            i: zero-based file counter, only used in the progress message.

        Returns:
            The string ``'<base>.pf.png'`` built from the input path.
        """
        fname = str(fname)
        print("Process file: ", fname, i + 1)
        base, _ = ocrolib.allsplitext(fname)
        binary_page = ocrolib.read_image_binary(fname)

        horizontal_lines, vertical_lines = [], []
        cleaned_path = self.remove_rular(fname, base)
        regions, rgb, height, width = self.detect_textarea(cleaned_path)
        # NOTE(review): the stored value is overwritten with the scaled one,
        # so a second call on the same instance scales it again -- confirm
        # that this is intended.
        self.param['colSeparator'] = int(width * self.param['colSeparator'])

        use_border_lines = True
        if len(regions) > 1:
            regions = self.crop_area(regions, binary_page, rgb, base)
            use_border_lines = len(regions) == 0
        elif len(regions) == 1:
            only = regions[0]
            box_area = abs(only[2] - only[0]) * abs(only[3] - only[1])
            if height * width * 0.5 < box_area:
                # A single region covering more than half the page: pad it
                # (clamped to the page) and store it directly.
                left, top, right, bottom = only
                left = left - 20 if left > 20 else 0
                right = right + 20 if right < width - 20 else width
                top = top - 40 if top > 40 else 0
                bottom = bottom + 40 if bottom < height - 40 else height
                self.save_pf(base, [left, top, right, bottom])
                use_border_lines = False

        if use_border_lines:
            self.select_borderLine(cleaned_path, base, horizontal_lines,
                                   vertical_lines)

        return '%s.pf.png' % base
def process1(arg):
	"""Read, check, normalize and recognize one text-line image.

	``arg`` is a ``(trial, fname)`` pair. Returns ``None`` for empty or
	constant images and ``(0, [], 0, trial, fname)`` when the line check or
	the recognizer rejects the line.

	NOTE(review): the visible part of this function ends after the
	recognition error handler; nothing is returned here on success.
	"""
	(trial, fname) = arg
	base, _ = ocrolib.allsplitext(fname)
	line = ocrolib.read_image_gray(fname)
	raw_line = line.copy()
	if prod(line.shape) == 0: return None
	if amax(line) == amin(line): return None

	if not args.nocheck:
		check = check_line(amax(line) - line)
		if check is not None:
			print_error(fname + " SKIPPED " + check + " (use -n to disable this check)")
			return (0, [], 0, trial, fname)

	if not args.nolineest:
		assert "dew.png" not in fname, "don't dewarp dewarped images"
		# the line normalizer is measured on the inverted image scaled to [0, 1]
		temp = amax(line) - line
		temp = temp * 1.0 / amax(temp)
		lnorm.measure(temp)
		line = lnorm.normalize(line, cval=amax(line))
	else:
		assert "dew.png" in fname, "only apply to dewarped images"

	line = lstm.prepare_line(line, args.pad)
	try:
		pred = network.predictString(line)
	except RecognitionError:
		# The old ``except RecognitionError, err`` form is a syntax error on
		# Python 3 and the bound name was never used.
		# TODO: Handle this in the extraction processor
		print_info(fname + " Failed to predict line. Skipping.")
		return (0, [], 0, trial, fname)
    def textimageseg(self, imf):
        """Split a binarized page into a text image and a non-text image.

        Args:
            imf: path of the binarized input image.

        Returns:
            ``[<base>.ts.png, <base>.nts.png]`` -- the paths of the written
            text part and non-text part.
        """
        # Load the page and map it to 1 - pixel / max.
        ink = ocrolib.read_image_binary(imf)
        ink = 1 - ink / ink.max()
        n_rows, n_cols = ink.shape

        # Mask and seed images, then their seed fill.
        mask_img, seed_img = self.pixMorphSequence_mask_seed_fill_holes(ink)
        filled = self.pixSeedfillBinary(mask_img, seed_img)

        # Dilate the fill with a 3x3 structuring element and expand it back
        # to the size of the page.
        filled = ndimage.binary_dilation(filled, ones((3, 3)))
        filled = self.expansion(filled, (n_rows, n_cols))

        # Compose both outputs; the first pixel is forced to 0 for
        # visualisation purposes only.
        non_text = array((1 - ink * filled), dtype=int)
        non_text[0, 0] = 0
        text = array((1 - ink * (1 - filled)), dtype=int)
        text[0, 0] = 0

        base, _ = ocrolib.allsplitext(imf)
        text_path = base + ".ts.png"
        non_text_path = base + ".nts.png"
        ocrolib.write_image_binary(text_path, text)
        ocrolib.write_image_binary(non_text_path, non_text)
        return [text_path, non_text_path]
Beispiel #7
0
    def process(self):
        """Dewarp every input page with NVIDIA's pix2pixHD.

        For each input file the page border is read from the PAGE XML, the
        image is cropped to it and stored in a temporary data root,
        pix2pixHD's ``test.py`` is run on that directory, and the
        synthesized image (if one was produced) is copied next to the
        original image as ``<name>.dw.jpg``. Temporary directories are
        removed afterwards.

        Exits the interpreter with status 1 when CUDA is unavailable or the
        ``pix2pixHD`` parameter does not point to a directory.
        """
        # Local import: only this method needs it.
        import subprocess

        path = Path(self.parameter['pix2pixHD']).absolute()
        print(path)
        if not torch.cuda.is_available():
            print("Your system has no CUDA installed. No GPU detected.")
            sys.exit(1)

        if not path.is_dir():
            print("""\
                NVIDIA's pix2pixHD was not found at '%s'. Make sure the `pix2pixHD` parameter
                points to the local path to the cloned pix2pixHD repository.

                pix2pixHD can be downloaded from https://github.com/NVIDIA/pix2pixHD
                """ % path)
            sys.exit(1)

        for input_file in self.input_files:
            local_input_file = self.workspace.download_file(input_file)
            pcgts = parse(local_input_file.url, silence=True)
            page = pcgts.get_Page()
            image_coords = page.get_Border().get_Coords().points.split()
            fname = page.imageFilename

            # Page border: first point is the top-left, third the
            # bottom-right corner.
            min_x, min_y = image_coords[0].split(",")
            max_x, max_y = image_coords[2].split(",")
            img_tmp_dir = "OCR-D-IMG/test_A"
            # An image without a directory component lives in the current
            # directory. Without this fallback the paths below would start
            # at the filesystem root (e.g. "/models") and the cleanup would
            # try to remove that.
            img_dir = os.path.dirname(str(fname)) or "."
            Path(img_tmp_dir).mkdir(parents=True, exist_ok=True)

            crop_region = int(min_x), int(min_y), int(max_x), int(max_y)
            cropped_img = self.crop_image(fname, crop_region)

            base, _ = ocrolib.allsplitext(fname)
            stem = os.path.basename(base)
            filename = stem + ".png"
            cropped_img.save(os.path.join(img_tmp_dir, filename))

            # Argument list instead of a shell string: paths containing
            # spaces or shell metacharacters are passed through verbatim.
            # ``--resize_or_crop`` is given twice, as before; the later
            # occurrence carries the configured value.
            command = [
                "python", os.path.join(str(path), "test.py"),
                "--dataroot", os.path.dirname(img_tmp_dir),
                "--checkpoints_dir", "./",
                "--name", "models",
                "--results_dir", img_dir,
                "--label_nc", "0",
                "--no_instance",
                "--no_flip",
                "--resize_or_crop", "none",
                "--n_blocks_global", "10",
                "--n_local_enhancers", "2",
                "--gpu_ids", str(self.parameter['gpu_id']),
                "--loadSize", "%d" % self.parameter['resizeHeight'],
                "--fineSize", "%d" % self.parameter['resizeWidth'],
                "--resize_or_crop", str(self.parameter['imgresize']),
            ]
            subprocess.call(command)

            models_dir = os.path.join(img_dir, "models")
            dewarped_image = Path(
                os.path.join(models_dir, "test_latest", "images",
                             stem + "_synthesized_image.jpg"))
            if dewarped_image.is_file():
                shutil.copy(str(dewarped_image),
                            os.path.join(img_dir, stem + ".dw.jpg"))

            # Clean up the temporary input and the pix2pixHD result tree.
            if Path(img_tmp_dir).is_dir():
                shutil.rmtree(img_tmp_dir)
            if Path(models_dir).is_dir():
                shutil.rmtree(models_dir)
Beispiel #8
0
def process(arg):
    """Recognize one text-line image and write its result files.

    Args:
        arg: ``(trial, fname)`` pair; ``fname`` is the line image path.

    Returns:
        ``None`` for an empty or constant image, the tuple
        ``(0, [], 0, trial, fname)`` when the line check rejects the image,
        otherwise the list of written files (optionally ``.llocs`` and
        ``.prob``, always ``.txt``).
    """
    trial, fname = arg
    written = []
    base, _ = ocrolib.allsplitext(fname)
    line = ocrolib.read_image_gray(fname)
    raw_line = line.copy()
    if prod(line.shape) == 0 or amax(line) == amin(line):
        return None

    if not args['nocheck']:
        problem = check_line(amax(line) - line)
        if problem is not None:
            print_error("%s SKIPPED %s (use -n to disable this check)" %
                        (fname, problem))
            return (0, [], 0, trial, fname)

    # Measure the normalizer on the inverted image scaled to [0, 1], then
    # normalize the original line.
    inverted = amax(line) - line
    lnorm.measure(inverted * 1.0 / amax(inverted))
    line = lnorm.normalize(line, cval=amax(line))

    pad = args['pad']
    line = lstm.prepare_line(line, pad)
    pred = network.predictString(line)

    if args['llocs']:
        # Character positions mapped back to columns of the raw line.
        positions = lstm.translate_back(network.outputs, pos=1)
        scale = len(raw_line.T) * 1.0 / (len(network.outputs) - 2 * pad)
        llocs_path = base + ".llocs"
        with codecs.open(llocs_path, "w", "utf-8") as locs:
            for column, label in positions:
                symbol = network.l2s([label])
                locs.write("%s\t%.1f\n" % (symbol, (column - pad) * scale))
        written.append(llocs_path)

    if args['probabilities']:
        # Per-character probabilities.
        scored = lstm.translate_back(network.outputs, pos=2)
        prob_path = base + ".prob"
        with codecs.open(prob_path, "w", "utf-8") as probs:
            for label, probability in scored:
                symbol = network.l2s([label])
                probs.write("%s\t%s\n" % (symbol, probability))
        written.append(prob_path)

    if not args['nonormalize']:
        pred = ocrolib.normalize_text(pred)

    if not args['quiet']:
        print_info(fname + ":" + pred)

    text_path = base + ".txt"
    ocrolib.write_text(text_path, pred)
    written.append(text_path)
    return written
Beispiel #9
0
def remove_rular(arg, base):
	"""Paint over the most likely ruler in an image and save ``<base>.pf.png``.

	Candidate rectangles are the bounding boxes of all contours whose area
	lies between ``args.minRularArea`` and ``args.maxRularArea`` (as fractions
	of the image area). Rectangles nested inside another candidate are
	dropped; the remaining ones are filtered by width, position and aspect
	ratio, and the one containing the most zero-valued pixels is filled with
	white.

	Args:
		arg: path of the input image.
		base: output path prefix.

	Returns:
		Path of the written image (``base + '.pf.png'``); the image is written
		even when no ruler candidate is found.
	"""
	img = cv2.imread(arg)
	gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	# findContours returns (image, contours, hierarchy) in OpenCV 3.x but
	# (contours, hierarchy) in 2.x and 4.x; the contours are always the
	# second-to-last element.
	contours = cv2.findContours(gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

	height, width = gray.shape
	imgArea = height*width

	## Get bounding box x,y,w,h of each contour, largest area first
	rects = [cv2.boundingRect(cnt) for cnt in contours]
	rects = sorted(rects, key=lambda x: (x[2]*x[3]), reverse=True)
	## keep only rectangles whose area lies within the configured fractions of the image area
	rects = [r for r in rects if (imgArea*args.maxRularArea)>(r[2]*r[3])>(imgArea*args.minRularArea)]

	## detect child rectangles. Usually those are not rulers.
	removeRect = set()
	for i, rect1 in enumerate(rects):
		(x1,y1,w1,h1) = rect1
		for rect2 in rects[i+1:]:
			(x2,y2,w2,h2) = rect2
			if (x1<x2) and (y1<y2) and (x1+w1>x2+w2) and (y1+h1>y2+h2):
				removeRect.add(rect2)

	## remove child rectangles.
	rects = [r for r in rects if r not in removeRect]

	predictRular = []
	for rect in rects:
		(x,y,w,h) = rect
		if (w<width*args.rularWidth) and ((y>height*args.positionBelow) or ((x+w)<width*args.positionLeft) or (x>width*args.positionRight)):
			if (args.rularRatioMin<round(float(w)/float(h),2)<args.rularRatioMax) or (args.rularRatioMin<round(float(h)/float(w),2)<args.rularRatioMax):
				blackPixel = np.count_nonzero(img[y:y+h,x:x+w]==0)
				predictRular.append((x,y,w,h,blackPixel))

	## Finally use the number of black pixels to avoid a false ruler
	if predictRular:
		# first candidate with the highest black-pixel count
		x,y,w,h,t = max(predictRular, key=lambda r: r[4])
		cv2.rectangle(img, (x-15,y-15), (x+w+20,y+h+20), (255, 255, 255), cv2.FILLED)
	save_file_path = base + '.pf.png'
	cv2.imwrite(save_file_path, img)
	return save_file_path
    def process(self):
        """Compute a page border for every input file and store it as PAGE XML.

        For each input file the page image is loaded, the ruler is removed,
        text areas are detected and a bounding box is chosen from them (or
        from detected border lines as a fallback). The box is written as the
        page's ``Border`` and the updated PAGE document is added to the
        output file group.
        """
        # The configured value is a factor of the page width. It is kept
        # here so that every page is scaled from the original factor; the
        # previous code scaled the already-scaled value again for each
        # further page.
        col_separator_ratio = self.parameter['colSeparator']
        try:
            for (n, input_file) in enumerate(self.input_files):
                pcgts = page_from_file(
                    self.workspace.download_file(input_file))
                fname = pcgts.get_Page().imageFilename
                img = self.workspace.resolve_image_as_pil(fname)
                print("Process file: ", fname)

                img_array = ocrolib.pil2array(img)
                img_array_bin = np.array(
                    img_array > ocrolib.midrange(img_array), 'i')

                lineDetectH = []
                lineDetectV = []
                img_array_rr = self.remove_rular(img_array)

                textarea, img_array_rr_ta, height, width = \
                    self.detect_textarea(img_array_rr)
                self.parameter['colSeparator'] = int(
                    width * col_separator_ratio)

                if len(textarea) > 1:
                    textarea = self.crop_area(textarea, img_array_bin,
                                              img_array_rr_ta)

                    if len(textarea) == 0:
                        min_x, min_y, max_x, max_y = self.select_borderLine(
                            img_array_rr, lineDetectH, lineDetectV)
                    else:
                        min_x, min_y, max_x, max_y = textarea[0]
                elif len(textarea) == 1 and (
                        height * width * 0.5 <
                        (abs(textarea[0][2] - textarea[0][0]) *
                         abs(textarea[0][3] - textarea[0][1]))):
                    # A single region covering more than half of the page:
                    # pad it, clamped to the page bounds.
                    x1, y1, x2, y2 = textarea[0]
                    x1 = x1 - 20 if x1 > 20 else 0
                    x2 = x2 + 20 if x2 < width - 20 else width
                    y1 = y1 - 40 if y1 > 40 else 0
                    y2 = y2 + 40 if y2 < height - 40 else height

                    # Use the padded box; it used to be computed and then
                    # discarded in favour of the unpadded region.
                    min_x, min_y, max_x, max_y = x1, y1, x2, y2
                else:
                    min_x, min_y, max_x, max_y = self.select_borderLine(
                        img_array_rr, lineDetectH, lineDetectV)

                brd = BorderType(
                    Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                                      (min_x, min_y, max_x, min_y,
                                       max_x, max_y, min_x, max_y)))
                pcgts.get_Page().set_Border(brd)

                # Use input_file's basename for the new file -
                # this way the files retain the same basenames:
                file_id = input_file.ID.replace(self.input_file_grp,
                                                self.output_file_grp)
                if file_id == input_file.ID:
                    file_id = concat_padded(self.output_file_grp, n)
                self.workspace.add_file(ID=file_id,
                                        file_grp=self.output_file_grp,
                                        pageId=input_file.pageId,
                                        mimetype=MIMETYPE_PAGE,
                                        local_filename=os.path.join(
                                            self.output_file_grp,
                                            file_id + '.xml'),
                                        content=to_xml(pcgts).encode('utf-8'))
        finally:
            # Leave the parameter as configured so that a later call starts
            # from the original factor again.
            self.parameter['colSeparator'] = col_separator_ratio
Beispiel #11
0
def process(job):
    """Normalize, deskew and binarize one page image.

    Args:
        job: ``(fname, i)`` pair -- image path and job number (the number is
            only used in the progress message).

    Returns:
        Path of the written ``<base>.bin.png``, or ``None`` when the image
        is empty or rejected by the page check.
    """
    fname, i = job
    print_info("# %s" % (fname))
    if args['parallel'] < 2: print_info("=== %s %-3d" % (fname, i))
    raw = ocrolib.read_image_gray(fname)

    # perform image normalization
    image = raw - amin(raw)
    if amax(image) == amin(image):
        print_info("# image is empty: %s" % (fname))
        return
    image /= amax(image)

    if not args['nocheck']:
        check = check_page(amax(image) - image)
        if check is not None:
            # The words used to be concatenated without separating spaces.
            print_error("%s SKIPPED %s (use -n to disable this check)" %
                        (fname, check))
            return

    # flatten the image by estimating the local whitelevel
    comment = ""
    if args['parallel'] < 2: print_info("flattening")
    m = interpolation.zoom(image, args['zoom'])
    m = filters.percentile_filter(m, args['perc'], size=(args['range'], 2))
    m = filters.percentile_filter(m, args['perc'], size=(2, args['range']))
    m = interpolation.zoom(m, 1.0 / args['zoom'])
    # zooming down and up again may change the size slightly; use the
    # common extent of both arrays
    w, h = minimum(array(image.shape), array(m.shape))
    flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)

    # estimate skew angle and rotate
    if args['maxskew'] > 0:
        if args['parallel'] < 2: print_info("estimating skew angle")
        d0, d1 = flat.shape
        o0, o1 = int(args['bignore'] * d0), int(args['bignore'] * d1)
        flat = amax(flat) - flat
        flat -= amin(flat)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        ma = args['maxskew']
        ms = int(2 * args['maxskew'] * args['skewsteps'])
        angle = estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat) - flat
    else:
        angle = 0

    # estimate low and high thresholds
    if args['parallel'] < 2: print_info("estimating thresholds")
    d0, d1 = flat.shape
    o0, o1 = int(args['bignore'] * d0), int(args['bignore'] * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if args['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = args['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), args['lo'])
    hi = stats.scoreatpercentile(est.ravel(), args['hi'])
    # rescale the image to get the gray scale image
    if args['parallel'] < 2: print_info("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    binarized = 1 * (flat > args['threshold'])

    # output the thresholded image
    print_info("%s lo-hi (%.2f %.2f) angle %4.1f %s" %
               (fname, lo, hi, angle, comment))
    if args['parallel'] < 2: print_info("writing")
    base, _ = ocrolib.allsplitext(fname)
    outputfile_bin = base + ".bin.png"
    ocrolib.write_image_binary(outputfile_bin, binarized)
    return outputfile_bin
Beispiel #12
0
def processPngFile(outRoot, origFile, fileNum):
    """Segment one page image into text lines and draw the line boxes.

    The image is copied into ``<outRoot>/<name>.<fileNum>/``, binarized and
    segmented; the page segmentation and one binary image per text line are
    written there. Finally the bounding box of every line is drawn onto the
    page, which is saved as ``<base>.out.png`` and again under a sibling
    ``<outRoot>.out`` directory.

    Args:
        outRoot: root directory for the per-file output directories.
        origFile: path of the source image.
        fileNum: running number, used in the output directory name and in
            the summary message.

    Returns:
        ``True`` on success, ``False`` when the page is skipped
        (binarization failure, page check, bad scale, too many lines).
    """
    baseName = os.path.basename(origFile)
    baseBase, _ = os.path.splitext(baseName)
    outDir = os.path.join(outRoot, "%s.%03d" % (baseBase, fileNum))
    inFile = os.path.join(outDir, baseName)

    os.makedirs(outDir, exist_ok=True)
    shutil.copy(origFile, inFile)

    inBase, _ = ocrolib.allsplitext(inFile)
    print("**  inBase=%s" % inBase)

    fname = inFile
    outputdir = inBase
    binFile = inBase + ".bin.png"
    outFile = inBase + ".out.png"
    outRoot2, outDir2 = os.path.split(outRoot)
    outFile2 = os.path.join(outRoot2, "%s.out" % outDir2, baseName)
    print("outFile2=%s" % outFile2)
    grayFile = inBase + ".nrm.png"
    psegFile = inBase + ".pseg.png"
    print("  inFile=%s" % inFile)
    print(" binFile=%s" % binFile)
    print("grayFile=%s" % grayFile)
    print(" outFile=%s" % outFile)
    assert inFile and binFile
    assert outFile != inFile
    assert outFile != binFile

    if not binarize(inFile, binFile, grayFile):
        binExists = os.path.exists(binFile)
        print("Couldn't binarize inFile=%s binFile=%s exists=%s" %
              (inFile, binFile, binExists))
        return False

    binary = ocrolib.read_image_binary(binFile)
    print("$$ %s=%s" % (binFile, desc(binary)))
    checktype(binary, ABINARY2)
    check = check_page(np.amax(binary) - binary)
    if check is not None:
        print("%s SKIPPED %s (use -n to disable this check)" % (inFile, check))
        return False

    binary = 1 - binary  # invert

    scale = psegutils.estimate_scale(binary)
    print("scale %f" % scale)
    if np.isnan(scale) or scale > 1000.0:
        print("%s: bad scale (%g); skipping\n" % (fname, scale))
        return False

    # find columns and text lines
    print("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if np.amax(segmentation) > maxlines:
        print("%s: too many lines %g" % (fname, np.amax(segmentation)))
        return False

    print("segmentation=%s" % desc(segmentation))
    print("number of lines %g" % np.amax(segmentation))

    # compute the reading order
    print("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)
    print("$$ lsort = %d = %s...%s" % (len(lsort), lsort[:10], lsort[-10:]))

    # renumber the labels so that they conform to the specs
    nlabels = np.amax(segmentation) + 1
    renumber = np.zeros(nlabels, 'i')
    for i, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (i + 1)
    segmentation = renumber[segmentation]

    # finally, output everything
    print("writing lines")
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # ``exists`` followed by ``mkdir``
    os.makedirs(outputdir, exist_ok=True)
    lines = [lines[i] for i in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, noise)
    for i, l in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned,
                                           l,
                                           pad=pad,
                                           expand=expand)
        ocrolib.write_image_binary("%s/01%04x.bin.png" % (outputdir, i + 1),
                                   binline)
    # Report the file number. The previous code printed the loop variable
    # ``i``, which is unbound when no lines were found and otherwise just
    # the index of the last line.
    print("%6d  %s %4.1f %d" % (fileNum, fname, scale, len(lines)))

    # to proceed, we need a pseg file and a subdirectory containing text lines
    assert os.path.exists(psegFile), "%s: no such file" % psegFile
    assert os.path.isdir(inBase), "%s: no such directory" % inBase

    # iterate through the text lines in reading order, based on the page segmentation file
    pseg = ocrolib.read_page_segmentation(psegFile)
    print("$$ %s=%s" % (psegFile, desc(pseg)))

    regions = ocrolib.RegionExtractor()
    print("$$ regions=%s" % regions)
    regions.setPageLines(pseg)

    im = Image.open(inFile)
    print("~~%s %s" % (inFile, im.size))
    print("$$ regions=%s=%s" % (regions, sorted(regions.__dict__)))
    print("$$ regions.length=%s" % regions.length())

    # One drawing context for all boxes instead of a new one per region.
    draw = ImageDraw.Draw(im)
    for region in range(1, regions.length()):
        y0, x0, y1, x1 = regions.bbox(region)
        draw.rectangle((x0, y0, x1, y1), outline=(255, 0, 0), width=3)
        draw.rectangle((x0, y0, x1, y1), outline=(0, 0, 255), width=0)
    del draw

    # write output files
    print("outFile=%s" % outFile)
    im.save(outFile, "PNG")
    print("outFile2=%s" % outFile2)
    outDir2 = os.path.dirname(outFile2)
    os.makedirs(outDir2, exist_ok=True)
    im.save(outFile2, "PNG")
    assert os.path.exists(outFile2)
    return True
    def textline(self, arg):
        """Extract the text lines of a page into ``<base>/lines``.

        For every block listed in ``<base>/sorted_cuts.dat`` (a single
        full-page block is written on the first run) the text-only image
        ``<base>.ts.png`` is pasted centred on a white canvas of the page
        size, segmented with the external ``anyBaseOCR-gpageseg.py`` script,
        and the resulting line images are converted into
        ``<base>/lines/01XXYY.bin.png``. The line boxes of the block are
        written to ``<base>/sorted_lines.dat``.

        Args:
            arg: path of the binarized page image.

        Returns:
            List of the line image paths that were produced.

        NOTE(review): the external commands are still built as shell
        strings, so paths containing spaces or shell metacharacters are not
        handled.
        """
        image = ocrolib.read_image_binary(arg)
        height, width = image.shape
        base, _ = ocrolib.allsplitext(arg)

        if not os.path.exists("%s/lines" % base):
            os.system("mkdir -p %s/lines" % base)
            # first run: a single block covering the whole page
            with open('%s/sorted_cuts.dat' % base, 'w') as cuts:
                cuts.write("0 0 %d %d 0 0 0 0\n" % (int(width), int(height)))

        blockarray = []
        if os.path.exists(base + "/sorted_cuts.dat"):
            # the handle used to be left open; ``with`` closes it
            with open(base + "/sorted_cuts.dat", "r") as blocks:
                for index, block in enumerate(blocks):
                    words = block.split()
                    blockarray.append((int(words[0]), -int(words[1]),
                                       int(words[2]), int(words[3]), index))
        else:
            blockarray.append((0, 0, width, height, 0))

        j = 0
        lines = []
        for block in blockarray:
            i = block[4]  # block number, part of the line file names
            os.system("convert %s.ts.png %s/temp.png" % (base, base))
            img = Image.open("%s.ts.png" % base, 'r')
            img_w, img_h = img.size
            # centre the text image on a white canvas of the page size
            background = Image.new('RGBA', (width, height),
                                   (255, 255, 255, 255))
            bg_w, bg_h = background.size
            offX = (bg_w - img_w) // 2
            offY = (bg_h - img_h) // 2
            background.paste(img, (offX, offY))
            background.save("%s/temp.png" % base)
            command = "python " + self.param[
                'libpath'] + "/cli/anyBaseOCR-gpageseg.py %s/temp.png -n --minscale %f --maxlines %f --scale %f --hscale %f --vscale %f --threshold %f --noise %d --maxseps %d --sepwiden %d --maxcolseps %d --csminaspect %f --csminheight %f -p %d -e %d -Q %d" % (
                    base, self.param['minscale'], self.param['maxlines'],
                    self.param['scale'], self.param['hscale'],
                    self.param['vscale'], self.param['threshold'],
                    self.param['noise'], self.param['maxseps'],
                    self.param['sepwiden'], self.param['maxcolseps'],
                    self.param['csminaspect'], self.param['csminheight'],
                    self.param['pad'], self.param['expand'],
                    self.param['parallel'])
            if (self.param['blackseps']):
                command = command + " -b"
            if (self.param['usegauss']):
                command = command + " --usegauss"
            os.system(command)
            pseg = ocrolib.read_page_segmentation("%s/temp.pseg.png" % base)
            regions = ocrolib.RegionExtractor()
            regions.setPageLines(pseg)
            # The file used to be left open, so buffered lines were not
            # guaranteed to reach the disk; ``with`` flushes and closes it.
            with open('%s/sorted_lines.dat' % base, 'w') as sorted_lines:
                for h in range(1, regions.length()):
                    y0, x0, y1, x1 = regions.bbox(h)
                    # undo the centring offset and flip the vertical axis
                    sorted_lines.write("%d %d %d %d 0 0 0 0\n" % (
                        int(x0 - offX), int(img_h - (y1 - offY)),
                        int(x1 - offX), int(img_h - (y0 - offY))))
            filelist = glob.glob("%s/temp/*" % base)
            for infile in sorted(filelist):
                line_path = "%s/lines/01%02x%02x.bin.png" % (base, i + 1,
                                                              j + 1)
                os.system("convert %s %s" % (infile, line_path))
                lines.append(line_path)
                j += 1
            os.system("rm -r %s/temp/" % base)
            os.system("rm %s/temp.png %s/temp.pseg.png" % (base, base))
        return lines
Beispiel #14
0
# Mandatory parameter check: the METS path, the input and output file groups
# and the working directory must all be given; otherwise print the help text
# plus a usage example and exit with status 0.
if not args.mets or not args.Input or not args.Output or not args.work:
    parser.print_help()
    print("Example: python ocrd-anyBaseOCR-cropping.py -m (mets input file path) -I (input-file-grp name) -O (output-file-grp name) -w (Working directory)")
    sys.exit(0)

# Create the working directory if it does not exist yet.
if args.work:
    if not os.path.exists(args.work):
        os.mkdir(args.work)

# parseXML is defined elsewhere; its result is iterated as the files to process.
files = parseXML(args.mets)
fname=[]
for i, f in enumerate(files):
	# Progress message with a 1-based file counter.
	print "Process file: ", str(f) , i+1
	base,_ = ocrolib.allsplitext(str(f))
	binImg = ocrolib.read_image_binary(str(f))

	lineDetectH=[]; lineDetectV=[]
	# remove_rular returns the path that is handed to detect_textarea below.
	fpath = remove_rular(str(f), base)
	textarea, rgb, height, width = detect_textarea(fpath)
	# NOTE(review): args.colSeparator is overwritten in place on every
	# iteration, so the width factor compounds from one file to the next --
	# confirm that this is intended.
	args.colSeparator = int(width * args.colSeparator)

	if len(textarea)>1:
		# Several candidate areas: let crop_area reduce them; if nothing is
		# left afterwards, fall back to select_borderLine.
		textarea = crop_area(textarea, binImg, rgb, base)
		if len(textarea)==0:
			select_borderLine(fpath, base)
	# Exactly one candidate whose bounding-box area exceeds half of the page area.
	elif len(textarea)==1 and (height*width*0.5 <  (abs(textarea[0][2]-textarea[0][0]) * abs(textarea[0][3]-textarea[0][1]))):
		x1,y1,x2,y2 = textarea[0]		
		# Widen the box by 20 pixels on the left and right, clamped to [0, width].
		x1 = x1-20 if x1>20 else 0
		x2 = x2+20 if x2<width-20 else width
    def process(self):
        """Binarize every input page and register the result in the workspace.

        For each input file the page image is read as grayscale and
        normalized to the range [0, 1].  Unless the image already looks
        binary (or the ``gray`` parameter forces normalization), the local
        white level is estimated with percentile filters and subtracted.
        The result is rescaled between the ``lo`` and ``hi`` percentiles and
        thresholded with ``threshold``.  The binary image is written as
        ``<base>.bin.png`` next to the page image, attached to the page as an
        ``AlternativeImage`` and added to the output file group.

        Pages that are empty or that fail ``check_page`` are logged and
        skipped; processing then continues with the next input file.
        """
        for (n, input_file) in enumerate(self.input_files):
            pcgts = page_from_file(self.workspace.download_file(input_file))
            page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID
            page = pcgts.get_Page()
            # Label used in every log message about this page.
            file_label = input_file.pageId or input_file.ID
            LOG.info("INPUT FILE %s", file_label)
            page_image, page_xywh, _ = self.workspace.image_from_page(
                page, page_id)
            LOG.debug("page image type: %s", type(page_image))

            raw = ocrolib.read_image_gray(page_image.filename)
            self.dshow(raw, "input")

            # perform image normalization
            image = raw - amin(raw)
            if amax(image) == amin(image):
                LOG.info("# image is empty: %s", file_label)
                # Skip only this page; a `return` here would silently drop
                # every remaining input file as well.
                continue
            image /= amax(image)

            if not self.parameter['nocheck']:
                check = self.check_page(amax(image) - image)
                if check is not None:
                    # Pass the pieces as logging arguments: the former
                    # `pageId or ID + "..."` expression bound the `+` first
                    # and logged nothing but the page id.
                    LOG.error("%s SKIPPED. %s (use -n to disable this check)",
                              file_label, check)
                    continue

            # check whether the image is already effectively binarized
            if self.parameter['gray']:
                extreme = 0
            else:
                extreme = (np.sum(image < 0.05) +
                           np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
            if extreme > 0.95:
                comment = "no-normalization"
                flat = image
            else:
                comment = ""
                # if not, we need to flatten it by estimating the local whitelevel
                LOG.info("Flattening")
                m = interpolation.zoom(image, self.parameter['zoom'])
                m = filters.percentile_filter(m,
                                              self.parameter['perc'],
                                              size=(self.parameter['range'],
                                                    2))
                m = filters.percentile_filter(m,
                                              self.parameter['perc'],
                                              size=(2,
                                                    self.parameter['range']))
                m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
                if self.parameter['debug'] > 0:
                    clf()
                    imshow(m, vmin=0, vmax=1)
                    ginput(1, self.parameter['debug'])
                # Zooming down and up again may change the shape slightly,
                # so only the common region is used.
                w, h = minimum(array(image.shape), array(m.shape))
                flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
                if self.parameter['debug'] > 0:
                    clf()
                    imshow(flat, vmin=0, vmax=1)
                    ginput(1, self.parameter['debug'])

            # estimate low and high thresholds
            LOG.info("Estimating Thresholds")
            d0, d1 = flat.shape
            o0, o1 = int(self.parameter['bignore'] * d0), int(
                self.parameter['bignore'] * d1)
            est = flat[o0:d0 - o0, o1:d1 - o1]
            if self.parameter['escale'] > 0:
                # by default, we use only regions that contain
                # significant variance; this makes the percentile
                # based low and high estimates more reliable
                e = self.parameter['escale']
                v = est - filters.gaussian_filter(est, e * 20.0)
                v = filters.gaussian_filter(v**2, e * 20.0)**0.5
                v = (v > 0.3 * amax(v))
                v = morphology.binary_dilation(v,
                                               structure=ones(
                                                   (int(e * 50), 1)))
                v = morphology.binary_dilation(v,
                                               structure=ones(
                                                   (1, int(e * 50))))
                if self.parameter['debug'] > 0:
                    imshow(v)
                    ginput(1, self.parameter['debug'])
                est = est[v]
            lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
            hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])
            # rescale the image to get the gray scale image
            LOG.info("Rescaling")
            flat -= lo
            flat /= (hi - lo)
            flat = clip(flat, 0, 1)
            if self.parameter['debug'] > 0:
                imshow(flat, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])
            binarized = 1 * (flat > self.parameter['threshold'])

            # output the thresholded image
            LOG.info("%s lo-hi (%.2f %.2f) %s", file_label, lo, hi, comment)
            LOG.info("writing")
            if self.parameter['debug'] > 0 or self.parameter['show']:
                clf()
                gray()
                imshow(binarized)
                ginput(1, max(0.1, self.parameter['debug']))
            base, _ = ocrolib.allsplitext(page_image.filename)
            ocrolib.write_image_binary(base + ".bin.png", binarized)

            # Derive the output file ID from the input ID; fall back to a
            # padded counter when the input ID does not contain the input
            # file group name.
            file_id = input_file.ID.replace(self.input_file_grp,
                                            self.output_file_grp)
            if file_id == input_file.ID:
                file_id = concat_padded(self.output_file_grp, n)

            page.add_AlternativeImage(
                AlternativeImageType(filename=base + ".bin.png",
                                     comment="binarized"))

            self.workspace.add_file(ID=file_id,
                                    file_grp=self.output_file_grp,
                                    pageId=input_file.pageId,
                                    mimetype="image/png",
                                    url=base + ".bin.png",
                                    local_filename=os.path.join(
                                        self.output_file_grp,
                                        file_id + '.xml'),
                                    content=to_xml(pcgts).encode('utf-8'))
Beispiel #16
0
def process1(job):
    """Segment one binarized page into text lines and write them to disk.

    ``job`` is a ``(fname, i)`` pair: the page image path and the job index
    used in the final summary line.  The binary image is read from
    ``<base>.bin.png`` if present, otherwise from ``fname`` itself.  The
    page segmentation is written to ``<base>.pseg.png`` and every line image
    to ``<base>/01XXXX.bin.png`` (plus ``.nrm.png`` when ``args.gray`` is
    set).

    The function returns ``None`` in all cases; pages that cannot be read,
    fail the page check, have an implausible scale or contain too many
    lines are reported and skipped.
    """
    fname, i = job
    global base
    base, _ = ocrolib.allsplitext(fname)
    outputdir = base

    try:
        binary = ocrolib.read_image_binary(base + ".bin.png")
    except IOError:
        try:
            binary = ocrolib.read_image_binary(fname)
        except IOError:
            if ocrolib.trace:
                traceback.print_exc()
            print("cannot open either", base + ".bin.png", "or", fname)
            return

    checktype(binary, ABINARY2)

    if not args.nocheck:
        check = check_page(amax(binary) - binary)
        if check is not None:
            print(fname, "SKIPPED", check, "(use -n to disable this check)")
            return

    if args.gray:
        nrm_path = base + ".nrm.png"
        if not os.path.exists(nrm_path):
            # Without this guard `gray` stayed unbound and the checktype
            # call below failed with UnboundLocalError.
            print(fname, "SKIPPED", nrm_path,
                  "not found (required for grayscale line output)")
            return
        gray = ocrolib.read_image_gray(nrm_path)
        checktype(gray, GRAYSCALE)

    binary = 1 - binary  # invert

    if args.scale == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = args.scale
    print("scale", scale)
    if isnan(scale) or scale > 1000.0:
        sys.stderr.write("%s: bad scale (%g); skipping\n" % (fname, scale))
        return
    if scale < args.minscale:
        sys.stderr.write("%s: scale (%g) less than --minscale; skipping\n" %
                         (fname, scale))
        return

    # find columns and text lines

    if not args.quiet:
        print("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if amax(segmentation) > args.maxlines:
        print(fname, ": too many lines", amax(segmentation))
        return
    if not args.quiet:
        print("number of lines", amax(segmentation))

    # compute the reading order

    if not args.quiet:
        print("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs

    nlabels = amax(segmentation) + 1
    renumber = zeros(nlabels, 'i')
    # Dedicated loop names keep the job index `i` intact for the summary.
    for rank, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (rank + 1)
    segmentation = renumber[segmentation]

    # finally, output everything

    if not args.quiet:
        print("writing lines")
    if not os.path.exists(outputdir):
        os.mkdir(outputdir)
    lines = [lines[k] for k in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, args.noise)
    for lineno, l in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned,
                                           l,
                                           pad=args.pad,
                                           expand=args.expand)
        ocrolib.write_image_binary(
            "%s/01%04x.bin.png" % (outputdir, lineno + 1), binline)
        if args.gray:
            grayline = psegutils.extract_masked(gray,
                                                l,
                                                pad=args.pad,
                                                expand=args.expand)
            ocrolib.write_image_gray(
                "%s/01%04x.nrm.png" % (outputdir, lineno + 1), grayline)
    # Summary: job index, file name, estimated scale and number of lines.
    print("%6d" % i, fname, "%4.1f" % scale, len(lines))
Beispiel #17
0
def process(job):
    """Segment one binarized page into text lines and write them to disk.

    ``job`` is an ``(imagepath, i)`` pair: the binary page image and the job
    index used in the final summary line.  The page segmentation is written
    to ``<base>.pseg.png`` and every line image to
    ``<base>/<imagename>_01XXXX.bin.png``.

    Returns the output directory (``<base>``) on success and ``None`` when
    the page is skipped (unreadable, failed page check, implausible scale
    or too many lines).
    """
    imagepath, i = job
    global base
    base, _ = ocrolib.allsplitext(imagepath)
    outputdir = base
    imagename_base = os.path.basename(os.path.normpath(base))

    try:
        binary = ocrolib.read_image_binary(imagepath)
    except IOError:
        if ocrolib.trace: traceback.print_exc()
        # Only `imagepath` is attempted, so only that file is reported.
        print_error("cannot open %s" % (imagepath))
        return

    checktype(binary, ABINARY2)

    if not args['nocheck']:
        check = check_page(amax(binary) - binary)
        if check is not None:
            print_error("%s SKIPPED %s (use -n to disable this check)" %
                        (imagepath, check))
            return

    binary = 1 - binary  # invert

    if args['scale'] == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = args['scale']
    print_info("scale %f" % (scale))
    if isnan(scale) or scale > 1000.0:
        print_error("%s: bad scale (%g); skipping\n" % (imagepath, scale))
        return
    if scale < args['minscale']:
        print_error("%s: scale (%g) less than --minscale; skipping\n" %
                    (imagepath, scale))
        return

    # find columns and text lines

    if not args['quiet']: print_info("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if amax(segmentation) > args['maxlines']:
        print_error("%s: too many lines %g" % (imagepath, amax(segmentation)))
        return
    if not args['quiet']: print_info("number of lines %g" % amax(segmentation))

    # compute the reading order

    if not args['quiet']: print_info("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs

    nlabels = amax(segmentation) + 1
    renumber = zeros(nlabels, 'i')
    # Dedicated loop names keep the job index `i` intact for the summary.
    for rank, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (rank + 1)
    segmentation = renumber[segmentation]

    # finally, output everything
    if not args['quiet']: print_info("writing lines")
    if not os.path.exists(outputdir):
        os.mkdir(outputdir)
    lines = [lines[k] for k in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, args['noise'])
    for lineno, l in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned,
                                           l,
                                           pad=args['pad'],
                                           expand=args['expand'])
        ocrolib.write_image_binary(
            "%s/%s_01%04x.bin.png" % (outputdir, imagename_base, lineno + 1),
            binline)
    # Summary: job index, image path, estimated scale and number of lines.
    print_info("%6d  %s %4.1f %d" % (i, imagepath, scale, len(lines)))
    return outputdir
Beispiel #18
0
# Resume from the trial counter stored on the network unless an explicit,
# non-negative start trial was given.
start = args.start if args.start>=0 else network.last_trial

for trial in range(start,args.ntrain):
    # Record the next trial on the network object.
    network.last_trial = trial+1

    # Display on every args.display-th trial (never when args.display <= 0).
    do_display = (args.display>0 and trial%args.display==0)
    do_update = 1

    if args.movie and do_display:
        # Movie mode: on display trials use the fixed sample and clear do_update.
        fname = args.moviesample
        do_update = 0
    else:
        # Otherwise draw one training sample at random.
        fname = pyrandom.sample(inputs,1)[0]

    base,_ = ocrolib.allsplitext(fname)
    try:
        line = ocrolib.read_image_gray(fname)
        transcript = ocrolib.read_text(base+".gt.txt")
    except IOError as e:
        # Unreadable image or ground truth: report and move on to the next trial.
        print("ERROR", e)
        continue

    if not args.nolineest:
        # The line normalizer measures on the inverted image and then
        # normalizes the original, padding with the image maximum.
        assert "dew.png" not in fname,"don't dewarp already dewarped lines"
        network.lnorm.measure(np.amax(line)-line)
        line = network.lnorm.normalize(line,cval=np.amax(line))
    else:
        assert "dew.png" in fname,"input must already be dewarped"

    # Condition on tiny or constant-valued line images (the branch body is
    # not part of this excerpt).
    if line.size<10 or np.amax(line)==np.amin(line):
Beispiel #19
0
    def process(self):
        """Deskew every input page and register the result in the workspace.

        For each input file the page image is read as grayscale, the skew
        angle is estimated (when ``maxskew`` is positive) and the image is
        rotated accordingly.  The result is rescaled between the ``lo`` and
        ``hi`` percentiles, thresholded, written as ``<base>.ds.png`` and
        added to the output file group; the page orientation is set from
        the negated skew angle.
        """
        for (n, input_file) in enumerate(self.input_files):
            pcgts = page_from_file(self.workspace.download_file(input_file))
            fname = pcgts.get_Page().imageFilename
            img = self.workspace.resolve_image_as_pil(fname)
            cfg = self.parameter
            base, _ = ocrolib.allsplitext(fname)

            def inner_region(picture):
                # Cut away a `bignore` fraction of the image on every side.
                rows, cols = picture.shape
                dr = int(cfg['bignore'] * rows)
                dc = int(cfg['bignore'] * cols)
                return picture[dr:rows - dr, dc:cols - dc]

            # Progress messages are only printed when not running in parallel.
            verbose = cfg['parallel'] < 2

            if verbose:
                print_info("=== %s " % (fname))
            raw = ocrolib.read_image_gray(img.filename)

            # estimate skew angle and rotate
            if cfg['maxskew'] > 0:
                if verbose:
                    print_info("estimating skew angle")
                inverted = amax(raw) - raw
                inverted -= amin(inverted)
                max_angle = cfg['maxskew']
                steps = int(2 * cfg['maxskew'] * cfg['skewsteps'])
                angle = self.estimate_skew_angle(
                    inner_region(inverted),
                    linspace(-max_angle, max_angle, steps + 1))
                rotated = interpolation.rotate(inverted,
                                               angle,
                                               mode='constant',
                                               reshape=0)
                flat = amax(rotated) - rotated
            else:
                angle = 0
                flat = raw

            # estimate low and high thresholds
            if verbose:
                print_info("estimating thresholds")
            est = inner_region(flat)
            if cfg['escale'] > 0:
                # restrict the percentile estimate to regions with
                # significant variance, which makes it more reliable
                e = cfg['escale']
                variance = est - filters.gaussian_filter(est, e * 20.0)
                variance = filters.gaussian_filter(variance**2, e * 20.0)**0.5
                mask = (variance > 0.3 * amax(variance))
                mask = morphology.binary_dilation(
                    mask, structure=ones((int(e * 50), 1)))
                mask = morphology.binary_dilation(
                    mask, structure=ones((1, int(e * 50))))
                if cfg['debug'] > 0:
                    imshow(mask)
                    ginput(1, cfg['debug'])
                est = est[mask]
            lo = stats.scoreatpercentile(est.ravel(), cfg['lo'])
            hi = stats.scoreatpercentile(est.ravel(), cfg['hi'])

            # rescale the image to get the gray scale image
            if verbose:
                print_info("rescaling")
            flat -= lo
            flat /= (hi - lo)
            flat = clip(flat, 0, 1)
            if cfg['debug'] > 0:
                imshow(flat, vmin=0, vmax=1)
                ginput(1, cfg['debug'])
            deskewed = 1 * (flat > cfg['threshold'])

            # report the estimates and write the thresholded image
            print_info("%s lo-hi (%.2f %.2f) angle %4.1f" %
                       (pcgts.get_Page().imageFilename, lo, hi, angle))
            if verbose:
                print_info("writing")
            out_path = base + ".ds.png"
            ocrolib.write_image_binary(out_path, deskewed)

            # The orientation is the negated skew angle wrapped into
            # (-180, 180]; 180 + angle equals 180 - (-angle).
            pcgts.get_Page().set_orientation(180 - (180 + angle) % 360)

            ID = concat_padded(self.output_file_grp, n)
            self.workspace.add_file(ID=ID,
                                    file_grp=self.output_file_grp,
                                    pageId=input_file.pageId,
                                    mimetype="image/png",
                                    url=out_path,
                                    local_filename='%s/%s' %
                                    (self.output_file_grp, ID),
                                    content=to_xml(pcgts).encode('utf-8'))
Beispiel #20
0
def process1(job):
    """Normalize and binarize one page image.

    ``job`` is a ``(fname, i)`` pair: the image path and the job index
    shown in the progress output.  The image is normalized to [0, 1],
    flattened by subtracting an estimate of the local white level (unless it
    already looks binary), rescaled between the ``lo`` and ``hi``
    percentiles and thresholded.

    Writes ``<base>.bin.png`` and ``<base>.nrm.png`` and returns the path of
    the binary image.  Returns ``None`` when the image is empty or fails
    the page check.
    """
    fname, i = job
    print_info("# %s" % (fname))
    # Progress messages are only printed when not running in parallel.
    verbose = args.parallel < 2
    if verbose:
        print_info("=== %s %-3d" % (fname, i))
    raw = ocrolib.read_image_gray(fname)
    dshow(raw, "input")

    # perform image normalization
    image = raw - amin(raw)
    if amax(image) == amin(image):
        print_info("# image is empty: %s" % (fname))
        return
    image /= amax(image)

    if not args.nocheck:
        problem = check_page(amax(image) - image)
        if problem is not None:
            print_error(fname + " SKIPPED. " + problem +
                        " (use -n to disable this check)")
            return

    # fraction of pixels that are nearly black or nearly white; forced to
    # zero when the gray option is set
    if args.gray:
        extreme = 0
    else:
        near_black = sum(image < 0.05)
        near_white = sum(image > 0.95)
        extreme = (near_black + near_white) * 1.0 / prod(image.shape)

    if extreme > 0.95:
        # the image is already effectively binarized
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # flatten the image by estimating the local whitelevel
        if verbose:
            print_info("flattening")
        whitelevel = interpolation.zoom(image, args.zoom)
        whitelevel = filters.percentile_filter(whitelevel,
                                               args.perc,
                                               size=(args.range, 2))
        whitelevel = filters.percentile_filter(whitelevel,
                                               args.perc,
                                               size=(2, args.range))
        whitelevel = interpolation.zoom(whitelevel, 1.0 / args.zoom)
        if args.debug > 0:
            clf()
            imshow(whitelevel, vmin=0, vmax=1)
            ginput(1, args.debug)
        # use only the region common to both arrays
        w, h = minimum(array(image.shape), array(whitelevel.shape))
        flat = clip(image[:w, :h] - whitelevel[:w, :h] + 1, 0, 1)
        if args.debug > 0:
            clf()
            imshow(flat, vmin=0, vmax=1)
            ginput(1, args.debug)

    # estimate low and high thresholds
    if verbose:
        print_info("estimating thresholds")
    rows, cols = flat.shape
    dr, dc = int(args.bignore * rows), int(args.bignore * cols)
    est = flat[dr:rows - dr, dc:cols - dc]
    if args.escale > 0:
        # restrict the percentile estimate to regions with significant
        # variance, which makes it more reliable
        e = args.escale
        variance = est - filters.gaussian_filter(est, e * 20.0)
        variance = filters.gaussian_filter(variance**2, e * 20.0)**0.5
        mask = (variance > 0.3 * amax(variance))
        mask = morphology.binary_dilation(mask,
                                          structure=ones((int(e * 50), 1)))
        mask = morphology.binary_dilation(mask,
                                          structure=ones((1, int(e * 50))))
        if args.debug > 0:
            imshow(mask)
            ginput(1, args.debug)
        est = est[mask]
    lo = stats.scoreatpercentile(est.ravel(), args.lo)
    hi = stats.scoreatpercentile(est.ravel(), args.hi)

    # rescale the image to get the gray scale image
    if verbose:
        print_info("rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if args.debug > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, args.debug)
    binarized = 1 * (flat > args.threshold)

    # output the normalized grayscale and the thresholded images
    print_info("%s lo-hi (%.2f %.2f) %s" % (fname, lo, hi, comment))
    if verbose:
        print_info("writing")
    if args.debug > 0 or args.show:
        clf()
        gray()
        imshow(binarized)
        ginput(1, max(0.1, args.debug))
    base, _ = ocrolib.allsplitext(fname)
    bin_path = base + ".bin.png"
    ocrolib.write_image_binary(bin_path, binarized)
    ocrolib.write_image_gray(base + ".nrm.png", flat)
    return base + ".bin.png"
Beispiel #21
0
def process1(arg):
    """Recognize one text line image and optionally evaluate or plot it.

    ``arg`` is a ``(trial, fname)`` pair.  The line image is read,
    normalized (unless ``args.nolineest`` is set) and passed to the network.

    Returns ``None`` for empty or constant images and in plain recognition
    mode.  With ``args.estrate`` set, returns
    ``(err, conf, len(gt0), trial, fname)``, or ``(0, [], 0, trial, fname)``
    when the ground truth ``<base>.gt.txt`` cannot be read.
    """
    (trial, fname) = arg
    base, _ = ocrolib.allsplitext(fname)
    line = ocrolib.read_image_gray(fname)
    if prod(line.shape) == 0: return None
    if amax(line) == amin(line): return None

    if not args.nolineest:
        assert "dew.png" not in fname, "don't dewarp dewarped images"
        # The normalizer measures on the inverted image scaled to a maximum of 1.
        temp = amax(line) - line
        temp = temp * 1.0 / amax(temp)
        lnorm.measure(temp)
        line = lnorm.normalize(line, cval=amax(line))
    else:
        assert "dew.png" in fname, "only apply to dewarped images"

    line = lstm.prepare_line(line, args.pad)
    pred = network.predictString(line)

    if not args.nonormalize:
        pred = ocrolib.normalize_text(pred)

    if args.estrate:
        try:
            gt = ocrolib.read_text(base + ".gt.txt")
        except Exception:
            # Best effort: a line without readable ground truth counts as
            # zero errors over zero characters.  `Exception` rather than a
            # bare `except` so KeyboardInterrupt/SystemExit still propagate.
            return (0, [], 0, trial, fname)
        pred0 = ocrolib.project_text(pred, args.compare)
        gt0 = ocrolib.project_text(gt, args.compare)
        if args.estconf > 0:
            err, conf = edist.xlevenshtein(pred0, gt0, context=args.context)
        else:
            err = edist.xlevenshtein(pred0, gt0)
            conf = []
        if not args.quiet:
            # Single-argument print calls produce the same text on Python 2 and 3.
            print("%3d %3d %s : %s" % (err, len(gt), fname, pred))
            sys.stdout.flush()
        return (err, conf, len(gt0), trial, fname)

    if not args.quiet:
        print(pred)

    if args.show > 0 or args.save is not None:
        ion()
        matplotlib.rc('xtick', labelsize=7)
        matplotlib.rc('ytick', labelsize=7)
        matplotlib.rcParams.update({"font.size": 7})
        if os.path.exists(base + ".gt.txt"):
            transcript = ocrolib.read_text(base + ".gt.txt")
            transcript = ocrolib.normalize_text(transcript)
        else:
            transcript = pred
        # The return value is not needed; presumably the call fills
        # network.outputs / network.aligned, which are plotted below
        # (NOTE(review): confirm against the network implementation).
        network.trainString(line, transcript, update=0)
        figure("result", figsize=(1400 // 75, 800 // 75), dpi=75)
        clf()
        subplot(311)
        imshow(line.T, cmap=cm.gray)
        title(transcript)
        subplot(312)
        gca().set_xticks([])
        imshow(network.outputs.T[1:], vmin=0, cmap=cm.hot)
        title(pred[:80])
        subplot(313)
        plot(network.outputs[:, 0], color='yellow', linewidth=3, alpha=0.5)
        plot(network.outputs[:, 1], color='green', linewidth=3, alpha=0.5)
        plot(amax(network.outputs[:, 2:], axis=1), color='blue', linewidth=3, alpha=0.5)
        plot(network.aligned[:, 0], color='orange', linestyle='dashed', alpha=0.7)
        plot(network.aligned[:, 1], color='green', linestyle='dashed', alpha=0.5)
        plot(amax(network.aligned[:, 2:], axis=1), color='blue', linestyle='dashed', alpha=0.5)
        if args.save is not None:
            draw()
            savename = args.save
            # A "%" in the name is treated as a format placeholder for the trial number.
            if "%" in savename: savename = savename % trial
            print("saving %s" % savename)
            savefig(savename, bbox_inches=0)
        if trial == len(inputs) - 1:
            # Wait (practically) indefinitely on the last sample.
            ginput(1, 99999999)
        else:
            ginput(1, args.show)
    return None