def _process_segment(self, model, dataset, page, page_xywh, page_id, input_file, orig_img_size, n):
    """Run the dewarping model over ``dataset`` and attach each result to ``page``.

    For every sample the model output is binarized at its mid-range value,
    resized to ``orig_img_size``, saved into ``self.image_grp`` and
    registered on ``page`` as an AlternativeImage.

    Args:
        model: object providing ``inference(label, inst, image)``.
        dataset: iterable of mappings with ``'label'``, ``'inst'`` and
            ``'image'`` entries.
        page: PAGE page object that receives the AlternativeImage entries.
        page_xywh: dict whose ``'features'`` string gets ``',dewarped'``
            appended once per sample.
        page_id: page identifier handed to the workspace.
        input_file: input file whose ``ID`` seeds the output file ID.
        orig_img_size: two-element size the result is resized to.
        n: index used to build a fallback file ID.
    """
    for sample in dataset:
        width, height = orig_img_size
        generated = model.inference(sample['label'], sample['inst'], sample['image'])

        # Move the first tensor axis to the end (presumably channels-first
        # to channels-last - TODO confirm) and pull the data onto the CPU.
        pixels = array(generated.data[0].permute(1, 2, 0).detach().cpu())
        mask = array(255 * (pixels > ocrolib.midrange(pixels)), 'B')
        result = ocrolib.array2pil(mask)
        result = result.resize((width, height))

        page_xywh['features'] += ',dewarped'

        # Derive the output ID from the input ID; fall back to a padded
        # running number when the input ID does not contain the group name.
        file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.image_grp, n)

        file_path = self.workspace.save_image_file(
            result,
            file_id,
            page_id=page_id,
            file_grp=self.image_grp,
            force=self.parameter['force'])
        alternative = AlternativeImageType(filename=file_path,
                                           comments=page_xywh['features'])
        page.add_AlternativeImage(alternative)
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
    """Detect the page border, store it on ``page`` and save the cropped image.

    The border box is taken from the first text area reported by
    ``self.detect_textarea`` / ``self.crop_area``; when no usable text area
    exists, ``self.select_borderLine`` supplies the box instead.

    Args:
        page_image: PIL image of the page.
        page: PAGE page object that receives the Border and the
            AlternativeImage.
        page_xywh: dict describing the image; its ``'features'`` string gets
            ``',cropped'`` appended.
        page_id: page identifier handed to the workspace.
        input_file: input file used to derive the output file ID.
        n: unused here; kept for interface compatibility.
    """
    img_array = ocrolib.pil2array(page_image)

    # Replicate a single-channel image across a new last axis.
    # FIXME: check not needed anymore?
    if len(img_array.shape) == 2:
        img_array = np.stack((img_array,) * 3, axis=-1)

    img_array_bin = np.array(img_array > ocrolib.midrange(img_array), 'i')

    lineDetectH = []
    lineDetectV = []
    img_array_rr = self.remove_rular(img_array)

    textarea, img_array_rr_ta, height, width = self.detect_textarea(
        img_array_rr)
    # The parameter is a fraction of the page width; convert it to pixels.
    colSeparator = int(width * self.parameter['colSeparator'])

    box = None
    if len(textarea) > 1:
        textarea = self.crop_area(
            textarea, img_array_bin, img_array_rr_ta, colSeparator)
        if len(textarea) != 0:
            box = textarea[0]
    elif len(textarea) == 1:
        candidate = textarea[0]
        area = (abs(candidate[2] - candidate[0]) *
                abs(candidate[3] - candidate[1]))
        # A single text area is only trusted when it covers more than half
        # of the page.
        # NOTE(review): an earlier revision computed a 20px/40px padded box
        # here but never applied it; the unpadded box is kept so the output
        # stays the same.
        if height * width * 0.5 < area:
            box = candidate
    if box is None:
        box = self.select_borderLine(img_array_rr, lineDetectH, lineDetectV)
    min_x, min_y, max_x, max_y = box

    border_polygon = [[min_x, min_y], [max_x, min_y],
                      [max_x, max_y], [min_x, max_y]]
    border_polygon = coordinates_for_segment(border_polygon, page_image, page_xywh)
    border_points = points_from_polygon(border_polygon)
    page.set_Border(BorderType(Coords=CoordsType(border_points)))

    page_image = crop_image(page_image, box=(min_x, min_y, max_x, max_y))
    page_xywh['features'] += ',cropped'

    file_id = make_file_id(input_file, self.output_file_grp)
    file_path = self.workspace.save_image_file(page_image,
                                               file_id + '-IMG',
                                               page_id=page_id,
                                               file_grp=self.output_file_grp)
    page.add_AlternativeImage(AlternativeImageType(
        filename=file_path, comments=page_xywh['features']))
def blxlimages(image, shapedict, bls, xls):
    """Build two images of ``image``'s shape from per-character 32x32 patches.

    The input is binarized at its mid-range value and segmented with
    ``lineseg.ccslineseg``; segments are renumbered by
    ``morph.renumber_by_xcenter``. For each character yielded by
    ``extract_chars``, ``shapedict.predict1`` selects an index, and the
    matching entries of ``bls`` and ``xls`` are reshaped to 32x32 and added
    into the respective output image through the character's
    ``itransform_add`` callback.

    Returns:
        Tuple ``(blimage, xlimage)``.

    Raises:
        RecognitionError: if the binarized image has no set pixel.
    """
    binary = image > ocrolib.midrange(image)
    if amax(binary) == 0:
        raise RecognitionError("empty line")

    segments = morph.renumber_by_xcenter(lineseg.ccslineseg(binary))

    bl_canvas = zeros(binary.shape)
    xl_canvas = zeros(binary.shape)
    for char_image, _transform, add_patch in extract_chars(segments):
        shape_index = shapedict.predict1(char_image)
        bl_patch = bls[shape_index].reshape(32, 32)
        xl_patch = xls[shape_index].reshape(32, 32)
        add_patch(bl_canvas, bl_patch)
        add_patch(xl_canvas, xl_patch)
    return bl_canvas, xl_canvas
def blxlimages(image, shapedict, bls, xls):
    """Compose two accumulator images from predicted character patches.

    ``image`` is thresholded at its mid-range value, segmented with
    ``lineseg.ccslineseg`` and renumbered with
    ``morph.renumber_by_xcenter``. Every character from ``extract_chars``
    is classified by ``shapedict.predict1``; the predicted index picks one
    row of ``bls`` and one of ``xls``, each reshaped to 32x32 and added to
    its accumulator by the ``itransform_add`` callback.

    Returns:
        Tuple ``(blimage, xlimage)``, both shaped like ``image``.

    Raises:
        RecognitionError: if thresholding leaves no foreground pixel.
    """
    thresholded = image > ocrolib.midrange(image)
    if amax(thresholded) == 0:
        raise RecognitionError("empty line")

    labelled = lineseg.ccslineseg(thresholded)
    labelled = morph.renumber_by_xcenter(labelled)

    accumulators = (zeros(thresholded.shape), zeros(thresholded.shape))
    for glyph, _transform, itransform_add in extract_chars(labelled):
        best = shapedict.predict1(glyph)
        patches = (bls[best].reshape(32, 32), xls[best].reshape(32, 32))
        for target, patch in zip(accumulators, patches):
            itransform_add(target, patch)
    return accumulators[0], accumulators[1]
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
    """Extract the text part of a page image and attach it to ``page``.

    The image is reduced to one channel, inverted and normalized, and a
    seed-fill mask is built with the ``pixMorphSequence_mask_seed_fill_holes``,
    ``pixSeedfillBinary`` and ``expansion`` helpers. Pixels under the mask
    are blanked; the remainder is binarized at its mid-range value and
    saved into ``self.image_grp``.

    Args:
        page_image: PIL image of the page.
        page: PAGE page object that receives the AlternativeImage.
        page_xywh: dict whose ``'features'`` string gets ``',tiseged'``
            appended.
        page_id: page identifier handed to the workspace.
        input_file: input file whose ``ID`` seeds the output file ID.
        n: index used to build a fallback file ID.
    """
    I = ocrolib.pil2array(page_image)
    if len(I.shape) > 2:
        # Average over the last axis to get a single-channel image.
        I = np.mean(I, 2)
    # Invert and scale so that the brightest input pixel becomes 0.
    I = 1 - I / I.max()
    rows, cols = I.shape

    # Generate Mask and Seed Images
    Imask, Iseed = self.pixMorphSequence_mask_seed_fill_holes(I)

    # Iseedfill: Union of Mask and Seed Images
    Iseedfill = self.pixSeedfillBinary(Imask, Iseed)

    # Dilation of Iseedfill
    Iseedfill = ndimage.binary_dilation(Iseedfill, ones((3, 3)))

    # Expansion of Iseedfill to become equal in size of I
    Iseedfill = self.expansion(Iseedfill, (rows, cols))

    # Keep only what lies outside the seed-fill mask. The complementary
    # non-text image was previously computed here too, but never used.
    text_part = array((1 - I * (1 - Iseedfill)), dtype=int)
    text_part[0, 0] = 0  # only for visualisation purpose

    page_xywh['features'] += ',tiseged'

    bin_array = array(255 * (text_part > ocrolib.midrange(text_part)), 'B')
    bin_image = ocrolib.array2pil(bin_array)

    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)
    file_path = self.workspace.save_image_file(bin_image,
                                               file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comments=page_xywh['features']))
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
    """Estimate the skew of a page image, rotate it and save a binarized copy.

    When ``maxskew`` is positive, the angle is estimated by
    ``self.estimate_skew_angle`` on the inverted image (ignoring a
    ``bignore`` border) and the image is rotated accordingly; otherwise the
    angle is 0. The result is rescaled between the ``lo`` and ``hi``
    percentiles, thresholded, saved into ``self.image_grp`` and registered
    on ``page``; the angle is stored via ``page.set_orientation``.

    Args:
        page_image: PIL image of the page.
        page: PAGE page object receiving orientation and AlternativeImage.
        page_xywh: dict whose ``'features'`` string gets ``',deskewed'``
            appended.
        page_id: page identifier handed to the workspace.
        input_file: input file whose ``ID`` seeds the output file ID.
        n: index used to build a fallback file ID.
    """
    params = self.parameter
    flat = ocrolib.pil2array(page_image).astype("float64")

    # estimate skew angle and rotate
    if params['maxskew'] > 0:
        if params['parallel'] < 2:
            LOG.info("Estimating Skew Angle")
        n_rows, n_cols = flat.shape
        margin_rows = int(params['bignore'] * n_rows)
        margin_cols = int(params['bignore'] * n_cols)
        # Work on the inverted image, shifted so that its minimum is 0.
        flat = amax(flat) - flat
        flat -= amin(flat)
        inner = flat[margin_rows:n_rows - margin_rows,
                     margin_cols:n_cols - margin_cols]
        max_angle = params['maxskew']
        n_steps = int(2 * params['maxskew'] * params['skewsteps'])
        angle = self.estimate_skew_angle(
            inner, linspace(-max_angle, max_angle, n_steps + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        # Undo the inversion.
        flat = amax(flat) - flat
    else:
        angle = 0

    # estimate low and high thresholds
    if params['parallel'] < 2:
        LOG.info("Estimating Thresholds")
    n_rows, n_cols = flat.shape
    margin_rows = int(params['bignore'] * n_rows)
    margin_cols = int(params['bignore'] * n_cols)
    sample = flat[margin_rows:n_rows - margin_rows,
                  margin_cols:n_cols - margin_cols]
    if params['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        escale = params['escale']
        variance = sample - filters.gaussian_filter(sample, escale * 20.0)
        variance = filters.gaussian_filter(variance**2, escale * 20.0)**0.5
        variance = (variance > 0.3 * amax(variance))
        variance = morphology.binary_dilation(
            variance, structure=ones((int(escale * 50), 1)))
        variance = morphology.binary_dilation(
            variance, structure=ones((1, int(escale * 50))))
        if params['debug'] > 0:
            imshow(variance)
            ginput(1, params['debug'])
        sample = sample[variance]
    lo = stats.scoreatpercentile(sample.ravel(), params['lo'])
    hi = stats.scoreatpercentile(sample.ravel(), params['hi'])

    # rescale the image to get the gray scale image
    if params['parallel'] < 2:
        LOG.info("Rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if params['debug'] > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, params['debug'])
    deskewed = 1 * (flat > params['threshold'])

    # TODO: Need some clarification as the results effect the following
    # pre-processing steps.
    if angle is None:  # FIXME: quick fix to prevent angle of "none"
        angle = 0
    page.set_orientation(angle)

    page_xywh['features'] += ',deskewed'
    bin_array = array(255 * (deskewed > ocrolib.midrange(deskewed)), 'B')
    page_image = ocrolib.array2pil(bin_array)

    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)
    file_path = self.workspace.save_image_file(page_image,
                                               file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comments=page_xywh['features']))
def process(self):
    """Detect the page border of every input file and write updated PAGE-XML.

    For each input file the page image is loaded, a border box is taken
    from the first detected text area (or from ``self.select_borderLine``
    when no usable text area exists), stored as the page's Border, and the
    PAGE document is added to ``self.output_file_grp``.
    """
    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        fname = pcgts.get_Page().imageFilename
        img = self.workspace.resolve_image_as_pil(fname)
        print("Process file: ", fname)

        img_array = ocrolib.pil2array(img)
        img_array_bin = np.array(img_array > ocrolib.midrange(img_array), 'i')

        lineDetectH = []
        lineDetectV = []
        img_array_rr = self.remove_rular(img_array)

        textarea, img_array_rr_ta, height, width = self.detect_textarea(
            img_array_rr)

        # The parameter is a fraction of the page width. crop_area takes no
        # separator argument here, so it presumably reads the pixel value
        # from self.parameter - hence the temporary override. It is restored
        # afterwards; otherwise each page would multiply the previous
        # page's pixel value by its width again.
        col_separator_fraction = self.parameter['colSeparator']
        self.parameter['colSeparator'] = int(width * col_separator_fraction)
        try:
            box = None
            if len(textarea) > 1:
                textarea = self.crop_area(textarea, img_array_bin,
                                          img_array_rr_ta)
                if len(textarea) != 0:
                    box = textarea[0]
            elif len(textarea) == 1:
                candidate = textarea[0]
                area = (abs(candidate[2] - candidate[0]) *
                        abs(candidate[3] - candidate[1]))
                # A single text area is only trusted when it covers more
                # than half of the page.
                if height * width * 0.5 < area:
                    box = candidate
            if box is None:
                box = self.select_borderLine(img_array_rr, lineDetectH,
                                             lineDetectV)
            min_x, min_y, max_x, max_y = box
        finally:
            self.parameter['colSeparator'] = col_separator_fraction

        brd = BorderType(Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" % (
            min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y)))
        pcgts.get_Page().set_Border(brd)

        # Use input_file's basename for the new file -
        # this way the files retain the same basenames:
        file_id = input_file.ID.replace(self.input_file_grp,
                                        self.output_file_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.output_file_grp, n)
        self.workspace.add_file(ID=file_id,
                                file_grp=self.output_file_grp,
                                pageId=input_file.pageId,
                                mimetype=MIMETYPE_PAGE,
                                local_filename=os.path.join(
                                    self.output_file_grp, file_id + '.xml'),
                                content=to_xml(pcgts).encode('utf-8'))
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n, model):
    """Split a page image into text and non-text parts and attach both to ``page``.

    With a ``model``, the parts come from ``model.predict_segmentation``
    (label 1 is blanked in the text part, label 2 in the non-text part).
    Without one, a seed-fill mask built by the ``pixMorphSequence_*``,
    ``pixSeedfillBinary`` and ``expansion`` helpers separates them, and
    each part is binarized at its own mid-range value.

    Args:
        page_image: PIL image of the page.
        page: PAGE page object that receives two AlternativeImage entries.
        page_xywh: dict whose ``'features'`` string gets ``',clipped'``
            appended (after the non-text image has been registered with
            ``',non_text'``).
        page_id: page identifier handed to the workspace.
        input_file: input file whose ``ID`` seeds the output file IDs.
        n: index used to build a fallback file ID.
        model: segmentation model, or a falsy value to use the
            rule-based method.
    """
    I = ocrolib.pil2array(page_image)
    LOG.info('image size: %s', page_image.size)

    if model:
        if len(I.shape) < 3:
            print('Wrong input shape. Image should have 3 channel')

        # get prediction
        # NOTE(review): both calls below leave files behind
        # ("/tmp/out.png" and "out_image.png" in the working directory);
        # they look like debugging leftovers - confirm before removing.
        out = model.predict_segmentation(inp=I, out_fname="/tmp/out.png")
        cv2.imwrite('out_image.png', out * (255 / 2))

        text_part = np.ones(out.shape)
        text_part[np.where(out == 1)] = 0

        image_part = np.ones(out.shape)
        image_part[np.where(out == 2)] = 0

        image_part = ocrolib.array2pil(array(255 * (image_part), 'B'))
        text_part = ocrolib.array2pil(array(255 * (text_part), 'B'))

        text_part = text_part.resize(page_image.size, Image.BICUBIC)
        image_part = image_part.resize(page_image.size, Image.BICUBIC)
    else:
        if len(I.shape) > 2:
            I = np.mean(I, 2)
        I = 1 - I / I.max()
        rows, cols = I.shape

        # Generate Mask and Seed Images
        Imask, Iseed = self.pixMorphSequence_mask_seed_fill_holes(I)

        # Iseedfill: Union of Mask and Seed Images
        Iseedfill = self.pixSeedfillBinary(Imask, Iseed)

        # Dilation of Iseedfill
        Iseedfill = ndimage.binary_dilation(Iseedfill, ones((3, 3)))

        # Expansion of Iseedfill to become equal in size of I
        Iseedfill = self.expansion(Iseedfill, (rows, cols))

        # Write Text and Non-Text images
        image_part = array((1 - I * Iseedfill), dtype=int)
        text_part = array((1 - I * (1 - Iseedfill)), dtype=int)

        # Binarize each part against its own mid-range value while both
        # are still arrays, then convert them to PIL images.
        text_bin = array(255 * (text_part > ocrolib.midrange(text_part)), 'B')
        image_bin = array(255 * (image_part > ocrolib.midrange(image_part)), 'B')
        text_part = ocrolib.array2pil(text_bin)
        image_part = ocrolib.array2pil(image_bin)

    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)

    file_path = self.workspace.save_image_file(
        image_part,
        file_id + "_img",
        page_id=page_id,
        file_grp=self.image_grp,
        force=self.parameter['force'])
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path,
                             comments=page_xywh['features'] + ',non_text'))

    page_xywh['features'] += ',clipped'
    file_path = self.workspace.save_image_file(
        text_part,
        file_id + "_txt",
        page_id=page_id,
        file_grp=self.image_grp,
        force=self.parameter['force'])
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comments=page_xywh['features']))
def _process_segment(self, page, filename, page_id, file_id):
    """Binarize the gray image at ``filename`` and attach the result to ``page``.

    The image is normalized to [0, 1], optionally checked by
    ``self.check_page``, flattened by subtracting a percentile-filtered
    estimate (unless it is already nearly two-valued), rescaled between the
    ``lo`` and ``hi`` percentiles and thresholded. The result is saved into
    ``self.image_grp``.

    Args:
        page: PAGE page object that receives the AlternativeImage.
        filename: path of the image to read.
        page_id: page identifier, used for logging and saving.
        file_id: ID under which the binarized image is saved.

    Returns:
        None. Returns early when the image is empty or the page check fails.
    """
    params = self.parameter
    raw = ocrolib.read_image_gray(filename)
    self.dshow(raw, "input")

    # perform image normalization
    image = raw - amin(raw)
    if amax(image) == amin(image):
        LOG.info("# image is empty: %s" % (page_id))
        return
    image /= amax(image)

    if not params['nocheck']:
        check = self.check_page(amax(image) - image)
        if check is not None:
            # This method has no input_file; identify the page by page_id.
            LOG.error("%s SKIPPED. %s (use -n to disable this check)",
                      page_id, check)
            return

    # check whether the image is already effectively binarized
    if params['gray']:
        extreme = 0
    else:
        extreme = (np.sum(image < 0.05) +
                   np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # if not, we need to flatten it by estimating the local whitelevel
        LOG.info("Flattening")
        m = interpolation.zoom(image, params['zoom'])
        m = filters.percentile_filter(m, params['perc'],
                                      size=(params['range'], 2))
        m = filters.percentile_filter(m, params['perc'],
                                      size=(2, params['range']))
        m = interpolation.zoom(m, 1.0 / params['zoom'])
        if params['debug'] > 0:
            clf()
            imshow(m, vmin=0, vmax=1)
            ginput(1, params['debug'])
        # Zooming down and up again may change the shape slightly; use the
        # common extent of both arrays.
        w, h = minimum(array(image.shape), array(m.shape))
        flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
        if params['debug'] > 0:
            clf()
            imshow(flat, vmin=0, vmax=1)
            ginput(1, params['debug'])

    # estimate low and high thresholds
    LOG.info("Estimating Thresholds")
    d0, d1 = flat.shape
    o0, o1 = int(params['bignore'] * d0), int(params['bignore'] * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if params['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = params['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        if params['debug'] > 0:
            imshow(v)
            ginput(1, params['debug'])
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), params['lo'])
    hi = stats.scoreatpercentile(est.ravel(), params['hi'])

    # rescale the image to get the gray scale image
    LOG.info("Rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if params['debug'] > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, params['debug'])
    binarized = 1 * (flat > params['threshold'])

    # output the normalized grayscale and the thresholded images
    LOG.info("%s lo-hi (%.2f %.2f) %s" % (page_id, lo, hi, comment))
    LOG.info("writing")
    if params['debug'] > 0 or params['show']:
        clf()
        gray()
        imshow(binarized)
        ginput(1, max(0.1, params['debug']))

    bin_array = array(255 * (binarized > ocrolib.midrange(binarized)), 'B')
    bin_image = ocrolib.array2pil(bin_array)

    file_path = self.workspace.save_image_file(bin_image,
                                               file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    # The attribute is spelled "comments" in every other
    # AlternativeImageType call in this code base.
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comments="binarized"))
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
    """Detect the page border, store it on ``page`` and save the cropped image.

    The border box is the first text area reported by
    ``self.detect_textarea`` / ``self.crop_area``; when no usable text area
    exists, ``self.select_borderLine`` supplies the box instead.

    Args:
        page_image: PIL image of the page.
        page: PAGE page object that receives the Border and the
            AlternativeImage.
        page_xywh: dict whose ``'features'`` string gets ``',cropped'``
            appended.
        page_id: page identifier handed to the workspace.
        input_file: input file whose ``ID`` seeds the output file ID.
        n: index used to build a fallback file ID.
    """
    img_array = ocrolib.pil2array(page_image)

    # Replicate a single-channel image across a new last axis.
    # FIXME: check not needed anymore?
    if len(img_array.shape) == 2:
        img_array = np.stack((img_array, ) * 3, axis=-1)

    img_array_bin = np.array(img_array > ocrolib.midrange(img_array), 'i')

    lineDetectH = []
    lineDetectV = []
    img_array_rr = self.remove_rular(img_array)

    textarea, img_array_rr_ta, height, width = self.detect_textarea(
        img_array_rr)

    # The parameter is a fraction of the page width. crop_area takes no
    # separator argument here, so it presumably reads the pixel value from
    # self.parameter - hence the temporary override. It is restored
    # afterwards; otherwise every call would multiply the previous call's
    # pixel value by the page width again.
    col_separator_fraction = self.parameter['colSeparator']
    self.parameter['colSeparator'] = int(width * col_separator_fraction)
    try:
        box = None
        if len(textarea) > 1:
            textarea = self.crop_area(textarea, img_array_bin, img_array_rr_ta)
            if len(textarea) != 0:
                box = textarea[0]
        elif len(textarea) == 1:
            candidate = textarea[0]
            area = (abs(candidate[2] - candidate[0]) *
                    abs(candidate[3] - candidate[1]))
            # A single text area is only trusted when it covers more than
            # half of the page.
            if height * width * 0.5 < area:
                box = candidate
        if box is None:
            box = self.select_borderLine(img_array_rr, lineDetectH,
                                         lineDetectV)
        min_x, min_y, max_x, max_y = box
    finally:
        self.parameter['colSeparator'] = col_separator_fraction

    brd = BorderType(Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" % (
        min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y)))
    page.set_Border(brd)

    page_image = crop_image(page_image, box=(min_x, min_y, max_x, max_y))
    page_xywh['features'] += ',cropped'

    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)
    file_path = self.workspace.save_image_file(page_image,
                                               file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comments=page_xywh['features']))
def _process_segment(self, page_image, page, textregion, region_xywh, page_id, input_file, n):
    """Segment a region image into text lines and add them to ``textregion``.

    Existing lines are removed or kept depending on the ``overwrite``
    parameter. The image is inverted and normalized, segmented with
    ``self.compute_segmentation``, and the lines are put into reading
    order. Each line gets a bounding-box polygon, a saved line image and a
    ``TextLineType`` entry.

    Args:
        page_image: PIL image of the region.
        page: PAGE page object (not used here).
        textregion: region object that receives the TextLine entries.
        region_xywh: dict describing the region image; passed to
            ``coordinates_for_segment`` and read for ``'features'``.
        page_id: identifier used in log messages, line IDs and saving.
        input_file: input file used to derive the output file ID.
        n: index included in the saved image's file ID.

    Returns:
        None. Returns early when lines are kept, the scale is unusable or
        too many lines are found.
    """
    LOG = getLogger('OcrdAnybaseocrTextline')

    # check for existing text lines and whether to overwrite them
    if textregion.get_TextLine():
        if self.parameter['overwrite']:
            LOG.info('removing existing TextLines in region "%s"', page_id)
            textregion.set_TextLine([])
        else:
            LOG.warning('keeping existing TextLines in region "%s"', page_id)
            return

    binary = ocrolib.pil2array(page_image)
    if len(binary.shape) > 2:
        binary = np.mean(binary, 2)
    binary = np.array(1 - binary / np.amax(binary), 'B')

    if self.parameter['scale'] == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = self.parameter['scale']

    if np.isnan(scale) or scale > 1000.0 or scale < self.parameter['minscale']:
        LOG.warning(str(scale) + ": bad scale; skipping!\n")
        return

    segmentation = self.compute_segmentation(binary, scale)
    if np.amax(segmentation) > self.parameter['maxlines']:
        LOG.warning("too many lines %i; skipping!\n", (np.amax(segmentation)))
        return

    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs
    nlabels = np.amax(segmentation) + 1
    renumber = np.zeros(nlabels, 'i')
    for i, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (i + 1)
    segmentation = renumber[segmentation]

    lines = [lines[i] for i in lsort]

    cleaned = ocrolib.remove_noise(binary, self.parameter['noise'])

    for i, l in enumerate(lines):
        # l.bounds indexes the image array as [bounds[0], bounds[1]], so
        # bounds[0] spans rows (y) and bounds[1] spans columns (x).
        row_slice, col_slice = l.bounds[0], l.bounds[1]
        min_y, max_y = (row_slice.start, row_slice.stop)
        min_x, max_x = (col_slice.start, col_slice.stop)
        line_polygon = [[min_x, min_y], [max_x, min_y],
                        [max_x, max_y], [min_x, max_y]]
        line_polygon = coordinates_for_segment(line_polygon, page_image,
                                               region_xywh)
        line_points = points_from_polygon(line_polygon)

        img = cleaned[row_slice, col_slice]
        img = np.array(255 * (img > ocrolib.midrange(img)), 'B')
        # Invert the thresholded line image before saving.
        img = 255 - img
        img = ocrolib.array2pil(img)

        file_id = make_file_id(input_file, self.output_file_grp)
        file_path = self.workspace.save_image_file(
            img,
            file_id + "_" + str(n) + "_" + str(i),
            page_id=page_id,
            file_grp=self.output_file_grp)
        ai = AlternativeImageType(filename=file_path,
                                  comments=region_xywh['features'])
        line_id = '%s_line%04d' % (page_id, i)
        line = TextLineType(custom='readingOrder {index:' + str(i) + ';}',
                            id=line_id,
                            Coords=CoordsType(line_points))
        line.add_AlternativeImage(ai)
        textregion.add_TextLine(line)
def _process_segment(self, page_image, page, region_xywh, page_id, input_file, n):
    """Segment a page image into text lines and add them to a text region.

    Lines are added to the page's last TextRegion; when the page has none,
    a region covering the whole image is created first. The image is
    inverted and normalized, segmented with ``self.compute_segmentation``,
    and the lines are put into reading order. Each line gets a
    bounding-box ``Coords`` string and a saved line image.

    Args:
        page_image: PIL image to segment.
        page: PAGE page object holding (or receiving) the TextRegion.
        region_xywh: dict whose ``'features'`` string gets ``',textline'``
            appended.
        page_id: page identifier, used for logging and saving.
        input_file: input file whose ``ID`` seeds the output file IDs.
        n: index used to build a fallback file ID.

    Returns:
        None. Returns early when the scale is unusable or too many lines
        are found.
    """
    binary = ocrolib.pil2array(page_image)
    binary = np.array(1 - binary / np.amax(binary), 'B')

    existing_regions = page.get_TextRegion()
    if existing_regions is None or len(existing_regions) < 1:
        # The array's axis 0 is indexed like rows (y) below, axis 1 like
        # columns (x).
        min_x, max_x = (0, binary.shape[1])
        min_y, max_y = (0, binary.shape[0])
        textregion = TextRegionType(
            Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" % (
                min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y)))
        page.add_TextRegion(textregion)
    else:
        textregion = existing_regions[-1]

    if self.parameter['scale'] == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = self.parameter['scale']

    if np.isnan(scale) or scale > 1000.0 or scale < self.parameter['minscale']:
        LOG.warning("%s: bad scale (%g); skipping\n", page_id, scale)
        return

    segmentation = self.compute_segmentation(binary, scale)
    if np.amax(segmentation) > self.parameter['maxlines']:
        LOG.warning("%s: too many lines %i", page_id, np.amax(segmentation))
        return

    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs
    nlabels = np.amax(segmentation) + 1
    renumber = np.zeros(nlabels, 'i')
    for i, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (i + 1)
    segmentation = renumber[segmentation]

    lines = [lines[i] for i in lsort]

    # NOTE(review): `cleaned` is computed, but the line images below are
    # cut from `binary` - confirm which one is intended.
    cleaned = ocrolib.remove_noise(binary, self.parameter['noise'])
    region_xywh['features'] += ",textline"

    for i, l in enumerate(lines):
        # bounds[0] indexes rows (y), bounds[1] indexes columns (x).
        row_slice, col_slice = l.bounds[0], l.bounds[1]
        min_y, max_y = (row_slice.start, row_slice.stop)
        min_x, max_x = (col_slice.start, col_slice.stop)

        img = binary[row_slice, col_slice]
        img = np.array(255 * (img > ocrolib.midrange(img)), 'B')
        img = ocrolib.array2pil(img)

        file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.image_grp, n)

        file_path = self.workspace.save_image_file(img,
                                                   file_id + "_" + str(i),
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        ai = AlternativeImageType(filename=file_path,
                                  comments=region_xywh['features'])
        line = TextLineType(
            Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" % (
                min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y)))
        line.add_AlternativeImage(ai)
        textregion.add_TextLine(line)
def process(self):
    """Detect the page border of every input file and write updated PAGE-XML.

    For each input file the page image is fetched from the workspace,
    rotated by ``self.rotate_image`` according to the page's orientation,
    and a border box is taken from the first detected text area (or from
    ``self.select_borderLine`` when no usable text area exists). The box is
    stored as the page's Border and the PAGE document is added to
    ``self.output_file_grp``.
    """
    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID
        page = pcgts.get_Page()
        page_image, page_xywh, _ = self.workspace.image_from_page(
            page, page_id)
        # Not every PIL image carries a `filename` attribute, so read it
        # defensively.
        LOG.debug("page image: %s %s", type(page_image),
                  getattr(page_image, 'filename', None))

        # Get image orientation
        orientation = pcgts.get_Page().get_orientation()
        rotated_image = self.rotate_image(orientation, page_image)

        LOG.info("INPUT FILE %s ", input_file.pageId or input_file.ID)
        img_array = ocrolib.pil2array(rotated_image)

        # Replicate a single-channel image across a new last axis.
        if len(img_array.shape) == 2:
            img_array = np.stack((img_array, ) * 3, axis=-1)

        img_array_bin = np.array(img_array > ocrolib.midrange(img_array), 'i')

        lineDetectH = []
        lineDetectV = []
        img_array_rr = self.remove_rular(img_array)

        textarea, img_array_rr_ta, height, width = self.detect_textarea(
            img_array_rr)

        # The parameter is a fraction of the page width. crop_area takes no
        # separator argument here, so it presumably reads the pixel value
        # from self.parameter - hence the temporary override. It is restored
        # afterwards; otherwise each page would multiply the previous
        # page's pixel value by its width again.
        col_separator_fraction = self.parameter['colSeparator']
        self.parameter['colSeparator'] = int(width * col_separator_fraction)
        try:
            box = None
            if len(textarea) > 1:
                textarea = self.crop_area(textarea, img_array_bin,
                                          img_array_rr_ta)
                if len(textarea) != 0:
                    box = textarea[0]
            elif len(textarea) == 1:
                candidate = textarea[0]
                area = (abs(candidate[2] - candidate[0]) *
                        abs(candidate[3] - candidate[1]))
                # A single text area is only trusted when it covers more
                # than half of the page.
                if height * width * 0.5 < area:
                    box = candidate
            if box is None:
                box = self.select_borderLine(img_array_rr, lineDetectH,
                                             lineDetectV)
            min_x, min_y, max_x, max_y = box
        finally:
            self.parameter['colSeparator'] = col_separator_fraction

        brd = BorderType(Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" % (
            min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y)))
        pcgts.get_Page().set_Border(brd)

        # Use input_file's basename for the new file -
        # this way the files retain the same basenames:
        file_id = input_file.ID.replace(self.input_file_grp,
                                        self.output_file_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.output_file_grp, n)
        self.workspace.add_file(ID=file_id,
                                file_grp=self.output_file_grp,
                                pageId=input_file.pageId,
                                mimetype=MIMETYPE_PAGE,
                                local_filename=os.path.join(
                                    self.output_file_grp, file_id + '.xml'),
                                content=to_xml(pcgts).encode('utf-8'))
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
    """Binarize a page image and attach the result to ``page``.

    The image is reduced to one channel and normalized to [0, 1]. Unless it
    is already nearly two-valued (or ``gray`` is set), it is flattened by
    subtracting a percentile-filtered estimate. It is then rescaled between
    the ``lo`` and ``hi`` percentiles, thresholded and saved into
    ``self.output_file_grp``.

    Args:
        page_image: PIL image of the page.
        page: PAGE page object that receives the AlternativeImage.
        page_xywh: dict whose ``'features'`` string gets ``',binarized'``
            appended.
        page_id: page identifier, used for logging and saving.
        input_file: input file used to derive the output file ID.
        n: unused here; kept for interface compatibility.

    Returns:
        None. Returns early when the image has no contrast.
    """
    LOG = getLogger('OcrdAnybaseocrBinarizer')
    params = self.parameter

    raw = ocrolib.pil2array(page_image)
    if len(raw.shape) > 2:
        raw = np.mean(raw, 2)
    raw = raw.astype("float64")

    # perform image normalization
    image = raw - amin(raw)
    if amax(image) == amin(image):
        LOG.info("# image is empty: %s" % (page_id))
        return
    image /= amax(image)

    # check whether the image is already effectively binarized
    if params['gray']:
        extreme = 0
    else:
        n_extreme = np.sum(image < 0.05) + np.sum(image > 0.95)
        extreme = n_extreme * 1.0 / np.prod(image.shape)

    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # if not, we need to flatten it by estimating the local whitelevel
        LOG.info("Flattening")
        whitelevel = interpolation.zoom(image, params['zoom'])
        whitelevel = filters.percentile_filter(
            whitelevel, params['perc'], size=(params['range'], 2))
        whitelevel = filters.percentile_filter(
            whitelevel, params['perc'], size=(2, params['range']))
        whitelevel = interpolation.zoom(whitelevel, 1.0 / params['zoom'])
        if params['debug'] > 0:
            clf()
            imshow(whitelevel, vmin=0, vmax=1)
            ginput(1, params['debug'])
        # Use the common extent of both arrays.
        rows, cols = minimum(array(image.shape), array(whitelevel.shape))
        flat = clip(image[:rows, :cols] - whitelevel[:rows, :cols] + 1, 0, 1)
        if params['debug'] > 0:
            clf()
            imshow(flat, vmin=0, vmax=1)
            ginput(1, params['debug'])

    # estimate low and high thresholds
    LOG.info("Estimating Thresholds")
    n_rows, n_cols = flat.shape
    margin_rows = int(params['bignore'] * n_rows)
    margin_cols = int(params['bignore'] * n_cols)
    sample = flat[margin_rows:n_rows - margin_rows,
                  margin_cols:n_cols - margin_cols]
    if params['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        escale = params['escale']
        variance = sample - filters.gaussian_filter(sample, escale * 20.0)
        variance = filters.gaussian_filter(variance**2, escale * 20.0)**0.5
        variance = (variance > 0.3 * amax(variance))
        variance = morphology.binary_dilation(
            variance, structure=ones((int(escale * 50), 1)))
        variance = morphology.binary_dilation(
            variance, structure=ones((1, int(escale * 50))))
        if params['debug'] > 0:
            imshow(variance)
            ginput(1, params['debug'])
        sample = sample[variance]
    lo = stats.scoreatpercentile(sample.ravel(), params['lo'])
    hi = stats.scoreatpercentile(sample.ravel(), params['hi'])

    # rescale the image to get the gray scale image
    LOG.info("Rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if params['debug'] > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, params['debug'])
    binarized = 1 * (flat > params['threshold'])

    # output the normalized grayscale and the thresholded images
    LOG.info("%s lo-hi (%.2f %.2f) %s" % (page_id, lo, hi, comment))
    LOG.info("writing")
    if params['debug'] > 0 or params['show']:
        clf()
        gray()
        imshow(binarized)
        ginput(1, max(0.1, params['debug']))

    page_xywh['features'] += ',binarized'

    bin_array = array(255 * (binarized > ocrolib.midrange(binarized)), 'B')
    bin_image = ocrolib.array2pil(bin_array)

    file_id = make_file_id(input_file, self.output_file_grp)
    file_path = self.workspace.save_image_file(
        bin_image,
        file_id + '-IMG',
        page_id=page_id,
        file_grp=self.output_file_grp)
    alternative = AlternativeImageType(filename=file_path,
                                       comments=page_xywh['features'])
    page.add_AlternativeImage(alternative)
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n, model):
    """Split a page image into text and non-text parts and attach both to ``page``.

    With a ``model``, the page is resized to 800x1024, passed to
    ``model.predict`` and the per-pixel argmax decides the parts (label 1
    is blanked in the text part, label 2 in the non-text part); both masks
    are resized back to the page size. Without a model, a seed-fill mask
    built by the ``pixMorphSequence_*``, ``pixSeedfillBinary`` and
    ``expansion`` helpers separates them, and each part is binarized at its
    own mid-range value.

    Args:
        page_image: PIL image of the page.
        page: PAGE page object that receives two AlternativeImage entries.
        page_xywh: dict whose ``'features'`` string gets ``',clipped'``
            appended (after the non-text image has been registered with
            ``',non_text'``).
        page_id: page identifier handed to the workspace.
        input_file: input file used to derive the output file IDs.
        n: unused here; kept for interface compatibility.
        model: segmentation model, or a falsy value to use the
            rule-based method.
    """
    LOG = getLogger('OcrdAnybaseocrTiseg')

    if model:
        # Image.LANCZOS is the same filter as the former Image.ANTIALIAS
        # alias, which newer Pillow releases no longer provide.
        I = ocrolib.pil2array(
            page_image.resize((800, 1024), Image.LANCZOS))
        I = np.array(I)[np.newaxis, :, :, :]
        LOG.info('I shape %s', I.shape)
        # NOTE(review): after the 4-index expression above, I always has
        # four axes, so this check looks unreachable - confirm.
        if len(I.shape) < 3:
            print('Wrong input shape. Image should have 3 channel')

        # get prediction
        out = model.predict(I)
        out = out.reshape((2048, 1600, 3)).argmax(axis=2)

        text_part = np.ones(out.shape)
        text_part[np.where(out == 1)] = 0

        image_part = np.ones(out.shape)
        image_part[np.where(out == 2)] = 0

        image_part = ocrolib.array2pil(array(255 * (image_part), 'B'))
        text_part = ocrolib.array2pil(array(255 * (text_part), 'B'))

        text_part = text_part.resize(page_image.size, Image.BICUBIC)
        image_part = image_part.resize(page_image.size, Image.BICUBIC)
    else:
        I = ocrolib.pil2array(page_image)

        if len(I.shape) > 2:
            I = np.mean(I, 2)
        I = 1 - I / I.max()
        rows, cols = I.shape

        # Generate Mask and Seed Images
        Imask, Iseed = self.pixMorphSequence_mask_seed_fill_holes(I)

        # Iseedfill: Union of Mask and Seed Images
        Iseedfill = self.pixSeedfillBinary(Imask, Iseed)

        # Dilation of Iseedfill
        Iseedfill = ndimage.binary_dilation(Iseedfill, ones((3, 3)))

        # Expansion of Iseedfill to become equal in size of I
        Iseedfill = self.expansion(Iseedfill, (rows, cols))

        # Write Text and Non-Text images
        image_part = array((1 - I * Iseedfill), dtype=int)
        text_part = array((1 - I * (1 - Iseedfill)), dtype=int)

        # Binarize each part against its own mid-range value while both
        # are still arrays, then convert them to PIL images.
        text_bin = array(255 * (text_part > ocrolib.midrange(text_part)), 'B')
        image_bin = array(255 * (image_part > ocrolib.midrange(image_part)), 'B')
        text_part = ocrolib.array2pil(text_bin)
        image_part = ocrolib.array2pil(image_bin)

    file_id = make_file_id(input_file, self.output_file_grp)
    file_path = self.workspace.save_image_file(
        image_part,
        file_id + "_img",
        page_id=page_id,
        file_grp=self.output_file_grp,
    )
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path,
                             comments=page_xywh['features'] + ',non_text'))

    page_xywh['features'] += ',clipped'
    file_path = self.workspace.save_image_file(
        text_part,
        file_id + "_txt",
        page_id=page_id,
        file_grp=self.output_file_grp,
    )
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comments=page_xywh['features']))