def process(self):
    """Classify the layout of each input page and record the result in the METS.

    Reads the ``model_path`` and ``class_mapping_path`` parameters, builds the
    model with ``create_model`` and inverts the pickled class-index mapping so
    that it maps index -> label.  For every input file the binarized page image
    is resized to 500x600, divided by 255 and handed to ``start_test``; the
    result is logged and written with ``write_to_mets`` under the physical
    page id ``PHYS_000<n>``.

    Exits the interpreter with status 1 when no GPU is detected or when the
    model file does not exist.
    """
    if not tf.test.is_gpu_available():
        LOG.error("Your system has no CUDA installed. No GPU detected.")
        sys.exit(1)

    model_path = Path(self.parameter['model_path'])
    class_mapper_path = Path(self.parameter['class_mapping_path'])
    if not model_path.is_file():
        LOG.error("""\
            Layout Classfication model was not found at '%s'. Make sure the `model_path` parameter
            points to the local model path.
            model can be downloaded from http://url
            """ % model_path)
        sys.exit(1)

    LOG.info('Loading model from file %s', model_path)
    model = self.create_model(str(model_path))

    # Load the class-index mapping; the context manager guarantees the file
    # handle is released even if unpickling fails.
    # NOTE(review): pickle executes arbitrary code on load -- only point
    # `class_mapping_path` at trusted files.
    with open(str(class_mapper_path), "rb") as pickle_in:
        class_indices = pickle.load(pickle_in)
    label_mapping = dict((v, k) for k, v in class_indices.items())

    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        page = pcgts.get_Page()
        fname = page.imageFilename
        page_id = input_file.pageId or input_file.ID

        # Record this processing step and all its parameters in the PAGE metadata.
        metadata = pcgts.get_Metadata()
        metadata.add_MetadataItem(
            MetadataItemType(
                type_="processingStep",
                name=self.ocrd_tool['steps'][0],
                value=TOOL,
                Labels=[
                    LabelsType(
                        Label=[
                            LabelType(type_=name, value=self.parameter[name])
                            for name in self.parameter.keys()
                        ])
                ]))

        LOG.info("INPUT FILE %s", page_id)
        page_image, _, _ = self.workspace.image_from_page(
            page, page_id, feature_selector='binarized')

        # NOTE(review): `Image.ANTIALIAS` is deprecated in recent Pillow
        # releases -- confirm against the pinned Pillow version.
        img_array = ocrolib.pil2array(
            page_image.resize((500, 600), Image.ANTIALIAS))
        img_array = img_array * 1. / 255.
        # Add a leading and a trailing singleton axis around the 2-D image.
        img_array = img_array[np.newaxis, :, :, np.newaxis]

        results = self.start_test(model, img_array, fname, label_mapping)
        LOG.info(results)

        phys_id = "PHYS_000" + str(n)
        self.workspace.mets.set_physical_page_for_file(phys_id, input_file)
        self.create_logmap_smlink(pcgts)
        self.write_to_mets(results, phys_id)
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
    """Detect the page border, annotate it on ``page`` and save the cropped image.

    The border box comes from ``detect_textarea`` / ``crop_area`` when a usable
    text area is found, otherwise from ``select_borderLine``.  The box is
    converted to page coordinates, stored as the page ``Border``, and the
    cropped image is saved and registered as an ``AlternativeImage``.

    Side effects: appends ``',cropped'`` to ``page_xywh['features']``.
    ``n`` is accepted for interface compatibility and not used here.
    """
    img_array = ocrolib.pil2array(page_image)

    # Replicate a single-channel image to three channels.
    # FIXME (kept from original): check not needed anymore?
    if len(img_array.shape) == 2:
        img_array = np.stack((img_array,) * 3, axis=-1)

    img_array_bin = np.array(img_array > ocrolib.midrange(img_array), 'i')

    lineDetectH = []
    lineDetectV = []
    img_array_rr = self.remove_rular(img_array)

    textarea, img_array_rr_ta, height, width = self.detect_textarea(img_array_rr)
    # `colSeparator` is configured as a fraction of the page width.
    colSeparator = int(width * self.parameter['colSeparator'])

    if len(textarea) > 1:
        textarea = self.crop_area(
            textarea, img_array_bin, img_array_rr_ta, colSeparator)
        if len(textarea) == 0:
            min_x, min_y, max_x, max_y = self.select_borderLine(
                img_array_rr, lineDetectH, lineDetectV)
        else:
            min_x, min_y, max_x, max_y = textarea[0]
    elif len(textarea) == 1 and (
            height * width * 0.5 < (abs(textarea[0][2] - textarea[0][0])
                                    * abs(textarea[0][3] - textarea[0][1]))):
        # A single text area covering more than half the page is used as is.
        # NOTE(review): the original also computed a 20/40 px padded box here
        # but never used it; that dead computation has been dropped.
        min_x, min_y, max_x, max_y = textarea[0]
    else:
        min_x, min_y, max_x, max_y = self.select_borderLine(
            img_array_rr, lineDetectH, lineDetectV)

    border_polygon = [[min_x, min_y], [max_x, min_y],
                      [max_x, max_y], [min_x, max_y]]
    border_polygon = coordinates_for_segment(border_polygon, page_image, page_xywh)
    border_points = points_from_polygon(border_polygon)
    page.set_Border(BorderType(Coords=CoordsType(border_points)))

    page_image = crop_image(page_image, box=(min_x, min_y, max_x, max_y))
    page_xywh['features'] += ',cropped'

    file_id = make_file_id(input_file, self.output_file_grp)
    file_path = self.workspace.save_image_file(page_image,
                                               file_id + '-IMG',
                                               page_id=page_id,
                                               file_grp=self.output_file_grp)
    page.add_AlternativeImage(AlternativeImageType(
        filename=file_path, comments=page_xywh['features']))
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n, mrcnn_model, class_names):
    """Run the Mask R-CNN model on the page and add one TextRegion per detection.

    For each detected ROI the region image is cut out, saved in
    ``self.image_grp`` and attached as an ``AlternativeImage`` to a new
    ``TextRegionType`` whose type is ``class_names[class_id]``.

    Side effects: appends ``',blksegmented'`` to ``page_xywh['features']``.
    """
    img_array = ocrolib.pil2array(page_image)
    results = mrcnn_model.detect([img_array], verbose=1)
    r = results[0]

    page_xywh['features'] += ',blksegmented'

    # numpy images are indexed (row, column), i.e. (y, x).
    height, width = img_array.shape[:2]

    # The output file id does not depend on the region, so compute it once.
    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)

    for i, (roi, class_id) in enumerate(zip(r['rois'], r['class_ids'])):
        # Mask R-CNN reports boxes as (y1, x1, y2, x2).  The original code
        # read them as x/y, which transposed the PAGE coordinates.
        min_y, min_x, max_y, max_x = roi

        # Small post-processing in case of paragraphs (class id 2) to not cut
        # off the first/last letters: widen the box horizontally, clamped to
        # the image.  (The original guard compared against `width` on the
        # lower side and moved `min_x` instead of `max_x`, so it never widened
        # the box.)
        if class_id == 2:
            min_x = max(min_x - 5, 0)
            max_x = min(max_x + 10, width)

        region_img = ocrolib.array2pil(img_array[min_y:max_y, min_x:max_x])

        file_path = self.workspace.save_image_file(region_img,
                                                   file_id + "_" + str(i),
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        ai = AlternativeImageType(filename=file_path, comments=page_xywh['features'])
        coords = CoordsType(
            "%i,%i %i,%i %i,%i %i,%i" %
            (min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y))
        textregion = TextRegionType(Coords=coords, type_=class_names[class_id])
        textregion.add_AlternativeImage(ai)
        page.add_TextRegion(textregion)
def process(self):
    """Classify the layout of each input page and record the result in the METS.

    Requires exactly one input and one output file group.  The model and the
    pickled class-index mapping are located through ``resolve_resource``; the
    mapping is inverted to index -> label.  Each binarized page image is
    resized to 500x600, divided by 255 and passed to ``start_test``; the result
    is logged and written with ``write_to_mets`` under ``PHYS_000<n>``.

    Exits the interpreter with status 1 when the model file does not exist.
    A missing GPU is only logged; processing continues.
    """
    LOG = getLogger('OcrdAnybaseocrLayoutAnalyser')
    if not tf.test.is_gpu_available():
        # Deliberately non-fatal: the hard exit is disabled in this version.
        LOG.error("Your system has no CUDA installed. No GPU detected.")

    assert_file_grp_cardinality(self.input_file_grp, 1)
    assert_file_grp_cardinality(self.output_file_grp, 1)

    model_path = Path(self.resolve_resource(self.parameter['model_path']))
    class_mapper_path = Path(self.resolve_resource(self.parameter['class_mapping_path']))
    if not model_path.is_file():
        LOG.error("""\
            Layout Classfication model was not found at '%s'. Make sure the `model_path` parameter
            points to the local model path.
            model can be downloaded from http://url
            """ % model_path)
        sys.exit(1)

    LOG.info('Loading model from file %s', model_path)
    model = self.create_model(str(model_path))

    # Load the class-index mapping; the context manager guarantees the file
    # handle is released even if unpickling fails.
    # NOTE(review): pickle executes arbitrary code on load -- only use trusted
    # mapping files.
    with open(str(class_mapper_path), "rb") as pickle_in:
        class_indices = pickle.load(pickle_in)
    label_mapping = dict((v, k) for k, v in class_indices.items())

    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        page = pcgts.get_Page()
        fname = page.imageFilename
        page_id = input_file.pageId or input_file.ID
        self.add_metadata(pcgts)

        LOG.info("INPUT FILE %s", page_id)
        page_image, _, _ = self.workspace.image_from_page(
            page, page_id, feature_selector='binarized')

        # NOTE(review): `Image.ANTIALIAS` is deprecated in recent Pillow
        # releases -- confirm against the pinned Pillow version.
        img_array = ocrolib.pil2array(page_image.resize((500, 600), Image.ANTIALIAS))
        img_array = img_array * 1. / 255.
        # Add a leading and a trailing singleton axis around the 2-D image.
        img_array = img_array[np.newaxis, :, :, np.newaxis]

        results = self.start_test(model, img_array, fname, label_mapping)
        LOG.info(results)

        phys_id = "PHYS_000" + str(n)
        self.workspace.mets.set_physical_page_for_file(phys_id, input_file)
        self.create_logmap_smlink(pcgts)
        self.write_to_mets(results, phys_id)
def process(self):
    """Classify the layout of each input page image and record it in the METS.

    Loads the model (``create_model``) and the pickled class-index mapping
    named by the ``model_path`` / ``class_mapping_path`` parameters, then opens
    each page's image file, resizes it to 500x600 and passes it to
    ``start_test``.  The result is logged and written with ``write_to_mets``
    under the physical page id ``PHYS_000<n>``.

    Exits the interpreter with status 1 when no GPU is detected or when the
    model file does not exist.
    """
    if not tf.test.is_gpu_available():
        LOG.error("Your system has no CUDA installed. No GPU detected.")
        sys.exit(1)

    model_path = Path(self.parameter['model_path'])
    class_mapper_path = Path(self.parameter['class_mapping_path'])

    # Check for the model *before* trying to load it, so that a missing file
    # produces the explanatory message below rather than a loader traceback.
    # (The original loaded the model once before this check and again after.)
    if not model_path.is_file():
        LOG.error("""\
            Layout Classfication model was not found at '%s'. Make sure the `model_path` parameter
            points to the local model path.
            model can be downloaded from http://url
            """ % model_path)
        sys.exit(1)

    # The original message lacked the `%s` placeholder, which makes the
    # logging module report a formatting error instead of the path.
    LOG.info('Loading model from file %s', model_path)
    model = self.create_model(str(model_path))

    # Load the class-index mapping; the context manager closes the file.
    # NOTE(review): pickle executes arbitrary code on load -- only use trusted
    # mapping files.
    with open(str(class_mapper_path), "rb") as pickle_in:
        class_indices = pickle.load(pickle_in)

    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        fname = pcgts.get_Page().imageFilename
        LOG.info("INPUT FILE %s", fname)

        # NOTE(review): `Image.ANTIALIAS` is deprecated in recent Pillow
        # releases -- confirm against the pinned Pillow version.
        with Image.open(fname) as img:
            img_array = ocrolib.pil2array(img.resize((500, 600), Image.ANTIALIAS))
        # Add a leading and a trailing singleton axis around the 2-D image.
        img_array = img_array[np.newaxis, :, :, np.newaxis]

        results = self.start_test(model, img_array, fname, class_indices)
        LOG.info(results)

        phys_id = "PHYS_000" + str(n)
        self.workspace.mets.set_physical_page_for_file(phys_id, input_file)
        self.create_logmap_smlink(pcgts)
        self.write_to_mets(results, phys_id)
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
    """Separate text from non-text and save the text part as an image.

    The page image is converted to an inverted, max-normalised array.  A
    mask/seed pair is generated, seed-filled, dilated with a 3x3 structuring
    element and expanded back to the image size.  The text part (everything
    outside the filled mask) is thresholded at its midrange, saved in
    ``self.image_grp`` and registered as an ``AlternativeImage`` of ``page``.

    Side effects: appends ``',tiseged'`` to ``page_xywh['features']``.
    """
    I = ocrolib.pil2array(page_image)
    if len(I.shape) > 2:
        # Collapse colour channels by averaging over the last axis.
        I = np.mean(I, 2)
    # Invert and normalise by the maximum.
    # NOTE(review): an all-zero image makes `I.max()` zero here -- confirm
    # callers never pass one.
    I = 1 - I / I.max()
    rows, cols = I.shape

    # Generate Mask and Seed Images
    Imask, Iseed = self.pixMorphSequence_mask_seed_fill_holes(I)

    # Iseedfill: Union of Mask and Seed Images
    Iseedfill = self.pixSeedfillBinary(Imask, Iseed)

    # Dilation of Iseedfill
    mask = ones((3, 3))
    Iseedfill = ndimage.binary_dilation(Iseedfill, mask)

    # Expansion of Iseedfill to become equal in size of I
    Iseedfill = self.expansion(Iseedfill, (rows, cols))

    # Only the text part is written by this method; the unused non-text
    # ("image") array the original also built has been dropped.
    text_part = array((1 - I * (1 - Iseedfill)), dtype=int)
    text_part[0, 0] = 0  # only for visualisation purpose

    page_xywh['features'] += ',tiseged'

    bin_array = array(255 * (text_part > ocrolib.midrange(text_part)), 'B')
    bin_image = ocrolib.array2pil(bin_array)

    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)
    file_path = self.workspace.save_image_file(bin_image,
                                               file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comments=page_xywh['features']))
def readFeatures(filename):
    """Read an image file and append its column frames to the module buffers.

    The image is opened as 8-bit grayscale, measured and normalised with the
    module-level ``lnorm`` normaliser, and then split into one frame per pixel
    column.  Results are appended to the module-level lists ``inputs`` (one
    column vector per frame), ``seqLengths`` (image width) and ``seqDims``
    (a one-element list holding that same width).
    """
    image = Image.open(filename).convert('L')

    # Normalize using OCRopus normalizer
    imagea = ocrolib.pil2array(image)
    lnorm.measure(amax(imagea) - imagea)
    try:
        imagea = lnorm.normalize(imagea, cval=amax(imagea))
    except (ZeroDivisionError, ValueError):
        # Written as a call so the module parses on Python 3 as well; with a
        # single argument the output on Python 2 is unchanged.
        # NOTE(review): despite the message, the un-normalised image is still
        # appended below -- confirm whether it should be skipped instead.
        print('Bad image, removing')

    # Read image frames (1-column-wide windows as tall as the image).
    width = imagea.shape[1]
    inputs.extend(imagea[:, w] for w in range(width))

    # seqLengths is the width of the image; seqDims wraps that same value.
    seqLengths.append(width)
    seqDims.append([seqLengths[-1]])
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
    """Binarize the page image and register the result as an AlternativeImage.

    Steps: convert to a float64 grayscale array, normalise to [0, 1], flatten
    the background with percentile filters unless the image already looks
    binarized, estimate low/high percentiles on the central region, rescale,
    and threshold with the ``threshold`` parameter.  The binary image is saved
    in ``self.image_grp``.

    Returns early (without saving anything) when the image has no contrast.
    Side effects: appends ``',binarized'`` to ``page_xywh['features']``; when
    the ``debug`` / ``show`` parameters are set, interactive plotting calls
    (``imshow`` / ``ginput``) are made.
    """
    raw = ocrolib.pil2array(page_image)
    if len(raw.shape) > 2:
        # collapse colour channels by averaging over the last axis
        raw = np.mean(raw, 2)
    raw = raw.astype("float64")
    # perform image normalization
    image = raw - amin(raw)
    if amax(image) == amin(image):
        # constant image: nothing to binarize
        LOG.info("# image is empty: %s" % (page_id))
        return
    image /= amax(image)

    # check whether the image is already effectively binarized
    if self.parameter['gray']:
        extreme = 0
    else:
        # fraction of pixels that are nearly black or nearly white
        extreme = (np.sum(image < 0.05) + np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
    if extreme > 0.95:
        comment = "no-normalization"
        # NOTE: `flat` aliases `image` here, so the in-place rescaling below
        # also modifies `image`
        flat = image
    else:
        comment = ""
        # if not, we need to flatten it by estimating the local whitelevel
        LOG.info("Flattening")
        m = interpolation.zoom(image, self.parameter['zoom'])
        m = filters.percentile_filter(m, self.parameter['perc'], size=(self.parameter['range'], 2))
        m = filters.percentile_filter(m, self.parameter['perc'], size=(2, self.parameter['range']))
        m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
        if self.parameter['debug'] > 0:
            clf()
            imshow(m, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        # zooming down and up again may change the size slightly, so use the
        # common extent of both arrays
        w, h = minimum(array(image.shape), array(m.shape))
        flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
        if self.parameter['debug'] > 0:
            clf()
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])

    # estimate low and high thresholds
    LOG.info("Estimating Thresholds")
    d0, d1 = flat.shape
    # ignore a border of `bignore` (a fraction of each dimension) on all sides
    o0, o1 = int(self.parameter['bignore'] * d0), int(
        self.parameter['bignore'] * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if self.parameter['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = self.parameter['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        if self.parameter['debug'] > 0:
            imshow(v)
            ginput(1, self.parameter['debug'])
        # boolean-mask indexing: `est` becomes a 1-D array of selected pixels
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
    hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])
    # rescale the image to get the gray scale image
    LOG.info("Rescaling")
    flat -= lo
    # NOTE(review): no guard for hi == lo -- confirm this cannot occur
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if self.parameter['debug'] > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, self.parameter['debug'])
    binarized = 1 * (flat > self.parameter['threshold'])

    # output the normalized grayscale and the thresholded images
    LOG.info("%s lo-hi (%.2f %.2f) %s" % (page_id, lo, hi, comment))
    LOG.info("writing")
    if self.parameter['debug'] > 0 or self.parameter['show']:
        clf()
        gray()
        imshow(binarized)
        ginput(1, max(0.1, self.parameter['debug']))
    page_xywh['features'] += ',binarized'

    # convert the 0/1 array to an 8-bit 0/255 image
    bin_array = array(255 * (binarized > ocrolib.midrange(binarized)), 'B')
    bin_image = ocrolib.array2pil(bin_array)

    # derive the output file id from the input id; fall back to a padded id
    # when the input id does not contain the input file group name
    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)
    file_path = self.workspace.save_image_file(bin_image,
                                               file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comments=page_xywh['features']))
def process(self):
    """Detect the page border of every input page and write an updated PAGE file.

    For each input file the page image is analysed with ``remove_rular`` and
    ``detect_textarea``; the border box comes from the detected text area
    (optionally refined by ``crop_area``) or, failing that, from
    ``select_borderLine``.  The box is stored as the page ``Border`` and the
    PAGE document is added to ``self.output_file_grp``.
    """
    for (n, input_file) in enumerate(self.input_files):
        pcgts = page_from_file(self.workspace.download_file(input_file))
        fname = pcgts.get_Page().imageFilename
        img = self.workspace.resolve_image_as_pil(fname)
        print("Process file: ", fname)

        img_array = ocrolib.pil2array(img)
        img_array_bin = np.array(img_array > ocrolib.midrange(img_array), 'i')

        lineDetectH = []
        lineDetectV = []
        img_array_rr = self.remove_rular(img_array)

        textarea, img_array_rr_ta, height, width = self.detect_textarea(
            img_array_rr)

        # `colSeparator` is configured as a fraction of the page width, and
        # the helpers presumably read the pixel value from `self.parameter`.
        # The original overwrote the parameter permanently, so from the second
        # page on the already-scaled value was multiplied by the width again.
        # Set it only for the duration of this page and restore the fraction.
        col_separator_ratio = self.parameter['colSeparator']
        self.parameter['colSeparator'] = int(width * col_separator_ratio)
        try:
            if len(textarea) > 1:
                textarea = self.crop_area(textarea, img_array_bin, img_array_rr_ta)
                if len(textarea) == 0:
                    min_x, min_y, max_x, max_y = self.select_borderLine(
                        img_array_rr, lineDetectH, lineDetectV)
                else:
                    min_x, min_y, max_x, max_y = textarea[0]
            elif len(textarea) == 1 and (
                    height * width * 0.5 < (abs(textarea[0][2] - textarea[0][0])
                                            * abs(textarea[0][3] - textarea[0][1]))):
                # A single text area covering more than half the page is used
                # as is.  NOTE(review): the original also computed a padded
                # box here (for a disabled `save_pf` call) and never used it.
                min_x, min_y, max_x, max_y = textarea[0]
            else:
                min_x, min_y, max_x, max_y = self.select_borderLine(
                    img_array_rr, lineDetectH, lineDetectV)
        finally:
            self.parameter['colSeparator'] = col_separator_ratio

        brd = BorderType(Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" % (
            min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y)))
        pcgts.get_Page().set_Border(brd)

        # Use input_file's basename for the new file -
        # this way the files retain the same basenames:
        file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.output_file_grp, n)
        self.workspace.add_file(ID=file_id,
                                file_grp=self.output_file_grp,
                                pageId=input_file.pageId,
                                mimetype=MIMETYPE_PAGE,
                                local_filename=os.path.join(
                                    self.output_file_grp, file_id + '.xml'),
                                content=to_xml(pcgts).encode('utf-8'))
def _process_segment(self, page, page_image, page_coords, page_id, input_file):
    """Split the page into text and non-text images and attach both to ``page``.

    With ``self.model`` set, the page is resized to 800x1024, passed to
    ``self.model.predict`` and the per-pixel argmax decides the class (1 is
    masked in the text image, 2 in the non-text image); both masks are resized
    back to the page size.  Without a model, a morphological mask/seed fill is
    used instead.

    Both images are saved in ``self.output_file_grp`` (suffixes ``_img`` and
    ``_txt``) and registered as ``AlternativeImage`` entries whose comments are
    ``page_coords['features']`` plus ``',non_text'`` / ``',clipped'``.
    ``page_id`` is accepted for interface compatibility; files are saved under
    ``input_file.pageId``.
    """
    LOG = getLogger('OcrdAnybaseocrTiseg')

    if self.model:
        # NOTE(review): `Image.ANTIALIAS` is deprecated in recent Pillow
        # releases -- confirm against the pinned Pillow version.
        I = np.array(ocrolib.pil2array(
            page_image.resize((800, 1024), Image.ANTIALIAS)))
        # The channel check must run *before* the batch axis is added; in the
        # original it ran afterwards, so it could never trigger, and a
        # single-channel image crashed on the 4-index expression instead.
        if I.ndim < 3:
            LOG.warning('Wrong input shape. Image should have 3 channel; '
                        'replicating the single channel')
            I = np.stack((I,) * 3, axis=-1)
        I = I[np.newaxis, :, :, :]
        LOG.info('I shape %s', I.shape)

        # get prediction
        out = self.model.predict(I)
        # NOTE(review): output size is hard-coded -- presumably fixed by the
        # model architecture; confirm.
        out = out.reshape((2048, 1600, 3)).argmax(axis=2)

        text_part = 255 * np.ones(out.shape, 'B')
        text_part[out == 1] = 0
        LOG.info(
            'text: %d percent',
            100 * (1 - np.count_nonzero(text_part) / np.prod(out.shape)))

        image_part = 255 * np.ones(out.shape, 'B')
        image_part[out == 2] = 0
        LOG.info(
            'image: %d percent',
            100 * (1 - np.count_nonzero(image_part) / np.prod(out.shape)))

        image_part = ocrolib.array2pil(image_part)
        text_part = ocrolib.array2pil(text_part)

        # Bring both masks back to the size of the input page image.
        image_part = image_part.resize(page_image.size, Image.BICUBIC)
        text_part = text_part.resize(page_image.size, Image.BICUBIC)
    else:
        I = ocrolib.pil2array(page_image)
        if len(I.shape) > 2:
            I = np.mean(I, 2)
        I = 1 - I / I.max()
        rows, cols = I.shape

        # Generate Mask and Seed Images
        Imask, Iseed = self.pixMorphSequence_mask_seed_fill_holes(I)

        # Iseedfill: Union of Mask and Seed Images
        Iseedfill = self.pixSeedfillBinary(Imask, Iseed)

        # Dilation of Iseedfill
        mask = np.ones((3, 3))
        Iseedfill = ndimage.binary_dilation(Iseedfill, mask)

        # Expansion of Iseedfill to become equal in size of I
        Iseedfill = self.expansion(Iseedfill, (rows, cols))

        # Write Text and Non-Text images
        image_part = np.array(255 * (1 - I * Iseedfill), dtype='B')
        text_part = np.array(255 * (1 - I * (1 - Iseedfill)), dtype='B')
        LOG.info(
            'text: %d percent',
            100 * (1 - np.count_nonzero(text_part) / np.prod(I.shape)))
        LOG.info(
            'image: %d percent',
            100 * (1 - np.count_nonzero(image_part) / np.prod(I.shape)))

        image_part = ocrolib.array2pil(image_part)
        text_part = ocrolib.array2pil(text_part)

    file_id = make_file_id(input_file, self.output_file_grp)

    file_path = self.workspace.save_image_file(
        image_part,
        file_id + "_img",
        page_id=input_file.pageId,
        file_grp=self.output_file_grp,
    )
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path,
                             comments=page_coords['features'] + ',non_text'))

    file_path = self.workspace.save_image_file(
        text_part,
        file_id + "_txt",
        page_id=input_file.pageId,
        file_grp=self.output_file_grp,
    )
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path,
                             comments=page_coords['features'] + ',clipped'))
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n, model):
    """Split the page into text and non-text images and attach both to ``page``.

    With a ``model``, ``model.predict_segmentation`` labels the pixels (1 is
    masked in the text image, 2 in the non-text image) and both masks are
    resized to the page size.  Without one, a morphological mask/seed fill is
    used and both parts are thresholded at their midrange.

    Both images are saved in ``self.image_grp`` (suffixes ``_img`` and
    ``_txt``) and registered as ``AlternativeImage`` entries.

    Side effects: appends ``',clipped'`` to ``page_xywh['features']`` (after
    the non-text image has been registered); in the model branch the files
    ``/tmp/out.png`` and ``out_image.png`` are written.
    """
    I = ocrolib.pil2array(page_image)
    LOG.info('image size: %s', page_image.size)

    if model:
        if len(I.shape) < 3:
            print('Wrong input shape. Image should have 3 channel')

        # get prediction
        out = model.predict_segmentation(inp=I, out_fname="/tmp/out.png")
        # NOTE(review): debug dump into the current working directory, kept
        # from the original -- confirm it is still wanted.
        cv2.imwrite('out_image.png', out * (255 / 2))

        text_part = np.ones(out.shape)
        text_part[out == 1] = 0

        image_part = np.ones(out.shape)
        image_part[out == 2] = 0

        image_part = ocrolib.array2pil(array(255 * (image_part), 'B'))
        text_part = ocrolib.array2pil(array(255 * (text_part), 'B'))

        text_part = text_part.resize(page_image.size, Image.BICUBIC)
        image_part = image_part.resize(page_image.size, Image.BICUBIC)
    else:
        if len(I.shape) > 2:
            I = np.mean(I, 2)
        I = 1 - I / I.max()
        rows, cols = I.shape

        # Generate Mask and Seed Images
        Imask, Iseed = self.pixMorphSequence_mask_seed_fill_holes(I)

        # Iseedfill: Union of Mask and Seed Images
        Iseedfill = self.pixSeedfillBinary(Imask, Iseed)

        # Dilation of Iseedfill
        mask = ones((3, 3))
        Iseedfill = ndimage.binary_dilation(Iseedfill, mask)

        # Expansion of Iseedfill to become equal in size of I
        Iseedfill = self.expansion(Iseedfill, (rows, cols))

        # Write Text and Non-Text images
        image_arr = array((1 - I * Iseedfill), dtype=int)
        text_arr = array((1 - I * (1 - Iseedfill)), dtype=int)

        # Threshold each part at its own midrange.  The original referenced
        # an undefined name (`img_part`) and derived the non-text image from
        # the already-converted text PIL image.
        text_part = ocrolib.array2pil(
            array(255 * (text_arr > ocrolib.midrange(text_arr)), 'B'))
        image_part = ocrolib.array2pil(
            array(255 * (image_arr > ocrolib.midrange(image_arr)), 'B'))

    # Derive the output file id from the input id; fall back to a padded id
    # when the input id does not contain the input file group name.
    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)

    file_path = self.workspace.save_image_file(
        image_part,
        file_id + "_img",
        page_id=page_id,
        file_grp=self.image_grp,
        force=self.parameter['force'])
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path,
                             comments=page_xywh['features'] + ',non_text'))

    page_xywh['features'] += ',clipped'
    file_path = self.workspace.save_image_file(
        text_part,
        file_id + "_txt",
        page_id=page_id,
        file_grp=self.image_grp,
        force=self.parameter['force'])
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comments=page_xywh['features']))
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
    """Detect the page border, annotate it on ``page`` and save the cropped image.

    The border box comes from ``detect_textarea`` / ``crop_area`` when a usable
    text area is found, otherwise from ``select_borderLine``.  The box is
    stored as the page ``Border``; the cropped image is saved in
    ``self.image_grp`` and registered as an ``AlternativeImage``.

    Side effects: appends ``',cropped'`` to ``page_xywh['features']``.
    """
    img_array = ocrolib.pil2array(page_image)

    # Replicate a single-channel image to three channels.
    # FIXME (kept from original): check not needed anymore?
    if len(img_array.shape) == 2:
        img_array = np.stack((img_array, ) * 3, axis=-1)

    img_array_bin = np.array(img_array > ocrolib.midrange(img_array), 'i')

    lineDetectH = []
    lineDetectV = []
    img_array_rr = self.remove_rular(img_array)

    textarea, img_array_rr_ta, height, width = self.detect_textarea(
        img_array_rr)

    # `colSeparator` is configured as a fraction of the page width, and the
    # helpers presumably read the pixel value from `self.parameter`.  The
    # original overwrote the parameter permanently, so every further call
    # multiplied the already-scaled value by the width again.  Set it only
    # while the border is being determined, then restore the fraction.
    col_separator_ratio = self.parameter['colSeparator']
    self.parameter['colSeparator'] = int(width * col_separator_ratio)
    try:
        if len(textarea) > 1:
            textarea = self.crop_area(textarea, img_array_bin, img_array_rr_ta)
            if len(textarea) == 0:
                min_x, min_y, max_x, max_y = self.select_borderLine(
                    img_array_rr, lineDetectH, lineDetectV)
            else:
                min_x, min_y, max_x, max_y = textarea[0]
        elif len(textarea) == 1 and (
                height * width * 0.5 < (abs(textarea[0][2] - textarea[0][0])
                                        * abs(textarea[0][3] - textarea[0][1]))):
            # A single text area covering more than half the page is used as
            # is.  NOTE(review): the original also computed a padded box here
            # but never used it; that dead computation has been dropped.
            min_x, min_y, max_x, max_y = textarea[0]
        else:
            min_x, min_y, max_x, max_y = self.select_borderLine(
                img_array_rr, lineDetectH, lineDetectV)
    finally:
        self.parameter['colSeparator'] = col_separator_ratio

    brd = BorderType(Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" % (
        min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y)))
    page.set_Border(brd)

    page_image = crop_image(page_image, box=(min_x, min_y, max_x, max_y))
    page_xywh['features'] += ',cropped'

    # Derive the output file id from the input id; fall back to a padded id
    # when the input id does not contain the input file group name.
    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)
    file_path = self.workspace.save_image_file(page_image,
                                               file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comments=page_xywh['features']))
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
    """Deskew the page image, record the angle and save the thresholded result.

    When the ``maxskew`` parameter is positive the skew angle is estimated on
    the central region of the inverted image and the image is rotated by it;
    otherwise the angle is 0.  The image is then rescaled between the ``lo``
    and ``hi`` percentiles, thresholded, saved in ``self.image_grp`` and
    registered as an ``AlternativeImage``.

    Side effects: sets the page orientation and appends ``',deskewed'`` to
    ``page_xywh['features']``; interactive plotting calls are made when the
    ``debug`` parameter is positive.
    """
    params = self.parameter

    def central_region(arr):
        # Slice off a margin of `bignore` (a fraction of each dimension).
        n_rows, n_cols = arr.shape
        skip_rows = int(params['bignore'] * n_rows)
        skip_cols = int(params['bignore'] * n_cols)
        return arr[skip_rows:n_rows - skip_rows, skip_cols:n_cols - skip_cols]

    flat = ocrolib.pil2array(page_image).astype("float64")

    # --- skew estimation and rotation ---
    if params['maxskew'] > 0:
        if params['parallel'] < 2:
            LOG.info("Estimating Skew Angle")
        # Work on the inverted image, shifted so that its minimum is 0.
        flat = amax(flat) - flat
        flat -= amin(flat)
        sample = central_region(flat)
        max_angle = params['maxskew']
        n_steps = int(2 * params['maxskew'] * params['skewsteps'])
        candidates = linspace(-max_angle, max_angle, n_steps + 1)
        angle = self.estimate_skew_angle(sample, candidates)
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        # Undo the inversion.
        flat = amax(flat) - flat
    else:
        angle = 0

    # --- low/high threshold estimation ---
    if params['parallel'] < 2:
        LOG.info("Estimating Thresholds")
    sample = central_region(flat)
    if params['escale'] > 0:
        # Restrict the estimate to regions with significant variance, which
        # makes the percentile based low and high estimates more reliable.
        escale = params['escale']
        sigma = escale * 20.0
        span = int(escale * 50)
        variance = sample - filters.gaussian_filter(sample, sigma)
        variance = filters.gaussian_filter(variance**2, sigma)**0.5
        selected = (variance > 0.3 * amax(variance))
        selected = morphology.binary_dilation(selected, structure=ones((span, 1)))
        selected = morphology.binary_dilation(selected, structure=ones((1, span)))
        if params['debug'] > 0:
            imshow(selected)
            ginput(1, params['debug'])
        sample = sample[selected]
    lo = stats.scoreatpercentile(sample.ravel(), params['lo'])
    hi = stats.scoreatpercentile(sample.ravel(), params['hi'])

    # --- rescale to the gray-scale range and threshold ---
    if params['parallel'] < 2:
        LOG.info("Rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if params['debug'] > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, params['debug'])
    deskewed = 1 * (flat > params['threshold'])

    # FIXME (kept from original): quick fix to prevent angle of "none"
    page.set_orientation(0 if angle is None else angle)

    page_xywh['features'] += ',deskewed'
    page_image = ocrolib.array2pil(
        array(255 * (deskewed > ocrolib.midrange(deskewed)), 'B'))

    # Derive the output file id from the input id; fall back to a padded id
    # when the input id does not contain the input file group name.
    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)
    file_path = self.workspace.save_image_file(page_image,
                                               file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comments=page_xywh['features']))
def _process_segment(self, page_image, page, textregion, region_xywh, page_id, input_file, n):
    """Segment a region image into text lines and add them to ``textregion``.

    Existing text lines are removed when the ``overwrite`` parameter is set;
    otherwise the region is left untouched.  The image is binarized, lines are
    computed and put into reading order, and for every line a noise-cleaned
    image is saved in ``self.output_file_grp`` and a ``TextLineType`` with the
    bounding polygon (converted via ``coordinates_for_segment``) is added.

    Returns early, without adding lines, when existing lines are kept, when
    the scale is unusable, or when more than ``maxlines`` labels are found.
    """
    LOG = getLogger('OcrdAnybaseocrTextline')

    # check for existing text lines and whether to overwrite them
    if textregion.get_TextLine():
        if self.parameter['overwrite']:
            LOG.info('removing existing TextLines in region "%s"', page_id)
            textregion.set_TextLine([])
        else:
            LOG.warning('keeping existing TextLines in region "%s"', page_id)
            return

    binary = ocrolib.pil2array(page_image)
    if len(binary.shape) > 2:
        binary = np.mean(binary, 2)
    # Invert and normalise by the maximum, then cast to unsigned bytes.
    binary = np.array(1 - binary / np.amax(binary), 'B')

    if self.parameter['scale'] == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = self.parameter['scale']

    if np.isnan(scale) or scale > 1000.0 or scale < self.parameter['minscale']:
        LOG.warning(str(scale) + ": bad scale; skipping!\n")
        return

    segmentation = self.compute_segmentation(binary, scale)
    if np.amax(segmentation) > self.parameter['maxlines']:
        LOG.warning("too many lines %i; skipping!\n", (np.amax(segmentation)))
        return

    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)
    # NOTE(review): the original also renumbered the label image here, but
    # the result was never used; that dead computation has been dropped.
    lines = [lines[i] for i in lsort]

    cleaned = ocrolib.remove_noise(binary, self.parameter['noise'])
    file_id = make_file_id(input_file, self.output_file_grp)

    for i, line_obj in enumerate(lines):
        row_slice, col_slice = line_obj.bounds[0], line_obj.bounds[1]
        # bounds[0] indexes the first array axis (rows, i.e. y) and bounds[1]
        # the second (columns, i.e. x) -- see the `cleaned[...]` lookup below.
        # The original assigned them the other way round, which transposed
        # the line polygons.
        min_y, max_y = (row_slice.start, row_slice.stop)
        min_x, max_x = (col_slice.start, col_slice.stop)

        line_polygon = [[min_x, min_y], [max_x, min_y],
                        [max_x, max_y], [min_x, max_y]]
        line_polygon = coordinates_for_segment(line_polygon, page_image, region_xywh)
        line_points = points_from_polygon(line_polygon)

        img = cleaned[row_slice, col_slice]
        img = np.array(255 * (img > ocrolib.midrange(img)), 'B')
        # Invert the thresholded line image before saving.
        img = 255 - img
        img = ocrolib.array2pil(img)

        file_path = self.workspace.save_image_file(
            img,
            file_id + "_" + str(n) + "_" + str(i),
            page_id=page_id,
            file_grp=self.output_file_grp)
        ai = AlternativeImageType(filename=file_path,
                                  comments=region_xywh['features'])
        line_id = '%s_line%04d' % (page_id, i)
        line = TextLineType(custom='readingOrder {index:' + str(i) + ';}',
                            id=line_id,
                            Coords=CoordsType(line_points))
        line.add_AlternativeImage(ai)
        textregion.add_TextLine(line)
def ocr(image_path, segmentation_path, output_path, model_path):
    """
    Scan a single image with ocropus.

    Reads a single image file from ``image_path``, recognizes every line of
    the TEI segmentation in ``segmentation_path`` and writes the recognized
    text as a TEI document into ``output_path``.

    Args:
        image_path (unicode): Path of the input file
        segmentation_path (unicode): Path of the segmentation XML file.
        output_path (unicode): Path of the output file
        model_path (unicode): Path of the recognition model. Must be a pyrnn.gz
                              pickle dump interoperable with ocropus-rpred.

    Returns:
        (unicode): The ``output_path`` argument, i.e. the file that was
                   written.

    Raises:
        NidabaOcropusException: The recognition model could not be loaded.
                                The underlying error text is appended to the
                                message.
    """
    try:
        logger.debug('Loading pyrnn from {}'.format(model_path))
        network = ocrolib.load_object(model_path, verbose=0)
        lnorm = getattr(network, "lnorm")
    except Exception as e:
        # Generic exceptions have no `.msg` attribute; the original access
        # raised AttributeError and hid the real cause.
        raise NidabaOcropusException('Something somewhere broke: ' + str(e))

    im = Image.open(image_path)

    logger.debug('Loading TEI segmentation {}'.format(segmentation_path))
    tei = TEIFacsimile()
    with open(segmentation_path, 'r') as seg_fp:
        tei.read(seg_fp)

    logger.debug('Clearing out word/grapheme boxes')
    # ocropus is a line recognizer
    tei.clear_graphemes()
    tei.clear_segments()
    # add and scope new responsibility statement
    tei.add_respstmt('ocropus', 'character recognition')

    for box in tei.lines:
        logger.debug('Recognizing line {}'.format(box[4]))
        # All but the last two entries of a line record form the crop box.
        ib = tuple(int(x) for x in box[:-2])
        line = ocrolib.pil2array(im.crop(ib))
        # Collapse colour channels to grayscale.  The original computed the
        # mean but discarded the result, and only did so after normalization.
        # NOTE(review): the normalizer presumably expects 2-D input, hence the
        # conversion up front -- confirm.
        if line.ndim == 3:
            line = np.mean(line, 2)
        temp = np.amax(line) - line
        temp = temp * 1.0 / np.amax(temp)
        lnorm.measure(temp)
        line = lnorm.normalize(line, cval=np.amax(line))
        line = ocrolib.lstm.prepare_line(line, 16)
        pred = network.predictString(line)
        pred = ocrolib.normalize_text(pred)
        logger.debug('Scoping line {}'.format(box[4]))
        tei.scope_line(box[4])
        logger.debug('Adding graphemes: {}'.format(pred))
        tei.add_graphemes(pred)

    with open(output_path, 'wb') as fp:
        # File objects expose `.name`, not `.abs_path`.
        logger.debug('Writing TEI to {}'.format(fp.name))
        tei.write(fp)
    return output_path
def _process_segment(self, page_image, page, region_xywh, page_id, input_file, n):
    """Split an image into text lines and annotate them on the page.

    The image is inverted into a 0/1 array, segmented with
    ``self.compute_segmentation`` and turned into line objects by
    ``psegutils.compute_lines``. Every line (in reading order) is saved as an
    image in ``self.image_grp`` and added as a ``TextLine`` (with that image
    as ``AlternativeImage``) to the last ``TextRegion`` of ``page``; if the
    page has no text region, one covering the whole image is created first.

    Args:
        page_image: PIL image to segment.
        page: PAGE object that receives the text lines.
        region_xywh: dict whose ``'features'`` string is extended in place
            with ``",textline"`` and used as the image comment.
        page_id: page identifier, used for saving images and in warnings.
        input_file: input file entry; its ``ID`` is the basis of the image IDs.
        n: running number used for the image ID when the input file ID does
            not contain the input file group name.

    Returns:
        None. Nothing is annotated when the scale is implausible or more
        than ``maxlines`` lines are found.
    """
    binary = ocrolib.pil2array(page_image)
    binary = np.array(1 - binary / np.amax(binary), 'B')
    # numpy images are indexed [row, column]: shape is (height, width)
    height, width = binary.shape[:2]

    regions = page.get_TextRegion()
    if regions is None or len(regions) < 1:
        # PAGE points are "x,y" pairs, so the width goes first
        textregion = TextRegionType(
            Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" % (
                0, 0, width, 0, width, height, 0, height)))
        page.add_TextRegion(textregion)
    else:
        textregion = regions[-1]

    if self.parameter['scale'] == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = self.parameter['scale']
    if (np.isnan(scale) or scale > 1000.0
            or scale < self.parameter['minscale']):
        LOG.warning("%s: bad scale (%g); skipping\n", page_id, scale)
        return

    segmentation = self.compute_segmentation(binary, scale)
    if np.amax(segmentation) > self.parameter['maxlines']:
        LOG.warning("%s: too many lines %i", page_id, np.amax(segmentation))
        return

    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lines = [lines[i] for i in psegutils.topsort(order)]

    region_xywh['features'] += ",textline"

    # the ID stem is the same for every line of this segment
    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)

    for i, l in enumerate(lines):
        # bounds index the array as [rows, columns], i.e. (y, x)
        rows, cols = l.bounds[0], l.bounds[1]
        min_y, max_y = rows.start, rows.stop
        min_x, max_x = cols.start, cols.stop

        img = binary[rows, cols]
        img = np.array(255 * (img > ocrolib.midrange(img)), 'B')
        img = ocrolib.array2pil(img)

        file_path = self.workspace.save_image_file(img,
                                                   file_id + "_" + str(i),
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        ai = AlternativeImageType(filename=file_path,
                                  comments=region_xywh['features'])
        line = TextLineType(
            Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" % (
                min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y)))
        line.add_AlternativeImage(ai)
        textregion.add_TextLine(line)
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, mask, dpi):
    """Detect layout regions on a page image and annotate them on the page.

    Steps, in order:

    1. optionally reset existing text regions (``overwrite`` parameter),
    2. run ``self.mrcnn_model.detect`` on the image, restricted to the
       classes named in the ``active_classes`` parameter,
    3. if ``mask`` is given, grow the detected boxes so that they also cover
       nearby foreground pixels of the mask (within ``th`` pixels),
    4. if ``use_masks`` is set, turn every instance mask into one hull polygon,
    5. if ``post_process`` is set, drop or merge overlapping detections,
    6. derive a reading order from the box coordinates,
    7. add one region per remaining detection (clipped to the page border,
       if any) and a matching entry in the page's reading order group.

    Args:
        page_image: PIL image of the page.
        page: PAGE object to annotate.
        page_xywh: coordinate information of ``page_image``; passed to
            ``coordinates_for_segment`` to get absolute coordinates.
        page_id: page identifier, used in log messages and as the ID of the
            reading order group.
        input_file: not used by this method.
        mask: optional PIL image; when truthy it is converted with
            ``ocrolib.pil2array`` and used in step 3.
        dpi: pixel density; ``dpi / 6`` (made odd) is the kernel size for the
            morphological operations of step 4.

    Raises:
        Exception: a detection has a class id outside ``CLASS_NAMES``.
    """
    LOG = getLogger('processor.AnybaseocrBlockSegmenter')
    # check for existing text regions and whether to overwrite them
    if page.get_TextRegion() or page.get_TableRegion():
        if self.parameter['overwrite']:
            LOG.info('removing existing text/table regions in page "%s"', page_id)
            # NOTE(review): only text regions are reset although the message
            # also mentions table regions -- confirm whether TableRegion
            # should be cleared here as well
            page.set_TextRegion([])
        else:
            LOG.warning('keeping existing text/table regions in page "%s"', page_id)
    # check if border exists
    border_polygon = None
    if page.get_Border():
        border_coords = page.get_Border().get_Coords()
        border_points = polygon_from_points(border_coords.get_points())
        border_polygon = Polygon(border_points)

    LOG.info('detecting regions on page "%s"', page_id)
    img_array = ocrolib.pil2array(page_image)
    if len(img_array.shape) <= 2:
        # replicate the single channel three times
        img_array = np.stack((img_array, ) * 3, axis=-1)
    # convert to incidence matrix
    class_ids = np.array([[
        1 if category in self.parameter['active_classes'] else 0
        for category in CLASS_NAMES
    ]], dtype=np.int32)
    results = self.mrcnn_model.detect([img_array], verbose=0,
                                      active_class_ids=class_ids)
    r = results[0]
    LOG.info('found %d candidates on page "%s"', len(r['rois']), page_id)

    th = self.parameter['th']
    # check for existing segmentation mask
    # this code executes only when the workflow had tiseg run before with use_deeplr=true
    if mask:
        mask = ocrolib.pil2array(mask)
        mask = mask // 255
        mask = 1 - mask
        # label the pixels inside box i with i + 2; untouched foreground stays 1
        for i in range(len(r['rois'])):
            min_y, min_x, max_y, max_x = r['rois'][i]
            mask[min_y:max_y, min_x:max_x] *= i + 2

        # check for left over pixels and add them to the bounding boxes
        pixel_added = True
        while pixel_added:
            pixel_added = False
            left_over = np.where(mask == 1)
            for y, x in zip(left_over[0], left_over[1]):
                # Clamp the window start: a negative start would be read as
                # "from the end" and produce an empty window for every pixel
                # closer than th to the top or left edge.
                top = max(y - th, 0)
                left = max(x - th, 0)
                local_mask = mask[top:y + th, left:x + th]
                # position of (y, x) inside the window
                center_y = y - top
                center_x = x - left
                candidates = np.where(local_mask > 1)
                candidates = [k for k in zip(candidates[0], candidates[1])]
                if len(candidates) > 0:
                    pixel_added = True
                    # find closest pixel with label > 1
                    candidates.sort(key=lambda j: ((j[0] - center_y)**2 +
                                                   (j[1] - center_x)**2))
                    index = local_mask[candidates[0]] - 2
                    # add pixel to mask/bbox
                    # y,x to bbox with index
                    if y < r['rois'][index][0]:
                        r['rois'][index][0] = y
                    elif y > r['rois'][index][2]:
                        r['rois'][index][2] = y
                    if x < r['rois'][index][1]:
                        r['rois'][index][1] = x
                    elif x > r['rois'][index][3]:
                        r['rois'][index][3] = x
                    # update the mask
                    mask[y, x] = index + 2

    for i in range(len(r['rois'])):
        class_id = r['class_ids'][i]
        if class_id >= len(CLASS_NAMES):
            raise Exception(
                'Unexpected class id %d - model does not match' % class_id)

    # find hull contours on masks
    if self.parameter['use_masks']:
        r.setdefault('polygons', list())
        # estimate glyph scale (roughly)
        scale = int(dpi / 6)
        scale = scale + (scale + 1) % 2  # odd
        kernel = np.ones((scale, scale), np.uint8)
        for i in range(len(r['rois'])):
            region_mask = r['masks'][:, :, i]
            region_mask = cv2.dilate(region_mask.astype(np.uint8), kernel) > 0
            # close mask until we have a single outer contour
            contours = None
            for _ in range(10):
                region_mask = cv2.morphologyEx(
                    region_mask.astype(np.uint8), cv2.MORPH_CLOSE, kernel) > 0
                contours, _hierarchy = cv2.findContours(
                    region_mask.astype(np.uint8),
                    cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                if len(contours) == 1:
                    break
            r['polygons'].append(Polygon(
                contours[0][:, 0, :]))  # already in x,y order

    # to reduce overlaps, apply IoU-based non-maximum suppression
    # (and other post-processing against overlaps) across classes,
    # but not on the raw pixels, but the smoothed hull polygons
    LOG.info('post-processing detections on page "%s"', page_id)
    worse = []
    if self.parameter['post_process']:
        active = True

        def _merge_rois(i, j):
            """merges i into j"""
            nonlocal r, active
            r['rois'][j][0] = min(r['rois'][i][0], r['rois'][j][0])
            r['rois'][j][1] = min(r['rois'][i][1], r['rois'][j][1])
            r['rois'][j][2] = max(r['rois'][i][2], r['rois'][j][2])
            r['rois'][j][3] = max(r['rois'][i][3], r['rois'][j][3])
            r['polygons'][j] = r['polygons'][i].union(r['polygons'][j])
            # a merge changes geometry, so another pass over all pairs is due
            active = True

        # find overlapping pairs
        while active:
            active = False
            for i in range(len(r["class_ids"])):
                if i in worse:
                    continue
                for j in range(i + 1, len(r['class_ids'])):
                    if j in worse:
                        continue
                    iclass = r['class_ids'][i]
                    jclass = r['class_ids'][j]
                    iname = CLASS_NAMES[iclass]
                    jname = CLASS_NAMES[jclass]
                    if (iname == 'drop-capital') != (jname == 'drop-capital'):
                        # ignore drop-capital overlapping with others
                        continue
                    # rs todo: lower priority for footnote?
                    if (r['rois'][i][1] > r['rois'][j][3] or
                            r['rois'][i][3] < r['rois'][j][1] or
                            r['rois'][i][0] > r['rois'][j][2] or
                            r['rois'][i][2] < r['rois'][j][0]):
                        # no overlap (cut)
                        continue
                    iscore = r['scores'][i]
                    jscore = r['scores'][j]
                    if not self.parameter['use_masks']:
                        LOG.debug(
                            "roi %d[%s] overlaps roi %d[%s] and %s (replacing)",
                            i, iname, j, jname,
                            "looses" if iscore < jscore else "wins")
                        if iscore < jscore:
                            worse.append(i)
                            break
                        else:
                            worse.append(j)
                            continue
                    # compare masks
                    ipoly = r['polygons'][i]
                    jpoly = r['polygons'][j]
                    isize = ipoly.area
                    jsize = jpoly.area
                    inter = ipoly.intersection(jpoly).area
                    union = ipoly.union(jpoly).area
                    if inter / isize > self.parameter['min_share_drop']:
                        LOG.debug(
                            "roi %d[%s] contains roi %d[%s] (replacing)",
                            j, jname, i, iname)
                        worse.append(i)
                        break
                    elif inter / jsize > self.parameter['min_share_drop']:
                        LOG.debug(
                            "roi %d[%s] contains roi %d[%s] (replacing)",
                            i, iname, j, jname)
                        worse.append(j)
                    elif inter / union > self.parameter['min_iou_drop']:
                        LOG.debug(
                            "roi %d[%s] heavily overlaps roi %d[%s] and %s (replacing)",
                            i, iname, j, jname,
                            "looses" if iscore < jscore else "wins")
                        if iscore < jscore:
                            worse.append(i)
                            break
                        else:
                            worse.append(j)
                    elif inter / isize > self.parameter['min_share_merge']:
                        LOG.debug("roi %d[%s] covers roi %d[%s] (merging)",
                                  j, jname, i, iname)
                        worse.append(i)
                        _merge_rois(i, j)
                        break
                    elif inter / jsize > self.parameter['min_share_merge']:
                        LOG.debug("roi %d[%s] covers roi %d[%s] (merging)",
                                  i, iname, j, jname)
                        worse.append(j)
                        _merge_rois(j, i)
                    elif inter / union > self.parameter['min_iou_merge']:
                        LOG.debug(
                            "roi %d[%s] slightly overlaps roi %d[%s] and %s (merging)",
                            i, iname, j, jname,
                            "looses" if iscore < jscore else "wins")
                        if iscore < jscore:
                            worse.append(i)
                            _merge_rois(i, j)
                            break
                        else:
                            worse.append(j)
                            _merge_rois(j, i)

    # define reading order on basis of coordinates
    # partial_order[a, b] == 1 means "a comes before b"
    partial_order = np.zeros((len(r['rois']), len(r['rois'])), np.uint8)
    for i, (min_y_i, min_x_i, max_y_i, max_x_i) in enumerate(r['rois']):
        for j, (min_y_j, min_x_j, max_y_j, max_x_j) in enumerate(r['rois']):
            if min_x_i < max_x_j and max_x_i > min_x_j:
                # xoverlaps
                if min_y_i < min_y_j:
                    partial_order[i, j] = 1
            else:
                min_y = min(min_y_i, min_y_j)
                max_y = max(max_y_i, max_y_j)
                min_x = min(min_x_i, min_x_j)
                max_x = max(max_x_i, max_x_j)
                # NOTE(review): the scan below also visits boxes i and j
                # themselves, which lie inside the hull they span, so this
                # test looks like it can never succeed -- confirm whether
                # i and j were meant to be excluded
                if next(
                        (False for (min_y_k, min_x_k, max_y_k, max_x_k)
                         in r['rois']
                         if (min_y_k < max_y and max_y_k > min_y and
                             min_x_k < max_x and max_x_k > min_x)),
                        True):
                    # no k in between
                    if ((min_y_j + max_y_j) / 2 < min_y_i and
                            (min_y_i + max_y_i) / 2 > max_y_j):
                        # vertically unrelated
                        partial_order[j, i] = 1
                    elif max_x_i < min_x_j:
                        partial_order[i, j] = 1

    def _topsort(po):
        """Return all indices, each one after every index marked as preceding it."""
        # the builtin bool: the np.bool alias is missing from NumPy 1.24+
        visited = np.zeros(po.shape[0], bool)
        result = list()

        def _visit(k):
            if visited[k]:
                return
            visited[k] = True
            for l in np.nonzero(po[:, k])[0]:
                _visit(l)
            result.append(k)

        for k in range(po.shape[0]):
            _visit(k)
        return result

    reading_order = _topsort(partial_order)

    # Creating Reading Order object in PageXML
    order_group = OrderedGroupType(caption="Regions reading order", id=page_id)
    reading_order_object = ReadingOrderType()
    reading_order_object.set_OrderedGroup(order_group)
    page.set_ReadingOrder(reading_order_object)

    # image arrays are (rows, columns, channels), i.e. (height, width, ...)
    height, width = img_array.shape[:2]
    for i in range(len(r['rois'])):
        min_y, min_x, max_y, max_x = r['rois'][i]
        score = r['scores'][i]
        class_id = r['class_ids'][i]
        class_name = CLASS_NAMES[class_id]
        if i in worse:
            LOG.debug(
                "Ignoring instance %d[%s] overlapping better/larger neighbour",
                i, class_name)
            continue

        if self.parameter['use_masks']:
            region_polygon = r['polygons'][i].exterior.coords[:-1]
        else:
            # paragraphs get some horizontal padding, kept inside the image
            region_polygon = polygon_from_bbox(
                max(min_x - 5, 0) if class_name == 'paragraph' else min_x,
                min_y,
                min(max_x + 10, width) if class_name == 'paragraph' else max_x,
                max_y)

        # convert to absolute coordinates
        region_polygon = coordinates_for_segment(region_polygon,
                                                 page_image, page_xywh)
        # intersect with parent and plausibilize
        cut_region_polygon = Polygon(region_polygon)
        if border_polygon:
            cut_region_polygon = border_polygon.intersection(
                cut_region_polygon)
        if cut_region_polygon.is_empty:
            LOG.warning('region %d does not intersect page frame', i)
            continue
        if not cut_region_polygon.is_valid:
            LOG.warning('region %d has invalid polygon', i)
            continue
        region_polygon = cut_region_polygon.exterior.coords[:-1]
        region_coords = CoordsType(points_from_polygon(region_polygon),
                                   conf=score)
        # one lookup serves both the custom attribute and the order group
        read_order = reading_order.index(i)
        region_args = {
            'custom': 'readingOrder {index:' + str(read_order) + ';}',
            'id': 'region%04d' % i,
            'Coords': region_coords
        }
        if class_name == 'image':
            image_region = ImageRegionType(**region_args)
            page.add_ImageRegion(image_region)
        elif class_name == 'table':
            table_region = TableRegionType(**region_args)
            page.add_TableRegion(table_region)
        elif class_name == 'graphics':
            graphic_region = GraphicRegionType(**region_args)
            page.add_GraphicRegion(graphic_region)
        else:
            region_args['type_'] = class_name
            textregion = TextRegionType(**region_args)
            page.add_TextRegion(textregion)
        regionRefIndex = RegionRefIndexedType(index=read_order,
                                              regionRef=region_args['id'])
        order_group.add_RegionRefIndexed(regionRefIndex)
        LOG.info('added %s region on page "%s"', class_name, page_id)
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n, mrcnn_model, class_names, mask):
    """Detect layout regions on a page image and annotate them on the page.

    Runs ``mrcnn_model.detect`` on the image, optionally grows the detected
    boxes to cover left-over foreground pixels of ``mask``, shrinks boxes
    that overlap earlier ones, derives a reading order from the box
    coordinates and finally adds one region per box to ``page`` together
    with an entry in the page's reading order group. The image content of
    every region is also saved to ``self.output_file_grp``.

    Note on naming: ``r['rois'][i]`` is used as
    ``[min_x, min_y, max_x, max_y]`` where the "x" values index the first
    array axis (rows) and the "y" values the second (columns), see the
    slicing of ``mask`` and ``img_array`` below.

    Args:
        page_image: PIL image of the page.
        page: PAGE object to annotate.
        page_xywh: coordinate information of ``page_image``; passed to
            ``coordinates_for_segment`` to get absolute coordinates.
        page_id: page identifier, used in log messages and region IDs.
        input_file: input file entry, used to build the image file IDs.
        n: not used by this method.
        mrcnn_model: model object providing ``detect``.
        class_names: sequence mapping class ids to region type names.
        mask: optional PIL image; when truthy it is converted with
            ``ocrolib.pil2array`` and used to grow the boxes.

    Returns:
        None. Returns early, without detection, when the page already has
        text regions and ``overwrite`` is not set.
    """
    LOG = getLogger('OcrdAnybaseocrBlockSegmenter')
    # check for existing text regions and whether to overwrite them
    border = None
    if page.get_TextRegion():
        if self.parameter['overwrite']:
            LOG.info('removing existing TextRegions in page "%s"', page_id)
            page.set_TextRegion([])
        else:
            LOG.warning('keeping existing TextRegions in page "%s"', page_id)
            return
    # check if border exists
    if page.get_Border():
        border_coords = page.get_Border().get_Coords()
        border_points = polygon_from_points(border_coords.get_points())
        border = Polygon(border_points)

    img_array = ocrolib.pil2array(page_image)
    if len(img_array.shape) <= 2:
        # replicate the single channel three times
        img_array = np.stack((img_array, ) * 3, axis=-1)
    results = mrcnn_model.detect([img_array], verbose=1)
    r = results[0]

    th = self.parameter['th']
    # check for existing segmentation mask
    # this code executes only when use_deeplr is set to True in ocrd-tool.json file
    if mask:
        mask = ocrolib.pil2array(mask)
        mask = mask // 255
        mask = 1 - mask
        # label the pixels inside box i with i + 2; untouched foreground stays 1
        for i in range(len(r['rois'])):
            min_x, min_y, max_x, max_y = r['rois'][i]
            mask[min_x:max_x, min_y:max_y] *= i + 2

        # check for left over pixels and add them to the bounding boxes
        pixel_added = True
        while pixel_added:
            pixel_added = False
            left_over = np.where(mask == 1)
            for x, y in zip(left_over[0], left_over[1]):
                # Clamp the window start: a negative start would be read as
                # "from the end" and produce an empty window for every pixel
                # closer than th to the first row or column.
                x0 = max(x - th, 0)
                y0 = max(y - th, 0)
                local_mask = mask[x0:x + th, y0:y + th]
                # position of (x, y) inside the window
                center_x = x - x0
                center_y = y - y0
                candidates = np.where(local_mask > 1)
                candidates = [k for k in zip(candidates[0], candidates[1])]
                if len(candidates) > 0:
                    pixel_added = True
                    # find closest pixel with label > 1
                    candidates.sort(key=lambda j: ((j[0] - center_x)**2 +
                                                   (j[1] - center_y)**2))
                    index = local_mask[candidates[0]] - 2
                    # add pixel to mask/bbox
                    # x,y to bbox with index
                    if x < r['rois'][index][0]:
                        r['rois'][index][0] = x
                    elif x > r['rois'][index][2]:
                        r['rois'][index][2] = x
                    if y < r['rois'][index][1]:
                        r['rois'][index][1] = y
                    elif y > r['rois'][index][3]:
                        r['rois'][index][3] = y
                    # update the mask
                    mask[x, y] = index + 2

    # resolving overlapping problem
    bbox_dict = {}  # boxes seen so far, grouped by class id
    class_id_check = []
    for i in range(len(r['rois'])):
        min_x, min_y, max_x, max_y = r['rois'][i]
        region_bbox = [min_y, min_x, max_y, max_x]
        for key in bbox_dict:
            for bbox in bbox_dict[key]:
                # checking for ymax case with vertical overlapping
                # along with y, check both for xmax and xmin
                if (region_bbox[3] <= bbox[3] and region_bbox[3] >= bbox[1] and
                        ((region_bbox[0] >= bbox[0] and region_bbox[0] <= bbox[2]) or
                         (region_bbox[2] >= bbox[0] and region_bbox[2] <= bbox[2]) or
                         (region_bbox[0] <= bbox[0] and region_bbox[2] >= bbox[2])) and
                        r['class_ids'][i] != 5):
                    r['rois'][i][2] = bbox[1] - 1
                # checking for ymin now
                # along with y, check both for xmax and xmin
                if (region_bbox[1] <= bbox[3] and region_bbox[1] >= bbox[1] and
                        ((region_bbox[0] >= bbox[0] and region_bbox[0] <= bbox[2]) or
                         (region_bbox[2] >= bbox[0] and region_bbox[2] <= bbox[2]) or
                         (region_bbox[0] <= bbox[0] and region_bbox[2] >= bbox[2])) and
                        r['class_ids'][i] != 5):
                    r['rois'][i][0] = bbox[3] + 1
        if r['class_ids'][i] not in class_id_check:
            bbox_dict[r['class_ids'][i]] = []
            class_id_check.append(r['class_ids'][i])
        bbox_dict[r['class_ids'][i]].append(region_bbox)

    # size of the first image axis; the padding rule below compares against it
    first_axis_size = img_array.shape[0]

    def _padded_bbox(i):
        """Return box i as (min_y, min_x, max_y, max_x) after the class-2 padding rule."""
        min_x, min_y, max_x, max_y = r['rois'][i]
        # NOTE(review): both conditions and the "+= 10" on min_y look
        # unintended (presumably meant to widen class-2 boxes while staying
        # inside the image); behaviour is kept as it was -- confirm intent
        if (min_y - 5) > first_axis_size and r['class_ids'][i] == 2:
            min_y -= 5
        if (max_y + 10) < first_axis_size and r['class_ids'][i] == 2:
            min_y += 10
        return (min_y, min_x, max_y, max_x)

    # define reading order on basis of coordinates
    reading_order = [_padded_bbox(i) for i in range(len(r['rois']))]
    reading_order = sorted(reading_order, key=lambda box: (box[1], box[0]))
    for i in range(len(reading_order)):
        min_y, min_x, max_y, max_x = reading_order[i]
        # stretch box i to the start of the axis before testing overlaps
        min_y = 0
        i_poly = Polygon([[min_x, min_y], [max_x, min_y],
                          [max_x, max_y], [min_x, max_y]])
        for j in range(i + 1, len(reading_order)):
            min_y, min_x, max_y, max_x = reading_order[j]
            j_poly = Polygon([[min_x, min_y], [max_x, min_y],
                              [max_x, max_y], [min_x, max_y]])
            inter = i_poly.intersection(j_poly)
            if inter:
                # move entry i behind entry j (the list keeps its length)
                reading_order.insert(j + 1, reading_order[i])
                del reading_order[i]

    # Creating Reading Order object in PageXML
    order_group = OrderedGroupType(caption="Regions reading order", id=page_id)
    reading_order_object = ReadingOrderType()
    reading_order_object.set_OrderedGroup(order_group)
    page.set_ReadingOrder(reading_order_object)

    # Regions and their reading order entries are created in the same pass,
    # so the order group only ever references regions that were added.
    for i in range(len(r['rois'])):
        bbox = _padded_bbox(i)
        min_y, min_x, max_y, max_x = bbox
        # polygon points list the "y" (column) value first
        region_polygon = [[min_y, min_x], [max_y, min_x],
                          [max_y, max_x], [min_y, max_x]]
        # convert to absolute coordinates before clipping: the border is
        # annotated in absolute page coordinates
        region_polygon = coordinates_for_segment(region_polygon,
                                                 page_image, page_xywh)
        cut_region_polygon = Polygon(region_polygon)
        if border:
            cut_region_polygon = border.intersection(cut_region_polygon)
        if cut_region_polygon.is_empty:
            continue
        region_points = points_from_polygon(
            cut_region_polygon.exterior.coords[:-1])
        read_order = reading_order.index(bbox)

        # save the image content of the region
        region_img = img_array[min_x:max_x, min_y:max_y]
        region_img = ocrolib.array2pil(region_img)
        file_id = make_file_id(input_file, self.output_file_grp)
        file_path = self.workspace.save_image_file(
            region_img, file_id + "_" + str(i),
            page_id=page_id, file_grp=self.output_file_grp)

        region_id = '%s_region%04d' % (page_id, i)
        class_id = r['class_ids'][i]
        region_args = {
            'custom': 'readingOrder {index:' + str(read_order) + ';}',
            'id': region_id,
            'Coords': CoordsType(region_points),
            'type_': class_names[class_id],
        }
        if class_id == 15:
            page.add_ImageRegion(ImageRegionType(**region_args))
        elif class_id == 16:
            page.add_TableRegion(TableRegionType(**region_args))
        elif class_id == 17:
            page.add_GraphicRegion(GraphicRegionType(**region_args))
        else:
            page.add_TextRegion(TextRegionType(**region_args))

        regionRefIndex = RegionRefIndexedType(index=read_order,
                                              regionRef=region_id)
        order_group.add_RegionRefIndexed(regionRefIndex)
def process(self):
    """Detect the page frame of every input page and store it as PAGE Border.

    For each input file the page image is rotated according to the page's
    orientation, rulers are removed and the text area is detected. The
    resulting box (or, failing that, the box from ``select_borderLine``) is
    written as ``Border`` of the page, and the PAGE document is added to the
    output file group.

    The ``colSeparator`` parameter is a fraction of the image width. While a
    page is processed it is replaced by the corresponding pixel value for
    the helper methods; the configured fraction is restored afterwards.
    """
    col_separator_fraction = self.parameter['colSeparator']
    try:
        for (n, input_file) in enumerate(self.input_files):
            pcgts = page_from_file(self.workspace.download_file(input_file))
            page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID
            page = pcgts.get_Page()
            page_image, page_xywh, _ = self.workspace.image_from_page(
                page, page_id)

            # Get image orientation
            orientation = page.get_orientation()
            rotated_image = self.rotate_image(orientation, page_image)

            LOG.info("INPUT FILE %s ", input_file.pageId or input_file.ID)
            img_array = ocrolib.pil2array(rotated_image)

            # Check if image is RGB or not
            if len(img_array.shape) == 2:
                img_array = np.stack((img_array, ) * 3, axis=-1)

            img_array_bin = np.array(
                img_array > ocrolib.midrange(img_array), 'i')

            lineDetectH = []
            lineDetectV = []
            img_array_rr = self.remove_rular(img_array)

            textarea, img_array_rr_ta, height, width = self.detect_textarea(
                img_array_rr)
            # Always scale the configured fraction, never the pixel value
            # left over from the previous page.
            self.parameter['colSeparator'] = int(
                width * col_separator_fraction)

            if len(textarea) > 1:
                textarea = self.crop_area(textarea, img_array_bin,
                                          img_array_rr_ta)
                if len(textarea) == 0:
                    min_x, min_y, max_x, max_y = self.select_borderLine(
                        img_array_rr, lineDetectH, lineDetectV)
                else:
                    min_x, min_y, max_x, max_y = textarea[0]
            elif len(textarea) == 1 and (
                    height * width * 0.5 <
                    (abs(textarea[0][2] - textarea[0][0]) *
                     abs(textarea[0][3] - textarea[0][1]))):
                # a single text area covering more than half of the image
                min_x, min_y, max_x, max_y = textarea[0]
            else:
                min_x, min_y, max_x, max_y = self.select_borderLine(
                    img_array_rr, lineDetectH, lineDetectV)

            brd = BorderType(Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" % (
                min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y)))
            page.set_Border(brd)

            # Use input_file's basename for the new file -
            # this way the files retain the same basenames:
            file_id = input_file.ID.replace(self.input_file_grp,
                                            self.output_file_grp)
            if file_id == input_file.ID:
                file_id = concat_padded(self.output_file_grp, n)
            self.workspace.add_file(ID=file_id,
                                    file_grp=self.output_file_grp,
                                    pageId=input_file.pageId,
                                    mimetype=MIMETYPE_PAGE,
                                    local_filename=os.path.join(
                                        self.output_file_grp,
                                        file_id + '.xml'),
                                    content=to_xml(pcgts).encode('utf-8'))
    finally:
        self.parameter['colSeparator'] = col_separator_fraction