def _process_segment(self, model, dataset, page, page_xywh, page_id, input_file, orig_img_size, n):
    """Dewarp each sample of ``dataset`` and register the result on ``page``.

    For every sample the model output is thresholded against
    ``ocrolib.midrange``, resized to ``orig_img_size``, saved into
    ``self.image_grp`` and attached to ``page`` as an AlternativeImage.

    Args:
        model: object exposing ``inference(label, inst, image)``.
        dataset: iterable of mappings with ``'label'``, ``'inst'`` and
            ``'image'`` entries.
        page: PAGE element that receives the AlternativeImage entries.
        page_xywh: mapping whose ``'features'`` string gets ``',dewarped'``
            appended once per sample.
        page_id: page identifier forwarded to the workspace.
        input_file: input file entry; its ``ID`` seeds the output file ID.
        orig_img_size: ``(w, h)`` pair the dewarped image is resized to.
        n: index used to build the padded fallback file ID.
    """
    for sample in dataset:
        out_w, out_h = orig_img_size
        prediction = model.inference(sample['label'], sample['inst'], sample['image'])
        # move the first axis of the first output to the end and copy to CPU
        pixels = array(prediction.data[0].permute(1, 2, 0).detach().cpu())
        binary = array(255 * (pixels > ocrolib.midrange(pixels)), 'B')
        dewarped = ocrolib.array2pil(binary).resize((out_w, out_h))

        page_xywh['features'] += ',dewarped'

        file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
        if file_id == input_file.ID:
            # the input ID did not contain the input group name
            file_id = concat_padded(self.image_grp, n)

        file_path = self.workspace.save_image_file(
            dewarped,
            file_id,
            page_id=page_id,
            file_grp=self.image_grp,
            force=self.parameter['force'])
        alternative = AlternativeImageType(filename=file_path,
                                           comments=page_xywh['features'])
        page.add_AlternativeImage(alternative)
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n, mrcnn_model, class_names):
    """Detect blocks on ``page_image`` and add one TextRegion per detection.

    Each detected box is cropped from the page image, saved into
    ``self.image_grp`` and attached to a new TextRegion as an
    AlternativeImage.

    Args:
        page_image: PIL image of the page.
        page: PAGE element that receives the TextRegions.
        page_xywh: mapping whose ``'features'`` string gets
            ``',blksegmented'`` appended.
        page_id: page identifier forwarded to the workspace.
        input_file: input file entry; its ``ID`` seeds the output file IDs.
        n: index used to build the padded fallback file ID.
        mrcnn_model: object exposing ``detect(images, verbose=...)`` whose
            first result has ``'rois'`` and ``'class_ids'`` entries.
        class_names: sequence mapping a class id to the region ``type_``.
    """
    img_array = ocrolib.pil2array(page_image)
    results = mrcnn_model.detect([img_array], verbose=1)
    r = results[0]
    page_xywh['features'] += ',blksegmented'

    img_width = img_array.shape[1]
    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)

    for i, (roi, class_id) in enumerate(zip(r['rois'], r['class_ids'])):
        # The crop below slices rows with roi[0]:roi[2] and columns with
        # roi[1]:roi[3], so the box is (top, left, bottom, right).
        top, left, bottom, right = roi[0], roi[1], roi[2], roi[3]

        # Small post-processing for paragraphs (class id 2) so the trailing
        # characters are not cut: widen the box, staying inside the image.
        # The previous code tested ``(min - 5) > width`` (never true) and
        # moved the *near* edge inwards by 10 instead of the far edge
        # outwards, which shrank the region.
        if class_id == 2:
            if left - 5 > 0:
                left -= 5
            if right + 10 < img_width:
                right += 10

        region_img = ocrolib.array2pil(img_array[top:bottom, left:right])
        file_path = self.workspace.save_image_file(region_img,
                                                   file_id + "_" + str(i),
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        ai = AlternativeImageType(filename=file_path,
                                  comments=page_xywh['features'])
        # NOTE(review): the points keep the original (row, column) order;
        # PAGE expects "x,y" pairs -- confirm against downstream consumers
        # before swapping.
        coords = CoordsType("%i,%i %i,%i %i,%i %i,%i" % (
            top, left, bottom, left, bottom, right, top, right))
        textregion = TextRegionType(Coords=coords, type_=class_names[class_id])
        textregion.add_AlternativeImage(ai)
        page.add_TextRegion(textregion)
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
    """Split the page into text and non-text and store the text part.

    A seed-fill mask is built from the inverted, normalised page, and the
    binarized text part is saved into ``self.image_grp`` and attached to
    ``page`` as an AlternativeImage.

    Args:
        page_image: PIL image of the page.
        page: PAGE element that receives the AlternativeImage.
        page_xywh: mapping whose ``'features'`` string gets ``',tiseged'``
            appended.
        page_id: page identifier forwarded to the workspace.
        input_file: input file entry; its ``ID`` seeds the output file ID.
        n: index used to build the padded fallback file ID.
    """
    gray = ocrolib.pil2array(page_image)
    if len(gray.shape) > 2:
        # collapse the third axis by averaging
        gray = np.mean(gray, 2)
    inverted = 1 - gray / gray.max()
    rows, cols = inverted.shape

    # mask and seed images, then their seed fill
    mask_img, seed_img = self.pixMorphSequence_mask_seed_fill_holes(inverted)
    seedfill = self.pixSeedfillBinary(mask_img, seed_img)

    # dilate with a 3x3 structuring element
    seedfill = ndimage.binary_dilation(seedfill, ones((3, 3)))

    # bring the seed fill back to the size of the page
    seedfill = self.expansion(seedfill, (rows, cols))

    # text and non-text images
    image_part = array((1 - inverted * seedfill), dtype=int)
    image_part[0, 0] = 0  # only for visualisation purpose
    text_part = array((1 - inverted * (1 - seedfill)), dtype=int)
    text_part[0, 0] = 0  # only for visualisation purpose

    page_xywh['features'] += ',tiseged'

    text_binary = array(255 * (text_part > ocrolib.midrange(text_part)), 'B')
    text_image = ocrolib.array2pil(text_binary)

    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)

    file_path = self.workspace.save_image_file(text_image,
                                               file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    alternative = AlternativeImageType(filename=file_path,
                                       comments=page_xywh['features'])
    page.add_AlternativeImage(alternative)
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n, mrcnn_model, class_names, mask):
    """Detect layout regions with Mask R-CNN and add them to ``page``.

    Steps:
      1. Stop early if the page already has TextRegions and the
         ``overwrite`` parameter is off.
      2. Run ``mrcnn_model.detect`` on the page image.
      3. If ``mask`` is given, grow the boxes so that left-over mask
         pixels within ``th`` pixels of a box are absorbed by it.
      4. Trim boxes that overlap previously seen boxes.
      5. Derive a reading order from the box coordinates.
      6. Write the ReadingOrder and one region per detection; class ids
         15, 16 and 17 become Image-, Table- and GraphicRegions, all
         others become TextRegions.

    Args:
        page_image: PIL image of the page.
        page: PAGE element that is updated in place.
        page_xywh: page coordinate mapping passed to
            ``coordinates_for_segment``.
        page_id: page identifier used in log messages and region IDs.
        input_file: input file entry passed to ``make_file_id``.
        n: unused by this method.
        mrcnn_model: object exposing ``detect(images, verbose=...)`` whose
            first result has ``'rois'`` and ``'class_ids'`` entries.
        class_names: sequence mapping a class id to the region ``type_``.
        mask: optional PIL segmentation mask; falsy to skip step 3.
    """
    LOG = getLogger('OcrdAnybaseocrBlockSegmenter')
    # check for existing text regions and whether to overwrite them
    border = None
    if page.get_TextRegion():
        if self.parameter['overwrite']:
            LOG.info('removing existing TextRegions in page "%s"', page_id)
            page.set_TextRegion([])
        else:
            LOG.warning('keeping existing TextRegions in page "%s"', page_id)
            return
    # check if border exists
    if page.get_Border():
        border_coords = page.get_Border().get_Coords()
        border_points = polygon_from_points(border_coords.get_points())
        border = Polygon(border_points)

    img_array = ocrolib.pil2array(page_image)
    if len(img_array.shape) <= 2:
        # replicate a single-channel image into three channels
        img_array = np.stack((img_array, ) * 3, axis=-1)
    results = mrcnn_model.detect([img_array], verbose=1)
    r = results[0]
    num_regions = len(r['rois'])
    img_width = img_array.shape[1]

    th = self.parameter['th']
    # check for existing segmentation mask
    # this code executes only when use_deeplr is set to True in ocrd-tool.json file
    if mask:
        mask = ocrolib.pil2array(mask)
        mask = mask // 255
        mask = 1 - mask
        # label the pixels inside bounding box i with i + 2
        for i in range(num_regions):
            roi = r['rois'][i]
            mask[roi[0]:roi[2], roi[1]:roi[3]] *= i + 2

        # check for left over pixels and add them to the bounding boxes
        pixel_added = True
        while pixel_added:
            pixel_added = False
            left_over = np.where(mask == 1)
            for x, y in zip(left_over[0], left_over[1]):
                local_mask = mask[x - th:x + th, y - th:y + th]
                candidates = np.where(local_mask > 1)
                candidates = [k for k in zip(candidates[0], candidates[1])]
                if len(candidates) > 0:
                    pixel_added = True
                    # find closest labelled pixel (distance to window centre)
                    candidates.sort(key=lambda j: np.sqrt((j[0] - th)**2 + (j[1] - th)**2))
                    index = local_mask[candidates[0]] - 2
                    # extend the bounding box ``index`` to contain (x, y)
                    if x < r['rois'][index][0]:
                        r['rois'][index][0] = x
                    elif x > r['rois'][index][2]:
                        r['rois'][index][2] = x
                    if y < r['rois'][index][1]:
                        r['rois'][index][1] = y
                    elif y > r['rois'][index][3]:
                        r['rois'][index][3] = y
                    # update the mask
                    mask[x, y] = index + 2

    # resolving overlapping problem
    bbox_dict = {}  # class id -> boxes already seen for that class
    class_id_check = []
    for i in range(num_regions):
        min_x = r['rois'][i][0]
        min_y = r['rois'][i][1]
        max_x = r['rois'][i][2]
        max_y = r['rois'][i][3]
        region_bbox = [min_y, min_x, max_y, max_x]

        for key in bbox_dict:
            for bbox in bbox_dict[key]:
                # checking for ymax case with vertical overlapping
                # along with y, check both for xmax and xmin
                if (region_bbox[3] <= bbox[3] and region_bbox[3] >= bbox[1] and
                        ((region_bbox[0] >= bbox[0] and region_bbox[0] <= bbox[2]) or
                         (region_bbox[2] >= bbox[0] and region_bbox[2] <= bbox[2]) or
                         (region_bbox[0] <= bbox[0] and region_bbox[2] >= bbox[2])) and
                        r['class_ids'][i] != 5):
                    r['rois'][i][2] = bbox[1] - 1

                # checking for ymin now
                # along with y, check both for xmax and xmin
                if (region_bbox[1] <= bbox[3] and region_bbox[1] >= bbox[1] and
                        ((region_bbox[0] >= bbox[0] and region_bbox[0] <= bbox[2]) or
                         (region_bbox[2] >= bbox[0] and region_bbox[2] <= bbox[2]) or
                         (region_bbox[0] <= bbox[0] and region_bbox[2] >= bbox[2])) and
                        r['class_ids'][i] != 5):
                    r['rois'][i][0] = bbox[3] + 1

        if r['class_ids'][i] not in class_id_check:
            bbox_dict[r['class_ids'][i]] = []
            class_id_check.append(r['class_ids'][i])
        bbox_dict[r['class_ids'][i]].append(region_bbox)

    def padded_box(i):
        """Return ``(min_y, min_x, max_y, max_x)`` of detection ``i``.

        Paragraph boxes (class id 2) are widened along the column axis so
        the trailing characters are not cut, as long as the widened edge
        stays inside the image.  The previous inline code tested
        ``(min_y - 5) > width`` (never true) and moved ``min_y`` inwards
        by 10, which shrank the box instead.
        """
        min_x = r['rois'][i][0]
        min_y = r['rois'][i][1]
        max_x = r['rois'][i][2]
        max_y = r['rois'][i][3]
        if r['class_ids'][i] == 2:
            if min_y - 5 > 0:
                min_y -= 5
            if max_y + 10 < img_width:
                max_y += 10
        return (min_y, min_x, max_y, max_x)

    # define reading order on basis of coordinates
    reading_order = [padded_box(i) for i in range(num_regions)]
    reading_order = sorted(reading_order, key=lambda box: (box[1], box[0]))
    for i in range(len(reading_order)):
        min_y, min_x, max_y, max_x = reading_order[i]
        min_y = 0
        i_poly = Polygon([[min_x, min_y], [max_x, min_y], [max_x, max_y], [min_x, max_y]])
        for j in range(i + 1, len(reading_order)):
            min_y, min_x, max_y, max_x = reading_order[j]
            j_poly = Polygon([[min_x, min_y], [max_x, min_y], [max_x, max_y], [min_x, max_y]])
            inter = i_poly.intersection(j_poly)
            if inter:
                reading_order.insert(j + 1, reading_order[i])
                del reading_order[i]

    # Creating Reading Order object in PageXML
    order_group = OrderedGroupType(caption="Regions reading order", id=page_id)
    for i in range(num_regions):
        min_y, min_x, max_y, max_x = padded_box(i)
        region_polygon = [[min_x, min_y], [max_x, min_y], [max_x, max_y], [min_x, max_y]]
        if border and border.intersection(Polygon(region_polygon)).is_empty:
            continue
        order_index = reading_order.index((min_y, min_x, max_y, max_x))
        region_id = '%s_region%04d' % (page_id, i)
        regionRefIndex = RegionRefIndexedType(index=order_index, regionRef=region_id)
        order_group.add_RegionRefIndexed(regionRefIndex)

    reading_order_object = ReadingOrderType()
    reading_order_object.set_OrderedGroup(order_group)
    page.set_ReadingOrder(reading_order_object)

    file_id = make_file_id(input_file, self.output_file_grp)
    # class ids with a dedicated PAGE region type; everything else is text
    special_regions = {
        15: (ImageRegionType, page.add_ImageRegion),
        16: (TableRegionType, page.add_TableRegion),
        17: (GraphicRegionType, page.add_GraphicRegion),
    }
    for i in range(num_regions):
        min_y, min_x, max_y, max_x = padded_box(i)

        # one change here to resolve flipped coordinates
        cut_region_polygon = Polygon([[min_y, min_x], [max_y, min_x],
                                      [max_y, max_x], [min_y, max_x]])
        # Only clip against the border when the page has one; calling
        # ``border.intersection`` unconditionally failed on pages without
        # a Border element.
        if border:
            cut_region_polygon = border.intersection(cut_region_polygon)
            if cut_region_polygon.is_empty:
                continue
        # drop the closing point that repeats the first one
        outline = list(zip(cut_region_polygon.exterior.coords.xy[0],
                           cut_region_polygon.exterior.coords.xy[1]))[:-1]

        region_polygon = coordinates_for_segment(outline, page_image, page_xywh)
        region_points = points_from_polygon(region_polygon)
        read_order = reading_order.index((min_y, min_x, max_y, max_x))

        # extract the region image from the page array and store it
        region_img = ocrolib.array2pil(img_array[min_x:max_x, min_y:max_y])
        self.workspace.save_image_file(region_img,
                                       file_id + "_" + str(i),
                                       page_id=page_id,
                                       file_grp=self.output_file_grp)

        class_id = r['class_ids'][i]
        region_type, add_region = special_regions.get(
            int(class_id), (TextRegionType, page.add_TextRegion))
        add_region(region_type(
            custom='readingOrder {index:' + str(read_order) + ';}',
            id='%s_region%04d' % (page_id, i),
            Coords=CoordsType(region_points),
            type_=class_names[class_id]))
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
    """Deskew the page, threshold it and attach the result to ``page``.

    The skew angle is estimated (when ``maxskew`` > 0) and the image is
    rotated by it, then low/high levels are estimated from percentiles,
    the image is rescaled to ``[0, 1]`` and thresholded.  The angle is
    stored via ``page.set_orientation`` and the binary image is saved
    into ``self.image_grp`` as an AlternativeImage.

    Args:
        page_image: PIL image of the page.
        page: PAGE element that receives orientation and AlternativeImage.
        page_xywh: mapping whose ``'features'`` string gets ``',deskewed'``
            appended.
        page_id: page identifier forwarded to the workspace.
        input_file: input file entry; its ``ID`` seeds the output file ID.
        n: index used to build the padded fallback file ID.
    """
    params = self.parameter

    def announce(message):
        # progress messages are only logged when 'parallel' is below 2
        if params['parallel'] < 2:
            LOG.info(message)

    def central_part(img):
        # cut away a 'bignore' fraction of the image on every side
        d0, d1 = img.shape
        o0, o1 = int(params['bignore'] * d0), int(params['bignore'] * d1)
        return img[o0:d0 - o0, o1:d1 - o1]

    flat = ocrolib.pil2array(page_image).astype("float64")

    # estimate skew angle and rotate
    if params['maxskew'] > 0:
        announce("Estimating Skew Angle")
        d0, d1 = flat.shape
        o0, o1 = int(params['bignore'] * d0), int(params['bignore'] * d1)
        flat = amax(flat) - flat
        flat -= amin(flat)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        ma = params['maxskew']
        ms = int(2 * params['maxskew'] * params['skewsteps'])
        angle = self.estimate_skew_angle(est, linspace(-ma, ma, ms + 1))
        flat = interpolation.rotate(flat, angle, mode='constant', reshape=0)
        flat = amax(flat) - flat
    else:
        angle = 0

    # estimate low and high thresholds
    announce("Estimating Thresholds")
    est = central_part(flat)
    if params['escale'] > 0:
        # by default, we use only regions that contain significant
        # variance; this makes the percentile based low and high
        # estimates more reliable
        e = params['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        if params['debug'] > 0:
            imshow(v)
            ginput(1, params['debug'])
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), params['lo'])
    hi = stats.scoreatpercentile(est.ravel(), params['hi'])

    # rescale the image to get the gray scale image
    announce("Rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if params['debug'] > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, params['debug'])
    deskewed = 1 * (flat > params['threshold'])

    # TODO: Need some clarification as the results effect the following
    # pre-processing steps.
    if angle is None:  # FIXME: quick fix to prevent angle of "none"
        angle = 0
    page.set_orientation(angle)

    page_xywh['features'] += ',deskewed'
    binary = array(255 * (deskewed > ocrolib.midrange(deskewed)), 'B')
    page_image = ocrolib.array2pil(binary)

    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)
    file_path = self.workspace.save_image_file(page_image,
                                               file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    alternative = AlternativeImageType(filename=file_path,
                                       comments=page_xywh['features'])
    page.add_AlternativeImage(alternative)
def _process_segment(self, page, page_image, page_coords, page_id, input_file):
    """Split the page into text and non-text images and attach both.

    With ``self.model`` set, the page is resized to 800x1024, classified
    per pixel by the model (class 1 is painted black in the text image,
    class 2 in the non-text image) and the two images are resized back to
    the page size.  Otherwise a morphological seed-fill heuristic is
    used.  Both images are saved into ``self.output_file_grp`` and added
    to ``page`` as AlternativeImages.

    Args:
        page: PAGE element that receives the AlternativeImages.
        page_image: PIL image of the page.
        page_coords: mapping whose ``'features'`` string prefixes the
            AlternativeImage comments (it is not modified).
        page_id: unused; ``input_file.pageId`` is passed to the workspace.
        input_file: input file entry passed to ``make_file_id``.
    """
    LOG = getLogger('OcrdAnybaseocrTiseg')

    if self.model:
        # Image.LANCZOS is the same filter as the former Image.ANTIALIAS
        # alias, which no longer exists in Pillow >= 10.
        I = ocrolib.pil2array(
            page_image.resize((800, 1024), Image.LANCZOS))
        I = np.array(I)[np.newaxis, :, :, :]
        LOG.info('I shape %s', I.shape)
        # NOTE(review): I always has four axes after the indexing above,
        # so this check cannot fire; kept unchanged.
        if len(I.shape) < 3:
            print('Wrong input shape. Image should have 3 channel')

        # get prediction
        out = self.model.predict(I)
        out = out.reshape((2048, 1600, 3)).argmax(axis=2)

        text_part = 255 * np.ones(out.shape, 'B')
        text_part[np.where(out == 1)] = 0
        LOG.info(
            'text: %d percent',
            100 * (1 - np.count_nonzero(text_part) / np.prod(out.shape)))

        image_part = 255 * np.ones(out.shape, 'B')
        image_part[np.where(out == 2)] = 0
        LOG.info(
            'image: %d percent',
            100 * (1 - np.count_nonzero(image_part) / np.prod(out.shape)))

        image_part = ocrolib.array2pil(image_part)
        text_part = ocrolib.array2pil(text_part)
        image_part = image_part.resize(page_image.size, Image.BICUBIC)
        text_part = text_part.resize(page_image.size, Image.BICUBIC)
    else:
        I = ocrolib.pil2array(page_image)
        if len(I.shape) > 2:
            I = np.mean(I, 2)
        I = 1 - I / I.max()
        rows, cols = I.shape

        # Generate Mask and Seed Images
        Imask, Iseed = self.pixMorphSequence_mask_seed_fill_holes(I)

        # Iseedfill: Union of Mask and Seed Images
        Iseedfill = self.pixSeedfillBinary(Imask, Iseed)

        # Dilation of Iseedfill
        mask = np.ones((3, 3))
        Iseedfill = ndimage.binary_dilation(Iseedfill, mask)

        # Expansion of Iseedfill to become equal in size of I
        Iseedfill = self.expansion(Iseedfill, (rows, cols))

        # Write Text and Non-Text images
        image_part = np.array(255 * (1 - I * Iseedfill), dtype='B')
        text_part = np.array(255 * (1 - I * (1 - Iseedfill)), dtype='B')
        LOG.info(
            'text: %d percent',
            100 * (1 - np.count_nonzero(text_part) / np.prod(I.shape)))
        LOG.info(
            'image: %d percent',
            100 * (1 - np.count_nonzero(image_part) / np.prod(I.shape)))

        image_part = ocrolib.array2pil(image_part)
        text_part = ocrolib.array2pil(text_part)

    file_id = make_file_id(input_file, self.output_file_grp)
    file_path = self.workspace.save_image_file(
        image_part,
        file_id + "_img",
        page_id=input_file.pageId,
        file_grp=self.output_file_grp,
    )
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path,
                             comments=page_coords['features'] + ',non_text'))

    file_path = self.workspace.save_image_file(
        text_part,
        file_id + "_txt",
        page_id=input_file.pageId,
        file_grp=self.output_file_grp,
    )
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path,
                             comments=page_coords['features'] + ',clipped'))
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n, model):
    """Split the page into text and non-text images and attach both.

    With ``model`` given, its per-pixel prediction is used (class 1 is
    painted black in the text image, class 2 in the non-text image) and
    both images are resized to the page size.  Otherwise a morphological
    seed-fill heuristic is used and each part is binarized against its
    own ``ocrolib.midrange``.  Both images are saved into
    ``self.image_grp`` and added to ``page`` as AlternativeImages.

    Args:
        page_image: PIL image of the page.
        page: PAGE element that receives the AlternativeImages.
        page_xywh: mapping whose ``'features'`` string gets ``',clipped'``
            appended after the non-text image has been registered.
        page_id: page identifier forwarded to the workspace.
        input_file: input file entry; its ``ID`` seeds the output file IDs.
        n: index used to build the padded fallback file ID.
        model: object exposing ``predict_segmentation``; falsy to use the
            heuristic.
    """
    I = ocrolib.pil2array(page_image)
    LOG.info('image size: %s', page_image.size)

    if model:
        if len(I.shape) < 3:
            print('Wrong input shape. Image should have 3 channel')

        # get prediction
        out = model.predict_segmentation(inp=I, out_fname="/tmp/out.png")

        text_part = np.ones(out.shape)
        text_part[np.where(out == 1)] = 0
        image_part = np.ones(out.shape)
        image_part[np.where(out == 2)] = 0

        image_part = ocrolib.array2pil(array(255 * (image_part), 'B'))
        text_part = ocrolib.array2pil(array(255 * (text_part), 'B'))

        text_part = text_part.resize(page_image.size, Image.BICUBIC)
        image_part = image_part.resize(page_image.size, Image.BICUBIC)
    else:
        if len(I.shape) > 2:
            I = np.mean(I, 2)
        I = 1 - I / I.max()
        rows, cols = I.shape

        # Generate Mask and Seed Images
        Imask, Iseed = self.pixMorphSequence_mask_seed_fill_holes(I)

        # Iseedfill: Union of Mask and Seed Images
        Iseedfill = self.pixSeedfillBinary(Imask, Iseed)

        # Dilation of Iseedfill
        mask = ones((3, 3))
        Iseedfill = ndimage.binary_dilation(Iseedfill, mask)

        # Expansion of Iseedfill to become equal in size of I
        Iseedfill = self.expansion(Iseedfill, (rows, cols))

        # Write Text and Non-Text images
        image_part = array((1 - I * Iseedfill), dtype=int)
        text_part = array((1 - I * (1 - Iseedfill)), dtype=int)

        # Binarize each part against its own midrange.  The previous code
        # referenced an undefined name, thresholded the already converted
        # PIL image and stored the text mask as the non-text image.
        image_part = ocrolib.array2pil(
            array(255 * (image_part > ocrolib.midrange(image_part)), 'B'))
        text_part = ocrolib.array2pil(
            array(255 * (text_part > ocrolib.midrange(text_part)), 'B'))

    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)

    file_path = self.workspace.save_image_file(
        image_part,
        file_id + "_img",
        page_id=page_id,
        file_grp=self.image_grp,
        force=self.parameter['force'])
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path,
                             comments=page_xywh['features'] + ',non_text'))

    # only the text image carries the 'clipped' feature
    page_xywh['features'] += ',clipped'
    file_path = self.workspace.save_image_file(
        text_part,
        file_id + "_txt",
        page_id=page_id,
        file_grp=self.image_grp,
        force=self.parameter['force'])
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path,
                             comments=page_xywh['features']))
def _process_segment(self, page, filename, page_id, file_id):
    """Binarize the image at ``filename`` and attach the result to ``page``.

    The image is normalised, optionally flattened by subtracting an
    estimate of the local white level, rescaled between percentile based
    low/high levels and thresholded.  The binary image is saved into
    ``self.image_grp`` and added to ``page`` as an AlternativeImage.

    Returns early, without adding an image, when the image is empty or
    when ``self.check_page`` reports a problem (unless ``nocheck`` is set).

    Args:
        page: PAGE element that receives the AlternativeImage.
        filename: path passed to ``ocrolib.read_image_gray``.
        page_id: page identifier used in log messages and for the workspace.
        file_id: ID under which the binary image is saved.
    """
    raw = ocrolib.read_image_gray(filename)
    self.dshow(raw, "input")

    # perform image normalization
    image = raw - amin(raw)
    if amax(image) == amin(image):
        LOG.info("# image is empty: %s" % (page_id))
        return
    image /= amax(image)

    if not self.parameter['nocheck']:
        check = self.check_page(amax(image) - image)
        if check is not None:
            # ``input_file`` is not available in this method; identify
            # the page by ``page_id`` instead.
            LOG.error("%s SKIPPED. %s (use -n to disable this check)",
                      page_id, check)
            return

    # check whether the image is already effectively binarized
    if self.parameter['gray']:
        extreme = 0
    else:
        extreme = (np.sum(image < 0.05) + np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)

    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # if not, we need to flatten it by estimating the local whitelevel
        LOG.info("Flattening")
        m = interpolation.zoom(image, self.parameter['zoom'])
        m = filters.percentile_filter(m, self.parameter['perc'],
                                      size=(self.parameter['range'], 2))
        m = filters.percentile_filter(m, self.parameter['perc'],
                                      size=(2, self.parameter['range']))
        m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
        if self.parameter['debug'] > 0:
            clf()
            imshow(m, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        # zooming down and up again may change the size slightly
        w, h = minimum(array(image.shape), array(m.shape))
        flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
        if self.parameter['debug'] > 0:
            clf()
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])

    # estimate low and high thresholds
    LOG.info("Estimating Thresholds")
    d0, d1 = flat.shape
    o0, o1 = int(self.parameter['bignore'] * d0), int(
        self.parameter['bignore'] * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if self.parameter['escale'] > 0:
        # by default, we use only regions that contain
        # significant variance; this makes the percentile
        # based low and high estimates more reliable
        e = self.parameter['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        if self.parameter['debug'] > 0:
            imshow(v)
            ginput(1, self.parameter['debug'])
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
    hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])

    # rescale the image to get the gray scale image
    LOG.info("Rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    if self.parameter['debug'] > 0:
        imshow(flat, vmin=0, vmax=1)
        ginput(1, self.parameter['debug'])
    binarized = 1 * (flat > self.parameter['threshold'])

    LOG.info("%s lo-hi (%.2f %.2f) %s" % (page_id, lo, hi, comment))
    LOG.info("writing")
    if self.parameter['debug'] > 0 or self.parameter['show']:
        clf()
        gray()
        imshow(binarized)
        ginput(1, max(0.1, self.parameter['debug']))

    bin_array = array(255 * (binarized > ocrolib.midrange(binarized)), 'B')
    bin_image = ocrolib.array2pil(bin_array)

    file_path = self.workspace.save_image_file(bin_image,
                                               file_id,
                                               page_id=page_id,
                                               file_grp=self.image_grp)
    # the keyword is ``comments`` (plural)
    page.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comments="binarized"))
def _process_segment(self, page_image, page, textregion, region_xywh, page_id, input_file, n):
    """Segment a region image into text lines and add them to ``textregion``.

    Existing TextLines are removed when the ``overwrite`` parameter is
    set; otherwise the region is left untouched.  Each detected line is
    cropped from the noise-cleaned image, saved into
    ``self.output_file_grp`` and added as a TextLine carrying an
    AlternativeImage.

    Args:
        page_image: PIL image of the region.
        page: unused by this method.
        textregion: PAGE region that receives the TextLines.
        region_xywh: coordinate mapping passed to
            ``coordinates_for_segment``; its ``'features'`` string becomes
            the AlternativeImage comment.
        page_id: identifier used in log messages, line IDs and for the
            workspace.
        input_file: input file entry passed to ``make_file_id``.
        n: index of the region, used in the image file IDs.
    """
    LOG = getLogger('OcrdAnybaseocrTextline')

    # check for existing text lines and whether to overwrite them
    if textregion.get_TextLine():
        if not self.parameter['overwrite']:
            LOG.warning('keeping existing TextLines in region "%s"', page_id)
            return
        LOG.info('removing existing TextLines in region "%s"', page_id)
        textregion.set_TextLine([])

    binary = ocrolib.pil2array(page_image)
    if len(binary.shape) > 2:
        binary = np.mean(binary, 2)
    binary = np.array(1 - binary / np.amax(binary), 'B')

    scale = self.parameter['scale']
    if scale == 0:
        scale = psegutils.estimate_scale(binary)

    scale_is_bad = (np.isnan(scale) or scale > 1000.0
                    or scale < self.parameter['minscale'])
    if scale_is_bad:
        LOG.warning(str(scale) + ": bad scale; skipping!\n")
        return

    segmentation = self.compute_segmentation(binary, scale)
    if np.amax(segmentation) > self.parameter['maxlines']:
        LOG.warning("too many lines %i; skipping!\n", (np.amax(segmentation)))
        return

    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([line.bounds for line in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs
    renumber = np.zeros(np.amax(segmentation) + 1, 'i')
    for rank, line_index in enumerate(lsort):
        renumber[lines[line_index].label] = 0x010000 + (rank + 1)
    segmentation = renumber[segmentation]

    lines = [lines[line_index] for line_index in lsort]
    cleaned = ocrolib.remove_noise(binary, self.parameter['noise'])

    for i, l in enumerate(lines):
        rows, cols = l.bounds[0], l.bounds[1]
        min_x, max_x = (rows.start, rows.stop)
        min_y, max_y = (cols.start, cols.stop)
        line_polygon = coordinates_for_segment(
            [[min_x, min_y], [max_x, min_y], [max_x, max_y], [min_x, max_y]],
            page_image, region_xywh)
        line_points = points_from_polygon(line_polygon)

        # crop from the cleaned image, binarize and invert
        crop = cleaned[rows, cols]
        crop = np.array(255 * (crop > ocrolib.midrange(crop)), 'B')
        line_image = ocrolib.array2pil(255 - crop)

        file_id = make_file_id(input_file, self.output_file_grp)
        file_path = self.workspace.save_image_file(
            line_image,
            file_id + "_" + str(n) + "_" + str(i),
            page_id=page_id,
            file_grp=self.output_file_grp)
        ai = AlternativeImageType(filename=file_path,
                                  comments=region_xywh['features'])

        line = TextLineType(custom='readingOrder {index:' + str(i) + ';}',
                            id='%s_line%04d' % (page_id, i),
                            Coords=CoordsType(line_points))
        line.add_AlternativeImage(ai)
        textregion.add_TextLine(line)
def _process_segment(self, page_image, page, region_xywh, page_id, input_file, n):
    """Segment the page image into text lines and add them to a TextRegion.

    The lines are added to the last TextRegion of ``page``; if the page
    has none, a region spanning the whole image is created first.  Each
    line crop is saved into ``self.image_grp`` and attached to its
    TextLine as an AlternativeImage.

    Returns early, without adding lines, when the estimated scale is out
    of range or more than ``maxlines`` lines are found.

    Args:
        page_image: PIL image of the page.
        page: PAGE element whose TextRegion receives the TextLines.
        region_xywh: mapping whose ``'features'`` string gets
            ``",textline"`` appended.
        page_id: identifier used in log messages and for the workspace.
        input_file: input file entry; its ``ID`` seeds the output file IDs.
        n: index used to build the padded fallback file ID.
    """
    binary = ocrolib.pil2array(page_image)
    binary = np.array(1 - binary / np.amax(binary), 'B')

    if not page.get_TextRegion():
        # no region yet: create one covering the whole image
        min_x, max_x = (0, binary.shape[0])
        min_y, max_y = (0, binary.shape[1])
        textregion = TextRegionType(
            Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                              (min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y)))
        page.add_TextRegion(textregion)
    else:
        textregion = page.get_TextRegion()[-1]

    if self.parameter['scale'] == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = self.parameter['scale']
    if np.isnan(scale) or scale > 1000.0 or scale < self.parameter['minscale']:
        # report the page id; the former ``fname`` is not defined here
        LOG.warning("%s: bad scale (%g); skipping\n", page_id, scale)
        return

    segmentation = self.compute_segmentation(binary, scale)
    if np.amax(segmentation) > self.parameter['maxlines']:
        LOG.warning("%s: too many lines %i", page_id, np.amax(segmentation))
        return

    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs
    nlabels = np.amax(segmentation) + 1
    renumber = np.zeros(nlabels, 'i')
    for i, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (i + 1)
    segmentation = renumber[segmentation]

    lines = [lines[i] for i in lsort]
    # NOTE(review): the noise-cleaned image is computed but the line crops
    # below are taken from ``binary`` -- confirm which one is intended.
    cleaned = ocrolib.remove_noise(binary, self.parameter['noise'])
    region_xywh['features'] += ",textline"

    file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
    if file_id == input_file.ID:
        file_id = concat_padded(self.image_grp, n)

    for i, l in enumerate(lines):
        min_x, max_x = (l.bounds[0].start, l.bounds[0].stop)
        min_y, max_y = (l.bounds[1].start, l.bounds[1].stop)

        img = binary[l.bounds[0], l.bounds[1]]
        img = np.array(255 * (img > ocrolib.midrange(img)), 'B')
        img = ocrolib.array2pil(img)

        file_path = self.workspace.save_image_file(img,
                                                   file_id + "_" + str(i),
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        ai = AlternativeImageType(filename=file_path,
                                  comments=region_xywh['features'])
        line = TextLineType(
            Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                              (min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y)))
        line.add_AlternativeImage(ai)
        textregion.add_TextLine(line)
def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
    """Binarize the page image and attach the result to ``page``.

    The image is normalised, optionally flattened by subtracting an
    estimate of the local white level, rescaled between percentile based
    low/high levels and thresholded.  The binary image is saved into
    ``self.output_file_grp`` and added to ``page`` as an AlternativeImage.
    Returns early, without adding an image, when the image is empty.

    Args:
        page_image: PIL image of the page.
        page: PAGE element that receives the AlternativeImage.
        page_xywh: mapping whose ``'features'`` string gets
            ``',binarized'`` appended.
        page_id: identifier used in log messages and for the workspace.
        input_file: input file entry passed to ``make_file_id``.
        n: unused by this method.
    """
    LOG = getLogger('OcrdAnybaseocrBinarizer')
    params = self.parameter

    def debug_show(img, clear=False, **imshow_args):
        # interactive inspection, only active when 'debug' is positive
        if params['debug'] > 0:
            if clear:
                clf()
            imshow(img, **imshow_args)
            ginput(1, params['debug'])

    raw = ocrolib.pil2array(page_image)
    if len(raw.shape) > 2:
        raw = np.mean(raw, 2)
    raw = raw.astype("float64")

    # perform image normalization
    image = raw - amin(raw)
    if amax(image) == amin(image):
        LOG.info("# image is empty: %s" % (page_id))
        return
    image /= amax(image)

    # check whether the image is already effectively binarized
    if params['gray']:
        extreme = 0
    else:
        n_extreme = np.sum(image < 0.05) + np.sum(image > 0.95)
        extreme = n_extreme * 1.0 / np.prod(image.shape)

    if extreme > 0.95:
        comment = "no-normalization"
        flat = image
    else:
        comment = ""
        # if not, we need to flatten it by estimating the local whitelevel
        LOG.info("Flattening")
        m = interpolation.zoom(image, params['zoom'])
        m = filters.percentile_filter(m, params['perc'],
                                      size=(params['range'], 2))
        m = filters.percentile_filter(m, params['perc'],
                                      size=(2, params['range']))
        m = interpolation.zoom(m, 1.0 / params['zoom'])
        debug_show(m, clear=True, vmin=0, vmax=1)
        w, h = minimum(array(image.shape), array(m.shape))
        flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
        debug_show(flat, clear=True, vmin=0, vmax=1)

    # estimate low and high thresholds
    LOG.info("Estimating Thresholds")
    d0, d1 = flat.shape
    o0 = int(params['bignore'] * d0)
    o1 = int(params['bignore'] * d1)
    est = flat[o0:d0 - o0, o1:d1 - o1]
    if params['escale'] > 0:
        # by default, we use only regions that contain significant
        # variance; this makes the percentile based low and high
        # estimates more reliable
        e = params['escale']
        v = est - filters.gaussian_filter(est, e * 20.0)
        v = filters.gaussian_filter(v**2, e * 20.0)**0.5
        v = (v > 0.3 * amax(v))
        v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
        v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
        debug_show(v)
        est = est[v]
    lo = stats.scoreatpercentile(est.ravel(), params['lo'])
    hi = stats.scoreatpercentile(est.ravel(), params['hi'])

    # rescale the image to get the gray scale image
    LOG.info("Rescaling")
    flat -= lo
    flat /= (hi - lo)
    flat = clip(flat, 0, 1)
    debug_show(flat, vmin=0, vmax=1)
    binarized = 1 * (flat > params['threshold'])

    LOG.info("%s lo-hi (%.2f %.2f) %s" % (page_id, lo, hi, comment))
    LOG.info("writing")
    if params['debug'] > 0 or params['show']:
        clf()
        gray()
        imshow(binarized)
        ginput(1, max(0.1, params['debug']))

    page_xywh['features'] += ',binarized'
    binary = array(255 * (binarized > ocrolib.midrange(binarized)), 'B')
    binary_image = ocrolib.array2pil(binary)

    file_id = make_file_id(input_file, self.output_file_grp)
    file_path = self.workspace.save_image_file(
        binary_image,
        file_id + '-IMG',
        page_id=page_id,
        file_grp=self.output_file_grp)
    alternative = AlternativeImageType(filename=file_path,
                                       comments=page_xywh['features'])
    page.add_AlternativeImage(alternative)