def _processData(params):
    """ Resize image and extract mask from PAGE file """
    (
        img_path,
        out_size,
        out_folder,
        classes,
        line_width,
        line_color,
        build_labels,
        ext_mode,
        node_types,
    ) = params
    img_id = os.path.splitext(os.path.basename(img_path))[0]
    img_dir = os.path.dirname(img_path)
    img_data = cv2.imread(img_path)
    # --- resize image
    res_img = cv2.resize(
        img_data, (out_size[1], out_size[0]), interpolation=cv2.INTER_CUBIC
    )
    new_img_path = os.path.join(out_folder, img_id + ".png")
    cv2.imwrite(new_img_path, res_img)
    # --- get label
    if build_labels:
        if os.path.isfile(img_dir + "/page/" + img_id + ".xml"):
            xml_path = img_dir + "/page/" + img_id + ".xml"
        else:
            # logger.critical('No xml found for file {}'.format(img_path))
            # --- TODO move to logger
            print("No xml found for file {}".format(img_path))
            raise Exception("Execution stopped due to critical errors")
        gt_data = pageData(xml_path)
        gt_data.parse()
        # --- build lines mask
        if ext_mode != "R":
            lin_mask = gt_data.build_baseline_mask(out_size, line_color, line_width)
        # --- build regions mask
        if ext_mode == "LR":
            reg_mask = gt_data.build_mask(out_size, node_types, classes)
            label = np.array((lin_mask, reg_mask))
        elif ext_mode == "R":
            label = gt_data.build_mask(out_size, node_types, classes)
        else:
            label = lin_mask
        new_label_path = os.path.join(out_folder, img_id + ".pickle")
        with open(new_label_path, "wb") as fh:
            pickle.dump(label, fh, -1)
        return (new_img_path, new_label_path, xml_path)
    return (new_img_path, None, None)
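# --- Hypothetical usage sketch (not part of the original code): it shows how the
# --- params tuple consumed by _processData above could be assembled and mapped over
# --- a folder of images with multiprocessing.Pool. The paths, class colors and node
# --- types below are placeholder values, not project defaults.
def _example_process_folder():
    import glob
    from multiprocessing import Pool

    out_size = (1024, 768)  # (rows, cols) of the resized image
    classes = {"TextRegion": 128}  # assumed color assigned to each region class
    node_types = ["TextRegion"]  # assumed PAGE-XML node types to rasterize
    params = [
        (p, out_size, "./work", classes, 10, 128, True, "LR", node_types)
        for p in glob.glob("./data/*.jpg")
    ]
    with Pool(processes=4) as pool:
        # --- each worker returns (new_img_path, new_label_path, xml_path)
        return pool.map(_processData, params)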
def main():
    """Build a PAGE-XML file from img encoded data"""
    in_path = sys.argv[1]
    out_path = sys.argv[2]
    nm = re.split(r"-", sys.argv[3])
    name_map = {}
    for k in nm:
        z, c = re.split(r":", k)
        # --- colors are parsed to ints so they can be compared against pixel values
        name_map[z] = tuple(int(v) for v in c.split(","))
    # name_map = {"marginalia": [0, 0, 255]}

    formats = ["tif", "tiff", "png", "jpg", "jpeg", "JPG", "bmp"]
    img_paths = []
    for ext in formats:
        img_paths.extend(glob.glob(in_path + "/*." + ext))
    img_ids = [os.path.splitext(os.path.basename(x))[0] for x in img_paths]
    img_data = dict(zip(img_ids, img_paths))

    for img_id, img_p in img_data.items():
        page = pageData(os.path.join(out_path, img_id + ".xml"), logger=None)
        img_name = os.path.basename(img_p)
        # --- open file
        img = cv2.imread(img_p)
        rows, cols, _ = img.shape
        page.new_page(img_name, str(rows), str(cols))
        r_id = 0
        for zone_name, color in name_map.items():
            zones_img = np.all(img == color, axis=-1).astype(np.uint8)
            # --- OpenCV 3 returns (img, contours, hierarchy); OpenCV 4 returns (contours, hierarchy)
            res_ = cv2.findContours(
                zones_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            )
            contours = res_[0] if len(res_) == 2 else res_[1]
            for cnt in contours:
                reg_coords = ""
                for x in cnt.reshape(-1, 2):
                    reg_coords = reg_coords + " {},{}".format(x[0], x[1])
                text_reg = page.add_element(
                    "TextRegion", str(r_id), zone_name, reg_coords.strip()
                )
                r_id += 1
        page.save_xml()
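# --- Hypothetical invocation sketch (not in the original file): main() above expects
# --- sys.argv[3] to encode the zone-name -> color map as "name:c1,c2,c3" entries
# --- joined by "-". The script name and folders below are placeholders:
# ---
# ---     python mask_to_page.py ./masks ./page_out "marginalia:0,0,255-paragraph:255,0,0"
# ---
# --- Note that the colors are compared against pixels loaded with cv2.imread, which
# --- uses BGR channel order, so the triplets should be given as B,G,R.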
def gen_page(self, img_id, data, reg_list=None, out_folder='./',
             approx_alg=None, num_segments=None):
    """ """
    self.approx_alg = self.approx_alg if approx_alg is None else approx_alg
    self.num_segments = self.num_segments if num_segments is None else num_segments
    self.logger.debug('Gen PAGE for image: {}'.format(img_id))
    #--- sym link to original image
    #--- TODO: check if orig image exists
    img_name = os.path.basename(self.img_data[img_id])
    symlink_force(os.path.realpath(self.img_data[img_id]),
                  os.path.join(out_folder, img_name))
    o_img = cv2.imread(self.img_data[img_id])
    (o_rows, o_cols, _) = o_img.shape
    o_max = max(o_rows, o_cols)
    o_min = min(o_rows, o_cols)
    cScale = np.array([o_cols / self.out_size[1], o_rows / self.out_size[0]])
    page = pageData(os.path.join(out_folder, 'page', img_id + '.xml'),
                    logger=self.logger)
    self.hyp_xml_list.append(page.filepath)
    self.hyp_xml_list.sort()
    page.new_page(img_name, str(o_rows), str(o_cols))
    ####
    if self.out_type == 'C':
        if self.ext_mode == 'L':
            lines = data[0].astype(np.uint8)
            reg_list = ['full_page']
            colors = {'full_page': 0}
            r_data = np.zeros(lines.shape, dtype=np.uint8)
        elif self.ext_mode == 'R':
            r_data = data[0]
            lines = np.zeros(r_data.shape, dtype=np.uint8)
            colors = self.classes
        elif self.ext_mode == 'LR':
            lines = data[0].astype(np.uint8)
            r_data = data[1]
            colors = self.classes
        else:
            pass
    elif self.out_type == 'R':
        if self.ext_mode == 'L':
            #--- map line_color from [0, 255] to [-1, 1] and threshold at the
            #--- midpoint between the background (-1) and the line color
            l_color = (-1 - ((self.line_color * (2 / 255)) - 1)) / 2
            lines = np.zeros(data[0].shape, dtype=np.uint8)
            lines[data[0] >= l_color] = 1
            reg_list = ['full_page']
            colors = {'full_page': 128}
            r_data = np.zeros(lines.shape, dtype=np.uint8)
        elif self.ext_mode == 'R':
            r_data = data[1]
            colors = self.classes
            lines = np.zeros(r_data.shape, dtype=np.uint8)
        elif self.ext_mode == 'LR':
            l_color = (-1 - ((self.line_color * (2 / 255)) - 1)) / 2
            lines = np.zeros(data[0].shape, dtype=np.uint8)
            lines[data[0] >= l_color] = 1
            r_data = data[1]
            colors = self.classes
        else:
            pass
    else:
        pass
    reg_mask = np.zeros(r_data.shape, dtype='uint8')
    lin_mask = np.zeros(lines.shape, dtype='uint8')
    r_id = 0
    kernel = np.ones((5, 5), np.uint8)
    #--- get regions and lines for each class
    for reg in reg_list:
        r_color = colors[reg]
        #--- filling the array in place is faster than creating a new one or multiplying by 0
        reg_mask.fill(0)
        if self.out_type == 'R':
            lim_inf = ((r_color - self.th_span) * (2 / 255)) - 1
            lim_sup = ((r_color + self.th_span) * (2 / 255)) - 1
            reg_mask[np.where((r_data > lim_inf) & (r_data < lim_sup))] = 1
        elif self.out_type == 'C':
            reg_mask[r_data == r_color] = 1
        else:
            pass
        _, contours, hierarchy = cv2.findContours(reg_mask, cv2.RETR_EXTERNAL,
                                                  cv2.CHAIN_APPROX_SIMPLE)
        for cnt in contours:
            #--- remove small objects
            if cnt.shape[0] < 4:
                continue
            if cv2.contourArea(cnt) < 0.01 * self.out_size[0]:
                continue
            #--- get lines inside the region
            lin_mask.fill(0)
            rect = cv2.minAreaRect(cnt)
            #--- smooth the region a bit to prevent spikes
            epsilon = 0.005 * cv2.arcLength(cnt, True)
            approx = cv2.approxPolyDP(cnt, epsilon, True)
            #box = np.array((rect[0][0], rect[0][1], rect[1][0], rect[1][1])).astype(int)
            r_id = r_id + 1
            approx = (approx * cScale).astype('int32')
            reg_coords = ''
            for x in approx.reshape(-1, 2):
                reg_coords = reg_coords + " {},{}".format(x[0], x[1])
            if not self.ext_mode == 'R':
                cv2.fillConvexPoly(lin_mask, points=cnt, color=(1, 1, 1))
                lin_mask = cv2.erode(lin_mask, kernel, iterations=1)
                lin_mask = cv2.dilate(lin_mask, kernel, iterations=1)
                reg_lines = lines * lin_mask
                #--- search for the lines
                _, l_cont, l_hier = cv2.findContours(reg_lines, cv2.RETR_EXTERNAL,
                                                     cv2.CHAIN_APPROX_SIMPLE)
                if len(l_cont) == 0:
                    continue
                #--- Add region to XML only if there is some line
                text_reg = page.add_element('TextRegion', str(r_id), reg,
                                            reg_coords.strip())
                n_lines = 0
                for l_id, l_cnt in enumerate(l_cont):
                    if l_cnt.shape[0] < 4:
                        continue
                    if cv2.contourArea(l_cnt) < 0.01 * self.out_size[0]:
                        continue
                    #--- convert to convexHull if poly is not convex
                    if not cv2.isContourConvex(l_cnt):
                        l_cnt = cv2.convexHull(l_cnt)
                    lin_coords = ''
                    l_cnt = (l_cnt * cScale).astype('int32')
                    for l_x in l_cnt.reshape(-1, 2):
                        lin_coords = lin_coords + " {},{}".format(l_x[0], l_x[1])
                    (is_line, approx_lin) = self._get_baseline(o_img, l_cnt)
                    if is_line == False:
                        continue
                    text_line = page.add_element('TextLine',
                                                 str(l_id) + '_' + str(r_id),
                                                 reg,
                                                 lin_coords.strip(),
                                                 parent=text_reg)
                    baseline = pa.points_to_str(approx_lin)
                    page.add_baseline(baseline, text_line)
                    n_lines += 1
                #--- remove regions without text lines
                if n_lines == 0:
                    page.remove_element(text_reg)
            else:
                text_reg = page.add_element('TextRegion', str(r_id), reg,
                                            reg_coords.strip())
    page.save_xml()
def _extract_images_from_page(self, page_name):
    """
    Args:
        page_name (string): The name of the page to extract all the patches

    Returns:
        pandas dataframe: with the coordinates of the patches as well as the
        filename used to store the patch
    """
    logging.debug(f'page : {page_name}')
    x_arr_min = []
    x_arr_max = []
    y_arr_min = []
    y_arr_max = []
    filename_arr = []
    page_name_arr = []
    collumn_number = []
    patch_number = []
    MIN_LENGTH_OF_PATH = 50
    xml_path = os.path.join(self.xml_directory, page_name + ".xml")
    image_path = os.path.join(self.image_directory, page_name + ".jpg")

    # line detection
    def load_image(path: str):
        return cv2.imread(path)

    def show_image(img, title="") -> None:
        return
        # plt.figure(figsize=(10, 15))
        # plt.imshow(img, cmap='Greys_r')
        # plt.title(title)
        # plt.show()

    def remove_connected_components(img, min_size=50):
        number_of_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
            img, 8, cv2.CV_32S)
        # first index represents the background and the last column the total area in pixels
        sizes = stats[1:, -1]
        number_of_labels = number_of_labels - 1
        img_cleaned = np.full(img.shape, 0)
        for i in range(0, number_of_labels):
            if sizes[i] >= min_size:
                img_cleaned[labels == i + 1] = 255
            else:
                number_of_labels = number_of_labels - 1
        return img_cleaned

    def binarize_image(img, otsu=True):
        img_binarized = None
        if otsu:
            threshold, img_binarized = cv2.threshold(
                img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        else:
            threshold, img_binarized = cv2.threshold(
                img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        return img_binarized

    def _get_kernel(theta):
        ksize = 31
        return cv2.getGaborKernel((ksize, ksize), 4.0, theta, 10.0, 0.5, 0,
                                  ktype=cv2.CV_32F)

    def filter_image(img, theta=np.pi):
        kernel = _get_kernel(theta)
        return cv2.filter2D(img, -1, kernel)

    def invert(img):
        return cv2.bitwise_not(img)

    # LOADING IMG
    connected_components_threshold = 600
    img_og = load_image(image_path)
    image = mpimg.imread(image_path)
    # LOADING PAGE
    page = xmlPAGE.pageData(xml_path)
    page.parse()
    imgBoundingBox = page.build_baseline_coordinates()
    img = cv2.cvtColor(img_og, cv2.COLOR_BGR2GRAY)
    show_image(img, "Original")

    # Finding the Region of Interest (ROI):
    x_min_global = 6000
    x_max_global = -1
    y_min_global = 6000
    y_max_global = -1
    for i in imgBoundingBox:
        for j in i[0]:
            x_max_global = max(j[0][0], x_max_global)
            x_min_global = min(j[0][0], x_min_global)
            y_max_global = max(j[0][1], y_max_global)
            y_min_global = min(j[0][1], y_min_global)
    x_min_global = max(300, x_min_global)
    x_max_global = min(4000, x_max_global)

    def ROILineMask(img):
        connected_components_threshold = 500
        img_filtered = filter_image(img, theta=np.pi)
        img_vertical_binarized = binarize_image(img_filtered.copy())
        img_vertical_binarized = invert(img_vertical_binarized)
        img_cleaned_cc = remove_connected_components(
            img_vertical_binarized.copy(), connected_components_threshold)
        # show_image(img_cleaned_cc, "adapt threshold cc")
        img_gg_blur = cv2.blur(img_cleaned_cc, (20, 20))
        _, img_bin = cv2.threshold(np.float32(img_gg_blur), 20, 255,
                                   cv2.THRESH_BINARY)
        rows, cols = img_bin.shape
        M = np.float32([[1, 0, 0], [0, 1, -150]])
        dst = cv2.warpAffine(img_bin, M, (cols, rows))
        res = cv2.bitwise_or(img_bin, dst)
        # show_image(res, "translationOR")
        connected_components_threshold = 50000
        img_col = remove_connected_components(
            np.uint8(res).copy(), connected_components_threshold)
        show_image(img_col, "cleanup")
        return img_col

    y_min_global_margin = -100
    y_max_global_margin = 150
    logging.debug(
        f'ROILineMask input coordinates '
        f'{y_min_global + y_min_global_margin}:{y_max_global + y_max_global_margin}, '
        f'{x_min_global}:{x_max_global}')
    img_col_ROI = ROILineMask(
        img[y_min_global + y_min_global_margin:y_max_global + y_max_global_margin,
            x_min_global:x_max_global])
    # img = img[y_min_global-100:y_max_global+150, x_min_global:x_max_global]
    # show_image(img, "ROI")

    def x_ogToROICoordinates(og_x):
        return max(0, og_x - x_min_global)

    def y_ogToROICoordinates(og_y):
        return og_y - (y_min_global + y_min_global_margin)

    # img_filtered = filter_image(img, theta=np.pi)
    # img_vertical_binarized = binarize_image(img_filtered.copy())
    # img_vertical_binarized = invert(img_vertical_binarized)
    # img_cleaned_cc = remove_connected_components(img_vertical_binarized.copy(), connected_components_threshold)
    # show_image(img_cleaned_cc, "cleaned")

    if not os.path.isdir(self.save_directory):
        os.mkdir(self.save_directory)

    patch_height_above = -100
    patch_height_below = 20
    for i in imgBoundingBox:
        x_min = 6000
        x_max = -1
        y_min = 6000
        y_max = -1
        for j in i[0]:
            x_max = max(j[0][0], x_max)
            x_min = min(j[0][0], x_min)
            y_max = max(j[0][1], y_max)
            y_min = min(j[0][1], y_min)
        og_patch_img = image[y_min + patch_height_above:y_max + patch_height_below,
                             x_min:x_max]
        # show_image(og_patch_img, f'patch_{x_min}_{x_max}_{y_min}_{y_max}')
        # show_image(img_cleaned_cc[y_min+patch_height_above:y_max+patch_height_below, x_min:x_max], "vert lines")
        show_image(
            np.uint8(img_col_ROI)
            [y_ogToROICoordinates(y_min + patch_height_above):
             y_ogToROICoordinates(y_max + patch_height_below), :], "Line")
        number_of_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
            np.uint8(img_col_ROI)
            [y_ogToROICoordinates(y_min + patch_height_above):
             y_ogToROICoordinates(y_max + patch_height_below), :], 8, cv2.CV_32S)
        centroid_x_locations = centroids[1:, 0]
        centroid_x_locations.sort()
        # print(f"centroid x location : {centroid_x_locations}")
        # Locations of lines x-coord
        number_of_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
            np.uint8(img_col_ROI)
            [y_ogToROICoordinates(y_min + patch_height_above):
             y_ogToROICoordinates(y_max + patch_height_below),
             x_ogToROICoordinates(x_min):x_ogToROICoordinates(x_max)], 8, cv2.CV_32S)
        # print(f"centroids raw location = {centroids[:,0].astype(int)}")
        cut_locations = np.append(centroids[1:, 0].astype(int),
                                  [0, x_max - x_min], axis=0) + x_min - x_min_global
        cut_locations.sort()
        show_image(
            img_col_ROI[y_ogToROICoordinates(y_min + patch_height_above):
                        y_ogToROICoordinates(y_max + patch_height_below),
                        x_ogToROICoordinates(x_min):x_ogToROICoordinates(x_max)],
            "SIMPLE_PATH")
        # print(f"{x_min_global=}, {x_min=}")
        # print(f"{cut_locations=}")
        for c in range(1, len(cut_locations[1:]) + 1):
            # print("patches")
            if cut_locations[c] - cut_locations[c - 1] > MIN_LENGTH_OF_PATH:
                locationOfPatch = cut_locations[c - 1] + (
                    cut_locations[c] - cut_locations[c - 1]) / 2
                # print(f"location of patch={locationOfPatch}")
                col_location = np.searchsorted(centroid_x_locations, locationOfPatch)
                x_min_temp = cut_locations[c - 1] + x_min_global
                x_max_temp = cut_locations[c] + x_min_global
                y_min_temp = y_min + patch_height_above
                y_max_temp = y_max + patch_height_below
                output_img = image[y_min_temp:y_max_temp, x_min_temp:x_max_temp]
                x_arr_max.append(x_max_temp)
                x_arr_min.append(x_min_temp)
                y_arr_min.append(y_min_temp)
                y_arr_max.append(y_max_temp)
                collumn_number.append(col_location)
                patch_number.append(c)
                filename = f'{page_name}_{x_min_temp}_{x_max_temp}_{y_min_temp}_{y_max_temp}_{c}_{col_location}.jpg'
                filename_arr.append(filename)
                page_name_arr.append(page_name)
                plt.imsave(os.path.join(self.save_directory, filename), output_img)

    return pd.DataFrame({
        "x_min": x_arr_min,
        "x_max": x_arr_max,
        "y_min": y_arr_min,
        "y_max": y_arr_max,
        "collumn_number": collumn_number,
        "patch_number": patch_number,
        "filename": filename_arr,
        "page_name": page_name_arr
    })
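# --- Hypothetical usage sketch (not part of the original class): assuming an
# --- extractor object with xml_directory, image_directory and save_directory set,
# --- the per-page DataFrames returned above could be concatenated into one index
# --- and written to a CSV. The output path is a placeholder.
def _example_extract_pages(extractor, page_names):
    import pandas as pd

    frames = [extractor._extract_images_from_page(name) for name in page_names]
    index = pd.concat(frames, ignore_index=True)
    index.to_csv("patches_index.csv", index=False)
    return index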
def gen_page(
    self,
    img_id,
    data,
    reg_list=None,
    out_folder="./",
    approx_alg=None,
    num_segments=None,
):
    """ """
    self.approx_alg = self.opts.approx_alg if approx_alg is None else approx_alg
    self.num_segments = (
        self.opts.num_segments if num_segments is None else num_segments
    )
    self.logger.debug("Gen PAGE for image: {}".format(img_id))
    # --- sym link to original image
    # --- TODO: check if orig image exists
    img_name = os.path.basename(self.img_data[img_id])
    symlink_force(
        os.path.realpath(self.img_data[img_id]), os.path.join(out_folder, img_name)
    )
    o_img = cv2.imread(self.img_data[img_id])
    (o_rows, o_cols, _) = o_img.shape
    o_max = max(o_rows, o_cols)
    o_min = min(o_rows, o_cols)
    cScale = np.array(
        [o_cols / self.opts.img_size[1], o_rows / self.opts.img_size[0]]
    )
    page = pageData(
        os.path.join(out_folder, "page", img_id + ".xml"), logger=self.logger
    )
    self.hyp_xml_list.append(page.filepath)
    self.hyp_xml_list.sort()
    page.new_page(img_name, str(o_rows), str(o_cols))
    ####
    if self.opts.net_out_type == "C":
        if self.opts.out_mode == "L":
            lines = data[0].astype(np.uint8)
            reg_list = ["full_page"]
            colors = {"full_page": 0}
            r_data = np.zeros(lines.shape, dtype=np.uint8)
        elif self.opts.out_mode == "R":
            r_data = data[0]
            lines = np.zeros(r_data.shape, dtype=np.uint8)
            colors = self.opts.regions_colors
        elif self.opts.out_mode == "LR":
            lines = data[0].astype(np.uint8)
            # lines = data[0]
            r_data = data[1]
            colors = self.opts.regions_colors
        else:
            pass
    elif self.opts.net_out_type == "R":
        if self.opts.out_mode == "L":
            l_color = (-1 - ((self.line_color * (2 / 255)) - 1)) / 2
            lines = np.zeros(data[0].shape, dtype=np.uint8)
            lines[data[0] >= l_color] = 1
            reg_list = ["full_page"]
            colors = {"full_page": 128}
            r_data = np.zeros(lines.shape, dtype=np.uint8)
        elif self.opts.out_mode == "R":
            r_data = data[1]
            colors = self.opts.regions_colors
            lines = np.zeros(r_data.shape, dtype=np.uint8)
        elif self.opts.out_mode == "LR":
            l_color = (-1 - ((self.line_color * (2 / 255)) - 1)) / 2
            lines = np.zeros(data[0].shape, dtype=np.uint8)
            lines[data[0] >= l_color] = 1
            r_data = data[1]
            colors = self.opts.regions_colors
        else:
            pass
    else:
        pass
    reg_mask = np.zeros(r_data.shape, dtype="uint8")
    lin_mask = np.zeros(lines.shape, dtype="uint8")
    r_id = 0
    kernel = np.ones((5, 5), np.uint8)
    if self.opts.line_alg == "external" and self.opts.net_out_type != "R":
        cv2.imwrite(
            os.path.join(out_folder, "page", img_id + ".png"),
            cv2.resize(
                np.abs(lines - 1).astype(np.uint8) * 255,
                (o_cols, o_rows),
                interpolation=cv2.INTER_NEAREST,
            ),
        )
    # --- get regions and lines for each class
    for reg in reg_list:
        r_color = colors[reg]
        r_type = self.opts.region_types[reg]
        # --- filling the array in place is faster than creating a new one or multiplying by 0
        reg_mask.fill(0)
        if self.opts.net_out_type == "R":
            lim_inf = ((r_color - self.th_span) * (2 / 255)) - 1
            lim_sup = ((r_color + self.th_span) * (2 / 255)) - 1
            reg_mask[np.where((r_data > lim_inf) & (r_data < lim_sup))] = 1
        elif self.opts.net_out_type == "C":
            reg_mask[r_data == r_color] = 1
        else:
            pass
        # --- OpenCV 3 returns (img, contours, hierarchy); OpenCV 4 returns (contours, hierarchy)
        res_ = cv2.findContours(
            reg_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        if len(res_) == 2:
            contours, hierarchy = res_
        else:
            _, contours, hierarchy = res_
        for cnt in contours:
            # --- remove small objects
            if cnt.shape[0] < 4:
                continue
            if cv2.contourArea(cnt) < self.opts.min_area * self.opts.img_size[0]:
                continue
            rect = cv2.minAreaRect(cnt)
            # --- smooth the region a bit to prevent spikes
            epsilon = 0.005 * cv2.arcLength(cnt, True)
            approx = cv2.approxPolyDP(cnt, epsilon, True)
            # box = np.array((rect[0][0], rect[0][1], rect[1][0], rect[1][1])).astype(int)
            r_id = r_id + 1
            approx = (approx * cScale).astype("int32")
            reg_coords = ""
            for x in approx.reshape(-1, 2):
                reg_coords = reg_coords + " {},{}".format(x[0], x[1])
            if (
                self.opts.nontext_regions is None
                or reg not in self.opts.nontext_regions
            ):
                # --- get lines inside the region
                lin_mask.fill(0)
                if not self.opts.out_mode == "R" and self.opts.line_alg != "external":
                    cv2.fillConvexPoly(lin_mask, points=cnt, color=(1, 1, 1))
                    lin_mask = cv2.erode(lin_mask, kernel, iterations=1)
                    lin_mask = cv2.dilate(lin_mask, kernel, iterations=1)
                    reg_lines = lines * lin_mask
                    # --- search for the lines
                    resl_ = cv2.findContours(
                        reg_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
                    )
                    if len(resl_) == 2:
                        l_cont, l_hier = resl_
                    else:
                        _, l_cont, l_hier = resl_
                    if len(l_cont) == 0:
                        continue
                    # --- Add region to XML only if there is some line
                    uuid = "".join(
                        random.choice(self.validValues) for _ in range(4)
                    )
                    text_reg = page.add_element(
                        r_type, "r" + uuid + "_" + str(r_id), reg, reg_coords.strip()
                    )
                    n_lines = 0
                    for l_id, l_cnt in enumerate(l_cont):
                        if l_cnt.shape[0] < 4:
                            continue
                        if cv2.contourArea(l_cnt) < 0.01 * self.opts.img_size[0]:
                            continue
                        # --- convert to convexHull if poly is not convex
                        if not cv2.isContourConvex(l_cnt):
                            l_cnt = cv2.convexHull(l_cnt)
                        lin_coords = ""
                        l_cnt = (l_cnt * cScale).astype("int32")
                        (is_line, approx_lin) = self._get_baseline(o_img, l_cnt)
                        if is_line == False:
                            continue
                        is_line, l_cnt = build_baseline_offset(
                            approx_lin, offset=self.opts.line_offset
                        )
                        if is_line == False:
                            continue
                        for l_x in l_cnt.reshape(-1, 2):
                            lin_coords = lin_coords + " {},{}".format(l_x[0], l_x[1])
                        uuid = "".join(
                            random.choice(self.validValues) for _ in range(4)
                        )
                        text_line = page.add_element(
                            "TextLine",
                            "l" + uuid + "_" + str(l_id),
                            reg,
                            lin_coords.strip(),
                            parent=text_reg,
                        )
                        baseline = pa.points_to_str(approx_lin)
                        page.add_baseline(baseline, text_line)
                        n_lines += 1
                    # --- remove regions without text lines
                    if n_lines == 0:
                        page.remove_element(text_reg)
                else:
                    uuid = "".join(
                        random.choice(self.validValues) for _ in range(4)
                    )
                    text_reg = page.add_element(
                        r_type, "r" + uuid + "_" + str(r_id), reg, reg_coords.strip()
                    )
            else:
                uuid = "".join(random.choice(self.validValues) for _ in range(4))
                text_reg = page.add_element(
                    r_type, "r" + uuid + "_" + str(r_id), reg, reg_coords.strip()
                )
    page.save_xml()
def gen_page(self, in_img_path, line_mask, id):
    in_img = cv2.imread(in_img_path)
    (in_img_rows, in_img_cols, _) = in_img.shape
    # print('line_mask.shape:', line_mask.shape)
    cScale = np.array(
        [in_img_cols / line_mask.shape[1], in_img_rows / line_mask.shape[0]]
    )
    page = pageData(os.path.join('out', id + ".xml"))
    page.new_page(os.path.basename(in_img_path), str(in_img_rows), str(in_img_cols))
    kernel = np.ones((5, 5), np.uint8)
    validValues = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'

    lines = line_mask.copy()
    lines[line_mask > 0.1] = 1
    lines = lines.astype(np.uint8)
    # plt.axis("off")
    # plt.imshow(lines, cmap='gray')
    # plt.show()

    r_id = 0
    lin_mask = np.zeros(line_mask.shape, dtype="uint8")
    reg_mask = np.ones(line_mask.shape, dtype="uint8")
    # --- OpenCV 3 returns (img, contours, hierarchy); OpenCV 4 returns (contours, hierarchy)
    res_ = cv2.findContours(reg_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(res_) == 2:
        contours, hierarchy = res_
    else:
        _, contours, hierarchy = res_
    for cnt in contours:
        min_area = 0.01
        # --- remove small objects
        if cnt.shape[0] < 4:
            continue
        if cv2.contourArea(cnt) < min_area * line_mask.shape[0]:
            continue
        rect = cv2.minAreaRect(cnt)
        # --- smooth the region a bit to prevent spikes
        epsilon = 0.005 * cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, epsilon, True)
        # box = np.array((rect[0][0], rect[0][1], rect[1][0], rect[1][1])).astype(int)
        r_id = r_id + 1
        approx = (approx * cScale).astype("int32")
        reg_coords = ""
        for x in approx.reshape(-1, 2):
            reg_coords = reg_coords + " {},{}".format(x[0], x[1])
        cv2.fillConvexPoly(lin_mask, points=cnt, color=(1, 1, 1))
        lin_mask = cv2.erode(lin_mask, kernel, iterations=1)
        lin_mask = cv2.dilate(lin_mask, kernel, iterations=1)
        reg_lines = lines * lin_mask
        resl_ = cv2.findContours(reg_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if len(resl_) == 2:
            l_cont, l_hier = resl_
        else:
            _, l_cont, l_hier = resl_
        # IMPORTANT l_cont, l_hier
        if len(l_cont) == 0:
            continue
        # --- Add region to XML only if there is some line
        uuid = ''.join(random.choice(validValues) for _ in range(4))
        text_reg = page.add_element(
            'TextRegion', "r" + uuid + "_" + str(r_id), 'full_page', reg_coords.strip()
        )
        n_lines = 0
        for l_id, l_cnt in enumerate(l_cont):
            if l_cnt.shape[0] < 4:
                continue
            if cv2.contourArea(l_cnt) < 0.01 * line_mask.shape[0]:
                continue
            # --- convert to convexHull if poly is not convex
            if not cv2.isContourConvex(l_cnt):
                l_cnt = cv2.convexHull(l_cnt)
            lin_coords = ""
            l_cnt = (l_cnt * cScale).astype("int32")
            # IMPORTANT
            (is_line, approx_lin) = self._get_baseline(in_img, l_cnt)
            if is_line == False:
                continue
            is_line, l_cnt = build_baseline_offset(approx_lin, offset=50)
            if is_line == False:
                continue
            for l_x in l_cnt.reshape(-1, 2):
                lin_coords = lin_coords + " {},{}".format(l_x[0], l_x[1])
            uuid = ''.join(random.choice(validValues) for _ in range(4))
            text_line = page.add_element(
                "TextLine",
                "l" + uuid + "_" + str(l_id),
                'full_page',
                lin_coords.strip(),
                parent=text_reg,
            )
            # IMPORTANT
            baseline = pa.points_to_str(approx_lin)
            page.add_baseline(baseline, text_line)
            n_lines += 1
    page.save_xml()
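# --- Hypothetical usage sketch (not part of the original class): gen_page above
# --- expects a line probability map (values above 0.1 are treated as line pixels)
# --- at the resolution the detector worked at; cScale maps its coordinates back to
# --- the full-resolution input image. File names below are placeholders.
def _example_gen_page(worker):
    import numpy as np

    line_mask = np.load("line_probs.npy")  # H x W float map produced elsewhere
    worker.gen_page("scan_0001.jpg", line_mask, "scan_0001")
    # --- the PAGE-XML is written to out/scan_0001.xml by gen_page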
def zone_map(hyp, target, img, alpha=0, dist=lambda r, h: int(r != h)):
    """
    Computes the ZoneMap metric from:
        O. Galibert, J. Kahn and I. Oparin, "The zonemap metric for page
        segmentation and area classification in scanned documents",
        2014 IEEE International Conference on Image Processing (ICIP),
        Paris, 2014, pp. 2594-2598.
    Inputs:
        hyp: hypothesis data (PAGE-XML object)
        target: reference data (PAGE-XML object)
        img: pointer to image file. If img is not binary, img = Otsu(img)
        alpha: classification error weight [0, 1]
        dist: difference function between zones [0, 1],
              default: dist(r, h) = 0 if r == h, 1 otherwise
    """
    if os.path.isfile(img):
        img = cv2.imread(img, 0)
    if np.max(img) > 1:
        _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        #--- convert the 0/255 Otsu output into a 0/1 mask with ink = 1
        img = np.abs(1 - img // 255).astype(np.uint8)
    hyp_data = pageData(hyp)
    hyp_data.parse()
    tar_data = pageData(target)
    tar_data.parse()
    Hzones = hyp_data.get_zones(["TextRegion"])
    Rzones = tar_data.get_zones(["TextRegion"])
    #--- Mapping
    tmp_H = np.zeros(img.shape, dtype=np.uint8)
    tmp_R = np.zeros(img.shape, dtype=np.uint8)
    force = np.zeros((len(Rzones), len(Hzones)))
    area_R = 0
    for i, R in enumerate(sorted(Rzones.keys())):
        tmp_R.fill(0)
        cv2.fillConvexPoly(tmp_R, Rzones[R]['coords'].astype(int), 1)
        Rzones[R]['area'] = np.logical_and(img == 1, img == tmp_R).sum()
        area_R += Rzones[R]['area']
        for j, H in enumerate(sorted(Hzones.keys())):
            tmp_H.fill(0)
            cv2.fillConvexPoly(tmp_H, Hzones[H]['coords'].astype(int), 1)
            #--- Intersection
            I = img[np.where(np.logical_and(tmp_R == tmp_H, tmp_R == 1))].sum()
            #I = img[np.where(tmp_R == tmp_H)].sum()
            Hzones[H]['area'] = np.logical_and(tmp_H == 1, img == 1).sum()
            force[i][j] = I * ((1 / Rzones[R]['area']) + (1 / Hzones[H]['area']))
    print(force)
    #--- get and sort non-zero links
    nz = force != 0
    s_index = np.unravel_index(
        np.where(nz, force, np.nan).argsort(axis=None)[:nz.sum()], force.shape)
    s_index = (s_index[0][::-1], s_index[1][::-1])
    #--- make groups
    #--- Manage False Alarm and Miss groups
    #--- False Alarm:
    fa = np.where(force.sum(axis=0) == 0)
    g_id = 1
    G = {}
    for f in fa[0]:
        G[g_id] = ([], [f])
        #G[g_id] = ([], [Hzones[f]['id']])
        g_id += 1
    #--- Miss
    mi = np.where(force.sum(axis=1) == 0)
    L = (np.zeros(len(Rzones), dtype=np.uint8),
         np.zeros(len(Hzones), dtype=np.uint8))
    for m in mi[0]:
        G[g_id] = ([m], [])
        #G[g_id] = ([Rzones[m]['id']], [])
        g_id += 1
    for r_ix, h_ix in zip(s_index[0], s_index[1]):
        print("H:{} R:{} L:{}".format(Hzones[h_ix]['id'], Rzones[r_ix]['id'],
                                      force[r_ix, h_ix]))
        if L[0][r_ix] > 0 and L[1][h_ix] > 0:
            #--- do not add to any group
            pass
        elif L[0][r_ix] == 0 and L[1][h_ix] == 0:
            #--- create new group
            G[g_id] = ([r_ix], [h_ix])
            #G[g_id] = ([Rzones[r_ix]['id']], [Hzones[h_ix]['id']])
            L[0][r_ix] = g_id
            L[1][h_ix] = g_id
            g_id += 1
        elif L[0][r_ix] == 0:
            #--- only ref is not assigned
            p_id = L[1][h_ix]
            G[p_id][0].append(r_ix)
            #G[p_id][0].append(Rzones[r_ix]['id'])
            L[0][r_ix] = p_id
        else:
            #--- only hyp is not assigned
            p_id = L[0][r_ix]
            G[p_id][1].append(h_ix)
            #G[p_id][1].append(Hzones[h_ix]['id'])
            L[1][h_ix] = p_id
    print(G)
    #--- Second stage, error calculation
    Ezm = 0
    for gr_key in G.keys():
        #--- handle false alarm:
        if len(G[gr_key][0]) == 0:
            Es = Hzones[G[gr_key][1][0]]['area']
            #--- Ec = Es, E = (1-a)Es + aEc == Es
            Ezm += Es
        #--- handle miss:
        elif len(G[gr_key][1]) == 0:
            Es = Rzones[G[gr_key][0][0]]['area']
            Ezm += Es
        #--- handle match
        elif len(G[gr_key][0]) == 1 and len(G[gr_key][1]) == 1:
            tmp_R.fill(0)
            cv2.fillConvexPoly(tmp_R,
                               Rzones[G[gr_key][0][0]]['coords'].astype(int), 1)
            tmp_H.fill(0)
            cv2.fillConvexPoly(tmp_H,
                               Hzones[G[gr_key][1][0]]['coords'].astype(int), 1)
            reg_intersection = (img * tmp_R)[np.where(tmp_R == tmp_H)].sum()
            Es = (Rzones[G[gr_key][0][0]]['area']
                  + Hzones[G[gr_key][1][0]]['area']
                  - (2 * reg_intersection))
            Ec = (dist(Rzones[G[gr_key][0][0]]['type'],
                       Hzones[G[gr_key][1][0]]['type']) * reg_intersection) + Es
            Ezm += ((1 - alpha) * Es) + (alpha * Ec)
        #--- handle split
        elif len(G[gr_key][0]) == 1 and len(G[gr_key][1]) > 1:
            tmp_R.fill(0)
            cv2.fillConvexPoly(tmp_R,
                               Rzones[G[gr_key][0][0]]['coords'].astype(int), 1)
            tmp_H.fill(0)
            tmp_H_o = tmp_H.copy()
            for i, h in enumerate(G[gr_key][1]):
                print(h)
                cv2.fillConvexPoly(tmp_H, Hzones[h]['coords'].astype(int), 1 + i)
                cv2.fillConvexPoly(tmp_H_o, Hzones[h]['coords'].astype(int), 1)
            #--- handle sub-zones by configuration
            #--- Miss error, i.e. in R but not in any H
            #--- E = Area(z)
            Er = (img * tmp_R)[np.where(tmp_R != tmp_H_o)].sum()
            #--- False detection error, i.e. in H but not in R
            Er += (img * tmp_H_o)[np.where(tmp_R != tmp_H_o)].sum()
            #--- Segmentation error and Correct parts
            Ec = 0
        #--- handle merge
        elif len(G[gr_key][0]) > 1 and len(G[gr_key][1]) == 1:
            pass
    print(Ezm / area_R)
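#--- Hypothetical usage sketch (not part of the original module): zone_map above is
#--- still a work in progress (the split and merge branches are incomplete and the
#--- score is only printed), so this only illustrates the expected argument types.
#--- File names below are placeholders.
def _example_zone_map():
    zone_map(
        hyp="hyp/page/scan_0001.xml",      # hypothesis PAGE-XML
        target="gt/page/scan_0001.xml",    # reference PAGE-XML
        img="images/scan_0001.jpg",        # page image, Otsu-binarized internally
        alpha=0.5,                         # weight of the classification error
        dist=lambda r, h: int(r != h),     # 0 if the zone types match, 1 otherwise
    )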
def compute_metrics(hyp, target, opts, logger=None):
    """ """
    #logger = logging.getLogger(__name__) if logger is None else logger
    if logger is None:
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.DEBUG)
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            '%(asctime)s - %(module)s - %(levelname)s - %(message)s')
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    num_samples = len(target)
    metrics = {}
    #--- force dict to appear in the same order always
    summary = OrderedDict()
    if opts.out_mode == 'L' or opts.out_mode == 'LR':
        metrics.update({
            'p_bl': np.empty(num_samples, dtype=float),
            'r_bl': np.empty(num_samples, dtype=float),
            'f1_bl': np.empty(num_samples, dtype=float)
        })
        t_file = '/tmp/'
        #--- the suffix must be added because, for some strange reason,
        #--- the Transkribus tool needs it
        t_fd, t_path = tempfile.mkstemp(suffix='.lst')
        h_fd, h_path = tempfile.mkstemp(suffix='.lst')
        try:
            with os.fdopen(t_fd, 'w') as tmp:
                tmp.write('\n'.join(target))
            with os.fdopen(h_fd, 'w') as tmp:
                tmp.write('\n'.join(hyp))
            evaltool = os.path.dirname(__file__) + '/baselineEvaluator.jar'
            cmd = subprocess.Popen(
                ['java', '-jar', evaltool, '-no_s', t_path, h_path],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            bl_results, _err = cmd.communicate()
            logger.debug(_err)
        finally:
            os.remove(t_path)
            os.remove(h_path)
        logger.info("-" * 10 + "BASELINE EVALUATION RESULTS" + "-" * 10)
        logger.debug(bl_results)
        #--- communicate() returns bytes under Python 3, so decode before splitting
        bl_results = bl_results.decode('utf-8').split('\n')
        for i in range(num_samples):
            res = bl_results[17 + i].split(',')
            metrics['p_bl'][i] = float(res[0])
            metrics['r_bl'][i] = float(res[1])
            metrics['f1_bl'][i] = float(res[2])
        logger.info(bl_results[-6])
        logger.info(bl_results[-5])
        logger.info(bl_results[-4])
        summary['p_bl'] = bl_results[-6]
        summary['r_bl'] = bl_results[-5]
        summary['f1_bl'] = bl_results[-4]
    if opts.out_mode == 'R' or opts.out_mode == 'LR':
        metrics.update({
            'p_acc': np.empty(num_samples, dtype=float),
            'm_acc': np.empty(num_samples, dtype=float),
            'm_iu': np.empty(num_samples, dtype=float),
            'f_iu': np.empty(num_samples, dtype=float)
        })
        per_class_m = np.zeros(
            (num_samples, np.unique(list(opts.regions_colors.values())).size + 1),
            dtype=float)
        logger.info("-" * 10 + "REGIONS EVALUATION RESULTS" + "-" * 10)
        logger.debug("p_cc,m_acc,m_iu,f_iu,target_file,hyp_file")
        hyp = np.sort(hyp)
        target = np.sort(target)
        for i, (h, t) in enumerate(zip(hyp, target)):
            target_data = pageData(t)
            target_data.parse()
            img_size = np.array(target_data.get_size())
            target_mask = target_data.build_mask(img_size, 'TextRegion',
                                                 opts.regions_colors)
            hyp_data = pageData(h)
            hyp_data.parse()
            hyp_mask = hyp_data.build_mask(img_size, 'TextRegion',
                                           opts.regions_colors)
            metrics['p_acc'][i] = ev.pixel_accuraccy(hyp_mask, target_mask)
            metrics['m_acc'][i] = ev.mean_accuraccy(hyp_mask, target_mask)
            metrics['m_iu'][i] = ev.mean_IU(hyp_mask, target_mask)
            metrics['f_iu'][i] = ev.freq_weighted_IU(hyp_mask, target_mask)
            tmp = ev.per_class_accuraccy(hyp_mask, target_mask)
            for m, c in zip(tmp[0], tmp[1]):
                per_class_m[i, c] = m
            #per_class_m['pc_p_acc'][i] = ev.per_class_accuraccy(hyp_mask, target_mask)
            #per_class_m += ev.per_class_accuraccy(hyp_mask, target_mask)[0]
            logger.debug('{:.4f},{:.4f},{:.4f},{:.4f},{},{}'.format(
                metrics['p_acc'][i], metrics['m_acc'][i], metrics['m_iu'][i],
                metrics['f_iu'][i], t, h))
            #t_polygons = target_data.get_polygons('TextRegion')
            #h_polygons = hyp_data.get_polygons('TextRegion')
            #metrics['m_struct'][i] = ev.matching_structure(h_polygons, t_polygons)
            #ev.matching_structure(h_polygons, t_polygons)
    summary.update({m: metrics[m].sum() / num_samples for m in metrics})
    logger.info("Pixel accuracy: {}".format(summary['p_acc']))
    logger.info("Mean accuracy: {}".format(summary['m_acc']))
    logger.info("Mean IU: {}".format(summary['m_iu']))
    logger.info("freq weighted IU: {}".format(summary['f_iu']))
    logger.info("Per_class Pixel accuracy: {}".format(
        per_class_m.sum(axis=0) / num_samples))
    mm = per_class_m.sum(axis=0) / num_samples
    #print("BG:{}".format(mm[0]))
    #for n, c in opts.regions_colors.items():
    #    print("{}:{}".format(n, mm[c]))
    #--- return averages only
    return summary
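#--- Hypothetical usage sketch (not part of the original module): compute_metrics
#--- above expects parallel lists of PAGE-XML paths plus an options object carrying
#--- at least out_mode and regions_colors. argparse.Namespace is used here only as a
#--- stand-in for the project's real options object; paths and colors are placeholders.
def _example_compute_metrics():
    import argparse
    import glob

    opts = argparse.Namespace(
        out_mode="LR",  # evaluate both baselines and regions
        regions_colors={"paragraph": 128, "marginalia": 64},
    )
    target = sorted(glob.glob("gt/page/*.xml"))
    hyp = sorted(glob.glob("hyp/page/*.xml"))
    return compute_metrics(hyp, target, opts)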