def text_line_segmentation_NN(image, scale, mask=None, use_binary=False, debug_image=None, offset=(0, 0)):
    """Detect text-line bounding boxes in ``image``.

    Pixels equal to 0 (after optional grayscale conversion, binarization and
    masking) are treated as foreground. Connected components are used to
    estimate an average glyph height, ``Block`` proposes word boxes, and the
    word boxes are merged into lines and filtered by size.

    Args:
        image: 2-D grayscale array or 3-D colour array (converted with
            ``cv2.COLOR_BGR2GRAY``).
        scale: Size hint; ``0.6 * scale`` is the lower bound for the
            estimated glyph height and ``1.2 * scale`` for the merge distance.
        mask: Optional array added to the image as ``mask * 255`` so masked
            pixels stop being 0. Presumably a 0/1 mask of the same shape as
            the grayscale image -- TODO confirm with callers.
        use_binary: When true, the image is binarized with
            ``morph.thresh_sauvola(image, k=0.15) * 255`` first.
        debug_image: Accepted for backward compatibility only. The returned
            debug image always comes from ``block.paint(None)``, so this
            argument has never influenced the result.
        offset: ``(offset_x, offset_y)`` added to every returned box.

    Returns:
        ``(lines, debug_image)`` where ``lines`` is a list of
        ``[x0, y0, x1, y1]`` boxes shifted by ``offset`` and ``debug_image``
        is the value returned by ``block.paint(None)``.
    """
    h, w = image.shape[:2]

    if len(image.shape) > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    if use_binary:
        image = morph.thresh_sauvola(image, k=0.15) * 255
    if mask is not None:
        image = (image + mask * 255).astype('uint8')

    # Estimate the typical glyph height from plausibly text-sized components.
    labels, _ = morph.label(image == 0)
    heights = []
    for obj in morph.find_objects(labels):
        obj_height = sl.height(obj)
        if 6 < obj_height < 100 and sl.aspect_normalized(obj) < 8:
            heights.append(obj_height)
    # Guard the empty case explicitly instead of letting np.mean warn and
    # return NaN; the fallback (0) is the value nan_to_num used to produce.
    mean_height = np.mean(heights) if heights else 0.0
    avg_h = max(mean_height, scale * 0.6)

    block = Block(image, avg_h)
    words = block.getWordBoundingBoxes()

    merge_dist = max(avg_h, scale * 1.2) * 0.3
    lines = filter_and_merge_overlap_boxes(words, merge_dist)
    lines = filter_and_merge_overlap_boxes(lines, merge_dist, use_merge_same_line_only=True)

    # Keep only boxes whose height and width are plausible for a text line.
    lines = [
        box for box in lines
        if avg_h * 0.3 < box[3] - box[1] < avg_h * 2.5
        and box[2] - box[0] > avg_h * 0.5
    ]
    lines = [sl.pad_box(box, 0, (h, w)) for box in lines]

    offset_x, offset_y = offset
    lines = [
        [box[0] + offset_x, box[1] + offset_y, box[2] + offset_x, box[3] + offset_y]
        for box in lines
    ]

    debug_image = block.paint(None)
    return lines, debug_image
def remove_vlines(binary, gray, scale, maxsize=10):
    """Erase tall, narrow connected components from a labelled binary image.

    A component is erased when its bounding box is at most 20 wide and more
    than 200 high, or at most 45 wide and more than 500 high. Its pixels are
    set to 140 in ``gray`` (modified in place) and dropped from the result.

    NOTE(review): this function is defined a second time right after this
    definition with the same body; the later definition is the one the name
    refers to once the module has been imported.

    Args:
        binary: Image passed to ``morph.label``.
        gray: Image indexed with the same bounding boxes; modified in place.
        scale: Unused.
        maxsize: Unused.

    Returns:
        Array of type code ``"B"`` that is non-zero wherever a component
        survived.
    """
    labels, _ = morph.label(binary)
    for label, bounds in enumerate(morph.find_objects(labels), start=1):
        width = sl.width(bounds)
        height = sl.height(bounds)
        thin_and_tall = width <= 20 and height > 200
        wider_and_taller = width <= 45 and height > 500
        if not (thin_and_tall or wider_and_taller):
            continue
        # Touch only this component's own pixels inside its bounding box.
        component = labels[bounds] == label
        gray[bounds][component] = 140
        labels[bounds][component] = 0
    return array(labels != 0, "B")
def remove_vlines(binary,gray,scale,maxsize=10):
    """Remove vertical-line-like connected components.

    Components whose bounding box is (width <= 20 and height > 200) or
    (width <= 45 and height > 500) are painted with the value 140 in ``gray``
    (in place) and removed from the label image.

    NOTE(review): the file also contains an earlier definition of this
    function with the same body; this later one is what callers get.

    Args:
        binary: Image passed to ``morph.label``.
        gray: Image modified in place at the removed components' pixels.
        scale: Unused.
        maxsize: Unused.

    Returns:
        Array of type code ``'B'``, non-zero where a component was kept.
    """
    labels, _ = morph.label(binary)
    objects = morph.find_objects(labels)

    def looks_like_vline(box):
        box_w, box_h = sl.width(box), sl.height(box)
        return (box_w <= 20 and box_h > 200) or (box_w <= 45 and box_h > 500)

    for idx, box in enumerate(objects):
        if looks_like_vline(box):
            # Labels start at 1, so object ``idx`` carries label ``idx + 1``.
            hit = labels[box] == idx + 1
            gray[box][hit] = 140
            labels[box][hit] = 0

    return array(labels != 0, 'B')
def firstAnalyse(binary):
    """Label connected components, estimate a scale, and collect small ones.

    ``binary`` is modified in place: for every component whose ``sl.dim0``
    extent exceeds ``3 * scale``, that component's own pixels are cleared
    inside its bounding box.

    Args:
        binary: Array-like image; cast to bool for labelling, indexed with
            the component bounding boxes, and written to in place.

    Returns:
        ``(objects, smalldot, scale)`` where ``objects`` is the result of
        ``morph.find_objects``, ``smalldot`` is an array of the same shape and
        dtype as ``binary`` holding a copy of ``binary`` inside the bounding
        boxes of components narrower or shorter than ``scale / 2``, and
        ``scale`` is the estimated median component size.
    """
    binaryary = morph.r_closing(binary.astype(bool), (1,1))
    labels,_ = morph.label(binaryary)
    objects = morph.find_objects(labels) # bounding slices, one per label
    # Indices into ``objects`` ordered by ascending sl.area (presumably the
    # bounding-box area -- TODO confirm against the sl module).
    bysize = sorted(range(len(objects)), key=lambda k: sl.area(objects[k]))
    # bysize = sorted(objects,key=sl.area)
    scalemap = zeros(binaryary.shape)
    smalldot = zeros(binaryary.shape, dtype=binary.dtype)
    for i in bysize:
        o = objects[i]
        # Skip boxes overlapping an area already painted by a smaller
        # component, so each region contributes a single size value.
        if amax(scalemap[o])>0:
            # mask = where(labels[o] != (i+1),uint8(255),uint8(0))
            # binary[o] = cv2.bitwise_and(binary[o],binary[o],mask=mask)
            continue
        scalemap[o] = sl.area(o)**0.5
    # Median of the painted sizes within (3, 100); with no such pixel the
    # median is taken over an empty selection.
    scale = median(scalemap[(scalemap>3)&(scalemap<100)])
    for i,o in enumerate(objects):
        # Components narrow or short relative to the scale are copied
        # (whole bounding box) into ``smalldot``.
        if (sl.width(o) < scale/2) or (sl.height(o) < scale/2):
            smalldot[o] = binary[o]
        # Components with a large dim0 extent (presumably height -- TODO
        # confirm) are erased from ``binary``: the mask is 255 everywhere
        # except on this component's own label, so only its pixels are zeroed.
        if sl.dim0(o) > 3*scale:
            mask = where(labels[o] != (i+1),uint8(255),uint8(0))
            binary[o] = cv2.bitwise_and(binary[o],binary[o],mask=mask)
            continue
    return objects, smalldot, scale
def simplefirstAnalyse(binary):
    """Label components and collect small ones using a fixed scale.

    The scale is ``int(0.7 * binary.shape[0])`` rather than an estimate from
    the components. ``binary`` is modified in place: each component whose
    ``sl.dim0`` extent exceeds ``3 * scale`` has its own pixels cleared inside
    its bounding box.

    Args:
        binary: Array-like image; cast to bool for labelling and written to
            in place.

    Returns:
        ``(objects, smalldot, scale)``: the ``morph.find_objects`` result, an
        array (same shape and dtype as ``binary``) holding a copy of
        ``binary`` inside the bounding boxes of components narrower or
        shorter than ``scale / 2``, and the scale.
    """
    closed = morph.r_closing(binary.astype(bool), (1, 1))
    labels, _ = morph.label(closed)
    objects = morph.find_objects(labels)

    smalldot = zeros(closed.shape, dtype=binary.dtype)
    scale = int(binary.shape[0] * 0.7)

    for label, bounds in enumerate(objects, start=1):
        too_narrow = sl.width(bounds) < scale/2
        if too_narrow or sl.height(bounds) < scale/2:
            smalldot[bounds] = binary[bounds]

        if sl.dim0(bounds) <= 3 * scale:
            continue
        # 255 everywhere except on this component's label, so the AND below
        # zeroes only this component's pixels.
        keep = where(labels[bounds] != label, uint8(255), uint8(0))
        binary[bounds] = cv2.bitwise_and(binary[bounds], binary[bounds], mask=keep)

    return objects, smalldot, scale
def filter_junk_cc(binary, scale, maxsize):
    """Split connected components into a "junk" mask and a "text-like" mask.

    A component is junk when any of these hold: width greater than
    ``maxsize * scale``, area greater than ``8 * scale**2``, normalized
    aspect greater than 8, or ``sl.min_dim`` below ``0.35 * scale``.
    A non-junk component is text-like when both its width and height exceed
    ``0.3 * scale``. Components matching neither appear in no mask.

    Args:
        binary: Image passed to ``morph.label``.
        scale: Reference size used by all thresholds.
        maxsize: Width limit, expressed as a multiple of ``scale``.

    Returns:
        ``(junk_cc, text_like)``: two arrays of type code ``'B'`` shaped like
        ``binary``, set to 1 on the pixels of the respective components.
    """
    shape = binary.shape
    junk_cc = np.zeros(shape, dtype='B')
    text_like = np.zeros(shape, dtype='B')

    labels, _ = morph.label(binary)
    min_text_side = 0.3 * scale

    for label, box in enumerate(morph.find_objects(labels), start=1):
        is_junk = (
            sl.width(box) > maxsize * scale
            or sl.area(box) > scale * scale * 8
            or sl.aspect_normalized(box) > 8
            or sl.min_dim(box) < scale * 0.35
        )
        if is_junk:
            target = junk_cc
        elif sl.width(box) > min_text_side and sl.height(box) > min_text_side:
            target = text_like
        else:
            continue
        # Mark only the component's own pixels, not its whole bounding box.
        target[box][labels[box] == label] = 1

    return junk_cc, text_like
def detect_table(image, scale, maxsize=10, debug_path=None):
    """Find bounding boxes of table-like regions in ``image``.

    The image is binarized, junk components are extracted with
    ``filter_junk_cc`` and turned into separator lines with
    ``compute_combine_seps``. Each large, sparse separator component is then
    projected onto both axes to look for border lines.

    Args:
        image: 2-D grayscale array, or an array with 3 or more channels
            (converted with ``cv2.COLOR_BGR2GRAY``).
        scale: Reference size used by every size threshold.
        maxsize: Forwarded to ``filter_junk_cc``.
        debug_path: When not ``None``, two debug images are written next to
            this path as ``<stem>_bin.png`` and ``<stem>_sep.png``.

    Returns:
        List of ``[x0, y0, x1, y1]`` boxes in image coordinates.
    """
    if len(image.shape) > 2 and image.shape[2] >= 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    binary = 1 - morph.thresh_sauvola(gray, k=0.05)
    junk_cc, _ = filter_junk_cc(binary, scale, maxsize)
    junk_cc = morph.r_closing(junk_cc, (5, 5))

    print('calculating combine sep...')
    combine_sep = compute_combine_seps(junk_cc, scale)

    # Closing reconnects separator edges that were broken apart.
    close_thes = int(scale * 0.15)
    closed_sep = morph.r_closing(combine_sep, (close_thes, close_thes))

    if debug_path is not None:
        # The original code used an undefined name ``filename`` here; the
        # debug images are derived from ``debug_path`` instead.
        import os
        stem = os.path.splitext(debug_path)[0]
        cv2.imwrite(stem + '_bin.png', ((1 - junk_cc) * 255).astype('uint8'))
        cv2.imwrite(stem + '_sep.png', (closed_sep * 255).astype('uint8'))

    labels, _ = morph.label(closed_sep)
    objects = morph.find_objects(labels)

    boxes = []
    for i, b in enumerate(objects):
        area = sl.area(b)
        # Size gate. It implies the looser pre-filter the code used to run
        # first (both require area > 10 * scale**2), so one check suffices,
        # and it guarantees area > 0 before the division below.
        if not (area > scale * scale * 10
                and sl.min_dim(b) > scale * 1.0
                and sl.max_dim(b) > scale * 8):
            continue

        # Tables are mostly empty space: reject densely filled regions.
        density = np.sum(combine_sep[b]) / area
        if not density < 0.4:
            continue

        box_w = sl.width(b)
        box_h = sl.height(b)
        region = (labels[b] == i + 1).astype('uint8')

        # Column/row pixel counts, each widened by the count 3 positions
        # earlier so slightly offset strokes still add up.
        proj_x = np.sum(region, axis=0)
        proj_y = np.sum(region, axis=1)
        proj_x[3:] += proj_x[:-3]
        proj_y[3:] += proj_y[:-3]

        # Indices (ascending) of columns/rows filled enough to be a border.
        sep_x = np.flatnonzero(proj_x > 0.75 * box_h)
        sep_y = np.flatnonzero(proj_y > 0.4 * box_w)
        # Need at least one separator along each axis.
        if len(sep_x) < 1 or len(sep_y) < 1:
            continue

        border_pad = max(box_w, box_h)
        border_left = sep_x[0] if sep_x[0] < border_pad else None
        border_right = sep_x[-1] if sep_x[-1] > box_w - border_pad else None
        border_top = sep_y[0] if sep_y[0] < border_pad else None
        border_bottom = sep_y[-1] if sep_y[-1] > box_h - border_pad else None
        if any(v is None for v in (border_top, border_bottom, border_left, border_right)):
            continue

        # NOTE(review): the right edge is always the bounding box's right
        # edge; the detected ``border_right`` only gates acceptance. This
        # mirrors the previous behaviour -- confirm it is intentional.
        boxes.append([
            b[1].start + border_left,
            b[0].start + border_top,
            b[1].stop,
            b[0].start + border_bottom,
        ])

    return boxes