def draw_table_to_image(img, lines_snip, points_snip):
    """Return a copy of ``img`` plus the smallest x and y among the points.

    The line/point drawing statements that previously followed an
    unconditional ``continue`` in each loop were unreachable and have been
    removed, so the returned image is an unannotated copy of ``img``.

    Args:
        img: Image-like object exposing ``copy()``.
        lines_snip: Kept for interface compatibility; it is not used.
        points_snip: Iterable of ``(cx, cy)`` pairs.

    Returns:
        ``(image_copy, min_coords)``. ``min_coords`` is
        ``(min of all cx, min of all cy)`` -- the two minima are taken
        independently, so the pair need not be one of the input points. It is
        ``None`` when there are no points or the points cannot be processed.
    """
    orig_img_copy = img.copy()
    min_coords = None
    try:
        xs, ys = [], []
        for cx, cy in points_snip:
            xs.append(cx)
            ys.append(cy)
        # Guard explicitly instead of relying on min([]) raising ValueError.
        if xs:
            min_coords = (min(xs), min(ys))
    except Exception as e:
        # Best effort: malformed points are logged and min_coords stays None.
        logger.debug(e)
    return orig_img_copy, min_coords
def split_image_2_table_images(img, hor_scale, vert_scale, contour_level, file_name='file1'):
    """Split a page image into per-table line masks.

    The image is binarised with an adaptive threshold, candidate table
    bounding boxes are collected and de-nested, an average character size is
    measured inside each box, and everything is handed to
    ``_find_rectangle_contours_and_clean2``.

    Args:
        img: Input image (must support ``copy()``).
        hor_scale: Scale forwarded to the horizontal line detection.
        vert_scale: Scale forwarded to the vertical line detection.
        contour_level: String tag; forwarded to helpers and concatenated into
            debug file names.
        file_name: Base name; ``'.png'`` is appended before any use.

    Returns:
        ``(rois, bboxline_coords, bbox)`` as produced by
        ``_find_rectangle_contours_and_clean2`` (note the reordering).
    """
    file_name = file_name + '.png'

    def _write_bbox_overlay(boxes, tag):
        # Debug aid: outline each box on a copy of the input and save it.
        overlay = img.copy()
        for position, rect in enumerate(boxes):
            colour = colours[position % 14]
            left, top, width, height = rect
            cv2.rectangle(overlay, (left, top), (left + width, top + height), colour, 3)
        cv2.imwrite(debug_dir + file_name + tag + contour_level + '.png', overlay)

    mask_dotted = imageop.detect_dotted_lines(img, file_name)
    gray_image = imageop.convert_to_gray(img)
    binary = cv2.adaptiveThreshold(
        ~gray_image, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15, -2)

    if debug_flag:
        debug_prefix = debug_dir + file_name
        cv2.imwrite(debug_prefix + '_shadow_input_' + contour_level + '.png', img)
        cv2.imwrite(debug_prefix + '_gray_image_' + contour_level + '.png', gray_image)
        cv2.imwrite(debug_prefix + '_bw_' + contour_level + '.png', binary)

    candidate_boxes = get_all_rect_bbox(binary, hor_scale, vert_scale, contour_level, file_name)
    logger.debug('Contour Count before: ' + str(len(candidate_boxes)))
    unique_table_bboxs, _ = eliminate_inner_rect_bbox_thresh(candidate_boxes)

    if debug_flag:
        _write_bbox_overlay(unique_table_bboxs, '_preContours_')

    # Measure the average character size inside each table box in isolation.
    character_dim_cutoffs = []
    for table_box in unique_table_bboxs:
        left, top, width, height = table_box
        isolated = np.zeros(binary.shape, dtype='uint8')
        rows = slice(top, top + height + 1)
        cols = slice(left, left + width + 1)
        isolated[rows, cols] = binary[rows, cols]
        char_w, char_h = imageop.get_average_character_size(isolated)
        character_dim_cutoffs.append((char_w, char_h))

    rois, bbox, bboxline_coords = _find_rectangle_contours_and_clean2(
        binary, hor_scale, vert_scale, unique_table_bboxs,
        character_dim_cutoffs, mask_dotted, contour_level, file_name)
    logger.debug('Contour Count after: ' + str(len(rois)))

    if debug_flag:
        for number, roi in enumerate(rois, 1):
            cv2.imwrite(
                debug_dir + file_name + '_postContour_' + contour_level + str(number) + '.png',
                roi)
        _write_bbox_overlay(bbox, '_postContours')

    return rois, bboxline_coords, bbox
def get_image(input_dir, file):
    """Load an image with ``cv2.imread``.

    Args:
        input_dir: Directory prefix. It is concatenated directly with
            ``file``, so it must already end with a path separator.
        file: File name appended to ``input_dir``.

    Returns:
        The value returned by ``cv2.imread``, or ``None`` when reading raised.
    """
    # Bind up front so the return below cannot hit an unbound local when the
    # read (or the path concatenation) raises.
    image = None
    try:
        image = cv2.imread(input_dir + file)
    except Exception as e:
        logger.debug('Unable to extract files')
        logger.exception(e)
    else:
        # cv2.imread signals an unreadable file by returning None, not raising.
        if image is None:
            logger.debug('Unable to extract files')
    return image
def detect_points_in_image_lines(lines):
    """Apply ``detect_points`` to every entry of ``lines``.

    Args:
        lines: Iterable whose entries are each passed to ``detect_points``.

    Returns:
        ``(points, new_lines)``: two lists parallel to ``lines`` holding the
        first and second result of ``detect_points`` for each entry.
    """
    collected_points = []
    collected_lines = []
    for line_group in lines:
        group_points, group_lines = detect_points(line_group)
        collected_points.append(group_points)
        collected_lines.append(group_lines)
        logger.debug('-------------------------------------------------')
        logger.debug('Starting Point detection for : {0}'.format(line_group))
    return collected_points, collected_lines
def detect_points(lines):
    """Find intersection points for ``lines`` and drop adjacent duplicates.

    Args:
        lines: Passed unchanged to
            ``lineop.find_points_of_intersection_and_extend_lines``.

    Returns:
        ``(points, new_lines)``. Any exception is logged and whatever was
        computed up to that moment is returned: both lists are empty if the
        intersection step failed, and the points are left unfiltered if only
        ``remove_adjacent_points`` failed.
    """
    logger.debug('-------------------------------------------------')
    logger.debug('Starting Point detection for : {0}'.format(lines))

    intersections, extended_lines = [], []
    try:
        intersections, extended_lines = (
            lineop.find_points_of_intersection_and_extend_lines(lines))
        intersections = remove_adjacent_points(intersections)
    except Exception as err:
        logger.exception(err)
    return intersections, extended_lines
def slope(line):
    """Return the absolute slope of a segment given as ``(x1, y1, x2, y2)``.

    Vertical segments (equal x values) yield ``float('inf')``. Any error while
    reading or combining the coordinates is logged at debug level and also
    yields ``float('inf')``.
    """
    idx_x1, idx_y1, idx_x2, idx_y2 = range(4)
    try:
        if line[idx_x2] == line[idx_x1]:
            return float('inf')
        rise = abs(line[idx_y2] - line[idx_y1])
        run = abs(line[idx_x2] - line[idx_x1])
        return rise / run
    except Exception as err:
        logger.debug(err)
        return float('inf')
def _find_rectangle_contours_and_clean2(bw, hor_scale, vert_scale, bboxs, character_dim_cutoffs, mask_dotted,
                                        contour_level, file_name='file1'):
    """Extract line-mask regions (ROIs) for each table bounding box.

    For every ``(x, y, w, h)`` box in ``bboxs`` the matching area of ``bw`` is
    isolated, overlaps with other boxes are blanked when this box is the
    largest of the overlapping group, horizontal and vertical lines are
    detected, and contours are taken from the combined line mask. Each
    contour's bounding rectangle becomes a candidate ROI; candidates are
    de-nested with ``eliminate_inner_rect_bbox_thresh`` and the surviving ROIs
    get the (padded) dotted-line mask added.

    Args:
        bw: Binary image; indexed as 2-D ``[row, col]`` here.
        hor_scale: Scale passed to ``imageop.detect_horizontal``.
        vert_scale: Scale passed to
            ``imageop.detect_vertical_and_clean_text``; forced to 55 when
            ``contour_level == 'all'``.
        bboxs: Table boxes as ``(x, y, w, h)``.
        character_dim_cutoffs: One entry per box, forwarded to the vertical
            line detection.
        mask_dotted: Dotted-line mask, sliced with the same coordinates as
            ``bw``.
        contour_level: String tag; also concatenated into debug file names.
        file_name: Base name used in debug file names.

    Returns:
        ``(rois, bboxes, bboxline_coords)`` accumulated over all boxes:
        ``rois`` are integer arrays of ``bw``'s height and width,
        ``bboxes`` is whatever ``eliminate_inner_rect_bbox_thresh`` returns as
        its first value, and each ``bboxline_coords`` entry lists the top,
        right, bottom and left edge of a kept rectangle as
        ``(x1, y1, x2, y2)``.
    """
    if contour_level == 'all':
        vert_scale = 55

    overlap_areas, _overlap_thresh_flags, overlap_bound_rects, rect_areas_img = get_overlap_area_tables(bboxs)
    tot_unique_bboxs, tot_unique_rois, tot_unique_bboxline_coords = [], [], []
    net_img = np.zeros(bw.shape, dtype=int)  # debug-only accumulation of all ROIs
    pad = 5  # margin (pixels) around a ROI when copying the dotted-line mask

    for counter, (bbox, char_dim_cutoff) in enumerate(zip(bboxs, character_dim_cutoffs)):
        rois, bboxline_coords, bbox_out = [], [], []
        x, y, w, h = bbox
        single_table_snip = np.zeros(bw.shape, dtype=int)
        single_table_snip[y:y + h + 1, x:x + w + 1] = bw[y:y + h + 1, x:x + w + 1]

        # Blank the regions shared with overlapping boxes, but only when this
        # box has the largest area among the overlapping group.
        nonzero_cols = np.nonzero(overlap_areas[counter])[0].tolist()
        if nonzero_cols:
            rect_areas_ind = rect_areas_img[nonzero_cols + [counter]]
            if rect_areas_img[counter] == np.max(rect_areas_ind):
                for col in nonzero_cols:
                    ltop_x, ltop_y, width, height = overlap_bound_rects[counter, col]
                    single_table_snip[ltop_y:ltop_y + height + 1, ltop_x:ltop_x + width + 1] = 0

        single_table_snip = single_table_snip.astype('uint8')
        if debug_flag:
            cv2.imwrite(debug_dir + file_name + '_snip_input_bw_' + contour_level
                        + str(random.randint(1, 101)) + '.png', single_table_snip)

        horizontal = imageop.detect_horizontal(single_table_snip, scale=hor_scale)
        vertical = imageop.detect_vertical_and_clean_text(single_table_snip, char_dim_cutoff, scale=vert_scale)
        mask = horizontal + vertical
        joints = cv2.bitwise_and(horizontal, vertical)
        _, thresh = cv2.threshold(mask, 127, 255, 0)

        # OpenCV 3.x returns (image, contours, hierarchy); 4.x returns
        # (contours, hierarchy). Index from the end to support both.
        found = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        contours = found[-2]
        final_img = found[0] if len(found) == 3 else thresh
        thresh_cpy = thresh.copy()

        if debug_flag:
            cv2.imwrite(debug_dir + file_name + '_maskHor_' + contour_level
                        + str(random.randint(1, 101)) + '.png', horizontal)
            cv2.imwrite(debug_dir + file_name + '_maskVert_' + contour_level
                        + str(random.randint(1, 101)) + '.png', vertical)
            cv2.imwrite(debug_dir + file_name + '_mask_' + contour_level
                        + str(random.randint(1, 101)) + '.png', mask)
            cv2.imwrite(debug_dir + file_name + '_joints_' + contour_level + '.png', joints)
            cv2.imwrite(debug_dir + file_name + '_thresh_' + contour_level + '.png', thresh)
            cv2.imwrite(debug_dir + file_name + '_find_contours_' + contour_level + '.png', final_img)
            cv2.imwrite(debug_dir + file_name + '_thrsh_cpy_' + contour_level + '.png', thresh_cpy)

        for contour_idx, contour in enumerate(contours):
            contours_poly = cv2.approxPolyDP(contour, 3, True)
            x, y, w, h = cv2.boundingRect(contours_poly)

            if debug_flag:
                cv2.imwrite(debug_dir + file_name + '_roi_' + contour_level
                            + str(contour_idx) + '.png', joints[y:y + h, x:x + w])

            # Full-size canvas holding only the line mask inside this rect.
            all_black = np.zeros((bw.shape[0], bw.shape[1]), dtype=int)
            all_black[y:y + h, x:x + w] = thresh_cpy[y:y + h, x:x + w]
            bbox_out.append((x, y, w, h))
            rois.append(all_black)
            bboxline_coords.append([(x, y, x + w, y), (x + w, y, x + w, y + h),
                                    (x, y + h, x + w, y + h), (x, y, x, y + h)])

        # presumably a non-zero flag marks a rectangle that is kept (not
        # nested in another one) -- verify against
        # eliminate_inner_rect_bbox_thresh.
        unique_bboxs, flags = eliminate_inner_rect_bbox_thresh(bbox_out)
        unique_bboxline_coords, unique_rois = [], []
        for idx, flag in enumerate(flags):
            if flag != 0:
                unique_bboxline_coords.append(bboxline_coords[idx])
                mask_img = rois[idx]
                x, y, w, h = bbox_out[idx]
                # Clamp the padded start to 0: a negative slice start would
                # wrap around to the far edge and copy the wrong (usually
                # empty) region for rectangles near the top/left border.
                top, left = max(y - pad, 0), max(x - pad, 0)
                bottom, right = y + h + pad, x + w + pad
                dotted = mask_dotted * 0
                dotted[top:bottom, left:right] = mask_dotted[top:bottom, left:right]
                mask_img += dotted
                unique_rois.append(mask_img)
                if debug_flag:
                    cv2.imwrite(debug_dir + file_name + '_thrsh_cpy_' + contour_level
                                + str(idx) + '.png', mask_img)

        if debug_flag:
            logger.debug('rois_unique: ' + str(unique_rois))
            # Sized from bw (same height/width as every ROI) so an empty
            # unique_rois list does not raise IndexError.
            net = np.zeros(bw.shape[:2], dtype=int)
            for p in unique_rois:
                net += p
            cv2.imwrite(debug_dir + file_name + '_thrsh_cpy_net_' + contour_level
                        + str(random.randint(1, 101)) + '.png', net)
            net_img += net

        tot_unique_bboxs += unique_bboxs
        tot_unique_rois += unique_rois
        tot_unique_bboxline_coords += unique_bboxline_coords

    if debug_flag:
        cv2.imwrite(debug_dir + file_name + '_thrsh_cpy_net_img_' + contour_level
                    + str(random.randint(1, 101)) + '.png', net_img)
    return tot_unique_rois, tot_unique_bboxs, tot_unique_bboxline_coords