def processing(org, binary, clf, main=True):
    """Run the component-detection pipeline and return the detected corners.

    Besides its arguments the function reads the module-level helpers ``det``
    and ``seg`` and the module-level flags ``is_shrink_img`` and ``is_ocr``.

    Args:
        org: Forwarded unchanged to the ``det`` / ``seg`` helpers
            (presumably the original image -- TODO confirm).
        binary: Input of ``det.boundary_detection`` (presumably the binarized
            image -- TODO confirm).
        clf: Classifier object; only ``clf.predict(clips)`` is called on it.
        main: Truthy runs the full pipeline (steps 2-8). Falsy runs the
            reduced pass that only considers rectangular components.

    Returns:
        ``(corners_block, corners_img, corners_compo, compos_class)`` for the
        full pipeline, or ``(corners_block, corners_compo, compos_class)`` for
        the reduced pass. Note the reduced pass omits ``corners_img``.
    """
    if not main:
        # *** used for img inspection ***
        # only consider rectangular components
        boundary_rec, _ = det.boundary_detection(binary)
        corners_rec = det.get_corner(boundary_rec)
        corners_block, corners_img, corners_compo = det.block_or_compo(org, binary, corners_rec)

        clips = seg.clipping(org, corners_compo)
        compos_class = clf.predict(clips)
        corners_compo, compos_class = det.strip_img(corners_compo, compos_class, corners_img)
        return corners_block, corners_compo, compos_class

    # *** Step 2 *** object detection: get connected areas -> get boundary -> get corners
    boundary_rec, boundary_non_rec = det.boundary_detection(binary, write_boundary=True)
    corners_rec = det.get_corner(boundary_rec)
    corners_non_rec = det.get_corner(boundary_non_rec)

    # *** Step 3 *** data processing: identify blocks and compos from rectangles
    # -> identify irregular compos
    corners_block, corners_img, corners_compo = det.block_or_compo(org, binary, corners_rec)
    # Return value is ignored; presumably corners_img / corners_compo are
    # extended in place -- TODO confirm in det.compo_irregular.
    det.compo_irregular(org, corners_non_rec, corners_img, corners_compo)

    # *** Step 4 *** classification: clip and classify the components candidates
    # -> ignore noises -> refine img
    clips = seg.clipping(org, corners_compo)
    compos_class = clf.predict(clips)
    corners_compo, compos_class = det.strip_img(corners_compo, compos_class, corners_img)

    # *** Step 5 *** result refinement
    if is_shrink_img:
        corners_img = det.img_shrink(org, binary, corners_img)

    # *** Step 6 *** recursive inspection: search components nested in components
    # This function itself is handed over as the first argument (presumably so
    # the helper can call it back with main=False -- TODO confirm).
    corners_block, corners_img, corners_compo, compos_class = det.compo_in_img(
        processing, org, binary, clf,
        corners_img, corners_block, corners_compo, compos_class)

    # *** Step 7 *** ocr check and text detection from cleaned image
    # NOTE(review): all three text-removal passes are assumed to be gated by
    # is_ocr -- confirm against the intended layout.
    if is_ocr:
        # Blocks and images carry no predicted class, so a constant label list
        # of matching length is supplied and the returned labels are dropped.
        corners_block, _ = det.rm_text(org, corners_block, ['block'] * len(corners_block))
        corners_img, _ = det.rm_text(org, corners_img, ['img'] * len(corners_img))
        corners_compo, compos_class = det.rm_text(org, corners_compo, compos_class)

    # *** Step 8 *** merge overlapped components
    corners_img, _ = det.merge_corner(
        org, corners_img, ['img'] * len(corners_img), is_merge_nested_same=True)
    corners_compo, compos_class = det.merge_corner(
        org, corners_compo, compos_class, is_merge_nested_same=True)

    return corners_block, corners_img, corners_compo, compos_class
def processing(org, binary, main=True):
    """Run the component- and text-detection pipeline and return the corners.

    Besides its arguments the function reads the module-level helpers ``det``,
    ``seg``, ``draw``, ``ocr`` and ``CNN`` and the module-level flags
    ``is_merge``, ``is_shrink_img``, ``is_img_inspect`` and ``is_icon``.

    NOTE(review): an earlier ``processing(org, binary, clf, main=True)``
    definition appears in this file; this later definition rebinds the name.

    Args:
        org: Forwarded unchanged to the helper modules (presumably the
            original image -- TODO confirm).
        binary: Input of ``det.boundary_detection`` (presumably the binarized
            image -- TODO confirm).
        main: Truthy runs the full pipeline (steps 2-7). Falsy runs the
            reduced pass that only considers rectangular components.

    Returns:
        ``(corners_block, corners_img, corners_compo, compos_class,
        corners_text)`` for the full pipeline, or ``(corners_block,
        corners_compo, compos_class)`` for the reduced pass.
    """
    if not main:
        # *** used for img inspection ***
        # only consider rectangular components
        boundary_rec, _ = det.boundary_detection(binary)
        corners_rec = det.get_corner(boundary_rec)
        corners_block, corners_img, corners_compo = det.block_or_compo(org, binary, corners_rec)

        clips = seg.clipping(org, corners_compo)
        compos_class = CNN.predict(clips)
        corners_compo, compos_class = det.compo_filter(org, corners_compo, compos_class, is_icon)
        corners_compo, compos_class = det.strip_img(corners_compo, compos_class, corners_img)

        corners_block, _ = det.rm_text(org, corners_block, ['block'] * len(corners_block))
        corners_compo, compos_class = det.rm_text(org, corners_compo, compos_class)
        return corners_block, corners_compo, compos_class

    # *** Step 2 *** object detection: get connected areas -> get boundary -> get corners
    boundary_rec, boundary_non_rec = det.boundary_detection(binary, show=False)
    corners_rec = det.get_corner(boundary_rec)
    corners_non_rec = det.get_corner(boundary_non_rec)

    # *** Step 3 *** data processing: identify blocks and compos from rectangles
    # -> identify irregular compos
    corners_block, corners_img, corners_compo = det.block_or_compo(org, binary, corners_rec)
    # Return value is ignored; presumably corners_img / corners_compo are
    # extended in place -- TODO confirm in det.compo_irregular.
    det.compo_irregular(org, corners_non_rec, corners_img, corners_compo)
    corners_img, _ = det.rm_text(org, corners_img, ['img'] * len(corners_img))

    # *** Step 4 *** classification: clip and classify the components candidates
    # -> ignore noises -> refine img
    clips = seg.clipping(org, corners_compo)
    compos_class = CNN.predict(clips)
    # NOTE(review): the det.compo_filter(org, corners_compo, compos_class, is_icon)
    # call is disabled in this branch; it is only active in the reduced pass.
    corners_compo, compos_class = det.strip_img(corners_compo, compos_class, corners_img)

    # *** Step 5 *** result refinement
    # NOTE(review): only the image merge is assumed to be gated by is_merge;
    # the text-removal passes below are assumed unconditional -- confirm.
    if is_merge:
        corners_img, _ = det.merge_corner(corners_img, ['img'] * len(corners_img))
    # Blocks and images carry no predicted class, so a constant label list of
    # matching length is supplied and the returned labels are dropped.
    corners_block, _ = det.rm_text(org, corners_block, ['block'] * len(corners_block))
    corners_img, _ = det.rm_text(org, corners_img, ['img'] * len(corners_img))
    corners_compo, compos_class = det.rm_text(org, corners_compo, compos_class)
    if is_shrink_img:
        corners_img = det.img_shrink(org, binary, corners_img)

    # *** Step 6 *** text detection from cleaned image
    img_clean = draw.draw_bounding_box(org, corners_img, color=(255, 255, 255), line=-1)
    word_corners = ocr.text_detection(org, img_clean)
    corners_text = ocr.text_merge_word_into_line(org, word_corners)

    # *** Step 7 *** img inspection: search components in img element
    # This function itself is handed over as the first argument (presumably so
    # the helper can call it back with main=False -- TODO confirm).
    if is_img_inspect:
        corners_block, corners_img, corners_compo, compos_class = det.compo_in_img(
            processing, org, binary,
            corners_img, corners_block, corners_compo, compos_class)

    return corners_block, corners_img, corners_compo, compos_class, corners_text
corners_img = det.img_refine(org, corners_img) # merge overlapped corners, and remove nested corners if is_merge_img: corners_img = det.merge_corners(corners_img) # detect components in img if is_detect_compo_in_img: corners_compo += det.uicomponent_in_img(org, binary, corners_img) # remove pure text element corners_block = det.rm_text(org, corners_block) corners_img = det.rm_text(org, corners_img) corners_compo = det.rm_text(org, corners_compo) # *** Step 5 *** classification: clip and classify the potential components if is_classify: CNN.load() compos = seg.clipping(org, corners_compo) compos_classes = CNN.predict(compos) else: compos_classes = None # *** Step 6 *** text detection from cleaned image img_clean = draw.draw_bounding_box(org, corners_img, color=(255, 255, 255), line=-1) if is_ocr: corners_word = ocr.text_detection(org, img_clean) corners_line = ocr.text_merge_into_line(org, corners_word) draw_bounding = draw.draw_bounding_box(org, corners_line, line=1) else: draw_bounding = org
def block_clip(org, blocks_corner, show=False):
    """Delegate to ``seg.clipping`` and return whatever it produces.

    Args:
        org: Forwarded as the first positional argument.
        blocks_corner: Forwarded as the second positional argument.
        show: Forwarded as the third positional argument; defaults to False.

    Returns:
        The result of ``seg.clipping(org, blocks_corner, show)``, unchanged.
    """
    return seg.clipping(org, blocks_corner, show)