def rm_text(org, corners, compo_class,
            max_text_height=C.THRESHOLD_TEXT_MAX_HEIGHT, max_text_width=C.THRESHOLD_TEXT_MAX_WIDTH,
            ocr_padding=C.OCR_PADDING, ocr_min_word_area=C.OCR_MIN_WORD_AREA, show=False):
    """
    Filter out the components whose image region is recognised as text.

    :param org: original image; sliced as org[row_min:row_max, col_min:col_max]
    :param corners: [(top_left, bottom_right)]
                    -> top_left: (column_min, row_min)
                    -> bottom_right: (column_max, row_max)
    :param compo_class: class of each corner, parallel to ``corners``
    :param max_text_height: boxes taller than this (and wider than
                            ``max_text_width``) are kept without running OCR
    :param max_text_width: boxes wider than this (and taller than
                           ``max_text_height``) are kept without running OCR
    :param ocr_padding: margin added on every side of the box before clipping
    :param ocr_min_word_area: threshold forwarded to ``ocr.is_text``
    :param show: forwarded to ``ocr.is_text``
    :return: (corners, classes) of the components that are not text
    """
    kept_corners = []
    kept_classes = []

    for idx, box in enumerate(corners):
        (left, top), (right, bottom) = box
        box_h = bottom - top
        box_w = right - left

        # A box exceeding both limits is most likely a block or an image,
        # so it is kept without consulting OCR.
        oversized = box_h > max_text_height and box_w > max_text_width
        if not oversized:
            # Grow the box by the padding, clamped to the image extent.
            top = max(top - ocr_padding, 0)
            bottom = min(bottom + ocr_padding, org.shape[0])
            left = max(left - ocr_padding, 0)
            right = min(right + ocr_padding, org.shape[1])

            region = org[top:bottom, left:right]
            if ocr.is_text(region, ocr_min_word_area, show=show):
                # Text area: drop this component.
                continue

        kept_corners.append(box)
        kept_classes.append(compo_class[idx])

    return kept_corners, kept_classes
def rm_text(org, corners, must_img_height, must_img_width, ocr_padding, ocr_min_word_area, show=False):
    """
    Remove the corners whose padded image region is recognised as text.

    :param org: image array; its first axis is indexed by the second
                coordinate of each point and its second axis by the first
                coordinate (i.e. points are (column, row) for a row-major image)
    :param corners: [(up_left, bottom_right)], each point a 2-tuple as above
    :param must_img_height: boxes taller than this (and wider than
                            ``must_img_width``) are kept without running OCR
    :param must_img_width: boxes wider than this (and taller than
                           ``must_img_height``) are kept without running OCR
    :param ocr_padding: margin added on every side of the box before clipping
    :param ocr_min_word_area: threshold forwarded to ``ocr.is_text``
    :param show: forwarded to ``ocr.is_text``
    :return: the corners that are not text
    """
    new_corners = []
    for corner in corners:
        (col_min, row_min), (col_max, row_max) = corner
        height = row_max - row_min
        width = col_max - col_min

        # Highly likely to be a block or an image if too large: keep as is.
        if height > must_img_height and width > must_img_width:
            new_corners.append(corner)
            continue

        # Clamp the padded box to the image. Without this, a box closer than
        # ``ocr_padding`` to the top/left edge produces a negative slice
        # start, which is interpreted relative to the END of the axis and
        # yields an empty or wrong clip.
        row_start = max(row_min - ocr_padding, 0)
        row_stop = min(row_max + ocr_padding, org.shape[0])
        col_start = max(col_min - ocr_padding, 0)
        col_stop = min(col_max + ocr_padding, org.shape[1])

        # Keep the corner only if the clipped area is not text.
        clip = org[row_start:row_stop, col_start:col_stop]
        if not ocr.is_text(clip, ocr_min_word_area, show=show):
            new_corners.append(corner)
    return new_corners