def propose_comps_and_attribs(self, text_region_map, center_region_map, top_height_map, bot_height_map, sin_map, cos_map): """Generate text components and attributes. Args: text_region_map (ndarray): The predicted text region probability map. center_region_map (ndarray): The predicted text center region probability map. top_height_map (ndarray): The predicted text height map from each pixel in text center region to top sideline. bot_height_map (ndarray): The predicted text height map from each pixel in text center region to bottom sideline. sin_map (ndarray): The predicted sin(theta) map. cos_map (ndarray): The predicted cos(theta) map. Returns: comp_attribs (ndarray): The text component attributes. text_comps (ndarray): The text components. """ assert (text_region_map.shape == center_region_map.shape == top_height_map.shape == bot_height_map.shape == sin_map.shape == cos_map.shape) text_mask = text_region_map > self.text_region_thr center_region_mask = (center_region_map > self.center_region_thr) * text_mask scale = np.sqrt(1.0 / (sin_map**2 + cos_map**2 + 1e-8)) sin_map, cos_map = sin_map * scale, cos_map * scale center_region_mask = fill_hole(center_region_mask) center_region_contours, _ = cv2.findContours( center_region_mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) mask_sz = center_region_map.shape comp_list = [] for contour in center_region_contours: current_center_mask = np.zeros(mask_sz) cv2.drawContours(current_center_mask, [contour], -1, 1, -1) if current_center_mask.sum() <= self.center_region_area_thr: continue score_map = text_region_map * current_center_mask text_comps = self.propose_comps(score_map, top_height_map, bot_height_map, sin_map, cos_map, self.comp_score_thr, self.min_width, self.max_width, self.comp_shrink_ratio, self.comp_w_h_ratio) text_comps = la_nms(text_comps, self.nms_thr) text_comp_mask = np.zeros(mask_sz) text_comp_boxes = text_comps[:, :8].reshape( (-1, 4, 2)).astype(np.int32) cv2.drawContours(text_comp_mask, text_comp_boxes, -1, 1, -1) if (text_comp_mask * text_mask).sum() < text_comp_mask.sum() * 0.5: continue if text_comps.shape[-1] > 0: comp_list.append(text_comps) if len(comp_list) <= 0: return None, None text_comps = np.vstack(comp_list) text_comp_boxes = text_comps[:, :8].reshape((-1, 4, 2)) centers = np.mean(text_comp_boxes, axis=1).astype(np.int32) x = centers[:, 0] y = centers[:, 1] scores = [] for text_comp_box in text_comp_boxes: text_comp_box[:, 0] = np.clip(text_comp_box[:, 0], 0, mask_sz[1] - 1) text_comp_box[:, 1] = np.clip(text_comp_box[:, 1], 0, mask_sz[0] - 1) min_coord = np.min(text_comp_box, axis=0).astype(np.int32) max_coord = np.max(text_comp_box, axis=0).astype(np.int32) text_comp_box = text_comp_box - min_coord box_sz = (max_coord - min_coord + 1) temp_comp_mask = np.zeros((box_sz[1], box_sz[0]), dtype=np.uint8) cv2.fillPoly(temp_comp_mask, [text_comp_box.astype(np.int32)], 1) temp_region_patch = text_region_map[min_coord[1]:(max_coord[1] + 1), min_coord[0]:(max_coord[0] + 1)] score = cv2.mean(temp_region_patch, temp_comp_mask)[0] scores.append(score) scores = np.array(scores).reshape((-1, 1)) text_comps = np.hstack([text_comps[:, :-1], scores]) h = top_height_map[y, x].reshape( (-1, 1)) + bot_height_map[y, x].reshape((-1, 1)) w = np.clip(h * self.comp_w_h_ratio, self.min_width, self.max_width) sin = sin_map[y, x].reshape((-1, 1)) cos = cos_map[y, x].reshape((-1, 1)) x = x.reshape((-1, 1)) y = y.reshape((-1, 1)) comp_attribs = np.hstack([x, y, h, w, cos, sin]) return comp_attribs, text_comps
def generate_comp_attribs(self, center_lines, text_mask, center_region_mask, top_height_map, bot_height_map, sin_map, cos_map): """Generate text component attributes. Args: center_lines (list[ndarray]): The list of text center lines . text_mask (ndarray): The text region mask. center_region_mask (ndarray): The text center region mask. top_height_map (ndarray): The map on which the distance from points to top side lines will be drawn for each pixel in text center regions. bot_height_map (ndarray): The map on which the distance from points to bottom side lines will be drawn for each pixel in text center regions. sin_map (ndarray): The sin(theta) map where theta is the angle between vector (top point - bottom point) and vector (1, 0). cos_map (ndarray): The cos(theta) map where theta is the angle between vector (top point - bottom point) and vector (1, 0). Returns: pad_comp_attribs (ndarray): The padded text component attributes of a fixed size. """ assert isinstance(center_lines, list) assert (text_mask.shape == center_region_mask.shape == top_height_map.shape == bot_height_map.shape == sin_map.shape == cos_map.shape) center_lines_mask = np.zeros_like(center_region_mask) cv2.polylines(center_lines_mask, center_lines, 0, 1, 1) center_lines_mask = center_lines_mask * center_region_mask comp_centers = np.argwhere(center_lines_mask > 0) y = comp_centers[:, 0] x = comp_centers[:, 1] top_height = top_height_map[y, x].reshape( (-1, 1)) * self.comp_shrink_ratio bot_height = bot_height_map[y, x].reshape( (-1, 1)) * self.comp_shrink_ratio sin = sin_map[y, x].reshape((-1, 1)) cos = cos_map[y, x].reshape((-1, 1)) top_mid_points = comp_centers + np.hstack( [top_height * sin, top_height * cos]) bot_mid_points = comp_centers - np.hstack( [bot_height * sin, bot_height * cos]) width = (top_height + bot_height) * self.comp_w_h_ratio width = np.clip(width, self.min_width, self.max_width) r = width / 2 tl = top_mid_points[:, ::-1] - np.hstack([-r * sin, r * cos]) tr = top_mid_points[:, ::-1] + np.hstack([-r * sin, r * cos]) br = bot_mid_points[:, ::-1] + np.hstack([-r * sin, r * cos]) bl = bot_mid_points[:, ::-1] - np.hstack([-r * sin, r * cos]) text_comps = np.hstack([tl, tr, br, bl]).astype(np.float32) score = np.ones((text_comps.shape[0], 1), dtype=np.float32) text_comps = np.hstack([text_comps, score]) text_comps = la_nms(text_comps, self.text_comp_nms_thr) if text_comps.shape[0] >= 1: img_h, img_w = center_region_mask.shape text_comps[:, 0:8:2] = np.clip(text_comps[:, 0:8:2], 0, img_w - 1) text_comps[:, 1:8:2] = np.clip(text_comps[:, 1:8:2], 0, img_h - 1) comp_centers = np.mean(text_comps[:, 0:8].reshape((-1, 4, 2)), axis=1).astype(np.int32) x = comp_centers[:, 0] y = comp_centers[:, 1] height = (top_height_map[y, x] + bot_height_map[y, x]).reshape( (-1, 1)) width = np.clip(height * self.comp_w_h_ratio, self.min_width, self.max_width) cos = cos_map[y, x].reshape((-1, 1)) sin = sin_map[y, x].reshape((-1, 1)) _, comp_label_mask = cv2.connectedComponents(center_region_mask, connectivity=8) comp_labels = comp_label_mask[y, x].reshape( (-1, 1)).astype(np.float32) x = x.reshape((-1, 1)).astype(np.float32) y = y.reshape((-1, 1)).astype(np.float32) comp_attribs = np.hstack( [x, y, height, width, cos, sin, comp_labels]) comp_attribs = self.jitter_comp_attribs(comp_attribs, self.jitter_level) if comp_attribs.shape[0] < self.num_min_comps: num_rand_comps = self.num_min_comps - comp_attribs.shape[0] rand_comp_attribs = self.generate_rand_comp_attribs( num_rand_comps, 1 - text_mask) comp_attribs = np.vstack([comp_attribs, rand_comp_attribs]) else: comp_attribs = self.generate_rand_comp_attribs( self.num_min_comps, 1 - text_mask) num_comps = (np.ones((comp_attribs.shape[0], 1), dtype=np.float32) * comp_attribs.shape[0]) comp_attribs = np.hstack([num_comps, comp_attribs]) if comp_attribs.shape[0] > self.num_max_comps: comp_attribs = comp_attribs[:self.num_max_comps, :] comp_attribs[:, 0] = self.num_max_comps pad_comp_attribs = np.zeros( (self.num_max_comps, comp_attribs.shape[1]), dtype=np.float32) pad_comp_attribs[:comp_attribs.shape[0], :] = comp_attribs return pad_comp_attribs