def pse_decode(preds,
               text_repr_type='poly',
               min_kernel_confidence=0.5,
               min_text_avg_confidence=0.85,
               min_kernel_area=0,
               min_text_area=16):
    """Turn PSENet head outputs into text instance boundaries.

    This is partially adapted from https://github.com/whai362/PSENet.

    Args:
        preds (tensor): The head output tensor of size nxHxW. Channel 0 is
            used as the text map; every channel is binarized as a kernel
            map.
        text_repr_type (str): The boundary encoding type 'poly' or 'quad'.
        min_kernel_confidence (float): Threshold applied to the sigmoid of
            every channel to binarize it.
        min_text_avg_confidence (float): The minimal text average
            confidence, forwarded to ``filter_instance``.
        min_kernel_area (int): The minimal text kernel area, forwarded to
            ``pse``.
        min_text_area (int): The minimal text instance region area,
            forwarded to ``filter_instance``.

    Returns:
        boundaries: (list[list[float]]): The instance boundary and its
            instance confidence list.
    """
    from .pse import pse

    probs = torch.sigmoid(preds)
    binary_maps = probs > min_kernel_confidence
    # Kernel pixels only count where the text map (channel 0) is positive.
    gated_kernels = binary_maps * binary_maps[0]

    score_map = probs[0].data.cpu().numpy().astype(np.float32)
    gated_kernels = gated_kernels.data.cpu().numpy().astype(np.uint8)

    # Seed labels come from the last kernel map; ``pse`` then produces the
    # final per-pixel instance labels from all kernel maps.
    seed_count, seed_labels = cv2.connectedComponents(
        gated_kernels[-1], connectivity=4)
    label_map = np.array(
        pse(gated_kernels, min_kernel_area, seed_labels, seed_count))

    boundaries = []
    # Label 0 is skipped; instances are numbered from 1.
    for instance_id in range(1, np.max(label_map) + 1):
        member = label_map == instance_id
        # argwhere yields (row, col); flip the columns to get (x, y).
        points = np.argwhere(member)[:, ::-1]
        instance_score = np.mean(score_map[member])
        if filter_instance(points.shape[0], instance_score, min_text_area,
                           min_text_avg_confidence):
            continue

        boundary = points2boundary(points, text_repr_type, instance_score)
        if boundary is not None:
            boundaries.append(boundary)

    return boundaries
def pan_decode(preds,
               text_repr_type='poly',
               min_text_confidence=0.5,
               min_kernel_confidence=0.5,
               min_text_avg_confidence=0.85,
               min_text_area=16):
    """Convert scores to quadrangles via post processing in PANet.

    This is partially adapted from https://github.com/WenmuZhou/PAN.pytorch.

    The input tensor is left untouched: the sigmoid of the first two
    channels is computed on a separate tensor instead of being written back
    into ``preds``.

    Args:
        preds (tensor): The head output tensor of size 6xHxW. Channel 0 is
            the text map, channel 1 the kernel map, and the remaining
            channels are used as per-pixel embeddings.
        text_repr_type (str): The boundary encoding type 'poly' or 'quad'.
        min_text_confidence (float): The minimal text confidence.
        min_kernel_confidence (float): The minimal kernel confidence.
        min_text_avg_confidence (float): The minimal text average confidence.
        min_text_area (int): The minimal text instance region area.

    Returns:
        boundaries: (list[list[float]]): The instance boundary and its
            instance confidence list.
    """
    # Out-of-place sigmoid: writing the result back into ``preds[:2]`` would
    # silently modify the caller's tensor (and double-apply the sigmoid if
    # the same tensor were decoded twice).
    score_maps = torch.sigmoid(preds[:2, :, :]).detach().cpu().numpy()
    embeddings = preds[2:].detach().cpu().numpy().transpose(
        (1, 2, 0))  # (h, w, 4)

    text_score = score_maps[0].astype(np.float32)
    text = score_maps[0] > min_text_confidence
    # A kernel pixel must also be a text pixel.
    kernel = (score_maps[1] > min_kernel_confidence) * text

    region_num, labels = cv2.connectedComponents(
        kernel.astype(np.uint8), connectivity=4)
    contours, _ = cv2.findContours((kernel * 255).astype(np.uint8),
                                   cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    kernel_contours = np.zeros(text.shape, dtype='uint8')
    cv2.drawContours(kernel_contours, contours, -1, 255)

    # NOTE(review): the last argument handed to pixel_group is
    # min_text_avg_confidence; confirm this matches the meaning of that
    # parameter in pixel_group's signature.
    text_points = pixel_group(text_score, text, embeddings, labels,
                              kernel_contours, region_num,
                              min_text_avg_confidence)

    boundaries = []
    for text_point in text_points:
        # Element 0 is read as the instance confidence; elements from
        # index 2 onwards are reshaped into (x, y) pairs.
        text_confidence = text_point[0]
        coords = np.array(text_point[2:], dtype=int).reshape(-1, 2)
        area = coords.shape[0]
        if filter_instance(area, text_confidence, min_text_area,
                           min_text_avg_confidence):
            continue

        vertices_confidence = points2boundary(coords, text_repr_type,
                                              text_confidence)
        if vertices_confidence is not None:
            boundaries.append(vertices_confidence)

    return boundaries
def db_decode(preds,
              text_repr_type='poly',
              mask_thr=0.3,
              min_text_score=0.3,
              min_text_width=5,
              unclip_ratio=1.5,
              max_candidates=3000):
    """Decoding predictions of DbNet to instances.

    This is partially adapted from https://github.com/MhLiao/DB.

    Args:
        preds (Tensor): The head output tensor of size nxHxW. Only channel 0
            (the probability map) is used.
        text_repr_type (str): The boundary encoding type 'poly' or 'quad'.
        mask_thr (float): The mask threshold value for binarization.
        min_text_score (float): Candidates whose ``box_score_fast`` score on
            the probability map is below this value are discarded.
        min_text_width (int): The minimum width of boundary polygon/box
            predicted; forwarded to ``points2boundary``.
        unclip_ratio (float): The unclip ratio for text regions dilation.
        max_candidates (int): The maximum candidate number. At most this
            many contours are examined.

    Returns:
        boundaries: (list[list[float]]): The predicted text boundaries.
    """
    prob_map = preds[0, :, :]
    text_mask = prob_map > mask_thr

    score_map = prob_map.data.cpu().numpy().astype(np.float32)
    text_mask = text_mask.data.cpu().numpy().astype(np.uint8)  # to numpy

    contours, _ = cv2.findContours((text_mask * 255).astype(np.uint8),
                                   cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    boundaries = []
    for i, poly in enumerate(contours):
        # ``>=`` so that exactly max_candidates contours are processed; the
        # former ``>`` let one extra candidate through.
        if i >= max_candidates:
            break

        # Simplify the contour with a tolerance of 1% of its perimeter.
        epsilon = 0.01 * cv2.arcLength(poly, True)
        approx = cv2.approxPolyDP(poly, epsilon, True)
        points = approx.reshape((-1, 2))
        if points.shape[0] < 4:
            continue

        score = box_score_fast(score_map, points)
        if score < min_text_score:
            continue

        poly = unclip(points, unclip_ratio=unclip_ratio)
        # Skip unusable unclip results: empty, or a first element that is a
        # list (presumably a multi-part result - TODO confirm).
        if len(poly) == 0 or isinstance(poly[0], list):
            continue
        poly = poly.reshape(-1, 2)

        poly = points2boundary(poly, text_repr_type, score, min_text_width)
        if poly is not None:
            boundaries.append(poly)

    return boundaries
def __call__(self, preds):
    """Decode a DBNet-style probability map into text boundaries.

    Channel 0 of ``preds`` is binarized with ``self.mask_thr``; each contour
    of the binary mask is simplified, scored with ``box_score_fast``,
    dilated with ``unclip`` and finally encoded according to
    ``self.text_repr_type``.

    Args:
        preds (Tensor): Prediction map with shape :math:`(C, H, W)`.

    Returns:
        list[list[float]]: The predicted text boundaries.
    """
    assert preds.dim() == 3

    prob_map = preds[0, :, :]
    text_mask = prob_map > self.mask_thr

    score_map = prob_map.data.cpu().numpy().astype(np.float32)
    text_mask = text_mask.data.cpu().numpy().astype(np.uint8)  # to numpy

    contours, _ = cv2.findContours((text_mask * 255).astype(np.uint8),
                                   cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    boundaries = []
    for i, poly in enumerate(contours):
        # ``>=`` so that exactly self.max_candidates contours are processed;
        # the former ``>`` let one extra candidate through.
        if i >= self.max_candidates:
            break

        # Simplify the contour with a tolerance of 1% of its perimeter.
        epsilon = 0.01 * cv2.arcLength(poly, True)
        approx = cv2.approxPolyDP(poly, epsilon, True)
        points = approx.reshape((-1, 2))
        if points.shape[0] < 4:
            continue

        score = box_score_fast(score_map, points)
        if score < self.min_text_score:
            continue

        poly = unclip(points, unclip_ratio=self.unclip_ratio)
        # Skip unusable unclip results: empty, or a first element that is a
        # list (presumably a multi-part result - TODO confirm).
        if len(poly) == 0 or isinstance(poly[0], list):
            continue
        poly = poly.reshape(-1, 2)

        if self.text_repr_type == 'quad':
            poly = points2boundary(poly, self.text_repr_type, score,
                                   self.min_text_width)
        elif self.text_repr_type == 'poly':
            # Flat [x0, y0, x1, y1, ..., score] list.
            poly = poly.flatten().tolist()
            if score is not None:
                poly = poly + [score]
            # Fewer than 8 entries cannot describe a usable polygon.
            if len(poly) < 8:
                poly = None

        if poly is not None:
            boundaries.append(poly)

    return boundaries
def __call__(self, preds):
    """Decode a PANet-style prediction map into text instance boundaries.

    The input tensor is left untouched: the sigmoid of the first two
    channels is computed on a separate tensor instead of being written back
    into ``preds``.

    Args:
        preds (Tensor): Prediction map with shape :math:`(C, H, W)`.
            Channel 0 is the text map, channel 1 the kernel map, and the
            remaining channels are used as per-pixel embeddings.

    Returns:
        list[list[float]]: The instance boundary and its confidence.
    """
    assert preds.dim() == 3

    # Out-of-place sigmoid: writing the result back into ``preds[:2]`` would
    # silently modify the caller's tensor (and double-apply the sigmoid if
    # the same tensor were decoded twice).
    score_maps = torch.sigmoid(preds[:2, :, :]).detach().cpu().numpy()
    embeddings = preds[2:].detach().cpu().numpy().transpose(
        (1, 2, 0))  # (h, w, 4)

    text_score = score_maps[0].astype(np.float32)
    text = score_maps[0] > self.min_text_confidence
    # A kernel pixel must also be a text pixel.
    kernel = (score_maps[1] > self.min_kernel_confidence) * text

    region_num, labels = cv2.connectedComponents(
        kernel.astype(np.uint8), connectivity=4)
    contours, _ = cv2.findContours((kernel * 255).astype(np.uint8),
                                   cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    kernel_contours = np.zeros(text.shape, dtype='uint8')
    cv2.drawContours(kernel_contours, contours, -1, 255)

    # NOTE(review): the last argument handed to pixel_group is
    # self.min_text_avg_confidence; confirm this matches the meaning of that
    # parameter in pixel_group's signature.
    text_points = pixel_group(text_score, text, embeddings, labels,
                              kernel_contours, region_num,
                              self.min_text_avg_confidence)

    boundaries = []
    for text_point in text_points:
        # Element 0 is read as the instance confidence; elements from
        # index 2 onwards are reshaped into (x, y) pairs.
        text_confidence = text_point[0]
        coords = np.array(text_point[2:], dtype=int).reshape(-1, 2)
        area = coords.shape[0]
        if not self.is_valid_instance(area, text_confidence,
                                      self.min_text_area,
                                      self.min_text_avg_confidence):
            continue

        vertices_confidence = points2boundary(coords, self.text_repr_type,
                                              text_confidence)
        if vertices_confidence is not None:
            boundaries.append(vertices_confidence)

    return boundaries
def __call__(self, preds):
    """Decode a PSENet-style prediction map into text instance boundaries.

    Every channel is passed through a sigmoid and binarized with
    ``self.min_kernel_confidence``; channel 0 acts as the text map that
    gates all kernel maps. Connected components of the last kernel map are
    handed to ``contour_expand`` to obtain the final instance labels.

    Args:
        preds (Tensor): Prediction map with shape :math:`(C, H, W)`.

    Returns:
        list[list[float]]: The instance boundary and its confidence.
    """
    assert preds.dim() == 3

    probs = torch.sigmoid(preds)
    binarized = probs > self.min_kernel_confidence
    # Kernel pixels only count where the text map (channel 0) is positive.
    kernels = (binarized * binarized[0]).data.cpu().numpy().astype(np.uint8)
    text_score = probs[0].data.cpu().numpy().astype(np.float32)

    num_seeds, seed_labels = cv2.connectedComponents(
        kernels[-1], connectivity=4)
    expanded = np.array(
        contour_expand(kernels, seed_labels, self.min_kernel_area,
                       num_seeds))

    boundaries = []
    # Label 0 is skipped; instances are numbered from 1.
    for label in range(1, np.max(expanded) + 1):
        in_instance = expanded == label
        # argwhere yields (row, col); flip the columns to get (x, y).
        coords = np.argwhere(in_instance)[:, ::-1]
        mean_score = np.mean(text_score[in_instance])
        if not self.is_valid_instance(coords.shape[0], mean_score,
                                      self.min_text_area,
                                      self.min_text_avg_confidence):
            continue

        boundary = points2boundary(coords, self.text_repr_type, mean_score)
        if boundary is not None:
            boundaries.append(boundary)

    return boundaries
def pan_decode(preds,
               text_repr_type='poly',
               min_text_confidence=0.5,
               min_kernel_confidence=0.5,
               min_text_avg_confidence=0.85,
               min_kernel_area=0,
               min_text_area=16):
    """Convert scores to quadrangles via post processing in PANet.

    This is partially adapted from https://github.com/WenmuZhou/PAN.pytorch.

    The input tensor is left untouched: the sigmoid of the first two
    channels is computed on a separate tensor instead of being written back
    into ``preds``.

    Args:
        preds (tensor): The head output tensor of size 6xHxW. Channel 0 is
            the text map, channel 1 the kernel map, and the remaining
            channels are used as per-pixel embeddings.
        text_repr_type (str): The boundary encoding type 'poly' or 'quad'.
        min_text_confidence (float): The minimal text confidence.
        min_kernel_confidence (float): The minimal kernel confidence.
        min_text_avg_confidence (float): The minimal text average confidence.
        min_kernel_area (int): The minimal text kernel area.
        min_text_area (int): The minimal text instance region area.

    Returns:
        boundaries: (list[list[float]]): The instance boundary and its
            instance confidence list.
    """
    from .pan import assign_pixels, estimate_text_confidence, get_pixel_num

    # Out-of-place sigmoid: writing the result back into ``preds[:2]`` would
    # silently modify the caller's tensor (and double-apply the sigmoid if
    # the same tensor were decoded twice).
    score_maps = torch.sigmoid(preds[:2, :, :]).detach().cpu().numpy()
    embeddings = preds[2:].detach().cpu().numpy().transpose(
        (1, 2, 0))  # (h, w, 4)

    text_score = score_maps[0].astype(np.float32)
    text = score_maps[0] > min_text_confidence
    # A kernel pixel must also be a text pixel.
    kernel = (score_maps[1] > min_kernel_confidence) * text

    region_num, labels = cv2.connectedComponents(
        kernel.astype(np.uint8), connectivity=4)

    # Kernel regions large enough to seed an instance. Index 0 is skipped
    # (marked as meaningless in the original code). A set keeps the
    # membership test in the loop below O(1).
    region_pixel_num = get_pixel_num(labels, region_num)
    valid_kernel_inx = {
        region_idx
        for region_idx in range(1, region_num)
        if region_pixel_num[region_idx] >= min_kernel_area
    }

    # Assign pixels to kernels.
    # NOTE(review): 0.8 is a hard-coded final argument to assign_pixels;
    # its meaning is not visible here - confirm against .pan.
    assignment = assign_pixels(
        text.astype(np.uint8), embeddings, labels, region_num, 0.8)
    assignment = assignment.reshape(text.shape)

    # Compute text average confidence per instance.
    text_points = estimate_text_confidence(assignment, text_score,
                                           region_num)

    boundaries = []
    for text_inx, text_point in text_points.items():
        if text_inx not in valid_kernel_inx:
            continue

        # Element 0 is read as the instance confidence; elements from
        # index 2 onwards are reshaped into (x, y) pairs.
        text_confidence = text_point[0]
        coords = np.array(text_point[2:], dtype=int).reshape(-1, 2)
        area = coords.shape[0]
        if filter_instance(area, text_confidence, min_text_area,
                           min_text_avg_confidence):
            continue

        vertices_confidence = points2boundary(coords, text_repr_type,
                                              text_confidence)
        if vertices_confidence is not None:
            boundaries.append(vertices_confidence)

    return boundaries