Example #1
def pse_decode(preds,
               text_repr_type='poly',
               min_kernel_confidence=0.5,
               min_text_avg_confidence=0.85,
               min_kernel_area=0,
               min_text_area=16):
    """Decoding predictions of PSENet to instances. This is partially adapted
    from https://github.com/whai362/PSENet.

    Args:
        preds (tensor): The head output tensor of size nxHxW.
        text_repr_type (str): The boundary encoding type 'poly' or 'quad'.
        min_kernel_confidence (float): The minimal kernel confidence.
        min_text_avg_confidence (float): The minimal text average confidence.
        min_kernel_area (int): The minimal text kernel area.
        min_text_area (int): The minimal text instance region area.
    Returns:
        boundaries (list[list[float]]): The instance boundaries, each a list
            of vertex coordinates followed by the instance confidence.
    """
    preds = torch.sigmoid(preds)  # text confidence

    score = preds[0, :, :]
    masks = preds > min_kernel_confidence
    text_mask = masks[0, :, :]
    kernel_masks = masks[0:, :, :] * text_mask

    score = score.data.cpu().numpy().astype(np.float32)  # to numpy

    kernel_masks = kernel_masks.data.cpu().numpy().astype(np.uint8)  # to numpy
    # local import of the progressive scale expansion (pse) implementation
    from .pse import pse

    region_num, labels = cv2.connectedComponents(kernel_masks[-1],
                                                 connectivity=4)

    labels = pse(kernel_masks, min_kernel_area, labels, region_num)
    labels = np.array(labels)
    label_num = np.max(labels) + 1
    boundaries = []
    for i in range(1, label_num):
        points = np.array(np.where(labels == i)).transpose((1, 0))[:, ::-1]
        area = points.shape[0]
        score_instance = np.mean(score[labels == i])
        if filter_instance(area, score_instance, min_text_area,
                           min_text_avg_confidence):
            continue

        vertices_confidence = points2boundary(points, text_repr_type,
                                              score_instance)
        if vertices_confidence is not None:
            boundaries.append(vertices_confidence)

    return boundaries
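
A minimal usage sketch for this snippet. It is hedged: it assumes the original MMOCR module context, so that torch, numpy, cv2, points2boundary, filter_instance and the compiled .pse op all resolve, and the 7-channel input shape is only an assumption mirroring the usual PSENet kernel count.

import torch

# fake PSENet head output: 7 kernel maps over a 64x64 grid (shape assumed)
dummy_preds = torch.randn(7, 64, 64)
boundaries = pse_decode(dummy_preds, text_repr_type='quad')
# each entry is a flattened list of vertex coordinates followed by the
# instance confidence
print(len(boundaries))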
Example #2
def pan_decode(preds,
               text_repr_type='poly',
               min_text_confidence=0.5,
               min_kernel_confidence=0.5,
               min_text_avg_confidence=0.85,
               min_text_area=16):
    """Convert scores to quadrangles via post processing in PANet. This is
    partially adapted from https://github.com/WenmuZhou/PAN.pytorch.

    Args:
        preds (tensor): The head output tensor of size 6xHxW.
        text_repr_type (str): The boundary encoding type 'poly' or 'quad'.
        min_text_confidence (float): The minimal text confidence.
        min_kernel_confidence (float): The minimal kernel confidence.
        min_text_avg_confidence (float): The minimal text average confidence.
        min_text_area (int): The minimal text instance region area.
    Returns:
        boundaries (list[list[float]]): The instance boundaries, each a list
            of vertex coordinates followed by the instance confidence.
    """
    preds[:2, :, :] = torch.sigmoid(preds[:2, :, :])
    preds = preds.detach().cpu().numpy()

    text_score = preds[0].astype(np.float32)
    text = preds[0] > min_text_confidence
    kernel = (preds[1] > min_kernel_confidence) * text
    embeddings = preds[2:].transpose((1, 2, 0))  # (h, w, 4)

    region_num, labels = cv2.connectedComponents(
        kernel.astype(np.uint8), connectivity=4)
    contours, _ = cv2.findContours((kernel * 255).astype(np.uint8),
                                   cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    kernel_contours = np.zeros(text.shape, dtype='uint8')
    cv2.drawContours(kernel_contours, contours, -1, 255)
    text_points = pixel_group(text_score, text, embeddings, labels,
                              kernel_contours, region_num,
                              min_text_avg_confidence)

    boundaries = []
    for text_point in text_points:
        text_confidence = text_point[0]
        text_point = text_point[2:]
        text_point = np.array(text_point, dtype=int).reshape(-1, 2)
        area = text_point.shape[0]

        if filter_instance(area, text_confidence, min_text_area,
                           min_text_avg_confidence):
            continue
        vertices_confidence = points2boundary(text_point, text_repr_type,
                                              text_confidence)
        if vertices_confidence is not None:
            boundaries.append(vertices_confidence)

    return boundaries
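
A corresponding hedged usage sketch, again assuming the surrounding module supplies pixel_group, points2boundary and filter_instance; the 6xHxW shape follows the docstring above, while the spatial size is arbitrary.

import torch

# fake PANet head output: text map, kernel map and 4 embedding channels
dummy_preds = torch.randn(6, 64, 64)
boundaries = pan_decode(dummy_preds,
                        text_repr_type='quad',
                        min_text_avg_confidence=0.85)
print(len(boundaries))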
Example #3
def db_decode(preds,
              text_repr_type='poly',
              mask_thr=0.3,
              min_text_score=0.3,
              min_text_width=5,
              unclip_ratio=1.5,
              max_candidates=3000):
    """Decoding predictions of DbNet to instances. This is partially adapted
    from https://github.com/MhLiao/DB.

    Args:
        preds (Tensor): The head output tensor of size nxHxW.
        text_repr_type (str): The boundary encoding type 'poly' or 'quad'.
        mask_thr (float): The mask threshold value for binarization.
        min_text_score (float): The threshold value for converting binary map
            to shrink text regions.
        min_text_width (int): The minimum width of boundary polygon/box
            predicted.
        unclip_ratio (float): The unclip ratio for text regions dilation.
        max_candidates (int): The maximum candidate number.

    Returns:
        boundaries (list[list[float]]): The predicted text boundaries.
    """
    prob_map = preds[0, :, :]
    text_mask = prob_map > mask_thr

    score_map = prob_map.data.cpu().numpy().astype(np.float32)
    text_mask = text_mask.data.cpu().numpy().astype(np.uint8)  # to numpy

    contours, _ = cv2.findContours((text_mask * 255).astype(np.uint8),
                                   cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    boundaries = []
    for i, poly in enumerate(contours):
        if i > max_candidates:
            break
        epsilon = 0.01 * cv2.arcLength(poly, True)
        approx = cv2.approxPolyDP(poly, epsilon, True)
        points = approx.reshape((-1, 2))
        if points.shape[0] < 4:
            continue
        score = box_score_fast(score_map, points)
        if score < min_text_score:
            continue
        poly = unclip(points, unclip_ratio=unclip_ratio)
        if len(poly) == 0 or isinstance(poly[0], list):
            continue
        poly = poly.reshape(-1, 2)
        poly = points2boundary(poly, text_repr_type, score, min_text_width)
        if poly is not None:
            boundaries.append(poly)
    return boundaries
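
A hedged usage sketch for db_decode, assuming the surrounding module provides box_score_fast, unclip and points2boundary. Only channel 0 of preds is read, so a single-channel dummy probability map suffices for illustration.

import torch

# fake DBNet probability map with values in [0, 1] (shape assumed)
dummy_preds = torch.rand(1, 64, 64)
boundaries = db_decode(dummy_preds, text_repr_type='quad', mask_thr=0.3)
# each boundary is a flattened point list followed by its score
print(len(boundaries))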
Example #4
    def __call__(self, preds):
        """
        Args:
            preds (Tensor): Prediction map with shape :math:`(C, H, W)`.

        Returns:
            list[list[float]]: The predicted text boundaries.
        """
        assert preds.dim() == 3

        prob_map = preds[0, :, :]
        text_mask = prob_map > self.mask_thr

        score_map = prob_map.data.cpu().numpy().astype(np.float32)
        text_mask = text_mask.data.cpu().numpy().astype(np.uint8)  # to numpy

        contours, _ = cv2.findContours((text_mask * 255).astype(np.uint8),
                                       cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

        boundaries = []
        for i, poly in enumerate(contours):
            if i > self.max_candidates:
                break
            epsilon = 0.01 * cv2.arcLength(poly, True)
            approx = cv2.approxPolyDP(poly, epsilon, True)
            points = approx.reshape((-1, 2))
            if points.shape[0] < 4:
                continue
            score = box_score_fast(score_map, points)
            if score < self.min_text_score:
                continue
            poly = unclip(points, unclip_ratio=self.unclip_ratio)
            if len(poly) == 0 or isinstance(poly[0], list):
                continue
            poly = poly.reshape(-1, 2)

            if self.text_repr_type == 'quad':
                poly = points2boundary(poly, self.text_repr_type, score,
                                       self.min_text_width)
            elif self.text_repr_type == 'poly':
                poly = poly.flatten().tolist()
                if score is not None:
                    poly = poly + [score]
                if len(poly) < 8:
                    poly = None

            if poly is not None:
                boundaries.append(poly)

        return boundaries
Example #5
    def __call__(self, preds):
        """
        Args:
            preds (Tensor): Prediction map with shape :math:`(C, H, W)`.

        Returns:
            list[list[float]]: The instance boundary and its confidence.
        """
        assert preds.dim() == 3

        preds[:2, :, :] = torch.sigmoid(preds[:2, :, :])
        preds = preds.detach().cpu().numpy()

        text_score = preds[0].astype(np.float32)
        text = preds[0] > self.min_text_confidence
        kernel = (preds[1] > self.min_kernel_confidence) * text
        embeddings = preds[2:].transpose((1, 2, 0))  # (h, w, 4)

        region_num, labels = cv2.connectedComponents(
            kernel.astype(np.uint8), connectivity=4)
        contours, _ = cv2.findContours((kernel * 255).astype(np.uint8),
                                       cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
        kernel_contours = np.zeros(text.shape, dtype='uint8')
        cv2.drawContours(kernel_contours, contours, -1, 255)
        text_points = pixel_group(text_score, text, embeddings, labels,
                                  kernel_contours, region_num,
                                  self.min_text_avg_confidence)

        boundaries = []
        for text_point in text_points:
            text_confidence = text_point[0]
            text_point = text_point[2:]
            text_point = np.array(text_point, dtype=int).reshape(-1, 2)
            area = text_point.shape[0]

            if not self.is_valid_instance(area, text_confidence,
                                          self.min_text_area,
                                          self.min_text_avg_confidence):
                continue

            vertices_confidence = points2boundary(text_point,
                                                  self.text_repr_type,
                                                  text_confidence)
            if vertices_confidence is not None:
                boundaries.append(vertices_confidence)

        return boundaries
Example #6
    def __call__(self, preds):
        """
        Args:
            preds (Tensor): Prediction map with shape :math:`(C, H, W)`.

        Returns:
            list[list[float]]: The instance boundary and its confidence.
        """
        assert preds.dim() == 3

        preds = torch.sigmoid(preds)  # text confidence

        score = preds[0, :, :]
        masks = preds > self.min_kernel_confidence
        text_mask = masks[0, :, :]
        kernel_masks = masks[0:, :, :] * text_mask

        score = score.data.cpu().numpy().astype(np.float32)

        kernel_masks = kernel_masks.data.cpu().numpy().astype(np.uint8)

        region_num, labels = cv2.connectedComponents(kernel_masks[-1],
                                                     connectivity=4)

        labels = contour_expand(kernel_masks, labels, self.min_kernel_area,
                                region_num)
        labels = np.array(labels)
        label_num = np.max(labels)
        boundaries = []
        for i in range(1, label_num + 1):
            points = np.array(np.where(labels == i)).transpose((1, 0))[:, ::-1]
            area = points.shape[0]
            score_instance = np.mean(score[labels == i])
            if not self.is_valid_instance(area, score_instance,
                                          self.min_text_area,
                                          self.min_text_avg_confidence):
                continue

            vertices_confidence = points2boundary(points, self.text_repr_type,
                                                  score_instance)
            if vertices_confidence is not None:
                boundaries.append(vertices_confidence)

        return boundaries
Example #7
def pan_decode(preds,
               text_repr_type='poly',
               min_text_confidence=0.5,
               min_kernel_confidence=0.5,
               min_text_avg_confidence=0.85,
               min_kernel_area=0,
               min_text_area=16):
    """Convert scores to quadrangles via post processing in PANet. This is
    partially adapted from https://github.com/WenmuZhou/PAN.pytorch.

    Args:
        preds (tensor): The head output tensor of size 6xHxW.
        text_repr_type (str): The boundary encoding type 'poly' or 'quad'.
        min_text_confidence (float): The minimal text confidence.
        min_kernel_confidence (float): The minimal kernel confidence.
        min_text_avg_confidence (float): The minimal text average confidence.
        min_kernel_area (int): The minimal text kernel area.
        min_text_area (int): The minimal text instance region area.
    Returns:
        boundaries (list[list[float]]): The instance boundaries, each a list
            of vertex coordinates followed by the instance confidence.
    """
    from .pan import assign_pixels, estimate_text_confidence, get_pixel_num
    preds[:2, :, :] = torch.sigmoid(preds[:2, :, :])
    preds = preds.detach().cpu().numpy()

    text_score = preds[0].astype(np.float32)
    text = preds[0] > min_text_confidence
    kernel = (preds[1] > min_kernel_confidence) * text
    embeddings = preds[2:].transpose((1, 2, 0))  # (h, w, 4)

    region_num, labels = cv2.connectedComponents(kernel.astype(np.uint8),
                                                 connectivity=4)
    valid_kernel_inx = []
    region_pixel_num = get_pixel_num(labels, region_num)

    # start from index 1; label 0 is the background.
    for region_idx in range(1, region_num):
        if region_pixel_num[region_idx] < min_kernel_area:
            continue
        valid_kernel_inx.append(region_idx)

    # assign pixels to valid kernels
    assignment = assign_pixels(text.astype(np.uint8), embeddings, labels,
                               region_num, 0.8)
    assignment = assignment.reshape(text.shape)

    boundaries = []

    # compute text avg confidence

    text_points = estimate_text_confidence(assignment, text_score, region_num)
    for text_inx, text_point in text_points.items():
        if text_inx not in valid_kernel_inx:
            continue
        text_confidence = text_point[0]
        text_point = text_point[2:]
        text_point = np.array(text_point, dtype=int).reshape(-1, 2)
        area = text_point.shape[0]

        if filter_instance(area, text_confidence, min_text_area,
                           min_text_avg_confidence):
            continue
        vertices_confidence = points2boundary(text_point, text_repr_type,
                                              text_confidence)
        if vertices_confidence is not None:
            boundaries.append(vertices_confidence)

    return boundaries