Esempio n. 1
0
def get_cls_reg_ctr_targets(points, gt_bboxes, bbox_scale = 0.25):
    """
        Build classification, regression and centerness targets for a grid of points.

        Args:
            points (Tensor): point coordinates, documented upstream as (1, 2, 19, 19).
            gt_bboxes (Tensor): ground-truth box of each image, (B, 4), in
                [tl_x, tl_y, br_x, br_y] format.
            bbox_scale (float): fraction of the box extent that is kept (centred)
                when deciding which points count as foreground.
        Returns:
            cls_labels (Tensor): (B, 1, 19, 19); non-zero where the point lies strictly
                inside the shrunken box, zero (background) elsewhere.
            bbox_targets (Tensor): (B, 4, 19, 19); signed offsets to the two box corners,
                computed for every point. Only foreground entries are meaningful; the
                loss for background points should be set to 0.
            centerness_targets (Tensor): centerness value for every point; only foreground
                entries are meaningful.
                NOTE(review): no channel axis is added here, so the result appears to be
                (B, 19, 19) rather than (B, 1, 19, 19) - confirm against the caller.
    """
    # Two trailing singleton axes let the boxes broadcast over the point grid.
    boxes = F.add_axis(gt_bboxes, axis=-1)
    boxes = F.add_axis(boxes, axis=-1)  # (B,4,1,1)

    coord0 = points[:, 0, ...]
    coord1 = points[:, 1, ...]

    # cls_labels
    # The template is comparatively large, so the box is shrunk before the inside
    # test. The margin is derived from channel 2 - channel 0 only and is reused
    # for both coordinate channels.
    margin = (boxes[:, 2, ...] - boxes[:, 0, ...]) * (1-bbox_scale) / 2
    above_low0 = coord0 > boxes[:, 0, ...] + margin
    above_low1 = coord1 > boxes[:, 1, ...] + margin
    below_high0 = coord0 < boxes[:, 2, ...] - margin
    below_high1 = coord1 < boxes[:, 3, ...] - margin
    inside = above_low0 * above_low1 * below_high0 * below_high1
    cls_labels = F.add_axis(inside, axis=1)  # (B,1,19,19)

    # bbox_targets
    # Offsets are computed for every grid point, so points outside the box
    # produce negative values.
    to_low = points - boxes[:, 0:2, ...]  # (B, 2, 19, 19)
    to_high = boxes[:, 2:4, ...] - points
    bbox_targets = F.concat([to_low, to_high], axis = 1)  # (B, 4, 19, 19)

    # centerness_targets: sqrt(|min/max along channel 0 * min/max along channel 1|)
    low0, high0 = to_low[:, 0, ...], to_high[:, 0, ...]
    low1, high1 = to_low[:, 1, ...], to_high[:, 1, ...]
    ratio0 = F.minimum(low0, high0) / F.maximum(low0, high0)
    ratio1 = F.minimum(low1, high1) / F.maximum(low1, high1)
    centerness_targets = F.sqrt(F.abs(ratio0 * ratio1))
    return cls_labels, bbox_targets, centerness_targets
Esempio n. 2
0
def mask_anchor_opr(gtboxes, im_info, anchors, labels):
    """Relabel background anchors that lie mostly inside "ignore" boxes.

    Args:
        gtboxes: ground-truth rows; columns 0-3 are used as (x1, y1, x2, y2)
            and column 4 as a tag, where a negative tag marks an ignore box.
        im_info: per-image info; entry 5 is used as the number of valid
            rows in ``gtboxes``.
        anchors: anchor boxes, columns 0-3 used as (x1, y1, x2, y2).
        labels: one label per anchor; 0 is treated as background.

    Returns:
        ``labels`` with 1 subtracted (0 -> -1) for every background anchor
        whose intersection-over-anchor-area with at least one ignore box
        exceeds 0.5. All other labels are unchanged.
    """
    eps = 1e-6
    # Drop padding rows beyond the number of valid boxes.
    gtboxes = gtboxes[:im_info[5].astype(np.int32), :]
    ignore_mask = (gtboxes[:, 4] < 0).astype(np.float32)

    # Pair every anchor with every ground-truth box: (N, K, C).
    N, K = anchors.shape[0], gtboxes.shape[0]
    p_pred = F.broadcast_to(F.expand_dims(anchors, 1),
                            (N, K, anchors.shape[1]))
    p_gt = F.broadcast_to(F.expand_dims(gtboxes, 0), (N, K, gtboxes.shape[1]))

    # Corners of the intersection rectangle.
    max_off = F.concat([
        F.maximum(p_pred[:, :, :2], p_gt[:, :, :2]),
        F.minimum(p_pred[:, :, 2:4], p_gt[:, :, 2:4])
    ],
                       axis=2)

    # Widths/heights use the inclusive-pixel (+1) convention and are floored at 0.
    I = F.maximum(max_off[:, :, 2] - max_off[:, :, 0] + 1, 0) * F.maximum(
        max_off[:, :, 3] - max_off[:, :, 1] + 1, 0)
    A = F.maximum(p_pred[:, :, 2] - p_pred[:, :, 0] + 1, 0) * F.maximum(
        p_pred[:, :, 3] - p_pred[:, :, 1] + 1, 0)

    # Intersection over anchor area, zeroed for boxes that are not ignore boxes.
    IoA = I / (A + eps)
    IoA = IoA * F.expand_dims(ignore_mask, 0)
    mask_flag = (IoA > 0.5).sum(axis=1) > 0

    labels = labels - F.equal(labels, 0).astype(np.float32) * mask_flag.astype(
        np.float32)
    return labels
Esempio n. 3
0
def get_smooth_l1_loss(
    pred_bbox: Tensor,
    gt_bbox: Tensor,
    label: Tensor,
    sigma: int = 3,
    background: int = 0,
    ignore_label: int = -1,
    fix_smooth_l1: bool = False,
    norm_type: str = "fg",
) -> Tensor:
    r"""Smooth l1 loss used in RetinaNet.

    Only foreground boxes (label neither ``background`` nor ``ignore_label``)
    contribute to the summed loss; ``norm_type`` selects the divisor.

    Args:
        pred_bbox (Tensor):
            the predicted bbox with the shape of :math:`(B, A, 4)`
        gt_bbox (Tensor):
            the ground-truth bbox with the shape of :math:`(B, A, 4)`
        label (Tensor):
            the assigned label of boxes with shape of :math:`(B, A)`
        sigma (int):
            the parameter of smooth l1 loss. Default: 3
        background (int):
            the value of background class. Default: 0
        ignore_label (int):
            the value of ignore class. Default: -1
        fix_smooth_l1 (bool):
            forwarded to ``get_smooth_l1_base`` as ``is_fix``; upstream
            documents it as "use huber loss". Default: False
        norm_type (str): current support 'fg', 'all', 'none':
            'fg': loss will be normalized by number of fore-ground samples
            'all': loss will be normalized by number of all non-ignored samples
            'none': not norm
    Returns:
        the calculated smooth l1 loss.
    Raises:
        NotImplementedError: if ``norm_type`` is not one of the values above.
    """
    pred_bbox = pred_bbox.reshape(-1, 4)
    gt_bbox = gt_bbox.reshape(-1, 4)
    label = label.reshape(-1)

    fg_mask = (label != background) * (label != ignore_label)

    losses = get_smooth_l1_base(pred_bbox,
                                gt_bbox,
                                sigma,
                                is_fix=fix_smooth_l1)
    # The numerator is the same for every normalisation mode.
    loss = (losses.sum(axis=1) * fg_mask).sum()

    if norm_type == "fg":
        # Clamp the divisor to 1 so an image without foreground yields 0, not NaN.
        return loss / F.maximum(fg_mask.sum(), 1)
    if norm_type == "all":
        all_mask = (label != ignore_label)
        return loss / F.maximum(all_mask.sum(), 1)
    if norm_type == "none":
        return loss
    raise NotImplementedError(
        "unsupported norm_type: {!r}".format(norm_type))
Esempio n. 4
0
def get_smooth_l1_loss(
    pred_bbox: Tensor,
    gt_bbox: Tensor,
    labels: Tensor,
    beta: int = 1,
    background: int = 0,
    ignore_label: int = -1,
    norm_type: str = "fg",
) -> Tensor:
    r"""Smooth l1 box-regression loss (RetinaNet style).

    The per-coordinate loss from ``get_smooth_l1_base`` is summed over the
    four coordinates, kept only for foreground boxes, summed again, and then
    optionally normalised.

    Args:
        pred_bbox (Tensor):
            predicted boxes, :math:`(B, A, 4)`
        gt_bbox (Tensor):
            ground-truth boxes, :math:`(B, A, 4)`
        labels (Tensor):
            assigned labels of the boxes, :math:`(B, A)`
        beta (int):
            parameter forwarded to ``get_smooth_l1_base``. Default: 1
        background (int):
            label value of the background class. Default: 0
        ignore_label (int):
            label value of ignored boxes. Default: -1
        norm_type (str): one of "fg", "all", "none":
            "fg": divide by the number of foreground samples (at least 1)
            "all": divide by the number of non-ignored samples (at least 1)
            "none": return the plain sum
    Returns:
        the calculated smooth l1 loss.
    Raises:
        NotImplementedError: for any other ``norm_type``.
    """
    flat_pred = pred_bbox.reshape(-1, 4)
    flat_gt = gt_bbox.reshape(-1, 4)
    labels = labels.reshape(-1)

    not_background = labels != background
    not_ignored = labels != ignore_label
    fg_mask = not_background * not_ignored

    per_coord = get_smooth_l1_base(flat_pred, flat_gt, beta)
    total = (per_coord.sum(axis=1) * fg_mask).sum()

    if norm_type == "none":
        return total
    if norm_type == "fg":
        normalizer = fg_mask.sum()
    elif norm_type == "all":
        all_mask = labels != ignore_label
        normalizer = all_mask.sum()
    else:
        raise NotImplementedError

    return total / F.maximum(normalizer, 1)
Esempio n. 5
0
def get_clipped_box(boxes, hw):
    """Clamp box coordinates to the image region.

    Columns of ``boxes`` are read with a stride of four along axis 1:
    columns 0 and 2 (mod 4) are limited to ``[0, hw[1]]`` and columns
    1 and 3 (mod 4) to ``[0, hw[0]]``. The result concatenates the four
    clamped column groups in x1, y1, x2, y2 order along axis 1.
    """
    height_limit, width_limit = hw[0], hw[1]
    # Upper bound for each of the four coordinate slots (x1, y1, x2, y2).
    upper_bounds = (width_limit, height_limit, width_limit, height_limit)

    clamped = [
        F.maximum(F.minimum(boxes[:, slot::4], bound), 0)
        for slot, bound in enumerate(upper_bounds)
    ]
    return F.concat(clamped, axis=1)
Esempio n. 6
0
 def forward(self, x):
     """Return the element-wise maximum of ``x`` and the sum of two conv+BN branches."""
     branch_one = self.bn1(self.conv_frelu1(x))
     branch_two = self.bn2(self.conv_frelu2(x))
     return F.maximum(x, branch_one + branch_two)
Esempio n. 7
0
def _bce_loss_with_logits(output, labels, **kwargs):
    r"""
    Mean sigmoid cross entropy computed directly from logits, using the
    formulation ``max(x, 0) - x * z + log(1 + exp(-|x|))``, see tensorflow
    https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits

    Extra keyword arguments are accepted and ignored.
    """
    positive_part = F.maximum(output, 0)
    label_term = output * labels
    # exp() only ever sees a non-positive argument here.
    log_term = F.log(1 + F.exp(-F.abs(output)))
    per_element = positive_part - label_term + log_term
    return per_element.mean()
Esempio n. 8
0
    def forward(self, pred, target, weight=None):
        """
            Summed IoU-style localisation loss.

            pred: (B*H*W, 4), columns read as (top, left, bottom, right)
            target: same layout as pred
            weight: (B*H*W, ), optional per-sample weight

            NOTE(review): the area formulas suggest the four columns are
            distances from a point to the box sides - confirm with the caller.
            ``self.loc_loss_type`` selects 'iou', 'linear_iou' or 'giou'; any
            other value raises NotImplementedError.
        """
        p_top, p_left = pred[:, 0], pred[:, 1]
        p_bottom, p_right = pred[:, 2], pred[:, 3]
        t_top, t_left = target[:, 0], target[:, 1]
        t_bottom, t_right = target[:, 2], target[:, 3]

        target_area = (t_left + t_right) * (t_top + t_bottom)
        pred_area = (p_left + p_right) * (p_top + p_bottom)

        # Extent of the overlap and of the smallest enclosing box.
        overlap_w = F.minimum(p_left, t_left) + F.minimum(p_right, t_right)
        overlap_h = F.minimum(p_bottom, t_bottom) + F.minimum(p_top, t_top)
        enclose_w = F.maximum(p_left, t_left) + F.maximum(p_right, t_right)
        enclose_h = F.maximum(p_bottom, t_bottom) + F.maximum(p_top, t_top)
        enclose_area = enclose_w * enclose_h

        area_intersect = overlap_w * overlap_h
        area_union = target_area + pred_area - area_intersect

        # The +1.0 on both sides smooths the ratio.
        ious = (area_intersect + 1.0) / (area_union + 1.0)
        gious = ious - (enclose_area - area_union) / enclose_area

        loss_kind = self.loc_loss_type
        if loss_kind == 'iou':
            losses = -F.log(ious)
        elif loss_kind == 'linear_iou':
            losses = 1 - ious
        elif loss_kind == 'giou':
            losses = 1 - gious
        else:
            raise NotImplementedError

        if weight is None:
            return losses.sum()
        return (losses * weight).sum()
Esempio n. 9
0
    def get_cls_reg_ctr_targets(self, points, gt_bboxes, bbox_scale=0.15):
        """
            Build classification, regression and centerness targets for a grid of points.

            Args:
                points (Tensor): (1, 2, 37, 37). Position in the original image that
                    each score-map point corresponds to.
                gt_bboxes (Tensor): ground-truth box of each image, (B, 4), in
                    [tl_x, tl_y, br_x, br_y] format, in original-image coordinates.
                bbox_scale (float): fraction of the box extent that is kept (centred)
                    when deciding which points count as foreground.
            Returns:
                cls_labels (Tensor): (B, 1, 37, 37); non-zero where the point lies strictly
                    inside the shrunken box, zero (background) elsewhere.
                bbox_targets (Tensor): (B, 4, 37, 37); only foreground entries are
                    meaningful, the loss for background points should be set to 0.
                centerness_targets (Tensor): (B, 1, 37, 37); only foreground entries are
                    meaningful, the loss for background points should be set to 0.

            ``requires_grad`` is set to False on all three outputs.
        """
        # Unpacking doubles as a check that gt_bboxes is 2-D; the values are unused.
        _batch, _ = gt_bboxes.shape
        # Two trailing singleton axes let the boxes broadcast over the point grid.
        boxes = F.add_axis(gt_bboxes, axis=-1)
        boxes = F.add_axis(boxes, axis=-1)  # (B,4,1,1)

        coord0 = points[:, 0, ...]
        coord1 = points[:, 1, ...]

        # cls_labels
        # The template is comparatively large, so the box is shrunk before the
        # inside test. The margin is derived from channel 2 - channel 0 only and
        # is reused for both coordinate channels.
        margin = (boxes[:, 2, ...] - boxes[:, 0, ...]) * (1 - bbox_scale) / 2
        above_low0 = coord0 > boxes[:, 0, ...] + margin
        above_low1 = coord1 > boxes[:, 1, ...] + margin
        below_high0 = coord0 < boxes[:, 2, ...] - margin
        below_high1 = coord1 < boxes[:, 3, ...] - margin
        inside = above_low0 * above_low1 * below_high0 * below_high1
        cls_labels = F.add_axis(inside, axis=1)  # (B, 1, 37, 37)
        cls_labels.requires_grad = False

        # bbox_targets
        # Offsets are computed for every grid point, so points outside the box
        # produce negative values.
        to_low = points - boxes[:, 0:2, ...]  # (B, 2, 37, 37), point minus top-left corner
        to_high = boxes[:, 2:4, ...] - points
        bbox_targets = F.concat([to_low, to_high], axis=1)  # (B, 4, 37, 37)
        bbox_targets.requires_grad = False

        # centerness_targets
        low0, high0 = to_low[:, 0, ...], to_high[:, 0, ...]
        low1, high1 = to_low[:, 1, ...], to_high[:, 1, ...]
        ratio0 = F.minimum(low0, high0) / F.maximum(low0, high0)
        ratio1 = F.minimum(low1, high1) / F.maximum(low1, high1)
        centerness_targets = F.sqrt(F.abs(ratio0 * ratio1))
        centerness_targets = F.add_axis(centerness_targets, axis=1)  # (B,1,37,37)
        centerness_targets.requires_grad = False
        return cls_labels, bbox_targets, centerness_targets
Esempio n. 10
0
def roi_pool(
    rpn_fms,
    rois,
    stride,
    pool_shape,
    pooler_type="roi_align",
):
    """Pool every RoI from the feature-map level that matches its size.

    Args:
        rpn_fms: sequence of feature maps, one per level.
        rois: RoI tensor; columns 1-4 are used as (x1, y1, x2, y2).
            NOTE(review): column 0 is presumably the batch index - confirm.
        stride: stride of each level, same length as ``rpn_fms``.
        pool_shape: output shape passed to the pooling operator.
        pooler_type: ``"roi_align"`` (default) or ``"roi_pool"``.

    Returns:
        Pooled features, one entry per input RoI, in the input RoI order.

    Raises:
        ValueError: if ``pooler_type`` is not a supported value. Previously
            this surfaced as an unbound-variable error inside the loop.
    """
    rois = rois.detach()
    assert len(stride) == len(rpn_fms)
    if pooler_type not in ("roi_pool", "roi_align"):
        raise ValueError(
            "pooler_type must be 'roi_pool' or 'roi_align', got {!r}".format(
                pooler_type))

    canonical_level = 4
    canonical_box_size = 224
    min_level = int(math.log2(stride[0]))
    max_level = int(math.log2(stride[-1]))

    num_fms = len(rpn_fms)
    box_area = (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2])
    # level = floor(canonical_level + log2(sqrt(area) / canonical_box_size)),
    # clamped to the available levels and shifted to a 0-based index.
    assigned_level = F.floor(canonical_level +
                             F.log(F.sqrt(box_area) / canonical_box_size) /
                             np.log(2)).astype("int32")
    assigned_level = F.minimum(assigned_level, max_level)
    assigned_level = F.maximum(assigned_level, min_level)
    assigned_level = assigned_level - min_level

    # avoid empty assignment: append one all-zero dummy RoI per level so that
    # every level pools at least one box; the dummies are dropped at the end.
    assigned_level = F.concat([
        assigned_level,
        F.arange(num_fms, dtype="int32", device=assigned_level.device)
    ], )
    rois = F.concat([rois, F.zeros((num_fms, rois.shape[-1]))])

    pool_list, inds_list = [], []
    for i in range(num_fms):
        _, inds = F.cond_take(assigned_level == i, assigned_level)
        level_rois = rois[inds]

        if pooler_type == "roi_pool":
            pool_fm = F.nn.roi_pooling(rpn_fms[i],
                                       level_rois,
                                       pool_shape,
                                       mode="max",
                                       scale=1.0 / stride[i])
        else:  # "roi_align", guaranteed by the validation above
            pool_fm = F.nn.roi_align(
                rpn_fms[i],
                level_rois,
                pool_shape,
                mode="average",
                spatial_scale=1.0 / stride[i],
                sample_points=2,
                aligned=True,
            )
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # Sorting the gathered indices restores the original RoI order; the dummy
    # RoIs carry the largest indices, so they end up last and are sliced off.
    fm_order = F.argsort(F.concat(inds_list, axis=0))
    pool_feature = F.concat(pool_list, axis=0)
    pool_feature = pool_feature[fm_order][:-num_fms]

    return pool_feature
Esempio n. 11
0
 def forward(self, x):
     """Normalise ``x`` by its per-channel mean square, then apply an affine map and a learned floor.

     ``x`` must be 4-D. The result is
     ``max(gamma * x / sqrt(mean(x**2) + |eps|) + beta, tau)`` where the mean
     runs over all axes after the first two.
     """
     B, C, _, _ = x.shape
     flat_squares = F.pow(x, 2).reshape(B, C, -1)
     mean_square = flat_squares.mean(axis=-1, keepdims=True)  # [B, C, 1]
     nu2 = F.expand_dims(mean_square, axis=-1)  # [B, C, 1, 1]
     normalized = x / F.sqrt(nu2 + F.abs(self.eps))
     return F.maximum(self.gamma * normalized + self.beta, self.tau)
Esempio n. 12
0
def layernorm(x):
    """Standardise each sample of ``x`` over all of its non-batch elements.

    The input is flattened to (batch, -1), centred and divided by the
    standard deviation (floored at 1e-6), then reshaped back to the
    original shape. No learnable scale or shift is applied.
    """
    input_shape = x.shape
    flat = x.reshape(input_shape[0], -1)
    mean = F.mean(flat, axis=1, keepdims=True)
    centered = flat - mean
    variance = F.mean(centered**2, axis=1, keepdims=True)
    # Floor the divisor so constant inputs do not divide by zero.
    normalized = centered / F.maximum(F.sqrt(variance), 1e-6)
    return normalized.reshape(input_shape)
Esempio n. 13
0
 def forward(self, input):
     """
     Return the element-wise maximum of ``input`` and a conv+BN transform of it.
     """
     threshold = self.bn_frelu(self.conv_frelu(input))
     return F.maximum(input, threshold)
Esempio n. 14
0
def roi_pool(
    rpn_fms,
    rois,
    stride,
    pool_shape,
    roi_type="roi_align",
):
    """Pool every RoI from the feature-map level that matches its size.

    Args:
        rpn_fms: sequence of feature maps, one per level.
        rois: RoI tensor; columns 1-4 are used as (x1, y1, x2, y2).
            NOTE(review): column 0 is presumably the batch index - confirm.
        stride: stride of each level, same length as ``rpn_fms``.
        pool_shape: output shape passed to the pooling operator.
        roi_type: ``"roi_align"`` (default) or ``"roi_pool"``.

    Returns:
        Pooled features, one entry per input RoI, in the input RoI order.
    """
    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = math.log2(stride[0])
    max_level = math.log2(stride[-1])

    num_fms = len(rpn_fms)
    roi_w = rois[:, 3] - rois[:, 1]
    roi_h = rois[:, 4] - rois[:, 2]
    box_area = roi_w * roi_h
    # level = floor(canonical_level + log2(sqrt(area) / canonical_box_size)),
    # clamped to the available levels and shifted to a 0-based index.
    raw_level = canonical_level + (
        F.log(box_area.sqrt() / canonical_box_size) / np.log(2))
    levels = F.floor(raw_level)
    levels = F.maximum(F.minimum(levels, max_level), min_level)
    levels = levels - min_level

    # avoid empty assignment: append one all-zero dummy RoI per level so that
    # every level pools at least one box; the dummies are dropped at the end.
    dummy_levels = mge.tensor(np.arange(num_fms, dtype=np.int32))
    levels = F.concat([levels, dummy_levels], )
    dummy_rois = mge.zeros((num_fms, rois.shapeof(-1)))
    rois = F.concat([rois, dummy_rois])

    pooled_per_level, inds_per_level = [], []
    for i in range(num_fms):
        on_level = levels == i
        _, inds = F.cond_take(on_level == 1, on_level)
        level_rois = rois.ai[inds]
        if roi_type == "roi_pool":
            pool_fm = F.roi_pooling(rpn_fms[i],
                                    level_rois,
                                    pool_shape,
                                    mode="max",
                                    scale=1.0 / stride[i])
        elif roi_type == "roi_align":
            pool_fm = F.roi_align(
                rpn_fms[i],
                level_rois,
                pool_shape,
                mode="average",
                spatial_scale=1.0 / stride[i],
                sample_points=2,
                aligned=True,
            )
        pooled_per_level.append(pool_fm)
        inds_per_level.append(inds)

    # Sorting the gathered indices restores the original RoI order; the dummy
    # RoIs carry the largest indices, so they end up last and are sliced off.
    gathered_inds = F.concat(inds_per_level, axis=0)
    restore_order = F.argsort(gathered_inds.reshape(1, -1))[1].reshape(-1)
    pool_feature = F.concat(pooled_per_level, axis=0)
    return pool_feature.ai[restore_order][:-num_fms]
Esempio n. 15
0
def box_overlap_opr(box: Tensor, gt: Tensor) -> Tensor:
    """
    Compute the IoU (intersection over union) between all N x M pairs
    of boxes from two sets.
    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        box (Tensor): N boxes; columns 0-3 are the coordinates.
        gt (Tensor): M boxes; columns 0-3 are the coordinates.

    Returns:
        Tensor: IoU, sized [N,M].
    """
    num_box, num_gt = box.shape[0], gt.shape[0]
    pair_shape = (num_box, num_gt)

    # Expand both sets to a common (N, M, C) grid of pairs.
    box_grid = F.broadcast_to(F.expand_dims(box, 1),
                              (num_box, num_gt, box.shape[1]))
    gt_grid = F.broadcast_to(F.expand_dims(gt, 0),
                             (num_box, num_gt, gt.shape[1]))

    # Intersection rectangle; negative extents (no overlap) are floored at 0.
    inter_right = F.minimum(box_grid[:, :, 2], gt_grid[:, :, 2])
    inter_left = F.maximum(box_grid[:, :, 0], gt_grid[:, :, 0])
    inter_bottom = F.minimum(box_grid[:, :, 3], gt_grid[:, :, 3])
    inter_top = F.maximum(box_grid[:, :, 1], gt_grid[:, :, 1])
    inter = F.maximum(inter_right - inter_left, 0) * F.maximum(
        inter_bottom - inter_top, 0)

    def _area(rows):
        # Area with each side length floored at 0.
        return F.maximum(rows[:, 2] - rows[:, 0], 0) * F.maximum(
            rows[:, 3] - rows[:, 1], 0)

    box_area_grid = F.broadcast_to(F.expand_dims(_area(box), 1), pair_shape)
    gt_area_grid = F.broadcast_to(F.expand_dims(_area(gt), 0), pair_shape)

    union = box_area_grid + gt_area_grid - inter
    return F.maximum(inter / union, 0)
Esempio n. 16
0
def softmax_cross_entropy(pred, label, axis=1, ignore_index=255):
    """Mean softmax cross entropy over all labels different from ``ignore_index``.

    The class dimension of ``pred`` is ``axis``. The divisor is the number of
    non-ignored labels, floored at 1.
    """
    # Subtract the (gradient-free) per-position maximum for numerical stability.
    peak = F.zero_grad(pred.max(axis=axis, keepdims=True))
    shifted = pred - peak
    log_normalizer = F.log(F.exp(shifted).sum(axis=axis, keepdims=True))
    log_prob = shifted - log_normalizer

    valid = 1 - F.equal(label, ignore_index)
    # Ignored labels are mapped to class 0 so the lookup stays in range;
    # their contribution is zeroed by ``valid`` below.
    safe_label = label * valid
    picked = F.indexing_one_hot(log_prob, safe_label, axis)
    total = -(picked * valid).sum()
    return total / F.maximum(valid.sum(), 1)
Esempio n. 17
0
def softmax_loss(score, label, ignore_label=-1):
    """Mean softmax cross entropy over all labels different from ``ignore_label``.

    Args:
        score: logits with the class dimension on axis 1.
        label: class index per row; rows equal to ``ignore_label`` are skipped.
        ignore_label: label value to exclude from the loss.

    Returns:
        Summed negative log-probability of the labelled class divided by the
        number of non-ignored rows (floored at 1).
    """
    max_score = F.zero_grad(score.max(axis=1, keepdims=True))
    # Rebind rather than use `score -= ...`, so that the tensor passed in by
    # the caller is never modified in place.
    score = score - max_score
    log_prob = score - F.log(F.exp(score).sum(axis=1, keepdims=True))
    mask = (label != ignore_label)
    # Ignored labels are mapped to class 0 so the lookup stays in range;
    # their contribution is zeroed by ``mask`` below.
    vlabel = label * mask
    loss = -(F.indexing_one_hot(log_prob, vlabel.astype("int32"), 1) *
             mask).sum()
    loss = loss / F.maximum(mask.sum(), 1)
    return loss
Esempio n. 18
0
def get_focal_loss(
    logits: Tensor,
    labels: Tensor,
    ignore_label: int = -1,
    background: int = 0,
    alpha: float = 0.5,
    gamma: float = 0,
    norm_type: str = "fg",
) -> Tensor:
    r"""Focal Loss for Dense Object Detection:
    <https://arxiv.org/pdf/1708.02002.pdf>

    .. math::

        FL(p_t) = -\alpha_t(1-p_t)^\gamma \log(p_t)

    Class channel ``c`` of ``logits`` (0-based) is matched against label
    value ``c + 1``.

    Args:
        logits (Tensor):
            predicted logits, :math:`(B, A, C)`
        labels (Tensor):
            assigned labels of the boxes, :math:`(B, A)`
        ignore_label (int):
            label value of ignored boxes. Default: -1
        background (int):
            label value of the background class. Default: 0
        alpha (float):
            weight of the positive term; negatives use ``1 - alpha``. Default: 0.5
        gamma (float):
            focusing exponent. Default: 0
        norm_type (str): one of "fg", "none":
            "fg": divide by the number of foreground samples (at least 1)
            "none": return the plain sum

    Returns:
        the calculated focal loss.
    Raises:
        NotImplementedError: for any other ``norm_type``.
    """
    num_classes = logits.shape[2]
    class_ids = F.arange(1, num_classes + 1)

    labels = F.add_axis(labels, axis=2)
    probs = F.sigmoid(logits)
    pos_term = (1 - probs)**gamma * layers.logsigmoid(logits)
    neg_term = probs**gamma * layers.logsigmoid(-logits)

    is_target = labels == class_ids
    is_other = labels != class_ids
    is_kept = labels != ignore_label

    pos_loss = -is_target * pos_term * alpha
    # Ignored boxes contribute nothing to the negative term either.
    neg_loss = -is_other * is_kept * neg_term * (1 - alpha)
    loss = (pos_loss + neg_loss).sum()

    if norm_type == "none":
        return loss
    if norm_type == "fg":
        fg_mask = (labels != background) * (labels != ignore_label)
        return loss / F.maximum(fg_mask.sum(), 1)
    raise NotImplementedError
Esempio n. 19
0
def softmax_loss(pred, label, ignore_label=-1):
    """Mean softmax cross entropy over all labels different from ``ignore_label``.

    Args:
        pred: logits with the class dimension on axis 1.
        label: class index per row; rows equal to ``ignore_label`` are skipped.
        ignore_label: label value to exclude from the loss.

    Returns:
        Summed negative log-probability of the labelled class divided by the
        number of non-ignored rows (floored at 1).
    """
    max_pred = pred.max(axis=1, keepdims=True).detach()
    # Rebind rather than use `pred -= ...`, so that the tensor passed in by
    # the caller is never modified in place.
    pred = pred - max_pred
    log_prob = pred - F.log(F.exp(pred).sum(axis=1, keepdims=True))
    mask = 1 - F.equal(label, ignore_label)
    # Ignored labels are mapped to class 0 so the lookup stays in range;
    # their contribution is zeroed by ``mask`` below.
    vlabel = label * mask.astype(np.float32)
    loss = -(F.nn.indexing_one_hot(log_prob, vlabel.astype(np.int32),
                                   1).flatten() * mask)
    loss = loss.sum() / F.maximum(mask.sum(), 1)
    return loss
Esempio n. 20
0
def iou_l1_loss(pred, max_overlaps, gt, ignore_label=-1, background=0):
    """Mean absolute error between predicted and target overlaps on foreground entries.

    ``pred`` is reshaped to ``(pred.shape[0], -1, max_overlaps.shape[2])``.
    Entries of ``gt`` equal to ``background`` or ``ignore_label`` are
    excluded; the divisor is the number of remaining entries, floored at 1.
    """
    batch = pred.shape[0]
    pred = pred.reshape(batch, -1, max_overlaps.shape[2])
    abs_err = F.abs(pred - max_overlaps)

    not_background = 1 - F.equal(gt, background).astype(np.float32)
    not_ignored = 1 - F.equal(gt, ignore_label).astype(np.float32)
    keep = not_background * not_ignored
    keep = keep.reshape(keep.shape[0], -1, pred.shape[2])

    return (abs_err * keep).sum() / F.maximum(keep.sum(), 1)
Esempio n. 21
0
def smooth_l1_loss_retina(pred,
                          gt,
                          label,
                          sigma=3,
                          background=0,
                          ignore_label=-1,
                          axis=2):
    """Smooth-L1 loss summed over ``axis`` and averaged over the masked samples.

    The mask is the first value returned by ``_get_mask_of_label``; the
    second returned value is not used. The divisor is the mask sum,
    floored at 1.
    """
    per_coord = _smooth_l1_base(pred, gt, sigma)
    sample_mask, _unused_ignore_mask = _get_mask_of_label(
        label, background, ignore_label)
    per_sample = per_coord.sum(axis=axis) * sample_mask
    return per_sample.sum() / F.maximum(sample_mask.sum(), 1)
Esempio n. 22
0
    def compute_gemini_loss(self, prob, bbox_targets, labels):
        """Order-invariant loss over pairs of predictions.

        Consecutive rows of ``prob`` are grouped in pairs. The EMD loss is
        evaluated for both orderings of each pair and the smaller value is
        kept. The sum is divided by the number of pairs in which both labels
        are greater than -1 (floored at 1).
        """
        width = prob.shape[1]
        paired = prob.reshape(-1, 2, width).transpose(1, 0, 2)
        first, second = paired[0], paired[1]

        loss_as_given = self.compute_emd_loss(first, second, bbox_targets,
                                              labels)
        loss_swapped = self.compute_emd_loss(second, first, bbox_targets,
                                             labels)
        both_orders = F.stack([loss_as_given, loss_swapped], axis=1)

        valid_pairs = (labels > -1).reshape(-1, 2).sum(axis=1) > 1
        best_total = both_orders.min(axis=1).sum()
        return best_total / F.maximum(valid_pairs.sum(), 1)
Esempio n. 23
0
def softmax_loss(scores: Tensor,
                 labels: Tensor,
                 ignore_label: int = -1) -> Tensor:
    """Mean softmax cross entropy over all labels different from ``ignore_label``.

    Args:
        scores: logits with the class dimension on axis 1.
        labels: class index per row; rows equal to ``ignore_label`` are skipped.
        ignore_label: label value to exclude from the loss.

    Returns:
        Summed negative log-probability of the labelled class divided by the
        number of non-ignored rows (floored at 1).
    """
    max_scores = F.zero_grad(scores.max(axis=1, keepdims=True))
    # Rebind rather than use `scores -= ...`, so that the tensor passed in by
    # the caller is never modified in place.
    scores = scores - max_scores
    log_prob = scores - F.log(F.exp(scores).sum(axis=1, keepdims=True))
    mask = labels != ignore_label
    # Ignored labels are mapped to class 0 so the lookup stays in range;
    # their contribution is zeroed by ``mask`` below.
    vlabels = labels * mask
    loss = -(F.indexing_one_hot(log_prob, vlabels.astype("int32"), 1) *
             mask).sum()
    loss = loss / F.maximum(mask.sum(), 1)
    return loss
Esempio n. 24
0
    def compute_gemini_loss_opr(self, prob, bbox_targets, labels):
        """Order-invariant EMD loss over the two predictions of each RoI.

        ``prob`` is reshaped to ``(n, 2, -1)`` so that each row holds two
        predictions. The EMD loss is evaluated for both orderings and the
        smaller value per row is kept.

        Returns:
            dict with key ``'rcnn_emd_loss'``: the sum of the per-row minima
            divided by the number of rows (floored at 1).
        """
        prob = prob.reshape(prob.shape[0], 2, -1)
        prob = prob.transpose(1, 0, 2)
        a, b = prob[0], prob[1]
        loss0 = self.compute_emd_loss_opr(a, b, bbox_targets, labels)
        loss1 = self.compute_emd_loss_opr(b, a, bbox_targets, labels)
        # MegEngine's stack takes `axis` (not the PyTorch-style `dim`).
        loss = F.stack([loss0, loss1], axis=1)
        # `min(axis=1)` already returns the per-row minima; indexing it with
        # [0] (the PyTorch (values, indices) idiom) would keep only the first row.
        emd_loss = loss.min(axis=1).sum() / F.maximum(loss.shape[0], 1)
        loss = {'rcnn_emd_loss': emd_loss}
        return loss
Esempio n. 25
0
def get_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor:
    """
    Compute the IoU (intersection over union) between all N x M pairs
    of boxes from two sets of size N and M.
    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively.

    Returns:
        Tensor: IoU, sized [N,M].
    """
    num_first = boxes1.shape[0]
    num_second = boxes2.shapeof()[0]

    # Expand both sets to a common (N, M, 4) grid of pairs.
    first_grid = F.add_axis(boxes1, 1).broadcast(num_first, num_second, 4)
    second_grid = F.add_axis(boxes2, 0).broadcast(num_first, num_second, 4)

    # Intersection rectangle; negative extents (no overlap) are floored at 0.
    overlap_w = F.minimum(first_grid[:, :, 2], second_grid[:, :, 2]) - F.maximum(
        first_grid[:, :, 0], second_grid[:, :, 0]
    )
    overlap_h = F.minimum(first_grid[:, :, 3], second_grid[:, :, 3]) - F.maximum(
        first_grid[:, :, 1], second_grid[:, :, 1]
    )
    inter = F.maximum(overlap_w, 0) * F.maximum(overlap_h, 0)

    first_area = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    second_area = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

    area_shape = (boxes1.shape[0], boxes2.shapeof()[0])
    first_area_grid = F.add_axis(first_area, 1).broadcast(*area_shape)
    second_area_grid = F.add_axis(second_area, 0).broadcast(*area_shape)

    union = first_area_grid + second_area_grid - inter
    return F.maximum(inter / union, 0)
Esempio n. 26
0
def roi_pool(rpn_fms, rois, stride, pool_shape, roi_type='roi_align',
             labels=None, bbox_targets=None):
    """Pool every RoI from the feature-map level that matches its size.

    Unlike an order-restoring pooler, the outputs here stay grouped by
    level; ``rois`` (and ``labels`` / ``bbox_targets`` when given) are
    reordered the same way so that all returned values line up.

    Args:
        rpn_fms: sequence of feature maps, one per level.
        rois: RoI tensor; columns 1-4 are used as (x1, y1, x2, y2).
            NOTE(review): column 0 is presumably the batch index - confirm.
        stride: stride of each level, same length as ``rpn_fms``.
        pool_shape: output shape passed to the pooling operator.
        roi_type: ``'roi_align'`` (default) or ``'roi_pool'``.
        labels: optional per-RoI labels.
        bbox_targets: optional per-RoI regression targets; read only when
            ``labels`` is given.

    Returns:
        ``(pool_feature, rois, labels, bbox_targets)``; the last two are
        gradient-stopped tensors, or ``None`` when ``labels`` is ``None``.
    """
    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = math.log2(stride[0])
    max_level = math.log2(stride[-1])

    num_fms = len(rpn_fms)
    roi_w = rois[:, 3] - rois[:, 1]
    roi_h = rois[:, 4] - rois[:, 2]
    box_sizes = F.sqrt(roi_w * roi_h)
    # level = floor(canonical_level + log2(size / canonical_box_size)),
    # clamped to the available levels and shifted to a 0-based index.
    levels = F.floor(
        canonical_level + F.log(box_sizes / canonical_box_size) / np.log(2)
    )
    levels = F.maximum(F.minimum(levels, max_level), min_level)
    levels = levels - min_level

    # One all-zero dummy RoI is appended per level so that no level is empty.
    # ``real_flags`` is 1 for real RoIs and 0 for dummies, so the dummies can
    # be filtered out again after pooling.
    real_flags = F.concat(
        [mge.ones(levels.shapeof()[0]), mge.zeros(num_fms)], axis=0)
    dummy_levels = mge.tensor(np.arange(num_fms, dtype=np.int32))
    levels = F.concat([levels, dummy_levels], axis=0)
    rois = F.concat([rois, mge.zeros((num_fms, rois.shapeof()[-1]))], axis=0)

    with_targets = labels is not None
    if with_targets:
        labels = F.concat(
            [labels, mge.ones((num_fms, labels.shapeof()[-1]))], axis=0)
        bbox_targets = F.concat(
            [bbox_targets, mge.zeros((num_fms, bbox_targets.shapeof()[-1]))],
            axis=0)

    pooled_per_level, inds_per_level = [], []
    for i in range(num_fms):
        inds = mask_to_inds(levels == i)
        level_rois = rois.ai[inds]
        if roi_type == 'roi_pool':
            pool_fm = F.roi_pooling(
                rpn_fms[i], level_rois, pool_shape, mode='max',
                scale=1.0/stride[i])
        elif roi_type == 'roi_align':
            pool_fm = F.roi_align(
                rpn_fms[i], level_rois, pool_shape, mode='average',
                spatial_scale=1.0/stride[i], sample_points=2, aligned=True)
        pooled_per_level.append(pool_fm)
        inds_per_level.append(inds)

    level_order = F.concat(inds_per_level, axis=0)
    pool_feature = F.concat(pooled_per_level, axis=0)

    # Positions (in level-grouped order) that belong to real RoIs.
    real_inds = mask_to_inds(real_flags.ai[level_order])
    pool_feature = pool_feature.ai[real_inds]
    rois = rois.ai[level_order, :].ai[real_inds, :]

    if not with_targets:
        return pool_feature, rois, None, None

    labels = labels.ai[level_order].ai[real_inds]
    bbox_targets = bbox_targets.ai[level_order, :].ai[real_inds, :]
    return pool_feature, rois, F.zero_grad(labels), F.zero_grad(bbox_targets)
Esempio n. 27
0
def smooth_l1_loss_rcnn(pred,
                        gt,
                        label,
                        sigma=1,
                        background=0,
                        ignore_label=-1):
    """
        Smooth-L1 box loss averaged over samples with a positive label.

        pred    : (minibatch, class_num, 4)
        gt      : (minibatch, 4)
        label   : (minibatch,  )

        ``background`` and ``ignore_label`` are accepted but not used here;
        the divisor is the count of ``label > 0``, floored at 1.
    """
    per_sample = smooth_l1_loss_rcnn_opr(pred, gt, label, sigma)
    total = per_sample.sum()
    num_positive = (label > 0).sum()
    return total / F.maximum(num_positive, 1)
Esempio n. 28
0
    def compute_regular_loss(self, prob, bbox_targets, labels):
        """Classification and box-regression losses for one prediction head.

        The last ``self.n`` columns of ``prob`` are class scores; the
        remaining columns are box offsets, reshaped to (rows, -1, 4).

        Returns:
            dict with keys ``'<name>_cls_loss'`` and ``'<name>_bbox_loss'``,
            where ``<name>`` is ``self.name``. The box loss is divided by the
            number of positive labels (floored at 1).
        """
        num_score_cols = self.n
        box_part = prob[:, :-num_score_cols]
        score_part = prob[:, -num_score_cols:]
        box_part = box_part.reshape(box_part.shape[0], -1, 4)

        cls_loss = softmax_loss(score_part, labels)

        raw_bbox_loss = smooth_l1_loss_rcnn_opr(box_part, bbox_targets, labels,
                                                config.rcnn_smooth_l1_beta)
        num_positive = (labels > 0).sum()
        bbox_loss = raw_bbox_loss.sum() / F.maximum(num_positive, 1)

        return {
            '{}_cls_loss'.format(self.name): cls_loss,
            '{}_bbox_loss'.format(self.name): bbox_loss,
        }
Esempio n. 29
0
    def forward(self, fpn_fms, rcnn_rois, im_info=None, gt_boxes=None):
        """Run the RCNN head on pooled RoI features.

        In training mode returns a dict with ``"loss_rcnn_cls"`` and
        ``"loss_rcnn_bbox"``; otherwise returns ``(pred_bbox, pred_scores)``
        with the background column removed from the scores.
        """
        rcnn_rois, labels, bbox_targets = self.get_ground_truth(
            rcnn_rois, im_info, gt_boxes)

        selected_fms = [fpn_fms[name] for name in self.in_features]
        pooled = layers.roi_pool(
            selected_fms,
            rcnn_rois,
            self.stride,
            self.pooling_size,
            self.pooling_method,
        )
        flat = F.flatten(pooled, start_axis=1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        pred_logits = self.pred_cls(hidden)
        pred_offsets = self.pred_delta(hidden)

        if not self.training:
            # slice 1 for removing background
            pred_scores = F.softmax(pred_logits, axis=1)[:, 1:]
            pred_offsets = pred_offsets.reshape(-1, 4)
            per_class_shape = (rcnn_rois.shape[0], self.cfg.num_classes, 4)
            # rois (N, 4) -> (N, 1, 4) -> (N, num_classes, 4) -> (N * num_classes, 4)
            roi_coords = F.expand_dims(rcnn_rois[:, 1:5], axis=1)
            base_rois = F.broadcast_to(roi_coords,
                                       per_class_shape).reshape(-1, 4)
            pred_bbox = self.box_coder.decode(base_rois, pred_offsets)
            return pred_bbox, pred_scores

        # loss for rcnn classification
        loss_rcnn_cls = F.loss.cross_entropy(pred_logits, labels, axis=1)

        # loss for rcnn regression: only foreground RoIs contribute, each
        # using the offsets predicted for its own class (label - 1).
        pred_offsets = pred_offsets.reshape(-1, self.cfg.num_classes, 4)
        num_samples = labels.shape[0]
        fg_mask = labels > 0
        fg_bbox_loss = layers.smooth_l1_loss(
            pred_offsets[fg_mask, labels[fg_mask] - 1],
            bbox_targets[fg_mask],
            self.cfg.rcnn_smooth_l1_beta,
        )
        # Normalised by the total number of sampled RoIs, not only foreground.
        loss_rcnn_bbox = fg_bbox_loss.sum() / F.maximum(num_samples, 1)

        return {
            "loss_rcnn_cls": loss_rcnn_cls,
            "loss_rcnn_bbox": loss_rcnn_bbox,
        }
Esempio n. 30
0
 def forward(self, data, idx, roi):
     """Warp a region of each NHWC image to the full (H, W) size and return it as float32 NCHW.

     ``roi[:, 0]`` is read as the (x, y) minimum corner and ``roi[:, 1]`` as
     the (x, y) maximum corner. A single scale, the larger of the two
     per-axis ratios, is used for both axes.
     NOTE(review): ``self.M`` and ``self.I`` are presumably a zero 3x3
     template and a constant one - confirm where they are defined.
     """
     N, H, W, _ = data.shape
     xmin, ymin = roi[:, 0, 0], roi[:, 0, 1]
     xmax, ymax = roi[:, 1, 0], roi[:, 1, 1]
     scale = F.maximum((xmax - xmin) / W, (ymax - ymin) / H)

     # Per-sample matrix: scale on the diagonal, offset in the last column.
     corner_value = F.broadcast_to(self.I, (N, ))
     matrix = F.broadcast_to(self.M, (N, 3, 3))
     matrix[:, 0, 0] = scale
     matrix[:, 0, 2] = xmin
     matrix[:, 1, 1] = scale
     matrix[:, 1, 2] = ymin
     matrix[:, 2, 2] = corner_value

     warped = F.warp_perspective(data,
                                 matrix, (H, W),
                                 mat_idx=idx,
                                 border_mode="CONSTANT",
                                 format="NHWC")
     return warped.transpose(0, 3, 1, 2).astype(np.float32)