def bbox_transform_inv_opr(bbox, deltas):
    """Decode regression deltas into box coordinates (box axis is 1).

    Box sizes use the "+1" convention visible in the code: width is
    ``x2 - x1 + 1`` and height is ``y2 - y1 + 1``.

    Args:
        bbox: reference boxes, read as ``bbox[:, 0..3]`` = (x1, y1, x2, y2).
        deltas: learned offsets, read as ``deltas[:, 0..3]`` =
            (dx, dy, dw, dh).  dx/dy are multiplied by the box width/height;
            dw/dh are log-scale factors applied through ``F.exp``.

    Returns:
        Tensor with the decoded (x1, y1, x2, y2) stacked along axis 1.
    """
    # Upper bound for dw/dh: exp(dw) can enlarge a box by at most 1000/16.
    max_delta = math.log(1000.0 / 16)

    bbox_width = bbox[:, 2] - bbox[:, 0] + 1
    bbox_height = bbox[:, 3] - bbox[:, 1] + 1
    bbox_ctr_x = bbox[:, 0] + 0.5 * bbox_width
    bbox_ctr_y = bbox[:, 1] + 0.5 * bbox_height

    pred_ctr_x = bbox_ctr_x + deltas[:, 0] * bbox_width
    pred_ctr_y = bbox_ctr_y + deltas[:, 1] * bbox_height

    dw = F.minimum(deltas[:, 2], max_delta)
    dh = F.minimum(deltas[:, 3], max_delta)
    pred_width = bbox_width * F.exp(dw)
    pred_height = bbox_height * F.exp(dh)

    pred_x1 = pred_ctr_x - 0.5 * pred_width
    pred_y1 = pred_ctr_y - 0.5 * pred_height
    pred_x2 = pred_ctr_x + 0.5 * pred_width
    pred_y2 = pred_ctr_y + 0.5 * pred_height

    pred_boxes = F.stack([pred_x1, pred_y1, pred_x2, pred_y2], axis=1)
    return pred_boxes
def get_cls_reg_ctr_targets(points, gt_bboxes, bbox_scale=0.25):
    """Build classification, box-regression and centerness targets per point.

    Args:
        points (Tensor): point coordinates; the original notes give the shape
            as (1, 2, 19, 19).  Channel 0 is compared against box columns
            0 and 2, channel 1 against box columns 1 and 3.
        gt_bboxes (Tensor): ground-truth box of each image, (B, 4), in
            [tl_x, tl_y, br_x, br_y] format.
        bbox_scale (float): a margin of ``(1 - bbox_scale) / 2`` times the box
            width is trimmed from every side before the inside test.

    Returns:
        cls_labels (Tensor): (B, 1, 19, 19); non-zero where the point lies
            strictly inside the shrunk box, zero for background.
        bbox_targets (Tensor): (B, 4, 19, 19); distances from every point to
            the top-left and bottom-right corners.  Computed for all points
            (so it can be negative); the loss for background points should
            be set to 0.
        centerness_targets (Tensor): centerness of every point.  No channel
            axis is added here, so it is one rank lower than ``cls_labels``.
            Only foreground values are meaningful.
    """
    # (B, 4) -> (B, 4, 1, 1) so that boxes broadcast over the point grid.
    boxes = F.add_axis(F.add_axis(gt_bboxes, axis=-1), axis=-1)

    # ---- classification labels ----
    # NOTE(review): the margin is derived from the box width only but is
    # applied to both coordinates -- confirm this is intended for
    # non-square boxes.
    margin = (boxes[:, 2, ...] - boxes[:, 0, ...]) * (1 - bbox_scale) / 2
    above_min0 = points[:, 0, ...] > boxes[:, 0, ...] + margin
    above_min1 = points[:, 1, ...] > boxes[:, 1, ...] + margin
    below_max0 = points[:, 0, ...] < boxes[:, 2, ...] - margin
    below_max1 = points[:, 1, ...] < boxes[:, 3, ...] - margin
    inside = above_min0 * above_min1 * below_max0 * below_max1
    cls_labels = F.add_axis(inside, axis=1)  # (B, 1, 19, 19)

    # ---- box regression targets ----
    # Offsets are computed for every point, so they may be negative.
    to_top_left = points - boxes[:, 0:2, ...]  # (B, 2, 19, 19)
    to_bottom_right = boxes[:, 2:4, ...] - points
    bbox_targets = F.concat([to_top_left, to_bottom_right], axis=1)  # (B, 4, 19, 19)

    # ---- centerness targets ----
    ratio0 = F.minimum(to_top_left[:, 0, ...], to_bottom_right[:, 0, ...]) / F.maximum(
        to_top_left[:, 0, ...], to_bottom_right[:, 0, ...])
    ratio1 = F.minimum(to_top_left[:, 1, ...], to_bottom_right[:, 1, ...]) / F.maximum(
        to_top_left[:, 1, ...], to_bottom_right[:, 1, ...])
    centerness_targets = F.sqrt(F.abs(ratio0 * ratio1))

    return cls_labels, bbox_targets, centerness_targets
def box_overlap_ignore_opr(box: Tensor, gt: Tensor, ignore_label=-1) -> tuple:
    """Compute pairwise overlaps, split by whether the gt box is "ignore".

    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        box: N boxes; columns 0..3 are read as coordinates.
        gt: K ground-truth boxes; columns 0..3 are coordinates and column 4
            is the label that is compared with ``ignore_label``.
        ignore_label: label value that marks a gt box as "ignore".

    Returns:
        A pair ``(overlaps_normal, overlaps_ignore)``, each sized [N, K]:
        ``overlaps_normal`` is the IoU, zeroed for ignore gt boxes;
        ``overlaps_ignore`` is intersection over the area of ``box``,
        zeroed for non-ignore gt boxes.
    """
    eps = 1e-5
    N, K = box.shape[0], gt.shape[0]
    b_box = F.broadcast_to(F.expand_dims(box, 1), (N, K, box.shape[1]))
    b_gt = F.broadcast_to(F.expand_dims(gt, 0), (N, K, gt.shape[1]))

    # Intersection extents; negative values (disjoint boxes) are clamped to 0.
    iw = F.minimum(b_box[:, :, 2], b_gt[:, :, 2]) - F.maximum(
        b_box[:, :, 0], b_gt[:, :, 0])
    ih = F.minimum(b_box[:, :, 3], b_gt[:, :, 3]) - F.maximum(
        b_box[:, :, 1], b_gt[:, :, 1])
    inter = F.maximum(iw, 0) * F.maximum(ih, 0)

    area_box = F.maximum(box[:, 2] - box[:, 0], 0) * F.maximum(
        box[:, 3] - box[:, 1], 0)
    area_gt = F.maximum(gt[:, 2] - gt[:, 0], 0) * F.maximum(
        gt[:, 3] - gt[:, 1], 0)

    # eps keeps both denominators strictly positive for degenerate boxes.
    b_area_box = F.broadcast_to(F.expand_dims(area_box, 1), (N, K)) + eps
    b_area_gt = F.broadcast_to(F.expand_dims(area_gt, 0), (N, K))
    union = b_area_box + b_area_gt - inter + eps

    overlaps_normal = F.maximum(inter / union, 0)
    overlaps_ignore = F.maximum(inter / b_area_box, 0)

    ignore_mask = F.equal(gt[:, 4], ignore_label)
    gt_ignore_mask = F.expand_dims(ignore_mask, 0)

    overlaps_normal *= (1 - gt_ignore_mask)
    overlaps_ignore *= gt_ignore_mask
    return overlaps_normal, overlaps_ignore
def get_clipped_box(boxes, hw):
    """Clip the boxes into the image region.

    Columns are read with a stride of 4: columns 0, 4, ... and 2, 6, ... are
    clamped to ``[0, hw[1]]``; columns 1, 5, ... and 3, 7, ... are clamped to
    ``[0, hw[0]]``.

    Returns:
        The four clamped column groups concatenated along axis 1 in the
        order [x1 group, y1 group, x2 group, y2 group].
    """
    def _clamp(values, upper):
        # Limit values to the closed range [0, upper].
        return F.maximum(F.minimum(values, upper), 0)

    x_limit = hw[1]
    y_limit = hw[0]
    clamped_groups = [
        _clamp(boxes[:, 0::4], x_limit),
        _clamp(boxes[:, 1::4], y_limit),
        _clamp(boxes[:, 2::4], x_limit),
        _clamp(boxes[:, 3::4], y_limit),
    ]
    return F.concat(clamped_groups, axis=1)
def box_overlap_opr(box: Tensor, gt: Tensor) -> Tensor:
    """Compute the IoU between all N x K pairs of boxes.

    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        box: N boxes; columns 0..3 are read as coordinates.
        gt: K boxes; columns 0..3 are read as coordinates.

    Returns:
        Tensor: IoU, sized [N, K].  A pair whose union is zero (both boxes
        have zero area) yields 0 instead of the result of a 0/0 division.
    """
    # Floor for the denominator, same guard style as ``iou_loss``.
    eps = 1e-8
    N, K = box.shape[0], gt.shape[0]
    b_box = F.broadcast_to(F.expand_dims(box, 1), (N, K, box.shape[1]))
    b_gt = F.broadcast_to(F.expand_dims(gt, 0), (N, K, gt.shape[1]))

    # Intersection extents; negative values (disjoint boxes) are clamped to 0.
    iw = F.minimum(b_box[:, :, 2], b_gt[:, :, 2]) - F.maximum(
        b_box[:, :, 0], b_gt[:, :, 0])
    ih = F.minimum(b_box[:, :, 3], b_gt[:, :, 3]) - F.maximum(
        b_box[:, :, 1], b_gt[:, :, 1])
    inter = F.maximum(iw, 0) * F.maximum(ih, 0)

    area_box = F.maximum(box[:, 2] - box[:, 0], 0) * F.maximum(
        box[:, 3] - box[:, 1], 0)
    area_gt = F.maximum(gt[:, 2] - gt[:, 0], 0) * F.maximum(
        gt[:, 3] - gt[:, 1], 0)

    b_area_box = F.broadcast_to(F.expand_dims(area_box, 1), (N, K))
    b_area_gt = F.broadcast_to(F.expand_dims(area_gt, 0), (N, K))

    union = b_area_box + b_area_gt - inter
    overlaps = F.maximum(inter / F.maximum(union, eps), 0)
    return overlaps
def mask_anchor_opr(gtboxes, im_info, anchors, labels):
    """Relabel background anchors that mostly lie inside an "ignore" gt box.

    Args:
        gtboxes: ground-truth boxes; only the first ``im_info[5]`` rows are
            used.  Columns 0..3 are coordinates, and a negative value in
            column 4 marks the box as "ignore".
        im_info: per-image info; element 5 is used as the row count of
            ``gtboxes`` (presumably the number of valid boxes -- confirm
            against the caller).
        anchors: anchor boxes; columns 0..3 are coordinates.
        labels: per-anchor labels.

    Returns:
        ``labels`` where every entry equal to 0 whose anchor has
        intersection-over-anchor-area > 0.5 with any ignore box is
        decremented by 1; all other entries are unchanged.
    """
    eps = 1e-6
    gtboxes = gtboxes[:im_info[5].astype(np.int32), :]
    ignore_mask = (gtboxes[:, 4] < 0).astype(np.float32)

    N, K = anchors.shape[0], gtboxes.shape[0]
    p_pred = F.broadcast_to(
        F.expand_dims(anchors, 1), (N, K, anchors.shape[1]))
    p_gt = F.broadcast_to(
        F.expand_dims(gtboxes, 0), (N, K, gtboxes.shape[1]))

    # Corners of the intersection rectangle of every (anchor, gt) pair.
    max_off = F.concat([
        F.maximum(p_pred[:, :, :2], p_gt[:, :, :2]),
        F.minimum(p_pred[:, :, 2:4], p_gt[:, :, 2:4]),
    ], axis=2)

    # Areas use the "+1" size convention; negative extents are clamped to 0.
    inter_area = F.maximum(max_off[:, :, 2] - max_off[:, :, 0] + 1, 0) * F.maximum(
        max_off[:, :, 3] - max_off[:, :, 1] + 1, 0)
    anchor_area = F.maximum(p_pred[:, :, 2] - p_pred[:, :, 0] + 1, 0) * F.maximum(
        p_pred[:, :, 3] - p_pred[:, :, 1] + 1, 0)

    ioa = inter_area / (anchor_area + eps)
    # Zero out the columns that belong to non-ignore gt boxes.
    ioa = ioa * F.expand_dims(ignore_mask, 0)
    mask_flag = (ioa > 0.5).sum(axis=1) > 0

    labels = labels - F.equal(labels, 0).astype(np.float32) * mask_flag.astype(
        np.float32)
    return labels
def get_iou(boxes1: Tensor, boxes2: Tensor, return_ignore=False) -> Tensor:
    """Compute the IoU between all N x M pairs of boxes.

    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        boxes1: N boxes; broadcast as-is to (N, M, 4).
        boxes2: M boxes; only columns 0..3 are used as coordinates.  When
            ``return_ignore`` is set, column 4 is also read and a value of
            -1 marks the box as "ignore".
        return_ignore: also return the overlaps against ignore boxes.

    Returns:
        Tensor: IoU, sized [N, M].  With ``return_ignore`` a pair
        ``(overlaps, overlaps_ignore)`` is returned instead: the IoU zeroed
        for ignore boxes, and intersection over the area of ``boxes1``
        zeroed for non-ignore boxes.
    """
    pair_shape = (boxes1.shapeof(0), boxes2.shapeof(0))
    coord_shape = pair_shape + (4,)

    expanded1 = F.add_axis(boxes1, 1).broadcast(*coord_shape)
    expanded2 = F.add_axis(boxes2[:, :4], 0).broadcast(*coord_shape)

    # Intersection extents; negative values (disjoint boxes) are clamped to 0.
    inter_w = F.minimum(expanded1[:, :, 2], expanded2[:, :, 2]) - F.maximum(
        expanded1[:, :, 0], expanded2[:, :, 0]
    )
    inter_h = F.minimum(expanded1[:, :, 3], expanded2[:, :, 3]) - F.maximum(
        expanded1[:, :, 1], expanded2[:, :, 1]
    )
    inter = F.maximum(inter_w, 0) * F.maximum(inter_h, 0)

    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    pair_area1 = F.add_axis(area1, 1).broadcast(*pair_shape)
    pair_area2 = F.add_axis(area2, 0).broadcast(*pair_shape)

    union = pair_area1 + pair_area2 - inter
    overlaps = F.maximum(inter / union, 0)
    if not return_ignore:
        return overlaps

    overlaps_ignore = F.maximum(inter / pair_area1, 0)
    gt_ignore_mask = F.add_axis((boxes2[:, 4] == -1), 0).broadcast(*pair_shape)
    overlaps *= (1 - gt_ignore_mask)
    overlaps_ignore *= gt_ignore_mask
    return overlaps, overlaps_ignore
def get_cls_reg_ctr_targets(self, points, gt_bboxes, bbox_scale=0.15):
    """Build classification, box-regression and centerness targets per point.

    Args:
        points (Tensor): (1, 2, 37, 37) per the original notes; the position
            in the original image that each score-map point corresponds to.
        gt_bboxes (Tensor): ground-truth box of each image in original-image
            coordinates, (B, 4), in [tl_x, tl_y, br_x, br_y] format
            (top-left corner, bottom-right corner).
        bbox_scale (float): a margin of ``(1 - bbox_scale) / 2`` times the box
            width is trimmed from every side before the inside test.

    Returns:
        cls_labels (Tensor): (B, 1, 37, 37); non-zero where the point lies
            strictly inside the shrunk box, zero for background.
        bbox_targets (Tensor): (B, 4, 37, 37); only meaningful for the
            foreground, the loss for the background should be set to 0.
        centerness_targets (Tensor): (B, 1, 37, 37); only meaningful for the
            foreground, the loss for the background should be set to 0.

        ``requires_grad`` is set to False on all three outputs.
    """
    # The unpacked values are unused; the unpacking itself requires
    # gt_bboxes.shape to have exactly two entries.
    _num_boxes, _ = gt_bboxes.shape

    # (B, 4) -> (B, 4, 1, 1) so that boxes broadcast over the point grid.
    boxes = F.add_axis(F.add_axis(gt_bboxes, axis=-1), axis=-1)

    # ---- classification labels ----
    # Four comparisons decide whether a point is inside.  Because the
    # template is fairly large, the box is shrunk first.
    # NOTE(review): the margin is derived from the box width only but is
    # applied to both coordinates -- confirm for non-square boxes.
    margin = (boxes[:, 2, ...] - boxes[:, 0, ...]) * (1 - bbox_scale) / 2
    above_min0 = points[:, 0, ...] > boxes[:, 0, ...] + margin
    above_min1 = points[:, 1, ...] > boxes[:, 1, ...] + margin
    below_max0 = points[:, 0, ...] < boxes[:, 2, ...] - margin
    below_max1 = points[:, 1, ...] < boxes[:, 3, ...] - margin
    cls_labels = above_min0 * above_min1 * below_max0 * below_max1
    cls_labels = F.add_axis(cls_labels, axis=1)  # (B, 1, 37, 37)
    cls_labels.requires_grad = False

    # ---- box regression targets ----
    # Offsets are computed for every point, so negative values occur.
    # Difference between each score-map point and the top-left corner:
    to_top_left = points - boxes[:, 0:2, ...]  # (B, 2, 37, 37)
    to_bottom_right = boxes[:, 2:4, ...] - points
    bbox_targets = F.concat([to_top_left, to_bottom_right], axis=1)  # (B, 4, 37, 37)
    bbox_targets.requires_grad = False

    # ---- centerness targets ----
    ratio0 = F.minimum(to_top_left[:, 0, ...], to_bottom_right[:, 0, ...]) / F.maximum(
        to_top_left[:, 0, ...], to_bottom_right[:, 0, ...])
    ratio1 = F.minimum(to_top_left[:, 1, ...], to_bottom_right[:, 1, ...]) / F.maximum(
        to_top_left[:, 1, ...], to_bottom_right[:, 1, ...])
    centerness_targets = F.sqrt(F.abs(ratio0 * ratio1))
    centerness_targets = F.add_axis(centerness_targets, axis=1)  # (B, 1, 37, 37)
    centerness_targets.requires_grad = False

    return cls_labels, bbox_targets, centerness_targets
def forward(self, pred, target, weight=None):
    """Sum an IoU-style loss over boxes given as four side distances.

    Args:
        pred: (B*H*W, 4); columns are read as (top, left, bottom, right).
        target: same layout as ``pred``.
        weight: optional (B*H*W,) per-box weight multiplied into the loss.

    Returns:
        The summed loss.  ``self.loc_loss_type`` selects ``-log(iou)``
        ('iou'), ``1 - iou`` ('linear_iou') or ``1 - giou`` ('giou').

    Raises:
        NotImplementedError: for any other ``self.loc_loss_type``.
    """
    p_top, p_left, p_bottom, p_right = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
    t_top, t_left, t_bottom, t_right = (
        target[:, 0], target[:, 1], target[:, 2], target[:, 3])

    target_area = (t_left + t_right) * (t_top + t_bottom)
    pred_area = (p_left + p_right) * (p_top + p_bottom)

    # Overlap extents of the two boxes around the shared point.
    overlap_w = F.minimum(p_left, t_left) + F.minimum(p_right, t_right)
    overlap_h = F.minimum(p_bottom, t_bottom) + F.minimum(p_top, t_top)
    # Extents of the smallest box that encloses both.
    enclose_w = F.maximum(p_left, t_left) + F.maximum(p_right, t_right)
    enclose_h = F.maximum(p_bottom, t_bottom) + F.maximum(p_top, t_top)

    enclose_area = enclose_w * enclose_h
    area_intersect = overlap_w * overlap_h
    area_union = target_area + pred_area - area_intersect

    # The +1.0 terms smooth the ratio.
    ious = (area_intersect + 1.0) / (area_union + 1.0)
    gious = ious - (enclose_area - area_union) / enclose_area

    loss_kind = self.loc_loss_type
    if loss_kind == 'iou':
        losses = -F.log(ious)
    elif loss_kind == 'linear_iou':
        losses = 1 - ious
    elif loss_kind == 'giou':
        losses = 1 - gious
    else:
        raise NotImplementedError

    if weight is None:
        return losses.sum()
    return (losses * weight).sum()
def roi_pool(
    rpn_fms,
    rois,
    stride,
    pool_shape,
    pooler_type="roi_align",
):
    """Pool every ROI from the feature map of its assigned pyramid level.

    Args:
        rpn_fms: list of feature maps, one per level.
        rois: ROI tensor (detached before use); columns 1..4 are read as box
            coordinates (column 0 is presumably the batch index -- confirm
            against the caller).
        stride: list of strides, one per feature map, same length as
            ``rpn_fms``.
        pool_shape: output shape passed to the pooling operator.
        pooler_type: ``"roi_pool"`` or ``"roi_align"``.

    Returns:
        Pooled features, one entry per input ROI, in the input ROI order.

    Raises:
        ValueError: if ``pooler_type`` is not one of the supported values.
    """
    # Validate first: an unknown type used to fail later on an unbound name.
    if pooler_type not in ("roi_pool", "roi_align"):
        raise ValueError(
            "pooler_type must be 'roi_pool' or 'roi_align', got {!r}".format(
                pooler_type))

    rois = rois.detach()
    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = int(math.log2(stride[0]))
    max_level = int(math.log2(stride[-1]))

    num_fms = len(rpn_fms)
    box_area = (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2])
    # level = floor(canonical_level + log2(sqrt(area) / canonical_box_size)),
    # clamped to [min_level, max_level] and shifted to a 0-based index.
    assigned_level = F.floor(
        canonical_level + F.log(F.sqrt(box_area) / canonical_box_size) / np.log(2)
    ).astype("int32")
    assigned_level = F.minimum(assigned_level, max_level)
    assigned_level = F.maximum(assigned_level, min_level)
    assigned_level = assigned_level - min_level

    # Avoid empty assignment: append one all-zero dummy ROI per level so
    # that every level has at least one ROI.
    assigned_level = F.concat([
        assigned_level,
        F.arange(num_fms, dtype="int32", device=assigned_level.device),
    ])
    rois = F.concat([rois, F.zeros((num_fms, rois.shape[-1]))])

    pool_list, inds_list = [], []
    for i in range(num_fms):
        _, inds = F.cond_take(assigned_level == i, assigned_level)
        level_rois = rois[inds]

        if pooler_type == "roi_pool":
            pool_fm = F.nn.roi_pooling(
                rpn_fms[i], level_rois, pool_shape,
                mode="max", scale=1.0 / stride[i])
        else:  # "roi_align"
            pool_fm = F.nn.roi_align(
                rpn_fms[i],
                level_rois,
                pool_shape,
                mode="average",
                spatial_scale=1.0 / stride[i],
                sample_points=2,
                aligned=True,
            )
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # Undo the per-level grouping, then drop the dummy ROIs, which were
    # appended last and therefore end up last again.
    fm_order = F.argsort(F.concat(inds_list, axis=0))
    pool_feature = F.concat(pool_list, axis=0)
    pool_feature = pool_feature[fm_order][:-num_fms]

    return pool_feature
def roi_pool(
    rpn_fms, rois, stride, pool_shape, roi_type="roi_align",
):
    """Pool every ROI from the feature map of its assigned pyramid level.

    Args:
        rpn_fms: list of feature maps, one per level.
        rois: ROI tensor; columns 1..4 are read as box coordinates (column 0
            is presumably the batch index -- confirm against the caller).
        stride: list of strides, one per feature map, same length as
            ``rpn_fms``.
        pool_shape: output shape passed to the pooling operator.
        roi_type: ``"roi_pool"`` or ``"roi_align"``.

    Returns:
        Pooled features, one entry per input ROI, in the input ROI order.

    Raises:
        ValueError: if ``roi_type`` is not one of the supported values.
    """
    # Validate first: an unknown type used to fail later on an unbound name.
    if roi_type not in ("roi_pool", "roi_align"):
        raise ValueError(
            "roi_type must be 'roi_pool' or 'roi_align', got {!r}".format(
                roi_type))

    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = math.log2(stride[0])
    max_level = math.log2(stride[-1])

    num_fms = len(rpn_fms)
    box_area = (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2])
    # level = floor(canonical_level + log2(sqrt(area) / canonical_box_size)),
    # clamped to [min_level, max_level] and shifted to a 0-based index.
    level_assignments = F.floor(
        canonical_level + F.log(box_area.sqrt() / canonical_box_size) / np.log(2)
    )
    level_assignments = F.minimum(level_assignments, max_level)
    level_assignments = F.maximum(level_assignments, min_level)
    level_assignments = level_assignments - min_level

    # Avoid empty assignment: append one all-zero dummy ROI per level so
    # that every level has at least one ROI.
    level_assignments = F.concat(
        [level_assignments, mge.tensor(np.arange(num_fms, dtype=np.int32))],
    )
    rois = F.concat([rois, mge.zeros((num_fms, rois.shapeof(-1)))])

    pool_list, inds_list = [], []
    for i in range(num_fms):
        mask = level_assignments == i
        _, inds = F.cond_take(mask == 1, mask)
        level_rois = rois.ai[inds]
        if roi_type == "roi_pool":
            pool_fm = F.roi_pooling(
                rpn_fms[i], level_rois, pool_shape,
                mode="max", scale=1.0 / stride[i])
        else:  # "roi_align"
            pool_fm = F.roi_align(
                rpn_fms[i],
                level_rois,
                pool_shape,
                mode="average",
                spatial_scale=1.0 / stride[i],
                sample_points=2,
                aligned=True,
            )
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # Undo the per-level grouping, then drop the dummy ROIs, which were
    # appended last and therefore end up last again.
    fm_order = F.concat(inds_list, axis=0)
    fm_order = F.argsort(fm_order.reshape(1, -1))[1].reshape(-1)
    pool_feature = F.concat(pool_list, axis=0)
    pool_feature = pool_feature.ai[fm_order][:-num_fms]

    return pool_feature
def _bernoulli_sample_masks(self, masks, num_samples, sample_value):
    """Randomly keep entries of ``masks`` equal to ``sample_value``.

    Every matching entry is kept independently with probability
    ``min(count, num_samples) / count``, where ``count`` is the number of
    matching entries, so ``num_samples`` is reached only in expectation.

    Returns:
        A mask that is non-zero only at the matching entries that were kept.
    """
    candidates = masks == sample_value
    num_candidates = candidates.sum()
    # Bernoulli probability used to sample the anchors.
    keep_prob = F.minimum(num_candidates, num_samples) / num_candidates
    draws = mge.random.uniform(candidates.shapeof(0))
    return (draws <= keep_prob) * candidates
def iou_loss(
    pred: Tensor,
    target: Tensor,
    box_mode: str = "xyxy",
    loss_type: str = "iou",
    eps: float = 1e-8,
) -> Tensor:
    """Compute an element-wise IoU-based loss between two sets of boxes.

    Args:
        pred: predicted boxes; the last axis holds four values.
        target: target boxes, same layout as ``pred``.
        box_mode: ``"xyxy"`` for corner coordinates, or ``"ltrb"`` for four
            distances from a shared point (the first two values are negated
            to turn them into corner coordinates).
        loss_type: ``"iou"`` (``-log(iou)``), ``"linear_iou"`` (``1 - iou``)
            or ``"giou"`` (``1 - giou``).
        eps: lower bound applied to denominators and to the argument of
            the logarithm.

    Returns:
        The loss for every box, without any reduction.

    Raises:
        NotImplementedError: for an unsupported ``box_mode`` or
            ``loss_type``.
    """
    if box_mode not in ("xyxy", "ltrb"):
        raise NotImplementedError
    # Reject unknown loss types before doing any work; previously they fell
    # through to the return statement with ``loss`` unbound.
    if loss_type not in ("iou", "linear_iou", "giou"):
        raise NotImplementedError(
            "unsupported loss_type: {!r}".format(loss_type))

    if box_mode == "ltrb":
        pred = F.concat([-pred[..., :2], pred[..., 2:]], axis=-1)
        target = F.concat([-target[..., :2], target[..., 2:]], axis=-1)

    pred_area = F.maximum(pred[..., 2] - pred[..., 0], 0) * F.maximum(
        pred[..., 3] - pred[..., 1], 0
    )
    target_area = F.maximum(target[..., 2] - target[..., 0], 0) * F.maximum(
        target[..., 3] - target[..., 1], 0
    )

    w_intersect = F.maximum(
        F.minimum(pred[..., 2], target[..., 2]) - F.maximum(pred[..., 0], target[..., 0]), 0
    )
    h_intersect = F.maximum(
        F.minimum(pred[..., 3], target[..., 3]) - F.maximum(pred[..., 1], target[..., 1]), 0
    )

    area_intersect = w_intersect * h_intersect
    area_union = pred_area + target_area - area_intersect
    ious = area_intersect / F.maximum(area_union, eps)

    if loss_type == "iou":
        loss = -F.log(F.maximum(ious, eps))
    elif loss_type == "linear_iou":
        loss = 1 - ious
    else:  # "giou"
        # Extents of the smallest box that encloses both boxes.
        g_w_intersect = F.maximum(pred[..., 2], target[..., 2]) - F.minimum(
            pred[..., 0], target[..., 0]
        )
        g_h_intersect = F.maximum(pred[..., 3], target[..., 3]) - F.minimum(
            pred[..., 1], target[..., 1]
        )
        ac_union = g_w_intersect * g_h_intersect
        gious = ious - (ac_union - area_union) / F.maximum(ac_union, eps)
        loss = 1 - gious
    return loss
def _bernoulli_sample_masks(masks, num_samples, sample_value):
    """Randomly keep entries of ``masks`` equal to ``sample_value``.

    Every matching entry is kept independently with probability
    ``min(count, num_samples) / count``, where ``count`` is the number of
    matching entries, so ``num_samples`` is reached only in expectation.

    Returns:
        A mask that is non-zero only at the matching entries that were kept.
    """
    candidates = F.equal(masks, sample_value)
    num_candidates = candidates.sum()
    # Bernoulli probability used to sample the anchors.
    keep_prob = F.minimum(num_candidates, num_samples) / num_candidates
    draws = rand.uniform(0, 1, candidates.shape)
    return (draws <= keep_prob) * candidates
def roi_pool(rpn_fms, rois, stride, pool_shape, roi_type='roi_align',
             labels=None, bbox_targets=None):
    """Pool every ROI from its assigned pyramid level, keeping targets aligned.

    Args:
        rpn_fms: list of feature maps, one per level.
        rois: ROI tensor; columns 1..4 are read as box coordinates (column 0
            is presumably the batch index -- confirm against the caller).
        stride: list of strides, one per feature map, same length as
            ``rpn_fms``.
        pool_shape: output shape passed to the pooling operator.
        roi_type: ``'roi_pool'`` or ``'roi_align'``.
        labels: optional per-ROI labels, reordered together with the ROIs.
        bbox_targets: optional per-ROI regression targets; read only when
            ``labels`` is given.

    Returns:
        ``(pool_feature, rois, labels, bbox_targets)``.  The rows are grouped
        by pyramid level (not restored to the input order) and the dummy
        padding rows are removed.  ``labels`` and ``bbox_targets`` are passed
        through ``F.zero_grad``, or are ``None`` when ``labels`` is None.

    Raises:
        ValueError: if ``roi_type`` is not one of the supported values.
    """
    # Validate first: an unknown type used to fail later on an unbound name.
    if roi_type not in ('roi_pool', 'roi_align'):
        raise ValueError(
            "roi_type must be 'roi_pool' or 'roi_align', got {!r}".format(
                roi_type))

    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = math.log2(stride[0])
    max_level = math.log2(stride[-1])

    num_fms = len(rpn_fms)
    box_sizes = F.sqrt((rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2]))
    # level = floor(canonical_level + log2(size / canonical_box_size)),
    # clamped to [min_level, max_level] and shifted to a 0-based index.
    level_assignments = F.floor(
        canonical_level + F.log(box_sizes / canonical_box_size) / np.log(2)
    )
    level_assignments = F.minimum(level_assignments, max_level)
    level_assignments = F.maximum(level_assignments, min_level)
    level_assignments = level_assignments - min_level

    # Append one dummy ROI per level so that no level is empty;
    # available_masks is 1 for real rows and 0 for the dummy rows.
    available_masks = F.concat(
        [mge.ones(level_assignments.shapeof()[0]), mge.zeros(num_fms)], axis=0)
    level_assignments = F.concat(
        [level_assignments, mge.tensor(np.arange(num_fms, dtype=np.int32))],
        axis=0)
    rois = F.concat([rois, mge.zeros((num_fms, rois.shapeof()[-1]))], axis=0)
    if labels is not None:
        labels = F.concat(
            [labels, mge.ones((num_fms, labels.shapeof()[-1]))], axis=0)
        bbox_targets = F.concat(
            [bbox_targets, mge.zeros((num_fms, bbox_targets.shapeof()[-1]))],
            axis=0)

    pool_list, inds_list = [], []
    for i in range(num_fms):
        mask = level_assignments == i
        inds = mask_to_inds(mask)
        rois_fm = rois.ai[inds]
        if roi_type == 'roi_pool':
            pool_fm = F.roi_pooling(
                rpn_fms[i], rois_fm, pool_shape,
                mode='max', scale=1.0/stride[i])
        else:  # 'roi_align'
            pool_fm = F.roi_align(
                rpn_fms[i], rois_fm, pool_shape, mode='average',
                spatial_scale=1.0/stride[i], sample_points=2, aligned=True)
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # fm_order maps each pooled row back to its row in the padded inputs.
    fm_order = F.concat(inds_list, axis=0)
    pool_feature = F.concat(pool_list, axis=0)

    # Select the pooled rows that belong to real (non-dummy) ROIs.
    ordered_available_masks = available_masks.ai[fm_order]
    available_inds = mask_to_inds(ordered_available_masks)
    pool_feature = pool_feature.ai[available_inds]
    rois = rois.ai[fm_order, :].ai[available_inds, :]
    if labels is not None:
        labels = labels.ai[fm_order].ai[available_inds]
        bbox_targets = bbox_targets.ai[fm_order, :].ai[available_inds, :]
        return pool_feature, rois, F.zero_grad(labels), F.zero_grad(bbox_targets)
    else:
        return pool_feature, rois, None, None
def _bernoulli_sample_labels(
    self, labels, num_samples, sample_value, ignore_label=-1
):
    """Randomly re-mark surplus labels equal to ``sample_value`` as ignored.

    Every matching label is kept independently with probability
    ``min(count, num_samples) / count``, where ``count`` is the number of
    matching labels; the ones that are not kept become ``ignore_label``.

    Returns:
        ``labels`` with the dropped entries replaced by ``ignore_label``.
    """
    candidates = (labels == sample_value)
    num_candidates = candidates.sum()
    # Bernoulli probability used to sample the anchors.
    keep_prob = F.minimum(num_candidates, num_samples) / num_candidates
    draws = rand.uniform(candidates.shapeof(0))
    dropped = (draws >= keep_prob) * candidates
    return labels * (1 - dropped) + dropped * ignore_label
def get_iou(boxes1: Tensor, boxes2: Tensor, return_ioa=False) -> Tensor:
    """Compute the IoU between all N x M pairs of boxes.

    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        boxes1 (Tensor): boxes tensor with shape (N, 4)
        boxes2 (Tensor): boxes tensor with shape (M, 4)
        return_ioa (Bool): whether to also return the intersection over the
            area of ``boxes1``, default: False

    Returns:
        iou (Tensor): IoU matrix, shape (N, M).  With ``return_ioa`` a pair
        ``(iou, ioa)`` is returned, where ``ioa`` is also shaped (N, M).
    """
    b_box1 = F.expand_dims(boxes1, axis=1)
    b_box2 = F.expand_dims(boxes2, axis=0)

    # Intersection extents; negative values (disjoint boxes) are clamped to 0.
    iw = F.minimum(b_box1[:, :, 2], b_box2[:, :, 2]) - F.maximum(
        b_box1[:, :, 0], b_box2[:, :, 0]
    )
    ih = F.minimum(b_box1[:, :, 3], b_box2[:, :, 3]) - F.maximum(
        b_box1[:, :, 1], b_box2[:, :, 1]
    )
    inter = F.maximum(iw, 0) * F.maximum(ih, 0)

    area_box1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area_box2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

    # (N, 1) + (1, M) broadcasts to the full (N, M) pair matrix.
    b_area_box1 = F.expand_dims(area_box1, axis=1)
    union = b_area_box1 + F.expand_dims(area_box2, axis=0) - inter
    overlaps = F.maximum(inter / union, 0)

    if return_ioa:
        # Divide by the (N, 1) column so each row uses its own boxes1 area;
        # the bare (N,) vector would be aligned with the M axis instead.
        ioa = F.maximum(inter / b_area_box1, 0)
        return overlaps, ioa

    return overlaps
def _bernoulli_sample_labels(labels, num_samples, sample_value, ignore_label=-1):
    """Randomly re-mark surplus labels equal to ``sample_value`` as ignored.

    Every matching label is kept independently with probability
    ``min(count, num_samples) / count``, where ``count`` is the number of
    matching labels; the ones that are not kept become ``ignore_label``.

    Returns:
        ``labels`` with the dropped entries replaced by ``ignore_label``.
    """
    candidates = F.equal(labels, sample_value)
    num_candidates = candidates.sum()
    # Bernoulli probability used to sample the anchors.
    keep_prob = F.minimum(num_candidates, num_samples) / num_candidates
    draws = rand.uniform(candidates.shapeof()[0])
    dropped = (draws >= keep_prob) * candidates
    # TODO check cudaerror: illegal memory access was encountered
    return labels * (1 - dropped) + dropped * ignore_label
def find_top_rpn_proposals(
    self, rpn_bbox_offsets_list, rpn_cls_prob_list, all_anchors_list, im_info
):
    """Decode RPN outputs of all levels and keep the top proposals per image.

    Args:
        rpn_bbox_offsets_list: per-level predicted box offsets.
        rpn_cls_prob_list: per-level class probabilities; index 1 of the
            second axis is used as the score.
        all_anchors_list: per-level anchors.
        im_info: per-image info; row ``bid`` is passed to the box clipping.

    Returns:
        ROIs of all images concatenated along axis 0 with columns
        [batch_id, x1, y1, x2, y2], wrapped in ``F.zero_grad``.
    """
    if self.training:
        prev_nms_top_n = self.cfg.train_prev_nms_top_n
        post_nms_top_n = self.cfg.train_post_nms_top_n
        batch_per_gpu = self.cfg.batch_per_gpu
    else:
        prev_nms_top_n = self.cfg.test_prev_nms_top_n
        post_nms_top_n = self.cfg.test_post_nms_top_n
        batch_per_gpu = 1
    nms_threshold = self.cfg.rpn_nms_threshold
    num_levels = len(rpn_bbox_offsets_list)

    return_rois = []
    for bid in range(batch_per_gpu):
        level_proposals = []
        level_probs = []
        level_ids = []
        for level_idx in range(num_levels):
            # Decode the proposals of this level.
            offsets = (
                rpn_bbox_offsets_list[level_idx][bid]
                .dimshuffle(2, 3, 0, 1)
                .reshape(-1, 4)
            )
            anchors = all_anchors_list[level_idx]
            proposals = self.box_coder.decode(anchors, offsets)

            probs = (
                rpn_cls_prob_list[level_idx][bid, 1]
                .dimshuffle(1, 2, 0)
                .reshape(1, -1)
            )
            # Keep at most prev_nms_top_n highest-scoring proposals.
            probs, order = F.argsort(probs, descending=True)
            num_proposals = F.minimum(probs.shapeof(1), prev_nms_top_n)
            probs = probs.reshape(-1)[:num_proposals]
            order = order.reshape(-1)[:num_proposals]
            proposals = proposals.ai[order, :]

            level_proposals.append(proposals)
            level_probs.append(probs)
            # Remember the level of every proposal for the batched NMS.
            level_ids.append(mge.ones(probs.shapeof(0)) * level_idx)

        proposals = F.concat(level_proposals, axis=0)
        scores = F.concat(level_probs, axis=0)
        level = F.concat(level_ids, axis=0)

        proposals = layers.get_clipped_box(proposals, im_info[bid, :])
        # Filter empty boxes.
        keep_mask = layers.filter_boxes(proposals)
        _, keep_inds = F.cond_take(keep_mask == 1, keep_mask)
        proposals = proposals.ai[keep_inds, :]
        scores = scores.ai[keep_inds]
        level = level.ai[keep_inds]

        # Sort everything by score before NMS.
        scores, order = F.argsort(scores.reshape(1, -1), descending=True)
        order = order.reshape(-1)
        proposals = proposals.ai[order, :]
        level = level.ai[order]

        # Apply NMS over all levels together.
        rois = F.concat([proposals, scores.reshape(-1, 1)], axis=1)
        keep_inds = batched_nms(
            proposals, scores, level, nms_threshold, post_nms_top_n)
        rois = rois.ai[keep_inds]

        # Final rois: shape (N, 5), columns [batch_id, x1, y1, x2, y2].
        batch_inds = mge.ones((rois.shapeof(0), 1)) * bid
        return_rois.append(F.concat([batch_inds, rois[:, :4]], axis=1))

    return F.zero_grad(F.concat(return_rois, axis=0))
def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
    """Assign a gt box to every anchor point and build the training targets.

    Args:
        anchors_list: per-level anchor points.
        batched_gt_boxes: padded gt boxes of the batch; columns 0..3 are box
            coordinates and column 4 is the class label.
        batched_num_gts: number of valid gt boxes of every image.

    Returns:
        A tuple ``(labels, offsets, ctrness)`` stacked over the batch and
        detached.  ``labels`` is 0 for anchors without a matching gt box.
    """
    labels_list = []
    offsets_list = []
    ctrness_list = []

    all_level_anchors = F.concat(anchors_list, axis=0)

    # The size range of every anchor depends only on its level, not on the
    # image, so it is built once instead of once per image.
    object_sizes_of_interest = F.concat([
        F.broadcast_to(
            F.expand_dims(mge.tensor(size, dtype=np.float32), axis=0),
            (anchors_i.shape[0], 2))
        for anchors_i, size in zip(
            anchors_list, self.cfg.object_sizes_of_interest)
    ], axis=0)
    size_lower = F.expand_dims(object_sizes_of_interest[:, 0], axis=0)
    size_upper = F.expand_dims(object_sizes_of_interest[:, 1], axis=0)

    for bid in range(batched_gt_boxes.shape[0]):
        gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

        # Offsets from every anchor to every gt box
        # (presumably shaped (num_gt, num_anchor, 4) -- confirm against
        # point_coder.encode).
        offsets = self.point_coder.encode(
            all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1))

        # A gt box is handled by a level only if its largest offset falls
        # into the size range of that level.
        max_offsets = F.max(offsets, axis=2)
        is_cared_in_the_level = (
            (max_offsets >= size_lower) & (max_offsets <= size_upper))

        if self.cfg.center_sampling_radius > 0:
            # Center sampling: positives must lie inside a box around the gt
            # center whose half-size is radius * stride, clipped to the gt.
            gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2
            is_in_boxes = []
            for stride, anchors_i in zip(self.cfg.stride, anchors_list):
                radius = stride * self.cfg.center_sampling_radius
                center_boxes = F.concat([
                    F.maximum(gt_centers - radius, gt_boxes[:, :2]),
                    F.minimum(gt_centers + radius, gt_boxes[:, 2:4]),
                ], axis=1)
                center_offsets = self.point_coder.encode(
                    anchors_i, F.expand_dims(center_boxes, axis=1))
                is_in_boxes.append(F.min(center_offsets, axis=2) > 0)
            is_in_boxes = F.concat(is_in_boxes, axis=1)
        else:
            is_in_boxes = F.min(offsets, axis=2) > 0

        gt_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] - gt_boxes[:, 1])
        # FIXME: use repeat instead of broadcast_to
        areas = F.broadcast_to(F.expand_dims(gt_area, axis=1), offsets.shape[:2])
        # Invalid (gt, anchor) pairs get infinite area so that they can
        # never win the argmin below.
        areas[~is_cared_in_the_level] = float("inf")
        areas[~is_in_boxes] = float("inf")

        # Every anchor is matched to the valid gt box with the smallest area.
        match_indices = F.argmin(areas, axis=0)
        gt_boxes_matched = gt_boxes[match_indices]
        anchor_min_area = F.indexing_one_hot(areas, match_indices, axis=0)

        labels = gt_boxes_matched[:, 4].astype(np.int32)
        # Anchors without any valid gt box become background (label 0).
        labels[anchor_min_area == float("inf")] = 0
        offsets = self.point_coder.encode(all_level_anchors, gt_boxes_matched[:, :4])

        left_right = offsets[:, [0, 2]]
        top_bottom = offsets[:, [1, 3]]
        # Negative ratios are clamped to 0 before the square root.
        ctrness = F.sqrt(
            F.maximum(F.min(left_right, axis=1) / F.max(left_right, axis=1), 0)
            * F.maximum(F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1), 0)
        )

        labels_list.append(labels)
        offsets_list.append(offsets)
        ctrness_list.append(ctrness)

    return (
        F.stack(labels_list, axis=0).detach(),
        F.stack(offsets_list, axis=0).detach(),
        F.stack(ctrness_list, axis=0).detach(),
    )
def find_top_rpn_proposals(is_train, rpn_bbox_offsets_list, rpn_cls_prob_list,
        all_anchors_list, im_info):
    """Decode RPN outputs of all levels and keep the top proposals per image.

    Args:
        is_train: selects the train or test value of ``prev_nms_top_n``.
        rpn_bbox_offsets_list: per-level predicted box offsets.
        rpn_cls_prob_list: per-level class scores; the batch size is taken
            from the first axis of its first entry.
        all_anchors_list: per-level anchors.
        im_info: per-image info; element 2 of a row scales the minimum box
            size, and the whole row is passed to the box clipping.

    Returns:
        ``(rois, probs)``: rois have the columns [batch_id, x1, y1, x2, y2]
        and probs are the matching scores, concatenated over the batch.
    """
    prev_nms_top_n = config.train_prev_nms_top_n \
        if is_train else config.test_prev_nms_top_n
    nms_threshold = config.rpn_nms_threshold
    box_min_size = config.rpn_min_box_size
    bbox_normalize_targets = config.rpn_bbox_normalize_targets
    bbox_normalize_means = config.bbox_normalize_means
    bbox_normalize_stds = config.bbox_normalize_stds
    list_size = len(rpn_bbox_offsets_list)

    return_rois, return_probs = [], []
    batch_per_gpu = rpn_cls_prob_list[0].shape[0]
    for bid in range(batch_per_gpu):
        batch_proposals_list = []
        batch_probs_list = []
        for l in range(list_size):
            # get proposals and probs
            offsets = rpn_bbox_offsets_list[l][bid] \
                .transpose(1, 2, 0).reshape(-1, 4)
            if bbox_normalize_targets:
                # Undo the target normalization on the offsets that are
                # decoded below (this used to write to an undefined name).
                std_opr = tensor(bbox_normalize_stds[None, :])
                mean_opr = tensor(bbox_normalize_means[None, :])
                offsets = offsets * std_opr
                offsets = offsets + mean_opr
            all_anchors = all_anchors_list[l]
            proposals = bbox_transform_inv_opr(all_anchors, offsets)
            if config.anchor_within_border:
                proposals = clip_boxes_opr(proposals, im_info[bid, :])
            probs = rpn_cls_prob_list[l][bid] \
                .transpose(1, 2, 0).reshape(-1, 2)
            probs = F.softmax(probs)[:, 1]
            # gather the proposals and probs
            batch_proposals_list.append(proposals)
            batch_probs_list.append(probs)
        batch_proposals = F.concat(batch_proposals_list, axis=0)
        batch_probs = F.concat(batch_probs_list, axis=0)

        # filter the boxes with small size.
        wh = batch_proposals[:, 2:4] - batch_proposals[:, :2] + 1
        thresh = box_min_size * im_info[bid, 2]
        keep_mask = F.prod((wh >= thresh), axis=1)
        # If no box passes the size filter, keep all of them instead.
        keep_mask = keep_mask + F.equal(keep_mask.sum(), 0)
        keep_mask, inds = F.cond_take(keep_mask > 0, keep_mask)

        inds = inds.astype(np.int32)
        batch_proposals, batch_probs = batch_proposals[inds], batch_probs[inds]

        # prev_nms_top_n
        num_proposals = F.minimum(prev_nms_top_n, batch_proposals.shape[0])
        idx = F.argsort(batch_probs, descending=True)
        topk_idx = idx[:num_proposals].reshape(-1)
        batch_proposals = batch_proposals[topk_idx].detach()
        batch_probs = batch_probs[topk_idx].detach()

        # For each image, run a total-level NMS; the number of kept boxes is
        # capped by max_output.
        keep_inds = nms(batch_proposals, batch_probs, nms_threshold, max_output=2000)
        batch_rois, batch_probs = batch_proposals[keep_inds], batch_probs[keep_inds]

        # cons the rois
        batch_inds = F.ones((batch_rois.shape[0], 1)) * bid
        batch_rois = F.concat([batch_inds, batch_rois[:, :4]], axis=1)
        return_rois.append(batch_rois)
        return_probs.append(batch_probs)

    if batch_per_gpu == 1:
        return batch_rois, batch_probs
    else:
        concated_rois = F.concat(return_rois, axis=0)
        concated_probs = F.concat(return_probs, axis=0)
        return concated_rois, concated_probs
def fake_quant_tensor_gt(inp, scale, zero_point, qmin, qmax):
    """Fake-quantize ``inp``: quantize, clamp, then de-quantize.

    The input is divided by ``scale``, rounded and shifted by
    ``zero_point``, clamped to ``[qmin, qmax]`` and finally mapped back
    with ``(q - zero_point) * scale``.
    """
    quantized = Round()(inp / scale) + zero_point
    clamped = F.minimum(F.maximum(quantized, qmin), qmax)
    return (clamped - zero_point) * scale
def find_top_rpn_proposals(is_train, rpn_bbox_offsets_list, rpn_cls_prob_list,
                           all_anchors_list, im_info):
    """Turn multi-level RPN outputs into per-image proposals (rois).

    For every image, the offsets of each level are decoded against that
    level's anchors with ``bbox_transform_inv_opr``, optionally clipped, and
    scored with column 1 of the softmax output. The proposals of all levels
    are merged, their scores are multiplied by the mask returned by
    ``filter_boxes_opr``, the ``prev_nms_top_n`` highest-scoring ones are kept
    and ``gpu_nms`` is run over them with ``post_nms_top_n``.

    Args:
        is_train: selects the train or test values of the top-n settings;
            the loop runs over ``config.batch_per_gpu`` images when true and
            over a single image otherwise.
        rpn_bbox_offsets_list: per-level offset tensors; ``[level][bid]`` is
            a 3-D tensor whose first axis is moved last before it is
            flattened to rows of 4.
        rpn_cls_prob_list: per-level score tensors, indexed the same way and
            flattened to rows of 2.
        all_anchors_list: per-level anchors handed to
            ``bbox_transform_inv_opr``.
        im_info: per-image info rows; column 2 scales the minimum box size
            (presumably the resize scale -- confirm with the caller).

    Returns:
        ``(rois, probs)``. Every roi row is the image index followed by the
        proposal columns. For a batch of one the tensors of that image are
        returned directly, otherwise the per-image results are concatenated
        along axis 0.
    """
    prev_nms_top_n = (config.train_prev_nms_top_n if is_train
                      else config.test_prev_nms_top_n)
    post_nms_top_n = (config.train_post_nms_top_n if is_train
                      else config.test_post_nms_top_n)
    batch_per_gpu = config.batch_per_gpu if is_train else 1
    nms_threshold = config.rpn_nms_threshold
    box_min_size = config.rpn_min_box_size
    bbox_normalize_targets = config.rpn_bbox_normalize_targets
    bbox_normalize_means = config.bbox_normalize_means
    bbox_normalize_stds = config.bbox_normalize_stds
    list_size = len(rpn_bbox_offsets_list)

    return_rois = []
    return_probs = []
    for bid in range(batch_per_gpu):
        batch_proposals_list = []
        batch_probs_list = []
        for level in range(list_size):
            # get proposals and probs
            offsets = rpn_bbox_offsets_list[level][bid] \
                .dimshuffle(1, 2, 0).reshape(-1, 4)
            if bbox_normalize_targets:
                # Undo the target normalization on the very tensor that is
                # decoded below (the result used to be written to an
                # undefined, never-consumed name).
                std_opr = tensor(bbox_normalize_stds[None, :])
                mean_opr = tensor(bbox_normalize_means[None, :])
                offsets = offsets * std_opr + mean_opr
            all_anchors = all_anchors_list[level]
            proposals = bbox_transform_inv_opr(all_anchors, offsets)
            if config.anchor_within_border:
                proposals = clip_boxes_opr(proposals, im_info[bid, :])
            probs = rpn_cls_prob_list[level][bid] \
                .dimshuffle(1, 2, 0).reshape(-1, 2)
            probs = F.softmax(probs)[:, 1]
            # gather the proposals and probs
            batch_proposals_list.append(proposals)
            batch_probs_list.append(probs)
        batch_proposals = F.concat(batch_proposals_list, axis=0)
        batch_probs = F.concat(batch_probs_list, axis=0)

        # filter the zero boxes: boxes are not removed here, their scores
        # are scaled by the mask instead (presumably 0/1 -- confirm against
        # filter_boxes_opr).
        batch_keep_mask = filter_boxes_opr(
            batch_proposals, box_min_size * im_info[bid, 2])
        batch_probs = batch_probs * batch_keep_mask

        # prev_nms_top_n
        num_proposals = F.minimum(prev_nms_top_n, batch_probs.shapeof()[0])
        batch_probs, idx = F.argsort(batch_probs, descending=True)
        batch_probs = batch_probs[:num_proposals].reshape(-1, 1)
        topk_idx = idx[:num_proposals].reshape(-1)
        batch_proposals = batch_proposals.ai[topk_idx]
        # The score travels as the last column through NMS.
        batch_rois = F.concat([batch_proposals, batch_probs], axis=1)

        # For each image, run a total-level NMS, and choose topk results.
        keep_inds = gpu_nms(batch_rois, nms_threshold, post_nms_top_n)
        batch_rois = batch_rois.ai[keep_inds]
        batch_probs = batch_rois[:, -1]

        # cons the rois: swap the trailing score column for a leading
        # image-index column
        batch_inds = mge.ones((batch_rois.shapeof()[0], 1)) * bid
        batch_rois = F.concat([batch_inds, batch_rois[:, :-1]], axis=1)
        return_rois.append(batch_rois)
        return_probs.append(batch_probs)

    if batch_per_gpu == 1:
        return batch_rois, batch_probs
    concated_rois = F.concat(return_rois, axis=0)
    concated_probs = F.concat(return_probs, axis=0)
    return concated_rois, concated_probs
def get_ground_truth(self, rpn_rois, im_info, gt_boxes):
    """Sample rois per image and build their labels and box targets.

    Outside training mode the rois are passed through untouched and both
    targets are ``None``.

    In training mode, for each of the ``self.cfg.batch_per_gpu`` images:
    the first ``im_info[bid, 4]`` ground-truth rows are appended to the rois
    of that image, IoUs are computed with ``layers.get_iou`` (normal and
    ignore variants), every roi is assigned the ground truth with the best
    overlap, foreground / background masks are sampled through
    ``self._bernoulli_sample_masks`` and the kept rois are encoded with
    ``self.box_coder``.

    Args:
        rpn_rois: roi rows ``[batch_id, x1, y1, x2, y2]``.
        im_info: per-image info rows; column 4 is used as the number of
            valid ground-truth boxes.
        gt_boxes: ground truth indexed as ``[bid, box, column]``; columns
            0-3 are used as the box and column 4 as its label.

    Returns:
        ``(rois, labels, bbox_targets)``, each concatenated over the images
        along axis 0 and wrapped in ``F.zero_grad``; or
        ``(rpn_rois, None, None)`` when not training.
    """
    if not self.training:
        return rpn_rois, None, None

    cfg = self.cfg
    sampled_rois = []
    sampled_labels = []
    sampled_targets = []

    # get per image proposals and gt_boxes
    for bid in range(cfg.batch_per_gpu):
        valid_count = im_info[bid, 4]
        img_gt = gt_boxes[bid, :valid_count, :]

        # Ground-truth boxes are always appended to the proposals.
        gt_batch_col = mge.ones((img_gt.shapeof(0), 1)) * bid
        gt_as_rois = F.concat([gt_batch_col, img_gt[:, :4]], axis=1)

        in_image = rpn_rois[:, 0] == bid
        _, roi_rows = F.cond_take(in_image == 1, in_image)
        # all_rois : [batch_id, x1, y1, x2, y2]
        all_rois = F.concat([rpn_rois.ai[roi_rows], gt_as_rois])

        iou_normal, iou_ignore = layers.get_iou(
            all_rois[:, 1:5], img_gt, return_ignore=True,
        )
        best_normal = iou_normal.max(axis=1)
        arg_normal = F.argmax(iou_normal, axis=1)
        best_ignore = iou_ignore.max(axis=1)
        arg_ignore = F.argmax(iou_ignore, axis=1)

        # Use the "ignore" assignment where the normal overlap is below the
        # foreground threshold and the ignore overlap is larger.
        use_ignore = (best_normal < cfg.fg_threshold) * (
            best_ignore > best_normal
        )
        use_normal = 1 - use_ignore
        max_overlaps = best_normal * use_normal + best_ignore * use_ignore
        gt_assignment = arg_normal * use_normal + arg_ignore * use_ignore
        gt_assignment = gt_assignment.astype("int32")
        labels = img_gt.ai[gt_assignment, 4]

        # ---------------- get the fg/bg labels for each roi ---------------#
        is_fg = (max_overlaps >= cfg.fg_threshold) * (
            labels != cfg.ignore_label
        )
        is_bg = (max_overlaps < cfg.bg_threshold_high) * (
            max_overlaps >= cfg.bg_threshold_low
        )

        fg_budget = cfg.num_rois * cfg.fg_ratio
        fg_sampled = self._bernoulli_sample_masks(is_fg, fg_budget, 1)
        # Background fills whatever the sampled foreground left over.
        bg_budget = cfg.num_rois - fg_sampled.sum()
        bg_sampled = self._bernoulli_sample_masks(is_bg, bg_budget, 1)

        # Everything that is not sampled foreground gets label 0.
        labels = labels * fg_sampled
        sampled = fg_sampled + bg_sampled
        _, keep_inds = F.cond_take(sampled == 1, sampled)
        # Add next line to avoid memory exceed
        keep_inds = keep_inds[: F.minimum(cfg.num_rois, keep_inds.shapeof(0))]

        # labels
        labels = labels.ai[keep_inds].astype("int32")
        rois = all_rois.ai[keep_inds]
        matched_gt = img_gt.ai[gt_assignment.ai[keep_inds], :4]
        bbox_targets = self.box_coder.encode(rois[:, 1:5], matched_gt)
        bbox_targets = bbox_targets.reshape(-1, 4)

        sampled_rois.append(rois)
        sampled_labels.append(labels)
        sampled_targets.append(bbox_targets)

    return (
        F.zero_grad(F.concat(sampled_rois, axis=0)),
        F.zero_grad(F.concat(sampled_labels, axis=0)),
        F.zero_grad(F.concat(sampled_targets, axis=0)),
    )
def forward(self, a):
    """Apply the elementwise computation selected by ``self.mode`` to ``a``.

    Depending on the mode, the operands ``self.data``, ``self.data1`` and
    ``self.data2`` are wrapped in ``mge.tensor`` and combined with ``a``.

    Raises:
        NotImplementedError: if ``self.mode`` is none of the handled modes.
    """
    mode = self.mode

    # add
    if mode == "add":
        x = a + mge.tensor(np.float32(10))
        y = a + mge.tensor(self.data1)
        return x + y

    # sub
    if mode == "sub":
        x = a - mge.tensor(np.float32(10))
        y = a - mge.tensor(self.data1)
        return x - y

    # mul
    if mode == "mul":
        x = a * mge.tensor(np.float32(10))
        y = mge.tensor(self.data1) * a
        return x * y

    # max / min of two shifted copies of the input
    if mode == "max":
        x = a + mge.tensor(self.data)
        y = a + mge.tensor(self.data2)
        return F.maximum(x, y)
    if mode == "min":
        x = a + mge.tensor(self.data)
        y = a + mge.tensor(self.data2)
        return F.minimum(x, y)

    # unary modes
    if mode == "pow":
        return a**2
    if mode == "ceil":
        return F.ceil(a)
    if mode == "floor":
        return F.floor(a)

    # div
    if mode == "div":
        y = mge.tensor(self.data1) / a
        x = a / mge.tensor(np.float32(2))
        return y / x

    # cycle_div
    if mode == "cycle_div":
        return a / mge.tensor(self.data1)

    # abs
    if mode == "abs":
        return F.abs(a)

    # exp
    if mode == "exp":
        return F.exp(a)

    # log
    if mode == "log":
        return F.log(a)

    # two-step modes: an elementwise op followed by a second op
    if mode == "fuse_add_relu":
        y = a + mge.tensor(self.data2)
        return F.relu(y)
    if mode == "fuse_mul_add3":
        y = a * mge.tensor(self.data1)
        return y + mge.tensor(self.data2)
    if mode == "fuse_add_sigmoid":
        y = a + mge.tensor(self.data2)
        return F.sigmoid(y)

    raise NotImplementedError('no such elemwise mode "%s"' % mode)