def anchor_iou_target_opr(self, boxes, im_info, all_anchors, rpn_bbox_offsets):
    """Build per-anchor IoU targets from the currently predicted boxes.

    For every image, the predicted offsets are applied to the anchors with
    ``bbox_transform_inv_opr``, the decoded boxes are compared with that
    image's ground-truth boxes through ``box_overlap_opr``, and the largest
    overlap of each decoded box is kept.  Ground-truth columns whose label
    (column 4) equals ``config.anchor_ignore_label`` are zeroed before the
    maximum is taken.

    Args:
        boxes: batched ground-truth boxes; columns 0-3 are used as
            coordinates and column 4 as the label.
        im_info: per-image info; ``im_info[i, 5]`` is read as the number of
            leading rows of ``boxes[i]`` to use.
        all_anchors: 2-D anchor tensor; only the first four columns are decoded.
        rpn_bbox_offsets: batched offsets, reshaped to ``(-1, 4)`` per image
            and detached from the graph.

    Returns:
        The per-image maxima concatenated along axis 0; a trailing and a
        leading singleton axis are added to each image's result first.
    """
    per_image = []
    for idx in range(rpn_bbox_offsets.shape[0]):
        num_gt = im_info[idx, 5].astype(np.int32)
        gtboxes = boxes[idx, :num_gt]
        offsets = rpn_bbox_offsets[idx].reshape(-1, 4).detach()

        num_anchors, anchor_dim = all_anchors.shape[0], all_anchors.shape[1]
        anchors = F.expand_dims(all_anchors, 1)
        anchors = F.broadcast_to(anchors, (num_anchors, 1, anchor_dim))
        anchors = anchors.reshape(-1, anchor_dim)

        dtboxes = bbox_transform_inv_opr(anchors[:, :4], offsets[:, :4])
        overlaps = box_overlap_opr(dtboxes, gtboxes[:, :4])

        # 1 for ground-truth boxes that count, 0 for ignored ones.
        is_ignored = F.equal(gtboxes[:, 4], config.anchor_ignore_label)
        keep_mask = 1 - is_ignored.astype(np.float32)
        overlaps = overlaps * F.expand_dims(keep_mask, axis=0)

        # Pick each row's maximum through its argmax index.
        best_gt = F.argmax(overlaps, axis=1)
        best_iou = F.nn.indexing_one_hot(overlaps, best_gt, 1)
        best_iou = F.expand_dims(best_iou, axis=1)
        per_image.append(F.expand_dims(best_iou, axis=0))
    return F.concat(per_image, 0)
def decode(self, anchors: Tensor, deltas: Tensor) -> Tensor:
    """Turn point anchors plus four-way distances into corner boxes.

    Columns of ``deltas`` are consumed in groups of four.  Within each group
    the first two values are subtracted from the anchor's x / y (columns 0
    and 1 of ``anchors``) and the last two are added to them.

    Args:
        anchors: 2-D tensor; column 0 is used as x and column 1 as y.
        deltas: 2-D tensor whose column count is a multiple of four.

    Returns:
        Tensor with the same shape as ``deltas`` holding, per group,
        ``(x - d0, y - d1, x + d2, y + d3)``.
    """
    point_x = F.expand_dims(anchors[:, 0], axis=1)
    point_y = F.expand_dims(anchors[:, 1], axis=1)

    x_min = point_x - deltas[:, 0::4]
    y_min = point_y - deltas[:, 1::4]
    x_max = point_x + deltas[:, 2::4]
    y_max = point_y + deltas[:, 3::4]

    # Stack on a new last axis so each group's four values end up adjacent
    # after reshaping back to the input layout.
    corners = F.stack([x_min, y_min, x_max, y_max], axis=2)
    return corners.reshape(deltas.shape)
def generate_anchors_opr(self, fm_3x3, fm_stride, anchor_scales=(8, 16, 32, 64, 128),
                         anchor_ratios=(1, 2, 3), base_size=4):
    """Tile a set of base anchors over every cell of a feature map.

    Args:
        fm_3x3: feature map; axes 2 and 3 are read as height and width, and
            its device is used for the generated tensors.
        fm_stride: multiplier converting feature-map cell indices to shifts.
        anchor_scales: scales forwarded to ``generate_anchors``.
        anchor_ratios: ratios forwarded to ``generate_anchors``.
        base_size: base size forwarded to ``generate_anchors``.

    Returns:
        Detached 2-D tensor with one row per (cell, base anchor) pair and the
        same column count as the base anchors.
    """
    base_np = generate_anchors(
        base_size=base_size,
        ratios=np.array(anchor_ratios),
        scales=np.array(anchor_scales),
    )
    device = fm_3x3.device
    base_anchors = mge.tensor(base_np).to(device)

    fm_h, fm_w = fm_3x3.shape[2], fm_3x3.shape[3]
    xs = F.linspace(0, fm_w - 1, fm_w).to(device) * fm_stride
    ys = F.linspace(0, fm_h - 1, fm_h).to(device) * fm_stride

    # Row-major grid of shifts: x varies fastest, y slowest.
    grid_shape = (fm_h, fm_w)
    grid_x = F.broadcast_to(xs.reshape(1, -1), grid_shape).flatten()
    grid_y = F.broadcast_to(ys.reshape(-1, 1), grid_shape).flatten()
    shifts = F.stack([grid_x, grid_y, grid_x, grid_y], axis=1)

    # (1, A, c) + (H*W, 1, 4): every base anchor is shifted to every cell.
    shifted = F.expand_dims(base_anchors, axis=0) + F.expand_dims(shifts, axis=1)
    return shifted.reshape(-1, base_anchors.shape[1]).detach()
def forward(self, inps):
    """Run a 2-D convolution on ``inps = [src, weight]`` or ``[src, weight, bias]``.

    The bias, when present, is expanded so it can be passed to ``F.conv2d``;
    for grouped convolutions the weight is reshaped to
    ``[groups, OC // groups, IC // groups, FH, FW]`` first.  Stride, padding,
    dilation and groups come from ``self.param``.

    Raises:
        Exception: if the bias rank check accepts neither 3 nor 1.
    """
    src, weight = inps[0], inps[1]
    try:
        bias = inps[2]
    except IndexError:
        bias = None

    if bias is not None:
        # NOTE(review): the rank is read from ``bias.shape.ndim`` (not
        # ``bias.ndim``); presumably ``shape`` is an object exposing the
        # tensor rank here -- confirm against the tensor type in use.
        bias_rank = bias.shape.ndim
        if bias_rank == 3:
            bias = F.expand_dims(bias, axis=0)
        elif bias_rank == 1:
            bias = F.expand_dims(bias, axis=[0, 2, 3])
        else:
            raise Exception(f"Invalid Conv2d bias's shape {bias.shape}")

    groups = self.param["groups"]
    if groups != 1:
        in_channels = src.shape.numpy()[1]
        weight_shape = weight.shape.numpy()
        out_channels, kernel_h, kernel_w = weight_shape[0], weight_shape[2], weight_shape[3]
        grouped_shape = [
            groups,
            int(out_channels / groups),
            int(in_channels / groups),
            kernel_h,
            kernel_w,
        ]
        weight = F.reshape(weight, grouped_shape)

    return F.conv2d(
        src,
        weight,
        bias,
        stride=self.param["stride"],
        padding=self.param["padding"],
        dilation=self.param["dilation"],
        groups=groups,
    )
def mask_anchor_opr(gtboxes, im_info, anchors, labels):
    """Demote zero-labelled anchors that mostly lie inside an ignore box.

    A ground-truth row counts as an ignore box when its column 4 is
    negative.  For every anchor the intersection with each ground-truth box
    is divided by the anchor's own area (IoA, computed with the ``+ 1``
    pixel convention).  Anchors whose IoA with any ignore box exceeds 0.5
    and whose label is exactly 0 get their label decreased by 1.

    Args:
        gtboxes: ground-truth boxes; only the first ``im_info[5]`` rows are used.
        im_info: image info vector; element 5 is read as the valid box count.
        anchors: 2-D anchor tensor; columns 0-3 are the corners.
        labels: one label per anchor.

    Returns:
        The adjusted labels.
    """
    eps = 1e-6
    num_gt = im_info[5].astype(np.int32)
    gtboxes = gtboxes[:num_gt, :]
    ignore_mask = (gtboxes[:, 4] < 0).astype(np.float32)

    N, K = anchors.shape[0], gtboxes.shape[0]
    p_pred = F.broadcast_to(F.expand_dims(anchors, 1), (N, K, anchors.shape[1]))
    p_gt = F.broadcast_to(F.expand_dims(gtboxes, 0), (N, K, gtboxes.shape[1]))

    # Corners of the pairwise intersection rectangles.
    top_left = F.maximum(p_pred[:, :, :2], p_gt[:, :, :2])
    bottom_right = F.minimum(p_pred[:, :, 2:4], p_gt[:, :, 2:4])
    inter_w = F.maximum(bottom_right[:, :, 0] - top_left[:, :, 0] + 1, 0)
    inter_h = F.maximum(bottom_right[:, :, 1] - top_left[:, :, 1] + 1, 0)
    inter_area = inter_w * inter_h

    anchor_w = F.maximum(p_pred[:, :, 2] - p_pred[:, :, 0] + 1, 0)
    anchor_h = F.maximum(p_pred[:, :, 3] - p_pred[:, :, 1] + 1, 0)
    anchor_area = anchor_w * anchor_h

    ioa = inter_area / (anchor_area + eps)
    # Only overlaps with ignore boxes survive.
    ioa = ioa * F.expand_dims(ignore_mask, 0)
    covered = (ioa > 0.5).sum(axis=1) > 0

    is_zero_label = F.equal(labels, 0).astype(np.float32)
    return labels - is_zero_label * covered.astype(np.float32)
def get_plane_anchors(self, anchor_scales: np.ndarray):
    """Return the anchors placed at a single feature-map location.

    A square base box of side ``self.base_size`` is reshaped for every entry
    of ``self.anchor_ratios`` and then scaled by every entry of
    ``anchor_scales``, giving ``len(ratios) * len(scales)`` anchors.

    Args:
        anchor_scales: scale factors; reshaped to a row and cast to float32.

    Returns:
        float32 tensor with one ``(x1, y1, x2, y2)`` row per anchor.
    """
    extent = self.base_size - 1
    base_anchor = Tensor([0, 0, extent, extent]).reshape(1, -1)
    w, h, x_ctr, y_ctr = self._whctrs(base_anchor)

    # Enumerate ratios: widths come from the area divided by the ratio,
    # heights are the widths multiplied by the ratio.
    area_per_ratio = (w * h) / self.anchor_ratios
    ws = F.sqrt(area_per_ratio)
    hs = ws * self.anchor_ratios

    # Enumerate scales: outer product of per-ratio sizes and scales.
    scales = anchor_scales.reshape(1, -1).astype(np.float32)
    ws = (F.expand_dims(ws, 1) * scales).reshape(-1, 1)
    hs = (F.expand_dims(hs, 1) * scales).reshape(-1, 1)

    half_w = 0.5 * (ws - 1)
    half_h = 0.5 * (hs - 1)
    anchors = F.concat(
        [x_ctr - half_w, y_ctr - half_h, x_ctr + half_w, y_ctr + half_h],
        axis=1,
    )
    return anchors.astype(np.float32)
def get_anchors_by_feature(self, featmap, stride):
    """Generate all anchors for one feature map.

    The per-location anchors from ``get_plane_anchors`` (called with
    ``self.base_scale * stride``) are added to every centre offset returned
    by ``get_center_offsets``.

    Returns:
        Tensor reshaped to ``(-1, 4)`` with one row per (offset, anchor) pair.
    """
    center_shifts = self.get_center_offsets(featmap, stride)
    cell_anchors = self.get_plane_anchors(self.base_scale * stride)

    # (1, B, 4) + (A, 1, 4) -> (A, B, 4): every anchor at every offset.
    tiled = F.expand_dims(cell_anchors, 0) + F.expand_dims(center_shifts, 1)
    return tiled.reshape(-1, 4)
def generate_anchors_by_features(self, sizes, device):
    """Build the anchor boxes of every feature level.

    Args:
        sizes: one feature-map size per level; must contain
            ``self.num_features`` entries.
        device: device passed to ``create_anchor_grid``.

    Returns:
        List with one ``(-1, 4)`` tensor per level, each holding the level's
        base anchors shifted to every grid position.
    """
    assert len(sizes) == self.num_features, (
        "input features expected {}, got {}".format(self.num_features, len(sizes))
    )

    def _anchors_for_level(size, stride, base_anchor):
        # Grid positions repeated as (x, y, x, y) so they shift both corners.
        grid_x, grid_y = create_anchor_grid(size, self.offset, stride, device)
        grids = F.stack([grid_x, grid_y, grid_x, grid_y], axis=1)
        shifted = F.expand_dims(grids, axis=1) + F.expand_dims(base_anchor, axis=0)
        return shifted.reshape(-1, 4)

    return [
        _anchors_for_level(size, stride, base_anchor)
        for size, stride, base_anchor in zip(sizes, self.strides, self.base_anchors)
    ]
def box_overlap_ignore_opr(box: Tensor, gt: Tensor, ignore_label=-1) -> "Tuple[Tensor, Tensor]":
    """Compute pairwise overlaps, separating normal and ignored ground truth.

    Boxes use ``(xmin, ymin, xmax, ymax)`` order.

    Args:
        box: ``N`` boxes; columns 0-3 are the coordinates.
        gt: ``K`` ground-truth boxes; columns 0-3 are the coordinates and
            column 4 is the label.
        ignore_label: label value in ``gt[:, 4]`` that marks an ignore box.

    Returns:
        A pair ``(overlaps_normal, overlaps_ignore)``, both shaped ``(N, K)``:

        * ``overlaps_normal`` is the IoU, zeroed in columns of ignore boxes;
        * ``overlaps_ignore`` is intersection over the area of ``box``
          (IoA), zeroed in columns of non-ignore boxes.
    """
    eps = 1e-5
    N, K = box.shape[0], gt.shape[0]
    b_box = F.broadcast_to(F.expand_dims(box, 1), (N, K, box.shape[1]))
    b_gt = F.broadcast_to(F.expand_dims(gt, 0), (N, K, gt.shape[1]))

    iw = F.minimum(b_box[:, :, 2], b_gt[:, :, 2]) - F.maximum(
        b_box[:, :, 0], b_gt[:, :, 0])
    ih = F.minimum(b_box[:, :, 3], b_gt[:, :, 3]) - F.maximum(
        b_box[:, :, 1], b_gt[:, :, 1])
    inter = F.maximum(iw, 0) * F.maximum(ih, 0)

    area_box = F.maximum(box[:, 2] - box[:, 0], 0) * F.maximum(
        box[:, 3] - box[:, 1], 0)
    area_gt = F.maximum(gt[:, 2] - gt[:, 0], 0) * F.maximum(
        gt[:, 3] - gt[:, 1], 0)

    # eps keeps both denominators strictly positive for degenerate boxes.
    b_area_box = F.broadcast_to(F.expand_dims(area_box, 1), (N, K)) + eps
    b_area_gt = F.broadcast_to(F.expand_dims(area_gt, 0), (N, K))
    union = b_area_box + b_area_gt - inter + eps

    overlaps_normal = F.maximum(inter / union, 0)
    overlaps_ignore = F.maximum(inter / b_area_box, 0)

    # Route each ground-truth column to exactly one of the two outputs.
    gt_ignore_mask = F.expand_dims(F.equal(gt[:, 4], ignore_label), 0)
    overlaps_normal *= (1 - gt_ignore_mask)
    overlaps_ignore *= gt_ignore_mask
    return overlaps_normal, overlaps_ignore
def box_overlap_opr(box: Tensor, gt: Tensor) -> Tensor:
    """Compute the IoU between all pairs drawn from two sets of boxes.

    Boxes use ``(xmin, ymin, xmax, ymax)`` order.

    Args:
        box: ``N`` boxes; columns 0-3 are the coordinates.
        gt: ``K`` boxes; columns 0-3 are the coordinates.

    Returns:
        Tensor: IoU matrix of shape ``(N, K)``.
    """
    # NOTE(review): no epsilon guards the division below, so a pair of
    # zero-area boxes yields 0 / 0.
    num_box, num_gt = box.shape[0], gt.shape[0]
    pair_shape = (num_box, num_gt)

    box_grid = F.broadcast_to(F.expand_dims(box, 1), (num_box, num_gt, box.shape[1]))
    gt_grid = F.broadcast_to(F.expand_dims(gt, 0), (num_box, num_gt, gt.shape[1]))

    left = F.maximum(box_grid[:, :, 0], gt_grid[:, :, 0])
    top = F.maximum(box_grid[:, :, 1], gt_grid[:, :, 1])
    right = F.minimum(box_grid[:, :, 2], gt_grid[:, :, 2])
    bottom = F.minimum(box_grid[:, :, 3], gt_grid[:, :, 3])
    inter = F.maximum(right - left, 0) * F.maximum(bottom - top, 0)

    box_w = F.maximum(box[:, 2] - box[:, 0], 0)
    box_h = F.maximum(box[:, 3] - box[:, 1], 0)
    gt_w = F.maximum(gt[:, 2] - gt[:, 0], 0)
    gt_h = F.maximum(gt[:, 3] - gt[:, 1], 0)

    box_area = F.broadcast_to(F.expand_dims(box_w * box_h, 1), pair_shape)
    gt_area = F.broadcast_to(F.expand_dims(gt_w * gt_h, 0), pair_shape)

    union = box_area + gt_area - inter
    return F.maximum(inter / union, 0)
def inference(self, img):
    """Run the model on one image and return its post-processed outputs.

    Args:
        img: either a path string (loaded with ``cv2.imread``) or an
            already-loaded image array.

    Returns:
        ``(outputs, img_info)`` where ``outputs`` is the result of
        ``postprocess`` and ``img_info`` records ``id``, ``file_name``,
        ``height``, ``width``, ``raw_img`` and ``ratio``.

    Raises:
        ValueError: if ``img`` is a path that ``cv2.imread`` cannot load.
    """
    img_info = {"id": 0}

    file_name = None
    if isinstance(img, str):
        file_name = os.path.basename(img)
        img = cv2.imread(img)
        if img is None:
            raise ValueError("test image path is invalid!")
    img_info["file_name"] = file_name

    height, width = img.shape[:2]
    img_info.update(height=height, width=width, raw_img=img)

    img, ratio = preprocess(img, self.test_size, self.rgb_means, self.std)
    img_info["ratio"] = ratio

    # Add the batch axis expected by the model.
    batch = F.expand_dims(mge.tensor(img), 0)

    start = time.time()
    outputs = self.model(batch)
    outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)
    logger.info("Infer time: {:.4f}s".format(time.time() - start))
    return outputs, img_info
def _box_ltrb_to_cs_opr(bbox, addaxis=None): """ transform the left-top right-bottom encoding bounding boxes to center and size encodings""" bbox_width = bbox[:, 2] - bbox[:, 0] bbox_height = bbox[:, 3] - bbox[:, 1] bbox_ctr_x = bbox[:, 0] + 0.5 * bbox_width bbox_ctr_y = bbox[:, 1] + 0.5 * bbox_height if addaxis is None: return bbox_width, bbox_height, bbox_ctr_x, bbox_ctr_y else: return ( F.expand_dims(bbox_width, addaxis), F.expand_dims(bbox_height, addaxis), F.expand_dims(bbox_ctr_x, addaxis), F.expand_dims(bbox_ctr_y, addaxis), )
def test_expand_dims():
    """Check ``F.expand_dims`` against ``np.expand_dims`` on a 2x3 array.

    Covers a positive axis, a negative axis and two tuples mixing both.
    """
    data = np.arange(6, dtype="float32").reshape(2, 3)
    data_t = tensor(data)
    axes = (2, -3, (3, -4), (1, -4))
    for axis in axes:
        expected = np.expand_dims(data, axis)
        actual = F.expand_dims(data_t, axis)
        np.testing.assert_equal(expected, actual.numpy())
def forward(self, x):
    """Normalise ``x`` by its per-channel mean square, then apply a thresholded affine.

    ``x`` must be 4-D.  The mean of ``x ** 2`` over the two trailing axes is
    computed per (batch, channel), ``x`` is divided by the square root of
    that mean plus ``|self.eps|``, and the result is
    ``max(self.gamma * x + self.beta, self.tau)``.
    """
    B, C, _, _ = x.shape

    # Mean square over the flattened spatial axes, restored to [B, C, 1, 1].
    squared = F.pow(x, 2).reshape(B, C, -1)
    nu2 = squared.mean(axis=-1, keepdims=True)
    nu2 = F.expand_dims(nu2, axis=-1)

    normalized = x / F.sqrt(nu2 + F.abs(self.eps))
    return F.maximum(self.gamma * normalized + self.beta, self.tau)
def forward(self, in_tensor):
    """Compute a per-channel gate and broadcast it to the input's shape.

    The input is average-pooled with a window covering its full spatial
    extent (axes 2 and 3), passed through ``self.gate_c``, given two new
    axes at positions 2 and 3, and broadcast to ``in_tensor.shape``.
    """
    # assumes self.gate_c returns a (batch, channels) tensor -- TODO confirm
    spatial = (in_tensor.shape[2], in_tensor.shape[3])
    pooled = F.avg_pool2d(in_tensor, spatial, stride=spatial)
    gate = self.gate_c(pooled)
    gate = F.expand_dims(gate, axis=[2, 3])
    return F.broadcast_to(gate, in_tensor.shape)
def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
    """Run the RCNN head on pooled RoI features.

    Args:
        fpn_fms: list of FPN feature maps; the first entry is dropped and
            the rest are reversed to match strides ``[4, 8, 16, 32]``.
        rcnn_rois: RoIs; columns 1-4 are used as box coordinates at inference.
        labels: training labels (optional).
        bbox_targets: training regression targets (optional).

    Returns:
        In training mode a dict with ``cls_loss`` and ``bbox_loss``;
        otherwise the restored boxes concatenated with their softmax
        probabilities along axis 2.
    """
    # stride: 64,32,16,8,4 -> 4, 8, 16, 32
    fpn_fms = fpn_fms[1:]
    fpn_fms.reverse()
    stride = [4, 8, 16, 32]
    poo5, rcnn_rois, labels, bbox_targets = roi_pool(
        fpn_fms, rcnn_rois, stride, (7, 7), 'roi_align', labels, bbox_targets)

    poo5 = F.flatten(poo5, start_axis=1)
    fc1 = F.relu(self.fc1(poo5))
    fc2 = F.relu(self.fc2(fc1))
    cls_scores = self.cls(fc2)
    pred_boxes = self.bbox(fc2)
    prob = F.concat([pred_boxes, cls_scores], axis=1)

    if not self.training:
        # The last self.n columns are class scores, the rest box offsets.
        offsets, cls_scores = prob[:, :-self.n], prob[:, -self.n:]
        pred_bbox = offsets.reshape(-1, self.n, 4)
        cls_prob = F.softmax(cls_scores, axis=1)

        n = rcnn_rois.shape[0]
        rois = F.expand_dims(rcnn_rois[:, 1:5], axis=1)
        rois = F.broadcast_to(rois, (n, 1, 4)).reshape(-1, 4)

        normalized = config.rcnn_bbox_normalize_targets
        pred_boxes = restore_bbox(rois, pred_bbox, normalized, config)
        return F.concat([pred_boxes, F.expand_dims(cls_prob, axis=2)], axis=2)

    bbox_targets, labels = bbox_targets.reshape(-1, 4), labels.flatten()
    cls_loss = softmax_loss(cls_scores, labels)
    pred_boxes = pred_boxes.reshape(-1, self.n, 4)
    bbox_loss = smooth_l1_loss_rcnn(
        pred_boxes, bbox_targets, labels, config.rcnn_smooth_l1_beta)
    return {
        'cls_loss': cls_loss,
        'bbox_loss': bbox_loss,
    }
def test_AxisAddRemove():
    """Gradient through ``expand_dims`` followed by ``squeeze`` is all ones.

    A (1, 5) input gets an axis added at position 2 and axis 0 removed;
    back-propagating ones must give a (1, 5) gradient of ones.
    """
    x_np = np.random.rand(1, 5).astype("float32")
    x = TensorWrapper(x_np)

    grad = Grad().wrt(x, callback=save_to(x))

    expanded = F.expand_dims(x, 2)
    y = F.squeeze(expanded, 0)

    grad(y, F.ones_like(y))
    expected = np.array([[1, 1, 1, 1, 1]], dtype=np.float32)
    np.testing.assert_equal(expected, x.grad.numpy())
def get_flow_mge(H_mat_mul, patch_indices, image_size_h=600, image_size_w=800):
    """Convert per-strip 3x3 transforms into a dense flow field.

    The image is cut into ``divide = H_mat_mul.shape[1]`` horizontal strips
    of ``image_size_h // divide`` rows (the last strip takes any remainder).
    Every pixel is warped with the 3x3 matrix of its strip, the result is
    divided by its third component, and the original coordinates are
    subtracted.

    Args:
        H_mat_mul: transforms reshaped to ``(batch, divide, 3, 3)``.
        patch_indices: pixel coordinates laid out as (batch, 3, H, W);
            presumably homogeneous with a constant third channel -- confirm
            against the caller.
        image_size_h: image height in pixels.
        image_size_w: image width in pixels.

    Returns:
        Flow tensor: warped (x, y) minus ``patch_indices[:, :2]``.
    """
    batch_size = H_mat_mul.shape[0]
    divide = H_mat_mul.shape[1]
    H_mat_mul = mge.Tensor(H_mat_mul.reshape(batch_size, divide, 3, 3))

    strip_h = image_size_h // divide
    H_mat_pool = F.zeros((batch_size, image_size_h, image_size_w, 3, 3))
    for i in range(divide):
        row_start = i * strip_h
        # The final strip runs to the bottom edge to absorb the remainder.
        row_stop = image_size_h if i == divide - 1 else row_start + strip_h
        H_mat = F.expand_dims(F.expand_dims(H_mat_mul[:, i, :, :], 1), 1)
        H_mat = F.broadcast_to(
            H_mat, (batch_size, row_stop - row_start, image_size_w, 3, 3))
        H_mat_pool[:, row_start:row_stop, ...] = H_mat

    # Per-pixel matrix-vector product: (B, H, W, 3, 3) @ (B, H, W, 3, 1).
    coords = F.expand_dims(patch_indices.transpose(0, 2, 3, 1), 4)
    warped = F.matmul(H_mat_pool, coords)[:, :, :, :, 0].transpose(0, 3, 1, 2)

    # Small offset avoids dividing by an exactly-zero third component.
    denom = warped[:, 2:3, ...] + 1e-6
    v1 = warped[:, 0:1, ...] / denom
    v2 = warped[:, 1:2, ...] / denom
    warp_index = F.concat((v1, v2), 1)

    return warp_index - patch_indices[:, :2, ...]
def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
    """Run the two-prediction RCNN head, optionally with refinement.

    Args:
        fpn_fms: list of FPN feature maps; the first entry is dropped and
            the rest are reversed to match strides ``[4, 8, 16, 32]``.
        rcnn_rois: RoIs; columns 1-4 are used as box coordinates at inference.
        labels: training labels (optional).
        bbox_targets: training regression targets (optional).

    Returns:
        In training mode a dict with ``loss_rcnn_emd`` and, when
        ``self.refinement`` is enabled, ``final_rcnn_emd``.  Otherwise the
        restored boxes concatenated with their softmax probabilities along
        axis 2.
    """
    # stride: 64,32,16,8,4 -> 4, 8, 16, 32
    fpn_fms = fpn_fms[1:]
    fpn_fms.reverse()
    stride = [4, 8, 16, 32]
    poo5, rcnn_rois, labels, bbox_targets = roi_pool(
        fpn_fms, rcnn_rois, stride, (7, 7), 'roi_align', labels, bbox_targets)
    poo5 = F.flatten(poo5, start_axis=1)
    fc1 = F.relu(self.fc1(poo5))
    fc2 = F.relu(self.fc2(fc1))
    a = self.a(fc2)
    b = self.b(fc2)
    # Interleave the two predictions so each RoI owns two consecutive rows.
    prob = F.stack([a, b], axis=1).reshape(-1, a.shape[1])

    if self.refinement:
        final_prob = self.refinement_module(prob, fc2)

    if self.training:
        emd_loss = self.compute_gemini_loss(prob, bbox_targets, labels)
        loss_dict = {}
        loss_dict['loss_rcnn_emd'] = emd_loss
        # Gate on the same flag that creates final_prob.  Testing the
        # refinement_module callable itself is always true and would read
        # an unbound final_prob when refinement is disabled.
        if self.refinement:
            final_emd_loss = self.compute_gemini_loss(
                final_prob, bbox_targets, labels)
            loss_dict['final_rcnn_emd'] = final_emd_loss
        return loss_dict

    # NOTE(review): inference decodes the unrefined ``prob`` even when
    # refinement is enabled -- confirm this is intended.
    offsets, cls_scores = prob[:, :-self.n], prob[:, -self.n:]
    pred_bbox = offsets.reshape(-1, self.n, 4)
    cls_prob = F.softmax(cls_scores, axis=1)
    n = rcnn_rois.shape[0]
    # Each RoI is duplicated once per prediction.
    rois = F.broadcast_to(
        F.expand_dims(rcnn_rois[:, 1:5], axis=1), (n, 2, 4)).reshape(-1, 4)
    normalized = config.rcnn_bbox_normalize_targets
    pred_boxes = restore_bbox(rois, pred_bbox, normalized, config)
    pred_bbox = F.concat(
        [pred_boxes, F.expand_dims(cls_prob, axis=2)], axis=2)
    return pred_bbox
def test_expand_dims_for_scalar():
    """Check ``F.expand_dims`` on a 0-d tensor for valid and invalid axes.

    Valid axes must match ``np.expand_dims``; out-of-range axes must raise
    ``np.AxisError`` in NumPy and ``RuntimeError`` in ``F.expand_dims``.
    """
    scalar = np.array(1, dtype="float32")
    scalar_t = make_tensor(scalar, None)

    valid_axes = [0, -1, (0, 1), (-1, -2), (0, -1)]
    invalid_axes = [1, -2, (1, 2), (-2, -3)]

    for axis in valid_axes:
        expected = np.expand_dims(scalar, axis)
        actual = F.expand_dims(scalar_t, axis)
        np.testing.assert_equal(expected, actual.numpy())

    for axis in invalid_axes:
        np.testing.assert_raises(np.AxisError, np.expand_dims, scalar, axis)
        np.testing.assert_raises(RuntimeError, F.expand_dims, scalar_t, axis)
def recover_pred_boxes(self, rcnn_rois, prob, nhead):
    """Decode head outputs into boxes tagged with score and RoI column 0.

    Args:
        rcnn_rois: 2-D RoIs; column 0 is carried through as the last output
            channel and columns 1-4 are the box coordinates.
        prob: head output; after regrouping, the last ``self.n`` columns of
            each row are class scores and the rest are box offsets.
        nhead: number of predictions per RoI.

    Returns:
        Detached tensor whose last axis holds the restored box, the softmax
        probability and the RoI's first column.
    """
    num_rows = prob.shape[0]
    prob = prob.reshape(num_rows, nhead, -1)
    prob = prob.reshape(-1, prob.shape[2])
    cls_score, bbox_pred = prob[:, -self.n:], prob[:, :-self.n]
    cls_prob = F.softmax(cls_score, axis=1)

    # Repeat each RoI once per head so rows line up with the predictions.
    num_rois, roi_dim = rcnn_rois.shape
    rois = F.expand_dims(rcnn_rois, axis = 1)
    rois = F.broadcast_to(rois, (num_rois, nhead, roi_dim)).reshape(-1, roi_dim)

    bbox_pred = bbox_pred.reshape(num_rows * nhead, -1, 4)
    pred_boxes = restore_bbox(rois[:, 1:5], bbox_pred, config = config)
    cls_prob = F.expand_dims(cls_prob, axis=2)
    pred_boxes = F.concat([pred_boxes, cls_prob], axis=2)

    total, per_row = bbox_pred.shape[:2]
    bid = F.broadcast_to(F.expand_dims(rois[:, :1], axis=1), (total, per_row, 1))
    pred_boxes = F.concat([pred_boxes, bid], axis = 2)
    return pred_boxes.detach()
def refinement_module(self, prob, fc2):
    """Refine the first-stage predictions using them as extra features.

    The first-stage output is regrouped to ``5 * self.n`` columns per row,
    split into offsets and softmaxed scores, and the index-1 entry along the
    ``self.n`` axis is tiled and appended to a duplicated copy of ``fc2``.
    The result goes through ``self.fc3`` + ReLU and is split into two
    branches fed to ``self.q`` and ``self.r``.

    Returns:
        The two branch outputs stacked on axis 1 and reshaped to
        ``(-1, 10 * self.n)``.
    """
    grouped = prob.reshape(-1, 5*self.n)
    offsets, scores = grouped[:, :-self.n], grouped[:, -self.n:]
    offsets = offsets.reshape(-1, self.n, 4)
    cls_scores = F.expand_dims(F.softmax(scores, axis=1), axis=2)

    # (offsets, score) of the index-1 class for every row.
    pred_boxes = F.concat([offsets, cls_scores], axis=2)[:, 1]
    num_pred, pred_dim = pred_boxes.shape
    # NOTE(review): the repeat count 6 is hard-coded -- presumably sized to
    # match what self.fc3 expects; confirm against its input width.
    pred_boxes = F.expand_dims(pred_boxes, axis=1)
    pred_boxes = F.broadcast_to(pred_boxes, (num_pred, 6, pred_dim)).reshape(num_pred, -1)

    # Duplicate every RoI feature so there is one row per prediction.
    n, c = fc2.shape
    features = F.broadcast_to(F.expand_dims(fc2, axis=1), (n, 2, c)).reshape(-1, c)
    features = F.concat([features, pred_boxes], axis=1)
    features = self.relu(self.fc3(features))
    features = features.reshape(n, 2, -1).transpose(1, 0, 2)

    a = self.q(features[0])
    b = self.r(features[1])
    return F.stack([a, b], axis=1).reshape(-1, 10*self.n)
def test_expand_dims(is_varnode):
    """Check ``F.expand_dims`` against ``np.expand_dims`` on a 2x3 array.

    Args:
        is_varnode: when true the input is created inside a fresh
            ``Network``; otherwise ``make_tensor`` receives ``None``.
    """
    network = Network() if is_varnode else None

    data = np.arange(6, dtype="float32").reshape(2, 3)
    data_t = make_tensor(data, network)

    axes = (2, -3, (3, -4), (1, -4))
    for axis in axes:
        expected = np.expand_dims(data, axis)
        actual = F.expand_dims(data_t, axis)
        np.testing.assert_equal(expected, actual.numpy())
def generate_anchors_by_features(self, sizes, device):
    """Build the anchor points of every feature level.

    Args:
        sizes: one feature-map size per level; must contain
            ``self.num_features`` entries.
        device: device passed to ``create_anchor_grid``.

    Returns:
        List with one ``(-1, 2)`` tensor per level in which every grid point
        is repeated ``self.num_anchors`` times consecutively.
    """
    assert len(sizes) == self.num_features, (
        "input features expected {}, got {}".format(self.num_features, len(sizes))
    )

    def _points_for_level(size, stride):
        grid_x, grid_y = create_anchor_grid(size, self.offset, stride, device)
        grids = F.stack([grid_x, grid_y], axis=1)
        # FIXME: need F.repeat
        repeated = F.broadcast_to(
            F.expand_dims(grids, axis=1), (grids.shape[0], self.num_anchors, 2)
        )
        return repeated.reshape(-1, 2)

    return [_points_for_level(size, stride) for size, stride in zip(sizes, self.strides)]
def get_iou(boxes1: Tensor, boxes2: Tensor, return_ioa=False) -> Tensor:
    """Compute the IoU between all N x M pairs of two sets of boxes.

    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        boxes1 (Tensor): boxes tensor with shape (N, 4)
        boxes2 (Tensor): boxes tensor with shape (M, 4)
        return_ioa (bool): whether to also return the intersection over the
            area of ``boxes1``. Default: False

    Returns:
        iou (Tensor): IoU matrix, shape (N, M).  When ``return_ioa`` is true
        a tuple ``(iou, ioa)`` is returned instead, where ``ioa[i, j]`` is
        the intersection of the pair divided by the area of ``boxes1[i]``.
    """
    b_box1 = F.expand_dims(boxes1, axis=1)
    b_box2 = F.expand_dims(boxes2, axis=0)

    iw = F.minimum(b_box1[:, :, 2], b_box2[:, :, 2]) - F.maximum(
        b_box1[:, :, 0], b_box2[:, :, 0]
    )
    ih = F.minimum(b_box1[:, :, 3], b_box2[:, :, 3]) - F.maximum(
        b_box1[:, :, 1], b_box2[:, :, 1]
    )
    inter = F.maximum(iw, 0) * F.maximum(ih, 0)

    area_box1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area_box2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

    # Column vector (N, 1) so it broadcasts along the boxes2 axis.
    b_area_box1 = F.expand_dims(area_box1, axis=1)
    union = b_area_box1 + F.expand_dims(area_box2, axis=0) - inter
    overlaps = F.maximum(inter / union, 0)

    if return_ioa:
        # Dividing by the flat (N,) vector would align it with the M axis.
        ioa = F.maximum(inter / b_area_box1, 0)
        return overlaps, ioa

    return overlaps
def rpn_anchor_target_opr(gt_boxes, im_info, anchors):
    """Assign labels and regression targets to anchors for a whole batch.

    For each of the ``config.train_batch_per_gpu`` images the targets from
    ``_anchor_double_target`` are regrouped into pairs, and labels equal to
    0 are decreased by 1 for anchors that ``mask_anchor_opr`` marks with a
    negative label.

    Returns:
        ``(rpn_labels, rpn_target_boxes)`` with the per-image results
        stacked along a new leading axis.
    """
    labels_per_image, targets_per_image = [], []
    for i in range(config.train_batch_per_gpu):
        rpn_labels, rpn_target_boxes, _ = _anchor_double_target(
            gt_boxes[i], im_info[i], anchors)
        rpn_labels = rpn_labels.reshape(-1, 2)
        target_dim = rpn_target_boxes.shape[1]
        rpn_target_boxes = rpn_target_boxes.reshape(-1, 2, target_dim)

        # Mask the anchors overlapping with ignore regions.
        ignore_label = mask_anchor_opr(
            gt_boxes[i], im_info[i], anchors, rpn_labels[:, 0])
        is_zero = F.equal(rpn_labels, 0).astype(np.float32)
        is_masked = F.expand_dims(ignore_label < 0, 1).astype(np.float32)
        rpn_labels = rpn_labels - is_zero * is_masked

        labels_per_image.append(F.expand_dims(rpn_labels, 0))
        targets_per_image.append(F.expand_dims(rpn_target_boxes, 0))

    rpn_labels = F.concat(labels_per_image, axis=0)
    rpn_target_boxes = F.concat(targets_per_image, axis=0)
    return rpn_labels, rpn_target_boxes
def forward(self, fpn_fms, rcnn_rois, im_info=None, gt_boxes=None):
    """Run the RCNN head: classification and box regression on RoI features.

    Args:
        fpn_fms: mapping of feature maps, indexed with ``self.in_features``.
        rcnn_rois: proposals; columns 1-4 are the box coordinates.
        im_info: image info forwarded to ``get_ground_truth``.
        gt_boxes: ground-truth boxes forwarded to ``get_ground_truth``.

    Returns:
        In training mode a dict with ``loss_rcnn_cls`` and
        ``loss_rcnn_bbox``; otherwise ``(pred_bbox, pred_scores)`` where the
        scores exclude the first (background) softmax column.
    """
    rcnn_rois, labels, bbox_targets = self.get_ground_truth(
        rcnn_rois, im_info, gt_boxes)

    fpn_fms = [fpn_fms[x] for x in self.in_features]
    pool_features = layers.roi_pool(
        fpn_fms, rcnn_rois, self.stride, self.pooling_size, self.pooling_method,
    )
    roi_feature = F.flatten(pool_features, start_axis=1)
    roi_feature = F.relu(self.fc1(roi_feature))
    roi_feature = F.relu(self.fc2(roi_feature))
    pred_logits = self.pred_cls(roi_feature)
    pred_offsets = self.pred_delta(roi_feature)

    if not self.training:
        # slice 1 for removing background
        pred_scores = F.softmax(pred_logits, axis=1)[:, 1:]
        pred_offsets = pred_offsets.reshape(-1, 4)
        # rois (N, 4) -> (N, 1, 4) -> (N, num_classes, 4) -> (N * num_classes, 4)
        target_shape = (rcnn_rois.shape[0], self.cfg.num_classes, 4)
        base_rois = F.expand_dims(rcnn_rois[:, 1:5], axis=1)
        base_rois = F.broadcast_to(base_rois, target_shape).reshape(-1, 4)
        pred_bbox = self.box_coder.decode(base_rois, pred_offsets)
        return pred_bbox, pred_scores

    # loss for rcnn classification
    loss_rcnn_cls = F.loss.cross_entropy(pred_logits, labels, axis=1)

    # loss for rcnn regression: only foreground samples contribute, each
    # using the offsets predicted for its own class (label - 1).
    pred_offsets = pred_offsets.reshape(-1, self.cfg.num_classes, 4)
    num_samples = labels.shape[0]
    fg_mask = labels > 0
    fg_loss = layers.smooth_l1_loss(
        pred_offsets[fg_mask, labels[fg_mask] - 1],
        bbox_targets[fg_mask],
        self.cfg.rcnn_smooth_l1_beta,
    )
    loss_rcnn_bbox = fg_loss.sum() / F.maximum(num_samples, 1)

    return {
        "loss_rcnn_cls": loss_rcnn_cls,
        "loss_rcnn_bbox": loss_rcnn_bbox,
    }
def forward(self, input_ids, token_type_ids=None):
    """Sum word, position and token-type embeddings, then normalise.

    Args:
        input_ids: token ids; axis 1 is the sequence axis.
        token_type_ids: token-type ids; defaults to zeros shaped like
            ``input_ids``.

    Returns:
        The summed embeddings after ``self.LayerNorm`` and ``self.dropout``.
    """
    if token_type_ids is None:
        token_type_ids = F.zeros_like(input_ids)

    # Positions 0 .. seq_length - 1, shared by every row of the batch.
    seq_length = input_ids.shape[1]
    position_ids = F.linspace(0, seq_length - 1, seq_length).astype(np.int32)
    position_ids = F.broadcast_to(F.expand_dims(position_ids, 0), input_ids.shape)

    embeddings = (
        self.word_embeddings(input_ids)
        + self.position_embeddings(position_ids)
        + self.token_type_embeddings(token_type_ids)
    )
    return self.dropout(self.LayerNorm(embeddings))
def forward(self, pred_cls_list, rpn_num_prob_list, pred_reg_list, anchors_list, rpn_iou_list, boxes, im_info):
    """Compute the classification, box and IoU losses of the RPN.

    Args:
        pred_cls_list: per-level class predictions, concatenated on axis 1.
        rpn_num_prob_list: per-level predictions, concatenated on axis 1
            (the result is not used by any loss here).
        pred_reg_list: per-level box offsets, concatenated on axis 1.
        anchors_list: per-level anchors; a column holding the level index is
            appended before concatenation.
        rpn_iou_list: per-level IoU predictions, concatenated on axis 1.
        boxes: batched ground-truth boxes.
        im_info: batched image info.

    Returns:
        Dict with ``rpn_cls_loss``, ``rpn_bbox_loss`` and ``rpn_iou_loss``;
        the latter two are weighted by 2.
    """
    tagged_anchors = []
    for level, level_anchors in enumerate(anchors_list):
        level_col = level * F.ones([level_anchors.shape[0], 1]).to(level_anchors.device)
        tagged_anchors.append(F.concat([level_anchors, level_col], axis=1))
    all_anchors_final = F.concat(tagged_anchors, axis=0)

    rpn_bbox_offset_final = F.concat(pred_reg_list, axis=1)
    rpn_cls_prob_final = F.concat(pred_cls_list, axis=1)
    rpn_iou_prob_final = F.concat(rpn_iou_list, axis=1)
    # NOTE(review): this concatenation is never consumed; it is kept so the
    # function still validates ``rpn_num_prob_list`` exactly as before.
    _ = F.concat(rpn_num_prob_list, axis=1)

    rpn_labels, rpn_target_boxes = rpn_anchor_target_opr(
        boxes, im_info, all_anchors_final)
    ious_target = self.anchor_iou_target_opr(
        boxes, im_info, all_anchors_final, rpn_bbox_offset_final)

    # Targets come in pairs per anchor; only the first of each pair is used.
    n = rpn_labels.shape[0]
    target_boxes = rpn_target_boxes.reshape(n, -1, 2, 4).transpose(2, 0, 1, 3)[0]
    labels = rpn_labels.transpose(2, 0, 1)[0]

    cls_loss = sigmoid_cross_entropy_retina(
        rpn_cls_prob_final, labels,
        alpha=config.focal_loss_alpha, gamma=config.focal_loss_gamma)
    rpn_bbox_loss = smooth_l1_loss_retina(rpn_bbox_offset_final, target_boxes, labels)
    rpn_iou_loss = iou_l1_loss(
        rpn_iou_prob_final, ious_target, F.expand_dims(labels, axis=2))

    return {
        'rpn_cls_loss': cls_loss,
        'rpn_bbox_loss': 2 * rpn_bbox_loss,
        'rpn_iou_loss': 2 * rpn_iou_loss,
    }
def restore_bbox(rois, deltas, unnormalize=True, config=None):
    """Apply predicted deltas to RoIs and return the decoded boxes.

    Args:
        rois: 2-D RoI tensor, one row per entry of ``deltas`` axis 0.
        deltas: 3-D tensor of deltas; axis 1 enumerates the predictions per RoI.
        unnormalize: when true, deltas are multiplied by
            ``config.bbox_normalize_stds`` and shifted by
            ``config.bbox_normalize_means`` first.
        config: object providing the normalisation arrays; it is
            dereferenced whenever ``unnormalize`` is true.

    Returns:
        Decoded boxes reshaped to ``(-1, deltas.shape[1], box_dim)``.
    """
    assert deltas.ndim == 3
    if unnormalize:
        std_opr = mge.tensor(config.bbox_normalize_stds.reshape(1, 1, -1))
        mean_opr = mge.tensor(config.bbox_normalize_means.reshape(1, 1, -1))
        deltas = deltas * std_opr + mean_opr

    num_rois, num_pred = deltas.shape[0], deltas.shape[1]
    roi_dim = rois.shape[1]

    # Repeat every RoI once per prediction, then flatten both to 2-D.
    all_rois = F.broadcast_to(F.expand_dims(rois, 1), (num_rois, num_pred, roi_dim))
    all_rois = all_rois.reshape(-1, roi_dim)
    flat_deltas = deltas.reshape(-1, deltas.shape[2])

    pred_bbox = bbox_transform_inv_opr(all_rois, flat_deltas)
    return pred_bbox.reshape(-1, num_pred, pred_bbox.shape[1])