def __call__(self, bbox_head_out, rois, im_shape, scale_factor):
    """Decode bbox-head deltas into per-class boxes clipped to the image.

    Args:
        bbox_head_out: Pair ``(bbox_pred, cls_prob)``. ``bbox_pred`` is
            passed to ``delta2bbox`` as the regression deltas; the last
            column of ``cls_prob`` is dropped from the returned scores.
        rois: Pair ``(roi, rois_num)``. ``roi`` is iterated per image and
            concatenated; ``rois_num[idx]`` is the number of proposals of
            image ``idx``.
        im_shape: Tensor indexed as ``im_shape[idx, :]`` with two values
            per image; column 0 is used as the height bound and column 1
            as the width bound when clipping.
        scale_factor: Not used by this method; kept so the call signature
            stays unchanged for existing callers.

    Returns:
        ``((bbox, rois_num), scores)`` where ``bbox`` has shape
        ``[-1, C, 4]`` with coordinates clamped to ``[0, w]`` / ``[0, h]``
        and ``scores`` is ``cls_prob[:, :-1]``.
    """
    bbox_pred, cls_prob = bbox_head_out
    roi, rois_num = rois

    # Repeat every image's shape once per proposal so the clipping bounds
    # line up row-for-row with the concatenated proposals.
    origin_shape = paddle.concat([
        paddle.expand(im_shape[idx, :], [rois_num[idx], 2])
        for idx, _ in enumerate(roi)
    ])

    # Decoded boxes are handled as [N, C*4] here and regrouped below.
    bbox = delta2bbox(bbox_pred, paddle.concat(roi), self.prior_box_var)
    scores = cls_prob[:, :-1]

    # Regroup the flat coordinates into [N, C, 4].
    bbox_num_class = bbox.shape[1] // 4
    bbox = paddle.reshape(bbox, [-1, bbox_num_class, 4])

    # Shape [N, 1] so the bounds broadcast across the class axis.
    origin_h = paddle.unsqueeze(origin_shape[:, 0], axis=1)
    origin_w = paddle.unsqueeze(origin_shape[:, 1], axis=1)
    zeros = paddle.zeros_like(origin_h)

    # Clamp x coordinates to [0, w] and y coordinates to [0, h].
    x1 = paddle.maximum(paddle.minimum(bbox[:, :, 0], origin_w), zeros)
    y1 = paddle.maximum(paddle.minimum(bbox[:, :, 1], origin_h), zeros)
    x2 = paddle.maximum(paddle.minimum(bbox[:, :, 2], origin_w), zeros)
    y2 = paddle.maximum(paddle.minimum(bbox[:, :, 3], origin_h), zeros)
    bbox = paddle.stack([x1, y1, x2, y2], axis=-1)

    bboxes = (bbox, rois_num)
    return bboxes, scores
def get_bboxes_single(self,
                      anchors,
                      cls_scores_list,
                      bbox_preds_list,
                      im_shape,
                      scale_factor,
                      rescale=True):
    """Decode every level's predictions into boxes and sigmoid scores.

    Args:
        anchors: Per-level anchors, iterated in lockstep with the two
            prediction lists.
        cls_scores_list: Per-level class scores; each entry is reshaped to
            ``[-1, self.num_classes]``.
        bbox_preds_list: Per-level box deltas; each entry is reshaped to
            ``[-1, 4]``. Must have the same length as ``cls_scores_list``.
        im_shape: Not read by this method.
        scale_factor: Sliceable tensor; its reversed form is concatenated
            with itself and used as the divisor when ``rescale`` is true.
        rescale: When true, divide the decoded boxes by the scale factors.

    Returns:
        ``(mlvl_bboxes, mlvl_scores)``: the concatenated decoded boxes and
        the concatenated sigmoid scores transposed with ``[1, 0]``.
    """
    assert len(cls_scores_list) == len(bbox_preds_list)

    boxes_per_level = []
    scores_per_level = []
    levels = zip(anchors, cls_scores_list, bbox_preds_list)
    for level_anchor, level_scores, level_deltas in levels:
        logits = level_scores.reshape([-1, self.num_classes])
        deltas = level_deltas.reshape([-1, 4])

        # Keep only the `nms_pre` rows with the highest per-row max score
        # when a limit is configured and this level exceeds it.
        if self.nms_pre is not None and logits.shape[0] > self.nms_pre:
            row_best = logits.max(axis=1)
            keep_inds = row_best.topk(self.nms_pre)[1]
            deltas = deltas.gather(keep_inds)
            level_anchor = level_anchor.gather(keep_inds)
            logits = logits.gather(keep_inds)

        decoded = delta2bbox(deltas, level_anchor, self.weights)
        boxes_per_level.append(decoded.squeeze())
        scores_per_level.append(F.sigmoid(logits))

    mlvl_bboxes = paddle.squeeze(paddle.concat(boxes_per_level))
    if rescale:
        reversed_scale = scale_factor[::-1]
        divisor = paddle.concat([reversed_scale, reversed_scale])
        mlvl_bboxes = mlvl_bboxes / divisor

    mlvl_scores = paddle.concat(scores_per_level).transpose([1, 0])
    return mlvl_bboxes, mlvl_scores
def forward(self, features, bboxes, pro_features, pooler):
    """
    Pool per-box features, refine the proposal features through
    self-attention, instance interaction and a feed-forward block, then
    predict class logits and refined boxes.

    :param features: forwarded unchanged to ``pooler``
    :param bboxes: (N, nr_boxes, 4)
    :param pro_features: (N, nr_boxes, d_model)
    :param pooler: callable invoked as ``pooler(features, boxes, roi_num)``
        with one box tensor per image and an int32 count tensor
    :return: ``(class_logits, pred_bboxes, obj_features)``; the first two
        are reshaped to ``(N, nr_boxes, -1)``
    """
    batch, num_boxes = bboxes.shape[:2]
    total = batch * num_boxes

    # ROI pooling: one box tensor per image, the same box count for each.
    boxes_per_image = [bboxes[i] for i in range(batch)]
    roi_num = paddle.full([batch], num_boxes).astype("int32")
    roi_features = pooler(features, boxes_per_image, roi_num)
    roi_features = roi_features.reshape([total, self.d_model, -1])
    roi_features = roi_features.transpose(perm=[2, 0, 1])

    # Self-attention over the proposals, residual add, then norm.
    queries = pro_features.reshape([batch, num_boxes, self.d_model])
    attended = self.self_attn(queries, queries, value=queries)
    residual = queries.transpose(perm=[1, 0, 2])
    attended = attended.transpose(perm=[1, 0, 2])
    queries = self.norm1(residual + self.dropout1(attended))

    # Instance interaction between proposal features and pooled features.
    queries = queries.reshape([num_boxes, batch, self.d_model])
    queries = queries.transpose(perm=[1, 0, 2])
    queries = queries.reshape([1, total, self.d_model])
    interacted = self.inst_interact(queries, roi_features)
    obj_features = self.norm2(queries + self.dropout2(interacted))

    # Feed-forward block with residual connection.
    hidden = self.linear1(obj_features)
    hidden = self.activation(hidden)
    hidden = self.dropout(hidden)
    ffn_out = self.linear2(hidden)
    obj_features = self.norm3(obj_features + self.dropout3(ffn_out))

    # Separate classification and regression towers on cloned inputs.
    fc_feature = obj_features.transpose(perm=[1, 0, 2]).reshape([total, -1])
    cls_feature = fc_feature.clone()
    reg_feature = fc_feature.clone()
    for layer in self.cls_module:
        cls_feature = layer(cls_feature)
    for layer in self.reg_module:
        reg_feature = layer(reg_feature)

    class_logits = self.class_logits(cls_feature)
    bboxes_deltas = self.bboxes_delta(reg_feature)
    flat_boxes = bboxes.reshape([-1, 4])
    pred_bboxes = delta2bbox(bboxes_deltas, flat_boxes, self.bbox_weights)

    out_shape = [batch, num_boxes, -1]
    return (class_logits.reshape(out_shape), pred_bboxes.reshape(out_shape),
            obj_features)
def __call__(self, bbox_head_out, rois, im_shape, scale_factor):
    """Decode bbox-head deltas into per-class boxes clipped to the image.

    Args:
        bbox_head_out: Sequence whose item 0 is ``bbox_pred`` (shape
            ``[N, C*4]``) and item 1 is ``cls_prob``; the last column of
            ``cls_prob`` is dropped from the returned scores.
        rois: Sequence whose item 0 is ``roi`` (iterated per image and
            concatenated) and item 1 is ``rois_num``, with
            ``rois_num[idx]`` the number of proposals of image ``idx``.
        im_shape: Tensor indexed as ``im_shape[idx, :]`` with two values
            per image; column 0 is used as the height bound and column 1
            as the width bound when clipping.
        scale_factor: Not used by this method; kept so the call signature
            stays unchanged for existing callers.

    Returns:
        ``((bbox, rois_num), scores)`` where ``bbox`` is ``[N, C, 4]``
        (tiled to ``self.num_classes`` along axis 1 when ``C == 1``) with
        coordinates clamped to ``[0, w]`` / ``[0, h]``, and ``scores`` is
        ``cls_prob[:, :-1]``.
    """
    bbox_pred = bbox_head_out[0]
    cls_prob = bbox_head_out[1]
    roi = rois[0]
    rois_num = rois[1]

    # Repeat every image's shape once per proposal so the clipping bounds
    # line up row-for-row with the concatenated proposals.
    origin_shape = paddle.concat([
        paddle.expand(im_shape[idx, :], [rois_num[idx], 2])
        for idx, _ in enumerate(roi)
    ])

    # bbox_pred.shape: [N, C*4]
    # C=num_classes in faster/mask rcnn(bbox_head), C=1 in cascade rcnn(cascade_head)
    bbox = paddle.concat(roi)
    if bbox.shape[0] == 0:
        # No proposals: build an empty placeholder with the same rank as
        # the decoded boxes ([N, C, 4]) so the rank-3 indexing and tiling
        # below remain valid.
        bbox = paddle.zeros(
            [0, bbox_pred.shape[1] // 4, 4], dtype='float32')
    else:
        bbox = delta2bbox(bbox_pred, bbox, self.prior_box_var)
    scores = cls_prob[:, :-1]

    # bbox.shape: [N, C, 4]
    # bbox.shape[1] must be equal to scores.shape[1]
    bbox_num_class = bbox.shape[1]
    if bbox_num_class == 1:
        bbox = paddle.tile(bbox, [1, self.num_classes, 1])

    # Shape [N, 1] so the bounds broadcast across the class axis.
    origin_h = paddle.unsqueeze(origin_shape[:, 0], axis=1)
    origin_w = paddle.unsqueeze(origin_shape[:, 1], axis=1)
    zeros = paddle.zeros_like(origin_h)

    # Clamp x coordinates to [0, w] and y coordinates to [0, h].
    x1 = paddle.maximum(paddle.minimum(bbox[:, :, 0], origin_w), zeros)
    y1 = paddle.maximum(paddle.minimum(bbox[:, :, 1], origin_h), zeros)
    x2 = paddle.maximum(paddle.minimum(bbox[:, :, 2], origin_w), zeros)
    y2 = paddle.maximum(paddle.minimum(bbox[:, :, 3], origin_h), zeros)
    bbox = paddle.stack([x1, y1, x2, y2], axis=-1)

    bboxes = (bbox, rois_num)
    return bboxes, scores