def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
    # type: (Tensor, Tensor, List[Tuple[int, int]], List[int])
    num_images = proposals.shape[0]
    device = proposals.device
    # do not backprop through objectness
    objectness = objectness.detach()
    objectness = objectness.reshape(num_images, -1)

    levels = [
        torch.full((n,), idx, dtype=torch.int64, device=device)
        for idx, n in enumerate(num_anchors_per_level)
    ]
    levels = torch.cat(levels, 0)
    levels = levels.reshape(1, -1).expand_as(objectness)

    # select top_n boxes independently per level before applying nms
    top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

    image_range = torch.arange(num_images, device=device)
    batch_idx = image_range[:, None]

    objectness = objectness[batch_idx, top_n_idx]
    levels = levels[batch_idx, top_n_idx]
    proposals = proposals[batch_idx, top_n_idx]

    final_boxes = []
    final_scores = []
    for boxes, scores, lvl, img_shape in zip(proposals, objectness, levels, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, img_shape)
        keep = box_ops.remove_small_boxes(boxes, self.min_size)
        boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]
        # non-maximum suppression, independently done per level
        keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.post_nms_top_n()]
        boxes, scores = boxes[keep], scores[keep]
        final_boxes.append(boxes)
        final_scores.append(scores)
    return final_boxes, final_scores
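# Illustrative sketch (not from the original source) of the batched fancy-indexing
# used in filter_proposals above: image_range[:, None] has shape [num_images, 1],
# so it broadcasts against the [num_images, k] top_n_idx and each image gathers
# its own top-scoring entries in a single indexing op.
objectness_demo = torch.rand(2, 6)                          # 2 images, 6 anchors
top_n_idx_demo = objectness_demo.topk(3, dim=1).indices     # shape [2, 3]
batch_idx_demo = torch.arange(2)[:, None]                   # shape [2, 1]
selected = objectness_demo[batch_idx_demo, top_n_idx_demo]  # shape [2, 3]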
def batched_nms(
    boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float
):
    """
    Same as torchvision.ops.boxes.batched_nms, but safer.
    """
    assert boxes.shape[-1] == 4
    # TODO may need better strategy.
    # Investigate after having a fully-cuda NMS op.
    if len(boxes) < 40000:
        # fp16 does not have enough range for batched NMS
        return box_ops.batched_nms(boxes.float(), scores, idxs, iou_threshold)

    result_mask = scores.new_zeros(scores.size(), dtype=torch.bool)
    for id in torch.jit.annotate(List[int], torch.unique(idxs).cpu().tolist()):
        mask = (idxs == id).nonzero().view(-1)
        keep = nms(boxes[mask], scores[mask], iou_threshold)
        result_mask[mask[keep]] = True
    keep = result_mask.nonzero().view(-1)
    keep = keep[scores[keep].argsort(descending=True)]
    return keep
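# Minimal usage sketch for the safer batched_nms above (inputs are synthetic
# assumptions): 100 random xyxy boxes with 3 class ids; the returned `keep`
# holds indices of surviving boxes sorted by descending score.
boxes_demo = torch.rand(100, 4) * 100
boxes_demo[:, 2:] += boxes_demo[:, :2]  # ensure x2 > x1 and y2 > y1
scores_demo = torch.rand(100)
idxs_demo = torch.randint(0, 3, (100,))
keep_demo = batched_nms(boxes_demo, scores_demo, idxs_demo, iou_threshold=0.5)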
def postprocess(self, x, anchors, regression, classification):
    # modified from https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch/blob/master/utils/utils.py
    transformed_anchors = self.regressBoxes(anchors, regression)
    transformed_anchors = self.clipBoxes(transformed_anchors, x)
    scores = torch.max(classification, dim=2, keepdim=True)[0]
    scores_over_thresh = (scores > self.nms_score_thresh)[:, :, 0]
    out = []
    for i in range(x.shape[0]):
        if scores_over_thresh[i].sum() == 0:
            out.append({
                'boxes': torch.tensor(()),
                'labels': torch.tensor(()),
                'scores': torch.tensor(()),
            })
            continue

        classification_per = classification[i, scores_over_thresh[i, :], ...].permute(1, 0)
        transformed_anchors_per = transformed_anchors[i, scores_over_thresh[i, :], ...]
        scores_per = scores[i, scores_over_thresh[i, :], ...]
        scores_, classes_ = classification_per.max(dim=0)
        anchors_nms_idx = batched_nms(transformed_anchors_per, scores_per[:, 0], classes_,
                                      iou_threshold=self.nms_iou_thresh)

        if anchors_nms_idx.shape[0] != 0:
            # 0 is background and gets removed in metric, but is first class in model
            classes_ = classes_[anchors_nms_idx] + 1
            scores_ = scores_[anchors_nms_idx]
            boxes_ = transformed_anchors_per[anchors_nms_idx, :]
            out.append({
                'boxes': boxes_.cpu(),
                'labels': classes_.cpu(),
                'scores': scores_.cpu(),
            })
        else:
            out.append({
                'boxes': torch.tensor(()),
                'labels': torch.tensor(()),
                'scores': torch.tensor(()),
            })
    return out
def post_process(self, cls_predicts, box_predicts, valid_size):
    predicts = list()
    for cls, box, wh in zip(cls_predicts, box_predicts, valid_size):
        # clip boxes to the valid image region
        box[..., [0, 2]] = box[..., [0, 2]].clamp(min=0, max=wh[0])
        box[..., [1, 3]] = box[..., [1, 3]].clamp(min=0, max=wh[1])
        scores = cls.softmax(dim=-1)
        # drop the background column (index 0)
        scores = scores[:, 1:]
        labels = torch.arange(scores.shape[-1], device=cls.device)
        labels = labels.view(1, -1).expand_as(scores)
        # make every class prediction a separate instance
        boxes = box.unsqueeze(1).repeat(1, scores.shape[-1], 1).reshape(-1, 4)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)
        # remove low scoring boxes
        inds = torch.nonzero(scores > self.box_score_thresh, as_tuple=False).squeeze(1)
        boxes, scores, labels = boxes[inds], scores[inds], labels[inds]
        # remove degenerate boxes
        keep = ((boxes[..., 2] - boxes[..., 0]) > 1e-2) & ((boxes[..., 3] - boxes[..., 1]) > 1e-2)
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
        # class-wise non-maximum suppression
        keep = batched_nms(boxes, scores, labels, self.box_nms_thresh)
        keep = keep[:self.box_detections_per_img]
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
        pred = torch.cat([boxes, scores[:, None], labels[:, None]], dim=-1)
        predicts.append(pred)
    return predicts
def __getitem__(self, idx):
    image, target, im_id = self.dataset[idx]
    boxes = torch.cat([target['boxes_h'], target['boxes_o']])
    # Convert ground truth boxes to zero-based index and the
    # representation from pixel indices to coordinates
    boxes[:, :2] -= 1
    labels = torch.cat([49 * torch.ones_like(target['object']), target['object']])
    # Remove overlapping ground truth boxes
    keep = batched_nms(boxes, torch.ones(len(boxes)), labels, iou_threshold=self.nms_thresh)
    boxes = boxes[keep]
    labels = labels[keep]
    # Convert HICODet object indices to COCO indices
    converted_labels = torch.as_tensor([self.conversion[i.item()] for i in labels])
    # Apply transform
    image, target = self.transforms(image, dict(boxes=boxes, labels=converted_labels))
    return image, target, im_id
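# Sketch of the deduplication trick above (synthetic boxes): with uniform scores,
# class-aware NMS collapses identical ground-truth boxes of the same label while
# leaving boxes of other labels untouched.
gt_boxes = torch.tensor([[0., 0., 10., 10.], [0., 0., 10., 10.], [20., 20., 30., 30.]])
gt_labels = torch.tensor([49, 49, 3])
kept = batched_nms(gt_boxes, torch.ones(len(gt_boxes)), gt_labels, iou_threshold=0.5)
# one of the two identical label-49 boxes is suppressed; the label-3 box survives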
def postprocess(x, anchors, regression, classification, regressBoxes, clipBoxes,
                threshold, iou_threshold):
    transformed_anchors = regressBoxes(anchors, regression)
    transformed_anchors = clipBoxes(transformed_anchors, x)
    scores = torch.max(classification, dim=2, keepdim=True)[0]
    scores_over_thresh = (scores > threshold)[:, :, 0]
    out = []
    for i in range(x.shape[0]):
        if scores_over_thresh[i].sum() == 0:
            out.append({
                'rois': np.array(()),
                'class_ids': np.array(()),
                'scores': np.array(()),
            })
            continue

        classification_per = classification[i, scores_over_thresh[i, :], ...].permute(1, 0)
        transformed_anchors_per = transformed_anchors[i, scores_over_thresh[i, :], ...]
        scores_per = scores[i, scores_over_thresh[i, :], ...]
        scores_, classes_ = classification_per.max(dim=0)
        anchors_nms_idx = batched_nms(transformed_anchors_per, scores_per[:, 0], classes_,
                                      iou_threshold=iou_threshold)

        if anchors_nms_idx.shape[0] != 0:
            classes_ = classes_[anchors_nms_idx]
            scores_ = scores_[anchors_nms_idx]
            boxes_ = transformed_anchors_per[anchors_nms_idx, :]
            out.append({
                'rois': boxes_.cpu().numpy(),
                'class_ids': classes_.cpu().numpy(),
                'scores': scores_.cpu().numpy(),
            })
        else:
            out.append({
                'rois': np.array(()),
                'class_ids': np.array(()),
                'scores': np.array(()),
            })
    return out
def nms(rgb_info, thermal_info):
    # RGB boxes append thermal boxes
    # Order: len(RGB) | len(thermal)
    # Boxes
    boxes = rgb_info['bbox'].copy()
    boxes.extend(thermal_info['bbox'])
    boxes = torch.Tensor(boxes)
    # Scores
    scores = rgb_info['score'].copy()
    scores.extend(thermal_info['score'])
    scores = torch.Tensor(scores)
    # Classes
    classes = rgb_info['class'].copy()
    classes.extend(thermal_info['class'])
    classes = torch.Tensor(classes)
    # Perform nms
    iou_threshold = 0.7
    keep_id = box_ops.batched_nms(boxes, scores, classes, iou_threshold)
    # Add to output
    out_boxes = Boxes(boxes[keep_id])
    out_scores = torch.Tensor(scores[keep_id])
    out_class = torch.Tensor(classes[keep_id])
    return out_boxes, out_scores, out_class
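# Hypothetical inputs for the RGB/thermal fusion above (dict layout inferred from
# the function body; values illustrative, and detectron2's Boxes plus torchvision's
# box_ops are assumed importable as in the function): overlapping detections of
# the same class across the two modalities are merged by the shared NMS pass.
rgb_info = {'bbox': [[0, 0, 10, 10]], 'score': [0.9], 'class': [1]}
thermal_info = {'bbox': [[1, 1, 11, 11]], 'score': [0.8], 'class': [1]}
out_boxes, out_scores, out_class = nms(rgb_info, thermal_info)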
def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    results = []
    for i in range(num_images):
        # multiclass nms
        keep = batched_nms(boxlists[i].bbox, boxlists[i].get_field("scores"),
                           boxlists[i].get_field("labels"), self.nms_thresh)
        result = boxlists[i][keep]
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.fpn_post_nms_top_n > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(), number_of_detections - self.fpn_post_nms_top_n + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        results.append(result)
    return results
def post_process(self, box_predicts, cls_predicts, shapes):
    '''
    :param box_predicts (len=bs): list of box_predict, box_predict=[n_p+n_n, 4], 4 ==> x1, y1, x2, y2
    :param cls_predicts (len=bs): list of cls_predict, cls_predict=[n_p+n_n, num_cls]
    :param shapes (len=bs): list of shape
    :return:
    '''
    ret_dets = list()
    for box, cls, shape in zip(box_predicts, cls_predicts, shapes):
        score = cls.softmax(dim=-1)
        max_val, max_idx = score.max(dim=-1)
        thresh_mask = max_val > self.cfg['box_score_thresh']
        # index 0 is the background class, so only strictly positive
        # indices correspond to foreground detections
        positive_mask = max_idx > 0
        valid_mask = thresh_mask & positive_mask
        if valid_mask.sum() == 0:
            ret_dets.append(None)
            continue
        nms_box = box[valid_mask]
        nms_scores = max_val[valid_mask]
        nms_label_idx = max_idx[valid_mask] - 1
        idx = batched_nms(nms_box, nms_scores, nms_label_idx, self.cfg['box_nms_thresh'])
        valid_idx = idx[:self.cfg['box_detections_per_img']]
        detects = torch.cat([
            nms_box[valid_idx],
            nms_scores[valid_idx].unsqueeze(-1),
            nms_label_idx[valid_idx].unsqueeze(-1)
        ], dim=-1)
        detects[..., [0, 2]] = detects[..., [0, 2]].clamp(min=0, max=shape[0])
        detects[..., [1, 3]] = detects[..., [1, 3]].clamp(min=0, max=shape[1])
        ret_dets.append(detects)
    return ret_dets
def forward(self, x):
    """Forward function for the BboxNMS layer

    :param x: tuple containing Custom SSD output data as well as the decoded
        boxes, scores and classes
    :type x: tuple(torch.Tensor, torch.Tensor, list, list, list)
    :return: tuple containing Custom SSD output data as well as the nms filtered
        boxes, scores and classes
    :rtype: tuple(torch.Tensor, torch.Tensor, list, list, list)
    """
    total_nms_boxes = list()
    total_nms_scores = list()
    total_nms_classes = list()
    batch_encoded_cls, batch_encoded_reg, batch_boxes, batch_scores, batch_classes = x
    for scores, boxes, classes in zip(batch_scores, batch_boxes, batch_classes):
        # xywh -> xyxy for NMS
        boxes[:, 2] += boxes[:, 0]
        boxes[:, 3] += boxes[:, 1]
        chosen_ids = box_ops.batched_nms(boxes.float(), scores, classes, self.nms_thres)
        nms_scores = scores[chosen_ids]
        nms_boxes = boxes[chosen_ids]
        # xyxy -> xywh for the output
        nms_boxes[:, 2] -= nms_boxes[:, 0]
        nms_boxes[:, 3] -= nms_boxes[:, 1]
        nms_classes = classes[chosen_ids]
        total_nms_boxes.append(nms_boxes)
        total_nms_scores.append(nms_scores)
        total_nms_classes.append(nms_classes)
    return batch_encoded_cls, batch_encoded_reg, total_nms_boxes, total_nms_scores, total_nms_classes
def postprocess_detections_spo(self,
                               class_logits,    # type: Tensor
                               sbj_cls_scores,  # type: Tensor
                               box_regression,  # type: Tensor
                               proposals,       # type: List[Tensor]
                               image_shapes     # type: List[Tuple[int, int]]
                               ):
    # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)
    sbj_cls_scores = F.softmax(sbj_cls_scores, -1)

    pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
    pred_scores_list = pred_scores.split(boxes_per_image, 0)
    sbj_cls_scores_list = sbj_cls_scores.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    for boxes, scores, sbj_scores, image_shape in zip(pred_boxes_list, pred_scores_list,
                                                      sbj_cls_scores_list, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        sbj_scores = sbj_scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.reshape(-1)
        sbj_scores = sbj_scores.reshape(-1)
        labels = labels.reshape(-1)

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
        boxes, scores, sbj_scores, labels = boxes[inds], scores[inds], sbj_scores[inds], labels[inds]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, sbj_scores, labels = boxes[keep], scores[keep], sbj_scores[keep], labels[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, sbj_scores, labels = boxes[keep], scores[keep], sbj_scores[keep], labels[keep]

        all_boxes.append(boxes)
        all_scores.append(sbj_scores)
        all_labels.append(labels)

    return all_boxes, all_scores, all_labels
def generate_detections(
        cls_outputs, box_outputs, anchor_boxes, indices, classes,
        img_scale: Optional[torch.Tensor], img_size: Optional[torch.Tensor],
        max_det_per_image: int = MAX_DETECTIONS_PER_IMAGE, soft_nms: bool = False):
    """Generates detections with RetinaNet model outputs and anchors.

    Args:
        cls_outputs: a torch tensor with shape [N, 1], which has the highest class
            scores on all feature levels. The N is the number of selected
            top-K total anchors on all levels. (k being MAX_DETECTION_POINTS)

        box_outputs: a torch tensor with shape [N, 4], which stacks box regression
            outputs on all feature levels. The N is the number of selected top-k
            total anchors on all levels. (k being MAX_DETECTION_POINTS)

        anchor_boxes: a torch tensor with shape [N, 4], which stacks anchors on all
            feature levels. The N is the number of selected top-k total anchors on
            all levels.

        indices: a torch tensor with shape [N], which is the indices from top-k selection.

        classes: a torch tensor with shape [N], which represents the class
            prediction on all selected anchors from top-k selection.

        img_scale: a float tensor representing the scale between original image
            and input image for the detector. It is used to rescale detections for
            evaluating with the original groundtruth annotations.

        img_size: a tensor with the model input image size; together with
            img_scale it is used to clip boxes before NMS.

        max_det_per_image: an int constant, added as argument to make torchscript happy

        soft_nms: if True, use gaussian soft-NMS instead of hard NMS.

    Returns:
        detections: detection results in a tensor with shape [max_det_per_image, 6],
            each row representing [x, y, width, height, score, class]
    """
    assert box_outputs.shape[-1] == 4
    assert anchor_boxes.shape[-1] == 4
    assert cls_outputs.shape[-1] == 1

    anchor_boxes = anchor_boxes[indices, :]

    # apply bounding box regression to anchors
    boxes = decode_box_outputs(box_outputs.float(), anchor_boxes, output_xyxy=True)
    if img_scale is not None and img_size is not None:
        boxes = clip_boxes_xyxy(boxes, img_size / img_scale)  # clip before NMS better?

    scores = cls_outputs.sigmoid().squeeze(1).float()
    if soft_nms:
        top_detection_idx, soft_scores = batched_soft_nms(
            boxes, scores, classes, method_gaussian=True, iou_threshold=0.3, score_threshold=.001)
        scores[top_detection_idx] = soft_scores
    else:
        top_detection_idx = batched_nms(boxes, scores, classes, iou_threshold=0.5)

    # keep only topk scoring predictions
    top_detection_idx = top_detection_idx[:max_det_per_image]
    boxes = boxes[top_detection_idx]
    scores = scores[top_detection_idx, None]
    classes = classes[top_detection_idx, None]

    # xyxy to xywh & rescale to original image
    boxes[:, 2] -= boxes[:, 0]
    boxes[:, 3] -= boxes[:, 1]
    if img_scale is not None:
        boxes *= img_scale

    classes += 1  # back to class idx with background class = 0

    # stack em and pad out to MAX_DETECTIONS_PER_IMAGE if necessary
    detections = torch.cat([boxes, scores, classes.float()], dim=1)
    if len(top_detection_idx) < max_det_per_image:
        detections = torch.cat([
            detections,
            torch.zeros((max_det_per_image - len(top_detection_idx), 6),
                        device=detections.device, dtype=detections.dtype)
        ], dim=0)
    return detections
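# Sketch of the fixed-size output contract above (max_det_per_image assumed 100):
# callers always receive a [100, 6] tensor, zero-padded after the last real
# detection, which keeps the output shape static for torchscript.
dets_demo = torch.rand(37, 6)
pad_demo = torch.zeros(100 - dets_demo.shape[0], 6)
padded_demo = torch.cat([dets_demo, pad_demo], dim=0)  # -> shape [100, 6]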
def filter_proposals(self, proposals, objectness, num_anchors_per_layer, shapes):
    '''
    :param proposals: shape=[bs, num_anchors, 4], 4 ==> x1, y1, x2, y2 in input sizes
    :param objectness: shape=[bs, num_anchors, 1], binary classification
    :param num_anchors_per_layer (list, len=fpn_layers): list of n, n = anchor num of each feature map
    :param shapes (list): len=bs
    :return: filtered_boxes (list, len=bs): list of boxes_one_image, boxes_one_image.shape=[n, 4]
             filtered_scores (list, len=bs): list of scores_one_image, scores_one_image.shape=[n, 1]
    '''
    # define min proposal size
    min_size = torch.tensor(self.cfg['min_size'], device=proposals.device)
    pre_nms_top_n = self.cfg['rpn_pre_nms_top_n_train'] if self.training \
        else self.cfg['rpn_pre_nms_top_n_test']  # 2000
    post_nms_top_n = self.cfg['rpn_post_nms_top_n_train'] if self.training \
        else self.cfg['rpn_post_nms_top_n_test']  # 1000
    start_idx = 0
    filtered_idx = list()
    levels = list()
    for ldx, layer_num in enumerate(num_anchors_per_layer):
        # shape=[layer_num,], value = index of the ldx-th layer
        levels.append(torch.full(size=(layer_num,), fill_value=ldx,
                                 dtype=torch.int64, device=proposals.device))
        layer_objectness = objectness[:, start_idx:start_idx + layer_num, :]
        layer_top_n = min(layer_objectness.size(1), pre_nms_top_n)
        _, top_k_idx = layer_objectness.topk(dim=1, k=layer_top_n)
        filtered_idx.append(top_k_idx + start_idx)
        start_idx += layer_num
    levels = torch.cat(levels, dim=0).unsqueeze(0).repeat(proposals.size(0), 1)  # shape=[bs, num_anchors]
    filtered_idx = torch.cat(filtered_idx, dim=1)  # shape=[bs, sum_of_pre_topn, 1]
    objectness = objectness.gather(dim=1, index=filtered_idx).squeeze(-1)
    proposals = proposals.gather(dim=1, index=filtered_idx.repeat(1, 1, 4))
    levels = levels.gather(dim=1, index=filtered_idx[..., 0])
    filtered_boxes = list()
    filtered_scores = list()
    # perform nms on each image, treating different fpn layers as different lvl groups
    for box, scores, lvl, shape in zip(proposals, objectness, levels, shapes):
        # clip to img_size
        box[..., [0, 2]] = box[..., [0, 2]].clamp(min=0, max=shape[0])
        box[..., [1, 3]] = box[..., [1, 3]].clamp(min=0, max=shape[1])
        # remove small boxes
        dw = box[..., 2] - box[..., 0]
        dh = box[..., 3] - box[..., 1]
        keep = (dw > min_size) & (dh > min_size)
        box, scores, lvl = box[keep], scores[keep], lvl[keep]
        # perform nms independently per fpn level via lvl
        keep = batched_nms(box, scores, lvl, self.cfg['rpn_nms_thresh'])
        keep = keep[:post_nms_top_n]
        box, scores = box[keep], scores[keep]
        # add it to the per-image lists
        filtered_boxes.append(box)
        filtered_scores.append(scores)
    return filtered_boxes, filtered_scores
def postprocess_hoi_flip(x, anchors, regression, obj_cls, act_cls, regressBoxes,
                         clipBoxes, threshold, iou_threshold, mode="action", classwise=True):
    transformed_anchors = regressBoxes(anchors, regression)
    transformed_anchors = clipBoxes(transformed_anchors, x)
    if mode == "action":
        main_cls = act_cls   # (bn, num_anchor, num_cls)
        other_cls = obj_cls  # (bn, num_anchor, num_cls)
    else:
        main_cls = obj_cls
        other_cls = act_cls
    scores = torch.max(main_cls, dim=2, keepdim=True)[0]  # (bn, num_anchor, 1)
    scores_over_thresh = (scores > threshold)[:, :, 0]    # (bn, num_anchor)
    out = []
    n = x.shape[0] // 2
    for i in range(n):
        # skip only when neither the image nor its flipped copy has any score over threshold
        if scores_over_thresh[i].sum() == 0 and scores_over_thresh[i + n].sum() == 0:
            out.append({
                'rois': np.array(()),
                'act_scores': np.array(()),
                'obj_class_ids': np.array(()),
                'obj_scores': np.array(())
            })
            continue

        main_cls_per = torch.cat([
            main_cls[i, scores_over_thresh[i, :], ...].permute(1, 0),
            main_cls[i + n, scores_over_thresh[i + n, :], ...].permute(1, 0)
        ], 1)  # (num_cls, num_bbox)
        other_cls_per = torch.cat([
            other_cls[i, scores_over_thresh[i, :], ...].permute(1, 0),
            other_cls[i + n, scores_over_thresh[i + n, :], ...].permute(1, 0)
        ], 1)  # (num_cls, num_bbox)
        transformed_anchors_per = transformed_anchors[i, scores_over_thresh[i, :], ...]  # (num_bbox, 4)
        transformed_anchors_per_flip = transformed_anchors[i + n, scores_over_thresh[i + n, :], ...].clone()
        # map boxes from the flipped image back into original image coordinates
        cols = x.shape[3]
        w = transformed_anchors_per_flip[:, 2] - transformed_anchors_per_flip[:, 0]
        transformed_anchors_per_flip[:, 2] = cols - transformed_anchors_per_flip[:, 0]
        transformed_anchors_per_flip[:, 0] = transformed_anchors_per_flip[:, 2] - w
        transformed_anchors_per = torch.cat([transformed_anchors_per, transformed_anchors_per_flip], 0)
        scores_per = torch.cat([
            scores[i, scores_over_thresh[i, :], ...],
            scores[i + n, scores_over_thresh[i + n, :], ...]
        ], 0)

        if classwise:
            scores_, classes_ = main_cls_per.max(dim=0)
            anchors_nms_idx = batched_nms(transformed_anchors_per, scores_per[:, 0], classes_,
                                          iou_threshold=iou_threshold)
        else:
            anchors_nms_idx = nms(transformed_anchors_per, scores_per[:, 0], iou_threshold=iou_threshold)

        if anchors_nms_idx.shape[0] != 0:
            main_scores_ = main_cls_per[:, anchors_nms_idx]    # (num_cls, num_nms_bbox)
            other_scores_ = other_cls_per[:, anchors_nms_idx]  # (num_cls, num_nms_bbox)
            boxes_ = transformed_anchors_per[anchors_nms_idx, :]  # (num_nms_bbox, 4)
            if mode == "action":
                act_scores_ = main_scores_.permute(1, 0)              # (num_nms_bbox, num_cls)
                obj_scores_, obj_classes_ = other_scores_.max(dim=0)  # (num_nms_bbox)
            else:
                act_scores_ = other_scores_.permute(1, 0)
                obj_scores_, obj_classes_ = main_scores_.max(dim=0)
            out.append({
                'rois': boxes_.cpu().numpy(),
                'act_scores': act_scores_.cpu().numpy(),
                'obj_class_ids': obj_classes_.cpu().numpy(),
                'obj_scores': obj_scores_.cpu().numpy()
            })
        else:
            out.append({
                'rois': np.array(()),
                'act_scores': np.array(()),
                'obj_class_ids': np.array(()),
                'obj_scores': np.array(())
            })
    return out
def decode_output_batch(
    self,
    boxes: Tensor,
    scores: Tensor,
    score_threshold: float = 0.01,
    iou_threshold: float = 0.45,
    max_detections: int = 200,
) -> List[Tuple[Tensor, Tensor, Tensor]]:
    """
    Decodes a batch of detection model outputs from default box offsets and class
    scores to ltrb formatted bounding boxes, predicted labels, and scores for each
    image of the batch using non maximum suppression.

    :param boxes: Encoded default-box offsets. Expected shape:
        batch_size,4,num_default_boxes
    :param scores: Class scores for each image, class, box combination. Expected
        shape: batch_size,num_classes,num_default_boxes
    :param score_threshold: minimum softmax score to be considered a positive
        prediction. Default is 0.01 following the SSD paper
    :param iou_threshold: The minimum IoU between two boxes to be considered the
        same object in non maximum suppression
    :param max_detections: the maximum number of detections to keep per image.
        Default is 200
    :return: Detected object bounding boxes, predicted labels, and class score
        for each image in this batch
    """
    if batched_nms is None:
        raise RuntimeError(
            "Unable to import batched_nms from torchvision.ops try upgrading your"
            " torch and torchvision versions")
    # Re-order so that dimensions are batch_size,num_default_boxes,{4,num_classes}
    boxes = boxes.permute(0, 2, 1)
    scores = scores.permute(0, 2, 1)

    # convert box offsets to bounding boxes and convert to ltrb form;
    # center offsets are scaled by the default box width/height (SSD decoding)
    default_boxes = self._default_boxes.unsqueeze(0)  # extra dimension for math ops
    boxes[:, :, :2] = (self._scale_xy * boxes[:, :, :2] * default_boxes[:, :, 2:]
                       + default_boxes[:, :, :2])
    boxes[:, :, 2:] = (self._scale_wh * boxes[:, :, 2:]).exp() * default_boxes[:, :, 2:]
    _xywh_to_ltrb_batch(boxes)

    # take softmax of class scores
    scores = torch.nn.functional.softmax(scores, dim=-1)  # class dimension

    # run non max suppression for each image in the batch and store outputs
    detection_outputs = []
    for image_boxes, box_class_scores in zip(boxes.split(1, 0), scores.split(1, 0)):
        # strip batch dimension
        image_boxes = image_boxes.squeeze(0)
        box_class_scores = box_class_scores.squeeze(0)

        # get highest score per box and filter out background class
        box_class_scores[:, 0] = 0
        box_scores, box_labels = box_class_scores.max(dim=1)
        background_filter = box_scores > score_threshold
        image_boxes = image_boxes[background_filter]
        box_scores = box_scores[background_filter]
        box_labels = box_labels[background_filter]

        if image_boxes.dim() == 0:
            # nothing predicted, add empty result and continue
            detection_outputs.append((torch.zeros(1, 4), torch.zeros(1), torch.zeros(1)))
            continue
        if image_boxes.dim() == 1:
            image_boxes = image_boxes.unsqueeze(0)
            box_scores = box_scores.unsqueeze(0)
            box_labels = box_labels.unsqueeze(0)

        # filter boxes, classes, and scores by nms results
        nms_filter = batched_nms(image_boxes, box_scores, box_labels, iou_threshold)
        if nms_filter.size(0) > max_detections:
            # update nms_filter to keep the boxes with top max_detections scores
            box_scores_nms = box_scores[nms_filter]
            sorted_scores_nms_idx = torch.argsort(box_scores_nms, descending=True)
            nms_filter = nms_filter[sorted_scores_nms_idx[:max_detections]]

        detection_outputs.append((
            image_boxes[nms_filter],
            box_labels[nms_filter],
            box_scores[nms_filter],
        ))
    return detection_outputs
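# Standalone sketch of the background-suppression step above (shapes assumed:
# 8732 SSD default boxes, 21 classes with class 0 = background): zeroing column 0
# before max() guarantees the background class can never be the winning label.
cls_scores_demo = torch.nn.functional.softmax(torch.rand(8732, 21), dim=-1)
cls_scores_demo[:, 0] = 0
box_scores_demo, box_labels_demo = cls_scores_demo.max(dim=1)  # labels are never 0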
def postprocess_hoi(x, anchors, regression, obj_cls, act_cls, regressBoxes, clipBoxes,
                    threshold, iou_threshold, mode="action", classwise=True):
    transformed_anchors = regressBoxes(anchors, regression)
    transformed_anchors = clipBoxes(transformed_anchors, x)
    if mode == "action":
        main_cls = act_cls   # (bn, num_anchor, num_cls)
        other_cls = obj_cls  # (bn, num_anchor, num_cls)
    else:
        main_cls = obj_cls
        other_cls = act_cls
    scores = torch.max(main_cls, dim=2, keepdim=True)[0]  # (bn, num_anchor, 1)
    scores_over_thresh = (scores > threshold)[:, :, 0]    # (bn, num_anchor)
    out = []
    for i in range(x.shape[0]):
        # per-image check (the batch-wide sum() here previously skipped every
        # image whenever the whole batch had no score over threshold)
        if scores_over_thresh[i].sum() == 0:
            out.append({
                'rois': np.array(()),
                'act_scores': np.array(()),
                'obj_class_ids': np.array(()),
                'obj_scores': np.array(())
            })
            continue

        main_cls_per = main_cls[i, scores_over_thresh[i, :], ...].permute(1, 0)    # (num_cls, num_bbox)
        other_cls_per = other_cls[i, scores_over_thresh[i, :], ...].permute(1, 0)  # (num_cls, num_bbox)
        transformed_anchors_per = transformed_anchors[i, scores_over_thresh[i, :], ...]  # (num_bbox, 4)
        scores_per = scores[i, scores_over_thresh[i, :], ...]

        if classwise:
            scores_, classes_ = main_cls_per.max(dim=0)
            anchors_nms_idx = batched_nms(transformed_anchors_per, scores_per[:, 0], classes_,
                                          iou_threshold=iou_threshold)
        else:
            anchors_nms_idx = nms(transformed_anchors_per, scores_per[:, 0], iou_threshold=iou_threshold)

        if anchors_nms_idx.shape[0] != 0:
            main_scores_ = main_cls_per[:, anchors_nms_idx]    # (num_cls, num_nms_bbox)
            other_scores_ = other_cls_per[:, anchors_nms_idx]  # (num_cls, num_nms_bbox)
            boxes_ = transformed_anchors_per[anchors_nms_idx, :]  # (num_nms_bbox, 4)
            if mode == "action":
                act_scores_ = main_scores_.permute(1, 0)              # (num_nms_bbox, num_cls)
                obj_scores_, obj_classes_ = other_scores_.max(dim=0)  # (num_nms_bbox)
            else:
                act_scores_ = other_scores_.permute(1, 0)
                obj_scores_, obj_classes_ = main_scores_.max(dim=0)
            out.append({
                'rois': boxes_.cpu().numpy(),
                'act_scores': act_scores_.cpu().numpy(),
                'obj_class_ids': obj_classes_.cpu().numpy(),
                'obj_scores': obj_scores_.cpu().numpy()
            })
        else:
            out.append({
                'rois': np.array(()),
                'act_scores': np.array(()),
                'obj_class_ids': np.array(()),
                'obj_scores': np.array(())
            })
    return out
def batched_nms(boxes, n, threshold, mode='union'):
    """Applies NMS in a batched fashion, only comparing boxes from the same item.

    NB: there is a (pretty sneaky) batched NMS function available at
    torchvision.ops.boxes.batched_nms
    However, since there are some differences between OP and torchvision NMS
    algorithms, and the OP version loops in python anyway, we use a simple
    version of our own

    Arguments
    ---------
    boxes : torch.Tensor
        size [num_boxes, 10]
        Each row is a single bounding box. Column 0 is batch index. Columns
        1 - 4 are bounding box top left and bottom right coordinates. Column 5
        is score for that box. Columns 6-10 are offset values.
    n : int
        number of items in a batch
    threshold : float
        IOU threshold for NMS
    mode : str
        'union' | 'min'
        'union': true IOU
        'min': divide intersection by minimum of areas instead of union

    Returns
    -------
    kept : torch.Tensor
        size [num_kept_boxes, 10], same column layout as `boxes`
    """
    kept = []
    # For each batch item
    for bi in range(n):
        # Logical selector for batch item boxes
        selector = boxes[:, 0] == bi
        # Select boxes and scores for current item
        boxes_ = boxes[selector, 1:5]
        scores_ = boxes[selector, 5]
        # Run NMS on this item's boxes only; the returned indices point into
        # the selected subset, so they can be applied to it directly.
        # NOTE: torchvision's nms computes true IoU ('union'); the 'min' mode
        # is not implemented here.
        keep = box_ops.nms(boxes_, scores_, threshold)
        # Retain selected boxes for current item
        kept.append(boxes[selector, :][keep, :])
    # Repack into original data format
    kept = torch.cat(kept, dim=0)
    return kept
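# Illustrative input in the 10-column layout documented above (values synthetic):
# column 0 is the batch index, columns 1-4 the box, column 5 the score, and the
# remaining columns hold offset values.
boxes_demo = torch.zeros(3, 10)
boxes_demo[:, 0] = torch.tensor([0., 0., 1.])  # batch indices
boxes_demo[:, 1:5] = torch.tensor([[0., 0., 10., 10.],
                                   [0., 0., 10., 10.],
                                   [5., 5., 15., 15.]])
boxes_demo[:, 5] = torch.tensor([0.9, 0.8, 0.7])  # scores
kept_demo = batched_nms(boxes_demo, n=2, threshold=0.5)
# the two overlapping boxes in item 0 collapse to one; item 1 keeps its box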
def prepare_roi_batch_classifier(self, class_logits, box_regression, proposals, image_shapes):
    device = class_logits.device
    num_classes = class_logits.shape[-1]
    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]

    # 1. Reshape the box regression into num_predictions x num_classes x 4
    res_boxes = box_regression.view(boxes_per_image[0], num_classes, -1)
    # for the NMS and sorting, need to decode:
    decoded_boxes = self.box_coder.decode(res_boxes, proposals)
    scaled_boxes = box_ops.clip_boxes_to_image(decoded_boxes, image_shapes[0])
    pred_scores = F.softmax(class_logits, -1)

    # 2. store reshaped boxes (plus scaled boxes for masks)
    all_scores = []
    all_res_boxes = []
    all_labels = []
    all_scaled_boxes = []

    labels = torch.arange(num_classes, device=device)
    labels = labels.view(1, -1).expand_as(pred_scores)

    # remove predictions with the background label
    res_boxes = res_boxes[:, 1:]
    scaled_boxes = scaled_boxes[:, 1:]
    pred_scores = pred_scores[:, 1:]
    pred_scores = pred_scores.flatten()
    res_boxes = res_boxes.reshape(-1, 4)
    scaled_boxes = scaled_boxes.reshape(-1, 4)
    labels = labels[:, 1:].flatten()

    # remove empty (zero-area) boxes
    area = (scaled_boxes[:, 2] - scaled_boxes[:, 0]) * (scaled_boxes[:, 3] - scaled_boxes[:, 1])
    inds_area = torch.nonzero(area > 1e-5).squeeze(1)
    res_boxes, scaled_boxes, pred_scores, labels = (
        res_boxes[inds_area], scaled_boxes[inds_area], pred_scores[inds_area], labels[inds_area])

    # for the S2 classifier: keep all boxes above the classifier score threshold
    # (NOTE: currently computed but unused)
    inds_classifier = torch.nonzero(pred_scores > self.score_thresh_classifier).squeeze(1)

    # non-maximum suppression, independently done per class;
    # this returns the indices in the decreasing order of their confidence score
    keep = box_ops.batched_nms(scaled_boxes, pred_scores, labels, self.nms_thresh_classifier)
    # keep only topk scoring predictions
    keep = keep[:self.detections_per_img_s2new]

    # keep is a vector (keep.ndimension() == 1);
    # if fewer than the RoI batch size, augment and keep the order!
    if keep.size().numel() < self.detections_per_img_s2new:
        keep_aug = torch.zeros(self.detections_per_img_s2new, dtype=torch.long)
        # get the indices to fill in with the values from the keep vector; make sure 0
        # is the first value, and add the last index = RoI detections_per_img
        inds_rand = torch.cat(
            (torch.tensor([0]),
             torch.randperm(self.detections_per_img_s2new - 2)[:keep.size().numel() - 1].sort().values + 1,
             torch.tensor([self.detections_per_img_s2new])), 0).unique()
        for idxs, posts in enumerate(inds_rand[:-1]):
            keep_aug[posts:inds_rand[idxs + 1]] = keep[idxs].expand(inds_rand[idxs + 1] - posts)
        keep = keep_aug

    # At this point the boxes and scores are sorted in decreasing order
    res_boxes, pred_scores, labels, scaled_boxes = (
        res_boxes[keep], pred_scores[keep], labels[keep], scaled_boxes[keep])

    # keep the lists
    all_res_boxes.append(res_boxes)
    all_scores.append(pred_scores)
    all_labels.append(labels)
    all_scaled_boxes.append(scaled_boxes)
    return all_res_boxes, all_scores, all_labels, all_scaled_boxes
def detect_face(imgs, minsize, pnet, rnet, onet, threshold, factor, device):
    if isinstance(imgs, (np.ndarray, torch.Tensor)):
        imgs = torch.as_tensor(imgs, device=device)
        if len(imgs.shape) == 3:
            imgs = imgs.unsqueeze(0)
    else:
        if not isinstance(imgs, (list, tuple)):
            imgs = [imgs]
        if any(img.size != imgs[0].size for img in imgs):
            raise Exception("MTCNN batch processing only compatible with equal-dimension images.")
        imgs = np.stack([np.uint8(img) for img in imgs])
        imgs = torch.as_tensor(imgs, device=device)

    model_dtype = next(pnet.parameters()).dtype
    imgs = imgs.permute(0, 3, 1, 2).type(model_dtype)

    batch_size = len(imgs)
    h, w = imgs.shape[2:4]
    m = 12.0 / minsize
    minl = min(h, w)
    minl = minl * m

    # Create scale pyramid
    scale_i = m
    scales = []
    while minl >= 12:
        scales.append(scale_i)
        scale_i = scale_i * factor
        minl = minl * factor

    # First stage
    boxes = []
    image_inds = []
    all_inds = []
    all_i = 0
    for scale in scales:
        im_data = imresample(imgs, (int(h * scale + 1), int(w * scale + 1)))
        im_data = (im_data - 127.5) * 0.0078125
        reg, probs = pnet(im_data)
        boxes_scale, image_inds_scale = generateBoundingBox(reg, probs[:, 1], scale, threshold[0])
        boxes.append(boxes_scale)
        image_inds.append(image_inds_scale)
        all_inds.append(all_i + image_inds_scale)
        all_i += batch_size

    boxes = torch.cat(boxes, dim=0)
    image_inds = torch.cat(image_inds, dim=0).cpu()
    all_inds = torch.cat(all_inds, dim=0)

    # NMS within each scale + image
    pick = batched_nms(boxes[:, :4], boxes[:, 4], all_inds, 0.5)
    boxes, image_inds = boxes[pick], image_inds[pick]

    # NMS within each image
    pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7)
    boxes, image_inds = boxes[pick], image_inds[pick]

    regw = boxes[:, 2] - boxes[:, 0]
    regh = boxes[:, 3] - boxes[:, 1]
    qq1 = boxes[:, 0] + boxes[:, 5] * regw
    qq2 = boxes[:, 1] + boxes[:, 6] * regh
    qq3 = boxes[:, 2] + boxes[:, 7] * regw
    qq4 = boxes[:, 3] + boxes[:, 8] * regh
    boxes = torch.stack([qq1, qq2, qq3, qq4, boxes[:, 4]]).permute(1, 0)
    boxes = rerec(boxes)
    y, ey, x, ex = pad(boxes, w, h)

    # Second stage
    if len(boxes) > 0:
        im_data = []
        for k in range(len(y)):
            if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1):
                img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k], (x[k] - 1):ex[k]].unsqueeze(0)
                im_data.append(imresample(img_k, (24, 24)))
        im_data = torch.cat(im_data, dim=0)
        im_data = (im_data - 127.5) * 0.0078125
        out = rnet(im_data)

        out0 = out[0].permute(1, 0)
        out1 = out[1].permute(1, 0)
        score = out1[1, :]
        ipass = score > threshold[1]
        boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1)
        image_inds = image_inds[ipass]
        mv = out0[:, ipass].permute(1, 0)

        # NMS within each image
        pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7)
        boxes, image_inds, mv = boxes[pick], image_inds[pick], mv[pick]
        boxes = bbreg(boxes, mv)
        boxes = rerec(boxes)

    # Third stage
    points = torch.zeros(0, 5, 2, device=device)
    if len(boxes) > 0:
        y, ey, x, ex = pad(boxes, w, h)
        im_data = []
        for k in range(len(y)):
            if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1):
                img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k], (x[k] - 1):ex[k]].unsqueeze(0)
                im_data.append(imresample(img_k, (48, 48)))
        im_data = torch.cat(im_data, dim=0)
        im_data = (im_data - 127.5) * 0.0078125
        out = onet(im_data)

        out0 = out[0].permute(1, 0)
        out1 = out[1].permute(1, 0)
        out2 = out[2].permute(1, 0)
        score = out2[1, :]
        points = out1
        ipass = score > threshold[2]
        points = points[:, ipass]
        boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1)
        image_inds = image_inds[ipass]
        mv = out0[:, ipass].permute(1, 0)

        w_i = boxes[:, 2] - boxes[:, 0] + 1
        h_i = boxes[:, 3] - boxes[:, 1] + 1
        points_x = w_i.repeat(5, 1) * points[:5, :] + boxes[:, 0].repeat(5, 1) - 1
        points_y = h_i.repeat(5, 1) * points[5:10, :] + boxes[:, 1].repeat(5, 1) - 1
        points = torch.stack((points_x, points_y)).permute(2, 1, 0)
        boxes = bbreg(boxes, mv)

        # NMS within each image using "Min" strategy
        # pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7)
        pick = batched_nms_numpy(boxes[:, :4], boxes[:, 4], image_inds, 0.7, 'Min')
        boxes, image_inds, points = boxes[pick], image_inds[pick], points[pick]

    boxes = boxes.cpu().numpy()
    points = points.cpu().numpy()

    batch_boxes = []
    batch_points = []
    for b_i in range(batch_size):
        b_i_inds = np.where(image_inds == b_i)
        batch_boxes.append(boxes[b_i_inds].copy())
        batch_points.append(points[b_i_inds].copy())

    batch_boxes, batch_points = np.array(batch_boxes), np.array(batch_points)
    return batch_boxes, batch_points
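# Worked sketch of the scale pyramid built in the first stage above: with the
# common defaults minsize=20 and factor=0.709 (assumed here) on a 480x640 image,
# m = 0.6 and each scale shrinks the shorter side by `factor` until it would
# fall below the 12-pixel P-Net input.
minsize_demo, factor_demo, h_demo, w_demo = 20, 0.709, 480, 640
m_demo = 12.0 / minsize_demo
minl_demo = min(h_demo, w_demo) * m_demo
scales_demo = []
scale_demo = m_demo
while minl_demo >= 12:
    scales_demo.append(scale_demo)
    scale_demo *= factor_demo
    minl_demo *= factor_demo
# scales_demo -> [0.6, 0.4254, 0.3016, ...]; one P-Net pass per scale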
def getresult(img_path, outpath):
    NN_WEIGHT_FILE_PATH = 'dect/weight/efficient_rcnn_9.pth'
    VERSION_FAST = 49
    NMS_PARAM = 0.35
    CLASS_PROP_THR = 0.5
    RUN_MODE = "NMS"
    imge = Image.open(img_path).convert('RGB')
    testtransform = Compose([ToTensor()])
    img = testtransform(imge)
    model = get_model(VERSION_FAST)
    model.load_state_dict(torch.load(NN_WEIGHT_FILE_PATH))
    model.eval()
    print("Run Mode = ", RUN_MODE)
    count = 0  # initialize here so it is defined in both branches
    if "NMS" == RUN_MODE:
        start = time.time()
        print(img.size())
        results = model([img])
        open_cv_image = np.array(imge)
        open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
        boxes = []
        for box, label, score in zip(results[0]['boxes'], results[0]['labels'],
                                     results[0]['scores']):
            boxes.append(box[:4].tolist() + [label] + [score])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        if boxes.shape[0] != 0:
            # class-aware NMS over [x1, y1, x2, y2, label, score] rows
            keep = box_ops.batched_nms(boxes[:, :4], boxes[:, 5], boxes[:, 4], NMS_PARAM)
            boxes = boxes[keep, :]
        for box in boxes:
            if box[5] < CLASS_PROP_THR:
                continue
            box = box.tolist()
            score = float(box[5])
            count += 1
            label_id = int(box[4]) - 1  # kept for reference; label is hardcoded below
            label = 'Human'
            # rectangle given in (x, y, w, h) rect form
            cv2.rectangle(open_cv_image,
                          (int(box[0]), int(box[1]),
                           int(box[2]) - int(box[0]), int(box[3]) - int(box[1])),
                          (255, 225, 0), 2)
            cx = box[0]
            cy = box[1] + 12
            cv2.putText(open_cv_image, "{}:{:.2f}".format(label, score),
                        (int(cx), int(cy)), cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 255, 0))
        cv2.imwrite(outpath, open_cv_image)
    else:
        start = time.time()
        print("img.size = ", img.size())
        results = model([img])
        open_cv_image = np.array(imge)
        open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
        for box in results[0]['boxes']:
            box = box[:4].tolist()
            cv2.rectangle(open_cv_image,
                          (int(box[0]), int(box[1]),
                           int(box[2]) - int(box[0]), int(box[3]) - int(box[1])),
                          (255, 225, 0), 2)
        cv2.imwrite(outpath, open_cv_image)
    return count
def box_features_hook(self, module, input, output):
    '''
    hook for extracting features from MaskRCNN
    '''
    features, proposals, image_shapes, targets = input
    box_features = module.box_roi_pool(features, proposals, image_shapes)
    box_features = module.box_head(box_features)
    class_logits, box_regression = module.box_predictor(box_features)

    device = class_logits.device
    num_classes = class_logits.shape[-1]
    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = module.box_coder.decode(box_regression, proposals)
    pred_scores = F.softmax(class_logits, -1)

    # split boxes, scores and features per image
    pred_boxes = pred_boxes.split(boxes_per_image, 0)
    pred_scores = pred_scores.split(boxes_per_image, 0)
    box_features_split = box_features.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    box_features_per_image = []
    for boxes, scores, image_shape, feats in zip(pred_boxes, pred_scores,
                                                 image_shapes, box_features_split):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # track which proposal each class-expanded instance comes from, so box
        # features can be recovered after filtering (the raw NMS indices point
        # into the filtered, class-expanded list, not into box_features)
        proposal_idx = torch.arange(scores.shape[0], device=device)
        proposal_idx = proposal_idx.view(-1, 1).expand(-1, num_classes - 1).reshape(-1)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.flatten()
        labels = labels.flatten()

        # remove low scoring boxes
        inds = torch.nonzero(scores > module.score_thresh).squeeze(1)
        boxes, scores, labels, proposal_idx = boxes[inds], scores[inds], labels[inds], proposal_idx[inds]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels, proposal_idx = boxes[keep], scores[keep], labels[keep], proposal_idx[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, module.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.mask_rcnn_top_k_boxes]
        boxes, scores, labels, proposal_idx = boxes[keep], scores[keep], labels[keep], proposal_idx[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)
        box_features_per_image.append(feats[proposal_idx])

    self.detection_box_features = box_features_per_image
    self.fpn_pooled_features = self.avg2dpool(features['pool']).squeeze(-1).squeeze(-1)
def generate_detections(cls_outputs, box_outputs, anchor_boxes, indices, classes,
                        image_scale, nms_thres=0.5, max_dets=100):
    """Generates detections with RetinaNet model outputs and anchors.

    Args:
        cls_outputs: a torch tensor with shape [N, 1], which has the highest class
            scores on all feature levels. The N is the number of selected
            top-K total anchors on all levels. (k being MAX_DETECTION_POINTS)

        box_outputs: a torch tensor with shape [N, 4], which stacks box regression
            outputs on all feature levels. The N is the number of selected top-k
            total anchors on all levels. (k being MAX_DETECTION_POINTS)

        anchor_boxes: a torch tensor with shape [N, 4], which stacks anchors on all
            feature levels. The N is the number of selected top-k total anchors on
            all levels.

        indices: a torch tensor with shape [N], which is the indices from top-k selection.

        classes: a torch tensor with shape [N], which represents the class
            prediction on all selected anchors from top-k selection.

        image_scale: a float tensor representing the scale between original image
            and input image for the detector. It is used to rescale detections for
            evaluating with the original groundtruth annotations.

    Returns:
        detections: detection results in a tensor with shape [max_dets, 6],
            each row representing [x, y, width, height, score, class]
    """
    anchor_boxes = anchor_boxes[indices, :]

    # apply bounding box regression to anchors
    boxes = decode_box_outputs(box_outputs.T.float(), anchor_boxes.T, output_xyxy=True)
    scores = cls_outputs.sigmoid().squeeze(1).float()

    # keep human detections only (class 0)
    human_idx = classes == 0
    boxes = boxes[human_idx]
    scores = scores[human_idx]
    classes = classes[human_idx]

    top_detection_idx = batched_nms(boxes, scores, classes, iou_threshold=nms_thres)
    # keep only topk scoring predictions
    top_detection_idx = top_detection_idx[:max_dets]
    boxes = boxes[top_detection_idx]
    scores = scores[top_detection_idx, None]
    classes = classes[top_detection_idx, None]

    # xyxy to xywh & rescale to original image
    boxes[:, 2] -= boxes[:, 0]
    boxes[:, 3] -= boxes[:, 1]
    boxes *= image_scale

    classes += 1  # back to class idx with background class = 0

    # stack em and pad out to max_dets if necessary
    detections = torch.cat([boxes, scores, classes.float()], dim=1)
    if len(top_detection_idx) < max_dets:
        detections = torch.cat([
            detections,
            torch.zeros((max_dets - len(top_detection_idx), 6),
                        device=detections.device, dtype=detections.dtype)
        ], dim=0)
    return detections
def ssm_postprocess_detections(self, class_logits, box_regression, proposals, image_shapes):
    device = class_logits.device
    num_classes = class_logits.shape[-1]
    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)
    pred_scores = F.softmax(class_logits, -1)

    # split boxes and scores per image
    pred_boxes = pred_boxes.split(boxes_per_image, 0)
    pred_scores = pred_scores.split(boxes_per_image, 0)

    al_idx = 0
    all_boxes = torch.empty([0, 4]).cuda()
    all_scores = torch.tensor([]).cuda()
    all_labels = []
    CONF_THRESH = 0.5  # bigger leads to more active learning samples
    for boxes, scores, image_shape in zip(pred_boxes, pred_scores, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        if torch.max(scores) < CONF_THRESH:
            # flag this image as an active learning candidate
            al_idx = 1
            continue
        for cls_ind in range(num_classes - 1):
            cls_boxes = boxes[:, cls_ind]
            cls_scores = scores[:, cls_ind]
            cls_labels = labels[:, cls_ind]

            # batch everything, by making every class prediction be a separate instance
            cls_boxes = cls_boxes.reshape(-1, 4)
            cls_scores = cls_scores.flatten()
            cls_labels = cls_labels.flatten()

            # non-maximum suppression, independently done per class (nms_thresh = 0.3)
            keep = box_ops.batched_nms(cls_boxes, cls_scores, cls_labels, 0.3)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            cls_boxes, cls_scores, cls_labels = cls_boxes[keep], cls_scores[keep], cls_labels[keep]

            # remove low scoring boxes
            inds = torch.nonzero(cls_scores > self.score_thresh).squeeze(1)
            if len(inds) == 0:
                continue
            for j in inds:
                all_boxes = torch.cat((all_boxes, cls_boxes[j].unsqueeze(0)), 0)
                k = keep[j]
                # scores[k] is the full class-score row of the originating proposal
                all_scores = torch.cat((all_scores, scores[k].unsqueeze(0)), 0)
                all_labels.append(judge_y(scores[k]))
    return [all_boxes], [all_scores], [all_labels], al_idx
def postprocess_boxes(
    self,
    class_logits,
    box_regression,
    embeddings,
    proposals,
    image_shapes,
    fcs=None,
    gt_det=None,
    cws=True,
):
    """
    Similar to RoIHeads.postprocess_detections, but can handle embeddings and
    implement First Classification Score (FCS).
    """
    device = class_logits.device

    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    if fcs is not None:
        # First Classification Score (FCS)
        pred_scores = fcs[0]
    else:
        pred_scores = torch.sigmoid(class_logits)
    if cws:
        # Confidence Weighted Similarity (CWS)
        embeddings = embeddings * pred_scores.view(-1, 1)

    # split boxes and scores per image
    pred_boxes = pred_boxes.split(boxes_per_image, 0)
    pred_scores = pred_scores.split(boxes_per_image, 0)
    pred_embeddings = embeddings.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    all_embeddings = []
    for boxes, scores, embeddings, image_shape in zip(pred_boxes, pred_scores,
                                                      pred_embeddings, image_shapes):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.ones(scores.size(0), device=device)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores.unsqueeze(1)
        labels = labels.unsqueeze(1)

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.flatten()
        labels = labels.flatten()
        embeddings = embeddings.reshape(-1, self.embedding_head.dim)

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
        boxes, scores, labels, embeddings = (
            boxes[inds], scores[inds], labels[inds], embeddings[inds])

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels, embeddings = (
            boxes[keep], scores[keep], labels[keep], embeddings[keep])

        if gt_det is not None:
            # include GT into the detection results
            boxes = torch.cat((boxes, gt_det["boxes"]), dim=0)
            labels = torch.cat((labels, torch.tensor([1.0]).to(device)), dim=0)
            scores = torch.cat((scores, torch.tensor([1.0]).to(device)), dim=0)
            embeddings = torch.cat((embeddings, gt_det["embeddings"]), dim=0)

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels, embeddings = (
            boxes[keep], scores[keep], labels[keep], embeddings[keep])

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)
        all_embeddings.append(embeddings)

    return all_boxes, all_scores, all_embeddings, all_labels
def compute_map(dataset, detection_dir, h_thresh, o_thresh, nms_thresh,
                max_human, max_object, human_idx=1, min_iou=0.5):
    num_pairs_object = torch.zeros(81)
    associate = BoxAssociation(min_iou=min_iou)
    meter = DetectionAPMeter(81, algorithm='INT', nproc=10)
    # Skip images without valid human-object pairs
    valid_idx = dataset._keep

    for i in tqdm(valid_idx):
        # Load annotation
        annotation = dataset.annotations[i]
        image_name = annotation.pop('file_name')
        target = to_tensor(annotation, input_format='dict')
        # Load detection
        detection_path = os.path.join(detection_dir, image_name.replace('jpg', 'json'))
        with open(detection_path, 'r') as f:
            detection = to_tensor(json.load(f), input_format='dict')
        boxes = detection['boxes']
        labels = detection['labels']
        scores = detection['scores']

        # Filter out low scoring human boxes
        idx = torch.nonzero(labels == human_idx).squeeze(1)
        keep_idx = idx[torch.nonzero(scores[idx] >= h_thresh).squeeze(1)]
        # Filter out low scoring object boxes
        idx = torch.nonzero(labels != human_idx).squeeze(1)
        keep_idx = torch.cat([keep_idx, idx[torch.nonzero(scores[idx] >= o_thresh).squeeze(1)]])

        boxes = boxes[keep_idx].view(-1, 4)
        scores = scores[keep_idx].view(-1)
        labels = labels[keep_idx].view(-1)

        # Class-wise non-maximum suppression
        keep_idx = batched_nms(boxes, scores, labels, nms_thresh)
        boxes = boxes[keep_idx].view(-1, 4)
        scores = scores[keep_idx].view(-1)
        labels = labels[keep_idx].view(-1)

        sorted_idx = torch.argsort(scores, descending=True)
        boxes = boxes[sorted_idx]
        scores = scores[sorted_idx]
        labels = labels[sorted_idx]

        h_idx = torch.nonzero(labels == human_idx).squeeze(1)
        o_idx = torch.nonzero(labels != human_idx).squeeze(1)
        if len(h_idx) > max_human:
            h_idx = h_idx[:max_human]
        if len(o_idx) > max_object:
            o_idx = o_idx[:max_object]
        keep_idx = torch.cat([h_idx, o_idx])

        boxes = boxes[keep_idx].view(-1, 4)
        scores = scores[keep_idx].view(-1)
        labels = labels[keep_idx].view(-1)

        # Format ground truth boxes
        gt_boxes = torch.cat([target['boxes_h'], target['boxes_o']])
        gt_classes = torch.cat([human_idx * torch.ones_like(target['objects']), target['objects']])
        # Remove duplicates
        _, keep = np.unique(gt_boxes, return_index=True, axis=0)
        keep = torch.from_numpy(keep)
        gt_boxes = gt_boxes[keep]
        gt_classes = gt_classes[keep]

        # Update number of ground truth annotations
        for c in gt_classes:
            num_pairs_object[c] += 1

        # Associate detections with ground truth
        binary_labels = torch.zeros_like(scores)
        unique_obj = labels.unique()
        for obj_idx in unique_obj:
            det_idx = torch.nonzero(labels == obj_idx).squeeze(1)
            gt_idx = torch.nonzero(gt_classes == obj_idx).squeeze(1)
            if len(gt_idx) == 0:
                continue
            binary_labels[det_idx] = associate(gt_boxes[gt_idx].view(-1, 4),
                                               boxes[det_idx].view(-1, 4),
                                               scores[det_idx].view(-1))

        meter.append(scores, labels, binary_labels)

    meter.num_gt = num_pairs_object.tolist()
    ap = meter.eval()

    object_keep = dataset.present_objects
    ap_present = ap[object_keep]
    rec_present = meter.max_rec[object_keep]
    print("Mean average precision: {:.4f} |".format(ap_present.mean().item()),
          "Mean maximum recall: {:.4f}".format(rec_present.mean().item()))
def postprocess_dense_union(x, anchors, classification, sub_regression, obj_regression,
                            regressBoxes, clipBoxes, threshold, iou_threshold=1, classwise=False):
    # swap anchor coordinates from (y1, x1, y2, x2) to (x1, y1, x2, y2)
    transformed_anchors = torch.zeros_like(anchors).cuda()
    transformed_anchors[:, :, 0] = anchors[:, :, 1]
    transformed_anchors[:, :, 1] = anchors[:, :, 0]
    transformed_anchors[:, :, 2] = anchors[:, :, 3]
    transformed_anchors[:, :, 3] = anchors[:, :, 2]
    transformed_anchors = clipBoxes(transformed_anchors, x)

    transformed_anchors_sub = regressBoxes(anchors, sub_regression)
    transformed_anchors_sub = clipBoxes(transformed_anchors_sub, x)
    transformed_anchors_obj = regressBoxes(anchors, obj_regression)
    transformed_anchors_obj = clipBoxes(transformed_anchors_obj, x)

    main_cls = classification  # (bn, num_anchor, num_cls)
    scores = torch.max(main_cls, dim=2, keepdim=True)[0]  # (bn, num_anchor, 1)
    scores_over_thresh = (scores > threshold)[:, :, 0]    # (bn, num_anchor)
    out = []
    for i in range(x.shape[0]):
        # per-image check (a batch-wide sum() here would skip every image
        # whenever the whole batch has no score over threshold)
        if scores_over_thresh[i].sum() == 0:
            out.append({
                'rois': np.array(()),
                'rois_sub': np.array(()),
                'rois_obj': np.array(()),
                'sp_vector': np.array(()),
                'act_class_ids': np.array(()),
                'act_scores': np.array(()),
            })
            continue

        main_cls_per = main_cls[i, scores_over_thresh[i, :], ...].permute(1, 0)  # (num_cls, num_bbox)
        transformed_anchors_per = transformed_anchors[i, scores_over_thresh[i, :], ...]          # (num_bbox, 4)
        transformed_anchors_sub_per = transformed_anchors_sub[i, scores_over_thresh[i, :], ...]  # (num_bbox, 4)
        transformed_anchors_obj_per = transformed_anchors_obj[i, scores_over_thresh[i, :], ...]  # (num_bbox, 4)
        scores_per = scores[i, scores_over_thresh[i, :], ...]

        if iou_threshold < 1:
            if classwise:
                scores_, classes_ = main_cls_per.max(dim=0)
                anchors_nms_idx = batched_nms(transformed_anchors_per, scores_per[:, 0], classes_,
                                              iou_threshold=iou_threshold)
            else:
                anchors_nms_idx = nms(transformed_anchors_per, scores_per[:, 0],
                                      iou_threshold=iou_threshold)
        else:
            # iou_threshold >= 1 disables NMS and keeps every candidate
            anchors_nms_idx = np.arange(main_cls_per.shape[1])

        if anchors_nms_idx.shape[0] > 0:
            main_scores_ = main_cls_per[:, anchors_nms_idx]  # (num_cls, num_nms_bbox)
            boxes_ = transformed_anchors_per[anchors_nms_idx, :]          # (num_nms_bbox, 4)
            boxes_sub_ = transformed_anchors_sub_per[anchors_nms_idx, :]  # (num_nms_bbox, 4)
            boxes_obj_ = transformed_anchors_obj_per[anchors_nms_idx, :]  # (num_nms_bbox, 4)
            # spatial vector from subject box center to object box center
            sp_vector_x = (boxes_obj_[:, 0] + boxes_obj_[:, 2]) / 2 - (boxes_sub_[:, 0] + boxes_sub_[:, 2]) / 2
            sp_vector_y = (boxes_obj_[:, 1] + boxes_obj_[:, 3]) / 2 - (boxes_sub_[:, 1] + boxes_sub_[:, 3]) / 2
            sp_vector_x = sp_vector_x.reshape(-1, 1)
            sp_vector_y = sp_vector_y.reshape(-1, 1)
            sp_vector = torch.cat([sp_vector_x, sp_vector_y], 1)
            act_scores_ = main_scores_.permute(1, 0)  # (num_nms_bbox, num_cls)
            act_classes_ = main_scores_.max(dim=0)[1]
            out.append({
                'rois': boxes_.cpu().numpy(),
                'rois_sub': boxes_sub_.cpu().numpy(),
                'rois_obj': boxes_obj_.cpu().numpy(),
                'sp_vector': sp_vector.cpu().numpy(),
                'act_class_ids': act_classes_.cpu().numpy(),
                'act_scores': act_scores_.cpu().numpy(),
            })
        else:
            out.append({
                'rois': np.array(()),
                'rois_sub': np.array(()),
                'rois_obj': np.array(()),
                'sp_vector': np.array(()),
                'act_class_ids': np.array(()),
                'act_scores': np.array(()),
            })
    return out
def postprocess_detections(self, class_logits, box_regression, proposals, image_shapes, box_features):
    # type: (Tensor, Tensor, List[Tensor], List[Tuple[int, int]], Tensor)
    device = class_logits.device
    num_classes = class_logits.shape[-1]
    boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)
    pred_scores = F.softmax(class_logits, -1)

    # split boxes and scores per image
    if len(boxes_per_image) == 1:
        # TODO : remove this when ONNX support dynamic split sizes
        # and just assign to pred_boxes instead of pred_boxes_list
        pred_boxes_list = [pred_boxes]
        pred_scores_list = [pred_scores]
        pred_embeddings_list = [box_features]
    else:
        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)
        pred_embeddings_list = box_features.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    all_embeddings = []
    for boxes, scores, image_shape, embeddings in zip(pred_boxes_list, pred_scores_list,
                                                      image_shapes, pred_embeddings_list):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        embeddings = torch.repeat_interleave(embeddings, scores.size(1), 0)
        boxes = boxes.reshape(-1, 4)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)

        # remove low scoring boxes
        inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
        boxes, scores, labels, embeddings = boxes[inds], scores[inds], labels[inds], embeddings[inds]

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels, embeddings = boxes[keep], scores[keep], labels[keep], embeddings[keep]

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[:self.detections_per_img]
        boxes, scores, labels, embeddings = boxes[keep], scores[keep], labels[keep], embeddings[keep]

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)
        all_embeddings.append(embeddings)

    return all_boxes, all_scores, all_labels, all_embeddings
def postprocess_detections(self, head_outputs, anchors, image_shapes):
    # type: (Dict[str, List[Tensor]], List[List[Tensor]], List[Tuple[int, int]]) -> List[Dict[str, Tensor]]
    class_logits = head_outputs['cls_logits']
    box_regression = head_outputs['bbox_regression']

    num_images = len(image_shapes)

    detections: List[Dict[str, Tensor]] = []
    for index in range(num_images):
        box_regression_per_image = [br[index] for br in box_regression]
        logits_per_image = [cl[index] for cl in class_logits]
        anchors_per_image, image_shape = anchors[index], image_shapes[index]

        image_boxes = []
        image_scores = []
        image_labels = []
        for box_regression_per_level, logits_per_level, anchors_per_level in \
                zip(box_regression_per_image, logits_per_image, anchors_per_image):
            num_classes = logits_per_level.shape[-1]

            # remove low scoring boxes
            scores_per_level = torch.sigmoid(logits_per_level).flatten()
            keep_idxs = scores_per_level > self.score_thresh
            scores_per_level = scores_per_level[keep_idxs]
            topk_idxs = torch.where(keep_idxs)[0]

            # keep only topk scoring predictions
            num_topk = min(self.topk_candidates, topk_idxs.size(0))
            scores_per_level, idxs = scores_per_level.topk(num_topk)
            topk_idxs = topk_idxs[idxs]

            anchor_idxs = torch.div(topk_idxs, num_classes, rounding_mode='floor')
            labels_per_level = topk_idxs % num_classes

            boxes_per_level = self.box_coder.decode_single(
                box_regression_per_level[anchor_idxs], anchors_per_level[anchor_idxs])
            boxes_per_level = box_ops.clip_boxes_to_image(boxes_per_level, image_shape)

            image_boxes.append(boxes_per_level)
            image_scores.append(scores_per_level)
            image_labels.append(labels_per_level)

        image_boxes = torch.cat(image_boxes, dim=0)
        image_scores = torch.cat(image_scores, dim=0)
        image_labels = torch.cat(image_labels, dim=0)

        # non-maximum suppression
        keep = box_ops.batched_nms(image_boxes, image_scores, image_labels, self.nms_thresh)
        keep = keep[:self.detections_per_img]

        detections.append({
            'boxes': image_boxes[keep],
            'scores': image_scores[keep],
            'labels': image_labels[keep],
        })
    return detections
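# Worked sketch of the flattened-index decomposition above: the per-level logits
# are flattened over (anchor, class), so with num_classes = 4 (assumed here) the
# flat index 10 maps to anchor 10 // 4 = 2 and label 10 % 4 = 2.
num_classes_demo = 4
topk_idxs_demo = torch.tensor([10, 7])
anchor_idxs_demo = torch.div(topk_idxs_demo, num_classes_demo, rounding_mode='floor')  # tensor([2, 1])
labels_demo = topk_idxs_demo % num_classes_demo  # tensor([2, 3])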
def post_process(self, cls_logits: torch.Tensor, reg_deltas: torch.Tensor,
                 batched_rois: List[torch.Tensor]):
    nms_threshold = self._params['nms_threshold']
    conf_threshold = self._params['conf_threshold']
    keep_top_n = self._params['keep_top_n']

    batched_dets: List[torch.Tensor] = []
    current = 0
    for rois in batched_rois:
        N = rois.size(0)
        if N == 0:
            print("warning! found empty rois")
            batched_dets.append(torch.empty(0, 6, dtype=reg_deltas.dtype, device=reg_deltas.device))
            continue
        logits = cls_logits[current:current + N]
        offsets = reg_deltas[current:current + N]
        current += N
        # logits: torch.Tensor(N,)
        # offsets: torch.Tensor(N, 4)
        # rois: torch.Tensor(N, 4)
        scores = torch.sigmoid(logits)
        preds = torch.zeros(scores.shape, dtype=torch.int64, device=scores.device)
        preds[scores >= 0.5] = 1
        fg_preds_mask = preds != 0

        # convert offsets to boxes
        # N,4 | N,4 => N,4 as xmin, ymin, xmax, ymax
        boxes = offsets2boxes(offsets.unsqueeze(0), rois).squeeze(0)

        # keep foreground predictions only
        boxes = boxes[fg_preds_mask]
        preds = preds[fg_preds_mask]
        scores = scores[fg_preds_mask]

        # apply conf threshold
        keep = scores >= conf_threshold
        scores, preds, boxes = scores[keep], preds[keep], boxes[keep]

        # remove small boxes
        keep = box_ops.remove_small_boxes(boxes, 1e-3)  # TODO try 1
        scores, preds, boxes = scores[keep], preds[keep], boxes[keep]

        # batched nms
        keep = box_ops.batched_nms(boxes, scores, preds, nms_threshold)
        scores, preds, boxes = scores[keep], preds[keep], boxes[keep]

        # select top n
        keep_n = min(keep_top_n, scores.size(0))
        _, selected_ids = scores.topk(keep_n)
        scores, preds, boxes = scores[selected_ids], preds[selected_ids], boxes[selected_ids]

        scores.unsqueeze_(1)
        preds = preds.unsqueeze(1).to(boxes.dtype)
        dets = torch.cat([boxes, scores, preds], dim=-1)
        batched_dets.append(dets)
    return batched_dets
def nms_with_class(boxes, scores, idxs, iou_threshold):
    # thin wrapper around torchvision's class-aware batched NMS
    return batched_nms(boxes, scores, idxs, iou_threshold)
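# Usage sketch for the wrapper above (synthetic inputs): identical boxes with
# different class ids both survive, since batched_nms only suppresses within a class.
boxes_demo = torch.tensor([[0., 0., 10., 10.], [0., 0., 10., 10.]])
scores_demo = torch.tensor([0.9, 0.8])
idxs_demo = torch.tensor([0, 1])
keep_demo = nms_with_class(boxes_demo, scores_demo, idxs_demo, iou_threshold=0.5)
# keep_demo contains both indices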