def generate_hn_pair(self, src_gt: BoxList, proposal: BoxList, src_h=None, proposal_h=None):
    """ Generate hard negative pair by sampling non-negative proposals """
    proposal_ids = proposal.get_field('ids')
    src_id = src_gt.get_field('ids')
    scales = torch.ones_like(proposal_ids)
    if (src_h is not None) and (proposal_h is not None):
        scales = src_h / proposal_h
    # sample proposals with similar scales
    # and non-negative proposals
    hard_bb_idxs = ((proposal_ids >= 0) & (proposal_ids != src_id))
    scale_idxs = (scales >= 0.5) & (scales <= 2)
    indices = (hard_bb_idxs & scale_idxs)
    unique_ids = torch.unique(proposal_ids[indices])
    idxs = indices.nonzero()
    # avoid sampling redundant samples
    num_hn = min(idxs.numel(), unique_ids.numel())
    sampled_hn_boxes = self.sample_boxlist(proposal, idxs, num_hn)
    return sampled_hn_boxes
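# Usage sketch for the gating above (hypothetical tensors, not part of the
# original code): only proposals with a non-negative id different from the
# source id and a height ratio within [0.5, 2] survive as hard negatives.
import torch

proposal_ids = torch.tensor([3, -1, 7, 3, 9])
src_id = torch.tensor(3)
scales = torch.tensor([1.0, 0.3, 1.8, 0.9, 4.0])  # src_h / proposal_h
hard = (proposal_ids >= 0) & (proposal_ids != src_id)
in_scale = (scales >= 0.5) & (scales <= 2)
print((hard & in_scale).nonzero())  # tensor([[2]]) -- only row 2 qualifies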
def update(self, detections: BoxList, time: float):
    self.last_update = time
    assert detections.has_field('index') and detections.mode == 'xyxy'
    for i, ind in enumerate(detections.get_field('index')):
        ind = int(ind)
        box, label, mask, score = detections.bbox[i], detections.get_field('labels')[i], \
            detections.get_field('mask')[i], detections.get_field('scores')[i]
        # assumed car position: horizontal center of the box at its bottom edge
        # (np.int is deprecated; use the builtin int)
        location = np.asarray([(box[0] + box[2]) / 2, box[-1]]).round().astype(int)
        region_code = self._object_region(location)
        region = CODE_TO_REGION[region_code]  # position at the moment
        if ind in self.instances:
            self.instances[ind]['regions'].append(region)
            self.instances[ind]['labels'].append(int(label))
            self.instances[ind]['scores'].append(float(score))
            self.instances[ind]['locations'].append(location)
            self.instances[ind]['box'].append(box)
            self.instances[ind]['mask'].append(mask)
            self.instances[ind]['lost'] = self.last_update
        else:
            self.instances[ind] = {
                "regions": [region],
                "labels": [int(label)],
                "scores": [float(score)],
                "locations": [location],
                "box": [box],
                "mask": [mask],
                "appeared": self.last_update,
                "lost": self.last_update,
            }
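# Quick check of the assumed ground-contact point used in update() above,
# on a hypothetical xyxy box: the location is the horizontal center of the
# box, taken at its bottom edge.
import numpy as np

box = np.array([100.0, 50.0, 180.0, 210.0])  # xyxy
location = np.asarray([(box[0] + box[2]) / 2, box[-1]]).round().astype(int)
print(location)  # [140 210]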
def frame_vis_generator(self, frame, results: BoxList):
    frame, results = self.normalize_output(frame, results)
    ids = results.get_field('ids')
    results = results[ids >= 0]
    results = results.convert('xyxy')
    bbox = results.bbox.detach().cpu().numpy()
    ids = results.get_field('ids').tolist()
    labels = results.get_field('labels').tolist()
    for i, entity_id in enumerate(ids):
        color = self.colors[entity_id % self.num_colors]
        class_name = self.class_names[labels[i] - 1]
        text_width = len(class_name) * 20
        x1, y1, x2, y2 = np.round(bbox[i, :]).astype(int)
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness=3)
        cv2.putText(frame, str(entity_id), (x1 + 5, y1 + 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, thickness=3)
        # Draw a filled background rectangle behind the class-name text
        cv2.rectangle(frame, (x1 - 5, y1 - 25), (x1 + text_width, y1), color, -1)
        cv2.putText(frame, '{}'.format(class_name), (x1 + 5, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), thickness=2)
    return frame
def generate_pos(self, src_gt: BoxList, proposal: BoxList):
    assert (src_gt.mode == 'xyxy' and len(src_gt) == 1)
    proposal_ids = proposal.get_field('ids')
    src_id = src_gt.get_field('ids')
    pos_indices = (proposal_ids == src_id)
    pos_boxes = proposal[pos_indices]
    pos_boxes = pos_boxes.copy_with_fields(('ids', 'labels'))
    return pos_boxes
def filter_results_nms_on_max(self, boxlist, num_classes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    # The CPU version is faster than the GPU one here; move this back to GPU
    # only after verifying the timings.
    boxlist = boxlist.to('cpu')
    boxes = boxlist.bbox.reshape(-1, 4)
    scores = boxlist.get_field("scores").reshape(-1, num_classes)
    max_scores, _ = scores[:, self.cls_start_idx:].max(dim=1, keepdim=False)
    keep = (max_scores > self.score_thresh).nonzero().squeeze(-1)
    if len(keep) == 0:
        return self.prepare_empty_boxlist(boxlist)
    boxes, scores, max_scores = boxes[keep], scores[keep], max_scores[keep]
    boxlist = BoxList(boxes, boxlist.size, mode=boxlist.mode)
    boxlist.add_field("scores", max_scores)
    boxlist.add_field('original_scores', scores)
    boxlist = self.nms_func(boxlist)
    scores = boxlist.get_field('original_scores')
    all_idxrow_idxcls = (scores[:, self.cls_start_idx:] > self.score_thresh).nonzero()
    all_idxrow_idxcls[:, 1] += self.cls_start_idx
    boxes = boxlist.bbox
    boxes = boxes[all_idxrow_idxcls[:, 0]]
    if boxes.dim() == 1:
        boxes = boxes[None, :]
    labels = all_idxrow_idxcls[:, 1]
    scores = scores[all_idxrow_idxcls[:, 0], all_idxrow_idxcls[:, 1]]
    result = BoxList(boxes, boxlist.size, mode=boxlist.mode)
    result.add_field("labels", labels)
    result.add_field("scores", scores)
    number_of_detections = len(result)
    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("scores")
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(),
            number_of_detections - self.detections_per_img + 1)
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
    return result
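# Sketch of the kthvalue cap used above (and in the other filter_results
# variants), on hypothetical scores: the (N - K + 1)-th smallest score is the
# K-th largest, so thresholding on it keeps the top-K detections per image.
import torch

scores = torch.tensor([0.9, 0.2, 0.75, 0.6, 0.31])
K = 3  # plays the role of detections_per_img
image_thresh, _ = torch.kthvalue(scores, scores.numel() - K + 1)
keep = torch.nonzero(scores >= image_thresh.item()).squeeze(1)
print(keep)  # tensor([0, 2, 3]) -- the three highest-scoring rows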
def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    results = []
    for i in range(num_images):
        scores = boxlists[i].get_field("scores")
        coeffs = boxlists[i].get_field("coeffs")
        boxes = boxlists[i].bbox
        boxlist = boxlists[i]
        if cfg.MODEL.YOLACT.USE_FAST_NMS:
            scores, boxes, coeffs, labels = self.fast_nms(scores, boxes, coeffs)
            result = BoxList(boxes, boxlist.size, mode="xyxy")
            result.add_field("scores", scores)
            result.add_field("coeffs", coeffs)
            result.add_field("labels", labels)
        else:
            labels = boxlists[i].get_field("labels")
            result = []
            # skip the background
            for j in range(1, self.num_classes):
                inds = (labels == j).nonzero().squeeze(1)
                # if inds.numel() == 0:
                #     continue
                scores_j = scores[inds]
                coeffs_j = coeffs[inds, :]
                boxes_j = boxes[inds, :]
                boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
                boxlist_for_class.add_field("scores", scores_j)
                boxlist_for_class.add_field("coeffs", coeffs_j)
                # per class nms
                boxlist_for_class = boxlist_nms(
                    boxlist_for_class, self.nms_thresh, score_field="scores"
                )
                num_labels = len(boxlist_for_class)
                boxlist_for_class.add_field(
                    "labels",
                    torch.full((num_labels,), j, dtype=torch.int64, device=scores.device)
                )
                result.append(boxlist_for_class)
            result = cat_boxlist(result)
        # Limit to max_per_image detections **over all classes**
        number_of_detections = len(result)
        if number_of_detections > self.fpn_post_nms_top_n > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.fpn_post_nms_top_n + 1
            )
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        results.append(result)
    return results
def filter_results(self, boxlist, num_classes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox.reshape(-1, num_classes * 4).float()
    scores = boxlist.get_field("scores").reshape(-1, num_classes).float()
    image_shape = boxlist.size
    device = scores.device
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    boxes = boxes.view(-1, num_classes, 4).permute(1, 0, 2).contiguous()[1:, :, :]
    scores = scores.permute(1, 0).contiguous()[1:, :]
    inds_all = scores > self.score_thresh
    num_detections = scores.size(1)
    num_boxes_per_class = [num_detections] * (num_classes - 1)
    num_boxes_tensor = torch.tensor(num_boxes_per_class, device=boxes.device,
                                    dtype=torch.int32)
    sorted_idx = scores.argsort(dim=1, descending=True)
    batch_idx = torch.arange(num_classes - 1, device=device)[:, None]
    boxes = boxes[batch_idx, sorted_idx]
    scores = scores[batch_idx, sorted_idx]
    inds_all = inds_all[batch_idx, sorted_idx]
    boxes = boxes.reshape(-1, 4)
    # Apply batched_nms kernel
    keep_inds_batched = C.nms_batched(boxes, num_boxes_per_class, num_boxes_tensor,
                                      inds_all.byte(), self.nms)
    keep_inds = keep_inds_batched.view(-1).nonzero().squeeze(1)
    boxes = boxes.reshape(-1, 4).index_select(dim=0, index=keep_inds)
    scores = scores.reshape(-1).index_select(dim=0, index=keep_inds)
    labels_all = torch.tensor(num_detections * list(range(1, num_classes)),
                              device=device, dtype=torch.int64)
    labels_all = labels_all.view(num_detections, (num_classes - 1)).permute(
        1, 0).contiguous().reshape(-1)
    labels = labels_all.index_select(dim=0, index=keep_inds)
    result = BoxList(boxes, image_shape, mode="xyxy")
    result.add_field("scores", scores)
    result.add_field("labels", labels)
    number_of_detections = len(result)
    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("scores")
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(),
            number_of_detections - self.detections_per_img + 1)
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
    return result
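# Sketch of the labels_all layout built above, on tiny hypothetical sizes:
# after the view/permute/reshape, labels run class-major, matching the
# (num_classes - 1, num_detections) ordering of the flattened boxes.
import torch

num_detections, num_classes = 2, 4
labels_all = torch.tensor(num_detections * list(range(1, num_classes)))
labels_all = labels_all.view(num_detections, num_classes - 1).permute(1, 0).reshape(-1)
print(labels_all)  # tensor([1, 1, 2, 2, 3, 3]) -- one contiguous run per class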
def sample_boxlist(self, boxlist: BoxList, indices, num_samples):
    assert (num_samples <= indices.numel())
    if num_samples == 0:
        sampled_boxlist = self.get_dummy_boxlist(boxlist, num_boxes=0)
    else:
        permuted_idxs = torch.randperm(indices.numel())
        sampled_idxs = indices[permuted_idxs[:num_samples], 0]
        sampled_boxes = boxlist.bbox[sampled_idxs, :]
        sampled_ids = None
        sampled_labels = None
        if 'ids' in boxlist.fields():
            sampled_ids = boxlist.get_field('ids')[sampled_idxs]
        if 'labels' in boxlist.fields():
            sampled_labels = boxlist.get_field('labels')[sampled_idxs]
        sampled_boxlist = self.get_default_boxlist(boxlist, sampled_boxes,
                                                   sampled_ids, sampled_labels)
    return sampled_boxlist
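# Standalone sketch of the sampling step above (hypothetical indices): a
# random permutation picks num_samples rows without replacement from the
# column-0 indices returned by nonzero().
import torch

indices = torch.tensor([[1], [4], [6], [9]])  # shape as returned by nonzero()
num_samples = 2
permuted_idxs = torch.randperm(indices.numel())
sampled_idxs = indices[permuted_idxs[:num_samples], 0]
print(sampled_idxs)  # e.g. tensor([6, 1])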
def forward(self, boxes, pred_maskiou, labels):
    num_masks = pred_maskiou.shape[0]
    index = torch.arange(num_masks, device=labels.device)
    maskious = pred_maskiou[index, labels]
    maskious = [maskious]
    results = []
    for maskiou, box in zip(maskious, boxes):
        bbox = BoxList(box.bbox, box.size, mode="xyxy")
        for field in box.fields():
            bbox.add_field(field, box.get_field(field))
        bbox_scores = bbox.get_field("scores")
        mask_scores = bbox_scores * maskiou
        bbox.add_field("mask_scores", mask_scores)
        results.append(bbox)
    return results
def __getitem__(self, idx):
    img, anno = super(COCODataset, self).__getitem__(idx)
    src_img_size = img.size
    # filter crowd annotations
    # TODO might be better to add an extra field
    anno = [obj for obj in anno if obj["iscrowd"] == 0]
    boxes = [obj["bbox"] for obj in anno]
    boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
    target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")
    classes = [obj["category_id"] for obj in anno]
    # NOTE: mind the class ids! The contiguous ids are shifted down by one
    # here, so the labels become zero-based (no background slot).
    classes = [
        self.json_category_id_to_contiguous_id[c] - 1 for c in classes
    ]
    classes = torch.tensor(classes)
    target.add_field("labels", classes)
    box_coord = target.bbox
    box_cls = target.get_field('labels').view(-1, 1).float()
    bbox_info = torch.cat((box_coord, box_cls), 1)
    img, bbox = load_data_detection(img, bbox_info.numpy(),
                                    self.transforms.transforms[0].size, self.train)
    if self.box_encoder is not None:
        gt = self.box_encoder(bbox)
    else:
        gt = np.zeros((50, 5), dtype=np.float32)
        gt[:len(bbox), :] = bbox
        gt = torch.from_numpy(gt).float()
    if self.transforms is not None:
        img = self.transforms(img)
    if self.train:
        return img, gt
    else:
        return img, gt, anno[0]['image_id'], src_img_size
def forward(self, boxes, pred_maskiou, labels):
    num_masks = pred_maskiou.shape[0]
    index = torch.arange(num_masks, device=labels.device)
    maskious = pred_maskiou[index, labels]
    # maskious = [maskious]
    # split `maskious` according to `boxes`, one chunk per image
    boxes_per_image = [len(box) for box in boxes]
    maskious = maskious.split(boxes_per_image, dim=0)
    results = []
    for maskiou, box in zip(maskious, boxes):
        bbox = BoxList(box.bbox, box.size, mode="xyxy")
        for field in box.fields():
            bbox.add_field(field, box.get_field(field))
        bbox_scores = bbox.get_field("scores")
        mask_scores = bbox_scores * maskiou
        bbox.add_field("mask_scores", mask_scores)
        results.append(bbox)
    return results
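# The difference from the earlier forward() variant is the per-image split.
# A sketch with hypothetical sizes (two images, with 2 and 3 boxes) shows
# that each BoxList is then paired with its own slice of maskious.
import torch

maskious = torch.rand(5)
boxes_per_image = [2, 3]
chunks = maskious.split(boxes_per_image, dim=0)
print([len(c) for c in chunks])  # [2, 3]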
def get_annotation(self, image_id):
    coco = self.coco
    ann_ids = coco.getAnnIds(imgIds=image_id)
    img_data = self.coco.imgs[image_id]
    anno = coco.loadAnns(ann_ids)
    boxes = [obj["bbox"] for obj in anno]
    boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
    target = BoxList(boxes, (img_data['width'], img_data['height']),
                     mode="xywh").convert("xyxy")
    labels = [obj["category_id"] for obj in anno]
    labels = [self.json_category_id_to_contiguous_id[c] for c in labels]
    target.add_field("labels", torch.tensor(labels))
    target = target.clip_to_image(remove_empty=True)
    return {
        'boxes': target.bbox.tolist(),
        'labels': target.get_field('labels').tolist()
    }
def agmap_total(gt_boxlist, l_boxlist, h_boxlist, class_independ=False,
                keep_small=True, reward=False, verbose=False):
    """
    Generate the agmap from the ground truth.
    :param gt_boxlist (BoxList): ground-truth boxes; must be in xyxy mode
    :param l_boxlist (BoxList): low-resolution detections; must be in xyxy mode
    :param h_boxlist (BoxList): high-resolution detections; must be in xyxy mode
    :param class_independ (bool): class-agnostic mode; only the IoU between proposals is considered
    :param keep_small (bool): only compute the agmap for small objects (< 96x96)
    :return: agmap (np.ndarray)
    """
    # Optionally drop large objects and compute the agmap only for objects below 96x96
    if keep_small:
        gt_area = gt_boxlist.area()
        l_area = l_boxlist.area()
        h_area = h_boxlist.area()
        gt_keep, l_keep, h_keep = (gt_area < np.square(96),
                                   l_area < np.square(96),
                                   h_area < np.square(96))
        if torch.sum(gt_keep) == 0:
            gt_boxlist = BoxList([[0, 0, 0, 0]], gt_boxlist.size, mode="xyxy")
            gt_boxlist.add_field("labels", torch.as_tensor([0], dtype=torch.int64))
        else:
            gt_boxlist = gt_boxlist[gt_keep]
        if torch.sum(l_keep) == 0:
            l_boxlist = BoxList([[0, 0, 0, 0]], l_boxlist.size, mode="xyxy")
            l_boxlist.add_field("labels", torch.as_tensor([0], dtype=torch.int64))
            l_boxlist.add_field("scores", torch.as_tensor([0], dtype=torch.float32))
        else:
            l_boxlist = l_boxlist[l_keep]
        if torch.sum(h_keep) == 0:
            h_boxlist = BoxList([[0, 0, 0, 0]], h_boxlist.size, mode="xyxy")
            h_boxlist.add_field("labels", torch.as_tensor([0], dtype=torch.int64))
            h_boxlist.add_field("scores", torch.as_tensor([0], dtype=torch.float32))
        else:
            h_boxlist = h_boxlist[h_keep]
    # gt_boxlist.size is (image_width, image_height); transpose to get the right shape
    agmap = np.zeros(gt_boxlist.size, np.float32).T
    # split the gain into false positives and misses
    agmap_split = np.zeros((1, gt_boxlist.size[1], gt_boxlist.size[0]), np.float32)
    # accumulator for the reward evaluation
    agval = 0.
    # paint the agmap with the ground-truth boxes, or with the l_det boxes
    use_gt_bbox = True
    for i in range(len(gt_boxlist)):
        g_bbox_i = gt_boxlist[i]
        g_label = g_bbox_i.get_field("labels").item()
        if class_independ:
            l_boxlist_sel = l_boxlist
            h_boxlist_sel = h_boxlist
        else:
            # detections of the correctly recalled class
            l_boxlist_sel = l_boxlist[l_boxlist.get_field("labels") == g_label]
            h_boxlist_sel = h_boxlist[h_boxlist.get_field("labels") == g_label]
        if len(l_boxlist_sel) == 0:
            l_boxlist_sel = BoxList([[0, 0, 0, 0]], l_boxlist_sel.size, mode="xyxy")
            l_boxlist_sel.add_field("scores", torch.as_tensor([0], dtype=torch.float32))
        if len(h_boxlist_sel) == 0:
            h_boxlist_sel = BoxList([[0, 0, 0, 0]], h_boxlist_sel.size, mode="xyxy")
            h_boxlist_sel.add_field("scores", torch.as_tensor([0], dtype=torch.float32))
        l_score = l_boxlist_sel.get_field("scores").cpu().numpy()
        h_score = h_boxlist_sel.get_field("scores").cpu().numpy()
        iou_l = boxlist_iou(g_bbox_i, l_boxlist_sel)
        iou_h = boxlist_iou(g_bbox_i, h_boxlist_sel)
        l_val, l_id = iou_l.max(dim=1)
        l_val, l_id = l_val.item(), l_id.item()  # g_bbox_i has a single element
        h_val, h_id = iou_h.max(dim=1)
        h_val, h_id = h_val.item(), h_id.item()  # g_bbox_i has a single element
        # First score the agmap against the ground truth. There are three cases:
        # both l and h recall the object, only l recalls it, or only h recalls it.
        # g_bbox = gt_boxlist.bbox[i, :].cpu().numpy()
        g_bbox = g_bbox_i.bbox[0, :].cpu().numpy()
        g_bbox = np.round(g_bbox).astype(np.int64)  # round for indexing
        g_area = (g_bbox[3] - g_bbox[1]) * (g_bbox[2] - g_bbox[0])
        l_bbox = l_boxlist_sel.bbox[l_id, :].cpu().numpy()
        l_bbox = np.round(l_bbox).astype(np.int64)
        l_area = (l_bbox[3] - l_bbox[1]) * (l_bbox[2] - l_bbox[0])
        if l_val > iou_thrs and h_val > iou_thrs:
            ag = h_score[h_id] - l_score[l_id]
        elif l_val > iou_thrs:
            # gain from a miss at high resolution
            ag = -l_score[l_id]
        elif h_val > iou_thrs:
            # gain from a miss at low resolution
            ag = h_score[h_id]
            if g_area != 0:
                agmap_split[0, g_bbox[1]:g_bbox[3], g_bbox[0]:g_bbox[2]] += ag / g_area
        else:
            ag = 0
        agval += ag
        if use_gt_bbox and g_area != 0:
            # use the ground-truth box to paint the agmap score
            agmap[g_bbox[1]:g_bbox[3], g_bbox[0]:g_bbox[2]] += ag / g_area
        elif l_area != 0:
            agmap[l_bbox[1]:l_bbox[3], l_bbox[0]:l_bbox[2]] += ag / l_area
    iou_l = boxlist_iou(gt_boxlist, l_boxlist)
    iou_h = boxlist_iou(gt_boxlist, h_boxlist)
    l_score = l_boxlist.get_field("scores").cpu().numpy()
    h_score = h_boxlist.get_field("scores").cpu().numpy()
    l_label = l_boxlist.get_field("labels").cpu().numpy()
    h_label = h_boxlist.get_field("labels").cpu().numpy()
    g_label = gt_boxlist.get_field("labels").cpu().numpy()
    l_val, l_id = iou_l.max(dim=0)
    l_val, l_id = l_val.cpu().numpy(), l_id.cpu().numpy()
    h_val, h_id = iou_h.max(dim=0)
    h_val, h_id = h_val.cpu().numpy(), h_id.cpu().numpy()
    for i in range(len(l_boxlist)):
        l_bbox = l_boxlist.bbox[i, :].cpu().numpy()
        l_bbox = np.round(l_bbox).astype(np.int64)  # round for indexing
        area = (l_bbox[3] - l_bbox[1]) * (l_bbox[2] - l_bbox[0])
        if ((g_label[l_id[i]] != l_label[i] and not class_independ)
                or l_val[i] < iou_thrs) and area != 0:
            # gain from a false positive at low resolution
            agval += l_score[i]
            agmap[l_bbox[1]:l_bbox[3], l_bbox[0]:l_bbox[2]] += l_score[i] / area
    for i in range(len(h_boxlist)):
        h_bbox = h_boxlist.bbox[i, :].cpu().numpy()
        h_bbox = np.round(h_bbox).astype(np.int64)  # round for indexing
        area = (h_bbox[3] - h_bbox[1]) * (h_bbox[2] - h_bbox[0])
        if ((g_label[h_id[i]] != h_label[i] and not class_independ)
                or h_val[i] < iou_thrs) and area != 0:
            # gain from a false positive at high resolution
            agval -= h_score[i]
            agmap[h_bbox[1]:h_bbox[3], h_bbox[0]:h_bbox[2]] -= h_score[i] / area
    agmap = torch.from_numpy(agmap).unsqueeze(dim=0).unsqueeze(dim=0)
    agmap_split = torch.from_numpy(agmap_split).unsqueeze(dim=0)
    with torch.no_grad():
        # agmap = agmap_avgpool(agmap)
        agmap = F.interpolate(agmap, size=agmap_size, mode='bilinear',
                              align_corners=False)
        agmap_split = F.interpolate(agmap_split, size=agmap_size, mode='bilinear',
                                    align_corners=False)
    agmap = np.squeeze(agmap.cpu().numpy())
    agmap_split = np.squeeze(agmap_split.cpu().numpy())
    if verbose:
        # map from [-1, 1] to [0, 255] for colormap visualization
        agmap_color = agmap * alpha
        agmap_color = cv2.resize(agmap_color, gt_boxlist.size)
        agmap_color = (agmap_color + 1) * 127
        agmap_color[agmap_color > 255] = 255
        agmap_color[agmap_color < 0] = 0
        agmap_color = cv2.applyColorMap(agmap_color.astype(np.uint8), cv2.COLORMAP_HOT)
        cv2.imshow("agmap", agmap_color)
        cv2.waitKey(200)
    if reward:
        return agval
    else:
        return agmap, agmap_split
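# Sketch of the final resizing step (agmap_size is a module-level constant in
# the original; (30, 40) here is a hypothetical value): the full-resolution
# gain map is downsampled bilinearly onto a fixed grid.
import torch
import torch.nn.functional as F

agmap = torch.zeros(1, 1, 480, 640)  # (N, C, H, W)
agmap_small = F.interpolate(agmap, size=(30, 40), mode='bilinear', align_corners=False)
print(agmap_small.shape)  # torch.Size([1, 1, 30, 40])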
def filter_results(self, boxlist, num_classes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    boxes_per_cls = boxlist.bbox.reshape(-1, num_classes, 4)
    scores = boxlist.get_field("pred_scores").reshape(-1, num_classes)
    device = scores.device
    result = []
    orig_inds = []
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    inds_all = scores > self.score_thresh
    for j in range(1, num_classes):
        inds = inds_all[:, j].nonzero().squeeze(1)
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
        boxlist_for_class.add_field("pred_scores", scores_j)
        boxlist_for_class, keep = boxlist_nms(
            boxlist_for_class, self.nms,
            max_proposals=self.post_nms_per_cls_topn,
            score_field='pred_scores')
        inds = inds[keep]
        num_labels = len(boxlist_for_class)
        boxlist_for_class.add_field(
            "pred_labels",
            torch.full((num_labels,), j, dtype=torch.int64, device=device))
        result.append(boxlist_for_class)
        orig_inds.append(inds)
    # NOTE (kaihua): following Neural-MOTIFS (and my experiments), we need to
    # remove duplicate bboxes
    if self.nms_filter_duplicates or self.save_proposals:
        assert len(orig_inds) == (num_classes - 1)
        # set all bg to zero
        inds_all[:, 0] = 0
        for j in range(1, num_classes):
            inds_all[:, j] = 0
            orig_idx = orig_inds[j - 1]
            inds_all[orig_idx, j] = 1
        dist_scores = scores * inds_all.float()
        scores_pre, labels_pre = dist_scores.max(1)
        final_inds = scores_pre.nonzero()
        assert final_inds.dim() != 0
        final_inds = final_inds.squeeze(1)
        scores_pre = scores_pre[final_inds]
        labels_pre = labels_pre[final_inds]
        result = BoxList(boxes_per_cls[final_inds, labels_pre], boxlist.size, mode="xyxy")
        result.add_field("pred_scores", scores_pre)
        result.add_field("pred_labels", labels_pre)
        orig_inds = final_inds
    else:
        result = cat_boxlist(result)
        orig_inds = torch.cat(orig_inds, dim=0)
    number_of_detections = len(result)
    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("pred_scores")
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(),
            number_of_detections - self.detections_per_img + 1)
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
        orig_inds = orig_inds[keep]
    return result, orig_inds, boxes_per_cls[orig_inds]
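# Sketch of the duplicate-removal trick above with hypothetical scores: NMS
# survivors are re-marked in a {0,1} mask per class, and max(1) then assigns
# each box a single (score, label) pair, so one box cannot appear under two
# classes at once.
import torch

scores = torch.tensor([[0.0, 0.6, 0.3],
                       [0.0, 0.2, 0.7]])
keep_mask = torch.tensor([[0., 1., 0.],   # box 0 kept by NMS only for class 1
                          [0., 0., 1.]])  # box 1 kept by NMS only for class 2
scores_pre, labels_pre = (scores * keep_mask).max(1)
print(scores_pre, labels_pre)  # tensor([0.6000, 0.7000]) tensor([1, 2])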
def prepare_for_coco_detection_mstest(predictions, dataset):
    # pdb.set_trace()
    predictions_s = predictions[0]
    predictions_m = predictions[1]
    predictions_l = predictions[2]
    dataset_s = dataset[0]
    dataset_m = dataset[1]
    dataset_l = dataset[2]
    coco_results = []
    # iterate over images
    for image_id, predictions in enumerate(
            zip(predictions_s, predictions_m, predictions_l)):
        prediction_s = predictions[0]
        prediction_m = predictions[1]
        prediction_l = predictions[2]
        original_id = dataset_l.id_to_img_map[image_id]
        if len(prediction_l) == 0:
            continue
        img_info = dataset_l.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        img_id_json = img_info['id']
        # rescale predicted bboxes to the original image size
        prediction_s = prediction_s.resize((image_width, image_height))
        prediction_m = prediction_m.resize((image_width, image_height))
        prediction_l = prediction_l.resize((image_width, image_height))
        # get single-scale results from the BoxLists
        bbox_s = prediction_s.bbox
        score_s = prediction_s.get_field('scores').unsqueeze(1)
        label_s = prediction_s.get_field('labels').unsqueeze(1)
        bbox_m = prediction_m.bbox
        score_m = prediction_m.get_field('scores').unsqueeze(1)
        label_m = prediction_m.get_field('labels').unsqueeze(1)
        bbox_l = prediction_l.bbox
        score_l = prediction_l.get_field('scores').unsqueeze(1)
        label_l = prediction_l.get_field('labels').unsqueeze(1)
        # concat single-scale results and convert to a BoxList (small, medium, large)
        min_size = 0
        w = prediction_l.size[0]
        h = prediction_l.size[1]
        detections = torch.from_numpy(np.row_stack((bbox_s, bbox_m, bbox_l))).cuda()
        per_class = torch.from_numpy(np.row_stack((label_s, label_m, label_l))).cuda()
        per_class = torch.squeeze(per_class, dim=1)
        per_box_cls = torch.from_numpy(np.row_stack((score_s, score_m, score_l))).cuda()
        per_box_cls = torch.squeeze(per_box_cls, dim=1)
        boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
        boxlist.add_field("labels", per_class)
        boxlist.add_field("scores", per_box_cls)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, min_size)
        # apply NMS to the multi-scale results (small, medium, large)
        nms_method = cfg.TEST.MS_TEST_NMS
        nms_thresh = cfg.TEST.MS_TEST_NMS_THR
        num_classes = 81
        scores = boxlist.get_field("scores")
        labels = boxlist.get_field("labels")
        boxes = boxlist.bbox
        result = []
        # multi-scale test + NMS
        for j in range(1, num_classes):
            inds = (labels == j).nonzero().view(-1)
            scores_j = scores[inds]
            boxes_j = boxes[inds, :].view(-1, 4)
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            if nms_method == "nms":
                boxlist_for_class = boxlist_nms(boxlist_for_class, nms_thresh,
                                                score_field="scores")
            elif nms_method == "soft_nms":
                boxlist_for_class = boxlist_soft_nms(boxlist_for_class, nms_thresh,
                                                     score_field="scores")
            else:
                print('unknown nms method: {}'.format(nms_method))
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels,), j, dtype=torch.int64, device=scores.device))
            result.append(boxlist_for_class)
        result = cat_boxlist(result)
        boxlist = result
        boxlist = boxlist.convert("xywh")
        boxes = boxlist.bbox.tolist()
        scores = boxlist.get_field("scores").tolist()
        labels = boxlist.get_field("labels").tolist()
        mapped_labels = [
            dataset_l.contiguous_category_id_to_json_id[int(i)] for i in labels
        ]
        coco_results.extend([{
            "image_id": original_id,
            "category_id": mapped_labels[k],
            "bbox": box,
            "score": scores[k],
        } for k, box in enumerate(boxes)])
    return coco_results
def eval_relation(
        dataset: VGDataset,
        predictions: [RelationTriplet],  # list of RelationTriplet
        output_folder):
    logger = logging.getLogger(__name__)
    rel_total_cnt = 0
    relation_hit_cnt = torch.zeros((2), dtype=torch.int32)  # top 50 and top 100
    phrase_hit_num = torch.zeros((2), dtype=torch.int32)
    rel_loc_hit_cnt = torch.zeros((2), dtype=torch.int32)
    rel_inst_hit_cnt = torch.zeros((2), dtype=torch.int32)
    instance_det_hit_num = torch.zeros((2), dtype=torch.int32)
    eval_topks = cfg.MODEL.RELATION.TOPK_TRIPLETS
    cuda_dev = torch.zeros((1, 1)).cuda().device
    logger.info("start relationship evaluations.")
    logger.info("relation statistics range %s" % str(eval_topks))
    true_det_rel = []
    det_total = 0
    relation_eval_res = {}
    for indx, rel_pred in tqdm(enumerate(predictions)):
        # rel_pred is a RelationTriplet obj
        # ipdb.set_trace()
        original_id = dataset.id_to_img_map[indx]
        img_info = dataset.get_img_info(indx)
        image_width = img_info["width"]
        image_height = img_info["height"]
        rel_pred.instance = rel_pred.instance.resize((image_width, image_height))
        # get the boxes
        ann_ids = dataset.coco.getAnnIds(imgIds=original_id)
        anno = dataset.coco.loadAnns(ann_ids)
        gt_boxes = [obj["bbox"] for obj in anno if obj["iscrowd"] == 0]
        det_total += len(gt_boxes)
        labels = [obj["category_id"] for obj in anno]
        # get gt boxes
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = BoxList(gt_boxes, (image_width, image_height),
                           mode="xywh").convert("xyxy")
        gt_boxes.add_field("labels", torch.LongTensor(labels))
        gt_boxes = gt_boxes.to(cuda_dev)
        rel_pred = rel_pred.to(cuda_dev)
        # get gt relations
        gt_relations = torch.as_tensor(dataset.relationships[original_id])
        gt_relations = gt_relations.to(cuda_dev)
        rel_total_cnt += gt_relations.shape[0]
        for i, topk in enumerate(eval_topks):
            selected_rel_pred = rel_pred[:topk]
            # fetch the IoU between the gt boxes and the detected pairs
            instance_hit_iou = boxlist_iou(selected_rel_pred.instance, gt_boxes)
            if len(instance_hit_iou) == 0:
                continue
            max_iou_val, inst_loc_hit_idx = torch.max(instance_hit_iou, dim=1)
            # box pair location hit
            inst_det_hit_idx = inst_loc_hit_idx.clone().detach()
            neg_loc_hit_idx = (max_iou_val < 0.5)
            # mark the detections that do not hit as -1
            inst_loc_hit_idx[neg_loc_hit_idx] = -1
            # box pair and category hit
            neg_det_hit_idx = neg_loc_hit_idx | \
                (selected_rel_pred.instance.get_field("labels") !=
                 gt_boxes.get_field("labels")[inst_det_hit_idx])
            inst_det_hit_idx[neg_det_hit_idx] = -1  # mark the detections that do not hit as -1
            instance_det_hit_num[i] += len(
                torch.unique(inst_det_hit_idx[inst_det_hit_idx != -1]))
            # check the hit of each triplet against the gt relation set
            rel_pair_mat = -torch.ones((selected_rel_pred.pair_mat.shape),
                                       dtype=torch.int64, device=cuda_dev)
            # instance box location hits
            rel_loc_pair_mat = -torch.ones((selected_rel_pred.pair_mat.shape),
                                           dtype=torch.int64, device=cuda_dev)
            # instance box location and category hits
            rel_det_pair_mat = -torch.ones((selected_rel_pred.pair_mat.shape),
                                           dtype=torch.int64, device=cuda_dev)
            hit_rel_idx_collect = []
            for idx, gt_rel in enumerate(gt_relations):
                # write the hit results into the pair matrices
                # ipdb.set_trace()
                rel_pair_mat[:, 0] = inst_det_hit_idx[selected_rel_pred.pair_mat[:, 0]]
                rel_pair_mat[:, 1] = inst_det_hit_idx[selected_rel_pred.pair_mat[:, 1]]
                rel_loc_pair_mat[:, 0] = inst_loc_hit_idx[selected_rel_pred.pair_mat[:, 0]]
                rel_loc_pair_mat[:, 1] = inst_loc_hit_idx[selected_rel_pred.pair_mat[:, 1]]
                rel_det_pair_mat[:, 0] = inst_det_hit_idx[selected_rel_pred.pair_mat[:, 0]]
                rel_det_pair_mat[:, 1] = inst_det_hit_idx[selected_rel_pred.pair_mat[:, 1]]
                rel_hit_res = rel_pair_mat.eq(gt_rel[:2])
                rel_hit_idx = torch.nonzero((rel_hit_res.sum(dim=1) >= 2) &
                                            (selected_rel_pred.phrase_l == gt_rel[-1]))
                rel_pair_loc_res = rel_loc_pair_mat.eq(gt_rel[:2])
                rel_loc_hit_idx = torch.nonzero((rel_pair_loc_res.sum(dim=1) >= 2))
                rel_inst_hit_res = rel_det_pair_mat.eq(gt_rel[:2])
                rel_inst_hit_idx = torch.nonzero((rel_inst_hit_res.sum(dim=1) >= 2))
                phrase_hit_idx = torch.nonzero(selected_rel_pred.phrase_l == gt_rel[-1])
                if len(rel_hit_idx) >= 1:
                    relation_hit_cnt[i] += 1
                if len(rel_loc_hit_idx) >= 1:
                    rel_loc_hit_cnt[i] += 1
                if len(rel_inst_hit_idx) >= 1:
                    rel_inst_hit_cnt[i] += 1
                if len(phrase_hit_idx) >= 1:
                    phrase_hit_num[i] += 1
                # hit_rel_idx_collect.append(rel_hit_idx)
            # hit_rel_pair_id = torch.cat(hit_rel_idx_collect).cpu()
            # rel_pred_save = rel_pred.to(hit_rel_pair_id.device)
            # true_det_rel.append((rel_pred_save, hit_rel_pair_id))
    # summarize results
    all_text_res = ''
    for i, topk in enumerate(eval_topks):
        relation_eval_res['relation Recall@%d' % topk] = {
            'relation': relation_hit_cnt[i].item() / rel_total_cnt,
            "phrase_cls": phrase_hit_num[i].item() / rel_total_cnt,
            "inst_pair_loc": rel_loc_hit_cnt[i].item() / rel_total_cnt,
            "inst_pair_cls": rel_inst_hit_cnt[i].item() / rel_total_cnt,
            "det": instance_det_hit_num[i].item() / det_total
        }
        txt_res = 'Relation detection Recall@%d \n' % topk \
            + "instances location pair: {inst_pair_loc}\n" \
              "instances detection pair: {inst_pair_cls} \n" \
              "phrase cls: {phrase_cls} \n" \
              "relation: {relation}\n" \
              "detection: {det}\n".format(**relation_eval_res['relation Recall@%d' % topk])
        logger.info(txt_res)
        all_text_res += txt_res
    if output_folder:
        import json
        # torch.save(true_det_rel, os.path.join(output_folder, "relation_det_results.pth"))
        with open(os.path.join(output_folder, 'rel_eval_res.txt'), 'w') as f:
            f.write(json.dumps(relation_eval_res, indent=3))
    # todo visualization
    return relation_eval_res
def filter_results_peter(self, boxlist, num_classes, feature=None):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    scores = boxlist.get_field("scores").reshape(-1, num_classes)
    nms_mask = scores.clone()
    nms_mask.zero_()
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        scores_j = scores[:, j]
        boxes_j = boxes[:, j * 4:(j + 1) * 4]
        boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
        boxlist_for_class.add_field("scores", scores_j)
        boxlist_for_class.add_field("idxs", torch.arange(0, scores.shape[0]).long())
        boxlist_for_class = boxlist_nms(boxlist_for_class, 0.3)
        nms_mask[:, j][boxlist_for_class.get_field("idxs")] = 1
    dists_all = nms_mask * scores
    # filter duplicate boxes
    scores_pre, labels_pre = dists_all.max(1)
    inds_pre = scores_pre.nonzero()
    assert inds_pre.dim() != 0
    inds_pre = inds_pre.squeeze(1)
    labels_pre = labels_pre[inds_pre]
    scores_pre = scores_pre[inds_pre]
    box_inds_pre = inds_pre * scores.shape[1] + labels_pre
    result = BoxList(boxlist.bbox.view(-1, 4)[box_inds_pre], boxlist.size, mode="xyxy")
    result.add_field("labels", labels_pre)
    result.add_field("scores", scores_pre)
    if self.output_feature:
        features_pre = feature[inds_pre]
        result.add_field("box_features", features_pre)
        scores_all = scores[inds_pre]
        boxes_all = boxes[inds_pre]
        result.add_field("scores_all", scores_all)
        result.add_field("boxes_all", boxes_all.view(-1, num_classes, 4))
    vs, idx = torch.sort(scores_pre, dim=0, descending=True)
    keep_boxes = torch.nonzero(scores_pre >= self.score_thresh, as_tuple=True)[0]
    num_dets = len(keep_boxes)
    if num_dets < self.min_detections_per_img:
        keep_boxes = idx[:self.min_detections_per_img]
    elif num_dets > self.detections_per_img:
        keep_boxes = idx[:self.detections_per_img]
    else:
        keep_boxes = idx[:num_dets]
    result = result[keep_boxes]
    return result
def __getitem__(self, idx):
    img, anno = super(COCODataset, self).__getitem__(idx)
    # filter crowd annotations
    # TODO might be better to add an extra field
    anno = [obj for obj in anno if obj["iscrowd"] == 0]
    boxes = [obj["bbox"] for obj in anno]
    boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
    target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")
    classes = [obj["category_id"] for obj in anno]
    classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
    classes = torch.tensor(classes)
    target.add_field("labels", classes)
    if anno and "segmentation" in anno[0]:
        masks = [obj["segmentation"] for obj in anno]
        masks = SegmentationMask(masks, img.size, mode='poly')
        target.add_field("masks", masks)
    if anno and "keypoints" in anno[0]:
        keypoints = [obj["keypoints"] for obj in anno]
        keypoints = PersonKeypoints(keypoints, img.size)
        target.add_field("keypoints", keypoints)
    target = target.clip_to_image(remove_empty=True)
    if self.panoptic_on:
        # add semantic masks to the boxlist for panoptic
        img_id = self.ids[idx]
        img_path = self.coco.loadImgs(img_id)[0]['file_name']
        seg_path = self.root.replace('coco', 'coco/annotations').replace(
            'train2017', 'panoptic_train2017_semantic_trainid_stff').replace(
            'val2017', 'panoptic_val2017_semantic_trainid_stff') + '/' + img_path
        seg_img = Image.open(seg_path.replace('jpg', 'png'))
        # seg_img.mode = 'L'
        seg_gt = torch.ByteTensor(torch.ByteStorage.from_buffer(seg_img.tobytes()))
        seg_gt = seg_gt.view(seg_img.size[1], seg_img.size[0], 1)
        seg_gt = seg_gt.transpose(0, 1).transpose(0, 2).contiguous().float()
        seg_gt = SegmentationMask(seg_gt, seg_img.size, "mask")
        target.add_field("seg_masks", seg_gt)
    if self._transforms is not None:
        img, target = self._transforms(img, target)
    if self.use_binary_mask and target.has_field("masks"):
        if self.use_polygon_det:
            # compute target maps
            masks = target.get_field("masks")
            w, h = target.size
            assert target.mode == "xyxy"
            targets_map = np.ones((h, w), dtype=np.uint8) * 255
            assert len(masks.instances) <= 255
            for target_id, polygons in enumerate(masks.instances):
                targets_map = self.compute_target_maps(targets_map, target_id, polygons)
            target.add_field("targets_map", torch.Tensor(targets_map))
        # compute binary masks
        MASK_SIZE = self.binary_mask_size
        binary_masks = torch.zeros(len(target), MASK_SIZE[0] * MASK_SIZE[1])
        masks = target.get_field("masks")
        # assert len(target) == len(masks.instances)
        for i, polygons in enumerate(masks.instances):
            mask = self.polygons_to_mask(polygons)
            mask = mask.to(binary_masks.device)
            mask = F.interpolate(mask.unsqueeze(0).unsqueeze(0), MASK_SIZE)
            binary_masks[i, :] = mask.view(-1)
        target.add_field("binary_masks", binary_masks)
    return img, target, idx
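# Sketch of the panoptic segmentation path rewrite above (the root and file
# name here are hypothetical): the chained replace() calls map an image path
# under the coco root to its semantic-label PNG.
root = 'datasets/coco/train2017'
img_path = '000000000139.jpg'
seg_path = (root.replace('coco', 'coco/annotations')
                .replace('train2017', 'panoptic_train2017_semantic_trainid_stff')
                .replace('val2017', 'panoptic_val2017_semantic_trainid_stff')
            + '/' + img_path)
print(seg_path.replace('jpg', 'png'))
# datasets/coco/annotations/panoptic_train2017_semantic_trainid_stff/000000000139.png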
def agmap_coarse(gt_boxlist, l_boxlist, class_independ=False, keep_small=True,
                 verbose=False):
    """
    Generate the agmap from the ground truth and the low-resolution detections.
    :param gt_boxlist (BoxList): ground-truth boxes; must be in xyxy mode
    :param l_boxlist (BoxList): low-resolution detections; must be in xyxy mode
    :param class_independ (bool): class-agnostic mode; only the IoU between proposals is considered
    :param keep_small (bool): only compute the agmap for small objects (< 96x96)
    :return: agmap (np.ndarray)
    """
    # Optionally drop large objects and compute the agmap only for objects below 96x96
    if keep_small:
        gt_area = gt_boxlist.area()
        l_area = l_boxlist.area()
        gt_keep, l_keep = gt_area < np.square(96), l_area < np.square(96)
        if torch.sum(gt_keep) == 0:
            gt_boxlist = BoxList([[0, 0, 0, 0]], gt_boxlist.size, mode="xyxy")
            gt_boxlist.add_field("labels", torch.as_tensor([0], dtype=torch.int64))
        else:
            gt_boxlist = gt_boxlist[gt_keep]
        if torch.sum(l_keep) == 0:
            l_boxlist = BoxList([[1, 1, 1, 1]], l_boxlist.size, mode="xyxy")
            l_boxlist.add_field("labels", torch.as_tensor([0], dtype=torch.int64))
            l_boxlist.add_field("scores", torch.as_tensor([0], dtype=torch.float32))
        else:
            l_boxlist = l_boxlist[l_keep]
    # initialize the agmap
    gt_w, gt_h = gt_boxlist.size
    agmap = np.zeros((2, gt_h, gt_w), np.float32)
    for i in range(len(gt_boxlist)):
        g_bbox_i = gt_boxlist[i]
        g_label = g_bbox_i.get_field("labels").item()
        if class_independ:  # class-agnostic mode
            l_boxlist_sel = l_boxlist
        else:
            # detections of the correctly recalled class
            l_boxlist_sel = l_boxlist[l_boxlist.get_field("labels") == g_label]
        if len(l_boxlist_sel) == 0:
            l_boxlist_sel = BoxList([[1, 1, 1, 1]], l_boxlist_sel.size, mode="xyxy")
            l_boxlist_sel.add_field("scores", torch.as_tensor([0], dtype=torch.float32))
        l_score = l_boxlist_sel.get_field("scores").cpu().numpy()
        iou_l = boxlist_iou(g_bbox_i, l_boxlist_sel)
        l_val, l_id = iou_l.max(dim=1)
        l_val, l_id = l_val.item(), l_id.item()  # g_bbox_i has a single element
        g_bbox = g_bbox_i.bbox[0, :].cpu().numpy()
        g_bbox = np.round(g_bbox).astype(np.int64)  # round for indexing
        g_area = (g_bbox[3] - g_bbox[1]) * (g_bbox[2] - g_bbox[0])
        l_bbox = l_boxlist_sel.bbox[l_id, :].cpu().numpy()
        l_bbox = np.round(l_bbox).astype(np.int64)
        l_area = (l_bbox[3] - l_bbox[1]) * (l_bbox[2] - l_bbox[0])
        if l_val > iou_thrs and g_area != 0:
            agmap[0, g_bbox[1]:g_bbox[3], g_bbox[0]:g_bbox[2]] += (1 - l_score[l_id]) / g_area
        elif g_area != 0:
            agmap[0, g_bbox[1]:g_bbox[3], g_bbox[0]:g_bbox[2]] += 1. / g_area
    iou_l = boxlist_iou(gt_boxlist, l_boxlist)
    l_score = l_boxlist.get_field("scores").cpu().numpy()
    l_label = l_boxlist.get_field("labels").cpu().numpy()
    g_label = gt_boxlist.get_field("labels").cpu().numpy()
    l_val, l_id = iou_l.max(dim=0)
    l_val, l_id = l_val.cpu().numpy(), l_id.cpu().numpy()
    for i in range(len(l_boxlist)):
        l_bbox = l_boxlist.bbox[i, :].cpu().numpy()
        l_bbox = np.round(l_bbox).astype(np.int64)  # round for indexing
        area = (l_bbox[3] - l_bbox[1]) * (l_bbox[2] - l_bbox[0])
        if ((g_label[l_id[i]] != l_label[i] and not class_independ)
                or l_val[i] < iou_miss_thrs) and area != 0:
            # gain from a false positive at low resolution
            agmap[1, l_bbox[1]:l_bbox[3], l_bbox[0]:l_bbox[2]] += l_score[i] / area
    agmap = torch.from_numpy(agmap).unsqueeze(dim=0)
    with torch.no_grad():
        # agmap = agmap_avgpool(agmap)
        agmap = F.interpolate(agmap, size=agmap_size, mode='bilinear',
                              align_corners=False)
    agmap = np.squeeze(agmap.cpu().numpy())
    return agmap
# (fragment: the appends below run inside a nested loop that collects
#  per-instance fields; the remainder executes once per frame)
        masks.append(x['mask'][i])
        labels.append(x['label'])
        index.append(x['index'])
        colors.append(x['color'])
        scores.append(x['scores'][i])
    if len(boxes) == 0:
        continue
    detections = BoxList(torch.stack(boxes), (W, H))
    detections.add_field('mask', torch.stack(masks))
    detections.add_field('labels', torch.tensor(labels))
    detections.add_field('index', torch.tensor(index))
    detections.add_field('scores', torch.tensor(scores))
    if detections.get_field('mask').dim() != 4:
        detections.get_field('mask').data = detections.get_field('mask').unsqueeze(1)
    detections = project_masks(detections)
    ax = plt.gca()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    # BGR -> RGB
    frame = frame[:, :, [2, 1, 0]]
    display_instances(frame, detections, CLASS_NAMES, ax, colors=colors)
    plt.savefig(os.path.join(folder, f'image-{FRAME_ID}.png'),
                bbox_inches='tight', pad_inches=0, dpi=DPI)
def filter_results(self, boxlist, num_classes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    scores = boxlist.get_field("scores").reshape(-1, num_classes)
    device = scores.device
    # Apply threshold on detection probabilities and apply NMS
    # UPDATED: class-agnostic path -- take the argmax class per box
    boxes = boxes.reshape(-1, num_classes, 4)
    class_ids = scores.argmax(dim=-1, keepdim=False)
    index = torch.arange(len(boxes))
    scores = scores[index, class_ids]
    mask = (scores > self.score_thresh) & (class_ids != 0)
    index, scores, class_ids = index[mask], scores[mask], class_ids[mask]
    boxes = boxes[index, class_ids]
    result = BoxList(boxes, boxlist.size, mode="xyxy")
    result.add_field("scores", scores)
    result.add_field("labels", class_ids)
    result = boxlist_nms(result, self.nms)
    # OLD CODE, CLASS-AWARE NMS
    # Skip j = 0, because it's the background class
    # inds_all = scores > self.score_thresh
    # for j in range(1, num_classes):
    #     inds = inds_all[:, j].nonzero().squeeze(1)
    #     scores_j = scores[inds, j]
    #     boxes_j = boxes[inds, j * 4 : (j + 1) * 4]
    #     boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
    #     boxlist_for_class.add_field("scores", scores_j)
    #     boxlist_for_class = boxlist_nms(
    #         boxlist_for_class, self.nms
    #     )
    #     num_labels = len(boxlist_for_class)
    #     boxlist_for_class.add_field(
    #         "labels", torch.full((num_labels,), j, dtype=torch.int64, device=device)
    #     )
    #     result.append(boxlist_for_class)
    # result = cat_boxlist(result)
    number_of_detections = len(result)
    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("scores")
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(),
            number_of_detections - self.detections_per_img + 1
        )
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
    return result
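# Sketch of the argmax-based class selection above with hypothetical scores:
# each box gets its single best class; background (class 0) and low scores
# are dropped before the single class-agnostic NMS pass.
import torch

scores = torch.tensor([[0.1, 0.7, 0.2],   # row 0 -> class 1
                       [0.8, 0.1, 0.1],   # row 1 -> background, dropped
                       [0.2, 0.3, 0.5]])  # row 2 -> class 2
class_ids = scores.argmax(dim=-1)
index = torch.arange(len(scores))
best = scores[index, class_ids]
mask = (best > 0.25) & (class_ids != 0)
print(class_ids[mask], best[mask])  # tensor([1, 2]) tensor([0.7000, 0.5000])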