def resize(self, size, *args, **kwargs):
    """
    Returns a resized copy of this bounding box

    :param size: The requested size in pixels, as a 2-tuple:
        (width, height).
    """
    ratios = tuple(
        float(s) / float(s_orig) for s, s_orig in zip(size, self.size))
    if ratios[0] == ratios[1]:
        # uniform scaling: one ratio applies to every coordinate
        ratio = ratios[0]
        scaled_box = self.bbox_3d * ratio
        bbox_3d = Box3dList(scaled_box, size, mode=self.mode)
        # bbox_3d._copy_extra_fields(self)
        return bbox_3d

    # non-uniform scaling: scale x and y coordinates independently
    ratio_width, ratio_height = ratios
    xmin, ymin, xmax, ymax = self._split_into_xyxy()
    scaled_xmin = xmin * ratio_width
    scaled_xmax = xmax * ratio_width
    scaled_ymin = ymin * ratio_height
    scaled_ymax = ymax * ratio_height
    scaled_box = torch.cat(
        (scaled_xmin, scaled_ymin, scaled_xmax, scaled_ymax), dim=-1)
    bbox_3d = BoxList(scaled_box, size, mode="xyxy")
    # bbox_3d._copy_extra_fields(self)
    for k, v in self.extra_fields.items():
        if not isinstance(v, torch.Tensor):
            v = v.resize(size, *args, **kwargs)
        bbox_3d.add_field(k, v)

    return bbox_3d.convert(self.mode)

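
# --- Usage sketch (illustrative, not from the original code). A plain 2D
# BoxList shows the same uniform vs. per-axis scaling split that `resize`
# above implements; assumes maskrcnn_benchmark is installed.
import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList

boxes = BoxList(torch.tensor([[10., 20., 50., 80.]]), (100, 200), mode="xyxy")
print(boxes.resize((200, 400)).bbox)  # uniform ratio 2.0: tensor([[ 20.,  40., 100., 160.]])
print(boxes.resize((200, 200)).bbox)  # per-axis (2.0, 1.0): tensor([[ 20.,  20., 100.,  80.]])
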
def boxlist_crop(self, box: BoxList, x1, y1, x2, y2):
    """
    Adjust the coordinates of the bounding boxes to lie within the image
    crop specified by (x1, y1, x2, y2).
    """
    w, h = (x2 - x1), (y2 - y1)
    xmin, ymin, xmax, ymax = box._split_into_xyxy()
    cropped_xmin = xmin - x1
    cropped_ymin = ymin - y1
    cropped_xmax = xmax - x1
    cropped_ymax = ymax - y1
    cropped_bbox = torch.cat(
        (cropped_xmin, cropped_ymin, cropped_xmax, cropped_ymax), dim=-1)
    cropped_box = BoxList(cropped_bbox, (w, h), mode="xyxy")
    for k, v in box.extra_fields.items():
        cropped_box.add_field(k, v)
    if self.amodal:
        # amodal: box corners are allowed to go beyond the image boundary
        cropped_box = self.remove_invisible_box(cropped_box)
    else:
        # non-amodal training: box corners must stay within the image boundary
        cropped_box = cropped_box.clip_to_image(remove_empty=True)
    return cropped_box.convert(box.mode)

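
# --- Usage sketch (illustrative, hypothetical window values). Cropping to
# (x1, y1, x2, y2) = (30, 30, 130, 130) shifts the coordinates into the
# window frame; whether out-of-window corners survive depends on the
# `self.amodal` flag above. Assumes maskrcnn_benchmark is installed.
import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList

full = BoxList(torch.tensor([[50., 50., 120., 150.]]), (200, 200), mode="xyxy")
x1, y1, x2, y2 = 30, 30, 130, 130
shifted = full.bbox - torch.tensor([x1, y1, x1, y1])
window = BoxList(shifted, (x2 - x1, y2 - y1), mode="xyxy")
# non-amodal path: clamp to the window (maskrcnn_benchmark clips to size - 1)
print(window.clip_to_image(remove_empty=True).bbox)  # tensor([[20., 20., 90., 99.]])
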
def crop(self, box):
    """
    Crops a rectangular region from this bounding box. The box is a
    4-tuple defining the left, upper, right, and lower pixel coordinates.
    """
    xmin, ymin, xmax, ymax = self._split_into_xyxy()
    w, h = box[2] - box[0], box[3] - box[1]
    cropped_xmin = (xmin - box[0]).clamp(min=0, max=w)
    cropped_ymin = (ymin - box[1]).clamp(min=0, max=h)
    cropped_xmax = (xmax - box[0]).clamp(min=0, max=w)
    cropped_ymax = (ymax - box[1]).clamp(min=0, max=h)

    # TODO should I filter empty boxes here?
    if False:
        is_empty = (cropped_xmin == cropped_xmax) | (cropped_ymin == cropped_ymax)

    cropped_box = torch.cat(
        (cropped_xmin, cropped_ymin, cropped_xmax, cropped_ymax), dim=-1)
    bbox_3d = BoxList(cropped_box, (w, h), mode="xyxy")
    # bbox_3d._copy_extra_fields(self)
    for k, v in self.extra_fields.items():
        if not isinstance(v, torch.Tensor):
            v = v.crop(box)
        bbox_3d.add_field(k, v)

    return bbox_3d.convert(self.mode)

def oneLargeBboxList(W_batch_array, H_batch_array):
    bbox_list_list = []
    for W, H in zip(W_batch_array, H_batch_array):
        # Following COCO annotations: box coordinates are measured from the
        # top-left image corner and are 0-indexed.
        # http://cocodataset.org/#format-data
        bbox_list = BoxList(np.asarray([[0, 0, W, H]]), (W, H), "xywh")
        bbox_list = bbox_list.convert('xyxy')
        bbox_list_list.append(bbox_list)
    return bbox_list_list

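
# --- Usage sketch (illustrative). Note that maskrcnn_benchmark's "xywh" ->
# "xyxy" conversion uses the TO_REMOVE = 1 pixel convention, so the
# full-image box ends at (W - 1, H - 1):
import numpy as np

full_image = oneLargeBboxList(np.array([640]), np.array([480]))[0]
print(full_image.bbox)  # tensor([[  0.,   0., 639., 479.]])
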
def frame_vis_generator(self, frame, results: BoxList):
    frame, results = self.normalize_output(frame, results)
    ids = results.get_field('ids')
    results = results[ids >= 0]
    results = results.convert('xyxy')
    bbox = results.bbox.detach().cpu().numpy()
    ids = results.get_field('ids').tolist()
    labels = results.get_field('labels').tolist()
    for i, entity_id in enumerate(ids):
        color = self.colors[entity_id % self.num_colors]
        class_name = self.class_names[labels[i] - 1]
        text_width = len(class_name) * 20
        # np.int was removed in recent NumPy releases; use the builtin int
        x1, y1, x2, y2 = np.round(bbox[i, :]).astype(int)
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness=3)
        cv2.putText(frame, str(entity_id), (x1 + 5, y1 + 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, thickness=3)
        # Draw a filled background rectangle behind the class-name text
        cv2.rectangle(frame, (x1 - 5, y1 - 25), (x1 + text_width, y1), color, -1)
        cv2.putText(frame, '{}'.format(class_name), (x1 + 5, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), thickness=2)
    return frame

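
# --- Illustrative sketch of the drawing pattern above on a blank frame
# (made-up box and label; assumes opencv-python is installed):
import cv2
import numpy as np

frame = np.zeros((240, 320, 3), dtype=np.uint8)
x1, y1, x2, y2 = 40, 60, 200, 180
color = (0, 255, 0)
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness=3)
# filled rectangle behind the label, then the label text on top of it
cv2.rectangle(frame, (x1 - 5, y1 - 25), (x1 + len('car') * 20, y1), color, -1)
cv2.putText(frame, 'car', (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 1,
            (0, 0, 0), thickness=2)
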
def bboxArray_to_bboxList(bboxes_batch_array, bboxes_length_batch_array,
                          W_batch_array, H_batch_array):
    bbox_list_list = []
    for bboxes_array, bboxes_length, W, H in zip(
            bboxes_batch_array, bboxes_length_batch_array,
            W_batch_array, H_batch_array):
        bbox_list = BoxList(bboxes_array[:bboxes_length, :], (W, H), "xywh")
        bbox_list = bbox_list.convert('xyxy')
        bbox_list_list.append(bbox_list)
    return bbox_list_list

def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """
    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # put in the same format as anchors
    objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
    objectness = objectness.sigmoid()
    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

    num_anchors = A * H * W
    pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
    objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

    batch_idx = torch.arange(N, device=device)[:, None]
    box_regression = box_regression[batch_idx, topk_idx]

    image_shapes = [box.size for box in anchors]
    concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
    concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

    # decode2cxywh = self.nms_func.input_mode == 'cxywh'
    decode2cxywh = False
    if decode2cxywh:
        proposals = self.box_coder.decode2cxywh(
            box_regression.view(-1, 4), concat_anchors.view(-1, 4))
        mode = 'cxywh'
    else:
        proposals = self.box_coder.decode(
            box_regression.view(-1, 4), concat_anchors.view(-1, 4))
        mode = 'xyxy'
    proposals = proposals.view(N, -1, 4)

    result = []
    for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
        boxlist = BoxList(proposal, im_shape, mode=mode)
        boxlist.add_field("objectness", score)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        boxlist = self.nms_func(boxlist)
        boxlist = boxlist.convert('xyxy')
        result.append(boxlist)
    return result

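
# --- Illustrative sketch (made-up shapes) of the batched top-k gather used
# above: `batch_idx` has shape (N, 1) and broadcasts against `topk_idx`, so
# each image selects the regression rows of its own highest-scoring anchors.
import torch

N, num_anchors = 2, 10
objectness = torch.rand(N, num_anchors)
box_regression = torch.rand(N, num_anchors, 4)

scores, topk_idx = objectness.topk(3, dim=1, sorted=True)
batch_idx = torch.arange(N)[:, None]
print(box_regression[batch_idx, topk_idx].shape)  # torch.Size([2, 3, 4])
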
def sample_pos_neg_idxs(
        gt, rois,
        fg_thres=pretrain_opts['overlap_pos'][0],
        bg_thres=pretrain_opts['overlap_neg'][1],
        fg_num=pretrain_opts['batch_pos'] * pretrain_opts['batch_frames'],
        bg_num=pretrain_opts['batch_neg'] * pretrain_opts['batch_frames']):
    if len(gt) != len(rois):
        raise ValueError("gt size {} is not the same as rois size {}".format(
            len(gt), len(rois)))
    gt = torch.from_numpy(gt).cuda()
    proposal_matcher = Matcher(fg_thres, bg_thres)
    total_matched_idxs = torch.LongTensor([]).cuda()
    for i_gt, i_roi in zip(gt, rois):
        i_gt = BoxList(i_gt[None, :], i_roi.size, mode="xywh")
        i_gt = i_gt.convert("xyxy")
        match_quality_matrix = boxlist_iou(i_gt, i_roi)
        matched_idxs = proposal_matcher(match_quality_matrix)
        total_matched_idxs = torch.cat([total_matched_idxs, matched_idxs])

    # 0 means fg (matched to the single gt box), -1 means bg (below the bg
    # threshold), -2 means between the two thresholds (ignored)
    pos_idx = torch.nonzero(total_matched_idxs == 0).squeeze(1)
    neg_idx = torch.nonzero(total_matched_idxs == -1).squeeze(1)

    # randomly select positive and negative examples
    if len(pos_idx) >= fg_num:
        perm1 = torch.randperm(pos_idx.numel(), device=pos_idx.device)[:fg_num]
        perm2 = torch.randperm(neg_idx.numel(), device=neg_idx.device)[:bg_num]
    elif len(pos_idx) > 0:
        # fewer positives than requested: sample with replacement
        perm1 = torch.randint(0, pos_idx.size(0), (fg_num, )).type(torch.LongTensor)
        perm2 = torch.randint(0, neg_idx.size(0), (bg_num, )).type(torch.LongTensor)
    else:
        return None, None
    pos_idx = pos_idx[perm1]
    neg_idx = neg_idx[perm2]
    return pos_idx, neg_idx

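
# --- Usage sketch (illustrative; 0.7 / 0.3 are made-up thresholds, not
# pretrain_opts values) of how Matcher labels proposals by IoU against a
# single ground-truth box:
import torch
from maskrcnn_benchmark.modeling.matcher import Matcher
from maskrcnn_benchmark.structures.bounding_box import BoxList
from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou

gt = BoxList(torch.tensor([[0., 0., 50., 50.]]), (100, 100), mode="xyxy")
rois = BoxList(torch.tensor([[0., 0., 45., 45.],      # IoU ~ 0.81 -> fg (0)
                             [60., 60., 99., 99.]]),  # IoU 0.0    -> bg (-1)
               (100, 100), mode="xyxy")
print(Matcher(0.7, 0.3)(boxlist_iou(gt, rois)))  # tensor([ 0, -1])
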
def convert_kitti_instance_only(root, ann_file, out_dir, dataset):
    (image_index, label_list, boxes_list, boxes_3d_list,
     alphas_list) = get_pkl_element(ann_file)
    number_image = len(image_index)
    image_lists = []
    calib_lists = []
    depth_list = []
    for i in range(number_image):
        image_lists.append(root + '/training/image_2/' + image_index[i] + ".png")
        calib_lists.append(root + '/training/calib/' + image_index[i] + ".txt")
        depth_list.append(root + '/training/depth/' + image_index[i] + "_01.png.npz")

    # img_id = 0
    # ann_id = 0
    img_id = 3712
    ann_id = 11855
    # cat_id = 1
    category_dict = {'car': 1}
    category_instancesonly = [
        'person', 'rider', 'car', 'truck', 'bus', 'train',
        'motorcycle', 'bicycle',
    ]

    ann_dict = {}
    images = []
    annotations = []
    for i, id in image_index.items():
        if len(images) % 50 == 0:
            print("Processed %s images, %s annotations" %
                  (len(images), len(annotations)))
        image = {}
        image['id'] = img_id
        img_id += 1
        img = Image.open(image_lists[i]).convert("RGB")
        width, height = img.size
        image['width'] = width
        image['height'] = height
        image['file_name'] = image_lists[i].split('/')[-1]
        image['seg_file_name'] = image['file_name']
        images.append(image)

        num_instances = label_list[i].shape[0]
        boxes = boxes_list[i]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)
        box2d = BoxList(boxes, img.size, mode="xyxy")
        area = box2d.area().tolist()
        boxes = box2d.convert('xywh')
        boxes = boxes.bbox.tolist()
        for j in range(num_instances):
            ann = {}
            ann['id'] = ann_id
            ann_id += 1
            ann['image_id'] = image['id']
            ann['segmentation'] = []
            ann['category_id'] = category_dict['car']
            ann['iscrowd'] = 0
            ann['area'] = area[j]
            ann['bbox'] = boxes[j]
            annotations.append(ann)

    ann_dict['images'] = images
    categories = [{"id": category_dict[name], "name": name}
                  for name in category_dict]
    ann_dict['categories'] = categories
    ann_dict['annotations'] = annotations
    print("Num categories: %s" % len(categories))
    print("Num images: %s" % len(images))
    print("Num annotations: %s" % len(annotations))
    with open(os.path.join(
            out_dir,
            'instancesonly_filtered_gtFine_' + dataset + '.json'),
            'w') as outfile:
        outfile.write(json.dumps(ann_dict))

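
# --- Illustrative sketch of the per-box math above: area is computed in
# "xyxy" mode (with the TO_REMOVE = 1 pixel convention), then boxes are
# converted to COCO-style "xywh" for the json:
import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList

box2d = BoxList(torch.tensor([[10., 10., 59., 29.]]), (100, 100), mode="xyxy")
print(box2d.area().tolist())                # [1000.0]  (50 x 20 pixels)
print(box2d.convert('xywh').bbox.tolist())  # [[10.0, 10.0, 50.0, 20.0]]
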
def inference(self, colors_pred, add_class_names=None, save_path=None,
              save_independently=None, show_ground_truth=True):
    """
    Run inference and overlay the predicted (and optionally the
    ground-truth) boxes and masks on each test image.
    """
    # load the config
    paths_catalog = import_file("maskrcnn_benchmark.config.paths_catalog",
                                cfg.PATHS_CATALOG, True)
    DatasetCatalog = paths_catalog.DatasetCatalog
    test_datasets = DatasetCatalog.get(cfg.DATASETS.TEST[0])
    img_dir = test_datasets['args']['root']
    anno_file = test_datasets['args']['ann_file']
    data = json.load(open(anno_file))
    coco = COCO(anno_file)
    predis = []
    filenames = []
    # iterate through the dataset
    for image in data['images']:
        pil_img = Image.open(img_dir + '/' + image['file_name'])
        filenames.append(image['file_name'])
        img = np.array(pil_img)[:, :, [0, 1, 2]]
        # get the ground-truth boxes and masks
        anno = [
            obj for obj in data['annotations']
            if obj['image_id'] == image['id']
        ]
        classes = [
            obj['category_id'] for obj in data['annotations']
            if obj['image_id'] == image['id']
        ]
        json_category_id_to_contiguous_id = {
            v: i + 1 for i, v in enumerate(coco.getCatIds())
        }
        classes = [json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        boxes = [obj['bbox'] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)
        target = BoxList(boxes, pil_img.size, mode='xywh').convert('xyxy')
        target.add_field('labels', classes)
        masks = [obj["segmentation"] for obj in anno]
        # use the PIL (width, height) here; `img.size` on a numpy array
        # would be the total element count
        masks = SegmentationMask(masks, pil_img.size)
        target.add_field("masks", masks)
        target = target.clip_to_image(remove_empty=True)

        # these are the ground-truth polygons
        polygons = []
        color_rgb = [[255, 101, 80], [255, 55, 55], [255, 255, 61],
                     [255, 128, 0]]
        colors = {
            i: [s / 255 for s in color]
            for i, color in enumerate(color_rgb)
        }
        color = [colors[i.item()] for i in classes]
        # ground-truth boxes
        boxes = []
        polys = vars(target)['extra_fields']['masks']
        for polygon in polys:
            try:
                tenso = vars(polygon)['polygons'][0]
            except (KeyError, IndexError):
                continue
            poly1 = tenso.numpy()
            poly = poly1.reshape((int(len(poly1) / 2), 2))
            polygons.append(Polygon(poly))
        xywh_tar = target.convert("xywh")
        for box in vars(xywh_tar)['bbox'].numpy():
            rect = Rectangle((box[0], box[1]), box[2], box[3])
            boxes.append(rect)

        # compute the predictions
        predictions = self.compute_prediction(img)
        predis.append(predictions)
        top_predictions = self.select_top_predictions(predictions)
        polygons_predicted, colors_prediction = self.overlay_mask(
            img, top_predictions, colors_pred, inference=True)
        # print(colors_prediction)
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.imshow(Image.fromarray(img))
        ax.axis('off')
        # draw the ground truth
        if show_ground_truth:
            p = PatchCollection(polygons, facecolor='none',
                                linewidths=0, alpha=0.4)
            ax.add_collection(p)
            p = PatchCollection(polygons, facecolor='none',
                                edgecolors=color, linewidths=2)
            ax.add_collection(p)
        # draw the predictions
        ppd = PatchCollection(polygons_predicted, facecolor='none',
                              linewidths=0, alpha=0.4)
        ax.add_collection(ppd)
        ppd = PatchCollection(polygons_predicted, facecolor='none',
                              edgecolors=colors_prediction, linewidths=2)
        ax.add_collection(ppd)
        plt.savefig(save_path + image['file_name'], dpi=200,
                    bbox_inches='tight', pad_inches=0)
        plt.show()

    return dict(zip(filenames, predis))

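
# --- Illustrative sketch (made-up polygon and color) mirroring the two
# PatchCollection passes above: one translucent pass, one outline pass.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection

fig, ax = plt.subplots()
ax.imshow(np.zeros((100, 100, 3), dtype=np.uint8))
polygons = [Polygon(np.array([[10, 10], [80, 20], [50, 90]]))]
ax.add_collection(PatchCollection(polygons, facecolor='none',
                                  linewidths=0, alpha=0.4))
ax.add_collection(PatchCollection(polygons, facecolor='none',
                                  edgecolors=[[1.0, 0.4, 0.3]], linewidths=2))
plt.show()
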
def prepare_for_coco_detection_mstest(predictions, dataset):
    # predictions/datasets come in (small, medium, large) test-scale order
    predictions_s, predictions_m, predictions_l = predictions
    dataset_s, dataset_m, dataset_l = dataset
    coco_results = []
    # one image at a time
    for image_id, (prediction_s, prediction_m, prediction_l) in enumerate(
            zip(predictions_s, predictions_m, predictions_l)):
        original_id = dataset_l.id_to_img_map[image_id]
        if len(prediction_l) == 0:
            continue

        img_info = dataset_l.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        img_id_json = img_info['id']

        # rescale the predicted bboxes to the original image size
        prediction_s = prediction_s.resize((image_width, image_height))
        prediction_m = prediction_m.resize((image_width, image_height))
        prediction_l = prediction_l.resize((image_width, image_height))

        # pull the single-scale results out of the BoxLists
        bbox_s = prediction_s.bbox
        score_s = prediction_s.get_field('scores').unsqueeze(1)
        label_s = prediction_s.get_field('labels').unsqueeze(1)
        bbox_m = prediction_m.bbox
        score_m = prediction_m.get_field('scores').unsqueeze(1)
        label_m = prediction_m.get_field('labels').unsqueeze(1)
        bbox_l = prediction_l.bbox
        score_l = prediction_l.get_field('scores').unsqueeze(1)
        label_l = prediction_l.get_field('labels').unsqueeze(1)

        # concatenate the (small, medium, large) results into one BoxList
        min_size = 0
        w, h = prediction_l.size
        detections = torch.cat((bbox_s, bbox_m, bbox_l), dim=0).cuda()
        per_class = torch.cat((label_s, label_m, label_l), dim=0).squeeze(1).cuda()
        per_box_cls = torch.cat((score_s, score_m, score_l), dim=0).squeeze(1).cuda()
        boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
        boxlist.add_field("labels", per_class)
        boxlist.add_field("scores", per_box_cls)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, min_size)

        # apply NMS to the merged multi-scale results, class by class
        nms_method = cfg.TEST.MS_TEST_NMS
        nms_thresh = cfg.TEST.MS_TEST_NMS_THR
        num_classes = 81  # COCO: 80 classes + background
        scores = boxlist.get_field("scores")
        labels = boxlist.get_field("labels")
        boxes = boxlist.bbox
        result = []
        for j in range(1, num_classes):
            inds = (labels == j).nonzero().view(-1)
            scores_j = scores[inds]
            boxes_j = boxes[inds, :].view(-1, 4)
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            if nms_method == "nms":
                boxlist_for_class = boxlist_nms(
                    boxlist_for_class, nms_thresh, score_field="scores")
            elif nms_method == "soft_nms":
                boxlist_for_class = boxlist_soft_nms(
                    boxlist_for_class, nms_thresh, score_field="scores")
            else:
                raise ValueError("unknown NMS method: {}".format(nms_method))
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels, ), j, dtype=torch.int64,
                           device=scores.device))
            result.append(boxlist_for_class)
        boxlist = cat_boxlist(result)

        boxlist = boxlist.convert("xywh")
        boxes = boxlist.bbox.tolist()
        scores = boxlist.get_field("scores").tolist()
        labels = boxlist.get_field("labels").tolist()
        mapped_labels = [
            dataset_l.contiguous_category_id_to_json_id[int(i)] for i in labels
        ]
        coco_results.extend([{
            "image_id": original_id,
            "category_id": mapped_labels[k],
            "bbox": box,
            "score": scores[k],
        } for k, box in enumerate(boxes)])
    return coco_results

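
# --- Illustrative sketch (made-up boxes and scores) of the hard-NMS branch
# above: overlapping boxes of the same class collapse to the highest-scoring
# one.
import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList
from maskrcnn_benchmark.structures.boxlist_ops import boxlist_nms

boxes = BoxList(torch.tensor([[0., 0., 50., 50.],
                              [1., 1., 51., 51.]]), (100, 100), mode="xyxy")
boxes.add_field("scores", torch.tensor([0.9, 0.8]))
kept = boxlist_nms(boxes, 0.5, score_field="scores")
print(len(kept), kept.get_field("scores"))  # 1 tensor([0.9000])
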
def prepare_for_vrd_detection(predictions, dataset):
    # assert isinstance(dataset, COCODataset)
    vrd_results = []
    for image_id, prediction in enumerate(predictions):
        original_id = dataset.ann_file[image_id]['filename']
        # if len(prediction) == 0:
        #     continue

        # TODO replace with get_img_info?
        image_width = dataset.ann_file[image_id]["width"]
        image_height = dataset.ann_file[image_id]["height"]

        # wrap the subject/object boxes in their own BoxLists so they can be
        # resized with the same ratio as the main prediction
        subject_boundingboxes = prediction.get_field("subject_boundingboxes")
        object_boundingboxes = prediction.get_field("object_boundingboxes")
        prediction_size = prediction.size
        prediction_sub = BoxList(subject_boundingboxes, prediction_size,
                                 mode="xyxy")
        prediction_ob = BoxList(object_boundingboxes, prediction_size,
                                mode="xyxy")

        prediction = prediction.resize((image_width, image_height))
        prediction_sub = prediction_sub.resize((image_width, image_height))
        prediction_ob = prediction_ob.resize((image_width, image_height))
        prediction = prediction.convert("xywh")
        prediction_sub = prediction_sub.convert("xywh")
        prediction_ob = prediction_ob.convert("xywh")

        boxes = prediction.bbox.tolist()
        subject_boundingboxes = prediction_sub.bbox.tolist()
        object_boundingboxes = prediction_ob.bbox.tolist()
        subject_category = prediction.get_field("subject_category").tolist()
        object_category = prediction.get_field("object_category").tolist()
        subject_scores = prediction.get_field("subject_scores").tolist()
        object_scores = prediction.get_field("object_scores").tolist()
        objectpairs_scores = prediction.get_field("objectpairs_scores").tolist()
        predicate_scores = prediction.get_field("predicate_scores").tolist()
        ids = prediction.get_field("ids").tolist()

        a = {}
        a.update(filename=original_id)
        a.update(height=image_height)
        a.update(width=image_width)
        a.update(objects_num=len(prediction))
        objects = [{
            "subject_boundingboxes": subject_boundingboxes[k],
            "object_boundingboxes": object_boundingboxes[k],
            "subject_category": subject_category[k],
            "object_category": object_category[k],
            "subject_scores": subject_scores[k],
            "object_scores": object_scores[k],
            "objectpairs_scores": objectpairs_scores[k],
            "predicate_scores": predicate_scores[k],
            "ids": ids[k],
        } for k, box in enumerate(boxes)]
        a.update(objects=objects)
        vrd_results.append(a)
    return vrd_results

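
# --- Illustrative sketch (made-up boxes) of the wrapping trick above: an
# extra-field box tensor gets its own BoxList so that resize() applies the
# same ratio as on the main prediction.
import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList

pred = BoxList(torch.tensor([[10., 10., 20., 20.]]), (100, 100), mode="xyxy")
pred.add_field("subject_boundingboxes", torch.tensor([[30., 30., 60., 60.]]))
sub = BoxList(pred.get_field("subject_boundingboxes"), pred.size, mode="xyxy")
print(sub.resize((200, 200)).bbox)  # tensor([[ 60.,  60., 120., 120.]])
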