def __getitem__(self, idx):
    """Load sample ``idx`` and wrap its annotations in a box-list target.

    Crowd annotations are dropped.  When ``cfg.ROTATE`` is truthy the raw
    boxes are read as 8 values per object (mode ``"xy8"``) and converted to
    ``"xy854"``; otherwise they are read as ``"xywh"`` and converted to
    ``"xyxy"``.  Labels, masks and (when present) keypoints are attached as
    extra fields, the target is clipped to the image and the optional
    transforms are applied.

    Returns:
        tuple: ``(img, target, idx)``.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # Keep only the non-crowd objects.
    # TODO might be better to add an extra field
    anno = [ann for ann in anno if ann["iscrowd"] == 0]

    raw_boxes = [ann["bbox"] for ann in anno]
    if cfg.ROTATE:
        corner_boxes = torch.as_tensor(raw_boxes).reshape(-1, 8)
        target = BoxList(corner_boxes, img.size, mode="xy8")
        target = target.convert("xy854")
    else:
        # The reshape keeps the tensor two-dimensional when there are no boxes.
        rect_boxes = torch.as_tensor(raw_boxes).reshape(-1, 4)
        target = BoxList(rect_boxes, img.size, mode="xywh")
        target = target.convert("xyxy")

    # Map the JSON category ids onto the contiguous ids used internally.
    id_map = self.json_category_id_to_contiguous_id
    labels = torch.tensor([id_map[ann["category_id"]] for ann in anno])
    target.add_field("labels", labels)

    segmentations = [ann["segmentation"] for ann in anno]
    target.add_field("masks", SegmentationMask(segmentations, img.size))

    # Only the first annotation is inspected to decide whether keypoints exist.
    if anno and "keypoints" in anno[0]:
        raw_keypoints = [ann["keypoints"] for ann in anno]
        target.add_field("keypoints", PersonKeypoints(raw_keypoints, img.size))

    target = target.clip_to_image(remove_empty=True)

    if self.transforms is not None:
        img, target = self.transforms(img, target)

    return img, target, idx
def __getitem__(self, idx):
    """Return ``(img, target, idx)`` for the sample at position ``idx``.

    The target is a ``BoxList`` in ``xyxy`` mode built from the non-crowd
    annotations, carrying ``labels`` and ``masks`` fields and, when the
    annotations provide them, a ``keypoints`` field.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # Filter out crowd annotations.
    # TODO might be better to add an extra field
    anno = [ann for ann in anno if ann["iscrowd"] == 0]

    # Turn the boxes of the remaining annotations into a BoxList; note that
    # these boxes may belong to several different classes.
    xywh = torch.as_tensor([ann["bbox"] for ann in anno])
    xywh = xywh.reshape(-1, 4)  # guard against no boxes
    target = BoxList(xywh, img.size, mode="xywh").convert("xyxy")

    # Attach the box labels to the target as an extra field.
    to_contiguous = self.json_category_id_to_contiguous_id
    labels = [to_contiguous[ann["category_id"]] for ann in anno]
    target.add_field("labels", torch.tensor(labels))

    polygons = [ann["segmentation"] for ann in anno]
    target.add_field("masks", SegmentationMask(polygons, img.size))

    # TODO: keypoint code
    has_keypoints = bool(anno) and "keypoints" in anno[0]
    if has_keypoints:
        kps = PersonKeypoints([ann["keypoints"] for ann in anno], img.size)
        target.add_field("keypoints", kps)

    target = target.clip_to_image(remove_empty=True)

    if self.transforms is not None:
        img, target = self.transforms(img, target)

    return img, target, idx
def __getitem__(self, idx):
    """Load sample ``idx`` of the MICR dataset as ``(img, target, idx)``.

    Crowd annotations are dropped and the remaining boxes are converted
    from ``xywh`` to ``xyxy``.  ``labels`` is always attached; ``masks``
    and ``keypoints`` are attached only when the first annotation carries
    the corresponding key.

    No interactive debugger breakpoint or progress print is issued here:
    this method runs once per sample, and a breakpoint would stall every
    fetch.
    """
    img, anno = super(MICRDataset, self).__getitem__(idx)

    # filter crowd annotations
    # TODO might be better to add an extra field
    anno = [obj for obj in anno if obj["iscrowd"] == 0]

    boxes = [obj["bbox"] for obj in anno]
    boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
    target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

    # Translate JSON category ids to the contiguous ids used for training.
    classes = [obj["category_id"] for obj in anno]
    classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
    classes = torch.tensor(classes)
    target.add_field("labels", classes)

    if anno and "segmentation" in anno[0]:
        masks = [obj["segmentation"] for obj in anno]
        masks = SegmentationMask(masks, img.size, mode='poly')
        target.add_field("masks", masks)

    if anno and "keypoints" in anno[0]:
        keypoints = [obj["keypoints"] for obj in anno]
        keypoints = PersonKeypoints(keypoints, img.size)
        target.add_field("keypoints", keypoints)

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx
def forward(self, x, boxes, features):
    """Attach keypoint predictions to the input boxes.

    Arguments:
        x: network output for all boxes; passed through ``self.keypointer``
            when one is configured.
        boxes: list of box lists; exactly one image is supported.
        features: not used by this method.

    Returns:
        list: one ``BoxList`` per image, copying every field of the input
        box list and adding a ``"keypoints"`` field whose ``PersonKeypoints``
        carries ``"logits"`` and ``"feature"`` fields.
    """
    probs = x
    raw_features = x
    logits = None
    if self.keypointer:
        probs, logits = self.keypointer(x, boxes)

    assert len(boxes) == 1, "Only non-batched inference supported for now"

    # Split every per-box tensor back into per-image chunks.
    counts = [per_image.bbox.size(0) for per_image in boxes]
    probs = probs.split(counts, dim=0)
    raw_features = raw_features.split(counts, dim=0)
    # NOTE(review): ``logits`` is still None when no keypointer is configured,
    # in which case this split cannot succeed -- confirm that a keypointer is
    # always set.
    logits = logits.split(counts, dim=0)

    results = []
    for kp_prob, src, kp_logits, kp_feature in zip(probs, boxes, logits, raw_features):
        out = BoxList(src.bbox, src.size, mode="xyxy")
        for name in src.fields():
            out.add_field(name, src.get_field(name))

        kps = PersonKeypoints(kp_prob, src.size)
        kps.add_field("logits", kp_logits)
        kps.add_field("feature", kp_feature)
        out.add_field("keypoints", kps)
        results.append(out)

    return results
def __getitem__(self, idx):
    """Build the training sample for index ``idx``.

    The parent class yields a PIL image and a list of annotation dicts.
    According to the sample recorded with this code, an annotation looks
    like::

        {'segmentation': [[164.5, 479.38, 120.26, 448.4, ...],
                          [226.46, 416.32, 220.94, 463.89, ...]],
         'area': 97486.80810000001,
         'iscrowd': 0,
         'image_id': 36,
         'bbox': [0.0, 50.12, 457.68, 430.35],
         'category_id': 28,
         'id': 284996}

    Returns:
        tuple: ``(img, target, idx)`` where ``target`` is a ``BoxList`` with
        ``labels`` and ``masks`` fields and, if available, ``keypoints``.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # Discard crowd annotations.
    # TODO might be better to add an extra field
    anno = [entry for entry in anno if entry["iscrowd"] == 0]

    box_tensor = torch.as_tensor([entry["bbox"] for entry in anno])
    box_tensor = box_tensor.reshape(-1, 4)  # guard against no boxes
    target = BoxList(box_tensor, img.size, mode="xywh")
    target = target.convert("xyxy")

    # extra_fields becomes {"labels": ...}
    contiguous = self.json_category_id_to_contiguous_id
    label_ids = [contiguous[entry["category_id"]] for entry in anno]
    target.add_field("labels", torch.tensor(label_ids))

    # extra_fields becomes {"labels": ..., "masks": ...}
    seg_mask = SegmentationMask([entry["segmentation"] for entry in anno], img.size)
    target.add_field("masks", seg_mask)

    if anno and "keypoints" in anno[0]:
        person_kps = PersonKeypoints([entry["keypoints"] for entry in anno], img.size)
        target.add_field("keypoints", person_kps)

    target = target.clip_to_image(remove_empty=True)

    if self.transforms is not None:
        img, target = self.transforms(img, target)

    return img, target, idx
def __getitem__(self, idx):
    """Load sample ``idx`` of the DukeMTMC dataset as ``(img, target, idx)``.

    Crowd annotations are dropped and boxes are converted from ``xywh`` to
    ``xyxy``.  Because no real segmentation is used, every object receives
    a dummy polygon mask that traces its bounding box, shrunk by a tiny
    epsilon on each side.  ``keypoints`` and ``depths`` fields are added
    when the first annotation carries the corresponding key; ``depths``
    stores the reciprocal of each annotation's ``"depth"`` value.
    """
    img, anno = super(DukeMTMCDataset, self).__getitem__(idx)

    # filter crowd annotations
    # TODO might be better to add an extra field
    anno = [obj for obj in anno if obj["iscrowd"] == 0]

    boxes = [obj["bbox"] for obj in anno]
    boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
    target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

    classes = [obj["category_id"] for obj in anno]
    classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
    classes = torch.tensor(classes)
    target.add_field("labels", classes)

    # Add a dummy mask: one rectangle polygon per object.  Polygon
    # coordinates are interleaved as [x1, y1, x2, y2, ...]; listing all x
    # values followed by all y values would not describe the box.
    eps = 1e-6
    masks = []
    for obj in anno:
        xywh = obj["bbox"]
        left = xywh[0] + eps
        top = xywh[1] + eps
        right = xywh[0] + xywh[2] - eps
        bottom = xywh[1] + xywh[3] - eps
        masks.append([[
            left, top,
            left, bottom,
            right, bottom,
            right, top,
        ]])
    masks = SegmentationMask(masks, img.size, mode='poly')
    target.add_field("masks", masks)

    if anno and "keypoints" in anno[0]:
        keypoints = [obj["keypoints"] for obj in anno]
        keypoints = PersonKeypoints(keypoints, img.size)
        target.add_field("keypoints", keypoints)

    if anno and "depth" in anno[0]:
        # The field holds inverse depth, not the raw annotation value.
        depth = [1. / obj["depth"] for obj in anno]
        depth = torch.tensor(depth)
        target.add_field("depths", depth)

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx
def forward(self, x, edges, boxes):
    """Attach keypoint predictions to the input boxes.

    Arguments:
        x: network output for all boxes.
        edges: forwarded unchanged to ``self.keypointer``.
        boxes: list of box lists; exactly one image is supported.

    Returns:
        list: one ``BoxList`` per image with all input fields copied and a
        ``"keypoints"`` field whose ``PersonKeypoints`` carries ``"logits"``.
    """
    probs = x
    logits = None
    if self.keypointer:
        probs, logits = self.keypointer(x, edges, boxes)

    assert len(boxes) == 1, "Only non-batched inference supported for now"

    # Undo the batching: one chunk of rows per image.
    counts = [per_image.bbox.size(0) for per_image in boxes]
    probs = probs.split(counts, dim=0)
    # NOTE(review): ``logits`` remains None when no keypointer is configured,
    # so this split would fail -- confirm a keypointer is always present.
    logits = logits.split(counts, dim=0)

    results = []
    for kp_prob, src, kp_logits in zip(probs, boxes, logits):
        out = BoxList(src.bbox, src.size, mode="xyxy")
        for name in src.fields():
            out.add_field(name, src.get_field(name))

        kps = PersonKeypoints(kp_prob, src.size)
        kps.add_field("logits", kp_logits)
        out.add_field("keypoints", kps)
        results.append(out)

    return results
def __getitem__(self, idx):
    """Load sample ``idx``, retrying and skipping ahead when loading fails.

    Each candidate index is attempted three times; after three failures
    the next index is tried.  Only ``Exception`` subclasses are retried, so
    ``KeyboardInterrupt`` and ``SystemExit`` propagate.  Once the candidate
    index would run past the end of the dataset the last error is re-raised
    instead of retrying forever.

    Returns:
        tuple: ``(img, target, idx)``.  ``idx`` is the index that was
        requested, even when a later sample had to be loaded instead.
    """
    attempts_per_index = 3
    failures = 0
    while True:
        candidate = idx + failures // attempts_per_index
        try:
            img, anno = super(COCODataset, self).__getitem__(candidate)
            break
        except Exception:
            failures += 1
            # No further sample left to fall back on: surface the error.
            if idx + failures // attempts_per_index >= len(self):
                raise

    # filter crowd annotations
    # TODO might be better to add an extra field
    anno = [obj for obj in anno if obj["iscrowd"] == 0]

    boxes = [obj["bbox"] for obj in anno]
    boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
    target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

    classes = [obj["category_id"] for obj in anno]
    classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
    classes = torch.tensor(classes)
    target.add_field("labels", classes)

    masks = [obj["segmentation"] for obj in anno]
    masks = SegmentationMask(masks, img.size)
    target.add_field("masks", masks)

    if anno and "keypoints" in anno[0]:
        keypoints = [obj["keypoints"] for obj in anno]
        keypoints = PersonKeypoints(keypoints, img.size)
        target.add_field("keypoints", keypoints)

    target = target.clip_to_image(remove_empty=True)

    if self.transforms is not None:
        img, target = self.transforms(img, target)

    return img, target, idx
def __getitem__(self, idx):
    """Load image and annotations for ``idx`` directly from the COCO index.

    The parent ``__getitem__`` is bypassed so that ``self.root`` may be a
    list of directories: in that case the first root containing the second
    ``_``-separated token of the file name is used.

    Returns:
        tuple: ``(img, target, idx)`` with ``target`` an ``xyxy`` ``BoxList``
        carrying ``labels`` and, when available, ``masks`` / ``keypoints``.
    """
    api = self.coco
    image_id = self.ids[idx]
    anno = api.loadAnns(api.getAnnIds(imgIds=image_id))
    file_name = api.loadImgs(image_id)[0]['file_name']

    if isinstance(self.root, list):
        candidates = [r for r in self.root if file_name.split('_')[1] in r]
        root = candidates[0]
    else:
        root = self.root

    img = Image.open(os.path.join(root, file_name)).convert('RGB')
    if self.transform is not None:
        img = self.transform(img)
    if self.target_transform is not None:
        anno = self.target_transform(anno)

    # Drop crowd annotations.
    # TODO might be better to add an extra field
    anno = [ann for ann in anno if ann["iscrowd"] == 0]

    xywh = torch.as_tensor([ann["bbox"] for ann in anno])
    xywh = xywh.reshape(-1, 4)  # guard against no boxes
    target = BoxList(xywh, img.size, mode="xywh").convert("xyxy")

    id_map = self.json_category_id_to_contiguous_id
    target.add_field(
        "labels", torch.tensor([id_map[ann["category_id"]] for ann in anno])
    )

    if anno and "segmentation" in anno[0]:
        polygons = [ann["segmentation"] for ann in anno]
        target.add_field("masks", SegmentationMask(polygons, img.size, mode='poly'))

    if anno and "keypoints" in anno[0]:
        raw_kps = [ann["keypoints"] for ann in anno]
        target.add_field("keypoints", PersonKeypoints(raw_kps, img.size))

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx
def __getitem__(self, idx):
    """Load sample ``idx`` with rotated boxes given as four corner points.

    Crowd annotations are dropped, ``self.conver4xyTO4xy`` turns the
    remaining annotations into 8-value boxes and the result is wrapped in
    an ``RBoxList`` of mode ``"4xy"``.  After the transforms, the target is
    truncated to its first 800 entries.

    Returns:
        tuple: ``(img, target, idx)``.
    """
    img, anno = super(DOTARotateDataset, self).__getitem__(idx)

    # Discard crowd annotations.
    # TODO might be better to add an extra field
    anno = [ann for ann in anno if ann["iscrowd"] == 0]

    quads = self.conver4xyTO4xy(anno)
    quads = torch.as_tensor(quads).reshape(-1, 8)  # guard against no boxes
    target = RBoxList(quads, img.size, mode="4xy")

    id_map = self.json_category_id_to_contiguous_id
    labels = torch.tensor([id_map[ann["category_id"]] for ann in anno])
    target.add_field("labels", labels)

    if anno and "keypoints" in anno[0]:
        raw_kps = [ann["keypoints"] for ann in anno]
        target.add_field("keypoints", PersonKeypoints(raw_kps, img.size))

    target = target.clip_to_image(remove_empty=True)

    if self.transforms is not None:
        img, target = self.transforms(img, target)

    # Cap the number of objects per image.
    if len(target) >= 800:
        target = target[0:800]

    return img, target, idx
def filltarget(ann, size):
    """Assemble a clipped ``BoxList`` target from one annotation record.

    Arguments:
        ann: annotation record handed to the ``get*`` extractor helpers.
        size: image size passed to ``BoxList``, ``SegmentationMask`` and
            ``PersonKeypoints``.

    Returns:
        The ``xyxy`` box list with ``labels``, ``styles``, ``masks`` and
        ``keypoints`` fields, clipped to the image.
    """
    labels = getlabels(ann)
    target = BoxList(getbbox(ann), size, mode="xyxy")
    target.add_field("labels", labels)
    target.add_field("styles", getstyles(ann))

    polygons = getseg(ann)
    target.add_field("masks", SegmentationMask(polygons, size, mode='poly'))

    raw_kps = getkeypoints(ann)
    target.add_field("keypoints", PersonKeypoints(raw_kps, size))

    return target.clip_to_image(remove_empty=True)
def __getitem__(self, idx):
    """Load sample ``idx`` together with a label derived from its file name.

    Besides the usual detection target, ``gt_name_to_label`` is called with
    the image file name and returns two values; both are passed through
    the transforms, but only the first is returned.

    Returns:
        tuple: ``(img, target, semgt)`` -- note that the index is not part
        of the result.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # Drop crowd annotations.
    # TODO might be better to add an extra field
    anno = [ann for ann in anno if ann["iscrowd"] == 0]

    xywh = torch.as_tensor([ann["bbox"] for ann in anno])
    xywh = xywh.reshape(-1, 4)  # guard against no boxes
    target = BoxList(xywh, img.size, mode="xywh").convert("xyxy")

    id_map = self.json_category_id_to_contiguous_id
    target.add_field(
        "labels", torch.tensor([id_map[ann["category_id"]] for ann in anno])
    )

    if anno and "segmentation" in anno[0]:
        polygons = [ann["segmentation"] for ann in anno]
        target.add_field("masks", SegmentationMask(polygons, img.size, mode='poly'))

    if anno and "keypoints" in anno[0]:
        raw_kps = [ann["keypoints"] for ann in anno]
        target.add_field("keypoints", PersonKeypoints(raw_kps, img.size))

    target = target.clip_to_image(remove_empty=True)

    # Look up the file name of this sample and derive the extra labels.
    file_name = self.coco.imgs[self.id_to_img_map[idx]]['file_name']
    semgt, semgt_cnt = gt_name_to_label(file_name)

    if self._transforms is not None:
        img, target, semgt, semgt_cnt = self._transforms(
            img, target, semgt, semgt_cnt
        )

    return img, target, semgt
def __getitem__(self, idx):
    """Load sample ``idx`` with masks decoded and pre-resized.

    Segmentations are decoded with ``mask_utils.decode``, resized one by
    one with nearest-neighbour interpolation to the size reported by
    ``self.resize.get_size`` and stored as a ``mode='mask'``
    ``SegmentationMask``.

    Returns:
        tuple: ``(img, target, idx)``.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # Drop crowd annotations.
    # TODO might be better to add an extra field
    anno = [ann for ann in anno if ann["iscrowd"] == 0]

    xywh = torch.as_tensor([ann["bbox"] for ann in anno])
    xywh = xywh.reshape(-1, 4)  # guard against no boxes
    target = BoxList(xywh, img.size, mode="xywh").convert("xyxy")

    id_map = self.json_category_id_to_contiguous_id
    target.add_field(
        "labels", torch.tensor([id_map[ann["category_id"]] for ann in anno])
    )

    if anno and "segmentation" in anno[0]:
        encoded = [ann["segmentation"] for ann in anno]
        # decode RLEs to masks; the instance index is the last axis
        decoded = mask_utils.decode(encoded)

        # Training size for the masks; the two entries are swapped before
        # being handed to cv2.resize.
        train_size = self.resize.get_size(img.size)
        mask_size = (train_size[1], train_size[0])

        # Resize each mask individually to save memory.
        resized = [
            torch.from_numpy(
                cv2.resize(decoded[:, :, k], mask_size,
                           interpolation=cv2.INTER_NEAREST)
            )
            for k in range(decoded.shape[-1])
        ]
        stacked = torch.stack(resized, dim=0).clone()
        target.add_field("masks", SegmentationMask(stacked, mask_size, mode='mask'))

    if anno and "keypoints" in anno[0]:
        raw_kps = [ann["keypoints"] for ann in anno]
        target.add_field("keypoints", PersonKeypoints(raw_kps, img.size))

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx
def __getitem__(self, idx):
    """Load sample ``idx`` as ``(img, target, idx)``.

    Crowd annotations are dropped, boxes are converted from ``xywh`` to
    ``xyxy`` and ``labels`` (plus ``keypoints`` when the first annotation
    has them) are attached to the target.  Masks are not attached.

    The method is free of debugging side effects: it neither prints per
    sample nor drops into an interactive debugger, and it does not run the
    experimental heat-map call, which read the local ``keypoints`` before
    it was assigned.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # filter crowd annotations
    # TODO might be better to add an extra field
    anno = [obj for obj in anno if obj["iscrowd"] == 0]

    boxes = [obj["bbox"] for obj in anno]
    boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
    target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

    classes = [obj["category_id"] for obj in anno]
    classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
    classes = torch.tensor(classes)
    target.add_field("labels", classes)

    if anno and "keypoints" in anno[0]:
        keypoints = [obj["keypoints"] for obj in anno]
        keypoints = PersonKeypoints(keypoints, img.size)
        target.add_field("keypoints", keypoints)

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx
def __getitem__(self, idx):
    """Load sample ``idx`` as ``(img, target, idx)``.

    Crowd annotations are dropped, boxes are converted from ``xywh`` to
    ``xyxy`` and ``labels`` is attached; ``masks`` and ``keypoints`` are
    attached when the first annotation carries the corresponding key.

    The method always returns its result: there is no diagnostic output
    and no process exit, and images without annotations are handled
    (nothing indexes into the possibly empty box list).
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # filter crowd annotations
    # TODO might be better to add an extra field
    anno = [obj for obj in anno if obj["iscrowd"] == 0]

    boxes = [obj["bbox"] for obj in anno]
    boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
    target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

    classes = [obj["category_id"] for obj in anno]
    classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
    classes = torch.tensor(classes)
    target.add_field("labels", classes)

    if anno and "segmentation" in anno[0]:
        masks = [obj["segmentation"] for obj in anno]
        masks = SegmentationMask(masks, img.size, mode='poly')
        target.add_field("masks", masks)

    if anno and "keypoints" in anno[0]:
        keypoints = [obj["keypoints"] for obj in anno]
        keypoints = PersonKeypoints(keypoints, img.size)
        target.add_field("keypoints", keypoints)

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx
def __getitem__(self, idx):
    """Load sample ``idx`` and check boxes against segmentation polygons.

    A warning is printed when the sample has no segmentation.  When masks
    were built, the number of boxes must equal the number of polygons.

    Returns:
        tuple: ``(img, target, idx)``.

    Raises:
        RuntimeError: if the box count and the polygon count disagree.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # filter crowd annotations
    # TODO might be better to add an extra field
    anno = [obj for obj in anno if obj["iscrowd"] == 0]

    boxes = [obj["bbox"] for obj in anno]
    boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
    target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

    classes = [obj["category_id"] for obj in anno]
    classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
    classes = torch.tensor(classes)
    target.add_field("labels", classes)

    # Stays None when the sample has no segmentation, so the consistency
    # check below can be skipped instead of touching an unbound name.
    masks = None
    if anno and "segmentation" in anno[0]:
        masks = [obj["segmentation"] for obj in anno]
        masks = SegmentationMask(masks, img.size, mode='poly')
        target.add_field("masks", masks)
    else:
        print(f"\n WARNING_ NO SEGMENTATION: {idx} \n")

    if anno and "keypoints" in anno[0]:
        keypoints = [obj["keypoints"] for obj in anno]
        keypoints = PersonKeypoints(keypoints, img.size)
        target.add_field("keypoints", keypoints)

    if masks is not None:
        num_polygons = len(masks.instances.polygons)
        if boxes.shape[0] != num_polygons:
            print("========= PRINT INFO =========")
            print(f"INDEX: [{idx}]")
            print(f"BBOX: [{boxes.shape}]")
            print(f"SEGM: [{num_polygons}]")
            print("========= PRINT END =========")
            raise RuntimeError("BOX and SEGM inconsistant")

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx
def __getitem__(self, idx):
    """Load sample ``idx`` with masks produced by ``generate_pyramid_label``.

    Each segmentation is reshaped to an ``(N, 2)`` float32 array and
    passed, with ``img.size[1]`` and ``img.size[0]``, to
    ``generate_pyramid_label``; the results are stored as a ``mode='mask'``
    ``SegmentationMask``.

    Returns:
        tuple: ``(img, target, idx)``.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # Drop crowd annotations.
    # TODO might be better to add an extra field
    anno = [ann for ann in anno if ann["iscrowd"] == 0]

    xywh = torch.as_tensor([ann["bbox"] for ann in anno])
    xywh = xywh.reshape(-1, 4)  # guard against no boxes
    target = BoxList(xywh, img.size, mode="xywh").convert("xyxy")

    id_map = self.json_category_id_to_contiguous_id
    target.add_field(
        "labels", torch.tensor([id_map[ann["category_id"]] for ann in anno])
    )

    if anno and "segmentation" in anno[0]:
        segmentations = [ann["segmentation"] for ann in anno]
        pyramid_labels = []
        for segm in segmentations:
            points = np.array(segm, dtype=np.float32).reshape(-1, 2)
            pyramid_labels.append(
                generate_pyramid_label(img.size[1], img.size[0], points)
            )
        mask_tensors = [torch.as_tensor(label) for label in pyramid_labels]
        target.add_field("masks", SegmentationMask(mask_tensors, img.size, mode='mask'))

    if anno and "keypoints" in anno[0]:
        raw_kps = [ann["keypoints"] for ann in anno]
        target.add_field("keypoints", PersonKeypoints(raw_kps, img.size))

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx
def __getitem__(self, idx):
    """Load sample ``idx`` with masks rasterised through ``coco.annToMask``.

    Crowd annotations are dropped.  When the first annotation has a
    segmentation, every annotation is converted with ``self.coco.annToMask``
    and the stacked result is stored as a ``mode='mask'``
    ``SegmentationMask``.

    Returns:
        tuple: ``(img, target, idx)``.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # Drop crowd annotations.
    # TODO might be better to add an extra field
    anno = [ann for ann in anno if ann["iscrowd"] == 0]

    xywh = torch.as_tensor([ann["bbox"] for ann in anno])
    xywh = xywh.reshape(-1, 4)  # guard against no boxes
    target = BoxList(xywh, img.size, mode="xywh").convert("xyxy")

    id_map = self.json_category_id_to_contiguous_id
    target.add_field(
        "labels", torch.tensor([id_map[ann["category_id"]] for ann in anno])
    )

    if anno and "segmentation" in anno[0]:
        per_object = [torch.from_numpy(self.coco.annToMask(ann)) for ann in anno]
        stacked = torch.stack(per_object)
        target.add_field("masks", SegmentationMask(stacked, img.size, mode='mask'))

    if anno and "keypoints" in anno[0]:
        raw_kps = [ann["keypoints"] for ann in anno]
        target.add_field("keypoints", PersonKeypoints(raw_kps, img.size))

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx
def __getitem__(self, idx):
    """Load sample ``idx`` with optional corner crop and ignore filtering.

    If the image info contains a ``'corner'`` entry the image is cropped to
    it.  Crowd annotations are always dropped; annotations flagged
    ``"ignore"`` are dropped as well when ``self.filter_ignore`` is set and
    the first annotation has that key.

    Returns:
        tuple: ``(img, target, idx)``.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # Crop to the sub-window recorded in the image info, if any.
    info = self.get_img_info(idx)
    if 'corner' in info:
        img = img.crop(info['corner'])

    # Drop crowd annotations.
    # TODO might be better to add an extra field
    anno = [ann for ann in anno if ann["iscrowd"] == 0]

    # Optionally drop annotations marked as ignored.
    if self.filter_ignore and anno and "ignore" in anno[0]:
        anno = [ann for ann in anno if not ann["ignore"]]

    xywh = torch.as_tensor([ann["bbox"] for ann in anno])
    xywh = xywh.reshape(-1, 4)  # guard against no boxes
    target = BoxList(xywh, img.size, mode="xywh").convert("xyxy")

    id_map = self.json_category_id_to_contiguous_id
    target.add_field(
        "labels", torch.tensor([id_map[ann["category_id"]] for ann in anno])
    )

    polygons = [ann["segmentation"] for ann in anno]
    target.add_field("masks", SegmentationMask(polygons, img.size))

    if anno and "keypoints" in anno[0]:
        raw_kps = [ann["keypoints"] for ann in anno]
        target.add_field("keypoints", PersonKeypoints(raw_kps, img.size))

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx
def __getitem__(self, idx):
    """Load a TIFF image and its annotations for sample ``idx``.

    The image is read with ``tifffile`` into a numpy array and annotations
    come straight from the COCO index.  Boxes are used as given, in
    ``xyxy`` mode, and the target is not clipped to the image.

    Returns:
        tuple: ``(img, target, idx)``.
    """
    image_id = self.id_to_img_map[idx]
    file_name = self.coco.loadImgs(image_id)[0]['file_name']
    img = np.array(tifffile.imread(os.path.join(self.root, file_name)))

    ann_ids = self.coco.getAnnIds(image_id, iscrowd=None)
    anno = self.coco.loadAnns(ann_ids)

    # NOTE(review): the first two entries of a numpy shape are (rows, cols);
    # confirm this is the order BoxList expects for its size argument.
    img_size = img.shape[:2]

    # Drop crowd annotations.
    anno = [ann for ann in anno if ann["iscrowd"] == 0]

    coords = torch.as_tensor([ann["bbox"] for ann in anno])
    coords = coords.reshape(-1, 4)  # guard against no boxes
    # NOTE(review): boxes are taken as xyxy without conversion -- verify the
    # annotation files really store them that way.
    target = BoxList(coords, img_size, mode="xyxy")

    id_map = self.json_category_id_to_contiguous_id
    target.add_field(
        "labels", torch.tensor([id_map[ann["category_id"]] for ann in anno])
    )

    if anno and "segmentation" in anno[0]:
        polygons = [ann["segmentation"] for ann in anno]
        target.add_field("masks", SegmentationMask(polygons, img_size, mode='poly'))

    if anno and "keypoints" in anno[0]:
        raw_kps = [ann["keypoints"] for ann in anno]
        target.add_field("keypoints", PersonKeypoints(raw_kps, img_size))

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx
def __getitem__(self, idx):
    """Load sample ``idx`` using each bounding box as its own mask.

    No mask-level annotation is used: the ``masks`` field holds one
    four-vertex polygon per object that traces the object's bounding box.

    Returns:
        tuple: ``(img, target, idx)``.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # Drop crowd annotations.
    # TODO might be better to add an extra field
    anno = [ann for ann in anno if ann["iscrowd"] == 0]

    xywh = torch.as_tensor([ann["bbox"] for ann in anno])
    xywh = xywh.reshape(-1, 4)  # guard against no boxes
    target = BoxList(xywh, img.size, mode="xywh").convert("xyxy")

    id_map = self.json_category_id_to_contiguous_id
    target.add_field(
        "labels", torch.tensor([id_map[ann["category_id"]] for ann in anno])
    )

    # Assign the whole bbox as mask; vertices are listed as interleaved
    # x, y pairs.
    box_polygons = []
    for ann in anno:
        bbox = ann["bbox"]
        box_polygons.append([[
            bbox[0], bbox[1],
            bbox[0], bbox[1] + bbox[3],
            bbox[0] + bbox[2], bbox[1] + bbox[3],
            bbox[0] + bbox[2], bbox[1],
        ]])
    target.add_field("masks", SegmentationMask(box_polygons, img.size))

    if anno and "keypoints" in anno[0]:
        raw_kps = [ann["keypoints"] for ann in anno]
        target.add_field("keypoints", PersonKeypoints(raw_kps, img.size))

    target = target.clip_to_image(remove_empty=True)

    if self.transforms is not None:
        img, target = self.transforms(img, target)

    return img, target, idx
def __getitem__(self, idx):
    """Load sample ``idx`` keeping every annotation, crowd ones included.

    The annotation list is copied rather than filtered, so crowd
    annotations stay in the target.

    Returns:
        tuple: ``(img, target, idx)``.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # Crowd filtering is disabled here; only take a shallow copy.
    # TODO might be better to add an extra field
    anno = list(anno)

    xywh = torch.as_tensor([ann["bbox"] for ann in anno])
    xywh = xywh.reshape(-1, 4)  # guard against no boxes
    target = BoxList(xywh, img.size, mode="xywh").convert("xyxy")

    id_map = self.json_category_id_to_contiguous_id
    target.add_field(
        "labels", torch.tensor([id_map[ann["category_id"]] for ann in anno])
    )

    if anno and "segmentation" in anno[0]:
        polygons = [ann["segmentation"] for ann in anno]
        target.add_field("masks", SegmentationMask(polygons, img.size, mode='poly'))

    if anno and "keypoints" in anno[0]:
        raw_kps = [ann["keypoints"] for ann in anno]
        target.add_field("keypoints", PersonKeypoints(raw_kps, img.size))

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx
def __getitem__(self, idx):
    """Load sample ``idx`` without clipping the target to the image.

    Crowd annotations are dropped, boxes are converted from ``xywh`` to
    ``xyxy`` and the usual fields are attached.  ``clip_to_image`` is not
    called here.

    Returns:
        tuple: ``(img, target, idx)``.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # Drop crowd annotations.
    # TODO might be better to add an extra field
    anno = [ann for ann in anno if ann["iscrowd"] == 0]

    xywh = torch.as_tensor([ann["bbox"] for ann in anno])
    xywh = xywh.reshape(-1, 4)  # guard against no boxes
    target = BoxList(xywh, img.size, mode="xywh").convert("xyxy")

    id_map = self.json_category_id_to_contiguous_id
    target.add_field(
        "labels", torch.tensor([id_map[ann["category_id"]] for ann in anno])
    )

    if anno and "segmentation" in anno[0]:
        polygons = [ann["segmentation"] for ann in anno]
        target.add_field("masks", SegmentationMask(polygons, img.size, mode='poly'))

    if anno and "keypoints" in anno[0]:
        raw_kps = [ann["keypoints"] for ann in anno]
        target.add_field("keypoints", PersonKeypoints(raw_kps, img.size))

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx
def match_targets_to_proposals(self, proposal, target, copied_fields=None):
    """Match each proposal to a ground-truth box, tolerating empty targets.

    Arguments:
        proposal: box list of proposals for one image.
        target: box list of ground-truth boxes for the same image; may be
            empty.
        copied_fields: names of the target fields to carry over to the
            matched targets.  ``None`` (the default) means no fields.

    Returns:
        A box list with one entry per proposal and a ``"matched_idxs"``
        field.  When ``target`` is empty the entries are the proposals
        themselves, every index is ``Matcher.BELOW_LOW_THRESHOLD`` and
        zero-filled ``"labels"`` / ``"keypoints"`` fields are added if they
        were requested.
    """
    # Avoid a shared mutable default: build a fresh list per call.
    if copied_fields is None:
        copied_fields = []

    if target:
        match_quality_matrix = boxlist_iou(target, proposal)
        matched_idxs = self(match_quality_matrix)
        # RPN doesn't need any fields from target
        # Fast RCNN only need "labels" field for selecting the targets
        # Mask RCNN needs "labels" and "masks" fields for creating the targets
        # Keypoint RCNN needs "labels" and "keypoints" fields for creating
        # the targets
        target = target.copy_with_fields(copied_fields)
        # get the targets corresponding GT for each proposal
        # NB: need to clamp the indices because we can have a single
        # GT in the image, and matched_idxs can be -2, which goes
        # out of bounds
        matched_targets = target[matched_idxs.clamp(min=0)]
    else:
        device = proposal.bbox.device
        matched_targets = proposal.copy_with_fields([])
        matched_idxs = torch.full(
            (len(proposal),),
            Matcher.BELOW_LOW_THRESHOLD,
            device=device,
            dtype=torch.int64,
        )
        if "labels" in copied_fields:
            matched_labels = torch.zeros_like(matched_idxs)
            matched_targets.add_field("labels", matched_labels)
        # "masks" needs no placeholder: the original code notes they are
        # never used in this case.
        if "keypoints" in copied_fields:
            # make pseudo keypoints
            keypoints = torch.zeros(
                (len(proposal), 3), device=device, dtype=torch.float32
            )
            matched_kps = PersonKeypoints(keypoints, proposal.size)
            matched_targets.add_field("keypoints", matched_kps)

    matched_targets.add_field("matched_idxs", matched_idxs)
    return matched_targets
def __getitem__(self, idx):
    """Load a left/right image pair and build one ``BoxList`` per view.

    Annotations are optionally restricted to ``self.class_filter_list`` and,
    when ``self.remove_truncated`` is set, to objects whose ``"truncated"``
    value is below ``self.truncated_threshold`` (only if at least one object
    survives that filter). Right-view boxes come from ``"bbox_right"`` when
    available and ``self.box_right_from_depth`` is false; otherwise the left
    boxes are shifted along x by the disparity derived from the depth
    annotation.

    Returns:
        ``(img, target, idx)`` where ``img`` is a dict with the keys
        ``images``, ``images_right`` and ``img_info``, and ``target`` is a
        dict with the keys ``targets`` and ``targets_right``.
    """
    coco = self.coco
    img_id = self.ids[idx]
    anno = coco.loadAnns(coco.getAnnIds(imgIds=img_id))

    # Look the image record up once; it provides both file names and the
    # camera parameters used by the depth conversions below.
    img_info = coco.loadImgs(img_id)[0]
    path = img_info['file_name']
    right_path = img_info['right_file_name']
    img = Image.open(os.path.join(self.root, path)).convert('RGB')
    right_img = Image.open(os.path.join(self.root, right_path)).convert('RGB')

    if self.transform is not None:
        (img, right_img) = self.transform((img, right_img))
    if self.target_transform is not None:
        anno = self.target_transform(anno)

    if len(self.class_filter_list) > 0:
        anno = [
            obj for obj in anno
            if obj["category_id"] in self.class_filter_list
        ]

    # Drop truncated objects, but keep the unfiltered list when the filter
    # would remove every annotation.
    if self.remove_truncated:
        untruncated = [
            obj for obj in anno
            if obj["truncated"] < self.truncated_threshold
        ]
        if untruncated:
            anno = untruncated

    has_depth = bool(anno) and self.depth_key in anno[0]

    def make_depth():
        # Build a fresh PointDepth for each use (disparity shift and the
        # "depths" field), exactly as the two separate constructions did.
        return PointDepth(
            [obj[self.depth_key] for obj in anno],
            img.size,
            focal_length=img_info["camera_params"]["intrinsic"]["fx"],
            baseline=img_info["camera_params"]["extrinsic"]["baseline"],
            min_value=self.depth_range[0],
            max_value=self.depth_range[1],
            mode=self.input_depth_mode)

    boxes = [obj["bbox"] for obj in anno]
    boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
    target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

    # NOTE(review): nesting below was reconstructed from a
    # whitespace-collapsed copy; right_target is assumed to be built once
    # per branch - confirm against the original file.
    if anno and "bbox_right" in anno[0] and not self.box_right_from_depth:
        boxes_right = [obj["bbox_right"] for obj in anno]
        boxes_right = torch.as_tensor(boxes_right).reshape(
            -1, 4)  # guard against no boxes
        right_target = BoxList(boxes_right, img.size,
                               mode="xywh").convert("xyxy")
    else:
        # Built from the Python list again so the in-place shift below does
        # not touch the left-view tensor.
        boxes_right = [obj["bbox"] for obj in anno]
        boxes_right = torch.as_tensor(boxes_right).reshape(
            -1, 4)  # guard against no boxes
        # transform left to right
        if has_depth:
            disp = make_depth().convert("disp").depths
            boxes_right[:, 0] -= disp
        right_target = BoxList(boxes_right, right_img.size,
                               mode="xywh").convert("xyxy")

    classes = [obj["category_id"] for obj in anno]
    classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
    classes = torch.tensor(classes)
    target.add_field("labels", classes)
    right_target.add_field("labels", classes)

    if anno and "segmentation" in anno[0]:
        masks = [obj["segmentation"] for obj in anno]
        masks = SegmentationMask(masks, img.size, mode='poly')
        target.add_field("masks", masks)

    if anno and "keypoints" in anno[0]:
        keypoints = [obj["keypoints"] for obj in anno]
        keypoints = PersonKeypoints(keypoints, img.size)
        target.add_field("keypoints", keypoints)

    if has_depth:
        depth = make_depth().convert(self.output_depth_mode)
        target.add_field("depths", depth)
        right_target.add_field("depths", depth)

    if anno and "dim" in anno[0]:
        dim = torch.tensor([obj["dim"] for obj in anno])
        target.add_field("dims", dim)
        right_target.add_field("dims", dim)

    if anno and "box_center" in anno[0]:
        # The centre is repeated so each row has four values after reshape.
        center = [[obj["box_center"]] + [obj["box_center"]] for obj in anno]
        center_box = torch.tensor(center).reshape(-1, 4)
        # add center as a boxlist so that it can be resized
        target.add_field("centers",
                         BoxList(center_box, img.size, mode="xyxy"))
        # add as keypoints
        # NOTE(review): this reuses the "keypoints" field name that the
        # PersonKeypoints branch above also writes - confirm intended.
        keypoints = [obj["box_center"] + [1.] for obj in anno]
        keypoints = BoxCenterKeypoints(keypoints, img.size)
        target.add_field("keypoints", keypoints)

    if anno and "alpha" in anno[0]:
        alphas = [obj["alpha"] for obj in anno]
        # convert alpha to binary-scale orientation (from CenterNet)
        rotbin = torch.zeros(len(alphas), 2, dtype=torch.long)
        rotres = torch.zeros(len(alphas), 2)
        for k, alpha in enumerate(alphas):
            if alpha < math.pi / 6. or alpha > 5 * math.pi / 6.:
                rotbin[k, 0] = 1
                rotres[k, 0] = alpha - (-0.5 * math.pi)
            if alpha > -math.pi / 6. or alpha < -5 * math.pi / 6.:
                rotbin[k, 1] = 1
                rotres[k, 1] = alpha - (0.5 * math.pi)
        alphas = torch.tensor(alphas)
        target.add_field("alphas", alphas)
        target.add_field("rotbins", rotbin)
        target.add_field("rotregs", rotres)
        right_target.add_field("alphas", alphas)
        right_target.add_field("rotbins", rotbin)
        right_target.add_field("rotregs", rotres)

    # Only for ground truth visualization
    if anno and "location" in anno[0]:
        location = torch.tensor([obj["location"] for obj in anno])
        target.add_field("locations", location)
        right_target.add_field("locations", location)

    if anno and "rotation_y" in anno[0]:
        ry = torch.tensor([obj["rotation_y"] for obj in anno])
        target.add_field("rys", ry)
        right_target.add_field("rys", ry)

    # remove_empty=False keeps the left and right targets aligned row by row.
    target = target.clip_to_image(remove_empty=False)
    right_target = right_target.clip_to_image(remove_empty=False)

    if self._transforms is not None:
        # NOTE(review): the two views are transformed by separate calls;
        # if self._transforms is randomised they may diverge - confirm.
        img, target = self._transforms(img, target)
        right_img, right_target = self._transforms(right_img, right_target)

    img = dict(images=img, images_right=right_img, img_info=img_info)
    target = dict(targets=target, targets_right=right_target)
    return img, target, idx
def __getitem__(self, idx):
    """Return the image, its 3D-annotated target and the index for ``idx``.

    Annotations are filtered by truncation (when ``self.remove_truncated``
    is set) and by ``self.class_filter_list`` (when non-empty). Each
    optional field (masks, keypoints, depths, dims, centers, locations,
    rys, alphas/rotbins/rotregs) is attached only when the first remaining
    annotation has the corresponding key. Boxes are clipped to the image
    and empty ones removed.

    Returns:
        The tuple ``(img, target, idx)``.
    """
    image, records = super(KITTI3DDataset, self).__getitem__(idx)
    info = self.coco.loadImgs(self.ids[idx])[0]

    if self.remove_truncated:
        records = [
            rec for rec in records
            if rec["truncated"] < self.truncated_threshold
        ]
    if len(self.class_filter_list) > 0:
        records = [
            rec for rec in records
            if rec["category_id"] in self.class_filter_list
        ]

    def present(key):
        # Optional fields are detected on the first annotation only.
        return bool(records) and key in records[0]

    def column(key):
        # Collect one annotation entry across all remaining records.
        return [rec[key] for rec in records]

    # reshape(-1, 4) still yields a (0, 4) tensor when nothing is left.
    xywh = torch.as_tensor(column("bbox")).reshape(-1, 4)
    target = BoxList(xywh, image.size, mode="xywh").convert("xyxy")

    contiguous = [
        self.json_category_id_to_contiguous_id[cid]
        for cid in column("category_id")
    ]
    target.add_field("labels", torch.tensor(contiguous))

    if present("segmentation"):
        target.add_field(
            "masks",
            SegmentationMask(column("segmentation"), image.size, mode='poly'))

    if present("keypoints"):
        target.add_field(
            "keypoints", PersonKeypoints(column("keypoints"), image.size))

    if present(self.depth_key):
        camera = info["camera_params"]
        point_depth = PointDepth(
            column(self.depth_key),
            image.size,
            focal_length=camera["intrinsic"]["fx"],
            baseline=camera["extrinsic"]["baseline"],
            min_value=self.depth_range[0],
            max_value=self.depth_range[1],
            mode=self.input_depth_mode)
        target.add_field(
            "depths", point_depth.convert(self.output_depth_mode))

    if present("dim"):
        target.add_field("dims", torch.tensor(column("dim")))

    if present("box_center"):
        # The centre is repeated so each row has four values after reshape.
        doubled = [[rec["box_center"]] + [rec["box_center"]] for rec in records]
        center_boxes = torch.tensor(doubled).reshape(-1, 4)
        # Stored as a BoxList so that it can be resized.
        target.add_field(
            "centers", BoxList(center_boxes, image.size, mode="xyxy"))
        # Also stored as keypoints, with a trailing 1. per centre.
        center_points = [rec["box_center"] + [1.] for rec in records]
        target.add_field(
            "keypoints", BoxCenterKeypoints(center_points, image.size))

    if present("location"):
        target.add_field("locations", torch.tensor(column("location")))

    if present("rotation_y"):
        target.add_field("rys", torch.tensor(column("rotation_y")))

    if present("alpha"):
        angles = column("alpha")
        # Two-bin orientation encoding (the original cites CenterNet).
        bins = torch.zeros(len(angles), 2, dtype=torch.long)
        residuals = torch.zeros(len(angles), 2)
        for row, angle in enumerate(angles):
            hits_bin0 = angle < math.pi / 6. or angle > 5 * math.pi / 6.
            hits_bin1 = angle > -math.pi / 6. or angle < -5 * math.pi / 6.
            if hits_bin0:
                bins[row, 0] = 1
                residuals[row, 0] = angle - (-0.5 * math.pi)
            if hits_bin1:
                bins[row, 1] = 1
                residuals[row, 1] = angle - (0.5 * math.pi)
        target.add_field("alphas", torch.tensor(angles))
        target.add_field("rotbins", bins)
        target.add_field("rotregs", residuals)

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        image, target = self._transforms(image, target)

    return image, target, idx
def __getitem__(self, idx):
    """Return the image, its detection target and the index for sample ``idx``.

    Crowd annotations are dropped, boxes are converted from ``xywh`` to
    ``xyxy``, labels are remapped to contiguous ids and segmentation masks
    are always attached. Keypoints are attached when the first remaining
    annotation has them. Boxes are clipped to the image and empty ones
    removed.

    A mixup-style variant (blending this sample with the next one using a
    Beta-sampled coefficient) used to sit here as commented-out code; it is
    not active.

    Returns:
        The tuple ``(img, target, idx)``.
    """
    image, records = super(COCODataset, self).__getitem__(idx)

    # Discard crowd regions.
    # TODO might be better to add an extra field
    records = [rec for rec in records if rec["iscrowd"] == 0]

    # reshape(-1, 4) still yields a (0, 4) tensor when no annotation is left.
    xywh = torch.as_tensor([rec["bbox"] for rec in records]).reshape(-1, 4)
    target = BoxList(xywh, image.size, mode="xywh").convert("xyxy")

    category_ids = [rec["category_id"] for rec in records]
    label_tensor = torch.tensor(
        [self.json_category_id_to_contiguous_id[cid] for cid in category_ids]
    )
    target.add_field("labels", label_tensor)

    polygons = [rec["segmentation"] for rec in records]
    target.add_field("masks", SegmentationMask(polygons, image.size, mode='poly'))

    if records and "keypoints" in records[0]:
        raw_keypoints = [rec["keypoints"] for rec in records]
        target.add_field("keypoints", PersonKeypoints(raw_keypoints, image.size))

    target = target.clip_to_image(remove_empty=True)

    if self.transforms is not None:
        image, target = self.transforms(image, target)

    return image, target, idx
def __getitem__(self, k):
    """Load sample ``k``: image, pickled boxes and (when training) camera cues.

    Boxes (``xywh``) and labels come from the pickle file; both are cut to
    the first ``self.cfg.DATA.COCO.GOOD_NUM`` entries. When ``self.train``
    is set, horizon and focal length are read from a ``.mat`` file and a
    camera height is estimated as the median over all boxes, assuming each
    box is a person of height ``1.75``.

    Returns:
        A 13-tuple: transformed image (yannick pipeline), transformed image
        (maskrcnn pipeline), ``W``, ``H``, estimated camera height, padded
        boxes, number of boxes, ``v0``, focal length in pixels, the first 12
        characters of the image file name, the image path, the transformed
        target, and the label list. Camera height, ``v0`` and focal length
        are ``-1`` when ``self.train`` is false.
    """
    im_ori_RGB = Image.open(self.img_files[k]).convert('RGB')  # im_ori_RGB.size: (W, H)

    # NOTE: pickle.load can execute arbitrary code from the file; only use
    # annotation pickles from a trusted source.
    with open(self.pickle_files[k], 'rb') as filehandle:
        data = pickle.load(filehandle)

    bboxes = data['bboxes'].astype(np.float32)  # [xywh]
    assert len(bboxes.shape)==2 and bboxes.shape[1]==4
    num_bboxes_ori = bboxes.shape[0]

    if 'label' in data:
        labels = data['label']  # ['car', 'person', 'person']
    else:
        labels = ['person'] * num_bboxes_ori

    good_num = self.cfg.DATA.COCO.GOOD_NUM
    if bboxes.shape[0] > good_num:
        bboxes = bboxes[:good_num, :]
        labels = labels[:good_num]

    target_boxes = torch.as_tensor(bboxes).reshape(-1, 4)  # guard against no boxes
    target = BoxList(target_boxes, im_ori_RGB.size, mode="xywh").convert("xyxy")
    num_boxes = target.bbox.shape[0]

    if self.opt.est_kps:
        if 'kps' in data:
            kps_gt = data['kps'].astype(int)  # [N, 51]
            if num_bboxes_ori > good_num:
                kps_gt = kps_gt[:good_num, :]
            kps_gt = kps_gt.tolist()  # [[51]]
        else:
            # No keypoints stored: one all-zero row of 51 values per box.
            kps_gt = [[0]*51 for i in range(num_boxes)]
        target_keypoints = PersonKeypoints(kps_gt, im_ori_RGB.size)
        target.add_field("keypoints", target_keypoints)

    target = target.clip_to_image(remove_empty=True)

    # Size the per-box fields from the boxes that survived clipping. The
    # pre-clip count would leave "labels" longer than the box tensor (and
    # than "scores") whenever an empty box was removed.
    num_kept = target.bbox.shape[0]
    classes = [1] * num_kept  # !!!!! all person (1) for now...
    classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
    classes = torch.tensor(classes)
    target.add_field("labels", classes)
    scores = torch.tensor([1.] * num_kept)
    target.add_field("scores", scores)

    W, H = im_ori_RGB.size[:2]
    if self.train:
        yannick_results = loadmat(self.yannick_mat_files[k])
        horizon_visible = yannick_results['horizon_visible'][0][0].astype(np.float32)
        assert horizon_visible == 1
        horizon = yannick_results['pitch'][0][0].astype(np.float32)
        horizon_pixels_yannick = H * horizon
        v0 = H - horizon_pixels_yannick
        vfov = yannick_results['vfov'][0][0].astype(np.float32)
        f_pixels_yannick = H/2./(np.tan(vfov/2.))
    else:
        f_pixels_yannick = -1
        v0 = -1

    im_yannickTransform = self.transforms_yannick(im_ori_RGB)  # [0., 1.] by default
    im_maskrcnnTransform, target_maskrcnnTransform = self.transforms_maskrcnn(im_ori_RGB, target)  # [0., 1.] by default

    # NOTE(review): nesting here was reconstructed from a
    # whitespace-collapsed copy; "boxlist_ori" and "img_files" are assumed
    # to be added unconditionally - confirm against the original file.
    if self.train and self.opt.est_kps:
        target_maskrcnnTransform.add_field("keypoints_ori", target_keypoints)
    target_maskrcnnTransform.add_field("boxlist_ori", target)
    # NOTE(review): this list still uses the pre-clip box count, as before.
    target_maskrcnnTransform.add_field('img_files', [self.img_files[k]] * num_boxes)

    if self.train:
        y_person = 1.75
        vc = H / 2.
        yc_list = []
        for bbox in bboxes:
            # Rows measured from the bottom of the image.
            vt = H - bbox[1]
            vb = H - (bbox[1] + bbox[3])
            yc_single = y_person * (v0 - vb) / (vt - vb) / (1. + (vc - v0) * (vc - vt) / f_pixels_yannick**2)
            yc_list.append(yc_single)
        # With no boxes np.median returns nan (and warns).
        yc_estCam = np.median(np.asarray(yc_list))
    else:
        yc_estCam = -1

    assert len(labels)==bboxes.shape[0]

    return im_yannickTransform, im_maskrcnnTransform, W, H, \
        float(yc_estCam), \
        self.pad_bbox(bboxes, self.GOOD_NUM).astype(np.float32), bboxes.shape[0], float(v0), float(f_pixels_yannick), \
        os.path.basename(self.img_files[k])[:12], self.img_files[k], target_maskrcnnTransform, labels
def __getitem__(self, k):
    """Load sample ``k``: image, pickled boxes and the transformed target.

    Boxes (``xywh``) and labels come from the pickle file. They are cut to
    the first ``self.cfg.DATA.COCO.GOOD_NUM`` entries only when
    ``self.cfg.DATA.COCO.CLIP_N_IN_MASKRCNN`` is set. The camera values
    (camera height, ``v0``, focal length) are fixed at ``-1`` in this
    variant.

    Returns:
        A 13-tuple: transformed image, ``W``, ``H``, camera height (``-1``),
        padded boxes, number of boxes, ``v0`` (``-1``), focal length
        (``-1``), the first 12 characters of the image file name, the image
        path, the transformed target, the label list, and
        ``[self.coco_subset, self.img_idxes[k]]``.

    NOTE(review): block nesting was reconstructed from a
    whitespace-collapsed copy of this method; in particular, confirm that
    "scores" belongs under ``est_bbox`` and that "img_files" is added
    unconditionally.
    """
    im_ori_RGB = Image.open(self.img_files[k]).convert('RGB') # im_ori_RGB.size: (W, H)
    # NOTE: pickle.load can execute arbitrary code from the file; only use
    # annotation pickles from a trusted source.
    with open(self.pickle_files[k], 'rb') as filehandle:
        data = pickle.load(filehandle)
    bboxes = data['bboxes'].astype(np.float32) # [xywh]
    assert len(bboxes.shape)==2 and bboxes.shape[1]==4
    num_bboxes_ori = bboxes.shape[0]
    if 'label' in data:
        labels = data['label'] # ['car', 'person', 'person']
    else:
        # No labels stored: every box is treated as a person.
        labels = ['person'] * num_bboxes_ori
    if bboxes.shape[0] > self.cfg.DATA.COCO.GOOD_NUM and self.cfg.DATA.COCO.CLIP_N_IN_MASKRCNN:
        bboxes = bboxes[:self.cfg.DATA.COCO.GOOD_NUM, :]
        labels = labels[:self.cfg.DATA.COCO.GOOD_NUM]
    target_boxes = torch.as_tensor(bboxes).reshape(-1, 4) # guard against no boxes
    target = BoxList(target_boxes, im_ori_RGB.size, mode="xywh").convert("xyxy")
    num_boxes = target.bbox.shape[0]
    if self.opt.est_kps:
        if 'kps' in data:
            kps_gt = data['kps'].astype(int) # [N, 51]
            # Same truncation rule as for the boxes above.
            if num_bboxes_ori > self.cfg.DATA.COCO.GOOD_NUM and self.cfg.DATA.COCO.CLIP_N_IN_MASKRCNN:
                kps_gt = kps_gt[:self.cfg.DATA.COCO.GOOD_NUM, :]
            kps_gt = kps_gt.tolist() # [[51]]
        else:
            # No keypoints stored: one all-zero row of 51 values per box.
            kps_gt = [[0]*51 for i in range(num_boxes)]
        target_keypoints = PersonKeypoints(kps_gt, im_ori_RGB.size)
        target.add_field("keypoints", target_keypoints)
        # target.add_field("keypoints_mask", kps_mask)
    # target = target.clip_to_image(remove_empty=True)
    if self.opt.est_bbox:
        classes = [1] * num_boxes # !!!!! all person (1) for now...
        classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)
        scores = torch.tensor([1.] * target.bbox.shape[0])
        target.add_field("scores", scores)
    # Clipping happens after the fields above have been attached.
    target = target.clip_to_image(remove_empty=True)
    im_maskrcnnTransform, target_maskrcnnTransform = self.transforms_maskrcnn(im_ori_RGB, target) # [0., 1.] by default
    # Obsolete GT params from offline Yannick model
    W, H = im_ori_RGB.size[:2]
    f_pixels_yannick = -1
    v0 = -1
    yc_estCam = -1
    if self.train_val:
        if self.opt.est_kps:
            target_maskrcnnTransform.add_field("keypoints_ori", target_keypoints)
        if self.opt.est_bbox:
            target_maskrcnnTransform.add_field("boxlist_ori", target)
    # NOTE(review): num_boxes was counted before clip_to_image; confirm this
    # list is meant to have the pre-clip length.
    target_maskrcnnTransform.add_field('img_files', [self.img_files[k]] * num_boxes)
    assert len(labels)==bboxes.shape[0]
    mis = [self.coco_subset, self.img_idxes[k]]
    return im_maskrcnnTransform, W, H, \
        float(yc_estCam), \
        self.pad_bbox(bboxes, self.GOOD_NUM).astype(np.float32), bboxes.shape[0], float(v0), float(f_pixels_yannick), \
        os.path.basename(self.img_files[k])[:12], self.img_files[k], target_maskrcnnTransform, labels, mis
def __getitem__(self, idx):
    """Load a left/right image pair and build one ``BoxList`` per view.

    Crowd annotations are dropped. Right-view boxes come from
    ``"bbox_right"`` when the first annotation has it; otherwise the left
    boxes are shifted along x by the disparity derived from the depth
    annotation.

    Returns:
        ``(img, target, idx)`` where ``img`` is a dict with the keys
        ``images``, ``images_right`` and ``img_info``, and ``target`` is a
        dict with the keys ``targets`` and ``targets_right``.
    """
    coco = self.coco
    img_id = self.ids[idx]
    anno = coco.loadAnns(coco.getAnnIds(imgIds=img_id))

    # img_info is read below for the camera parameters and returned to the
    # caller, so it has to be bound here; the same record also supplies the
    # two file names.
    img_info = coco.loadImgs(img_id)[0]
    path = img_info['file_name']
    right_path = img_info['right_file_name']
    img = Image.open(os.path.join(self.root, path)).convert('RGB')
    right_img = Image.open(os.path.join(self.root, right_path)).convert('RGB')

    if self.transform is not None:
        (img, right_img) = self.transform((img, right_img))
    if self.target_transform is not None:
        anno = self.target_transform(anno)

    # filter crowd annotations
    # TODO might be better to add an extra field
    anno = [obj for obj in anno if obj["iscrowd"] == 0]

    has_depth = bool(anno) and self.depth_key in anno[0]

    def make_depth():
        # Build a fresh PointDepth for each use (disparity shift and the
        # "depths" field), exactly as the two separate constructions did.
        return PointDepth(
            [obj[self.depth_key] for obj in anno],
            img.size,
            focal_length=img_info["camera_params"]["intrinsic"]["fx"],
            baseline=img_info["camera_params"]["extrinsic"]["baseline"],
            min_value=self.depth_range[0],
            max_value=self.depth_range[1],
            mode=self.input_depth_mode)

    boxes = [obj["bbox"] for obj in anno]
    boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
    target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

    # NOTE(review): nesting below was reconstructed from a
    # whitespace-collapsed copy; right_target is assumed to be built once
    # per branch - confirm against the original file.
    if anno and "bbox_right" in anno[0]:
        boxes_right = [obj["bbox_right"] for obj in anno]
        boxes_right = torch.as_tensor(boxes_right).reshape(-1, 4)  # guard against no boxes
        right_target = BoxList(boxes_right, img.size, mode="xywh").convert("xyxy")
    else:
        # Built from the Python list again so the in-place shift below does
        # not touch the left-view tensor.
        boxes_right = [obj["bbox"] for obj in anno]
        boxes_right = torch.as_tensor(boxes_right).reshape(-1, 4)  # guard against no boxes
        # transform left to right
        if has_depth:
            disp = make_depth().convert("disp").depths
            boxes_right[:, 0] -= disp
        right_target = BoxList(boxes_right, right_img.size, mode="xywh").convert("xyxy")

    classes = [obj["category_id"] for obj in anno]
    classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
    classes = torch.tensor(classes)
    target.add_field("labels", classes)
    right_target.add_field("labels", classes)

    if anno and "segmentation" in anno[0]:
        masks = [obj["segmentation"] for obj in anno]
        masks = SegmentationMask(masks, img.size, mode='poly')
        target.add_field("masks", masks)

    if anno and "keypoints" in anno[0]:
        keypoints = [obj["keypoints"] for obj in anno]
        keypoints = PersonKeypoints(keypoints, img.size)
        target.add_field("keypoints", keypoints)

    if has_depth:
        depth = make_depth().convert(self.output_depth_mode)
        target.add_field("depths", depth)
        right_target.add_field("depths", depth)

    # remove_empty=False keeps the left and right targets aligned row by row.
    target = target.clip_to_image(remove_empty=False)
    right_target = right_target.clip_to_image(remove_empty=False)

    if self._transforms is not None:
        # NOTE(review): the two views are transformed by separate calls;
        # if self._transforms is randomised they may diverge - confirm.
        img, target = self._transforms(img, target)
        right_img, right_target = self._transforms(right_img, right_target)

    img = dict(images=img, images_right=right_img, img_info=img_info)
    target = dict(targets=target, targets_right=right_target)
    return img, target, idx