class cocoReward(Reward):
    """Reward based on mask IoU against the COCO ground truth of one image.

    Call ``next_episode`` with an image id to load that image's annotations,
    then call ``get_reward`` with successive predicted masks.
    """

    # IoU at or above which the currently tracked instance counts as finished.
    STOP_IOU = 0.8

    def __init__(self, ann_path):
        """Load the COCO annotation file found at ``ann_path``."""
        self.ann_path = ann_path
        self.coco = COCO(ann_path)
        self.cur_ins_index = -1  # -1: no ground-truth instance selected yet
        self.cur_score = 0

    def next_episode(self, *args):
        """Reset the tracking state and load the ground truth of a new image.

        :param args: args[0] is the COCO image id
        """
        img_id = args[0]
        ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=None)
        img_info = self.coco.loadImgs(img_id)[0]
        self.shape = (img_info['height'], img_info['width'])
        self.anns = self.coco.loadAnns(ann_ids)
        print('%d instance in mask gt of %d' % (len(self.anns), img_id))
        self.rles = [self.coco.annToRLE(ann) for ann in self.anns]
        self.cur_ins_index = -1
        self.cur_score = 0

    def get_reward(self, *args):
        '''
        Score the current mask against the ground truth.

        When no instance is selected, the remaining ground-truth instance with
        the highest IoU is selected and ``(that IoU, False)`` is returned.
        Otherwise the IoU against the selected instance is recomputed; once it
        reaches ``STOP_IOU`` the instance is removed from the candidates and
        the selection is cleared.

        :param args: args[0] is current mask
        :return: ``(score, instance_stop)`` - the selected IoU on a selecting
            call, otherwise the IoU increment since the previous call, and
            whether the tracked instance is now considered finished
        '''
        # NOTE(review): resize/encode/iou are imported outside this view; their
        # exact argument contracts (single RLE vs. list, length of the crowd
        # flag array) should be confirmed against the actual imports.
        mask = args[0]
        mask = resize(mask, self.shape).astype(np.uint8)
        rle = encode(np.asfortranarray(mask))

        if self.cur_ins_index == -1:
            # choose the instance with maximum iou
            scores = iou(self.rles, rle, np.zeros(len(self.rles)))
            self.cur_ins_index = np.argmax(scores)
            self.cur_score = scores[self.cur_ins_index][0]
            return self.cur_score, False

        # just return the iou change for the tracked instance
        scores = iou([self.rles[self.cur_ins_index]], rle,
                     np.zeros(len(self.rles)))
        incre = scores[0][0] - self.cur_score
        self.cur_score = scores[0][0]
        print('score %f' % self.cur_score)

        instance_stop = self.cur_score >= self.STOP_IOU  # sufficiently good
        if instance_stop:
            # do not detect this again
            del self.rles[self.cur_ins_index]
            self.cur_ins_index = -1
        return incre, instance_stop

    def get_reward2(self, *args):
        # NOTE(review): only this first statement is visible; the method looks
        # unfinished and currently returns None.
        mask = args[0]
def _cacheMaskImgs(dataDir, classGrps, training, cocoObj = None): """ Caches mask images for each class group given in ``classGrps``. Generally speeds up CocoDataGen.sample(). Mask images are stored in folders created in dataDir. # Params dataDir: Root directory of coco dataset. classGrps: List of lists of strings specifying object classes. Each list of classes will be grouped together into one mask image. training: If ``True`` then uses training set, otherwise uses validation set. cocoObj: Provides option to pass a pre-existing coco dataset object. """ if cocoObj: coco = cocoObj else: annPath = '{}/annotations/instances_{}.json'.format(dataDir, 'train2017' if training else 'val2017') coco = COCO(annPath) # Get image ids by class group catIdsByGrp = [coco.getCatIds(catNms=catNms) for catNms in classGrps] imgIdsByGrp = [] for grpIdx, catIds in enumerate(catIdsByGrp): imgIds = [] for catId in catIds: imgIds.extend(coco.getImgIds(catIds=[catId])) imgIdsByGrp.append(list(set(imgIds))) for grp_idx in range(len(classGrps)): folder_name = '_'.join(classGrps[grp_idx]) + ('_train' if training else '_val') cache_path = os.path.join(dataDir, folder_name) if not os.path.exists(cache_path): os.makedirs(cache_path) for img_id in imgIdsByGrp[grp_idx]: img_info = coco.loadImgs([img_id])[0] fname_mask = str(img_id).zfill(12)+'.jpg' mask_path = os.path.join(cache_path, fname_mask) if not os.path.exists(mask_path): ann_ids = coco.getAnnIds(imgIds=img_id, catIds=catIdsByGrp[grp_idx], iscrowd=False) # Not sure about the iscrowd param TODO anns = coco.loadAnns(ann_ids) # create an empty mask image mask_composite = np.zeros(shape=(img_info['height'], img_info['width']), dtype=np.uint8) for ann in anns: # render mask of each ann and add it to the composite image ann_mask = maskUtils.decode(coco.annToRLE(ann)) # get the contours and mask of this instance. 
mask_composite = np.maximum(mask_composite, ann_mask) mask_composite *= 255 cv2.imwrite(mask_path, mask_composite) # save the composite image
class COCOLoader(Loader):
    """Loader over the COCO 2014 train or val split.

    Images are addressed by a position in ``shuffleIds``, an (optionally
    shuffled) permutation of indices into ``imgIds``.
    """

    def __init__(self, is_train=True, shuffle=True):
        super(COCOLoader, self).__init__()
        coco_dir = join(dirname(__file__), '..', '..', 'COCO')
        if is_train:
            split, ann_file = 'train2014', 'instances_train2014.json'
        else:
            split, ann_file = 'val2014', 'instances_val2014.json'
        self.image_dir = join(coco_dir, 'images', split)
        self.coco = COCO(join(coco_dir, 'annotations', ann_file))

        self.imgIds = self.coco.getImgIds()
        self.catIds = self.coco.getCatIds()
        # Category ids are mapped to consecutive labels starting at 1.
        self.catId2label = {
            cat_id: label
            for label, cat_id in enumerate(self.catIds, start=1)
        }
        self.shuffleIds = list(range(len(self.imgIds)))
        if shuffle:
            random.shuffle(self.shuffleIds)

    def __len__(self):
        """Number of images in the split."""
        return len(self.imgIds)

    def load_img(self, image_index):
        """Return ``(image_path, height, width)`` for the image at ``image_index``."""
        info = self.coco.imgs[self.imgIds[self.shuffleIds[image_index]]]
        height = info['height']
        width = info['width']
        return join(self.image_dir, info['file_name']), height, width

    def load_ann(self, image_index):
        """Return the non-crowd annotations of the image at ``image_index``.

        Each returned annotation dict gets a ``'gt_id'`` label and, unless
        ``cfg.rpn_only`` is set, a decoded ``'mask'`` array. The dicts are the
        ones held by the COCO object, so these keys are written in place.
        """
        coco_img_id = self.imgIds[self.shuffleIds[image_index]]
        ann_ids = self.coco.getAnnIds(imgIds=coco_img_id)
        kept = [a for a in self.coco.loadAnns(ann_ids) if not a['iscrowd']]
        for ann in kept:
            ann['gt_id'] = self.catId2label[ann['category_id']]
            if cfg.rpn_only:
                continue
            ann['mask'] = np.array(maskUtils.decode(self.coco.annToRLE(ann)))
        return kept
class DidiDataset(Dataset):
    """Keypoint dataset built from a COCO-format annotation file.

    On construction every image is scanned and, for images with at least one
    accepted person, a ``PersonMeta`` for the largest accepted person is
    appended to ``self.all_meta``. The static helpers turn joint coordinates
    into heatmaps and part-affinity fields.
    """

    idx_in_coco_str = [
        'left_eye', 'right_eye', 'nose', 'neck', 'left_chest', 'right_chest',
        'left_shoulder', 'left_upperarm', 'left_elbow', 'left_forearm',
        'left_wrist', 'left_hand', 'right_shoulder', 'right_upperarm',
        'right_elbow', 'right_forearm', 'right_wrist', 'right_hand'
    ]
    num_joints = len(idx_in_coco_str)  # 18
    num_joints_and_bkg = num_joints + 1  # 19
    idx_in_coco = list(range(num_joints))  # [0:17]
    joint_pairs = [[3, 2], [2, 0], [2, 1], [3, 4], [3, 5], [3, 6], [6, 7],
                   [7, 8], [8, 9], [9, 10], [10, 11], [3, 12], [12, 13],
                   [13, 14], [14, 15], [15, 16], [16, 17]]
    num_connections = len(joint_pairs)  # 17

    def __init__(self, img_dir, anno_path, target_size=(368, 368), stride=8):
        """Load annotations and collect per-image person metadata.

        :param img_dir: directory containing the images
        :param anno_path: path to the COCO-format annotation file
        :param target_size: target size; its first entry scales ``PersonMeta.scale``
        :param stride: stored on the instance; not used during loading
        """
        self.coco_anno = COCO(anno_path)
        self.img_dir = img_dir
        self.target_size = target_size
        self.stride = stride
        self.ids = list(self.coco_anno.imgs.keys())
        # Must exist before the loop below appends to it.
        self.all_meta = []

        for i, img_key in enumerate(self.ids):
            img_meta = self.coco_anno.imgs[img_key]

            # load annotations
            img_id = img_meta['id']
            img_file = img_meta['file_name']
            h, w = img_meta['height'], img_meta['width']
            img_path = os.path.join(self.img_dir, img_file)
            ann_ids = self.coco_anno.getAnnIds(imgIds=img_id)
            anns = self.coco_anno.loadAnns(ann_ids)

            total_keypoints = sum(
                [ann.get('num_keypoints', 0) for ann in anns])
            if total_keypoints == 0:
                continue

            persons = []
            prev_center = []
            masks = []
            keypoints = []

            # sort from the biggest person to the smallest one
            persons_ids = np.argsort([-a['area'] for a in anns],
                                     kind='mergesort')

            for person_idx in list(persons_ids):
                person_meta = anns[person_idx]

                if person_meta["iscrowd"]:
                    masks.append(self.coco_anno.annToRLE(person_meta))
                    continue

                # skip this person if parts number is too low or if
                # segmentation area is too small
                if person_meta["num_keypoints"] < 5 or person_meta[
                        "area"] < 32 * 32:
                    masks.append(self.coco_anno.annToRLE(person_meta))
                    continue

                # skip this person if the distance to existing person is too
                # small
                person_center = [
                    person_meta["bbox"][0] + person_meta["bbox"][2] / 2,
                    person_meta["bbox"][1] + person_meta["bbox"][3] / 2
                ]
                too_close = False
                for pc in prev_center:
                    a = np.expand_dims(pc[:2], axis=0)
                    b = np.expand_dims(person_center, axis=0)
                    dist = cdist(a, b)[0]
                    # pc[2] holds the larger bbox side of the earlier person
                    if dist < pc[2] * 0.3:
                        too_close = True
                        break
                if too_close:
                    # add mask of this person. we don't want to show the
                    # network unlabeled people
                    masks.append(self.coco_anno.annToRLE(person_meta))
                    continue

                keypoints.append(person_meta["keypoints"])
                pers = PersonMeta(
                    img_path=img_path,
                    height=h,
                    width=w,
                    center=np.expand_dims(person_center, axis=0),
                    bbox=person_meta["bbox"],
                    area=person_meta["area"],
                    scale=person_meta["bbox"][3] / target_size[0],
                    num_keypoints=person_meta["num_keypoints"])
                persons.append(pers)
                prev_center.append(
                    np.append(
                        person_center,
                        max(person_meta["bbox"][2], person_meta["bbox"][3])))

            if len(persons) > 0:
                # Only the largest accepted person represents the image; it
                # carries the joints of all accepted persons.
                main_person = persons[0]
                main_person.masks_segments = masks
                main_person.all_joints = DidiDataset.from_coco_keypoints(
                    keypoints, w, h)
                self.all_meta.append(main_person)

            if i % 1000 == 0:
                print("Loading image annot {}/{}".format(i, len(self.ids)))

    def get_ground_truth(self):
        # NOTE(review): unfinished stub - both helpers require arguments that
        # are not supplied here, so calling this raises TypeError, and nothing
        # is returned. The intended inputs are not visible in this file view.
        # create heatmap
        heatmap = DidiDataset.create_heatmap()
        paf = DidiDataset.create_paf()

    @staticmethod
    def from_coco_keypoints(all_keypoints, w, h):
        """
        Creates list of joints based on the list of coco keypoints vectors.

        :param all_keypoints: list of coco keypoints vector
            [[x1,y1,v1,x2,y2,v2,....], []]
        :param w: image width
        :param h: image height
        :return: list of joints [[(x1,y1), (x1,y1), ...], [], []]; a joint
            that is unlabeled (v < 1) or outside the image is ``None``
        """
        all_joints = []
        for keypoints in all_keypoints:
            kp = np.array(keypoints)
            xs = kp[0::3]
            ys = kp[1::3]
            vs = kp[2::3]

            # filter and loads keypoints to the list
            keypoints_list = []
            for x, y, v in zip(xs, ys, vs):
                # only visible and occluded keypoints are used
                if v >= 1 and x >= 0 and y >= 0 and x < w and y < h:
                    keypoints_list.append((x, y))
                else:
                    keypoints_list.append(None)

            # build the list of joints; an entry of idx_in_coco may be an
            # index into keypoints_list or a callable computing a joint from it
            joints = []
            for coco_kp_idx in DidiDataset.idx_in_coco:
                if callable(coco_kp_idx):
                    p = coco_kp_idx(keypoints_list)
                else:
                    p = keypoints_list[coco_kp_idx]
                joints.append(p)
            all_joints.append(joints)
        return all_joints

    @staticmethod
    def create_heatmap(num_maps, height, width, all_joints, sigma, stride):
        """Render Gaussian joint heatmaps on a ``height x width`` grid.

        Grid cell ``(g_x, g_y)`` corresponds to image position
        ``stride / 2 - 0.5 + g * stride``. Plane ``k`` accumulates the joints
        at position ``k`` of every person (clamped to 1.0); the last plane is
        then overwritten with ``1 - max`` over all planes.

        :return: float64 array of shape ``(height, width, num_maps)``
        """

        def _put_heatmap_on_plane(heatmap, plane_idx, joint, sigma, height,
                                  width, stride):
            start = stride / 2.0 - 0.5
            center_x, center_y = joint
            for g_y in range(height):
                for g_x in range(width):
                    x = start + g_x * stride
                    y = start + g_y * stride
                    d2 = (x - center_x) * (x - center_x) + (y - center_y) * (
                        y - center_y)
                    exponent = d2 / 2.0 / sigma / sigma
                    # contributions below exp(-4.6052) (about 0.01) are skipped
                    if exponent > 4.6052:
                        continue
                    heatmap[g_y, g_x, plane_idx] += math.exp(-exponent)
                    if heatmap[g_y, g_x, plane_idx] > 1.0:
                        heatmap[g_y, g_x, plane_idx] = 1.0

        heatmap = np.zeros((height, width, num_maps), dtype=np.float64)
        for joints in all_joints:
            for plane_idx, joint in enumerate(joints):
                if joint:
                    _put_heatmap_on_plane(heatmap, plane_idx, joint, sigma,
                                          height, width, stride)

        # background
        heatmap[:, :, -1] = np.clip(1.0 - np.amax(heatmap, axis=2), 0.0, 1.0)
        return heatmap

    @staticmethod
    def create_paf(num_maps, height, width, all_joints, threshold, stride):
        """Render part-affinity fields for every entry of ``joint_pairs``.

        Joint coordinates are divided by ``stride``. For each pair, cells whose
        perpendicular distance to the limb line is at most ``threshold``
        (within the limb's bounding box grown by ``threshold``) receive the
        unit limb vector; overlapping limbs are averaged.

        :return: float64 array of shape ``(height, width, num_maps * 2)`` with
            the x and y components of pair ``k`` at planes ``2k`` and ``2k+1``
        """

        def _put_paf_on_plane(vectormap, countmap, plane_idx, x1, y1, x2, y2,
                              threshold, height, width):
            min_x = max(0, int(round(min(x1, x2) - threshold)))
            max_x = min(width, int(round(max(x1, x2) + threshold)))
            min_y = max(0, int(round(min(y1, y2) - threshold)))
            max_y = min(height, int(round(max(y1, y2) + threshold)))

            vec_x = x2 - x1
            vec_y = y2 - y1
            norm = math.sqrt(vec_x**2 + vec_y**2)
            if norm < 1e-8:
                return  # both joints coincide: no direction to draw
            vec_x /= norm
            vec_y /= norm

            for y in range(min_y, max_y):
                for x in range(min_x, max_x):
                    bec_x = x - x1
                    bec_y = y - y1
                    dist = abs(bec_x * vec_y - bec_y * vec_x)
                    if dist > threshold:
                        continue
                    cnt = countmap[y][x][plane_idx]
                    if cnt == 0:
                        vectormap[y][x][plane_idx * 2 + 0] = vec_x
                        vectormap[y][x][plane_idx * 2 + 1] = vec_y
                    else:
                        # running average over the limbs covering this cell
                        vectormap[y][x][plane_idx * 2 + 0] = (
                            vectormap[y][x][plane_idx * 2 + 0] * cnt +
                            vec_x) / (cnt + 1)
                        vectormap[y][x][plane_idx * 2 + 1] = (
                            vectormap[y][x][plane_idx * 2 + 1] * cnt +
                            vec_y) / (cnt + 1)
                    countmap[y][x][plane_idx] += 1

        paf = np.zeros((height, width, num_maps * 2), dtype=np.float64)
        countmap = np.zeros((height, width, num_maps), dtype=np.uint8)
        for joints in all_joints:
            for plane_idx, (j_idx1, j_idx2) in enumerate(
                    DidiDataset.joint_pairs):
                center_from = joints[j_idx1]
                center_to = joints[j_idx2]

                # skip if no valid pair of keypoints
                if center_from is None or center_to is None:
                    continue

                x1, y1 = (center_from[0] / stride, center_from[1] / stride)
                x2, y2 = (center_to[0] / stride, center_to[1] / stride)
                _put_paf_on_plane(paf, countmap, plane_idx, x1, y1, x2, y2,
                                  threshold, height, width)
        return paf
class Test(object):
    """Runs a trained detector over the validation set, draws the detected
    quadrilaterals and evaluates them with the COCO 'segm' metric."""

    def __init__(self, opt=None):
        """
        :param opt: options object; ``model``, ``weights_path``,
            ``image_folder`` and ``output_folder`` are read from it
        """
        assert opt is not None
        self.opt = opt
        self.device = torch.device(cfg.device)
        self.val_dataset = NewDataset(train_set=False)
        self.val_dataloader = DataLoader(
            self.val_dataset,
            batch_size=1,
            shuffle=True,
            num_workers=cfg.num_worker,
            collate_fn=self.val_dataset.collate_fn)
        # Historic attribute name; it holds the size of the validation set.
        self.len_train_dataset = len(self.val_dataset)
        self.model = build_model(opt.model)
        # The file is expected to contain a bare state_dict.
        checkpoint = torch.load(self.opt.weights_path)
        self.model.load_state_dict(checkpoint)
        self.cocoGt = COCO(cfg.test_json)

    def plot_one_box(
            self, x, img, color=None, label=None, line_thickness=None):
        """Draw the closed quadrilateral ``x`` and its label on ``img``.

        :param x: eight numbers ``x0, y0, ..., x3, y3`` (four corners in order)
        :param img: image array, modified in place
        :param color: line color; random when omitted
        :param label: text drawn at the first corner
        :param line_thickness: line width; derived from the image size when
            omitted
        """
        tl = line_thickness or round(
            0.001 * max(img.shape[0:2])) + 1  # line thickness
        color = color or [random.randint(0, 255) for _ in range(3)]
        corners = [(int(x[k]), int(x[k + 1])) for k in range(0, 8, 2)]
        # connect each corner to the next one, closing back to the first
        for start, end in zip(corners, corners[1:] + corners[:1]):
            cv2.line(img, start, end, color, tl)
        cv2.putText(img, label, corners[0], cv2.FONT_HERSHEY_PLAIN, 1,
                    (0, 0, 255), 1)

    def drow_box(self, anns):
        """Draw all detections of one image and append them to a text file.

        The annotated image is written to ``opt.output_folder`` under its
        original name; one line per detection is appended to
        ``<image stem>.txt`` in the same folder. Does nothing for an empty
        list.

        :param anns: detections of a single image; each needs ``image_id``,
            ``category_id``, ``score`` and ``segmentation`` (first entry: the
            eight corner coordinates)
        """
        if not anns:
            return
        image_id = anns[0]['image_id']
        assert all(ann['image_id'] == image_id for ann in anns)

        img_ann = self.cocoGt.loadImgs(ids=image_id)[0]
        img_name = img_ann['file_name']
        print('images:{}'.format(img_name))
        img_path = os.path.join(self.opt.image_folder, img_name)
        # Swap only the extension: replacing the substring 'png' left
        # non-PNG names unchanged, making the text log share the image path.
        txt_path = os.path.join(self.opt.output_folder,
                                os.path.splitext(img_name)[0] + '.txt')
        img = cv2.imread(img_path)

        color = (0, 0, 255)
        with open(txt_path, 'a') as f:
            for ann in anns:
                cat = self.cocoGt.loadCats(ids=ann['category_id'])[0]
                score = ann['score']
                label = '%s %.2f' % (cat['name'], score)
                coord = ann['segmentation'][0]
                f.write('%s %.2f %g %g %g %g %g %g %g %g \n' %
                        (cat['name'], score, coord[0], coord[1], coord[2],
                         coord[3], coord[4], coord[5], coord[6], coord[7]))
                self.plot_one_box(coord, img, color, label)
        cv2.imwrite(os.path.join(self.opt.output_folder, img_name), img)

    @torch.no_grad()
    def eval(self):
        """Run detection on the validation set, draw results and print COCO
        'segm' metrics, per-category AP and PR curves."""
        self.model.eval()

        # Recompute ground-truth areas from the masks.
        for ann in self.cocoGt.anns.values():
            ann['area'] = maskUtils.area(self.cocoGt.annToRLE(ann))

        iou_types = 'segm'
        anns = []
        for image, target, logit in self.val_dataloader:
            image = image.to(self.device)
            # image.shape[2]==image.shape[3]; image size after resizing
            image_size = image.shape[3]
            _, pred = self.model(image)

            # TODO: currently only batch_size=1 is supported
            pred = pred.unsqueeze(0)
            pred = pred[pred[:, :, 8] > cfg.conf_thresh]
            if pred.shape[0] == 0:
                continue  # nothing above the confidence threshold

            # NOTE(review): nms_thres is fed cfg.conf_thresh; confirm whether
            # a dedicated NMS threshold was intended.
            detections = non_max_suppression(pred.unsqueeze(0),
                                             cls_thres=cfg.cls_thresh,
                                             nms_thres=cfg.conf_thresh)
            new_ann = reorginalize_target(detections, logit, image_size,
                                          self.cocoGt)
            self.drow_box(new_ann)
            anns.extend(new_ann)

        if not anns:
            # loadRes cannot handle an empty result list.
            print('no detections above the confidence threshold; '
                  'skipping COCO evaluation')
            return

        for ann in anns:
            # convert polygon-form segmentation to RLE form
            ann['segmentation'] = self.cocoGt.annToRLE(ann)

        cocoDt = self.cocoGt.loadRes(anns)
        cocoEval = COCOeval(self.cocoGt, cocoDt, iou_types)
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        ap_per_category(self.cocoGt, cocoEval, cfg.max_epoch)
        draw_pr(self.cocoGt, cocoEval)
class CocoDataFlow(RNGDataFlow):
    """
    Tensorpack dataflow serving coco data points.
    """

    def __init__(self, target_size, annot_path, img_dir, select_ids=None):
        """
        Initializes dataflow.

        :param target_size: target size; its first entry scales ``Meta.scale``
        :param annot_path: path to the coco annotation file
        :param img_dir: directory containing images
        :param select_ids: (optional) identifiers of images to serve (for
            debugging)
        """
        self.img_dir = img_dir
        self.coco = COCO(annot_path)
        self.all_meta = []
        self.select_ids = select_ids
        self.target_size = target_size

    def prepare(self):
        """
        Loads coco metadata. Partially populates meta objects (image path,
        scale of main person, bounding box, area, joints). Remaining fields
        are populated in next steps - MapData tensorpack tranformer.
        """
        if self.select_ids:
            ids = self.select_ids
        else:
            ids = list(self.coco.imgs.keys())

        for i, img_key in enumerate(ids):
            img_meta = self.coco.imgs[img_key]

            # load annotations
            img_id = img_meta['id']
            img_file = img_meta['file_name']
            h, w = img_meta['height'], img_meta['width']
            img_path = os.path.join(self.img_dir, img_file)
            ann_ids = self.coco.getAnnIds(imgIds=img_id)
            anns = self.coco.loadAnns(ann_ids)

            total_keypoints = sum(
                [ann.get('num_keypoints', 0) for ann in anns])
            if total_keypoints == 0:
                continue

            persons = []
            prev_center = []
            masks = []
            keypoints = []

            # sort from the biggest person to the smallest one
            persons_ids = np.argsort([-a['area'] for a in anns],
                                     kind='mergesort')

            for person_idx in list(persons_ids):
                person_meta = anns[person_idx]

                if person_meta["iscrowd"]:
                    masks.append(self.coco.annToRLE(person_meta))
                    continue

                # skip this person if parts number is too low or if
                # segmentation area is too small
                if person_meta["num_keypoints"] < 5 or person_meta[
                        "area"] < 32 * 32:
                    masks.append(self.coco.annToRLE(person_meta))
                    continue

                person_center = [
                    person_meta["bbox"][0] + person_meta["bbox"][2] / 2,
                    person_meta["bbox"][1] + person_meta["bbox"][3] / 2
                ]

                # skip this person if the distance to existing person is too
                # small
                too_close = False
                for pc in prev_center:
                    a = np.expand_dims(pc[:2], axis=0)
                    b = np.expand_dims(person_center, axis=0)
                    dist = cdist(a, b)[0]
                    # pc[2] holds the larger bbox side of the earlier person
                    if dist < pc[2] * 0.3:
                        too_close = True
                        break
                if too_close:
                    # add mask of this person. we don't want to show the
                    # network unlabeled people
                    masks.append(self.coco.annToRLE(person_meta))
                    continue

                pers = Meta(img_path=img_path,
                            height=h,
                            width=w,
                            center=np.expand_dims(person_center, axis=0),
                            bbox=person_meta["bbox"],
                            area=person_meta["area"],
                            scale=person_meta["bbox"][3] / self.target_size[0],
                            num_keypoints=person_meta["num_keypoints"])
                keypoints.append(person_meta["keypoints"])
                persons.append(pers)
                prev_center.append(
                    np.append(
                        person_center,
                        max(person_meta["bbox"][2], person_meta["bbox"][3])))

            # Every accepted person becomes a data point; the joints list is
            # rebuilt per person so that no two metas share it.
            for person in persons:
                person.masks_segments = masks
                person.all_joints = JointsLoader.from_coco_keypoints(
                    keypoints, w, h)
                self.all_meta.append(person)

            if i % 1000 == 0:
                print("Loading image annot {}/{}".format(i, len(ids)))

    def save(self, path):
        """Not supported; raises ``NotImplementedError``."""
        # ``NotImplemented`` is a comparison sentinel, not an exception class.
        raise NotImplementedError

    def load(self, path):
        """Not supported; raises ``NotImplementedError``."""
        raise NotImplementedError

    def size(self):
        """
        :return: number of items
        """
        return len(self.all_meta)

    def get_data(self):
        """
        Generator of data points, in a freshly shuffled order on each call.

        :return: instance of Meta (wrapped in a one-element list)
        """
        idxs = np.arange(self.size())
        self.rng.shuffle(idxs)
        for idx in idxs:
            yield [self.all_meta[idx]]
class MSCOCO(PoseDataset):
    """Pose dataset backed by COCO person-keypoint annotations."""

    def load_dataset(self):
        """Build one ``DataItem`` per image from the COCO annotation file.

        The file is ``<dataset>/annotations/<dataset_ann>_<dataset_phase>.json``.
        Persons with keypoints contribute joints, visibilities and a decoded
        mask; persons without keypoints are merged into ``im_neg_mask``.
        Also sets ``self.coco`` and ``self.has_gt``.

        :return: list of ``DataItem``
        """
        dataset = self.cfg.dataset
        dataset_phase = self.cfg.dataset_phase
        dataset_ann = self.cfg.dataset_ann

        # initialize COCO api
        annFile = '%s/annotations/%s_%s.json' % (dataset, dataset_ann,
                                                 dataset_phase)
        self.coco = COCO(annFile)
        imgIds = self.coco.getImgIds()

        data = []
        # loop through each image
        for imgId in imgIds:
            item = DataItem()
            img = self.coco.loadImgs(imgId)[0]
            item.im_path = "%s/images/%s/%s" % (dataset, dataset_phase,
                                                img["file_name"])
            item.im_size = [3, img["height"], img["width"]]
            item.coco_id = imgId
            annIds = self.coco.getAnnIds(imgIds=img['id'], iscrowd=False)
            anns = self.coco.loadAnns(annIds)

            all_person_keypoints = []
            masked_persons_RLE = []
            visible_persons_RLE = []
            all_visibilities = []

            # Consider only images with people
            has_people = len(anns) > 0
            if not has_people and self.cfg.coco_only_images_with_people:
                continue

            for ann in anns:  # loop through each person
                if ann["num_keypoints"] != 0:
                    person_keypoints = []
                    visibilities = []
                    for i in range(self.cfg.num_joints):
                        x_coord = ann["keypoints"][3 * i]
                        y_coord = ann["keypoints"][3 * i + 1]
                        visibility = ann["keypoints"][3 * i + 2]
                        visibilities.append(visibility)
                        if visibility != 0:  # i.e. if labeled
                            person_keypoints.append([i, x_coord, y_coord])
                    all_person_keypoints.append(np.array(person_keypoints))
                    visible_persons_RLE.append(
                        maskUtils.decode(self.coco.annToRLE(ann)))
                    all_visibilities.append(visibilities)
                else:
                    # person without keypoints: masked out of the loss
                    masked_persons_RLE.append(self.coco.annToRLE(ann))

            item.joints = all_person_keypoints
            item.im_neg_mask = maskUtils.merge(masked_persons_RLE)
            if self.cfg.use_gt_segm:
                item.gt_segm = np.moveaxis(np.array(visible_persons_RLE), 0,
                                           -1)
            item.visibilities = all_visibilities
            data.append(item)

        # Compare by value: ``is not`` on a string literal tests identity.
        self.has_gt = self.cfg.dataset != "image_info"
        return data

    def compute_scmap_weights(self, scmap_shape, joint_id, data_item):
        """Return per-joint score-map weights that zero out masked persons.

        :param scmap_shape: score-map shape; its first two entries are used
        :param joint_id: unused
        :param data_item: item whose ``im_neg_mask`` is decoded and resized
        :return: array of shape ``size + (num_joints,)``
        """
        size = scmap_shape[0:2]
        scmask = np.ones(size)
        m = maskUtils.decode(data_item.im_neg_mask)
        if m.size:  # empty when the image had no masked persons
            scmask = 1.0 - imresize(m, size)
        scmask = np.stack([scmask] * self.cfg.num_joints, axis=-1)
        return scmask

    def get_pose_segments(self):
        """Return the joint-index pairs that form the skeleton segments."""
        return [[0, 1], [0, 2], [1, 3], [2, 4], [5, 7], [6, 8], [7, 9],
                [8, 10], [11, 13], [12, 14], [13, 15], [14, 16]]

    def visualize_coco(self, coco_img_results, visibilities):
        """Show the image of the first result with predicted and ground-truth
        keypoints side by side.

        :param coco_img_results: results in COCO result format (non-empty)
        :param visibilities: passed on to ``get_gt_visibilities``
        """
        inFile = "tmp.json"
        try:
            with open(inFile, 'w') as outfile:
                json.dump(coco_img_results, outfile)
            get_gt_visibilities(inFile, visibilities)

            # initialize cocoPred api
            cocoPred = self.coco.loadRes(inFile)
        finally:
            # remove the scratch file even when loading fails
            if os.path.exists(inFile):
                os.remove(inFile)

        imgIds = [coco_img_results[0]["image_id"]]

        for imgId in imgIds:
            img = cocoPred.loadImgs(imgId)[0]
            im_path = "%s/images/%s/%s" % (self.cfg.dataset,
                                           self.cfg.dataset_phase,
                                           img["file_name"])
            I = io.imread(im_path)

            fig = plt.figure()
            a = fig.add_subplot(2, 2, 1)
            plt.imshow(I)
            a.set_title('Initial Image')

            a = fig.add_subplot(2, 2, 2)
            plt.imshow(I)
            a.set_title('Predicted Keypoints')
            annIds = cocoPred.getAnnIds(imgIds=img['id'])
            anns = cocoPred.loadAnns(annIds)
            cocoPred.showAnns(anns)

            a = fig.add_subplot(2, 2, 3)
            plt.imshow(I)
            a.set_title('GT Keypoints')
            annIds = self.coco.getAnnIds(imgIds=img['id'])
            anns = self.coco.loadAnns(annIds)
            self.coco.showAnns(anns)

            plt.show()
class MSCOCO(PoseDataset):
    """Pose dataset backed by COCO person-keypoint annotations."""

    def __init__(self, cfg):
        """Fill the COCO joint layout into ``cfg`` and initialise the base
        class with it."""
        cfg.all_joints = [[0], [2, 1], [4, 3], [6, 5], [8, 7], [10, 9],
                          [12, 11], [14, 13], [16, 15]]
        cfg.all_joints_names = ["nose", 'eye', 'ear', 'shoulder', 'elbow',
                                'hand', 'hip', 'knee', 'foot']
        cfg.num_joints = 17
        super().__init__(cfg)

    def load_dataset(self):
        """Build one ``DataItem`` per image from the COCO annotation file.

        The file is ``<dataset>/annotations/<dataset_ann>_<dataset_phase>.json``.
        Persons with keypoints contribute joints, visibilities and a decoded
        mask; persons without keypoints are merged into ``im_neg_mask``.
        Also sets ``self.coco`` and ``self.has_gt``.

        :return: list of ``DataItem``
        """
        dataset = self.cfg.dataset
        dataset_phase = self.cfg.dataset_phase
        dataset_ann = self.cfg.dataset_ann

        # initialize COCO api
        annFile = '%s/annotations/%s_%s.json' % (dataset, dataset_ann,
                                                 dataset_phase)
        self.coco = COCO(annFile)
        imgIds = self.coco.getImgIds()

        data = []
        # loop through each image
        for imgId in imgIds:
            item = DataItem()
            img = self.coco.loadImgs(imgId)[0]
            item.im_path = "%s/images/%s/%s" % (dataset, dataset_phase,
                                                img["file_name"])
            item.im_size = [3, img["height"], img["width"]]
            item.coco_id = imgId
            annIds = self.coco.getAnnIds(imgIds=img['id'], iscrowd=False)
            anns = self.coco.loadAnns(annIds)

            all_person_keypoints = []
            masked_persons_RLE = []
            visible_persons_RLE = []
            all_visibilities = []

            # Consider only images with people
            has_people = len(anns) > 0
            if not has_people and self.cfg.coco_only_images_with_people:
                continue

            for ann in anns:  # loop through each person
                if ann["num_keypoints"] != 0:
                    person_keypoints = []
                    visibilities = []
                    for i in range(self.cfg.num_joints):
                        x_coord = ann["keypoints"][3 * i]
                        y_coord = ann["keypoints"][3 * i + 1]
                        visibility = ann["keypoints"][3 * i + 2]
                        visibilities.append(visibility)
                        if visibility != 0:  # i.e. if labeled
                            person_keypoints.append([i, x_coord, y_coord])
                    all_person_keypoints.append(np.array(person_keypoints))
                    visible_persons_RLE.append(
                        maskUtils.decode(self.coco.annToRLE(ann)))
                    all_visibilities.append(visibilities)
                else:
                    # person without keypoints: masked out of the loss
                    masked_persons_RLE.append(self.coco.annToRLE(ann))

            item.joints = all_person_keypoints
            item.im_neg_mask = maskUtils.merge(masked_persons_RLE)
            if self.cfg.use_gt_segm:
                item.gt_segm = np.moveaxis(np.array(visible_persons_RLE), 0,
                                           -1)
            item.visibilities = all_visibilities
            data.append(item)

        # Compare by value: ``is not`` on a string literal tests identity.
        self.has_gt = self.cfg.dataset != "image_info"
        return data

    def compute_scmap_weights(self, scmap_shape, joint_id, data_item):
        """Return per-joint score-map weights that zero out masked persons.

        :param scmap_shape: score-map shape; its first two entries are used
        :param joint_id: unused
        :param data_item: item whose ``im_neg_mask`` is decoded and resized
        :return: array of shape ``size + (num_joints,)``
        """
        size = scmap_shape[0:2]
        scmask = np.ones(size)
        m = maskUtils.decode(data_item.im_neg_mask)
        if m.size:  # empty when the image had no masked persons
            scmask = 1.0 - imresize(m, size)
        scmask = np.stack([scmask] * self.cfg.num_joints, axis=-1)
        return scmask

    def get_pose_segments(self):
        """Return the joint-index pairs that form the skeleton segments."""
        return [[0, 1], [0, 2], [1, 3], [2, 4], [5, 7], [6, 8], [7, 9],
                [8, 10], [11, 13], [12, 14], [13, 15], [14, 16]]

    def visualize_coco(self, coco_img_results, visibilities):
        """Show the image of the first result with predicted and ground-truth
        keypoints side by side.

        :param coco_img_results: results in COCO result format (non-empty)
        :param visibilities: passed on to ``get_gt_visibilities``
        """
        inFile = "tmp.json"
        try:
            with open(inFile, 'w') as outfile:
                json.dump(coco_img_results, outfile)
            get_gt_visibilities(inFile, visibilities)

            # initialize cocoPred api
            cocoPred = self.coco.loadRes(inFile)
        finally:
            # remove the scratch file even when loading fails
            if os.path.exists(inFile):
                os.remove(inFile)

        imgIds = [coco_img_results[0]["image_id"]]

        for imgId in imgIds:
            img = cocoPred.loadImgs(imgId)[0]
            im_path = "%s/images/%s/%s" % (self.cfg.dataset,
                                           self.cfg.dataset_phase,
                                           img["file_name"])
            I = io.imread(im_path)

            fig = plt.figure()
            a = fig.add_subplot(2, 2, 1)
            plt.imshow(I)
            a.set_title('Initial Image')

            a = fig.add_subplot(2, 2, 2)
            plt.imshow(I)
            a.set_title('Predicted Keypoints')
            annIds = cocoPred.getAnnIds(imgIds=img['id'])
            anns = cocoPred.loadAnns(annIds)
            cocoPred.showAnns(anns)

            a = fig.add_subplot(2, 2, 3)
            plt.imshow(I)
            a.set_title('GT Keypoints')
            annIds = self.coco.getAnnIds(imgIds=img['id'])
            anns = self.coco.loadAnns(annIds)
            self.coco.showAnns(anns)

            plt.show()
# For every image id: read the image from disk, load its annotations and build
# one [y1, x1, y2, x2, cls, iscrowd] row per usable annotation.
# NOTE(review): this fragment appears to be cut off - `bboxes`, `Counts`,
# `counts` and `t` are assigned but never consumed in the visible part.
for i in imgIds[:]:
    Id = str(i)
    l = len(Id)
    # Replace the last len(Id) characters of `tname` with the id
    # (presumably `tname` is a zero-padded file-name template - verify).
    name = tname[:-l] + Id
    file = imgDir + name + '.jpg'
    # im = tf.gfile.FastGFile(file, 'rb').read()
    img = cv2.imread(file)
    h, w = img.shape[:2]
    AnnIds = cocoGt.getAnnIds([i], )
    Anns = cocoGt.loadAnns(AnnIds)
    bboxes = np.zeros((0, 6))
    Counts = []
    for Ann in Anns:
        tt = cocoGt.annToRLE(Ann)
        hh, ww = tt['size']
        counts = tt['counts']
        # Skip annotations whose mask size disagrees with the image on disk.
        if hh != h or ww != w:
            continue
        bbox = Ann['bbox']
        bbox = np.array(bbox)
        # bbox[:2] is used as one corner and bbox[2:] as the extent added to
        # it to obtain the opposite corner.
        x1, y1 = bbox[:2]
        x2, y2 = bbox[:2] + bbox[2:]
        catId = Ann['category_id']
        cls = catId2cls[catId]
        iscrowd = Ann['iscrowd']
        # Crowd annotations are dropped.
        if iscrowd==1:
            continue
        t = np.array([[y1, x1, y2, x2, cls, iscrowd]])
class _Trainer(object):
    """Training and evaluation driver: SGD with a per-epoch LambdaLR
    schedule, TensorBoard logging and COCO 'segm' evaluation."""

    def __init__(self):
        self.device = torch.device(cfg.device)
        self.max_epoch = cfg.max_epoch
        self.train_dataset = NewDataset(train_set=True)
        self.train_dataloader = DataLoader(
            self.train_dataset,
            batch_size=cfg.batch_size,
            shuffle=True,
            num_workers=cfg.num_worker,
            collate_fn=self.train_dataset.collate_fn)
        self.val_dataset = NewDataset(train_set=False)
        self.val_dataloader = DataLoader(
            self.val_dataset,
            batch_size=1,
            shuffle=True,
            num_workers=cfg.num_worker,
            collate_fn=self.val_dataset.collate_fn)
        self.len_train_dataset = len(self.train_dataset)
        self.model = build_model(cfg.model)
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=cfg.lr_start,
                                         momentum=cfg.momentum,
                                         weight_decay=cfg.weight_decay)
        # Both schedules map an EPOCH index to a multiplicative LR factor that
        # goes from 1.0 to 0.2 over cfg.max_epoch epochs.
        if cfg.linear_lr:
            # max(..., 1) avoids a division by zero when max_epoch == 1
            last_epoch = max(cfg.max_epoch - 1, 1)
            lf = lambda x: (1 - x / last_epoch) * (1.0 - 0.2) + 0.2  # linear
        else:
            lf = one_cycle(1, 0.2, cfg.max_epoch)  # cosine 1 -> 0.2
        self.scheduler = LambdaLR(self.optimizer, lr_lambda=lf)
        self.writer = SummaryWriter(cfg.tensorboard_path)
        self.iter = 0
        self.cocoGt = COCO(cfg.test_json)

    def put_log(self, epoch_index, mean_loss, time_per_iter):
        """Print the running losses and write them to TensorBoard.

        :param epoch_index: zero-based epoch index
        :param mean_loss: ``[total, giou, conf, cls]`` running mean losses
        :param time_per_iter: seconds spent on the last iteration
        """
        iters_per_epoch = math.ceil(self.len_train_dataset / cfg.batch_size)
        lr = self.optimizer.param_groups[0]['lr']
        print(
            "[epoch:{}|{}] [iter:{}|{}] time:{}s loss:{} giou_loss:{} conf_loss:{} cls_loss:{} lr:{}"
            .format(epoch_index + 1, self.max_epoch, self.iter + 1,
                    iters_per_epoch, round(time_per_iter, 2),
                    round(mean_loss[0], 4), round(mean_loss[1], 4),
                    round(mean_loss[2], 4), round(mean_loss[3], 4), lr))
        step = epoch_index * iters_per_epoch + self.iter
        self.writer.add_scalar("loss", mean_loss[0], global_step=step)
        self.writer.add_scalar("giou loss", mean_loss[1], global_step=step)
        self.writer.add_scalar("conf loss", mean_loss[2], global_step=step)
        self.writer.add_scalar("cls loss", mean_loss[3], global_step=step)
        self.writer.add_scalar("learning rate", lr, global_step=step)

    def train_one_epoch(self, epoch_index, train_loss=None, train_lr=None):
        """Train for one epoch, then advance the LR schedule by one step.

        :param epoch_index: zero-based epoch index
        :param train_loss: optional list; the epoch's mean total loss
            (rounded to 4 digits) is appended
        :param train_lr: optional list; the learning rate used during this
            epoch is appended
        """
        mean_loss = [0, 0, 0, 0]
        self.model.train()
        for self.iter, train_data in enumerate(self.train_dataloader):
            start_time = time.time()
            image, target, _ = train_data
            image = image.to(self.device)
            output, pred = self.model(image)

            # compute the loss
            loss, loss_giou, loss_conf, loss_cls = build_loss(output, target)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            time_per_iter = time.time() - start_time  # time per iteration

            loss_items = [
                loss.item(),
                loss_giou.item(),
                loss_conf.item(),
                loss_cls.item()
            ]
            # incremental running mean over the iterations of this epoch
            mean_loss = [
                (mean_loss[i] * self.iter + loss_items[i]) / (self.iter + 1)
                for i in range(4)
            ]
            self.put_log(epoch_index, mean_loss, time_per_iter)

        # record the training loss and the learning rate of this epoch
        if isinstance(train_loss, list):
            train_loss.append(round(mean_loss[0], 4))
        if isinstance(train_lr, list):
            train_lr.append(self.optimizer.param_groups[0]["lr"])

        # The schedule lambdas are defined over epochs, so step once per
        # epoch. Stepping per batch drove the linear factor below its 0.2
        # floor and eventually negative.
        self.scheduler.step()

        if (epoch_index + 1) % cfg.save_step == 0:
            # Only the bare model state_dict is stored.
            torch.save(
                self.model.state_dict(), cfg.checkpoint_save_path +
                cfg.model + '_' + str(epoch_index + 1) + '.pth')

    @torch.no_grad()
    def eval(self, epoch_index, mAP_list=None):
        """Evaluate on the validation set with the COCO 'segm' metric.

        :param epoch_index: passed on to ``ap_per_category``
        :param mAP_list: optional list; ``cocoEval.stats[1]`` is appended, or
            ``0.0`` when the model produced no detections
        """
        self.model.eval()

        # Recompute ground-truth areas from the masks.
        for ann in self.cocoGt.anns.values():
            ann['area'] = maskUtils.area(self.cocoGt.annToRLE(ann))

        iou_types = 'segm'
        anns = []
        for image, target, logit in self.val_dataloader:
            image = image.to(self.device)
            # image.shape[2]==image.shape[3]; image size after resizing
            image_size = image.shape[3]
            _, pred = self.model(image)

            # TODO: currently only batch_size=1 is supported
            pred = pred.unsqueeze(0)
            pred = pred[pred[:, :, 8] > cfg.conf_thresh]
            if pred.shape[0] == 0:
                continue  # nothing above the confidence threshold

            # NOTE(review): nms_thres is fed cfg.conf_thresh; confirm whether
            # a dedicated NMS threshold was intended.
            detections = non_max_suppression(pred.unsqueeze(0),
                                             cls_thres=cfg.cls_thresh,
                                             nms_thres=cfg.conf_thresh)
            anns.extend(
                reorginalize_target(detections, logit, image_size,
                                    self.cocoGt))

        if not anns:
            # loadRes cannot handle an empty result list; record a zero score
            # so that mAP_list stays aligned with the evaluated epochs.
            print('no detections above the confidence threshold; '
                  'skipping COCO evaluation')
            if isinstance(mAP_list, list):
                mAP_list.append(0.0)
            return

        for ann in anns:
            # convert polygon-form segmentation to RLE form
            ann['segmentation'] = self.cocoGt.annToRLE(ann)

        cocoDt = self.cocoGt.loadRes(anns)
        cocoEval = COCOeval(self.cocoGt, cocoDt, iou_types)
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        ap_per_category(self.cocoGt, cocoEval, epoch_index)

        voc_mAP = cocoEval.stats[1]
        if isinstance(mAP_list, list):
            mAP_list.append(voc_mAP)
def main():
    """Merge COCO annotations that have no LVIS counterpart into an LVIS file.

    Command-line flags: ``--lvis`` and ``--coco`` (annotation JSON paths),
    ``--mapping`` (COCO category name -> synset JSON), ``--output-json``
    (destination file) and ``--iou-thresh``.

    For every image annotated in both datasets, a COCO annotation is appended
    to the LVIS annotations unless its mask IoU with some LVIS annotation of
    the same image exceeds the threshold, or its category has no LVIS
    equivalent. Appended annotations get the LVIS category id and a fresh
    annotation id above the highest existing LVIS id.
    """
    # The first line of the module docstring (if any) doubles as the CLI
    # description.
    description = __doc__.split('\n')[0] if __doc__ else ''
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--lvis', type=Path, required=True)
    parser.add_argument('--coco', type=Path, required=True)
    parser.add_argument('--mapping',
                        type=Path,
                        default=ROOT / 'data/lvis_coco_to_synset.json')
    parser.add_argument('--output-json', type=Path, required=True)
    parser.add_argument(
        '--iou-thresh',
        default=0.7,
        type=float,
        help=('If a COCO annotation overlaps with an LVIS annotations with '
              'IoU over this threshold, we use only the LVIS annotation.'))
    args = parser.parse_args()

    output_path = args.output_json
    output_path.parent.mkdir(exist_ok=True, parents=True)
    common_setup(output_path.name + '.log', output_path.parent, args)

    coco = COCO(args.coco)
    lvis = COCO(args.lvis)

    # COCO category id -> LVIS category id, joined through the synset name.
    synset_to_lvis_id = {x['synset']: x['id'] for x in lvis.cats.values()}
    with open(args.mapping, 'r') as f:
        name_mapping = json.load(f)
    coco_to_lvis_category = {}
    for category in coco.cats.values():
        mapped = name_mapping[category['name']]
        assert mapped['coco_cat_id'] == category['id']
        synset = mapped['synset']
        if synset in synset_to_lvis_id:
            coco_to_lvis_category[category['id']] = synset_to_lvis_id[synset]
        else:
            logging.debug(
                f'Found no LVIS category for "{category["name"]}" from COCO')

    # Sanity check: an image id shared by both datasets must refer to the
    # same file.
    for image_id, image in coco.imgs.items():
        if image_id not in lvis.imgs:
            logging.info(
                f'Image {image_id} in COCO, but not annotated in LVIS')
            continue
        assert image['file_name'] in lvis.imgs[image_id]['file_name']

    # New annotation ids continue after the largest id already used by LVIS.
    first_free_id = max(x['id'] for x in lvis.anns.values()) + 1
    ann_id_generator = itertools.count(first_free_id)

    new_annotations = []
    for image_id, lvis_anns in tqdm(lvis.imgToAnns.items()):
        if image_id not in coco.imgToAnns:
            logging.info(
                f'Image {image_id} in LVIS, but not annotated in COCO')
            continue
        coco_anns = coco.imgToAnns[image_id]

        # IoU matrix of shape (num_coco_anns, num_lvis_anns).
        coco_rles = [coco.annToRLE(x) for x in coco_anns]
        lvis_rles = [lvis.annToRLE(x) for x in lvis_anns]
        mask_iou = mask_util.iou(coco_rles,
                                 lvis_rles,
                                 pyiscrowd=np.zeros(len(lvis_anns)))
        does_overlap = mask_iou.max(axis=1) > args.iou_thresh

        for ann, overlapped in zip(coco_anns, does_overlap):
            if overlapped or ann['category_id'] not in coco_to_lvis_category:
                continue
            # The COCO annotation dict is rewritten in place before reuse.
            ann['category_id'] = coco_to_lvis_category[ann['category_id']]
            ann['id'] = next(ann_id_generator)
            new_annotations.append(ann)

    with open(args.lvis, 'r') as f:
        merged = json.load(f)
    merged['annotations'].extend(new_annotations)
    with open(output_path, 'w') as f:
        json.dump(merged, f)
def get_mask_form_anns(anns: List[Any], coco: COCO) -> Image.Image:
    """Render the union of several COCO annotations as one binary mask image.

    Args:
        anns: Non-empty list of COCO annotation dicts belonging to one image.
        coco: COCO API object used to convert each annotation to RLE.

    Returns:
        An 8-bit greyscale (``'L'``) PIL image whose pixels are 1 where at
        least one annotation covers the pixel and 0 elsewhere.

    Raises:
        IndexError: If ``anns`` is empty (the mask size is taken from the
            first annotation).
    """
    mask = np.array(decode(coco.annToRLE(anns[0])))
    for ann in anns[1:]:
        # Combine with OR rather than ``+=``: the decoded masks are uint8, so
        # summing would wrap back to 0 for a pixel covered by 256 masks.
        mask |= np.array(decode(coco.annToRLE(ann)))
    mask = (mask > 0).astype(np.uint8)
    return Image.fromarray(mask, mode='L')
class PoseInfo:
    """Index of COCO 2014 person-keypoint images and their annotations.

    On construction the annotation file is loaded, every image that exists on
    disk and has at least one labelled keypoint is wrapped in a ``CocoMeta``
    and stored in ``self.metas``.
    """

    # Class-level defaults kept for backward compatibility with code that
    # reads ``PoseInfo.metas``; every instance gets its own list in __init__.
    metas = []
    metas_test = []

    def __init__(self, data_dir, data_type, anno_path):
        """Load the annotations and build ``self.metas``.

        Args:
            data_dir: Dataset root containing ``images/`` and ``annotations/``.
            data_type: Split prefix, combined as ``<data_type>2014``.
            anno_path: Accepted but not used; the annotation path is derived
                from ``data_dir`` and ``data_type``.
        """
        # Per-instance list: appending to the class-level list would make all
        # PoseInfo objects share (and keep growing) the same results.
        self.metas = []
        self.data_dir = data_dir
        self.data_type = data_type
        self.image_base_dir = '{}/images/{}2014/'.format(data_dir, data_type)
        self.anno_path = '{}/annotations/person_keypoints_{}2014.json'.format(
            data_dir, data_type)
        self.coco = COCO(self.anno_path)
        self.get_image_annos()
        self.image_list = os.listdir(self.image_base_dir)

    @staticmethod
    def get_keypoints(annos_info):
        """Return only the ``keypoints``/``num_keypoints`` of each annotation."""
        return [{
            'keypoints': anno['keypoints'],
            'num_keypoints': anno['num_keypoints']
        } for anno in annos_info]

    def get_image_annos(self):
        """Populate ``self.metas`` from the loaded COCO annotations.

        Images missing on disk are skipped silently. For each remaining image
        the RLE masks of crowd annotations and of people with fewer than 5
        keypoints or an area below 32*32 are collected, and a ``CocoMeta`` is
        stored when the image has at least one labelled keypoint.
        """
        for image_id in self.coco.getImgIds():
            images_info = self.coco.loadImgs(image_id)
            image_path = self.image_base_dir + images_info[0]['file_name']
            # Some annotated images might not be present on disk.
            if not os.path.exists(image_path):
                continue

            annos_ids = self.coco.getAnnIds(imgIds=image_id)
            annos_info = self.coco.loadAnns(annos_ids)
            keypoints = self.get_keypoints(annos_info)

            # NOTE(review): prev_center is never appended to, so the
            # "too close" test below cannot trigger as written. Left as is
            # because populating it would change the generated masks.
            prev_center = []
            masks = []
            # Visit people from the biggest to the smallest one.
            persons_ids = np.argsort([-a['area'] for a in annos_info],
                                     kind='mergesort')
            for person_idx in list(persons_ids):
                person_meta = annos_info[person_idx]

                if person_meta["iscrowd"]:
                    masks.append(self.coco.annToRLE(person_meta))
                    continue

                # Mask out this person if too few parts are labelled or the
                # segmentation area is too small.
                if (person_meta["num_keypoints"] < 5
                        or person_meta["area"] < 32 * 32):
                    masks.append(self.coco.annToRLE(person_meta))
                    continue

                person_center = [
                    person_meta["bbox"][0] + person_meta["bbox"][2] / 2,
                    person_meta["bbox"][1] + person_meta["bbox"][3] / 2
                ]

                # Mask out this person if it is too close to an earlier one.
                too_close = False
                for pc in prev_center:
                    a = np.expand_dims(pc[:2], axis=0)
                    b = np.expand_dims(person_center, axis=0)
                    dist = cdist(a, b)[0]
                    if dist < pc[2] * 0.3:
                        too_close = True
                        break

                if too_close:
                    # We don't want to show the network unlabeled people.
                    masks.append(self.coco.annToRLE(person_meta))
                    continue

            total_keypoints = sum(
                ann.get('num_keypoints', 0) for ann in annos_info)
            if total_keypoints > 0:
                meta = CocoMeta(image_id, image_path, images_info[0],
                                keypoints, masks)
                self.metas.append(meta)

        print("Overall get {}".format(len(self.metas)))

    def load_images(self):
        """Placeholder; does nothing."""
        pass

    def get_image_list(self):
        """Return ``img_url`` of every collected meta, in order."""
        return [meta.img_url for meta in self.metas]

    def get_joint_list(self):
        """Return ``joint_list`` of every collected meta, in order."""
        return [meta.joint_list for meta in self.metas]

    def get_mask(self):
        """Return ``masks`` of every collected meta, in order."""
        return [meta.masks for meta in self.metas]
class PoseInfo:
    """COCO wrapper for pose estimation that keeps only images with people.

    Construction loads the annotation file and fills ``self.metas`` with one
    ``CocoMeta`` per image that exists on disk and carries at least one
    labelled keypoint.
    """

    def __init__(self, image_base_dir, anno_path, with_mask):
        """Load ``anno_path`` and index the images under ``image_base_dir``.

        ``with_mask`` controls whether RLE masks of people that should be
        ignored are collected for each image.
        """
        self.metas = []
        self.image_base_dir = image_base_dir
        self.anno_path = anno_path
        self.with_mask = with_mask
        self.coco = COCO(self.anno_path)
        self.get_image_annos()
        self.image_list = os.listdir(self.image_base_dir)

    @staticmethod
    def get_keypoints(annos_info):
        """Strip each annotation down to its keypoint fields."""
        return [
            {
                'keypoints': anno['keypoints'],
                'num_keypoints': anno['num_keypoints']
            }
            for anno in annos_info
        ]

    def _collect_ignore_masks(self, anns):
        """Return RLE masks of the people in ``anns`` that are masked out.

        A person is masked when the annotation is a crowd, has fewer than 5
        keypoints, or covers less than 32*32 area.
        """
        masks = []
        # NOTE(review): nothing is ever added to prev_center, so the
        # proximity test below cannot fire as written.
        prev_center = []
        # Largest person first; mergesort keeps ties in annotation order.
        by_area = np.argsort([-a['area'] for a in anns], kind='mergesort')
        for p_id in list(by_area):
            person_meta = anns[p_id]

            if person_meta["iscrowd"]:
                masks.append(self.coco.annToRLE(person_meta))
                continue

            too_few_parts = person_meta["num_keypoints"] < 5
            if too_few_parts or person_meta["area"] < 32 * 32:
                masks.append(self.coco.annToRLE(person_meta))
                continue

            left, top, box_w, box_h = person_meta["bbox"][:4]
            person_center = [left + box_w / 2, top + box_h / 2]

            # Mask the person when it sits too close to an earlier one, so
            # the network is not shown unlabeled people.
            too_close = any(
                cdist(np.expand_dims(pc[:2], axis=0),
                      np.expand_dims(person_center, axis=0))[0] < pc[2] * 0.3
                for pc in prev_center)
            if too_close:
                masks.append(self.coco.annToRLE(person_meta))
        return masks

    def get_image_annos(self):
        """Walk all annotated images and append a meta for each usable one.

        Images whose file is missing are reported and skipped.
        """
        for image_id in self.coco.getImgIds():
            images_info = self.coco.loadImgs(image_id)
            image_path = os.path.join(self.image_base_dir,
                                      images_info[0]['file_name'])
            if not os.path.exists(image_path):
                print(
                    "[skip] json annotation found, but cannot found image: {}".
                    format(image_path))
                continue

            annos_info = self.coco.loadAnns(
                self.coco.getAnnIds(imgIds=image_id))
            keypoints = self.get_keypoints(annos_info)

            if self.with_mask:
                masks = self._collect_ignore_masks(annos_info)
            else:
                masks = []

            labelled = sum(
                [ann.get('num_keypoints', 0) for ann in annos_info])
            if labelled > 0:
                self.metas.append(
                    CocoMeta(image_id, image_path, images_info[0], keypoints,
                             masks))

        print("Overall get {} valid pose images from {} and {}".format(
            len(self.metas), self.image_base_dir, self.anno_path))

    def load_images(self):
        """Placeholder; does nothing."""
        pass

    def get_image_list(self):
        """Image URLs of all collected metas, in collection order."""
        return [meta.img_url for meta in self.metas]

    def get_joint_list(self):
        """Joint lists of all collected metas, in collection order."""
        return [meta.joint_list for meta in self.metas]

    def get_mask(self):
        """Mask lists of all collected metas, in collection order."""
        return [meta.masks for meta in self.metas]
class TrafficDataset(Dataset):
    """COCO-format detection dataset cropped and masked to a fixed region.

    ``mask.png`` in ``root`` marks the visible roadbed. Its bounding rectangle
    defines the crop applied to every image, and pixels outside the mask are
    blacked out. All usable images are loaded into memory at construction.
    """

    def __init__(self, ann_file, root, to_contiguous_class_mapping,
                 to_json_class_mapping, transforms):
        """Load annotations, the roadbed mask and every annotated image.

        Args:
            ann_file: COCO annotation JSON.
            root: Directory holding ``mask.png`` and the image files.
            to_contiguous_class_mapping: Category name -> contiguous label.
            to_json_class_mapping: Contiguous label -> category name.
            transforms: Optional callable applied to ``(img, target)``.
        """
        self.coco = COCO(ann_file)
        self.class_mapping = to_contiguous_class_mapping

        # Mask used to keep only the visible roadbed.
        MASK = Image.open(os.path.join(root, 'mask.png'))
        mask_arr = np.array(MASK)
        nonzero = np.argwhere(mask_arr)
        self.min_y, self.min_x = nonzero.min(axis=0)
        self.max_y, self.max_x = nonzero.max(axis=0) + 1
        rows = slice(self.min_y, self.max_y)
        cols = slice(self.min_x, self.max_x)
        mask_arr = mask_arr[rows, cols]
        assert mask_arr.shape == (769, 1920), mask_arr.shape
        MASK = Image.fromarray(mask_arr, MASK.mode)

        # Keep only images that have valid detection annotations and load.
        self.ids, self.images = [], []
        for img_id in sorted(self.coco.imgs.keys()):
            anno = self.coco.loadAnns(
                self.coco.getAnnIds(imgIds=img_id, iscrowd=None))
            if not has_valid_annotation(anno):
                continue
            path = os.path.join(root, self.coco.imgs[img_id]['file_name'])
            try:
                # Open, crop to the mask rectangle, black out masked pixels.
                source = Image.open(path)
                cropped = Image.fromarray(
                    np.array(source)[rows, cols], source.mode)
                black = Image.fromarray(np.zeros_like(cropped), cropped.mode)
                image = Image.composite(cropped, black, mask=MASK)
            except Exception as e:
                print("Failed to load image ", path, e)
                continue
            else:
                self.images.append(image)
                self.ids.append(img_id)

        # Dataset index -> image id in the JSON file.
        self.id_to_img_map = dict(enumerate(self.ids))

        # Contiguous label -> category id in the JSON file. Starts as a copy
        # holding category names, then each name is replaced by the id of the
        # first category carrying that name.
        json_ids = copy.deepcopy(to_json_class_mapping)
        self.contiguous_category_id_to_json_id = json_ids
        for class_id in json_ids:
            class_name = json_ids[class_id]
            matching = [
                x for x in self.coco.cats.values() if x['name'] == class_name
            ]
            json_ids[class_id] = matching[0]['id']

        self.transforms = transforms

        # Recompute every bbox from its mask to avoid coco-annotator bugs in
        # bbox coordinate conversion.
        from tqdm import tqdm
        for v in tqdm(list(self.coco.anns.values())):
            before = v['bbox']
            v['bbox'] = maskUtils.toBbox(self.coco.annToRLE(v))
            if sum(abs(before - v['bbox'])) > 1:
                print(f"Changed {before}->{v['bbox']}")

    def __len__(self):
        """Number of images that were loaded successfully."""
        return len(self.ids)

    def __getitem__(self, idx):
        """Return ``(img, target, idx)`` for the image at ``idx``.

        ``target`` carries boxes (converted to xyxy), a ``labels`` field with
        contiguous class labels and a ``masks`` field, all cropped to the
        mask rectangle; boxes that become empty are removed.
        """
        img = self.images[idx]
        anno = self.coco.loadAnns(self.coco.getAnnIds([self.ids[idx]]))

        xywh = torch.as_tensor([obj["bbox"] for obj in anno]).reshape(-1, 4)
        target = BoxList(xywh, img.size, mode="xywh").convert("xyxy")

        category_ids = [obj["category_id"] for obj in anno]
        labels = torch.tensor([
            self.class_mapping[self.coco.cats[c]['name']]
            for c in category_ids
        ])
        target.add_field("labels", labels)

        polygons = [obj["segmentation"] for obj in anno]
        target.add_field("masks", SegmentationMask(polygons, img.size))

        crop_box = [self.min_x, self.min_y, self.max_x, self.max_y]
        target = target.crop(crop_box).clip_to_image(remove_empty=True)

        if self.transforms:
            img, target = self.transforms(img, target)
        return img, target, idx

    def get_img_info(self, index):
        """Return the COCO image record, extended in place with crop info.

        Adds ``crop_x``, ``crop_y``, ``crop_w`` and ``crop_h`` to the record
        stored in ``self.coco.imgs`` and returns that same dict.
        """
        img_data = self.coco.imgs[self.ids[index]]
        img_data["crop_x"] = self.min_x
        img_data["crop_y"] = self.min_y
        img_data["crop_w"] = self.max_x - self.min_x
        img_data["crop_h"] = self.max_y - self.min_y
        return img_data
class COCOSemantic(SegmentationDataset):
    """COCO stuff semantic segmentation dataset for the panoptic task.

    Each sample is an RGB image and a label mask whose pixel values are
    indices into ``CAT_LIST`` (0 where no listed category was drawn).

    Parameters
    ----------
    root : string
        Path to the COCO dataset folder. Default is '~/.mxnet/datasets/coco'.
    split : string
        'train' selects the train2017 files; any other value selects the
        val2017 files.
    mode : string
        'train', 'val' or 'testval'; selects the joint image/mask transform
        used in ``__getitem__``.
    transform : callable, optional
        A function that transforms the image.

    Examples
    --------
    >>> from mxnet.gluon.data.vision import transforms
    >>> input_transform = transforms.Compose([
    >>>     transforms.ToTensor(),
    >>>     transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    >>> ])
    >>> trainset = COCOSemantic(split='train', mode='train',
    >>>                         transform=input_transform)
    """
    # Earlier revisions used the same ids without the leading 0
    # (NUM_CLASS = 53), and a 54-id list that had 183 ('other') instead of 0.
    CAT_LIST = [0, 92, 93, 95, 100, 107, 109, 112, 118, 119, 122, 125, 128,
                130, 133, 138, 141, 144, 145, 147, 148, 149, 151, 154, 155,
                156, 159, 161, 166, 168, 171, 175, 176, 177, 178, 180, 181,
                184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195,
                196, 197, 198, 199, 200]
    NUM_CLASS = 54

    def __init__(self, root=os.path.expanduser('~/.mxnet/datasets/coco'),
                 split='train', mode=None, transform=None, **kwargs):
        super(COCOSemantic, self).__init__(root, split, mode, transform,
                                           **kwargs)
        # pycocotools is imported lazily so the module loads without it.
        from pycocotools.coco import COCO
        from pycocotools import mask

        if split == 'train':
            banner, ann_rel, ids_rel, img_dir = (
                'train set',
                'annotations/stuff_train2017.json',
                'annotations/sem_train_ids_54_0.mx',
                'train2017')
        else:
            banner, ann_rel, ids_rel, img_dir = (
                'val set',
                'annotations/stuff_val2017.json',
                'annotations/sem_val_ids_54_0.mx',
                'val2017')
        print(banner)
        ann_file = os.path.join(root, ann_rel)
        ids_file = os.path.join(root, ids_rel)
        self.root = os.path.join(root, img_dir)

        self.coco = COCO(ann_file)
        self.coco_mask = mask

        # The list of qualified image ids is cached next to the annotations
        # and rebuilt only when the cache file is missing.
        if not os.path.exists(ids_file):
            all_ids = list(self.coco.imgs.keys())
            self.ids = self._preprocess(all_ids, ids_file)
        else:
            with open(ids_file, 'rb') as f:
                self.ids = pickle.load(f)
        self.transform = transform

    def __getitem__(self, index):
        """Return the transformed ``(img, mask)`` pair for sample ``index``."""
        img_id = self.ids[index]
        meta = self.coco.loadImgs(img_id)[0]
        img = Image.open(
            os.path.join(self.root, meta['file_name'])).convert('RGB')
        annotations = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))
        mask = Image.fromarray(
            self._gen_seg_mask(annotations, meta['height'], meta['width']))

        # Synchronized image/mask transform, chosen by mode.
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            img, mask = self._img_transform(img), self._mask_transform(mask)

        # General resize, normalize and toTensor.
        if self.transform is not None:
            img = self.transform(img)

        # Debug aid, only used for generating mask ground truth:
        # outdir = 'gt_outdir'
        # if not os.path.exists(outdir):
        #     os.makedirs(outdir)
        # outname = 'gt_mask_' + str(index) + '_' + str(img_id) + '.png'
        # mask_out = get_color_pallete(mask.asnumpy(), 'coco')
        # mask_out.save(os.path.join(outdir, outname))
        # print(str(img_id) + ' is saved.')
        return img, mask

    def __len__(self):
        """Number of qualified images."""
        return len(self.ids)

    def _gen_seg_mask(self, target, h, w):
        """Paint annotations ``target`` into an ``(h, w)`` uint8 label mask.

        Each annotation whose category is in ``CAT_LIST`` writes its index in
        that list, but only onto pixels that are still 0, so annotations
        painted earlier win where they overlap.
        """
        label_map = np.zeros((h, w), dtype=np.uint8)
        for instance in target:
            # Every annotation is converted to RLE before decoding.
            # Alternatives tried in other versions of this code:
            #   m = coco_mask.decode(instance['segmentation'])
            #   rle = coco_mask.frPyObjects(instance['segmentation'], h, w)
            #   m = coco_mask.decode(rle)
            m = self.coco_mask.decode(self.coco.annToRLE(instance))

            cat = instance['category_id']
            if cat not in self.CAT_LIST:
                continue
            c = self.CAT_LIST.index(cat)

            unlabeled = label_map == 0
            if m.ndim < 3:
                label_map[:, :] += unlabeled * (m * c)
            else:
                merged = ((np.sum(m, axis=2)) > 0) * c
                label_map[:, :] += unlabeled * merged.astype(np.uint8)
        return label_map

    def _preprocess(self, ids, ids_file):
        """Keep image ids with more than 1000 labelled pixels; cache them.

        The resulting list is pickled to ``ids_file`` and returned.
        """
        print("Preprocessing mask, this will take a while."
              "But don't worry, it only run once for each split.")
        total = len(ids)
        tbar = trange(total)
        new_ids = []
        for i in tbar:
            img_id = ids[i]
            annotations = self.coco.loadAnns(
                self.coco.getAnnIds(imgIds=img_id))
            meta = self.coco.loadImgs(img_id)[0]
            mask = self._gen_seg_mask(annotations, meta['height'],
                                      meta['width'])
            # Qualified means more than 1k labelled pixels.
            if (mask > 0).sum() > 1000:
                new_ids.append(img_id)
            tbar.set_description(
                'Doing: {}/{}, got {} qualified images'.format(
                    i, len(ids), len(new_ids)))
        print('Found number of qualified images: ', len(new_ids))
        with open(ids_file, 'wb') as f:
            pickle.dump(new_ids, f)
        return new_ids

    @property
    def classes(self):
        """Category names, in the same order as ``CAT_LIST``."""
        # Earlier revisions returned the same names without the leading
        # 'thing', optionally with 'other' right after 'window-other'.
        return (
            'thing',
            'banner', 'blanket', 'bridge', 'cardboard', 'counter', 'curtain',
            'door-stuff', 'floor-wood', 'flower', 'fruit', 'gravel', 'house',
            'light', 'mirror-stuff', 'net', 'pillow', 'platform',
            'playingfield', 'railroad', 'river', 'road', 'roof', 'sand',
            'sea', 'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick',
            'wall-stone', 'wall-tile', 'wall-wood', 'water-other',
            'window-blind', 'window-other',
            'tree-merged', 'fence-merged', 'ceiling-merged',
            'sky-other-merged', 'cabinet-merged', 'table-merged',
            'floor-other-merged', 'pavement-merged', 'mountain-merged',
            'grass-merged', 'dirt-merged', 'paper-merged',
            'food-other-merged', 'building-other-merged', 'rock-merged',
            'wall-other-merged', 'rug-merged')
# print('COCO categories: \n{}\n'.format(' '.join(nms))) # nms = set([cat['supercategory'] for cat in cats]) # print('COCO supercategories: \n{}'.format(' '.join(nms))) # get all images containing given categories, select one at random # catIds = coco.getCatIds(catNms=['person','dog','skateboard']); catIds = coco.getCatIds(catNms=['person']); imgIds = coco.getImgIds(catIds=catIds ); # imgIds = coco.getImgIds(imgIds = [324158]) img = coco.loadImgs(imgIds[np.random.randint(0,len(imgIds))])[0] #load and display image imgpath = '%s/images/%s/%s'%(dataDir,dataType,img['file_name']) labelspath = imgpath.replace('.jpg', '.txt') print(os.path.abspath(imgpath)) I = io.imread(imgpath) h,w,x = I.shape print(I.shape) annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None) anns = coco.loadAnns(annIds) for ann in anns: rle = coco.annToRLE(ann) bboxes = maskUtils.toBbox(rle) print(bboxes) ## load and display instance annotations # plt.imshow(I); plt.axis('off') # coco.showAnns(anns) # plt.show()
def main():
    """Append COCO annotations that LVIS is missing to an LVIS file.

    Command-line flags: ``--lvis`` and ``--coco`` (annotation JSON paths),
    ``--mapping`` (COCO category name -> synset JSON), ``--output``
    (destination JSON) and ``--iou-thresh``.

    For each image annotated in both datasets, a COCO annotation is added
    unless its mask IoU with some LVIS annotation of that image exceeds the
    threshold, or its category has no LVIS equivalent. Added annotations
    receive the LVIS category id and a new id above the highest LVIS id.
    """
    # Use first line of file docstring as description if it exists.
    parser = argparse.ArgumentParser(
        description=__doc__.split('\n')[0] if __doc__ else '',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--lvis', type=Path, required=True, help='lvis json path')
    parser.add_argument(
        '--coco', type=Path, required=True, help='coco json path')
    parser.add_argument(
        '--mapping',
        type=Path,
        required=True,
        help='synset mapping from coco to lvis')
    parser.add_argument('--output', type=Path, required=True)
    parser.add_argument(
        '--iou-thresh',
        default=0.7,
        type=float,
        help=('If a COCO annotation overlaps with an LVIS annotations with '
              'IoU over this threshold, we use only the LVIS annotation.'))
    args = parser.parse_args()
    args.output.parent.mkdir(exist_ok=True, parents=True)

    coco = COCO(args.coco)
    lvis = COCO(args.lvis)

    # Translate COCO category ids to LVIS category ids through the synset.
    # Mapping file format:
    #   "bench": {
    #       "coco_cat_id": 15,
    #       "meaning": "a long seat for more than one person",
    #       "synset": "bench.n.01"}
    # The category key is 'synset'; the previous 'syn set' spelling raised
    # KeyError on the first LVIS category.
    synset2lvis = {cat['synset']: cat['id'] for cat in lvis.cats.values()}
    coco2lvis = {}
    with open(args.mapping, 'r') as f:
        mapping = json.load(f)
    for cat in coco.cats.values():
        mapped = mapping[cat['name']]
        assert mapped['coco_cat_id'] == cat['id']
        synset = mapped['synset']
        if synset not in synset2lvis:
            print(f'Found no LVIS category for "{cat["name"]}" from COCO')
            continue
        coco2lvis[cat['id']] = synset2lvis[synset]

    # Sanity check: a shared image id must refer to the same file.
    for img_id in coco.imgs:
        if img_id in lvis.imgs:
            coco_name = coco.imgs[img_id]['file_name']
            lvis_name = lvis.imgs[img_id]['file_name']
            assert coco_name in lvis_name
        else:
            print(f'Image {img_id} in COCO, but not annotated in LVIS')

    # COCO annotations are appended after the LVIS ones, with ids continuing
    # from the highest LVIS annotation id.
    lvis_highest_id = max(x['id'] for x in lvis.anns.values())
    ann_id_generator = itertools.count(lvis_highest_id + 1)
    new_annotations = []
    for img_id, lvis_anns in tqdm(lvis.imgToAnns.items()):
        if img_id not in coco.imgToAnns:
            print(f'Image {img_id} in LVIS, but not annotated in COCO')
            continue
        coco_anns = coco.imgToAnns[img_id]
        # Compute IoU between coco_anns and lvis_anns
        # Shape (num_coco_anns, num_lvis_anns)
        mask_iou = mask_util.iou([coco.annToRLE(x) for x in coco_anns],
                                 [lvis.annToRLE(x) for x in lvis_anns],
                                 pyiscrowd=np.zeros(len(lvis_anns)))
        does_overlap = mask_iou.max(axis=1) > args.iou_thresh
        to_add = []
        for i, ann in enumerate(coco_anns):
            if does_overlap[i]:
                continue
            if ann['category_id'] not in coco2lvis:
                continue
            # The COCO annotation dict is rewritten in place before reuse.
            ann['category_id'] = coco2lvis[ann['category_id']]
            ann['id'] = next(ann_id_generator)
            to_add.append(ann)
        new_annotations.extend(to_add)

    with open(args.lvis, 'r') as f:
        merged = json.load(f)
    merged['annotations'].extend(new_annotations)
    with open(args.output, 'w') as f:
        json.dump(merged, f)
class Cowbird_Dataset(torch.utils.data.Dataset):
    """
    Dataset class for instance level task, including detection, instance
    segmentation, and single view reconstruction. Since data are in COCO
    format, this class utilize COCO API to do most of the dataloading.

    One sample corresponds to one annotation (instance), cropped to its
    dilated bounding box and resized to ``output_size`` x ``output_size``.
    """

    def __init__(self, root, annfile, scale_factor=0.25, output_size=256,
                 transform=None):
        """Index every annotation of the images that contain category 1.

        Args:
            root: Directory the image file names are relative to.
            annfile: COCO-format annotation JSON.
            scale_factor: Passed to ``dialate_boxes`` as ``s`` when enlarging
                each annotation's bbox.
            output_size: Side length of the square crops returned.
            transform: Optional callable applied to the cropped image; when
                omitted the crop becomes a float CHW tensor scaled by 1/255.
        """
        self.root = root
        self.coco = COCO(annfile)
        self.imgIds = self.coco.getImgIds(catIds=1)
        self.imgIds.sort()
        self.scale_factor = scale_factor
        self.output_size = output_size
        self.transform = transform
        self.data = self.get_data()

    def __getitem__(self, index):
        """Return ``(img, kpts, mask, meta)`` for the instance at ``index``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        data = self.data[index]
        # NOTE(review): the .float() calls below assume dialate_boxes returns
        # integer tensors for x, y, w, h -- confirm against that helper.
        x, y, w, h = data['bbox']

        # input image
        img = cv2.imread(data['imgpath'])
        if img is None:
            # cv2.imread signals failure by returning None; fail with the
            # path instead of an opaque TypeError on the slice below.
            raise FileNotFoundError(
                'cannot read image: {}'.format(data['imgpath']))
        img = img[y:y+h, x:x+w]
        img = cv2.resize(img, (self.output_size, self.output_size))
        if self.transform is not None:
            img = self.transform(img)
        else:
            img = torch.tensor(img).permute(2, 0, 1).float() / 255

        # keypoints: move into crop coordinates, then scale by the same
        # factor as the crop width. Only keypoints whose last column is
        # positive are touched.
        kpts = data['keypoints'].clone()
        valid = kpts[:, -1] > 0
        kpts[valid, :2] -= torch.tensor([x, y])
        kpts[valid, :2] *= self.output_size / w.float()

        # mask: decoded from RLE on the fly, cropped and resized like the
        # image
        mask = decode(data['rle'])
        mask = mask[y:y+h, x:x+w]
        mask = cv2.resize(mask, (self.output_size, self.output_size))
        mask = torch.tensor(mask).long()

        # meta: instance size expressed in output-crop pixels
        size = data['size'] * self.output_size / w.float()
        meta = {
            'imgpath': data['imgpath'],
            'size': size
        }
        return img, kpts, mask, meta

    def __len__(self):
        """Number of indexed instances."""
        return len(self.data)

    def get_data(self):
        """Collect the per-annotation records of all selected images."""
        data = []
        for imgId in self.imgIds:
            data.extend(self.load_data(imgId))
        return data

    def load_data(self, imgId):
        """Build one record per annotation of image ``imgId``.

        Each record holds the image path, the dilated bbox, the keypoints as
        an (N, 3) float tensor, the RLE mask and the larger side of the
        original bbox.
        """
        # The path is the same for every annotation of the image, so it is
        # resolved once.
        path = self.path_from_Id(imgId)
        anns = self.coco.loadAnns(self.coco.getAnnIds(imgIds=imgId))

        data = []
        for ann in anns:
            kpts = torch.tensor(ann['keypoints']).float().reshape(-1, 3)
            bbox = dialate_boxes([ann['bbox']], s=self.scale_factor)[0]
            data.append({
                'imgpath': path,
                'bbox': bbox,
                'keypoints': kpts,
                # to save memory, we store rle and convert to mask on the fly
                'rle': self.coco.annToRLE(ann),
                'size': max(ann['bbox'][2:])
            })
        return data

    def path_from_Id(self, imgId):
        """Return the image file path for ``imgId`` under ``self.root``."""
        img_dict = self.coco.loadImgs(imgId)[0]
        return os.path.join(self.root, img_dict['file_name'])