class DataWorker:
    """Load the MS COCO train/val parts, keep person images, build batches.

    The class reads the 2017 train and val annotation files, keeps only the
    images that contain the person category and exposes generators that load,
    resize and augment (image, mask) batches.
    """

    train_folder = 'train2017'
    test_folder = 'val2017'
    annotation_folder = 'annotations'
    # person
    category_id = 1

    def __init__(self, data_path, seed=7):
        """Read image descriptions and their annotations.

        :data_path (str): path to dataset's folder
        :seed (int): random seed; used for the train/val split and for the
            augmentation stream
        """
        self.seed = seed
        if os.path.islink(data_path):
            data_path = os.readlink(data_path)
        self.data_path = data_path
        self.train_folder = os.path.join(data_path, self.train_folder)
        self.test_folder = os.path.join(data_path, self.test_folder)
        self.annotation_folder = os.path.join(data_path,
                                              self.annotation_folder)
        self.coco_train = COCO(
            os.path.join(self.annotation_folder, 'instances_train2017.json'))
        self.coco_test = COCO(
            os.path.join(self.annotation_folder, 'instances_val2017.json'))
        # load information about images with class label
        self.train_images = self.coco_train.loadImgs(
            ids=self.coco_train.getImgIds(catIds=self.category_id))
        self.test_images = self.coco_test.loadImgs(
            ids=self.coco_test.getImgIds(catIds=self.category_id))
        # load annotations for loaded images; this happens before the split
        # below, so validation images are also found in train_annotations
        self.train_annotations = {
            img_desc['id']: self.coco_train.loadAnns(
                self.coco_train.getAnnIds(imgIds=img_desc['id'],
                                          iscrowd=False,
                                          catIds=self.category_id))
            for img_desc in self.train_images
        }
        self.test_annotations = {
            img_desc['id']: self.coco_test.loadAnns(
                self.coco_test.getAnnIds(imgIds=img_desc['id'],
                                         iscrowd=False,
                                         catIds=self.category_id))
            for img_desc in self.test_images
        }
        # pass the seed so the train/val split is reproducible
        self.train_images, self.val_images = train_test_split(
            self.train_images, test_size=0.2, random_state=self.seed)

    @property
    def train_shape(self):
        """Return train shape

        :return (int): number of train images
        """
        return len(self.train_images)

    @property
    def val_shape(self):
        """Return validation shape

        :return (int): number of validation images
        """
        return len(self.val_images)

    @property
    def test_shape(self):
        """Return test shape

        :return (int): number of test images
        """
        return len(self.test_images)

    def load_image_mask(self, image_desc):
        """Load image and corresponding mask

        :image_desc (dict): description of image in COCO format
        :return (tuple([N, M, 3], [N, M])): tuple of image and mask
        :raises IOError: if the image file cannot be read
        """
        if image_desc['id'] in self.train_annotations:
            folder = self.train_folder
            coco = self.coco_train
            annotations = self.train_annotations[image_desc['id']]
        else:
            folder = self.test_folder
            coco = self.coco_test
            annotations = self.test_annotations[image_desc['id']]

        image_path = os.path.join(folder, image_desc['file_name'])
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None
            raise IOError('Cannot read image: {}'.format(image_path))
        # reorder, because cv2 use BGR format
        image = image[:, :, [2, 1, 0]]

        masks = [coco.annToMask(i_img_ann) for i_img_ann in annotations]
        if masks:
            total_mask = np.bitwise_or.reduce(masks)
        else:
            # only crowd annotations were present and they are filtered out
            total_mask = np.zeros(image.shape[:2], dtype=np.uint8)
        return (image, total_mask)

    def batch_loader(self, images_descriptions, batch_size, height, width):
        """Load batches of images and resize with padding to given shape

        :images_descriptions (list): descriptions of images in coco format
        :batch_size (int): size of batch
        :height (int): height of processed images
        :width (int): width of processed images
        :return (generator): generator of batches with images and masks
            tuple of [batch_size, height, width, 3] and
            [batch_size, height, width]
        """
        for start_ind in range(0, len(images_descriptions), batch_size):
            shaped_images = []
            shaped_masks = []
            for image_desc in images_descriptions[start_ind:start_ind +
                                                  batch_size]:
                image, mask = self.load_image_mask(image_desc)
                shaped_images.append(resize_pad(image, height, width))
                shaped_masks.append(resize_pad(mask, height, width))
            # one concatenation per batch instead of one per image
            images = np.append(
                np.empty([0, height, width, 3], dtype=np.uint8),
                shaped_images, axis=0)
            masks = np.append(
                np.empty([0, height, width], dtype=np.float32),
                shaped_masks, axis=0)
            yield (images, masks)

    def batch_augmentation(self, image_generator, augment_args):
        """Augment batches of images

        :image_generator (generator): generator with batches of images
            tuple of [batch_size, height, width, 3] and
            [batch_size, height, width]
        :augment_args (dict): params for augmentation
        :return (generator): generator with batches of augmented images
            tuple of [batch_size, height, width, 3] and
            [batch_size, height, width]
        """
        augment = ImageDataGenerator(**augment_args)
        for images, masks in image_generator:
            # stack the mask as a 4th channel so that image and mask receive
            # exactly the same geometric transformation
            stacked = np.concatenate([images, masks[:, :, :, np.newaxis]],
                                     axis=-1)
            aug_batch = augment.flow(stacked,
                                     seed=self.seed,
                                     batch_size=stacked.shape[0],
                                     shuffle=False)
            # flow() loops indefinitely, so take exactly one augmented batch;
            # iterating over it would never advance to the next input batch
            aug_stacked = next(aug_batch)
            aug_images = aug_stacked[:, :, :, :3].astype(np.uint8)
            aug_masks = aug_stacked[:, :, :, 3]
            yield (aug_images, aug_masks)

    def batch_generator(self,
                        images_descriptions,
                        batch_size=100,
                        height=512,
                        width=512,
                        augment_args=None):
        """Pipeline that loads images by batches and augments them

        :images_descriptions (list): descriptions of images in coco format
        :batch_size (int): size of batch
        :height (int): height of processed images
        :width (int): width of processed images
        :augment_args (dict): params for augmentation; a default set is used
            when None
        :return (generator): generator with batches of images
            tuple of [batch_size, height, width, 3] and
            [batch_size, height, width]
        """
        if augment_args is None:
            augment_args = {
                'rotation_range': 15,
                'width_shift_range': 0.1,
                'height_shift_range': 0.1,
                'zoom_range': 0.25,
                'horizontal_flip': True,
                'brightness_range': [0.75, 1.25],
                'fill_mode': 'constant'
            }
        batch_gen = self.batch_augmentation(
            self.batch_loader(images_descriptions, batch_size, height, width),
            augment_args)
        for images, masks in batch_gen:
            yield (images, masks)
class InpaintingDatasetTest:
    """Test-time dataset that pastes a COCO object into a background image.

    Each entry of the module-level IMG_PAIRS describes which object to take,
    which background to use and where/how large the object should be pasted.
    """

    def initialize(self, opt, model):
        """Store the options and model and open the COCO annotation file."""
        self.opt = opt
        self.root = opt.dataroot
        self.annFile = opt.ann_path
        self.coco = COCO(self.annFile)
        self.dataset_size = len(IMG_PAIRS)
        self.model = model

    def load_coco_image(self, image_id, object_id=None, use_seg=False):
        """Read an image from disk and, optionally, the mask of one object.

        The file name is the last component of the image's 'coco_url'.
        ``object_id`` is the position of the wanted annotation in the image's
        annotation list. Returns ``(image, mask, image_path)``; ``mask`` is
        None when no object was requested.
        """
        info = self.coco.loadImgs(ids=image_id)[0]
        file_name = info['coco_url'].split('/')[-1]
        image_path = '{}/{}'.format(self.root, file_name)
        image = scipy.misc.imread(image_path, mode='RGB')

        if object_id is None:
            return image, None, image_path

        ann_ids = self.coco.getAnnIds(imgIds=info['id'], iscrowd=None)
        annotations = self.coco.loadAnns(ann_ids)
        mask = self.coco.annToMask(annotations[object_id])
        if use_seg is True:
            mask = get_segmentation(image, mask, self.model)
        return image, mask, image_path

    def compute_bounding_box(self, object_mask):
        """Return ``(x, y, height, width)`` of the non-zero mask region.

        Note that the mask is binarized in place (positive values become 1).
        """
        object_mask[object_mask > 0] = 1
        nonzero = np.where(object_mask != 0)
        row_idx = nonzero[0]
        col_idx = nonzero[1]
        top, bottom = np.min(row_idx), np.max(row_idx)
        left, right = np.min(col_idx), np.max(col_idx)
        return left, top, bottom - top + 1, right - left + 1

    def get_item(self, index):
        """Build the composite input dictionary for pair number ``index``."""
        pair = IMG_PAIRS[index]

        # load object image and mask, then the background image
        object_image, object_mask, image_path = self.load_coco_image(
            image_id=pair['object_img_id'],
            object_id=pair['object_id'],
            use_seg=self.opt.use_segmentation)
        background_image = self.load_coco_image(
            image_id=pair['background_img_id'])[0]

        src_height, src_width, _ = object_image.shape

        # size of the object in the source image; this call also binarizes
        # object_mask in place, which must happen before it is resized below
        _, _, ori_height, ori_width = self.compute_bounding_box(object_mask)

        # Scale the whole image so that the object reaches the requested
        # composite size.
        resize_shape = [
            int(src_height * pair['object_composite_height'] / ori_height),
            int(src_width * pair['object_composite_width'] / ori_width),
        ]

        object_chw = np.rollaxis(
            scipy.misc.imresize(object_image, resize_shape), 2, 0)

        # resize object mask and binarize it again
        mask_resized = scipy.misc.imresize(object_mask, resize_shape)
        mask_resized[mask_resized > 0] = 1

        # bounding box of the object in the resized image
        object_x, object_y, object_height, object_width = \
            self.compute_bounding_box(mask_resized)
        mask_resized = np.tile(mask_resized, (3, 1, 1))

        # normalize object image
        object_chw = object_chw / 122.5 - 1

        # image patch (and mask patch) that contains the object
        src_rows = slice(object_y, object_y + object_height)
        src_cols = slice(object_x, object_x + object_width)
        patch_with_bg = object_chw[:, src_rows, src_cols]
        mask_patch = mask_resized[:, src_rows, src_cols]
        patch_no_bg = np.copy(patch_with_bg)
        patch_no_bg[mask_patch == 0] = 0

        # resize and normalize the background image
        fine_size = self.opt.fineSize
        background_chw = np.rollaxis(
            scipy.misc.imresize(background_image, [fine_size, fine_size]),
            2, 0)
        background_chw = background_chw / 122.5 - 1

        # destination window inside the background
        new_object_x = pair['object_composite_x']
        new_object_y = pair['object_composite_y']
        dst_rows = slice(new_object_y, new_object_y + object_height)
        dst_cols = slice(new_object_x, new_object_x + object_width)

        # composition with the background of the patch removed
        composite_no_bg = np.copy(background_chw)
        composite_no_bg[:, dst_rows, dst_cols] = patch_no_bg

        # composition that keeps the background of the patch
        composite_with_bg = np.copy(background_chw)
        composite_with_bg[:, dst_rows, dst_cols] = patch_with_bg

        mask_composite = np.zeros(composite_no_bg.shape)
        mask_composite[:, dst_rows, dst_cols] = 1 - mask_patch

        mask_composite_object = np.zeros(composite_no_bg.shape)
        mask_composite_object[:, dst_rows, dst_cols] = mask_patch

        def as_batch(array):
            # float tensor with a leading batch dimension of 1
            return torch.from_numpy(array).float().unsqueeze(0)

        return {
            'input': as_batch(composite_no_bg),
            'mask': as_batch(mask_composite),
            'image': as_batch(background_chw),
            'feat': 0,
            'path': [image_path],
            'image_composite_with_bg': as_batch(composite_with_bg),
            'mask_composite_object': as_batch(mask_composite_object)
        }

    def __len__(self):
        """Number of configured image pairs."""
        return len(IMG_PAIRS)

    def name(self):
        """Name of the dataset."""
        return 'InpaintingDatasetGuided'
def generate_json_mask(ann_path, json_path, mask_dir, filelist_path,
                       masklist_path):
    """Convert COCO keypoint annotations to a JSON file plus miss-masks.

    For every image that keeps at least one person, the image name is written
    to ``filelist_path``, a ``.npy`` miss-mask is saved into ``mask_dir`` and
    its path is written to ``masklist_path`` (the two list files therefore
    stay line-aligned). All kept persons are dumped to ``json_path``.

    A person is skipped when it has fewer than 5 keypoints, an area below
    32 * 32, or when its centre is too close to an already kept person.

    Args:
        ann_path: path of the COCO keypoint annotation file.
        json_path: output path of the JSON description.
        mask_dir: directory that receives the ``.npy`` masks.
        filelist_path: output path of the image-name list.
        masklist_path: output path of the mask-path list.

    Raises:
        Exception: if an image has more than one crowd annotation.
    """
    # Position of each of the 17 COCO keypoints in the 18-keypoint layout
    # written to the JSON; slot 1 is filled below from the two shoulders.
    COCO_TO_OURS = [0, 15, 14, 17, 16, 5, 2, 6, 3, 7, 4, 11, 8, 12, 9, 13, 10]

    coco = COCO(ann_path)
    ids = list(coco.imgs.keys())
    lists = []

    with open(filelist_path, 'w') as filelist_fp, \
            open(masklist_path, 'w') as masklist_fp:
        for i, img_id in enumerate(ids):
            ann_ids = coco.getAnnIds(imgIds=img_id)
            img_anns = coco.loadAnns(ann_ids)

            name = coco.imgs[img_id]['file_name']
            height = coco.imgs[img_id]['height']
            width = coco.imgs[img_id]['width']

            persons = []
            person_centers = []
            for ann in img_anns:
                if ann['num_keypoints'] < 5 or ann['area'] < 32 * 32:
                    continue
                kpt = ann['keypoints']
                bbox = ann['bbox']

                # person center
                person_center = [
                    bbox[0] + bbox[2] / 2.0,
                    bbox[1] + bbox[3] / 2.0
                ]
                scale = bbox[3] / 368.0

                # skip this person if the distance to an existing person is
                # too small (pc[2] is the larger bbox side of that person)
                too_close = False
                for pc in person_centers:
                    dis = math.sqrt((person_center[0] - pc[0]) *
                                    (person_center[0] - pc[0]) +
                                    (person_center[1] - pc[1]) *
                                    (person_center[1] - pc[1]))
                    if dis < pc[2] * 0.3:
                        too_close = True
                        break
                if too_close:
                    continue

                dic = dict()
                dic['objpos'] = person_center
                dic['keypoints'] = np.zeros((17, 3)).tolist()
                dic['scale'] = scale
                for part in range(17):
                    dic['keypoints'][part][0] = kpt[part * 3]
                    dic['keypoints'][part][1] = kpt[part * 3 + 1]
                    # remap the COCO flag: 2 -> 1 (visible),
                    # 1 -> 0 (not visible), anything else -> 2 (not labeled)
                    if kpt[part * 3 + 2] == 2:
                        dic['keypoints'][part][2] = 1
                    elif kpt[part * 3 + 2] == 1:
                        dic['keypoints'][part][2] = 0
                    else:
                        dic['keypoints'][part][2] = 2

                persons.append(dic)
                person_centers.append(
                    np.append(person_center, max(bbox[2], bbox[3])))

            if len(persons) > 0:
                filelist_fp.write(name + '\n')
                info = dict()
                info['filename'] = name
                info['info'] = []
                for person in persons:
                    dic = dict()
                    dic['pos'] = person['objpos']
                    dic['keypoints'] = np.zeros((18, 3)).tolist()
                    dic['scale'] = person['scale']
                    # `part` (not `i`) so the image counter used for the
                    # progress message below is not overwritten
                    for part in range(17):
                        target = COCO_TO_OURS[part]
                        dic['keypoints'][target][0] = \
                            person['keypoints'][part][0]
                        dic['keypoints'][target][1] = \
                            person['keypoints'][part][1]
                        dic['keypoints'][target][2] = \
                            person['keypoints'][part][2]
                    # slot 1 is the midpoint of COCO keypoints 5 and 6
                    dic['keypoints'][1][0] = (person['keypoints'][5][0] +
                                              person['keypoints'][6][0]) * 0.5
                    dic['keypoints'][1][1] = (person['keypoints'][5][1] +
                                              person['keypoints'][6][1]) * 0.5
                    if person['keypoints'][5][2] == person['keypoints'][6][2]:
                        dic['keypoints'][1][2] = person['keypoints'][5][2]
                    elif (person['keypoints'][5][2] == 2
                          or person['keypoints'][6][2] == 2):
                        dic['keypoints'][1][2] = 2
                    else:
                        dic['keypoints'][1][2] = 0
                    info['info'].append(dic)
                lists.append(info)

                mask_all = np.zeros((height, width), dtype=np.uint8)
                mask_miss = np.zeros((height, width), dtype=np.uint8)
                flag = 0  # number of crowd annotations seen
                for p in img_anns:
                    if p['iscrowd'] == 1:
                        mask_crowd = coco.annToMask(p)
                        # remove the part already covered by other instances
                        temp = np.bitwise_and(mask_all, mask_crowd)
                        mask_crowd = mask_crowd - temp
                        flag += 1
                        continue
                    else:
                        mask = coco.annToMask(p)

                    mask_all = np.bitwise_or(mask, mask_all)

                    if p['num_keypoints'] <= 0:
                        mask_miss = np.bitwise_or(mask, mask_miss)

                if flag < 1:
                    mask_miss = np.logical_not(mask_miss)
                elif flag == 1:
                    mask_miss = np.logical_not(
                        np.bitwise_or(mask_miss, mask_crowd))
                    mask_all = np.bitwise_or(mask_all, mask_crowd)
                else:
                    raise Exception('crowd segments > 1')

                mask_path = os.path.join(mask_dir,
                                         name.split('.')[0] + '.npy')
                np.save(mask_path, mask_miss)
                masklist_fp.write(mask_path + '\n')

            if i % 1000 == 0:
                print("Processed {} of {}".format(i, len(ids)))

    print('write json file')

    with open(json_path, 'w') as fp:
        fp.write(json.dumps(lists))

    print('done!')
class Datahandler_COCO():
    """Serve (image, mask) batches from a COCO-format annotation file."""

    def __init__(self, image_dir, annotation_file):
        """Load the annotation file and group annotations per image.

        Args:
            image_dir: directory that contains the image files.
            annotation_file: path of the COCO-format JSON annotation file.
        """
        self.dataset, self.anns, self.cats, self.imgs = dict(), dict(), dict(
        ), dict()
        self.annotation_file = annotation_file
        self.image_dir = image_dir
        print("loading dataset")
        # close the file handle deterministically instead of leaking it
        with open(self.annotation_file, 'r') as annotation_fp:
            dataset = json.load(annotation_fp)
        self.coco = COCO(self.annotation_file)
        # Load all classes (Only Building in this version)
        self.classIds = self.coco.getCatIds()
        # Load all images
        self.image_ids = list(self.coco.imgs.keys())
        for image_id in self.image_ids:
            self.anns[image_id] = []
        # NOTE(review): category id 100 is hard-coded — confirm it exists in
        # every annotation file used with this class.
        self.categories = self.coco.loadCats([100])
        for annotation in dataset["annotations"]:
            self.anns[annotation["image_id"]].append(annotation)

    def get_mask(self, id):
        """Return the union of all annotation masks of image ``id``.

        An all-zero mask of the image size is returned when the image has no
        annotations.
        """
        annotations = self.anns[id]
        if not annotations:
            image_info = self.coco.imgs[id]
            return np.zeros((image_info["height"], image_info["width"]),
                            dtype=np.uint8)
        mask = self.coco.annToMask(annotations[0])
        for annotation in annotations[1:]:
            mask = mask | self.coco.annToMask(annotation)
        return mask

    def make_batches(self, batchsize=4, Train=True):
        """Yield ``(images, masks)`` batches, cycling over the data forever.

        Images and masks are resized to 320 x 320 and masks get a trailing
        channel axis. Items left over at the end of a pass are carried into
        the first batch of the next pass. ``Train`` is currently unused.
        """
        if not self.image_ids:
            # nothing to cycle over; returning avoids a busy infinite loop
            return
        batch_images = []
        batch_masks = []
        while True:
            for image_id in self.image_ids:
                filename = self.coco.imgs[image_id]["file_name"]
                path = os.path.join(self.image_dir, filename)
                print(path)
                img = cv2.imread(path)
                mask = self.get_mask(image_id)
                img = cv2.resize(img, (320, 320))
                mask = cv2.resize(mask, (320, 320))
                batch_images.append(img)
                batch_masks.append(mask)
                if len(batch_images) == batchsize:
                    yield (np.array(batch_images),
                           np.expand_dims(np.array(batch_masks), axis=-1))
                    batch_images = []
                    batch_masks = []

    def get_batch(self, batch_size=1, train=True):
        """Return the first ``(images, masks)`` batch of the dataset.

        Raises:
            ValueError: if the dataset contains no images.
        """
        batch = next(self.make_batches(batch_size, train), None)
        if batch is None:
            raise ValueError("dataset contains no images")
        # previously the mask array was returned twice and the images dropped
        images, masks = batch
        return images, masks
'min_crop_size': 0.3 } auth = ExtraAugmentation_new(distort, randrot, expand, randcrop) # for i in range(10): imn = imgp + res.loadImgs(imgids[i])[0]['file_name'] im = np.array(Image.open(imn).convert('RGB')) im = im[..., ::-1] # convert to GBR annids = res.getAnnIds(imgids[i]) anns = res.loadAnns(annids) boxes = [] masks = [] labels = [] for ann in anns: labels.append(ann['category_id']) boxes.append(ann['bbox']) masks.append(res.annToMask(ann)) boxes = np.array(boxes) boxes[:, 2] = boxes[:, 0] + boxes[:, 2] boxes[:, 3] = boxes[:, 1] + boxes[:, 3] # segs = np.array(segs) # segs is still a list for they may have different lenth labels = np.array(labels) imout = im.copy() for box, ma in zip(boxes, masks): cv2.rectangle(imout, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 1) imout[ma > 0] = imout[ma > 0] * 0.5 + ma[ma > 0, None] * np.array([255, 0, 0]) * 0.5 cv2.imwrite('a{}_0.jpg'.format(i), imout.astype(np.uint8)) segs = [sum(masks)] cv2.imwrite('a{}_0_s.jpg'.format(i), segs[0] * 255) im, boxes, masks, segs, labels = auth(im, boxes, masks, segs, labels) im_o = im.copy() print(im_o.shape)
class AugmentationDataset(CustomDataset):
    """COCO-format dataset with a single ``person`` class."""

    # The trailing comma matters: ('person') is just the string 'person',
    # so len(CLASSES) would be 6 and iteration would yield characters.
    CLASSES = ('person', )

    def load_annotations(self, ann_file):
        """Open the COCO annotation file and return the image infos.

        Args:
            ann_file (str): path of the COCO annotation file.

        Returns:
            list[dict]: COCO image records, each with an extra ``filename``
            key that mirrors ``file_name``.
        """
        self.coco = COCO(ann_file)
        self.cat_ids = self.coco.getCatIds()
        # labels start at 1
        self.cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.cat_ids)
        }
        self.img_ids = self.coco.getImgIds()
        img_infos = []
        for i in self.img_ids:
            info = self.coco.loadImgs([i])[0]
            info['filename'] = info['file_name']
            img_infos.append(info)
        return img_infos

    def get_ann_info(self, idx):
        """Return the parsed annotations of the image at position ``idx``."""
        img_id = self.img_infos[idx]['id']
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        ann_info = self.coco.loadAnns(ann_ids)
        return self._parse_ann_info(ann_info, self.with_mask)

    def _filter_imgs(self, min_size=32):
        """Filter images too small or without ground truths."""
        valid_inds = []
        ids_with_ann = {ann['image_id'] for ann in self.coco.anns.values()}
        for i, img_info in enumerate(self.img_infos):
            if self.img_ids[i] not in ids_with_ann:
                continue
            if min(img_info['width'], img_info['height']) >= min_size:
                valid_inds.append(i)
        return valid_inds

    def _parse_ann_info(self, ann_info, with_mask=True):
        """Parse bbox and mask annotation.

        Args:
            ann_info (list[dict]): Annotation info of an image.
            with_mask (bool): Whether to parse mask annotations.

        Returns:
            dict: A dict containing the following keys: bboxes, bboxes_ignore,
                labels, masks, mask_polys, poly_lens.
        """
        gt_bboxes = []
        gt_labels = []
        gt_bboxes_ignore = []
        # Two formats are provided.
        # 1. mask: a binary map of the same size of the image.
        # 2. polys: each mask consists of one or several polys, each poly is a
        # list of float.
        if with_mask:
            gt_masks = []
            gt_mask_polys = []
            gt_poly_lens = []
        for ann in ann_info:
            if ann.get('ignore', False):
                continue
            x1, y1, w, h = ann['bbox']
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue
            bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
            if ann['iscrowd']:
                gt_bboxes_ignore.append(bbox)
            else:
                gt_bboxes.append(bbox)
                gt_labels.append(self.cat2label[ann['category_id']])
                # Masks are collected only for non-crowd annotations so that
                # they stay aligned one-to-one with gt_bboxes / gt_labels.
                if with_mask:
                    gt_masks.append(self.coco.annToMask(ann))
                    mask_polys = [
                        p for p in ann['segmentation'] if len(p) >= 6
                    ]  # valid polygons have >= 3 points (6 coordinates)
                    poly_lens = [len(p) for p in mask_polys]
                    gt_mask_polys.append(mask_polys)
                    gt_poly_lens.extend(poly_lens)
        if gt_bboxes:
            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
            gt_labels = np.array(gt_labels, dtype=np.int64)
        else:
            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
            gt_labels = np.array([], dtype=np.int64)

        if gt_bboxes_ignore:
            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
        else:
            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

        ann = dict(
            bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)

        if with_mask:
            ann['masks'] = gt_masks
            # poly format is not used in the current implementation
            ann['mask_polys'] = gt_mask_polys
            ann['poly_lens'] = gt_poly_lens
        return ann
class CrowAiBuildingDataset(torch.utils.data.Dataset):
    """Detection dataset over a COCO-format building annotation file.

    Every item is ``(image, target)`` where ``target`` holds the keys
    ``boxes``, ``labels``, ``image_id``, ``iscrowd``, ``area`` and, when
    ``use_mask`` is set, ``masks``. All per-object entries have the same
    length.
    """

    def __init__(self,
                 images_dir,
                 annotation_file,
                 use_mask=False,
                 transforms=None):
        """Open the annotation file.

        Args:
            images_dir: directory that contains the image files.
            annotation_file: path of the COCO-format annotation file.
            use_mask: whether to add instance masks to the target.
            transforms: optional callable ``(image, target) -> (image,
                target)``.
        """
        self.images_dir = images_dir
        self.annotation_file = annotation_file
        self.transform = transforms
        self.use_mask = use_mask
        self.coco = COCO(self.annotation_file)
        self.class_Ids = self.coco.getCatIds()
        self.image_ids = self.coco.getImgIds()

    def __getitem__(self, i):
        """Return the RGB image and the target dict of item ``i``."""
        coco_image_id = self.image_ids[i]
        ann_ids = self.coco.getAnnIds(imgIds=[coco_image_id],
                                      catIds=self.class_Ids,
                                      iscrowd=None)
        anns = self.coco.loadAnns(ann_ids)
        img_name = self.coco.imgs[coco_image_id]['file_name']
        image = cv2.imread(os.path.join(self.images_dir, img_name))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        boxes = []
        labels = []
        masks = []
        for ann in anns:
            # the box is the extent of the first polygon, stored as a flat
            # [x0, y0, x1, y1, ...] list
            seg = ann["segmentation"]
            x_points = seg[0][::2]
            y_points = seg[0][1::2]
            x11 = min(x_points)
            x22 = max(x_points)
            y11 = min(y_points)
            y22 = max(y_points)
            category_id = 1
            # degenerate (zero width or height) boxes are dropped, together
            # with their mask, so every target entry keeps the same length
            if x11 != x22 and y11 != y22:
                boxes.append([x11, y11, x22, y22])
                labels.append(category_id)
                if self.use_mask:
                    masks.append(self.coco.annToMask(ann))

        # the dataset index (not the COCO image id) is used as image_id
        image_id = torch.tensor([i])
        iscrowd = torch.zeros((len(boxes), ), dtype=torch.int64)
        # reshape keeps a (0, 4) shape when the image has no valid box
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        if self.use_mask:
            if masks:
                masks = torch.stack(
                    [torch.as_tensor(m, dtype=torch.uint8) for m in masks])
            else:
                masks = torch.zeros((0, ) + tuple(image.shape[:2]),
                                    dtype=torch.uint8)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        if self.use_mask:
            target["masks"] = masks
        target["image_id"] = image_id
        target["iscrowd"] = iscrowd
        target["area"] = area

        if self.transform is not None:
            image, target = self.transform(image, target)

        return image, target

    def __len__(self):
        """Number of images in the annotation file."""
        return len(self.image_ids)
def coco_image_segmentation_stats(seg_mask_output_paths, annotation_paths,
                                  seg_mask_image_paths, verbose):
    """Count per-category pixels of COCO annotation files and save the stats.

    For every ``(output folder, annotation file, image folder)`` triple the
    function counts how many pixels each category covers (index 0 is the
    background), writes the normalized counts to
    ``class_counts_over_sum_category_counts.csv`` and a dictionary with all
    statistics to ``image_segmentation_class_stats.json`` in the output
    folder.

    Args:
        seg_mask_output_paths: output folders, one per annotation file.
        annotation_paths: COCO annotation files.
        seg_mask_image_paths: source image folders (only printed here).
        verbose: verbosity passed to the progress bar.
    """
    for (seg_mask_path, annFile, image_path) in zip(seg_mask_output_paths,
                                                    annotation_paths,
                                                    seg_mask_image_paths):
        print('Loading COCO Annotations File: ', annFile)
        print('Segmentation Mask Output Folder: ', seg_mask_path)
        print('Source Image Folder: ', image_path)
        stats_json = os.path.join(seg_mask_path,
                                  'image_segmentation_class_stats.json')
        print('Image stats will be saved to:', stats_json)
        cat_csv = os.path.join(seg_mask_path,
                               'class_counts_over_sum_category_counts.csv')
        print('Category weights will be saved to:', cat_csv)
        coco = COCO(annFile)
        print('Annotation file info:')
        coco.info()
        print('category ids, not including 0 for background:')
        print(coco.getCatIds())
        # display COCO categories and supercategories
        cats = coco.loadCats(coco.getCatIds())
        nms = [cat['name'] for cat in cats]
        print('categories: \n\n', ' '.join(nms))

        nms = set([cat['supercategory'] for cat in cats])
        print('supercategories: \n', ' '.join(nms))
        img_ids = coco.getImgIds()
        # ids() is a helper defined elsewhere in this file; its maximum is
        # used as the largest category id.
        max_ids = max(ids()) + 1  # add background category
        # 0 indicates no category (not even background) for counting bins
        max_bin_count = max_ids + 1
        bin_count = np.zeros(max_bin_count)
        total_pixels = 0

        print('Calculating image segmentation stats...')
        progbar = Progbar(len(img_ids), verbose=verbose)
        for done, img_id in enumerate(img_ids, start=1):
            img = coco.loadImgs(img_id)[0]
            progbar.update(done)
            ann_ids = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
            anns = coco.loadAnns(ann_ids)
            target_shape = (img['height'], img['width'], max_ids)
            mask_one_hot = np.zeros(target_shape, dtype=np.uint8)

            # Note to only count background pixels once, we define a temporary
            # null class of 0, and shift all class category ids up by 1
            mask_one_hot[:, :, 0] = 1  # every pixel begins as background

            for ann in anns:
                mask_partial = coco.annToMask(ann)
                mask_one_hot[mask_partial > 0,
                             ann['category_id']] = ann['category_id'] + 1
                mask_one_hot[mask_partial > 0, 0] = 0

            # minlength pads the histogram to a fixed number of bins so it
            # can be accumulated directly
            bin_count = bin_count + np.bincount(mask_one_hot.flatten(),
                                                minlength=max_bin_count)
            total_pixels += (img['height'] * img['width'])

        print('Final Tally:')
        # shift categories back down by 1
        bin_count = bin_count[1:]
        category_ids = range(bin_count.size)
        sum_category_counts = np.sum(bin_count)

        # normalized by the sum of all counts, so these sum to 1
        category_counts_over_sum_category_counts = \
            np.true_divide(bin_count.astype(np.float64), sum_category_counts)
        np.savetxt(cat_csv, category_counts_over_sum_category_counts)

        # sum will be >1 as a pixel can be in multiple categories
        category_counts_over_total_pixels = \
            np.true_divide(bin_count.astype(np.float64), total_pixels)

        # less common categories have more weight; unseen categories get 0
        category_counts_p_complement = \
            [1 - x if x > 0.0 else 0.0
             for x in category_counts_over_sum_category_counts]

        # less common categories have more weight; unseen categories get 0
        total_pixels_p_complement = \
            [1 - x if x > 0.0 else 0.0
             for x in category_counts_over_total_pixels]

        print(bin_count)
        stat_dict = {
            'total_pixels': total_pixels,
            'category_counts': dict(zip(category_ids, bin_count)),
            'sum_category_counts': sum_category_counts,
            'category_counts_over_sum_category_counts':
                dict(zip(category_ids,
                         category_counts_over_sum_category_counts)),
            'category_counts_over_total_pixels':
                dict(zip(category_ids, category_counts_over_total_pixels)),
            'category_counts_p_complement':
                dict(zip(category_ids, category_counts_p_complement)),
            'total_pixels_p_complement':
                dict(zip(category_ids, total_pixels_p_complement)),
            'ids': ids(),
            'categories': categories()
        }
        print(stat_dict)
        with open(stats_json, 'w') as fjson:
            json.dump(stat_dict, fjson, ensure_ascii=False)
def preproc(mode):
    """Write ``mask_miss`` and ``mask_all`` PNG files for one COCO split.

    ``mask_all`` marks every annotated person. ``mask_miss`` is white
    everywhere except on persons without keypoints and on the crowd region.
    Files are written to ``dataset/<mode>mask2017``.

    Args:
        mode: split name used to build the paths (e.g. ``'train'`` or
            ``'val'``).

    Raises:
        IOError: if an image file cannot be read.
        Exception: if an image has more than one crowd annotation.
    """
    dataset_dir = 'dataset'
    val_anno_path = os.path.join(
        dataset_dir, "annotations/person_keypoints_%s2017.json" % mode)
    val_images_dir = os.path.join(dataset_dir, "%s2017" % mode)
    val_masks_dir = os.path.join(dataset_dir, "%smask2017" % mode)

    if not os.path.exists(val_masks_dir):
        os.makedirs(val_masks_dir)

    coco = COCO(val_anno_path)
    ids = list(coco.imgs.keys())
    for i, img_id in enumerate(ids):
        ann_ids = coco.getAnnIds(imgIds=img_id)
        img_anns = coco.loadAnns(ann_ids)

        img_path = os.path.join(val_images_dir, "%012d.jpg" % (img_id))
        mask_miss_path = os.path.join(val_masks_dir,
                                      "mask_miss_%012d.png" % img_id)
        mask_all_path = os.path.join(val_masks_dir,
                                     "mask_all_%012d.png" % img_id)

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None
            raise IOError("Cannot read image: %s" % img_path)
        h, w = img.shape[:2]

        mask_all = np.zeros((h, w), dtype=np.uint8)
        mask_miss = np.zeros((h, w), dtype=np.uint8)
        flag = 0  # number of crowd annotations seen
        for p in img_anns:
            if p["iscrowd"] == 1:
                mask_crowd = coco.annToMask(p)
                # remove the part already covered by other instances
                temp = np.bitwise_and(mask_all, mask_crowd)
                mask_crowd = mask_crowd - temp
                flag += 1
                continue
            else:
                mask = coco.annToMask(p)

            mask_all = np.bitwise_or(mask, mask_all)

            if p["num_keypoints"] <= 0:
                mask_miss = np.bitwise_or(mask, mask_miss)

        if flag < 1:
            mask_miss = np.logical_not(mask_miss)
        elif flag == 1:
            mask_miss = np.logical_not(np.bitwise_or(mask_miss, mask_crowd))
            mask_all = np.bitwise_or(mask_all, mask_crowd)
        else:
            raise Exception("crowd segments > 1")

        # logical_not returns a boolean array; convert explicitly so that an
        # 8-bit image is handed to cv2.imwrite
        cv2.imwrite(mask_miss_path, mask_miss.astype(np.uint8) * 255)
        cv2.imwrite(mask_all_path, mask_all * 255)

        if (i % 1000 == 0):
            print("Processed %d of %d" % (i, len(ids)))

    print("Done !!!")
class coco_background_Dataset(object):
    """Instance-segmentation dataset over a COCO 2017 style folder.

    Every item is ``(img, target)`` where ``target`` holds ``boxes``
    (``[xmin, ymin, xmax, ymax]``), ``labels``, ``masks``, ``image_id``,
    ``area`` and ``iscrowd``. All per-object entries have the same length.
    """

    def __init__(self, coco_root, datasettype, transforms=None, num_classes=2):
        """
        :param coco_root: root folder with ``annotations`` and image folders
        :param datasettype: split name used in the paths, e.g. train or val
        :param transforms: optional callable ``(img, target) -> (img, target)``
        :param num_classes: when 2, every object is given label 1
        """
        # a transform callable that applies the data augmentation
        self.transforms = transforms
        self.annpath = os.path.join(coco_root, "annotations",
                                    'instances_' + datasettype + '2017.json')
        self.image_path = os.path.join(coco_root, datasettype + "2017")
        self.coco = COCO(self.annpath)
        self.num_classes = num_classes
        self.image_ids = self.coco.getImgIds()

    def __getitem__(self, idx):
        """Return the RGB image and the target dict of item ``idx``."""
        tumor_slices_id = self.image_ids[idx]
        # [0] extracts the single record from the returned list
        imgInfo = self.coco.loadImgs(tumor_slices_id)[0]
        imPath = os.path.join(self.image_path, imgInfo['file_name'])
        # load image
        img = Image.open(imPath).convert("RGB")
        # annotations that belong to this image
        annIds = self.coco.getAnnIds(imgIds=imgInfo['id'])
        anns = self.coco.loadAnns(annIds)

        masks = []
        boxes = []
        labels = []
        for ann in anns:
            # binary mask of this annotation
            mask = np.asarray(self.coco.annToMask(ann))
            pos = np.where(mask)
            if pos[0].size == 0:
                # an empty mask has no extent; np.min would raise on it
                continue
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            # corner format, unlike COCO's [xmin, ymin, width, height]
            boxes.append([xmin, ymin, xmax, ymax])
            masks.append(mask)
            label = int(self.coco.loadCats(ann['category_id'])[0]['id'])
            labels.append(label)
        num_objs = len(boxes)

        # convert everything into a torch.Tensor; reshape keeps a (0, 4)
        # shape for images without objects
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        # there is only one class, so set the labels to 1
        if self.num_classes == 2:
            labels = torch.ones((num_objs, ), dtype=torch.int64)
        else:
            labels = torch.as_tensor(labels, dtype=torch.int64)
        if masks:
            masks = torch.as_tensor(np.stack(masks), dtype=torch.uint8)
        else:
            width, height = img.size
            masks = torch.zeros((0, height, width), dtype=torch.uint8)

        image_id = torch.tensor([tumor_slices_id])
        # area of the corner-format box, not COCO's annotation area
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd, instances with iscrowd=True
        # will be ignored during evaluation.
        iscrowd = torch.zeros((num_objs, ), dtype=torch.int64)

        # one image maps to one target; the entries inside a target have a
        # variable but mutually consistent length
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of images in the annotation file."""
        return len(self.image_ids)
def coco_json_to_segmentation(seg_mask_output_paths, annotation_paths,
                              seg_mask_image_paths, verbose):
    """Convert COCO annotation files into per-image segmentation masks.

    For every ``(output folder, annotation file, image folder)`` triple two
    files are written per image, unless the target file already exists:

    * ``<name>.png`` - a single-channel label image in which each annotated
      pixel holds the ``category_id`` of the last annotation covering it;
    * ``<name>.npy`` - a one-hot ``(height, width, max(ids()) + 1)`` uint8
      array where channel 0 marks background.

    :param seg_mask_output_paths: output folders, one per annotation file
    :param annotation_paths: COCO annotation json files
    :param seg_mask_image_paths: source image folders (only printed here)
    :param verbose: forwarded to ``Progbar``
    """
    for (seg_mask_path, annFile, image_path) in zip(
            seg_mask_output_paths, annotation_paths, seg_mask_image_paths):
        print('Loading COCO Annotations File: ', annFile)
        print('Segmentation Mask Output Folder: ', seg_mask_path)
        print('Source Image Folder: ', image_path)
        print('\n'
              'WARNING: Each pixel can have multiple classes! That means '
              'class data overlaps. Also, single objects can be outlined '
              'multiple times because they were labeled by different people! '
              'In other words, even a single object may be segmented twice. '
              'This means the .png files are missing entire objects.\n\n'
              'Use of categorical one-hot encoded .npy files is recommended, '
              'but .npy files also have limitations, because the .npy files '
              'only have one label per pixel for each class, '
              'and currently take the union of multiple human class labels. '
              'Improving how your data is handled will improve your results '
              'so remember to consider that limitation. There is still '
              'an opportunity to improve how this training data is handled & '
              'integrated with your training scripts and utilities...')
        coco = COCO(annFile)

        print('Converting Annotations to Segmentation Masks...')
        mkdir_p(seg_mask_path)
        # Materialise once: dict views cannot be indexed on Python 3.
        annotated = list(coco.imgToAnns.values())
        total_imgs = len(annotated)
        progbar = Progbar(total_imgs + len(coco.getImgIds()), verbose=verbose)
        for img_num, img_anns in enumerate(annotated):
            # Both [0]'s extract the single element from a list.
            img = coco.loadImgs(img_anns[0]['image_id'])[0]
            root_name = os.path.splitext(img['file_name'])[0]
            filename = os.path.join(seg_mask_path, root_name + ".png")
            if os.path.exists(filename):
                progbar.update(img_num, [('file_fraction_already_exists', 1)])
                continue
            progbar.update(img_num, [('file_fraction_already_exists', 0)])
            print(filename)

            label_img = np.zeros((img['height'], img['width']), dtype=np.uint8)
            for ann in img_anns:
                # Later annotations overwrite earlier ones where they overlap.
                label_img[coco.annToMask(ann) > 0] = ann['category_id']
            Image.fromarray(label_img).save(filename)

        print('\nConverting Annotations to one hot encoded '
              'categorical .npy Segmentation Masks...')
        for img_id in coco.getImgIds():
            img = coco.loadImgs(img_id)[0]
            root_name = os.path.splitext(img['file_name'])[0]
            filename = os.path.join(seg_mask_path, root_name + ".npy")
            if os.path.exists(filename):
                progbar.add(1, [('file_fraction_already_exists', 1)])
                continue
            progbar.add(1, [('file_fraction_already_exists', 0)])

            # Masks keep the original image dimensions; one channel per id.
            target_shape = (img['height'], img['width'], max(ids()) + 1)
            ann_ids = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
            anns = coco.loadAnns(ann_ids)
            mask_one_hot = np.zeros(target_shape, dtype=np.uint8)
            mask_one_hot[:, :, 0] = 1  # every pixel begins as background
            for ann in anns:
                covered = coco.annToMask(ann) > 0
                mask_one_hot[covered, ann['category_id']] = 1
                mask_one_hot[covered, 0] = 0
            np.save(filename, mask_one_hot)
class CocoDataset(CustomDataset):
    """COCO-format detection dataset built on ``CustomDataset``."""

    CLASSES = (
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
        "truck", "boat", "traffic_light", "fire_hydrant", "stop_sign",
        "parking_meter", "bench", "bird", "cat", "dog", "horse", "sheep",
        "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
        "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
        "sports_ball", "kite", "baseball_bat", "baseball_glove", "skateboard",
        "surfboard", "tennis_racket", "bottle", "wine_glass", "cup", "fork",
        "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
        "broccoli", "carrot", "hot_dog", "pizza", "donut", "cake", "chair",
        "couch", "potted_plant", "bed", "dining_table", "toilet", "tv",
        "laptop", "mouse", "remote", "keyboard", "cell_phone", "microwave",
        "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
        "scissors", "teddy_bear", "hair_drier", "toothbrush",
    )

    def load_annotations(self, ann_file):
        """Load ``ann_file`` and return the image records.

        Side effects: sets ``self.coco``, ``self.cat_ids``, ``self.cat2label``
        (category id -> 1-based label) and ``self.img_ids``. Each returned
        record gets a ``filename`` key mirroring ``file_name``.
        """
        self.coco = COCO(ann_file)
        self.cat_ids = self.coco.getCatIds()
        self.cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.cat_ids)
        }
        self.img_ids = self.coco.getImgIds()
        img_infos = []
        for img_id in self.img_ids:
            info = self.coco.loadImgs([img_id])[0]
            info["filename"] = info["file_name"]
            img_infos.append(info)
        return img_infos

    def get_ann_info(self, idx):
        """Return the parsed annotations of the image at position ``idx``."""
        img_id = self.img_infos[idx]["id"]
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        ann_info = self.coco.loadAnns(ann_ids)
        return self._parse_ann_info(ann_info, self.with_mask)

    def _filter_imgs(self, min_size=32):
        """Filter images too small or without ground truths."""
        ids_with_ann = {ann["image_id"] for ann in self.coco.anns.values()}
        return [
            i for i, img_info in enumerate(self.img_infos)
            if self.img_ids[i] in ids_with_ann
            and min(img_info["width"], img_info["height"]) >= min_size
        ]

    def _parse_ann_info(self, ann_info, with_mask=True):
        """Parse bbox and mask annotation.

        Annotations flagged ``ignore``, with non-positive area, or with a
        width/height below 1 are dropped. Crowd annotations contribute only to
        ``bboxes_ignore``; masks and polygons are collected for non-crowd
        annotations only, so ``masks`` stays index-aligned with ``bboxes``.

        Args:
            ann_info (list[dict]): Annotation info of an image.
            with_mask (bool): Whether to parse mask annotations.

        Returns:
            dict: A dict containing the following keys: bboxes, bboxes_ignore,
                labels, masks, mask_polys, poly_lens. The last three are only
                present when ``with_mask`` is true.
        """
        gt_bboxes = []
        gt_labels = []
        gt_bboxes_ignore = []
        # Two formats are provided.
        # 1. mask: a binary map of the same size of the image.
        # 2. polys: each mask consists of one or several polys, each poly is a
        #    list of float.
        if with_mask:
            gt_masks = []
            gt_mask_polys = []
            gt_poly_lens = []
        for ann in ann_info:
            if ann.get("ignore", False):
                continue
            x1, y1, w, h = ann["bbox"]
            if ann["area"] <= 0 or w < 1 or h < 1:
                continue
            # Inclusive corner coordinates.
            bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
            if ann["iscrowd"]:
                gt_bboxes_ignore.append(bbox)
                # Crowd segmentations are not polygon lists and have no entry
                # in gt_bboxes, so they must not produce a mask either.
                continue
            gt_bboxes.append(bbox)
            gt_labels.append(self.cat2label[ann["category_id"]])
            if with_mask:
                gt_masks.append(self.coco.annToMask(ann))
                # valid polygons have >= 3 points (6 coordinates)
                mask_polys = [p for p in ann["segmentation"] if len(p) >= 6]
                gt_mask_polys.append(mask_polys)
                gt_poly_lens.extend(len(p) for p in mask_polys)

        if gt_bboxes:
            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
            gt_labels = np.array(gt_labels, dtype=np.int64)
        else:
            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
            gt_labels = np.array([], dtype=np.int64)

        if gt_bboxes_ignore:
            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
        else:
            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

        ann = dict(bboxes=gt_bboxes,
                   labels=gt_labels,
                   bboxes_ignore=gt_bboxes_ignore)

        if with_mask:
            ann["masks"] = gt_masks
            # poly format is not used in the current implementation
            ann["mask_polys"] = gt_mask_polys
            ann["poly_lens"] = gt_poly_lens
        return ann
class TinyDataset(object):
    """Dataset over a COCO-format json that serves train or test samples.

    In ``"train"`` mode an item is ``(img, target)`` where ``target`` is a
    dict with ``boxes``, ``labels``, ``image_id``, ``area``, ``iscrowd`` and
    ``masks``. In ``"test"`` mode an item is ``(img, image_id)``.
    """

    def __init__(self, root, transforms, mode):
        """
        :param root: folder holding the annotation json and the ``train`` /
            ``test`` image folders
        :param transforms: optional callable; called as
            ``transforms(img, target)`` in train mode and ``transforms(img)``
            in test mode
        :param mode: ``"train"`` (reads ``pascal_train.json``) or ``"test"``
            (reads ``test.json``)
        :raises ValueError: if ``mode`` is neither ``"train"`` nor ``"test"``
        """
        self.root = root
        self.transforms = transforms
        self.mode = mode
        if mode == "train":
            self.annojson = os.path.join(root, "pascal_train.json")
        elif mode == "test":
            self.annojson = os.path.join(root, "test.json")
        else:
            # Fail here rather than with an AttributeError on first access.
            raise ValueError(
                "mode must be 'train' or 'test', got {!r}".format(mode))
        self.annococo = COCO(self.annojson)
        # Cache the id order once instead of rebuilding it per item.
        self.img_ids = list(self.annococo.imgs.keys())

    def __getitem__(self, idx):
        """Return the sample at position ``idx`` (see class docstring)."""
        imgid = self.img_ids[idx]
        img_info = self.annococo.loadImgs(ids=imgid)

        if self.mode != "train":
            img_path = os.path.join(self.root, "test",
                                    img_info[0]['file_name'])
            img = Image.open(img_path).convert("RGB")
            if self.transforms is not None:
                img = self.transforms(img)
            return img, imgid

        img_path = os.path.join(self.root, "train", img_info[0]['file_name'])
        img = Image.open(img_path).convert("RGB")

        annids = self.annococo.getAnnIds(imgIds=imgid)
        anns = self.annococo.loadAnns(annids)

        boxes = []
        labels = []
        iscrowd = []
        masks = []
        for ann in anns:
            # COCO bbox is [xmin, ymin, width, height]; convert to corners.
            xmin, ymin, width, height = ann['bbox'][:4]
            boxes.append([xmin, ymin, xmin + width, ymin + height])
            labels.append(ann['category_id'])
            iscrowd.append(ann['iscrowd'])
            masks.append(
                torch.as_tensor(self.annococo.annToMask(ann),
                                dtype=torch.uint8))

        # reshape keeps the (N, 4) layout when the image has no annotations.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        if masks:
            masks = torch.stack(masks)
        else:
            img_w, img_h = img.size
            masks = torch.zeros((0, img_h, img_w), dtype=torch.uint8)
        iscrowd = torch.as_tensor(iscrowd, dtype=torch.uint8)
        image_id = torch.tensor([imgid])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {}
        target["boxes"] = boxes
        target["labels"] = torch.as_tensor(labels, dtype=torch.int64)
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        target["masks"] = masks

        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        """Return the number of images in the annotation file."""
        return len(self.annococo.imgs)
# Walk every row of img_data, cut the annotated object out of its image and
# prepare it as a network input.
total_count = 0
all_testing = []
# The category-id list does not change while iterating; fetch it once.
cat_ids = coco.getCatIds()
for i in range(np.shape(img_data)[0]):
    print(i)
    img_line = img_data[i]
    # Columns read here: 0 = image id, 5 = category id, 6 = annotation id.
    image_id = int(img_line[0])
    ann_ids = int(img_line[6])
    annotations = coco.loadAnns(ann_ids)[0]
    # Position of the row's category within the COCO category-id list.
    gt_vector = cat_ids.index(int(img_line[5]))
    object_mask = coco.annToMask(annotations)
    try:
        obj_im = fit_in_square(
            apply_mask(coco,
                       raw_image=load_image(image_id),
                       mask=object_mask,
                       keep='object',
                       crop=0.25), object_dim)
    except Exception as err:
        # Keep the ValueError contract but preserve the real cause, and let
        # KeyboardInterrupt / SystemExit propagate untouched.
        raise ValueError('wrong') from err
    object_input = prepare_input(obj_im)
    # all_testing.append(prepare_input(apply_mask(coco, raw_image = load_image(image_id), mask = object_mask, keep = 'object', crop = 0)))
# Build two per-image masks from the annotations in img_anns:
#   mask_all  - union of all non-crowd instance masks
#   mask_miss - union of non-crowd instances that have no labelled keypoints
# File names are the zero-padded 12-digit image id.
img_path = os.path.join(val_images_dir, "%012d.jpg" % img_id)
mask_miss_path = os.path.join(val_masks_dir, "mask_miss_%012d.png" % img_id)
mask_all_path = os.path.join(val_masks_dir, "mask_all_%012d.png" % img_id)
img = cv2.imread(img_path)
# The image is only read to obtain the mask dimensions.
h, w, c = img.shape
mask_all = np.zeros((h, w), dtype=np.uint8)
mask_miss = np.zeros((h, w), dtype=np.uint8)
# flag counts the crowd annotations seen for this image.
flag = 0
for p in img_anns:
    seg = p["segmentation"]
    if p["iscrowd"] == 1:
        # Remove from the crowd mask whatever is already covered by the
        # instances accumulated so far (order-dependent).
        mask_crowd = coco.annToMask(p)
        temp = np.bitwise_and(mask_all, mask_crowd)
        mask_crowd = mask_crowd - temp
        flag += 1
        continue
    else:
        mask = coco.annToMask(p)
    mask_all = np.bitwise_or(mask, mask_all)
    if p["num_keypoints"] <= 0:
        mask_miss = np.bitwise_or(mask, mask_miss)
# Without crowd annotations the final mask is simply the inverse of the
# "no keypoints" union.
if flag<1:
    mask_miss = np.logical_not(mask_miss)
elif flag == 1:
# NOTE(review): the snippet is cut off here; the body of this branch (and any
# following branches) is not visible in this file.
class MSCOCOSeq(BaseDataset):
    """ The COCO dataset. COCO is an image dataset. Thus, we treat each image as a sequence of length 1.

    Every non-crowd annotation becomes one "sequence".

    Publication:
        Microsoft COCO: Common Objects in Context.
        Tsung-Yi Lin, Michael Maire, Serge J. Belongie, Lubomir D. Bourdev, Ross B. Girshick, James Hays, Pietro Perona,
        Deva Ramanan, Piotr Dollar and C. Lawrence Zitnick
        ECCV, 2014
        https://arxiv.org/pdf/1405.0312.pdf

    Download the images along with annotations from http://cocodataset.org/#download. The root folder should be
    organized as follows.
        - coco_root
            - annotations
                - instances_train2014.json
            - images
                - train2014

    Note: You also have to install the coco pythonAPI from https://github.com/cocodataset/cocoapi.
    """

    def __init__(self, root=None, image_loader=jpeg4py_loader):
        """
        :param root: dataset root; falls back to ``env_settings().coco_dir``
        :param image_loader: callable that loads an image from a path
        """
        root = env_settings().coco_dir if root is None else root
        super().__init__(root, image_loader)

        self.img_pth = os.path.join(root, 'train2014/')
        self.anno_path = os.path.join(root,
                                      'annotations/instances_train2014.json')

        # Load the COCO set.
        self.coco_set = COCO(self.anno_path)

        self.cats = self.coco_set.cats

        # the parent class thing would happen in the sampler
        self.class_list = self.get_class_list()

        self.sequence_list = self._get_sequence_list()

        self.seq_per_class = self._build_seq_per_class()

    def _get_sequence_list(self):
        """Return the ids of all annotations with ``iscrowd == 0``."""
        return [
            ann_id for ann_id, ann in self.coco_set.anns.items()
            if ann['iscrowd'] == 0
        ]

    def is_video_sequence(self):
        return False

    def get_num_classes(self):
        return len(self.class_list)

    def get_name(self):
        return 'coco'

    def has_class_info(self):
        return True

    def get_class_list(self):
        """Return the category names in the iteration order of ``self.cats``."""
        return [cat['name'] for cat in self.cats.values()]

    def has_segmentation_info(self):
        return True

    def get_num_sequences(self):
        return len(self.sequence_list)

    def _build_seq_per_class(self):
        """Map each class name to the sequence indices belonging to it."""
        seq_per_class = {}
        for i, seq in enumerate(self.sequence_list):
            class_name = self.cats[
                self.coco_set.anns[seq]['category_id']]['name']
            seq_per_class.setdefault(class_name, []).append(i)
        return seq_per_class

    def get_sequences_in_class(self, class_name):
        return self.seq_per_class[class_name]

    def get_sequence_info(self, seq_id):
        """Return bbox, mask and validity flags for sequence ``seq_id``.

        ``bbox`` is a ``(1, 4)`` tensor taken from the annotation, ``mask`` a
        ``(1, H, W, 1)`` array, ``valid`` is true when the box width and
        height are positive and ``visible`` is a copy of ``valid``.
        """
        anno = self._get_anno(seq_id)

        bbox = torch.Tensor(anno['bbox']).view(1, 4)

        mask = np.array(self.coco_set.annToMask(anno))
        mask = mask.reshape(1, mask.shape[0], mask.shape[1], 1)

        valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0)
        visible = valid.clone()

        return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible}

    def _get_anno(self, seq_id):
        return self.coco_set.anns[self.sequence_list[seq_id]]

    def _get_frames(self, seq_id):
        """Load the image that the annotation of ``seq_id`` belongs to."""
        image_id = self.coco_set.anns[self.sequence_list[seq_id]]['image_id']
        path = self.coco_set.loadImgs([image_id])[0]['file_name']
        return self.image_loader(os.path.join(self.img_pth, path))

    def get_meta_info(self, seq_id):
        """Return class meta data; all fields are ``None`` when lookup fails."""
        try:
            cat_dict_current = self.cats[self.coco_set.anns[
                self.sequence_list[seq_id]]['category_id']]
            object_meta = OrderedDict({
                'object_class': cat_dict_current['name'],
                'motion_class': None,
                'major_class': cat_dict_current['supercategory'],
                'root_class': None,
                'motion_adverb': None
            })
        except Exception:
            # Best-effort fallback, but no longer swallows KeyboardInterrupt
            # or SystemExit the way a bare ``except:`` did.
            object_meta = OrderedDict({
                'object_class': None,
                'motion_class': None,
                'major_class': None,
                'root_class': None,
                'motion_adverb': None
            })
        return object_meta

    def get_frames(self, seq_id=None, frame_ids=None, anno=None):
        # COCO is an image dataset. Thus we replicate the image denoted by seq_id len(frame_ids) times, and return a
        # list containing these replicated images.
        frame = self._get_frames(seq_id)

        frame_list = [frame.copy() for _ in frame_ids]

        if anno is None:
            anno = self.get_sequence_info(seq_id)

        # Every annotation entry has a leading length-1 axis; drop it and
        # repeat the entry once per requested frame.
        anno_frames = {
            key: [value[0, ...] for _ in frame_ids]
            for key, value in anno.items()
        }

        object_meta = self.get_meta_info(seq_id)

        return frame_list, anno_frames, object_meta
class CocoDataset(BaseDataset):
    """COCO-format dataset producing the ``meta`` dicts consumed by the pipeline."""

    def get_data_info(self, ann_path):
        """
        Load basic information of dataset such as image path, label and so on.
        Also sets ``coco_api``, ``cat_ids``, ``cat2label`` (category id ->
        0-based label), ``cats`` and ``img_ids`` on the instance.
        :param ann_path: coco json file path
        :return: image info:
        [{'license': 2,
          'file_name': '000000000139.jpg',
          'coco_url': 'http://images.cocodataset.org/val2017/000000000139.jpg',
          'height': 426,
          'width': 640,
          'date_captured': '2013-11-21 01:34:01',
          'flickr_url': 'http://farm9.staticflickr.com/8035/8024364858_9c41dc1666_z.jpg',
          'id': 139},
         ...
        ]
        """
        self.coco_api = COCO(ann_path)
        self.cat_ids = sorted(self.coco_api.getCatIds())
        self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)}
        self.cats = self.coco_api.loadCats(self.cat_ids)
        self.img_ids = sorted(self.coco_api.imgs.keys())
        img_info = self.coco_api.loadImgs(self.img_ids)
        return img_info

    def get_img_annotation(self, idx):
        """
        load per image annotation
        Annotations flagged ``ignore``, with non-positive area, with width or
        height below 1, or with an unknown category are dropped. Crowd
        annotations only contribute to ``bboxes_ignore``.
        :param idx: index in dataloader
        :return: annotation dict
        """
        img_id = self.img_ids[idx]
        ann_ids = self.coco_api.getAnnIds([img_id])
        anns = self.coco_api.loadAnns(ann_ids)
        gt_bboxes = []
        gt_labels = []
        gt_bboxes_ignore = []
        if self.use_instance_mask:
            gt_masks = []
        if self.use_keypoint:
            gt_keypoints = []
        for ann in anns:
            if ann.get('ignore', False):
                continue
            x1, y1, w, h = ann['bbox']
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue
            # cat2label is keyed by exactly the ids in cat_ids; the dict
            # lookup avoids a linear scan of the list per annotation.
            if ann['category_id'] not in self.cat2label:
                continue
            bbox = [x1, y1, x1 + w, y1 + h]
            # Treat a missing iscrowd field as "not crowd".
            if ann.get('iscrowd', False):
                gt_bboxes_ignore.append(bbox)
            else:
                gt_bboxes.append(bbox)
                gt_labels.append(self.cat2label[ann['category_id']])
                # Masks / keypoints are collected together with the box so
                # they stay index-aligned with gt_bboxes.
                if self.use_instance_mask:
                    gt_masks.append(self.coco_api.annToMask(ann))
                if self.use_keypoint:
                    gt_keypoints.append(ann['keypoints'])
        if gt_bboxes:
            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
            gt_labels = np.array(gt_labels, dtype=np.int64)
        else:
            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
            gt_labels = np.array([], dtype=np.int64)
        if gt_bboxes_ignore:
            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
        else:
            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
        annotation = dict(bboxes=gt_bboxes,
                          labels=gt_labels,
                          bboxes_ignore=gt_bboxes_ignore)
        if self.use_instance_mask:
            annotation['masks'] = gt_masks
        if self.use_keypoint:
            if gt_keypoints:
                annotation['keypoints'] = np.array(gt_keypoints,
                                                   dtype=np.float32)
            else:
                # 51 columns: the flattened keypoint vector length used when
                # no instance is present.
                annotation['keypoints'] = np.zeros((0, 51), dtype=np.float32)
        return annotation

    def get_train_data(self, idx):
        """
        Load image and annotation
        :param idx:
        :return: meta-data (a dict containing image, annotation and other information)
        :raises FileNotFoundError: if the image cannot be read
        """
        img_info = self.data_info[idx]
        file_name = img_info['file_name']
        image_path = os.path.join(self.img_path, file_name)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None; fail here with
            # the path instead of deep inside the pipeline.
            raise FileNotFoundError(
                "Cannot load image: {}".format(image_path))
        ann = self.get_img_annotation(idx)
        meta = dict(img=img,
                    img_info=img_info,
                    gt_bboxes=ann['bboxes'],
                    gt_labels=ann['labels'])
        if self.use_instance_mask:
            meta['gt_masks'] = ann['masks']
        if self.use_keypoint:
            meta['gt_keypoints'] = ann['keypoints']

        meta = self.pipeline(meta, self.input_size)
        # Reorder axes (2, 0, 1) before handing the image to torch.
        meta['img'] = torch.from_numpy(meta['img'].transpose(2, 0, 1))
        return meta

    def get_val_data(self, idx):
        """
        Currently no difference from get_train_data.
        Not support TTA(testing time augmentation) yet.
        :param idx:
        :return:
        """
        # TODO: support TTA
        return self.get_train_data(idx)
class Obj365Dataset(CustomDataset):
    """COCO-format dataset whose category ids come from the module-level ``_cat_ids``."""

    CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
               'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
               'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
               'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
               'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
               'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
               'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
               'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
               'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
               'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
               'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
               'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
               'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush')

    def load_annotations(self, ann_file):
        """Read ``ann_file`` and return its image records.

        Each record is given a ``filename`` key equal to ``file_name``. The
        category ids are taken from ``_cat_ids`` rather than from the file.
        """
        self.coco = COCO(ann_file)
        self.cat_ids = _cat_ids
        self.cat2label = {}
        for position, cat_id in enumerate(self.cat_ids):
            self.cat2label[cat_id] = position + 1
        print("######obj365 $$$$$$$$%%%%%%%% id len is ", len(self.cat_ids))
        self.img_ids = self.coco.getImgIds()[:]

        records = []
        for image_id in self.img_ids:
            record = self.coco.loadImgs([image_id])[0]
            record['filename'] = record['file_name']
            records.append(record)
        return records

    def get_ann_info(self, idx):
        """Return the parsed annotations for the image at position ``idx``."""
        image_id = self.img_infos[idx]['id']
        raw_anns = self.coco.loadAnns(self.coco.getAnnIds(imgIds=[image_id]))
        return self._parse_ann_info(raw_anns, self.with_mask)

    def _filter_imgs(self, min_size=32):
        """Return indices of images that are annotated and at least ``min_size`` on the short side."""
        annotated_ids = {entry['image_id'] for entry in self.coco.anns.values()}
        kept = []
        for position, record in enumerate(self.img_infos):
            if self.img_ids[position] in annotated_ids:
                if min(record['width'], record['height']) >= min_size:
                    kept.append(position)
        return kept

    def _parse_ann_info(self, ann_info, with_mask=True):
        """Turn raw annotation dicts into arrays of boxes, labels and masks.

        Args:
            ann_info (list[dict]): Annotation info of an image.
            with_mask (bool): Whether to parse mask annotations.

        Returns:
            dict: keys bboxes, labels, bboxes_ignore and, when ``with_mask``
                is true, masks, mask_polys and poly_lens.
        """
        boxes = []
        labels = []
        ignored_boxes = []
        # Masks come in two forms: a binary map per instance, and the list of
        # polygons (flat coordinate lists) the instance is made of.
        if with_mask:
            masks = []
            polys_per_mask = []
            poly_lengths = []

        for entry in ann_info:
            if entry.get('ignore', False):
                continue
            left, top, box_w, box_h = entry['bbox']
            if entry['area'] <= 0 or box_w < 1 or box_h < 1:
                continue
            corners = [left, top, left + box_w - 1, top + box_h - 1]
            if entry['iscrowd']:
                ignored_boxes.append(corners)
            else:
                boxes.append(corners)
                # The raw category id is used as the label (cat2label is not
                # applied here).
                labels.append(entry['category_id'])
            if with_mask and not entry['iscrowd']:
                masks.append(self.coco.annToMask(entry))
                # a polygon needs at least 3 points, i.e. 6 coordinates
                usable_polys = []
                for poly in entry['segmentation']:
                    if len(poly) >= 6:
                        usable_polys.append(poly)
                polys_per_mask.append(usable_polys)
                for poly in usable_polys:
                    poly_lengths.append(len(poly))

        if boxes:
            box_array = np.array(boxes, dtype=np.float32)
            label_array = np.array(labels, dtype=np.int64)
        else:
            box_array = np.zeros((0, 4), dtype=np.float32)
            label_array = np.array([], dtype=np.int64)

        ignore_array = (np.array(ignored_boxes, dtype=np.float32)
                        if ignored_boxes else np.zeros((0, 4),
                                                       dtype=np.float32))

        parsed = dict(bboxes=box_array,
                      labels=label_array,
                      bboxes_ignore=ignore_array)

        if with_mask:
            parsed['masks'] = masks
            # the polygon form is carried along but not consumed here
            parsed['mask_polys'] = polys_per_mask
            parsed['poly_lens'] = poly_lengths
        return parsed
# For every selected category, draw 2000 random images of that category and
# black out everything except the first annotated instance.
for cat in cats[args.begin:args.end]:
    print("Creating data for category: " + cat['name'])
    for num in range(2000):
        try:
            catIds = coco.getCatIds(catNms=[cat['name']]);
            imgIds = coco.getImgIds(catIds=catIds );
            # Pick one image of this category uniformly at random.
            img = coco.loadImgs(imgIds[np.random.randint(0,len(imgIds))])[0]
            #I = io.imread('http://mscoco.org/images/%d'%(img['id']))
            #I = io.imread(img['coco_url'])
            I=io.imread(os.path.join(dataDir,dataType,'COCO_train2014_{:012d}.jpg'.format(img['id'])))
            #plt.imshow(I); plt.axis('off')
            annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
            anns = coco.loadAnns(annIds)
            #coco.showAnns(anns)
            # Only the first annotation of the category is used.
            bbx=anns[0]['bbox']
            mask=np.array(coco.annToMask(anns[0]))
            #print(np.shape(mask))
            #print(np.shape(I))
            #pdb.set_trace()
            # NOTE: I1 is an alias of I, not a copy; the assignments below
            # modify I in place as well.
            I1=I
            #row,col=np.where(mask>0)
            #print(row)
            #print(col)
            #I1=I[row,col,0]
            #print(np.shape(I1))
            #print("I1 shape is: ")
            #print(I1.shape)
            # Zero each of the three channels outside the instance mask.
            # NOTE(review): indexing three channels assumes I is a colour
            # image - confirm how grayscale files are handled.
            I1[:,:,0]=np.array(I[:,:,0] * mask )
            I1[:,:,1]=np.array(I[:,:,1] * mask )
            I1[:,:,2]=np.array(I[:,:,2] * mask )
            # NOTE(review): the snippet ends here; the handler of this try
            # block is not visible in this file.
# 显示图片 plt.axis("off") plt.imshow(I) plt.show() # 实例分割标签 plt.axis("off") plt.imshow(I) ann_ids = coco.getAnnIds(imgIds=img["id"], catIds=cat_ids, iscrowd=None) anns = coco.loadAnns(ann_ids) coco.showAnns(anns) plt.show() # 单个对象的掩码 mask = coco.annToMask(anns[0]) plt.axis("off") plt.imshow(mask) plt.show() # 多个对象的掩码 mask_all = coco.annToMask(anns[0]) for ann_one in anns[1:]: mask_all += coco.annToMask(ann_one) plt.axis("off") plt.imshow(mask_all) plt.show() # 骨骼 ann_file = os.path.join(data_dir, ann_dir, "person_keypoints_{}.json".format(data_type))
class COCODataset(torch.utils.data.Dataset):
    """COCO dataset producing an image, a semantic label map and an affinity map.

    Every sample is resized so that its shorter side equals ``img_size`` and
    then randomly cropped to ``img_size`` x ``img_size``.
    """

    def __init__(self, img_dir, json_path, aff_r):
        """__init__

        Args:
            img_dir: image directory; it is prefixed to each file name by plain
                string concatenation, so it should end with a separator
            json_path: path of the COCO annotation file
            aff_r: side length of the affinity kernel
        """
        self.img_dir = img_dir
        self.jsp = json_path
        self.img_size = 512
        self.coco = COCO(json_path)
        self.img_file = [
            json_img["file_name"] for json_img in self.coco.dataset["images"]
        ]
        self.aff_r = aff_r  # affinity kernel size
        self.aff_resolution = 5  # number of resolution levels

        mean = [0.477, 0.451, 0.411]
        std = [0.284, 0.280, 0.292]
        self.transform = transforms.ToTensor()
        self.transform_img = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize(mean=mean, std=std)])
        # NOTE(review): pickle.load can execute arbitrary code; these are
        # expected to be trusted files shipped with the project.
        with open('./data/t_color.txt', "rb") as f:
            self.t_color = pickle.load(f)
        with open('./data/t_class_name.txt', "rb") as f:
            self.t_color_name = pickle.load(f)
        self.labels = self.t_color

    def __len__(self):
        """Return the number of images in the annotation file."""
        return len(self.img_file)

    def __getitem__(self, idx):
        """__getitem__

        Args:
            idx (int): sample index

        Returns:
            img: the cropped image after ``self.transform_img``
            sem_seg: the per-label 0/1 maps after ``self.transform``
            aff_map: ndarray of shape
                ``(aff_r, aff_r**2, img_size, img_size)``
        """
        img_name = self.img_file[idx]
        img = np.array(Image.open(self.img_dir + img_name))
        # numpy images are (rows, cols[, channels]) = (height, width[, ...]).
        height, width = img.shape[0], img.shape[1]
        if len(img.shape) < 3:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        img_seg = np.zeros((height, width, 3), dtype=int)
        img_ins = np.zeros((height, width, 3), dtype=int)

        # The image id is parsed from the last six digits of the file name.
        id_ = int(img_name.split("/")[-1][-10:-4])
        pre_color = []
        # The last entry of t_color_name is not iterated.
        for i in range(len(self.t_color_name) - 1):
            catIds = self.coco.getCatIds(catNms=[self.t_color_name[i]])
            annIds = self.coco.getAnnIds(imgIds=id_,
                                         catIds=catIds,
                                         iscrowd=False)
            if not annIds:
                continue
            anns = self.coco.loadAnns(annIds)
            for ann in anns:
                # Draw a random colour not yet used by another instance.
                while True:
                    color = np.random.randint(1, 255, 3)
                    if not any(
                            np.array_equal(prev, color)
                            for prev in pre_color):
                        pre_color.append(color)
                        break
                mask = np.array(self.coco.annToMask(ann), dtype=int)
                mask_seg = mask[..., None] * self.t_color[i]
                mask_ins = mask[..., None] * color
                # Only fill pixels that are still unassigned (first one wins).
                img_seg = np.where(img_seg == 0, mask_seg, img_seg)
                img_ins = np.where(img_ins == 0, mask_ins, img_ins)

        img = Image.fromarray(img)
        img_seg = Image.fromarray(np.uint8(img_seg))
        img_ins = Image.fromarray(np.uint8(img_ins))

        # Resize. PIL expects (width, height); label/instance colour maps use
        # nearest-neighbour so no blended (non-existent) colours appear.
        w, h = self.get_size((width, height))
        img = img.resize((w, h))
        img_seg = img_seg.resize((w, h), Image.NEAREST)
        img_ins = img_ins.resize((w, h), Image.NEAREST)

        # Random square crop, identical for all three images.
        crop_size = self.img_size
        x = np.random.randint((w - crop_size) + 1)
        y = np.random.randint((h - crop_size) + 1)
        crop_box = (x, y, x + crop_size, y + crop_size)
        img = img.crop(crop_box)
        img_seg = img_seg.crop(crop_box)
        img_ins = img_ins.crop(crop_box)

        sem_seg = np.array(img_seg)
        img_ins = np.array(img_ins)

        # Semantic labels: one 0/1 plane per label colour.
        img_t_cls = np.zeros((img.size[0], img.size[1], len(self.labels)))
        for i in range(len(self.labels)):
            img_t_cls[:, :, i] = np.where(
                (sem_seg[:, :, 0] == self.labels[i][0])
                & (sem_seg[:, :, 1] == self.labels[i][1])
                & (sem_seg[:, :, 2] == self.labels[i][2]), 1, 0)

        aff_map = self.Affinity_generator_new(img_ins)

        img = self.transform_img(img)
        sem_seg = self.transform(img_t_cls)

        return img, sem_seg, aff_map

    def get_size(self, img_wh):
        """Scale ``(width, height)`` so that the shorter side equals ``img_size``.

        Returns:
            tuple: ``(w, h)`` of the resized image
        """
        width = img_wh[0]
        height = img_wh[1]
        if width < height:
            w = self.img_size
            h = int(self.img_size * height / width)
        else:
            h = self.img_size
            w = int(self.img_size * width / height)
        return w, h

    def Affinity_generator(self, img_ins):
        """Build affinity maps by strided sub-sampling of the instance image.

        Level ``mul`` samples every ``2**mul``-th pixel. For each of the
        ``aff_r**2`` kernel offsets the entry is 1 where the shifted pixel has
        the same instance colour as the centre pixel, else 0.

        Returns:
            Tensor of shape ``(aff_r, aff_r**2, img_size, img_size)``; level
            ``mul`` only fills the top-left ``img_size // 2**mul`` square.
        """
        aff_map = torch.zeros(
            (self.aff_r, self.aff_r**2, self.img_size, self.img_size))
        radius = self.aff_r // 2

        for mul in range(self.aff_resolution):
            stride = 2**mul
            img_t_aff_mul = img_ins[0:self.img_size:stride,
                                    0:self.img_size:stride]
            img_size = self.img_size // stride

            # Zero-pad by the kernel radius on every side.
            padded = np.zeros(
                (img_size + radius * 2, img_size + radius * 2, 3))
            padded[radius:img_size + radius,
                   radius:img_size + radius] = img_t_aff_mul

            # One shifted copy of the instance image per kernel offset.
            img_t_aff_compare = np.zeros(
                (self.aff_r**2, img_size, img_size, 3))
            for i in range(self.aff_r):
                for j in range(self.aff_r):
                    img_t_aff_compare[i * self.aff_r +
                                      j] = padded[i:i + img_size,
                                                  j:j + img_size]

            # Same object -> affinity 1, different -> 0.
            aff_data = np.where(
                (img_t_aff_compare[:, :, :, 0] == img_t_aff_mul[:, :, 0])
                & (img_t_aff_compare[:, :, :, 1] == img_t_aff_mul[:, :, 1])
                & (img_t_aff_compare[:, :, :, 2] == img_t_aff_mul[:, :, 2]),
                1, 0)
            aff_data = self.transform(aff_data.transpose(1, 2, 0))
            aff_map[mul, :, 0:img_size, 0:img_size] = aff_data

        return aff_map

    def Affinity_generator_new(self, img_ins):
        """Build affinity maps from nearest-neighbour down-sampled instance images.

        Level ``mul`` resizes the instance image to ``img_size // 2**mul`` per
        side. For each of the ``aff_r**2`` kernel offsets the entry is 1 where
        the shifted pixel has the same instance colour as the reference pixel.

        Returns:
            ndarray of shape ``(aff_r, aff_r**2, img_size, img_size)``; level
            ``mul`` only fills the top-left ``img_size // 2**mul`` square.
        """
        aff_map = np.zeros(
            (self.aff_r, self.aff_r**2, self.img_size, self.img_size))

        for mul in range(self.aff_resolution):
            img_size = self.img_size // (2**mul)
            # interpolation must be passed by keyword: the third positional
            # parameter of cv2.resize is ``dst``, not the interpolation flag.
            ins_downsampe = cv2.resize(img_ins, (img_size, img_size),
                                       interpolation=cv2.INTER_NEAREST)

            # NOTE(review): the border is aff_r on each side while the sibling
            # Affinity_generator pads by aff_r // 2; with aff_r the windows
            # below never include the unshifted position - confirm intent.
            ins_pad = cv2.copyMakeBorder(ins_downsampe,
                                         int(self.aff_r),
                                         int(self.aff_r),
                                         int(self.aff_r),
                                         int(self.aff_r),
                                         cv2.BORDER_CONSTANT,
                                         value=(0, 0, 0))

            # One shifted copy of the instance image per kernel offset.
            aff_compare = np.zeros((self.aff_r**2, img_size, img_size, 3))
            for i in range(self.aff_r):
                for j in range(self.aff_r):
                    aff_compare[i * self.aff_r + j] = ins_pad[i:i + img_size,
                                                              j:j + img_size]

            # Same object -> affinity 1, different -> 0.
            aff_data = np.where(
                (aff_compare[:, :, :, 0] == ins_downsampe[:, :, 0])
                & (aff_compare[:, :, :, 1] == ins_downsampe[:, :, 1])
                & (aff_compare[:, :, :, 2] == ins_downsampe[:, :, 2]), 1, 0)
            aff_map[mul, :, 0:img_size, 0:img_size] = aff_data

        return aff_map
# For every image collect the crowd mask, the mask of instances without
# keypoints, and the per-instance masks / keypoints.
for i, img_id in enumerate(tqdm(img_ids)):
    # Check the annotations first so unannotated images are never read.
    img_anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
    if len(img_anns) == 0:
        continue

    filepath = os.path.join(IMG_DIR, coco.imgs[img_id]['file_name'])
    # cv2.CV_LOAD_IMAGE_COLOR only exists in OpenCV 2.x; IMREAD_COLOR is the
    # flag for the same behaviour.
    img = cv2.imread(filepath, cv2.IMREAD_COLOR)
    h, w, c = img.shape

    crowd_mask = np.zeros((h, w), dtype='bool')
    unannotated_mask = np.zeros((h, w), dtype='bool')
    instance_masks = []
    keypoints = []

    for anno in img_anns:
        mask = coco.annToMask(anno)

        # if crowd, don't compute loss
        if anno['iscrowd'] == 1:
            crowd_mask = np.logical_or(crowd_mask, mask)
            continue

        # if tiny instance (no labelled keypoints), don't compute loss there,
        # but still keep it as an instance
        if anno['num_keypoints'] == 0:
            unannotated_mask = np.logical_or(unannotated_mask, mask)
        instance_masks.append(mask)
        keypoints.append(anno['keypoints'])

    # Construct encoding:
def processing(ann_path, filelist_path, masklist_path, json_path, mask_dir):
    """Convert COCO person-keypoint annotations into training lists and masks.

    For every image that keeps at least one person, this writes:
      * the image file name to ``filelist_path`` (one per line),
      * a pickled "miss" mask to ``<mask_dir>/<stem>.npy`` and that path to
        ``masklist_path`` (one per line),
      * one entry in the JSON list stored at ``json_path``.

    A person is kept when it has at least 5 labelled keypoints, an area of at
    least 32*32 and its bbox centre is not within 0.3 * max(bbox w, h) of an
    already kept person.

    :param ann_path: path to the COCO keypoint annotation JSON.
    :param filelist_path: output text file listing kept image names.
    :param masklist_path: output text file listing written mask paths.
    :param json_path: output JSON file with per-image person records.
    :param mask_dir: directory receiving the pickled masks.
    :raises Exception: if an image carries more than one crowd annotation.
    """
    coco = COCO(ann_path)
    ids = list(coco.imgs.keys())
    lists = []

    # Context managers guarantee both list files are flushed and closed even
    # when processing raises half-way through.
    with open(filelist_path, 'w') as filelist_fp, \
            open(masklist_path, 'w') as masklist_fp:
        for i, img_id in enumerate(ids):
            img_anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))

            name = coco.imgs[img_id]['file_name']
            height = coco.imgs[img_id]['height']
            width = coco.imgs[img_id]['width']

            # (center_x, center_y, max(bbox_w, bbox_h)) of every kept person.
            person_centers = []
            info = {'filename': name, 'info': []}

            for ann in img_anns:
                if ann['num_keypoints'] < 5 or ann['area'] < 32 * 32:
                    continue

                kpt = ann['keypoints']
                box_x, box_y, box_w, box_h = ann['bbox'][:4]
                person_center = [box_x + box_w / 2.0, box_y + box_h / 2.0]
                scale = box_h / float(cfg.INPUT_SIZE)

                # Skip this person if it is too close to an already kept one.
                too_close = False
                for pc in person_centers:
                    dis = math.sqrt(
                        (person_center[0] - pc[0]) * (person_center[0] - pc[0])
                        + (person_center[1] - pc[1]) * (person_center[1] - pc[1]))
                    if dis < pc[2] * 0.3:
                        too_close = True
                        break
                if too_close:
                    continue

                dic = {
                    'pos': person_center,
                    'keypoints': np.zeros((18, 3)).tolist(),
                    'scale': scale,
                }
                for part in range(17):
                    target = dic['keypoints'][COCO_TO_OURS[part]]
                    target[0] = kpt[part * 3]
                    target[1] = kpt[part * 3 + 1]
                    # visible is 2, not visible is 1 and not labelled is 0
                    target[2] = kpt[part * 3 + 2]

                # Neck (index 1) is synthesised as the midpoint of the two
                # shoulders (COCO keypoints 5 and 6); it only counts as
                # labelled when both shoulders are.
                neck = dic['keypoints'][1]
                neck[0] = (kpt[5 * 3] + kpt[6 * 3]) * 0.5
                neck[1] = (kpt[5 * 3 + 1] + kpt[6 * 3 + 1]) * 0.5
                if kpt[5 * 3 + 2] == 0 or kpt[6 * 3 + 2] == 0:
                    neck[2] = 0
                else:
                    neck[2] = 1

                info['info'].append(dic)
                person_centers.append(
                    (person_center[0], person_center[1], max(box_w, box_h)))

            if info['info']:
                lists.append(info)
                filelist_fp.write(name + '\n')

                mask_all = np.zeros((height, width), dtype=np.uint8)
                mask_miss = np.zeros((height, width), dtype=np.uint8)
                mask_crowd = None
                crowd_count = 0
                for ann in img_anns:
                    if ann['iscrowd'] == 1:
                        # Remove from the crowd region whatever the instances
                        # seen so far already cover.
                        mask_crowd = coco.annToMask(ann)
                        overlap = np.bitwise_and(mask_all, mask_crowd)
                        mask_crowd = mask_crowd - overlap
                        crowd_count += 1
                        continue

                    mask = coco.annToMask(ann)
                    mask_all = np.bitwise_or(mask, mask_all)
                    if ann['num_keypoints'] <= 0:
                        mask_miss = np.bitwise_or(mask, mask_miss)

                # Invert: after this step True marks pixels that are neither
                # keypoint-less persons nor crowd.
                if crowd_count < 1:
                    mask_miss = np.logical_not(mask_miss)
                elif crowd_count == 1:
                    mask_miss = np.logical_not(
                        np.bitwise_or(mask_miss, mask_crowd))
                else:
                    raise Exception('crowd segments > 1')

                # NOTE: the payload is a pickle despite the '.npy' suffix;
                # readers must use pickle.load, not np.load. Binary mode is
                # required for pickle on Python 3, and the handle is closed
                # deterministically instead of being leaked.
                mask_path = os.path.join(mask_dir, name.split('.')[0] + '.npy')
                with open(mask_path, 'wb') as mask_fp:
                    pickle.dump(mask_miss, mask_fp)
                masklist_fp.write(mask_path + '\n')

            if i % 1000 == 0:
                print("Processed {} of {}".format(i, len(ids)))

    with open(json_path, 'w') as fp:
        fp.write(json.dumps(lists))
    print('done!')
class CocoDataset(Dataset):
    """Dataset yielding an image and a per-pixel instance-label map from COCO.

    Each sample is a dict with keys ``'image'`` (float tensor built from the
    decoded image array) and ``'labels'`` (uint8 tensor, same height/width as
    recorded in the annotation file).
    """

    def __init__(self, anns_file, images_dir, transform=None):
        """
        :param anns_file: path to the COCO annotation JSON.
        :param images_dir: directory containing the image files.
        :param transform: optional callable applied to the sample dict.
        """
        self._anns_file = anns_file
        self._images_dir = images_dir
        self._transform = transform
        self._coco = COCO(anns_file)
        self._img_ids = self._coco.getImgIds()

    def __len__(self):
        """
        Get the number of samples in the dataset.
        :return: the number of samples in the dataset.
        """
        return len(self._img_ids)

    def __getitem__(self, item):
        """
        Get the sample that correspond to the given item.
        :param item: index of sample to return
        :return: sample of type dictionary with keys: 'image' and 'labels'
        """
        img_id = self._img_ids[item]
        coco_img = self._coco.imgs[img_id]
        coco_anns = self._coco.loadAnns(self._coco.getAnnIds(imgIds=img_id))

        image_path = os.path.join(self._images_dir, coco_img['file_name'])
        sample = {
            'image': self.load_image(image_path),
            'labels': self._anns_to_tensor(coco_img, coco_anns),
        }
        if self._transform:
            sample = self._transform(sample)
        return sample

    @staticmethod
    def load_image(file_path):
        """
        Load image from file to a float Tensor.
        :param file_path: path to image file
        :return: Tensor that contains the image.
        """
        # Image.open keeps the underlying file open until the image object is
        # closed; decode inside a context manager so the handle is released
        # immediately instead of whenever the object is garbage collected.
        with Image.open(file_path) as original_img:
            original_np_img = np.array(original_img)

        # Handle single-channel (2-D) images.
        # NOTE(review): only channel 0 receives the pixel data; channels 1 and
        # 2 stay zero rather than being replicated -- confirm this is intended.
        if len(original_np_img.shape) == 2:
            gray = original_np_img
            original_np_img = np.zeros([gray.shape[0], gray.shape[1], 3])
            original_np_img[:, :, 0] = gray

        # Handle RGBA images: drop the alpha channel.
        if original_np_img.shape[2] == 4:
            original_np_img = original_np_img[:, :, 0:3]

        return torch.tensor(original_np_img, dtype=torch.float)

    def _anns_to_tensor(self, img, anns):
        """
        Convert coco annotations to a label map.
        :param img: Image dictionary in coco format.
        :param anns: Annotations of the given image.
        :return: uint8 Tensor of shape (height, width); 0 is background and
                 annotation number k (1-based, in list order) is written as k.
                 Later annotations overwrite earlier ones where they overlap.
        """
        labels = np.zeros((img['height'], img['width']))
        for new_label, ann in enumerate(anns, start=1):
            labels[self._coco.annToMask(ann) == 1] = new_label
        # NOTE: stored as uint8, so label values above 255 wrap around.
        return torch.tensor(labels.astype('uint8'), dtype=torch.uint8)
class Trainer:
    """Train and evaluate a U-Net segmentation model on COCO 2014 (TF 1.x).

    The constructor builds the graph (placeholders, model, loss, optimizer,
    TensorBoard scalars) and opens the session; ``train`` runs the epoch loop.
    ``args`` must provide ``nb_classes``, ``loss`` ('crossentropy' or 'dice'),
    ``learning_rate``, ``epochs`` and ``batch_size``.
    """

    # Side length (pixels) every image and mask is resized to.
    IMG_SIZE = 512

    def __init__(self, args):
        """Build the graph and session.

        :param args: namespace with the hyper-parameters listed on the class.
        :raises ValueError: if ``args.loss`` is not a supported loss name.
        """
        self.args = args

        # COCO annotation indices
        self.coco_train = COCO("../annotations/instances_train2014.json")
        self.coco_val = COCO("../annotations/instances_val2014.json")

        # TensorFlow session
        tf.reset_default_graph()
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)

        # Model
        self.input_img = tf.placeholder(tf.float32,
                                        shape=(None, None, None, 3))
        self.label = tf.placeholder(tf.float32,
                                    shape=(None, None, None, args.nb_classes))
        self.model = Unet(input_img=self.input_img, nb_classes=args.nb_classes)

        # Loss (cross entropy or Dice) and optimizer
        with tf.variable_scope('optimization'):
            with tf.variable_scope('loss'):
                if args.loss == 'crossentropy':
                    self.loss = -tf.reduce_mean(
                        tf.multiply(self.label,
                                    tf.log(self.model.output_proba)))
                elif args.loss == "dice":
                    labels = self.label
                    proba = self.model.output_proba
                    intersection = tf.reduce_sum(proba * labels)
                    union = tf.reduce_sum(proba + labels)
                    self.loss = -intersection / union
                else:
                    # Previously an unknown name left self.loss undefined and
                    # failed later with an unrelated AttributeError.
                    raise ValueError(
                        "Unsupported loss: {!r}".format(args.loss))

            self.optimizer = tf.train.MomentumOptimizer(
                learning_rate=args.learning_rate, momentum=0.99)
            self.train_op = self.optimizer.minimize(self.loss)

        # Scalars for TensorBoard: each value is fed into a non-trainable
        # variable and emitted through its summary op.
        self.tf_train_loss = tf.Variable(0.0, trainable=False,
                                         name='Train_Loss')
        self.tf_train_loss_summary = tf.summary.scalar("Loss",
                                                       self.tf_train_loss)

        self.tf_train_accuracy = tf.Variable(0.0, trainable=False,
                                             name='Train_Accuracy')
        self.tf_train_accuracy_summary = tf.summary.scalar(
            "Train Accuracy", self.tf_train_accuracy)

        self.tf_train_dice = tf.Variable(0.0, trainable=False,
                                         name="Train_Dice_Coef")
        self.tf_train_dice_summary = tf.summary.scalar("Train Dice Coef",
                                                       self.tf_train_dice)

        self.tf_eval_accuracy = tf.Variable(0.0, trainable=False,
                                            name='Eval_accuracy')
        self.tf_eval_accuracy_summary = tf.summary.scalar(
            'Evaluation Accuracy', self.tf_eval_accuracy)

        self.tf_eval_dice = tf.Variable(0.0, trainable=False,
                                        name="Eval_Dice_Coef")
        self.tf_eval_dice_summary = tf.summary.scalar("Evaluation Dice Coef",
                                                      self.tf_eval_dice)

        self.writer = tf.summary.FileWriter('./graphs', self.sess.graph)

        # Saver
        self.saver = tf.train.Saver()
        self.sess.run(tf.initialize_all_variables())

    def save_model(self, filename):
        """Write the session's variables to the checkpoint ``filename``."""
        with tf.Graph().as_default():
            self.saver.save(self.sess, filename)

    def _build_label(self, coco, image_id):
        """Return the (IMG_SIZE, IMG_SIZE, nb_classes) ground-truth map.

        Channel ``c`` is the union of all masks of COCO category id ``c + 1``
        on the image, resized with nearest-neighbour interpolation.
        """
        size = self.IMG_SIZE
        y = np.zeros((size, size, self.args.nb_classes))
        for channel in range(self.args.nb_classes):
            ann_ids = coco.getAnnIds(imgIds=image_id, catIds=[channel + 1])
            for ann in coco.loadAnns(ann_ids):
                mask = coco.annToMask(ann)
                mask = resize(mask, (size, size),
                              interpolation=cv2.INTER_NEAREST)
                y[:, :, channel] = np.logical_or(y[:, :, channel],
                                                 mask).astype(np.float32)
        return y

    def _load_image(self, path):
        """Read ``path``, resize to IMG_SIZE and expand grayscale to 3 channels."""
        size = self.IMG_SIZE
        img = io.imread(path)
        img = resize(img, (size, size))
        if img.shape == (size, size):
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        return img

    def _score(self, predicted_mask, label, nb_total_bit):
        """Return (true-positive ratio, Dice coefficient) for one prediction."""
        nb_TP_bit = np.sum(np.logical_and(predicted_mask, label))
        union = np.sum(predicted_mask) + np.sum(label)
        return nb_TP_bit / nb_total_bit, 2 * nb_TP_bit / union

    def _write_scalar(self, summary_op, variable, values, step):
        """Emit the mean of ``values`` through ``summary_op`` at ``step``."""
        summary = self.sess.run(
            summary_op, feed_dict={variable: np.mean(np.asarray(values))})
        self.writer.add_summary(summary, step)

    def train(self):
        """Run ``args.epochs`` epochs of training followed by evaluation.

        Per epoch: save a checkpoint, train over the selected train2014
        images in mini-batches, evaluate on val2014 one image at a time,
        write the epoch means to TensorBoard and save the model.
        """
        nb_total_bit = self.IMG_SIZE * self.IMG_SIZE * self.args.nb_classes

        with tf.Graph().as_default():
            for i_epoch in range(1, self.args.epochs + 1):
                # Per-epoch metric accumulators
                loss_train = []
                accuracy_train = []
                accuracy_val = []
                dice_train = []
                dice_val = []

                # Checkpoint
                self.save_model(
                    filename='./checkpoints/checkpoint_epoch-{}.ckpt'.format(
                        i_epoch))

                # ---- Train ----
                catIDs = list(range(1, self.args.nb_classes + 1))
                print("Epoch {} \n".format(i_epoch))
                print("Train \n")

                minibatch_image = []
                minibatch_label = []

                # NOTE(review): pycocotools appears to intersect the image
                # sets when several catIds are given, i.e. only images holding
                # every category would be returned -- confirm this is wanted.
                imgIds = self.coco_train.getImgIds(catIds=catIDs)
                for count, imgId in enumerate(tqdm(imgIds), start=1):
                    image = self.coco_train.loadImgs([imgId])

                    y = self._build_label(self.coco_train, image[0]['id'])
                    img = self._load_image("../train2014/{}".format(
                        image[0]["file_name"]))

                    minibatch_image.append(img)
                    minibatch_label.append(y)

                    batch_ready = (len(minibatch_image) == self.args.batch_size
                                   or count == len(imgIds))
                    if not batch_ready:
                        continue

                    feed = {
                        self.input_img: np.asarray(minibatch_image),
                        self.label: np.asarray(minibatch_label),
                    }
                    # Loss on the current weights
                    loss_train.append(self.sess.run(self.loss, feed_dict=feed))
                    # Forward + back propagation
                    self.sess.run(self.train_op, feed_dict=feed)
                    # Predictions after the update, for the metrics
                    softmax = self.sess.run(
                        self.model.output_proba,
                        feed_dict={
                            self.input_img: np.asarray(minibatch_image)
                        })

                    for i_batch in range(softmax.shape[0]):
                        accuracy, dice = self._score(
                            probaToBinaryMask(softmax[i_batch]),
                            minibatch_label[i_batch], nb_total_bit)
                        accuracy_train.append(accuracy)
                        dice_train.append(dice)

                    # Reset mini-batch
                    minibatch_label.clear()
                    minibatch_image.clear()

                # ---- Evaluation ----
                catIDs = list(range(1, self.args.nb_classes + 1))
                imgIds = self.coco_val.getImgIds(catIds=catIDs)
                print("Evaluation \n")
                for imgId in tqdm(imgIds):
                    image = self.coco_val.loadImgs([imgId])

                    # Same channel -> category mapping as training (the old
                    # code queried category `cat` instead of `cat + 1` here).
                    y = self._build_label(self.coco_val, image[0]['id'])
                    img = self._load_image("../val2014/{}".format(
                        image[0]["file_name"]))

                    softmax = self.sess.run(
                        self.model.output_proba,
                        feed_dict={
                            self.input_img: np.expand_dims(img, axis=0)
                        })

                    # Drop the batch axis so the mask is built exactly as in
                    # the training path.
                    accuracy, dice = self._score(
                        probaToBinaryMask(softmax[0]), y, nb_total_bit)
                    accuracy_val.append(accuracy)
                    dice_val.append(dice)

                # ---- TensorBoard events ----
                self._write_scalar(self.tf_train_accuracy_summary,
                                   self.tf_train_accuracy, accuracy_train,
                                   i_epoch)
                self._write_scalar(self.tf_train_loss_summary,
                                   self.tf_train_loss, loss_train, i_epoch)
                self._write_scalar(self.tf_train_dice_summary,
                                   self.tf_train_dice, dice_train, i_epoch)
                self._write_scalar(self.tf_eval_accuracy_summary,
                                   self.tf_eval_accuracy, accuracy_val,
                                   i_epoch)
                # The evaluation Dice summary used to be computed but never
                # handed to the writer.
                self._write_scalar(self.tf_eval_dice_summary,
                                   self.tf_eval_dice, dice_val, i_epoch)

                self.save_model(filename='./model-{}.ckpt'.format(i_epoch))
# while counter < nb_images: for counter in tqdm(range(0, nb_images), desc='image'): imId = imgIds.pop(np.random.randint(0, len(imgIds))) img = coco.loadImgs(imId)[0] annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None) anns = coco.loadAnns(annIds) counteri = 0 if len(anns) < 3: continue name = str(anns[0]['image_id']) namei = [] maski = [] for i in range(len(anns)): mask = coco.annToMask(anns[i]) if i == 0: insseg = np.round(mask) * anns[i][ 'category_id'] #segDict[anns[i]['category_id']] else: insseg += np.round(mask) * anns[i][ 'category_id'] #segDict[anns[i]['category_id']] ratio = mask.sum() / np.size(mask) if (ratio > 0.4): continue if (ratio < 0.03): continue minim = cv2.resize(mask, bsize, interpolation=cv2.INTER_NEAREST) im = Image.open(os.path.join(dataDir, dataType, img['file_name'])) if im.mode != 'RGB':
class COCODataset(GeneralizedDataset):
    """COCO-format dataset exposing images and box/label/mask targets.

    The annotation file is chosen from two fixed paths depending on ``train``;
    images are read from ``<data_dir>/<split>/``.
    """

    def __init__(self, data_dir, split, train=False):
        """
        :param data_dir: root directory holding the image split folders.
        :param split: name of the split sub-folder.
        :param train: selects the training annotation file and runs the
            dataset check when true.
        """
        super().__init__()
        from pycocotools.coco import COCO

        self.data_dir = data_dir
        self.split = split
        self.train = train

        ann_file = ('/data/zihaosh/data_hw3/pascal_train.json'
                    if train else '/data/zihaosh/hw3/test.json')
        print(ann_file)
        self.coco = COCO(ann_file)

        # Image ids are kept as strings; accessors convert them back to int.
        self.ids = [str(image_key) for image_key in self.coco.imgs]

        # category id -> category name
        self._classes = {
            cat_id: cat["name"] for cat_id, cat in self.coco.cats.items()
        }
        # category names ordered by ascending category id
        ordered_cat_ids = sorted(self.coco.cats)
        self.classes = tuple(
            self.coco.cats[cat_id]["name"] for cat_id in ordered_cat_ids)

        # results' labels convert to annotation labels:
        # position in self.classes -> category id
        self.ann_labels = {}
        for cat_id, cat_name in self._classes.items():
            self.ann_labels[self.classes.index(cat_name)] = cat_id

        checked_id_file = os.path.join(data_dir,
                                       "checked_{}.txt".format(split))
        if train:
            if not os.path.exists(checked_id_file):
                self._aspect_ratios = [
                    info["width"] / info["height"]
                    for info in self.coco.imgs.values()
                ]
            self.check_dataset(checked_id_file)

    def get_image(self, img_id):
        """Open the image for ``img_id`` and return it converted to RGB."""
        info = self.coco.imgs[int(img_id)]
        path = os.path.join(self.data_dir, "{}".format(self.split),
                            info["file_name"])
        return Image.open(path).convert("RGB")

    @staticmethod
    def convert_to_xyxy(box):
        """Convert boxes from (xmin, ymin, w, h) to (xmin, ymin, xmax, ymax).

        :param box: 2-D tensor whose first four columns hold the box.
        :return: new tensor shaped like ``box``; the first four columns hold
            the converted box and any further columns are zero.
        """
        converted = torch.zeros_like(box)
        converted[:, 0:2] = box[:, 0:2]
        converted[:, 2] = box[:, 0] + box[:, 2]
        converted[:, 3] = box[:, 1] + box[:, 3]
        return converted

    def get_target(self, img_id):
        """Build the target dict (image_id, boxes, labels, masks) for an image.

        When the image has annotations the three fields are tensors; when it
        has none they are left as empty lists.
        """
        img_id = int(img_id)
        anns = self.coco.loadAnns(self.coco.getAnnIds(img_id))

        boxes, labels, masks = [], [], []
        if anns:
            for ann in anns:
                boxes.append(ann['bbox'])
                # Label is the position of the category name in self.classes.
                category_name = self._classes[ann["category_id"]]
                labels.append(self.classes.index(category_name))
                masks.append(
                    torch.tensor(self.coco.annToMask(ann), dtype=torch.uint8))

            boxes = self.convert_to_xyxy(
                torch.tensor(boxes, dtype=torch.float32))
            labels = torch.tensor(labels)
            masks = torch.stack(masks)

        return dict(image_id=torch.tensor([img_id]),
                    boxes=boxes,
                    labels=labels,
                    masks=masks)
class COCO2014:
    """COCO 2014 reader that simulates click input for each instance mask.

    Every instance mask of an image yields one 5-channel sample: the RGB image
    plus a "positive clicks" plane and a "negative clicks" plane, each drawn
    as filled discs around randomly sampled pixels. Samples can be pulled
    directly or through background producer threads feeding bounded queues.
    """

    def __init__(self, root_path='./COCO/', mode='train'):
        """
        :param root_path: dataset root; concatenated directly with
            'images/...' and 'annotations/...'.
        :param mode: 'train' loads the training annotations; any other value
            loads the validation annotations and a reduced validation id list.
        """
        print('Init COCO2014 Object......')
        # set paths
        self.train_image_dir = root_path + 'images/train2014'
        self.val_image_dir = root_path + 'images/val2014'
        train_ann_path = root_path + 'annotations/instances_train2014.json'
        val_ann_path = root_path + 'annotations/instances_val2014.json'

        # Initialize COCO api for instance annotations.
        if mode == 'train':
            self.coco_train = COCO(train_ann_path)
            self.train_image_ids = self.coco_train.getImgIds()
        else:
            self.coco_val = COCO(val_ann_path)
            # Build the validation id list from categories outside
            # `voc_cat_ids`, topping the set up to 10 * (i + 1) ids after the
            # i-th category. This only runs in validation mode because it
            # needs self.coco_val.
            voc_cat_ids = [5, 2, 16, 9, 44, 6, 3, 17, 62, 21,
                           67, 18, 19, 4, 1, 64, 20, 7, 72]
            unvoc_cat_ids = list(
                set(np.arange(0, 80, 1).tolist()) - set(voc_cat_ids))
            self.val_image_ids = set()
            for i, cat_id in enumerate(unvoc_cat_ids):
                one_cat_img_ids = set(self.coco_val.getImgIds(catIds=[cat_id]))
                while len(self.val_image_ids) != (i + 1) * 10:
                    if len(one_cat_img_ids) > 0:
                        self.val_image_ids.add(one_cat_img_ids.pop())
                    else:
                        break
            self.val_image_ids = list(self.val_image_ids)

    @staticmethod
    def _random_points(mask, value, num_points):
        """Sample up to ``num_points`` (row, col) positions where mask == value.

        Sampling is with replacement, so positions may repeat. Returns an
        empty list when no pixel matches.
        """
        rows, cols = np.where(mask == value)
        index = np.stack([rows, cols], axis=1)
        if len(index) == 0:
            return []
        real_num_points = min(len(index), num_points)
        chosen = np.random.choice(np.arange(len(index)), real_num_points)
        return [index[i] for i in chosen]

    def random_pos_points(self, mask, num_points):
        """Sample up to ``num_points`` (row, col) positions inside the mask."""
        return self._random_points(mask, 1, num_points)

    def random_neg_points(self, mask, num_points):
        """Sample up to ``num_points`` (row, col) positions outside the mask."""
        return self._random_points(mask, 0, num_points)

    @staticmethod
    def _masks_of(coco, image_id):
        """Return one uint8 mask per annotation of ``image_id``."""
        anns = coco.loadAnns(coco.getAnnIds(imgIds=image_id))
        return [coco.annToMask(ann).astype(np.uint8) for ann in anns]

    def read_one_train_image(self):
        """Return (image, masks) for the next non-grayscale training image.

        Advances a wrapping cursor over ``train_image_ids``; images that load
        as 2-D arrays are skipped.
        """
        if not hasattr(self, 'train_location'):
            self.train_location = 0
        while True:
            image_id = int(self.train_image_ids[self.train_location])
            self.train_location = (
                (self.train_location + 1) % len(self.train_image_ids))
            filename = self.coco_train.loadImgs(image_id)[0]['file_name']
            image = io.imread('{}/{}'.format(self.train_image_dir, filename))
            if len(np.shape(image)) != 2:
                break
        return image, self._masks_of(self.coco_train, image_id)

    def read_one_val_image(self):
        """Return (image, masks) for the next usable validation image.

        An image is usable when it loads as a 3-D array and has at least one
        annotation mask; others are skipped.
        """
        if not hasattr(self, 'val_location'):
            self.val_location = 0
        while True:
            image_id = int(self.val_image_ids[self.val_location])
            self.val_location = (
                (self.val_location + 1) % len(self.val_image_ids))
            filename = self.coco_val.loadImgs(image_id)[0]['file_name']
            image = io.imread('{}/{}'.format(self.val_image_dir, filename))
            masks = self._masks_of(self.coco_val, image_id)
            if len(np.shape(image)) == 3 and len(np.shape(masks)) == 3:
                break
        return image, masks

    @staticmethod
    def _click_plane(mask, points):
        """Draw a filled radius-5 disc of ones at every (row, col) in points."""
        # .copy() yields a contiguous array OpenCV can draw into.
        plane = np.zeros_like(mask)[:, :, np.newaxis].copy()
        for row, col in points:
            # cv2 expects (x, y) == (col, row); cast to plain ints because
            # some OpenCV builds reject numpy integer scalars.
            cv2.circle(plane, (int(col), int(row)), 5, 1, thickness=-1)
        return plane

    def simulate(self, image, masks):
        """Build one 5-channel input per mask.

        :return: (images, masks) where each image is ``image`` concatenated
            with a positive and a negative click plane along the channel axis.
        """
        images = []
        for mask in masks:
            pos_plane = self._click_plane(
                mask, self.random_pos_points(mask, 15))
            neg_plane = self._click_plane(
                mask, self.random_neg_points(mask, 15))
            images.append(
                np.concatenate([image, pos_plane, neg_plane], axis=2))
        return images, masks

    def _produce(self, attr, reader, max_queue_size):
        """Endlessly read, simulate and push [image, mask] pairs to a queue."""
        if not hasattr(self, attr):
            setattr(self, attr, queue.Queue(maxsize=max_queue_size))
        out = getattr(self, attr)
        while True:
            image, masks = reader()
            images, masks = self.simulate(image, masks)
            for sample, mask in zip(images, masks):
                out.put([sample, mask])

    def _start_producer(self, attr, target, max_queue_size):
        """Create the queue and start its producer thread exactly once."""
        if hasattr(self, attr):
            return
        # Create the queue here, synchronously. It used to be created inside
        # the producer thread, so callers polling for the attribute could
        # start several producer threads before the first one had set it.
        setattr(self, attr, queue.Queue(maxsize=max_queue_size))
        # NOTE: the thread is not a daemon and its target loops forever, so it
        # keeps the interpreter alive until the process is terminated.
        threading.Thread(target=target, args=(max_queue_size, )).start()

    def add_queue(self, max_queue_size=10):
        """Producer loop for the training queue (never returns)."""
        self._produce('queue', self.read_one_train_image, max_queue_size)

    def start_queue(self, max_queue_size=10):
        """Start the training producer thread if it is not running yet."""
        self._start_producer('queue', self.add_queue, max_queue_size)

    def get_batch_train(self, batch_size=4, image_size=(513, 513)):
        """Return ``batch_size`` training samples resized to ``image_size``.

        Masks are resized with nearest-neighbour interpolation.
        :return: (list of images, list of masks)
        """
        self.start_queue()
        batch_x = []
        batch_y = []
        for _ in range(batch_size):
            image, mask = self.queue.get()
            batch_x.append(cv2.resize(image, image_size))
            batch_y.append(
                cv2.resize(mask, image_size, interpolation=cv2.INTER_NEAREST))
        return batch_x, batch_y

    def get_one_val(self):
        """Return the first simulated sample of the next validation image."""
        image, masks = self.read_one_val_image()
        images, masks = self.simulate(image, masks)
        return [images[0]], [masks[0]]

    def add_queue_val(self, max_queue_size=10):
        """Producer loop for the validation queue (never returns)."""
        self._produce('val_queue', self.read_one_val_image, max_queue_size)

    def start_queue_val(self, max_queue_size=10):
        """Start the validation producer thread if it is not running yet."""
        self._start_producer('val_queue', self.add_queue_val, max_queue_size)

    def get_batch_val(self, batch_size=4):
        """Return up to ``batch_size`` validation samples of equal size.

        ``batch_size`` samples are taken from the queue; those whose height
        or width differ from the first one are dropped, so the batch may be
        shorter than requested.
        """
        self.start_queue_val()
        batch_x = []
        batch_y = []
        for _ in range(batch_size):
            image, mask = self.val_queue.get()
            same_size = (not batch_x or
                         np.shape(batch_x[0])[:2] == np.shape(image)[:2])
            if same_size:
                batch_x.append(image)
                batch_y.append(mask)
        return batch_x, batch_y
class COCODetection(data.Dataset):
    """`MS Coco Detection <http://mscoco.org/dataset/#detections-challenge2016>`_ Dataset.

    Args:
        image_path (string): Directory containing the images.
        info_file (string): Path to the COCO annotation JSON.
        transform (callable, optional): Augmentation called as
            ``transform(img, masks, boxes, labels_dict)``.
        target_transform (callable, optional): Converts the raw annotation
            list, called as ``target_transform(target, width, height)``.
            Defaults to ``COCOAnnotationTransform()``.
        dataset_name (string): Display name stored in ``self.name``.
        has_gt (bool): Whether annotations should be loaded for each image.
    """

    def __init__(self,
                 image_path,
                 info_file,
                 transform=None,
                 target_transform=None,
                 dataset_name='MS COCO',
                 has_gt=True):
        # Do this here because we have too many things named COCO
        from pycocotools.coco import COCO

        if target_transform is None:
            target_transform = COCOAnnotationTransform()

        self.root = image_path
        # Load the annotation file into the COCO API.
        self.coco = COCO(info_file)

        # Start from the image ids that have at least one annotation.
        self.ids = list(self.coco.imgToAnns.keys())
        print("self.ids:")
        print(len(self.ids))
        # Fall back to every image when nothing is annotated or when ground
        # truth is not wanted.
        if len(self.ids) == 0 or not has_gt:
            self.ids = list(self.coco.imgs.keys())

        self.transform = transform
        # Honour the caller's transform; it used to be replaced
        # unconditionally by a fresh COCOAnnotationTransform().
        self.target_transform = target_transform

        self.name = dataset_name
        self.has_gt = has_gt

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: Tuple (image, (target, masks, num_crowds)).
        """
        im, gt, masks, h, w, num_crowds = self.pull_item(index)
        return im, (gt, masks, num_crowds)

    def __len__(self):
        return len(self.ids)

    def pull_item(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: Tuple (image, target, masks, height, width, num_crowds).
                ``target`` and ``masks`` are None when the image has no
                annotations and a transform is set.
        """
        img_id = self.ids[index]

        if self.has_gt:
            ann_ids = self.coco.getAnnIds(imgIds=img_id)
            # Target has {'segmentation', 'area', iscrowd', 'image_id', 'bbox', 'category_id'}
            target = [
                x for x in self.coco.loadAnns(ann_ids)
                if x['image_id'] == img_id
            ]
        else:
            target = []

        # Separate out crowd annotations. These are annotations that signify a
        # large crowd of objects of said class, where there is no annotation
        # for each individual object. Both during testing and training,
        # consider these crowds as neutral.
        crowd = [x for x in target if x.get('iscrowd')]
        target = [x for x in target if not x.get('iscrowd')]
        num_crowds = len(crowd)

        # Crowd entries get category_id -1. Work on shallow copies so the
        # dicts stored inside the shared COCO index are not modified.
        crowd = [dict(x, category_id=-1) for x in crowd]

        # This is so we ensure that all crowd annotations are at the end of
        # the array
        target += crowd

        # The split here is to have compatibility with both COCO2014 and 2017
        # annotations. In 2014, images have the pattern
        # COCO_{train/val}2014_%012d.jpg, while in 2017 it's %012d.jpg.
        # Images are expected on disk as %012d.jpg so convert accordingly.
        file_name = self.coco.loadImgs(img_id)[0]['file_name']
        if file_name.startswith('COCO'):
            file_name = file_name.split('_')[-1]

        path = osp.join(self.root, file_name)
        assert osp.exists(path), 'Image path does not exist: {}'.format(path)

        img = cv2.imread(path)
        height, width, _ = img.shape

        # Defined up front so the return below never hits an unbound name.
        masks = None
        if len(target) > 0:
            # Pool all the masks for this image into one
            # [num_objects,height,width] matrix
            masks = [self.coco.annToMask(obj).reshape(-1) for obj in target]
            masks = np.vstack(masks)
            masks = masks.reshape(-1, height, width)

        if self.target_transform is not None and len(target) > 0:
            target = self.target_transform(target, width, height)

        if self.transform is not None:
            if len(target) > 0:
                target = np.array(target)
                img, masks, boxes, labels = self.transform(
                    img, masks, target[:, :4], {
                        'num_crowds': num_crowds,
                        'labels': target[:, 4]
                    })

                # num_crowds rides along inside the labels dict so the
                # augmentations need not know about it
                num_crowds = labels['num_crowds']
                labels = labels['labels']

                target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
            else:
                # Dummy mask/box/label so the augmentation can still run.
                # Builtin float replaces the np.float alias, which newer NumPy
                # releases no longer provide.
                img, _, _, _ = self.transform(
                    img, np.zeros((1, height, width), dtype=float),
                    np.array([[0, 0, 1, 1]]), {
                        'num_crowds': 0,
                        'labels': np.array([0])
                    })
                masks = None
                target = None

        # Resample only when augmentation really emptied the target array;
        # target may also be None or a plain list here, which used to raise
        # AttributeError on `.shape`.
        if isinstance(target, np.ndarray) and target.shape[0] == 0:
            print(
                'Warning: Augmentation output an example with no ground truth. Resampling...'
            )
            return self.pull_item(random.randint(0, len(self.ids) - 1))

        return torch.from_numpy(img).permute(
            2, 0, 1), target, masks, height, width, num_crowds

    def pull_image(self, index):
        '''Returns the original image at index as read by cv2.imread

        Note: not using self.__getitem__(), as any transformations passed in
        could mess up this functionality.

        Argument:
            index (int): index of img to show
        Return:
            cv2 img
        '''
        # NOTE(review): unlike pull_item, the 'COCO_..._' file-name prefix is
        # not stripped here -- confirm which naming the image folder uses.
        img_id = self.ids[index]
        path = self.coco.loadImgs(img_id)[0]['file_name']
        return cv2.imread(osp.join(self.root, path), cv2.IMREAD_COLOR)

    def pull_anno(self, index):
        '''Returns the original annotation of image at index

        Note: not using self.__getitem__(), as any transformations passed in
        could mess up this functionality.

        Argument:
            index (int): index of img to get annotation of
        Return:
            list: the annotations returned by ``coco.loadAnns`` for the image
        '''
        img_id = self.ids[index]
        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        return self.coco.loadAnns(ann_ids)

    def __repr__(self):
        fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
        fmt_str += ' Number of datapoints: {}\n'.format(self.__len__())
        fmt_str += ' Root Location: {}\n'.format(self.root)
        tmp = ' Transforms (if any): '
        fmt_str += '{0}{1}\n'.format(
            tmp,
            self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        tmp = ' Target Transforms (if any): '
        fmt_str += '{0}{1}'.format(
            tmp,
            self.target_transform.__repr__().replace('\n',
                                                     '\n' + ' ' * len(tmp)))
        return fmt_str
class COCODetection(data.Dataset):
    """COCO detection/segmentation dataset with optional augmentation.

    Args:
        image_path (str): directory containing the images.
        info_file (str): path to the COCO annotation JSON.
        augmentation (callable, optional): called as
            ``augmentation(img, masks, boxes, labels_dict)`` and expected to
            return ``(img, masks, boxes, labels_dict)``.
    """

    def __init__(self, image_path, info_file, augmentation=None):
        self.image_path = image_path
        self.coco = COCO(info_file)
        # Only images that have at least one annotation.
        self.ids = list(self.coco.imgToAnns.keys())
        self.augmentation = augmentation
        self.label_map = cfg.label_map

    def __getitem__(self, index):
        """Return (image, boxes, masks, num_crowds) for ``index``."""
        im, gt, masks, h, w, num_crowds = self.pull_item(index)
        return im, gt, masks, num_crowds

    def __len__(self):
        return len(self.ids)

    def pull_item(self, index):
        """Load one image with its boxes and masks.

        Returns:
            tuple: (image tensor in CHW order, boxes, masks, height, width,
            num_crowds). ``boxes`` rows are (xmin, ymin, xmax, ymax,
            label_idx) with coordinates divided by the image size; ``boxes``
            is None when no annotation carries a bbox and ``masks`` is None
            when the image has no annotations.
        """
        img_ids = self.ids[index]
        ann_ids = self.coco.getAnnIds(imgIds=img_ids)

        # 'target' includes {'segmentation', 'area', iscrowd', 'image_id', 'bbox', 'category_id'}
        target = self.coco.loadAnns(ann_ids)

        # Separate out crowd annotations. These are annotations that signify a
        # large crowd of objects, where there is no annotation for each
        # individual object. When testing and training, treat these crowds as
        # neutral.
        crowd = [x for x in target if x.get('iscrowd')]
        target = [x for x in target if not x.get('iscrowd')]
        num_crowds = len(crowd)

        # Ensure that all crowd annotations are at the end of the array.
        target += crowd

        file_name = self.coco.loadImgs(img_ids)[0]['file_name']
        img_path = osp.join(self.image_path, file_name)
        assert osp.exists(img_path), f'Image path does not exist: {img_path}'

        img = cv2.imread(img_path)
        height, width, _ = img.shape

        # Defined up front: these used to be unbound at the return statement
        # whenever there were no targets, no boxes or no augmentation.
        masks = None
        boxes = None

        if len(target) > 0:
            masks = [self.coco.annToMask(aa).reshape(-1) for aa in target]
            masks = np.vstack(masks)
            # between 0~1, (num_objs, height, width)
            masks = masks.reshape((-1, height, width))

            # Uncomment this to visualize the masks.
            # cv2.imshow('aa', masks[0]*255)
            # cv2.waitKey()

        scale = np.array([width, height, width, height])
        box_list = []
        for obj in target:
            if 'bbox' not in obj:
                print("No bbox found for object ", obj)
                continue
            bbox = obj['bbox']
            label_idx = self.label_map[obj['category_id']] - 1
            # (x, y, w, h) -> (xmin, ymin, xmax, ymax), scaled by image size
            final_box = list(
                np.array([
                    bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]
                ]) / scale)
            final_box.append(label_idx)
            # (xmin, ymin, xmax, ymax, label_idx), between 0~1
            box_list.append(final_box)

        if box_list:
            boxes = np.array(box_list)

            if self.augmentation is not None:
                img, masks, aug_boxes, labels = self.augmentation(
                    img, masks, boxes[:, :4], {
                        'num_crowds': num_crowds,
                        'labels': boxes[:, 4]
                    })

                # num_crowds rides along inside the labels dict so the
                # augmentations need not know about it.
                num_crowds = labels['num_crowds']
                labels = labels['labels']

                boxes = np.hstack((aug_boxes, np.expand_dims(labels, axis=1)))

        return torch.from_numpy(img).permute(
            2, 0, 1), boxes, masks, height, width, num_crowds
def coco_to_TFrecords(keypoint_annotations_file, transformed_annotations_file,
                      config):
    """Transform COCO 2017 person-keypoint annotations into sharded TFRecords.

    For every image of category 1 that has at least one person with labelled
    keypoints, one example is written holding the raw image bytes, the image
    size, the keypoints, the joints built from them and a loss mask. The mask
    is 0 over persons without keypoint labels (dilated after downsampling)
    and 1 elsewhere. Meant to run once.

    :param keypoint_annotations_file: path to the COCO keypoint JSON.
    :param transformed_annotations_file: output path prefix; shards are named
        ``<prefix>-NNN.tfrecords``.
    :param config: provides IMAGES_PER_TFRECORD, LABEL_HEIGHT, LABEL_WIDTH and
        IMAGES_PATH, and is passed on to the keypoint helpers.
    """
    print("\nReading " + keypoint_annotations_file)
    coco = COCO(keypoint_annotations_file)

    category = 1
    imgIds = coco.getImgIds(catIds=[category])
    imgIds.sort()
    print("Found %d images" % len(imgIds))

    files_path = transformed_annotations_file + "-{:03}.tfrecords"
    # Structuring element used to grow the masked area after downsampling.
    kernel = np.ones((5, 5), np.uint8)

    with FileSharder(tf.io.TFRecordWriter, files_path,
                     config.IMAGES_PER_TFRECORD) as writer:
        for img_id in imgIds:
            img_info = coco.loadImgs(img_id)[0]
            size = [img_info['height'], img_info['width']]
            anns = coco.loadAnns(coco.getAnnIds(imgIds=[img_id]))

            # Remap the keypoints of every person that has any labelled.
            persons_kpts = [
                map_new_kpts(reshape_kpts(annotation['keypoints'], config),
                             config)
                for annotation in anns
                if annotation['num_keypoints'] > 0
            ]
            if not persons_kpts:
                # the image has no people with keypoint annotations
                continue

            persons_kpts = np.array(persons_kpts, dtype=np.float32)
            # Builtin int replaces the np.int alias, which newer NumPy
            # releases no longer provide.
            keypoints = transform_keypts(persons_kpts,
                                         np.array(size, dtype=int))
            tr_joint = create_all_joints(keypoints, config)
            # transpose keypoints for later stages
            tr_keypoints = keypoints.transpose((1, 0, 2))

            # Union of the masks of persons without keypoint labels.
            total_mask = np.zeros(size, dtype=np.float32)
            for annotation in anns:
                if annotation['num_keypoints'] == 0:
                    single_mask = coco.annToMask(annotation)
                    total_mask = np.maximum(total_mask, single_mask)

            # cv2.resize takes dsize as (width, height); the previous
            # (height, width) order transposed the output whenever the label
            # map is not square.
            total_mask = cv2.resize(total_mask,
                                    (config.LABEL_WIDTH, config.LABEL_HEIGHT))
            total_mask = (total_mask > 0.01).astype(np.int16)
            # get more area after downsample
            total_mask = cv2.dilate(total_mask, kernel)
            # Builtin bool replaces the np.bool alias for the same reason as
            # np.int above. Invert for loss multiplication later.
            total_mask = np.invert(total_mask.astype(bool))
            total_mask = total_mask.astype(np.float32)

            img_path = config.IMAGES_PATH + "/" + img_info['file_name']
            try:
                image_raw = tf.io.read_file(img_path)
            except Exception:
                # Best effort: report the unreadable image and move on.
                print("Couldn't read file %s" % img_path)
                continue

            example = encode_example(img_id, image_raw, size, tr_keypoints,
                                     tr_joint, total_mask)
            writer.write(example)
class CocoPoseGenerator(object):
    """Export COCO keypoint annotations as per-image JSON labels, masks and images.

    ``args`` must provide ``root_dir`` (output root), ``anno_file`` (COCO
    annotation JSON), ``img_dir`` (source images) and ``input_size`` (divisor
    used to derive a person's ``scale`` from its bbox height).
    """

    # COCO visibility flag -> exported flag. COCO uses 2 = visible,
    # 1 = labeled but not visible, 0 = not labeled; the exported labels use
    # 1 = visible, 0 = not visible, 2 = not labeled.
    _VISIBILITY = {2: 1, 1: 0}
    _NOT_LABELED = 2

    def __init__(self, args, json_dir=JOSN_DIR, mask_dir=MASK_DIR, image_dir=IMAGE_DIR):
        """Create the output folders under ``args.root_dir`` and load the annotations.

        :args: namespace-like object, see the class docstring
        :json_dir (str): sub-folder for the JSON label files
        :mask_dir (str): sub-folder for the mask PNG files
        :image_dir (str): sub-folder for the copied images
        """
        self.args = args
        self.json_dir = os.path.join(self.args.root_dir, json_dir)
        self.image_dir = os.path.join(self.args.root_dir, image_dir)
        self.mask_dir = os.path.join(self.args.root_dir, mask_dir)
        for out_dir in (self.json_dir, self.image_dir, self.mask_dir):
            # exist_ok avoids the check-then-create race of os.path.exists().
            os.makedirs(out_dir, exist_ok=True)

        self.coco = COCO(self.args.anno_file)
        self.img_ids = list(self.coco.imgs.keys())

    def generate_label(self):
        """Write the JSON label, the miss-mask PNGs and an image copy per image.

        Images for which no person is retained produce no output at all, so
        the JSON, mask and image folders stay aligned. Progress is printed
        every 1000 images.

        :raises Exception: if an image with retained persons has more than
            one crowd annotation
        """
        total = len(self.img_ids)
        for i, img_id in enumerate(self.img_ids):
            img_anns = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))
            img_info = self.coco.imgs[img_id]
            filename = img_info['file_name']
            width = img_info['width']
            height = img_info['height']

            persons, masked = self._select_persons(img_anns)
            if persons:
                # Output files are named after the part before the first dot.
                stem = filename.split('.')[0]

                json_dict = {
                    'height': height,
                    'width': width,
                    'persons': self.__coco_to_ours(persons),
                }
                json_path = os.path.join(self.json_dir, '{}.json'.format(stem))
                with open(json_path, 'w') as fw:
                    fw.write(json.dumps(json_dict))

                mask_miss = self._build_miss_mask(img_anns, masked, height, width)
                # 0/255 copy of the mask that is viewable in an image viewer.
                mask_miss_vis = np.zeros((height, width), dtype=np.uint8)
                mask_miss_vis[:, :] = mask_miss * 255

                mask_image = Image.fromarray(mask_miss, mode='P')
                mask_image.save(
                    os.path.join(self.mask_dir, '{}.png'.format(stem)))
                mask_image_vis = Image.fromarray(mask_miss_vis, mode='P')
                mask_image_vis.save(
                    os.path.join(self.mask_dir, '{}_vis.png'.format(stem)))

                shutil.copyfile(os.path.join(self.args.img_dir, filename),
                                os.path.join(self.image_dir, filename))

            if i % 1000 == 0:
                print("Processed {} of {}".format(i, total))

    def _select_persons(self, img_anns):
        """Split annotations into exported persons and indices to mask out.

        An annotation is masked out when it has fewer than 5 keypoints, an
        area below 32 * 32, or a bbox centre closer to an already kept person
        than 0.3 times that person's longest bbox side.

        :img_anns (list): COCO annotation dicts of one image
        :return (tuple(list, set)): person dicts (17 COCO keypoints each) and
            the indices into ``img_anns`` of masked-out annotations
        """
        persons = []
        masked = set()
        kept_centers = []  # (centre x, centre y, longest bbox side)

        for idx, ann in enumerate(img_anns):
            if ann['num_keypoints'] < 5 or ann['area'] < 32 * 32:
                masked.add(idx)
                continue

            bbox = ann['bbox']
            center = [bbox[0] + bbox[2] / 2.0, bbox[1] + bbox[3] / 2.0]

            # Skip this person if it is too close to an already kept one.
            too_close = any(
                math.sqrt((center[0] - cx) * (center[0] - cx)
                          + (center[1] - cy) * (center[1] - cy)) < extent * 0.3
                for cx, cy, extent in kept_centers
            )
            if too_close:
                masked.add(idx)
                continue

            kpt = ann['keypoints']  # flat [x0, y0, v0, x1, y1, v1, ...]
            keypoints = [
                [kpt[part * 3],
                 kpt[part * 3 + 1],
                 self._VISIBILITY.get(kpt[part * 3 + 2], self._NOT_LABELED)]
                for part in range(17)
            ]
            persons.append({
                'bbox': bbox,
                'objpos': center,
                'keypoints': keypoints,
                'scale': bbox[3] / self.args.input_size,
            })
            kept_centers.append((center[0], center[1], max(bbox[2], bbox[3])))

        return persons, masked

    def _build_miss_mask(self, img_anns, masked, height, width):
        """Build the boolean mask that is False over people lacking usable labels.

        :img_anns (list): COCO annotation dicts of one image
        :masked (set): indices into ``img_anns`` of masked-out annotations
        :height (int): image height
        :width (int): image width
        :return ([height, width] bool array): False where a masked-out person
            or the crowd region (minus persons seen before it) lies
        :raises Exception: if the image has more than one crowd annotation
        """
        mask_all = np.zeros((height, width), dtype=np.uint8)
        mask_miss = np.zeros((height, width), dtype=np.uint8)
        mask_crowd = None
        crowd_count = 0

        for idx, ann in enumerate(img_anns):
            mask = self.coco.annToMask(ann)
            if ann['iscrowd'] == 1:
                # Keep only the crowd pixels not covered by earlier persons.
                mask_crowd = mask - np.bitwise_and(mask_all, mask)
                crowd_count += 1
                continue
            mask_all = np.bitwise_or(mask, mask_all)
            if idx in masked:
                mask_miss = np.bitwise_or(mask, mask_miss)

        if crowd_count > 1:
            raise Exception('crowd segments > 1')
        if crowd_count == 1:
            mask_miss = np.bitwise_or(mask_miss, mask_crowd)
        return np.logical_not(mask_miss)

    def __coco_to_ours(self, persons):
        """Remap 17-keypoint COCO persons to the 18-keypoint project layout.

        Keypoints are moved according to ``COCO_TO_OURS``; slot 1 is filled
        with the midpoint of COCO keypoints 5 and 6 (the shoulders in the COCO
        keypoint order).

        :persons (list): person dicts with 'bbox', 'objpos', 'keypoints', 'scale'
        :return (list): person dicts with 'bbox', 'pos_center', 'keypoints', 'scale'
        """
        our_persons = []
        for person in persons:
            src = person['keypoints']
            keypoints = np.zeros((18, 3)).tolist()
            for i in range(17):
                keypoints[COCO_TO_OURS[i]] = list(src[i][:3])

            shoulder_a, shoulder_b = src[5], src[6]
            if shoulder_a[2] == shoulder_b[2]:
                neck_flag = shoulder_a[2]
            elif 2 in (shoulder_a[2], shoulder_b[2]):
                # One shoulder is not labeled, so neither is the midpoint.
                neck_flag = 2
            else:
                neck_flag = 0
            keypoints[1] = [
                (shoulder_a[0] + shoulder_b[0]) * 0.5,
                (shoulder_a[1] + shoulder_b[1]) * 0.5,
                neck_flag,
            ]

            our_persons.append({
                'bbox': person['bbox'],
                'pos_center': person['objpos'],
                'keypoints': keypoints,
                'scale': person['scale'],
            })
        return our_persons