def __init__(self, dataset_dir, subset, flip_ratio=0, pad_mode='fixed',
             mean=(0, 0, 0), std=(1, 1, 1), scale=(1024, 800), debug=False):
    """Load a subset of the COCO dataset.

    Args:
        dataset_dir: Root directory of the COCO dataset. Annotations are
            read from ``<dataset_dir>/annotations/instances_<subset>2017.json``
            and the image directory is ``<dataset_dir>/<subset>2017``.
        subset: Which split to load, ``'train'`` or ``'val'``.
        flip_ratio: Float. The ratio of flipping an image and its
            bounding boxes.
        pad_mode: Padding method, ``'fixed'`` or ``'non-fixed'``. Any other
            value falls back to ``'fixed'`` for the train split and
            ``'non-fixed'`` otherwise.
        mean: Tuple. Image mean, forwarded to the image transform.
        std: Tuple. Image standard deviation, forwarded to the image
            transform.
        scale: Tuple of two integers, forwarded to the image transform.
        debug: If true, keep only the first 20 images.

    Raises:
        AssertionError: If ``subset`` is not ``'train'`` or ``'val'``.
    """
    if subset not in ['train', 'val']:
        raise AssertionError('subset must be "train" or "val".')

    self.coco = COCO("{}/annotations/instances_{}2017.json".format(
        dataset_dir, subset))

    # Map the original category ids to contiguous labels starting at 1
    # (0 is presumably left for background -- confirm against the model),
    # and keep the inverse mapping as well.
    self.cat_ids = self.coco.getCatIds()
    self.cat2label = {
        cat_id: i + 1
        for i, cat_id in enumerate(self.cat_ids)
    }
    self.label2cat = {
        i + 1: cat_id
        for i, cat_id in enumerate(self.cat_ids)
    }

    self.img_ids, self.img_infos = self._filter_imgs()
    if debug:
        # A small prefix is enough for quick debugging runs.
        self.img_ids, self.img_infos = self.img_ids[:20], self.img_infos[:20]

    self.image_dir = "{}/{}2017".format(dataset_dir, subset)
    self.flip_ratio = flip_ratio

    # Resolve the effective padding mode; unknown values fall back to a
    # split-dependent default.
    if pad_mode in ['fixed', 'non-fixed']:
        self.pad_mode = pad_mode
    elif subset == 'train':
        self.pad_mode = 'fixed'
    else:
        self.pad_mode = 'non-fixed'

    # Use the resolved mode here: the raw argument would bypass the
    # fallback computed above.
    self.img_transform = transforms.ImageTransform(
        scale, mean, std, self.pad_mode)
    self.bbox_transform = transforms.BboxTransform()
def __init__(self, dataset_dir, subset, flip_ratio=0, pad_mode='fixed',
             config=None, debug=False):
    """Load a subset of the COCO dataset.

    Args:
        dataset_dir: Root directory of the COCO dataset. Annotations are
            read from ``<dataset_dir>/annotations/instances_<subset>2017.json``
            and the image directory is ``<dataset_dir>/<subset>2017``.
        subset: Which split to load, ``'train'`` or ``'val'``.
        flip_ratio: Float. The ratio of flipping an image and its
            bounding boxes.
        pad_mode: Padding method, ``'fixed'`` or ``'non-fixed'``. Any other
            value falls back to ``'fixed'`` for the train split and
            ``'non-fixed'`` otherwise.
        config: Configuration object. Its ``IMAGE_MIN_DIM``,
            ``IMAGE_MAX_DIM``, ``MEAN_PIXEL`` and ``STD_PIXEL`` attributes
            are read to build the image transform, so despite the ``None``
            default a real config must be supplied.
        debug: If true, keep only the first 50 images.

    Raises:
        AssertionError: If ``subset`` is not ``'train'`` or ``'val'``.
    """
    self.config = config
    self.debug = debug

    if subset not in ['train', 'val']:
        raise AssertionError('subset must be "train" or "val".')

    self.coco = COCO(
        f"{dataset_dir}/annotations/instances_{subset}2017.json")

    # Map the original category ids to contiguous labels starting at 1
    # (0 is presumably left for background -- confirm against the model).
    self.cat_ids = self.coco.getCatIds()
    self.cat2label = {
        cat_id: i + 1
        for i, cat_id in enumerate(self.cat_ids)
    }

    self.img_ids, self.img_infos = self._filter_imgs()
    if debug:
        # A small prefix is enough for quick debugging runs.
        self.img_ids, self.img_infos = self.img_ids[:50], self.img_infos[:50]

    self.image_dir = f"{dataset_dir}/{subset}2017"
    self.flip_ratio = flip_ratio

    # Resolve the effective padding mode; unknown values fall back to a
    # split-dependent default.
    if pad_mode in ['fixed', 'non-fixed']:
        self.pad_mode = pad_mode
    elif subset == 'train':
        self.pad_mode = 'fixed'
    else:
        self.pad_mode = 'non-fixed'

    # Use the resolved mode here: the raw argument would bypass the
    # fallback computed above.
    self.img_transform = transforms.ImageTransform(
        (config.IMAGE_MIN_DIM, config.IMAGE_MAX_DIM),
        config.MEAN_PIXEL,
        config.STD_PIXEL,
        self.pad_mode)
    self.bbox_transform = transforms.BboxTransform()
def __init__(self, dataset_dir, subset, flip_ratio=0, pad_mode='fixed',
             mean=(0, 0, 0), std=(1, 1, 1), scale=(1024, 800), debug=False):
    """Load a COCO-format dataset split.

    Args:
        dataset_dir: Root directory of the dataset. Annotations are read
            from ``<dataset_dir>/<subset>/<subset>.json`` and the image
            directory is ``<dataset_dir>/<subset>/images``.
        subset: ``'train'``, ``'val'`` or ``'test'``.
        flip_ratio: Float. Probability used to decide whether an image is
            flipped left-right.
        pad_mode: Padding mode, ``'fixed'`` or ``'non-fixed'``. Per the
            original author's notes, ``'fixed'`` pads to the larger value
            in ``scale``, while ``'non-fixed'`` pads minimally so that
            height and width become multiples of 64 (implemented in
            ``transforms.ImageTransform``, not visible here). Any other
            value falls back to ``'fixed'`` for the train split and
            ``'non-fixed'`` otherwise.
        mean: [3] Mean used for normalization.
        std: [3] Standard deviation used for normalization.
        scale: [2] Determines the resize scale.
        debug: If true, keep only the first 50 images.

    Raises:
        AssertionError: If ``subset`` is not one of the supported splits.
    """
    if subset not in ['train', 'val', 'test']:
        raise AssertionError('subset must be "train" or "val" or "test".')

    self.coco = COCO("{}/{}/{}.json".format(dataset_dir, subset, subset))
    self.image_dir = "{}/{}/images".format(dataset_dir, subset)  # image directory
    self.flip_ratio = flip_ratio

    if pad_mode in ['fixed', 'non-fixed']:
        self.pad_mode = pad_mode
    elif subset == 'train':
        # Training split: fixed padding, i.e. pad width and height up to a
        # fixed value (square), per the original author's notes.
        self.pad_mode = 'fixed'
    else:
        # Other splits: non-fixed padding, i.e. pad width and height up to
        # an integer multiple of a fixed value.
        self.pad_mode = 'non-fixed'

    self.cat_ids = self.coco.getCatIds()  # every category id in the annotation file
    self.cat2label = {  # annotation category id -> new 1-based label
        cat_id: i + 1
        for i, cat_id in enumerate(self.cat_ids)
    }
    self.label2cat = {  # new 1-based label -> annotation category id
        i + 1: cat_id
        for i, cat_id in enumerate(self.cat_ids)
    }

    # Image ids and the matching per-image info records.
    self.img_ids, self.img_infos = self._filter_imgs()
    if debug:
        # A small prefix is enough for quick debugging runs.
        self.img_ids, self.img_infos = self.img_ids[:50], self.img_infos[:50]

    # Image preprocessing. Use the resolved mode here: the raw argument
    # would bypass the fallback computed above.
    self.img_transform = transforms.ImageTransform(
        scale, mean, std, self.pad_mode)
    # Matching bounding-box processing.
    self.bbox_transform = transforms.BboxTransform()