def get_ade20k(root, url):
    data_root = download.get_dataset_directory(root)
    if os.path.exists(os.path.join(data_root, 'ADEChallengeData2016')):
        # skip downloading
        return data_root
    cache_path = utils.cached_download(url)
    utils.extractall(cache_path, data_root, os.path.splitext(url)[1])
    return data_root

def get_voc(year, split):
    if year not in urls:
        raise ValueError
    key = year
    if split == 'test' and year == '2007':
        key = '2007_test'
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(os.path.join(
            download.get_dataset_directory('pfnet/chainercv/.lock'),
            'voc.lock')):
        data_root = download.get_dataset_directory(root)
        base_path = os.path.join(data_root, 'VOCdevkit/VOC{}'.format(year))
        split_file = os.path.join(
            base_path, 'ImageSets/Main/{}.txt'.format(split))
        if os.path.exists(split_file):
            # skip downloading
            return base_path
        download_file_path = utils.cached_download(urls[key])
        ext = os.path.splitext(urls[key])[1]
        utils.extractall(download_file_path, data_root, ext)
    return base_path

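# The filelock pattern above generalizes to any of these getters. A minimal
# hypothetical sketch (the helper name `locked_fetch` is an assumption, not
# part of the original code): serialize the download across ChainerMN
# workers, then run the actual fetch logic.
import os

import filelock
from chainer.dataset import download


def locked_fetch(lock_name, fetch):
    lock_dir = download.get_dataset_directory('pfnet/chainercv/.lock')
    with filelock.FileLock(os.path.join(lock_dir, lock_name)):
        # only one process downloads and extracts; the others block here,
        # then see the already-extracted directory and return immediately
        return fetch()
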
def get_fashion_landmark():
    data_dir = download.get_dataset_directory(root)
    url = 'https://drive.google.com/uc?id=0B7EVK8r0v71pSU9nOXVDMk9WbWM'
    img_root = os.path.join(data_dir, 'img')
    anno_root = os.path.join(data_dir, 'Anno')
    eval_root = os.path.join(data_dir, 'Eval')
    download_file_path = cached_gdown_download(url)
    if not os.path.exists(img_root):
        utils.extractall(download_file_path, data_dir, '.zip')
    landmark_annotation_url = (
        'https://drive.google.com/uc?id=0B7EVK8r0v71pZ3pGVFZ0YjZVTjg')
    download_file_path = cached_gdown_download(landmark_annotation_url)
    try:
        os.makedirs(anno_root)
    except OSError:
        if not os.path.exists(anno_root):
            raise
    shutil.copy(download_file_path,
                os.path.join(anno_root, 'list_landmarks.txt'))
    eval_list_url = (
        'https://drive.google.com/uc?id=0B7EVK8r0v71pakJzTEM0a2Q4Qm8')
    download_file_path = cached_gdown_download(eval_list_url)
    try:
        os.makedirs(eval_root)
    except OSError:
        if not os.path.exists(eval_root):
            raise
    shutil.copy(download_file_path,
                os.path.join(eval_root, 'list_eval_partition.txt'))
    return data_dir

def get_coco(split, img_split, data_dir=None):
    url = img_urls[img_split]
    if data_dir is None:
        data_dir = download.get_dataset_directory(root)
    img_root = os.path.join(data_dir, 'images')
    created_img_root = os.path.join(img_root, img_split)
    if 'test' in split:
        # test-split annotation files contain image metadata only
        annos_root = data_dir
        anno_prefix = 'image_info'
    else:
        annos_root = os.path.join(data_dir, 'annotations')
        anno_prefix = 'instances'
    anno_fn = os.path.join(
        annos_root, '{0}_{1}.json'.format(anno_prefix, split))
    if not os.path.exists(created_img_root):
        download_file_path = utils.cached_download(url)
        ext = os.path.splitext(url)[1]
        utils.extractall(download_file_path, img_root, ext)
    if not os.path.exists(anno_fn):
        anno_url = anno_urls[split]
        download_file_path = utils.cached_download(anno_url)
        ext = os.path.splitext(anno_url)[1]
        utils.extractall(download_file_path, annos_root, ext)
    return data_dir

def get_voc(year, split):
    if year not in urls:
        raise ValueError
    key = year
    if split == 'test' and year == '2007':
        key = '2007_test'
    if not os.path.isdir(voc_dir):
        os.makedirs(voc_dir)
    filelock_path = os.path.join(voc_dir, 'voc.lock')
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(filelock_path):
        base_path = os.path.join(voc_dir, 'VOCdevkit/VOC{}'.format(year))
        split_file = os.path.join(
            base_path, 'ImageSets/Main/{}.txt'.format(split))
        if os.path.exists(split_file):
            # skip downloading
            if os.path.isfile(filelock_path):
                os.remove(filelock_path)  # remove lockfile
            return base_path
        download_file_path = utils.cached_download(urls[key])
        ext = os.path.splitext(urls[key])[1]
        utils.extractall(download_file_path, voc_dir, ext)
        if os.path.isfile(filelock_path):
            os.remove(filelock_path)  # remove lockfile
    return base_path

def get_sbd(data_dir=None):
    if data_dir is None:
        data_dir = download.get_dataset_directory(root)
    label_dir = osp.join(data_dir, 'fcis_label')
    if not osp.exists(label_dir):
        os.makedirs(label_dir)
    fcn.data.cached_download(
        url=val_url,
        path=osp.join(label_dir, 'val.txt'),
        md5='905db61182fcaaf6b981af6ae6dd7ff2',
    )
    fcn.data.cached_download(
        url=train_url,
        path=osp.join(label_dir, 'train.txt'),
        md5='79bff800c5f0b1ec6b21080a3c066722',
    )
    base_path = osp.join(data_dir, 'benchmark_RELEASE/dataset')
    if osp.exists(base_path):
        return base_path
    download_file_path = utils.cached_download(url)
    ext = osp.splitext(url)[1]
    utils.extractall(download_file_path, data_dir, ext)
    return base_path

def get_ade20k(root, url):
    data_root = download.get_dataset_directory(root)
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(os.path.join(data_root, 'lock')):
        if os.path.exists(os.path.join(data_root, 'ADEChallengeData2016')):
            return data_root
        cache_path = utils.cached_download(url)
        utils.extractall(cache_path, data_root, os.path.splitext(url)[1])
    return data_root

def _get_sintel():
    data_root = download.get_dataset_directory(root)
    if os.path.exists(os.path.join(data_root, 'training')):
        # skip downloading
        return data_root
    download_file_path = utils.cached_download(url)
    ext = os.path.splitext(url)[1]
    utils.extractall(download_file_path, data_root, ext)
    return data_root

def get_cub_mask():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'segmentations')
    if os.path.exists(base_path):
        # skip downloading
        return base_path
    download_file_path_mask = utils.cached_download(mask_url)
    ext_mask = os.path.splitext(mask_url)[1]
    utils.extractall(download_file_path_mask, data_root, ext_mask)
    return base_path

def _get_online_products():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'Stanford_Online_Products')
    if os.path.exists(base_path):
        # skip downloading
        return base_path
    download_file_path = utils.cached_download(url)
    ext = os.path.splitext(url)[1]
    utils.extractall(download_file_path, data_root, ext)
    return base_path

def get_sbd(data_dir=None):
    if data_dir is None:
        data_dir = download.get_dataset_directory(root)
    base_path = osp.join(data_dir, 'benchmark_RELEASE/dataset')
    if osp.exists(base_path):
        return base_path
    download_file_path = utils.cached_download(url)
    ext = osp.splitext(url)[1]
    utils.extractall(download_file_path, data_dir, ext)
    return base_path

def get_cub():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'CUB_200_2011')
    if os.path.exists(base_path):
        # skip downloading
        return base_path
    download_file_path = utils.cached_download(url)
    ext = os.path.splitext(url)[1]
    utils.extractall(download_file_path, data_root, ext)
    return base_path

def get_voc(data_dir=None):
    if data_dir is None:
        data_dir = download.get_dataset_directory(root)
    base_path = osp.join(data_dir, 'VOCdevkit/VOC2012')
    if osp.exists(base_path):
        return base_path
    download_file_path = utils.cached_download(url)
    ext = osp.splitext(url)[1]
    utils.extractall(download_file_path, data_dir, ext)
    return base_path

def get_cub_prob_map():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'segmentations')
    if os.path.exists(base_path):
        # skip downloading
        return base_path
    prob_map_download_file_path = utils.cached_download(prob_map_url)
    prob_map_ext = os.path.splitext(prob_map_url)[1]
    utils.extractall(prob_map_download_file_path, data_root, prob_map_ext)
    return base_path

def get_camvid():
    data_root = download.get_dataset_directory(root)
    download_file_path = utils.cached_download(url)
    # heuristic: skip extraction if data_root already holds the nine
    # expected entries
    if len(glob.glob(os.path.join(data_root, '*'))) != 9:
        utils.extractall(
            download_file_path, data_root, os.path.splitext(url)[1])
    data_dir = os.path.join(data_root, 'SegNet-Tutorial-master/CamVid')
    if os.path.exists(data_dir):
        for fn in glob.glob(os.path.join(data_dir, '*')):
            shutil.move(fn, os.path.join(data_root, os.path.basename(fn)))
        shutil.rmtree(os.path.dirname(data_dir))
    return data_root

def get_cub():
    data_root = download.get_dataset_directory(root)
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(os.path.join(data_root, 'lock')):
        base_path = os.path.join(data_root, 'CUB_200_2011')
        if os.path.exists(base_path):
            # skip downloading
            return base_path
        download_file_path = utils.cached_download(url)
        ext = os.path.splitext(url)[1]
        utils.extractall(download_file_path, data_root, ext)
    return base_path

def get_camvid():
    data_root = download.get_dataset_directory(root)
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(os.path.join(data_root, 'lock')):
        download_file_path = utils.cached_download(url)
        # heuristic: skip extraction if data_root is already populated
        # (the lock file accounts for the tenth entry)
        if len(glob.glob(os.path.join(data_root, '*'))) != 10:
            utils.extractall(
                download_file_path, data_root, os.path.splitext(url)[1])
        data_dir = os.path.join(data_root, 'SegNet-Tutorial-master/CamVid')
        if os.path.exists(data_dir):
            for fn in glob.glob(os.path.join(data_dir, '*')):
                shutil.move(
                    fn, os.path.join(data_root, os.path.basename(fn)))
            shutil.rmtree(os.path.dirname(data_dir))
    return data_root

def get_pascal_voc(year):
    if year not in urls:
        raise ValueError
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'VOCdevkit/VOC{}'.format(year))
    if os.path.exists(base_path):
        # skip downloading
        return base_path
    download_file_path = utils.cached_download(urls[year])
    ext = os.path.splitext(urls[year])[1]
    utils.extractall(download_file_path, data_root, ext)
    return base_path

def get_coco(split, img_split, year, mode):
    data_dir = download.get_dataset_directory(root)
    annos_root = os.path.join(data_dir, 'annotations')
    img_root = os.path.join(data_dir, 'images')
    created_img_root = os.path.join(
        img_root, '{}{}'.format(img_split, year))
    img_url = img_urls[year][img_split]
    if mode == 'instances':
        anno_url = instances_anno_urls[year][split]
        anno_path = os.path.join(
            annos_root, 'instances_{}{}.json'.format(split, year))
    elif mode == 'panoptic':
        anno_url = panoptic_anno_url
        anno_path = os.path.join(
            annos_root, 'panoptic_{}{}.json'.format(split, year))
    else:
        # avoid a NameError on anno_url/anno_path below
        raise ValueError('invalid mode {}'.format(mode))
    if not os.path.exists(created_img_root):
        download_file_path = utils.cached_download(img_url)
        ext = os.path.splitext(img_url)[1]
        utils.extractall(download_file_path, img_root, ext)
    if not os.path.exists(anno_path):
        download_file_path = utils.cached_download(anno_url)
        ext = os.path.splitext(anno_url)[1]
        if split in ['train', 'val']:
            utils.extractall(download_file_path, data_dir, ext)
        elif split in ['valminusminival', 'minival']:
            utils.extractall(download_file_path, annos_root, ext)
    if mode == 'panoptic':
        pixelmap_path = os.path.join(
            annos_root, 'panoptic_{}{}'.format(split, year))
        if not os.path.exists(pixelmap_path):
            utils.extractall(pixelmap_path + '.zip', annos_root, '.zip')
    return data_dir

def get_cub_prob_map():
    data_root = download.get_dataset_directory(root)
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(os.path.join(data_root, 'lock')):
        base_path = os.path.join(data_root, 'segmentations')
        if os.path.exists(base_path):
            # skip downloading
            return base_path
        prob_map_download_file_path = utils.cached_download(prob_map_url)
        prob_map_ext = os.path.splitext(prob_map_url)[1]
        utils.extractall(
            prob_map_download_file_path, data_root, prob_map_ext)
    return base_path

def get_mot(year, split):
    if year not in urls:
        raise ValueError
    data_root = download.get_dataset_directory(root)
    if year == '2015':
        mot_dirname = '2DMOT{}'.format(year)
    else:
        mot_dirname = 'MOT{}'.format(year[2:])
    base_path = os.path.join(data_root, mot_dirname)
    anno_path = os.path.join(base_path, 'annotations')
    anno_txt_path = os.path.join(anno_path, '{}.txt'.format(split))
    if not os.path.exists(base_path):
        download_file_path = utils.cached_download(urls[year])
        ext = os.path.splitext(urls[year])[1]
        utils.extractall(download_file_path, data_root, ext)
    if not os.path.exists(os.path.join(data_root, 'motchallenge-devkit')):
        download_devfile_path = utils.cached_download(dev_urls)
        dev_ext = os.path.splitext(dev_urls)[1]
        utils.extractall(download_devfile_path, data_root, dev_ext)
    if not os.path.exists(anno_path):
        os.mkdir(anno_path)
    if split == 'train':
        split_dirs = ['train']
    elif split == 'val':
        split_dirs = ['test']
    elif split == 'trainval':
        split_dirs = ['train', 'test']
    else:
        raise ValueError
    data_ids = []
    for split_d in split_dirs:
        seq_dirs = sorted(os.listdir(os.path.join(base_path, split_d)))
        for seq_d in seq_dirs:
            img_dir = os.path.join(base_path, split_d, seq_d, 'img1')
            img_names = sorted(os.listdir(img_dir))
            for img_name in img_names:
                data_id = '{0}_{1}_{2}'.format(
                    split_d, seq_d, img_name.split('.')[0])
                data_ids.append(data_id)
    with open(anno_txt_path, 'w') as anno_f:
        anno_f.write('\n'.join(data_ids))
    return base_path

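# Hypothetical companion to get_mot above (not in the original code): read
# back the '{split}.txt' file it writes and split each data_id into its
# (split_dir, sequence, frame) parts, mirroring the '{0}_{1}_{2}' format
# used when the file is generated.
def read_mot_data_ids(anno_txt_path):
    with open(anno_txt_path) as f:
        for data_id in f.read().splitlines():
            split_d, rest = data_id.split('_', 1)
            # sequence names may contain '-' but are assumed not to end in
            # '_<frame>', so take the frame id from the right-hand side
            seq_d, frame = rest.rsplit('_', 1)
            yield split_d, seq_d, frame
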
def _get_online_products():
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(os.path.join(
            download.get_dataset_directory('pfnet/chainercv/.lock'),
            'online_products.lock')):
        data_root = download.get_dataset_directory(root)
        base_path = os.path.join(data_root, 'Stanford_Online_Products')
        if os.path.exists(base_path):
            # skip downloading
            return base_path
        download_file_path = utils.cached_download(url)
        ext = os.path.splitext(url)[1]
        utils.extractall(download_file_path, data_root, ext)
    return base_path

def _get_imagenet(urls):
    data_root = download.get_dataset_directory(root)
    # this is error prone
    if os.path.exists(os.path.join(data_root, 'train')):
        return data_root
    for key, url in urls.items():
        download_file_path = utils.cached_download(url)
        d = os.path.join(data_root, key)
        if not os.path.exists(d):
            os.makedirs(d)
        ext = os.path.splitext(url)[1]
        utils.extractall(download_file_path, d, ext)
    # train dataset contains tar inside of tar
    train_dir = os.path.join(data_root, 'train')
    for tar_fn in os.listdir(train_dir):
        if tar_fn[-3:] == 'tar':
            with tarfile.TarFile(os.path.join(train_dir, tar_fn), 'r') as t:
                t.extractall(train_dir)
    # parse developers kit
    developers_kit_dir = os.path.join(data_root, 'developers_kit')
    synsets = _parse_meta_mat(developers_kit_dir)
    wnid_to_ilsvrc_id = {val['WNID']: key for key, val in synsets.items()}
    # prepare train_pairs.txt
    train_pairs_fn = os.path.join(data_root, 'train_pairs.txt')
    with open(train_pairs_fn, 'w') as f:
        for fn in os.listdir(train_dir):
            synset = fn[:9]
            if synset in wnid_to_ilsvrc_id and fn[-4:] == 'JPEG':
                int_key = wnid_to_ilsvrc_id[synset]  # starting from 0
                f.write('{} {}\n'.format(fn, int_key))
    # prepare val_pairs.txt
    val_pairs_fn = os.path.join(data_root, 'val_pairs.txt')
    val_gt_fn = os.path.join(
        developers_kit_dir, 'ILSVRC2012_devkit_t12/data',
        'ILSVRC2012_validation_ground_truth.txt')
    with open(val_pairs_fn, 'w') as f:
        for i, l in enumerate(open(val_gt_fn)):
            key = int(l)  # starting from 0
            index = i + 1
            fn = 'ILSVRC2012_val_{0:08}.JPEG'.format(index)
            f.write('{} {}\n'.format(fn, key))
    return data_root

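# A minimal reader for the pair files written by _get_imagenet above (a
# sketch, assuming only the '<filename> <integer label>' line format that
# the function itself writes out):
def _read_pairs(pairs_fn):
    pairs = []
    with open(pairs_fn) as f:
        for line in f:
            fn, label = line.split()
            pairs.append((fn, int(label)))
    return pairs
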
def get_coco(split, img_split):
    url = img_urls[img_split]
    data_dir = download.get_dataset_directory(root)
    img_root = os.path.join(data_dir, 'images')
    created_img_root = os.path.join(img_root, '{}2014'.format(img_split))
    annos_root = os.path.join(data_dir, 'annotations')
    anno_path = os.path.join(
        annos_root, 'instances_{}2014.json'.format(split))
    if not os.path.exists(created_img_root):
        download_file_path = utils.cached_download(url)
        ext = os.path.splitext(url)[1]
        utils.extractall(download_file_path, img_root, ext)
    if not os.path.exists(anno_path):
        anno_url = anno_urls[split]
        download_file_path = utils.cached_download(anno_url)
        ext = os.path.splitext(anno_url)[1]
        utils.extractall(download_file_path, annos_root, ext)
    return data_dir

def get_sbd():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'benchmark_RELEASE/dataset')
    train_voc2012_file = os.path.join(base_path, 'train_voc2012.txt')
    if os.path.exists(train_voc2012_file):
        # skip downloading
        return base_path
    download_file_path = utils.cached_download(url)
    ext = os.path.splitext(url)[1]
    utils.extractall(download_file_path, data_root, ext)
    six.moves.urllib.request.urlretrieve(
        train_voc2012_url, train_voc2012_file)
    _generate_voc2012_txt(base_path)
    return base_path

def get_voc(year, split):
    if year not in urls:
        raise ValueError
    key = year
    if split == 'test' and year == '2007':
        key = '2007_test'
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'VOCdevkit/VOC{}'.format(year))
    split_file = os.path.join(
        base_path, 'ImageSets/Main/{}.txt'.format(split))
    if os.path.exists(split_file):
        # skip downloading
        return base_path
    download_file_path = utils.cached_download(urls[key])
    ext = os.path.splitext(urls[key])[1]
    utils.extractall(download_file_path, data_root, ext)
    return base_path

def get_sbd():
    data_root = download.get_dataset_directory(root)
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(os.path.join(data_root, 'lock')):
        base_path = os.path.join(data_root, 'benchmark_RELEASE/dataset')
        train_voc2012_file = os.path.join(base_path, 'train_voc2012.txt')
        if os.path.exists(train_voc2012_file):
            # skip downloading
            return base_path
        download_file_path = utils.cached_download(url)
        ext = os.path.splitext(url)[1]
        utils.extractall(download_file_path, data_root, ext)
        six.moves.urllib.request.urlretrieve(
            train_voc2012_url, train_voc2012_file)
        _generate_voc2012_txt(base_path)
    return base_path

def get_coco(split, year, mode='person_keypoints'):
    if year not in ['2017']:
        raise ValueError
    if split not in ['train', 'val']:
        raise ValueError
    data_dir = download.get_dataset_directory(root)
    annos_root = os.path.join(data_dir, 'annotations')
    img_root = os.path.join(data_dir, 'images')
    created_img_root = os.path.join(img_root, '{}{}'.format(split, year))
    img_url = img_urls[year][split]
    if mode == 'person_keypoints':
        anno_url = person_keypoints_anno_urls[year][split]
        anno_path = os.path.join(
            annos_root, 'person_keypoints_{}{}.json'.format(split, year))
    else:
        raise ValueError('invalid mode {}'.format(mode))
    if not os.path.exists(created_img_root):
        download_file_path = utils.cached_download(img_url)
        ext = os.path.splitext(img_url)[1]
        utils.extractall(download_file_path, img_root, ext)
    if not os.path.exists(anno_path):
        download_file_path = utils.cached_download(anno_url)
        ext = os.path.splitext(anno_url)[1]
        if split in ['train', 'val']:
            utils.extractall(download_file_path, data_dir, ext)
        elif split in ['valminusminival', 'minival']:
            utils.extractall(download_file_path, annos_root, ext)
    return data_dir

def _get_imagenet(urls):
    data_root = download.get_dataset_directory(root)
    # this is error prone
    if os.path.exists(os.path.join(data_root, 'train')):
        return data_root
    for key, url in urls.items():
        download_file_path = utils.cached_download(url)
        d = os.path.join(data_root, key)
        if not os.path.exists(d):
            os.makedirs(d)
        ext = os.path.splitext(url)[1]
        utils.extractall(download_file_path, d, ext)
    # this is an extra step needed for train dataset
    train_dir = os.path.join(data_root, 'train')
    for tar_fn in os.listdir(train_dir):
        if tar_fn[-3:] == 'tar':
            with tarfile.TarFile(os.path.join(train_dir, tar_fn), 'r') as t:
                t.extractall(train_dir)
    return data_root

def get_coco(split, img_split, year, mode):
    if not os.path.isdir(coco_dir):
        os.makedirs(coco_dir)
    # To support ChainerMN, the target directory should be locked.
    lockfile_path = os.path.join(coco_dir, 'coco.lock')
    with filelock.FileLock(lockfile_path):
        annos_root = os.path.join(coco_dir, 'annotations')
        img_root = os.path.join(coco_dir, 'images')
        created_img_root = os.path.join(
            img_root, '{}{}'.format(img_split, year))
        img_url = img_urls[year][img_split]
        if mode == 'instances':
            anno_url = instances_anno_urls[year][split]
            anno_path = os.path.join(
                annos_root, 'instances_{}{}.json'.format(split, year))
        elif mode == 'panoptic':
            anno_url = panoptic_anno_url
            anno_path = os.path.join(
                annos_root, 'panoptic_{}{}.json'.format(split, year))
        else:
            # avoid a NameError on anno_url/anno_path below
            raise ValueError('invalid mode {}'.format(mode))
        if not os.path.exists(created_img_root):
            download_file_path = utils.cached_download(img_url)
            ext = os.path.splitext(img_url)[1]
            utils.extractall(download_file_path, img_root, ext)
        if not os.path.exists(anno_path):
            download_file_path = utils.cached_download(anno_url)
            ext = os.path.splitext(anno_url)[1]
            if split in ['train', 'val']:
                utils.extractall(download_file_path, coco_dir, ext)
            elif split in ['valminusminival', 'minival']:
                utils.extractall(download_file_path, annos_root, ext)
        if mode == 'panoptic':
            pixelmap_path = os.path.join(
                annos_root, 'panoptic_{}{}'.format(split, year))
            if not os.path.exists(pixelmap_path):
                utils.extractall(pixelmap_path + '.zip', annos_root, '.zip')
    if os.path.isfile(lockfile_path):
        os.remove(lockfile_path)  # remove lockfile
    return coco_dir

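# Hedged usage sketch for the locked get_coco above (assumes coco_dir and
# the URL tables are configured at module level; the paths mirror the
# os.path.join calls inside the function):
#
#   data_dir = get_coco('train', 'train', '2017', 'instances')
#   anno_fn = os.path.join(
#       data_dir, 'annotations', 'instances_train2017.json')
#   img_dir = os.path.join(data_dir, 'images', 'train2017')
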
def get_ade20k(root, url):
    data_root = download.get_dataset_directory(root)
    cache_path = utils.cached_download(url)
    utils.extractall(cache_path, data_root, os.path.splitext(url)[1])
    return data_root