def setUp(self):
    self.mnist_root = download.get_dataset_directory(
        os.path.join('pfnet', 'chainer', 'mnist'))
    self.kuzushiji_mnist_root = download.get_dataset_directory(
        os.path.join('pfnet', 'chainer', 'kuzushiji_mnist'))
    self.fashion_mnist_root = download.get_dataset_directory(
        os.path.join('pfnet', 'chainer', 'fashion-mnist'))
def download_and_store_model(alg, url, env, model_type):
    """Downloads a model file and puts it under model directory.

    It downloads a file from the URL and puts it under model directory.
    If there is already a file at the destination path, it just returns
    the path without downloading the same file.

    Args:
        alg (string): String representation of algorithm used in MODELS dict.
        url (string): URL to download from.
        env (string): Environment in which pretrained model was trained.
        model_type (string): Either `best` or `final`.

    Returns:
        string: Path to the downloaded file.
        bool: whether the model was already cached.
    """
    with filelock.FileLock(
            os.path.join(
                get_dataset_directory(
                    os.path.join('pfnet', 'chainerrl', '.lock')),
                'models.lock')):
        root = get_dataset_directory(
            os.path.join('pfnet', 'chainerrl', 'models', alg, env))
        url_basepath = os.path.join(url, alg, env)
        file = model_type + ".zip"
        path = os.path.join(root, file)
        is_cached = os.path.exists(path)
        if not is_cached:
            cache_path = cached_download(os.path.join(url_basepath, file))
            os.rename(cache_path, path)
            with zipfile.ZipFile(path, 'r') as zip_ref:
                zip_ref.extractall(root)
        return os.path.join(root, model_type), is_cached
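# A minimal usage sketch for download_and_store_model. The algorithm
# name, base URL, and environment below are placeholders, not values
# defined in this snippet:
base_url = 'https://example.com/chainerrl/models'  # placeholder URL
model_dir, was_cached = download_and_store_model(
    'DQN', base_url, 'BreakoutNoFrameskip-v4', 'best')
# model_dir points at the extracted directory; was_cached reports
# whether the zip was reused from disk instead of downloaded.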
def get_voc(year, split):
    if year not in urls:
        raise ValueError
    key = year
    if split == 'test' and year == '2007':
        key = '2007_test'

    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(
            os.path.join(
                download.get_dataset_directory('pfnet/chainercv/.lock'),
                'voc.lock')):
        data_root = download.get_dataset_directory(root)
        base_path = os.path.join(data_root, 'VOCdevkit/VOC{}'.format(year))
        split_file = os.path.join(
            base_path, 'ImageSets/Main/{}.txt'.format(split))
        if os.path.exists(split_file):
            # skip downloading
            return base_path

        download_file_path = utils.cached_download(urls[key])
        ext = os.path.splitext(urls[key])[1]
        utils.extractall(download_file_path, data_root, ext)
    return base_path
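# The FileLock above matters under ChainerMN: every MPI worker calls
# get_voc() at startup, and without the lock several processes could
# extract the archive into the same directory at once. One worker does
# the download while the rest block, then take the early "skip
# downloading" return. A sketch of the same guard in isolation, where
# do_download is any callable performing the real work:
def prepare_once(name, do_download):
    lock_root = download.get_dataset_directory('pfnet/chainercv/.lock')
    with filelock.FileLock(
            os.path.join(lock_root, '{}.lock'.format(name))):
        return do_download()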
def download_model(url):
    """Downloads a model file and puts it under model directory.

    It downloads a file from the URL and puts it under model directory.
    For example, if :obj:`url` is `http://example.com/subdir/model.npz`,
    the pretrained weights file will be saved to
    `$CHAINER_DATASET_ROOT/pfnet/chainercv/models/model.npz`.
    If there is already a file at the destination path, it just returns
    the path without downloading the same file.

    Args:
        url (string): URL to download from.

    Returns:
        string: Path to the downloaded file.
    """
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(
            os.path.join(
                get_dataset_directory(
                    os.path.join('pfnet', 'chainercv', '.lock')),
                'models.lock')):
        root = get_dataset_directory(
            os.path.join('pfnet', 'chainercv', 'models'))
        basename = os.path.basename(url)
        path = os.path.join(root, basename)
        if not os.path.exists(path):
            cache_path = cached_download(url)
            os.rename(cache_path, path)
    return path
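# A short usage sketch for download_model; the URL is a placeholder and
# `model` is assumed to be an already constructed chainer.Chain:
weights_path = download_model('https://example.com/subdir/model.npz')
chainer.serializers.load_npz(weights_path, model)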
def get_ade20k(root, url):
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(
            os.path.join(
                download.get_dataset_directory('pfnet/chainercv/.lock'),
                'ade20k.lock')):
        data_root = download.get_dataset_directory(root)
        if os.path.exists(os.path.join(data_root, 'ADEChallengeData2016')):
            return data_root
        cache_path = utils.cached_download(url)
        utils.extractall(cache_path, data_root, os.path.splitext(url)[1])
    return data_root
def get_cub():
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(os.path.join(
            download.get_dataset_directory('pfnet/chainercv/.lock'),
            'cub.lock')):
        data_root = download.get_dataset_directory(root)
        base_path = os.path.join(data_root, 'CUB_200_2011')
        if os.path.exists(base_path):
            # skip downloading
            return base_path

        download_file_path = utils.cached_download(url)
        ext = os.path.splitext(url)[1]
        utils.extractall(download_file_path, data_root, ext)
    return base_path
def get_cub_prob_map():
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(os.path.join(
            download.get_dataset_directory('pfnet/chainercv/.lock'),
            'cub.lock')):
        data_root = download.get_dataset_directory(root)
        base_path = os.path.join(data_root, 'segmentations')
        if os.path.exists(base_path):
            # skip downloading
            return base_path

        prob_map_download_file_path = utils.cached_download(prob_map_url)
        prob_map_ext = os.path.splitext(prob_map_url)[1]
        utils.extractall(
            prob_map_download_file_path, data_root, prob_map_ext)
    return base_path
def get_sequences(split, map_name):
    if split == 'train':
        splits = ['train']
    elif split == 'val':
        splits = ['test']
    elif split == 'trainval':
        if map_name in ['c2', 'c3', 'c4']:
            splits = ['train', 'test']
        else:
            splits = ['all']
    else:
        raise ValueError

    seq_map = []
    data_root = download.get_dataset_directory(root)
    seq_path = os.path.join(
        data_root, 'motchallenge-devkit/motchallenge/seqmaps')
    for sp in splits:
        seqmap_path = os.path.join(
            seq_path, '{0}-{1}.txt'.format(map_name, sp))
        with open(seqmap_path, 'r') as f:
            seq_m = f.read().split('\n')
        seq_map.extend(seq_m[1:-1])
    if map_name == 'c9':
        seq_map = ['{}-DPM'.format(x) for x in seq_map]
    return seq_map
def _retrieve_word_vocabulary():
    def creator(path):
        words = _load_words(_train_url)
        vocab = {}
        index = 0
        with open(path, 'w') as f:
            for word in words:
                if word not in vocab:
                    vocab[word] = index
                    index += 1
                    f.write(word + '\n')
        return vocab

    def loader(path):
        vocab = {}
        with open(path) as f:
            for i, word in enumerate(f):
                vocab[word.strip()] = i
        return vocab

    root = download.get_dataset_directory('txt')
    path = os.path.join(root, 'vocab.txt')
    print(root)
    return download.cache_or_load_file(path, creator, loader)
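# cache_or_load_file (as used above) invokes creator only when the
# cached file is missing -- creator writes to a temporary path that is
# then moved into place, and its return value is used directly -- while
# every later call goes through loader. A sketch of the same pattern
# with trivial creator/loader functions and a throwaway cache path:
def _creator(path):
    with open(path, 'w') as f:
        f.write('hello\nworld\n')
    return {'hello': 0, 'world': 1}

def _loader(path):
    with open(path) as f:
        return {line.strip(): i for i, line in enumerate(f)}

vocab = download.cache_or_load_file('/tmp/vocab.txt', _creator, _loader)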
def get_facade():
    root = download.get_dataset_directory('study_chainer/facade')
    npz_path = os.path.join(root, 'base.npz')
    url = 'http://cmp.felk.cvut.cz/~tylecr1/facade/CMP_facade_DB_base.zip'

    def creator(path):
        archive_path = download.cached_download(url)
        images = []
        labels = []
        with zipfile.ZipFile(archive_path, 'r') as archive:
            for i in range(1, 378 + 1):
                image_name = 'base/cmp_b{:04d}.jpg'.format(i)
                label_name = 'base/cmp_b{:04d}.png'.format(i)
                image = Image.open(io.BytesIO(archive.read(image_name)))
                image = np.asarray(image)
                images.append(image)
                label = Image.open(io.BytesIO(archive.read(label_name)))
                label = np.asarray(label)
                labels.append(label)
        np.savez_compressed(path, images=images, labels=labels)
        return {'images': images, 'labels': labels}

    raw = download.cache_or_load_file(npz_path, creator, np.load)
    return raw['images'], raw['labels']
def get_sbd(data_dir=None):
    if data_dir is None:
        data_dir = download.get_dataset_directory(root)

    label_dir = osp.join(data_dir, 'fcis_label')
    if not osp.exists(label_dir):
        os.makedirs(label_dir)
    fcn.data.cached_download(
        url=val_url,
        path=osp.join(label_dir, 'val.txt'),
        md5='905db61182fcaaf6b981af6ae6dd7ff2'
    )
    fcn.data.cached_download(
        url=train_url,
        path=osp.join(label_dir, 'train.txt'),
        md5='79bff800c5f0b1ec6b21080a3c066722'
    )

    base_path = osp.join(data_dir, 'benchmark_RELEASE/dataset')
    if osp.exists(base_path):
        return base_path

    download_file_path = utils.cached_download(url)
    ext = osp.splitext(url)[1]
    utils.extractall(download_file_path, data_dir, ext)
    return base_path
def _check_pretrained_model(n_fg_class, pretrained_model, models):
    if pretrained_model in models:
        model = models[pretrained_model]
        if n_fg_class and not n_fg_class == model['n_fg_class']:
            raise ValueError('n_fg_class mismatch')
        n_fg_class = model['n_fg_class']

        root = get_dataset_directory('pfnet/chainercv/models')
        basename = os.path.basename(model['url'])
        path = os.path.join(root, basename)
        if not os.path.exists(path):
            download_file = download.cached_download(model['url'])
            os.rename(download_file, path)

        if not _available:
            warnings.warn(
                'cv2 is not installed on your environment. '
                'Pretrained models are trained with cv2. '
                'The performance may change with Pillow backend.',
                RuntimeWarning)
    elif pretrained_model:
        path = pretrained_model
    else:
        path = None

    return n_fg_class, path
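# A hedged usage sketch for _check_pretrained_model, assuming a
# hypothetical registry in the shape the function expects (the key and
# URL below are placeholders):
models = {
    'voc07': {
        'n_fg_class': 20,
        'url': 'https://example.com/models/faster_rcnn_voc07.npz',
    },
}
# Passing None lets the registry entry supply n_fg_class.
n_fg_class, path = _check_pretrained_model(None, 'voc07', models)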
def _retrieve_emnist(name, archives):
    # the path to store the cached file to
    root = download.get_dataset_directory('pfnet/chainer/emnist')
    path = os.path.join(root, name)
    return download.cache_or_load_file(
        path, lambda path: _make_npz(path, archives), numpy.load)
def __init__(self, data_dir=None, label_resolution=None, split='train',
             ignore_labels=True):
    if data_dir is None:
        data_dir = download.get_dataset_directory(
            'pfnet/chainercv/cityscapes')
    img_dir = os.path.join(data_dir, os.path.join('leftImg8bit', split))
    resol = label_resolution
    label_dir = os.path.join(data_dir, resol)
    if not os.path.exists(img_dir) or not os.path.exists(label_dir):
        raise ValueError(
            'Cityscapes dataset does not exist at the expected location. '
            'Please download it from https://www.cityscapes-dataset.com/. '
            'Then place directory leftImg8bit at {} and {} at {}.'.format(
                os.path.join(data_dir, 'leftImg8bit'), resol, label_dir))
    self.ignore_labels = ignore_labels

    self.label_paths = list()
    self.img_paths = list()
    city_dnames = list()
    for dname in glob.glob(os.path.join(label_dir, '*')):
        if split in dname:
            for city_dname in glob.glob(os.path.join(dname, '*')):
                for label_path in glob.glob(
                        os.path.join(city_dname, '*_labelIds.png')):
                    self.label_paths.append(label_path)
                    city_dnames.append(os.path.basename(city_dname))
    for city_dname, label_path in zip(city_dnames, self.label_paths):
        label_path = os.path.basename(label_path)
        img_path = label_path.replace(
            '{}_labelIds'.format(resol), 'leftImg8bit')
        img_path = os.path.join(img_dir, city_dname, img_path)
        self.img_paths.append(img_path)
def get_coco(split, img_split, year, mode):
    data_dir = download.get_dataset_directory(root)
    annos_root = os.path.join(data_dir, 'annotations')
    img_root = os.path.join(data_dir, 'images')
    created_img_root = os.path.join(
        img_root, '{}{}'.format(img_split, year))
    img_url = img_urls[year][img_split]
    if mode == 'instances':
        anno_url = instances_anno_urls[year][split]
        anno_path = os.path.join(
            annos_root, 'instances_{}{}.json'.format(split, year))
    elif mode == 'panoptic':
        anno_url = panoptic_anno_url
        anno_path = os.path.join(
            annos_root, 'panoptic_{}{}.json'.format(split, year))
    else:
        # guard against anno_url/anno_path being referenced unbound
        raise ValueError('invalid mode {}'.format(mode))

    if not os.path.exists(created_img_root):
        download_file_path = utils.cached_download(img_url)
        ext = os.path.splitext(img_url)[1]
        utils.extractall(download_file_path, img_root, ext)
    if not os.path.exists(anno_path):
        download_file_path = utils.cached_download(anno_url)
        ext = os.path.splitext(anno_url)[1]
        if split in ['train', 'val']:
            utils.extractall(download_file_path, data_dir, ext)
        elif split in ['valminusminival', 'minival']:
            utils.extractall(download_file_path, annos_root, ext)

    if mode == 'panoptic':
        pixelmap_path = os.path.join(
            annos_root, 'panoptic_{}{}'.format(split, year))
        if not os.path.exists(pixelmap_path):
            utils.extractall(pixelmap_path + '.zip', annos_root, '.zip')
    return data_dir
def _get_tox21_filepath(dataset_type):
    """Returns a file path in which the tox21 dataset is cached.

    This function returns a file path in which `dataset_type` of the
    tox21 dataset is cached. Note that this function does not check if
    the dataset has actually been downloaded or not.

    Args:
        dataset_type(str): Name of the target dataset type.
            Either 'train', 'val', or 'test'.

    Returns (str): file path for the tox21 dataset

    """
    if dataset_type not in _config.keys():
        raise ValueError("Invalid dataset type '{}'. Accepted values are "
                         "'train', 'val' or 'test'.".format(dataset_type))

    c = _config[dataset_type]
    sdffile = c['filename']

    cache_root = download.get_dataset_directory(_root)
    cache_path = os.path.join(cache_root, sdffile)
    return cache_path
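# A usage sketch: the helper only builds the cache path, so callers must
# check for the file and trigger the download themselves.
# download_and_parse_tox21 is hypothetical, standing in for whatever
# downloader the surrounding codebase provides:
cache_path = _get_tox21_filepath('train')
if not os.path.exists(cache_path):
    download_and_parse_tox21('train', cache_path)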
def get_fashion_landmark():
    data_dir = download.get_dataset_directory(root)
    url = 'https://drive.google.com/uc?id=0B7EVK8r0v71pSU9nOXVDMk9WbWM'
    img_root = os.path.join(data_dir, 'img')
    anno_root = os.path.join(data_dir, 'Anno')
    eval_root = os.path.join(data_dir, 'Eval')
    download_file_path = cached_gdown_download(url)
    if not os.path.exists(img_root):
        utils.extractall(download_file_path, data_dir, '.zip')

    landmark_annotation_url = 'https://drive.google.com/uc?id='\
        '0B7EVK8r0v71pZ3pGVFZ0YjZVTjg'
    download_file_path = cached_gdown_download(landmark_annotation_url)
    try:
        os.makedirs(anno_root)
    except OSError:
        if not os.path.exists(anno_root):
            raise
    shutil.copy(download_file_path,
                os.path.join(anno_root, 'list_landmarks.txt'))

    eval_list_url = 'https://drive.google.com/uc?id='\
        '0B7EVK8r0v71pakJzTEM0a2Q4Qm8'
    download_file_path = cached_gdown_download(eval_list_url)
    try:
        os.makedirs(eval_root)
    except OSError:
        if not os.path.exists(eval_root):
            raise
    shutil.copy(download_file_path,
                os.path.join(eval_root, 'list_eval_partition.txt'))
    return data_dir
def download_model(url):
    """Downloads a model file and puts it under model directory.

    It downloads a file from the URL and puts it under model directory.
    For example, if :obj:`url` is `http://example.com/subdir/model.npz`,
    the pretrained weights file will be saved to
    `$CHAINER_DATASET_ROOT/pfnet/AU_rcnn/models/model.npz`.
    If there is already a file at the destination path, it just returns
    the path without downloading the same file.

    Args:
        url (str): URL to download from.

    Returns:
        str: Path to the downloaded file.
    """
    root = get_dataset_directory(
        os.path.join('pfnet', 'AU_rcnn', 'models'))
    basename = os.path.basename(url)
    path = os.path.join(root, basename)
    if not os.path.exists(path):
        cache_path = cached_download(url)
        os.rename(cache_path, path)
    return path
def get_ade20k(root, url):
    data_root = download.get_dataset_directory(root)
    if os.path.exists(os.path.join(data_root, 'ADEChallengeData2016')):
        return data_root
    cache_path = utils.cached_download(url)
    utils.extractall(cache_path, data_root, os.path.splitext(url)[1])
    return data_root
def __init__(self, data_dir='auto', mode='train', year='2012',
             use_difficult=False, use_cache=False, delete_cache=False):
    if data_dir == 'auto' and year in voc_utils.urls:
        data_dir = voc_utils.get_pascal_voc(year)

    if mode not in ['train', 'trainval', 'val']:
        warnings.warn(
            'please pick mode from \'train\', \'trainval\', \'val\'')

    id_list_file = os.path.join(
        data_dir, 'ImageSets/Main/{0}.txt'.format(mode))
    self.ids = [id_.strip() for id_ in open(id_list_file)]

    self.data_dir = data_dir
    self.use_difficult = use_difficult

    # cache objects
    data_root = download.get_dataset_directory(voc_utils.root)
    pkl_file = os.path.join(
        data_root, 'detection_objects_{}_{}.pkl'.format(year, mode))
    self.objects = cache_load(
        pkl_file, self._collect_objects, delete_cache, use_cache,
        args=(self.data_dir, self.ids, self.use_difficult))
    self.keys = self.objects.keys()
def _retrieve(name_npz, name_caffemodel, model):
    root = download.get_dataset_directory('pfnet/chainer/models/')
    path = os.path.join(root, name_npz)
    path_caffemodel = os.path.join(root, name_caffemodel)
    return download.cache_or_load_file(
        path, lambda path: _make_npz(path, path_caffemodel, model),
        lambda path: npz.load_npz(path, model))
def _retrieve(n_layers, name_npz, name_caffemodel, model):
    root = download.get_dataset_directory('pfnet/chainer/models/')
    path = os.path.join(root, name_npz)
    path_caffemodel = os.path.join(root, name_caffemodel)
    return download.cache_or_load_file(
        path, lambda path: _make_npz(path, path_caffemodel, model, n_layers),
        lambda path: npz.load_npz(path, model))
def get_coco(split, img_split, data_dir=None):
    url = img_urls[img_split]
    if data_dir is None:
        data_dir = download.get_dataset_directory(root)
    img_root = os.path.join(data_dir, 'images')
    created_img_root = os.path.join(img_root, img_split)
    if 'test' in split:
        annos_root = data_dir
        anno_prefix = 'image_info'
    else:
        annos_root = os.path.join(data_dir, 'annotations')
        anno_prefix = 'instances'
    anno_fn = os.path.join(
        annos_root, '{0}_{1}.json'.format(anno_prefix, split))
    if not os.path.exists(created_img_root):
        download_file_path = utils.cached_download(url)
        ext = os.path.splitext(url)[1]
        utils.extractall(download_file_path, img_root, ext)
    if not os.path.exists(anno_fn):
        anno_url = anno_urls[split]
        download_file_path = utils.cached_download(anno_url)
        ext = os.path.splitext(anno_url)[1]
        utils.extractall(download_file_path, annos_root, ext)
    return data_dir
def get_coco(split, year, mode='person_keypoints'):
    if year not in ['2017']:
        raise ValueError
    if split not in ['train', 'val']:
        raise ValueError
    data_dir = download.get_dataset_directory(root)
    annos_root = os.path.join(data_dir, 'annotations')
    img_root = os.path.join(data_dir, 'images')
    created_img_root = os.path.join(
        img_root, '{}{}'.format(split, year))
    img_url = img_urls[year][split]
    if mode == 'person_keypoints':
        anno_url = person_keypoints_anno_urls[year][split]
        anno_path = os.path.join(
            annos_root, 'person_keypoints_{}{}.json'.format(split, year))
    else:
        raise ValueError('invalid mode {}'.format(mode))

    if not os.path.exists(created_img_root):
        download_file_path = utils.cached_download(img_url)
        ext = os.path.splitext(img_url)[1]
        utils.extractall(download_file_path, img_root, ext)
    if not os.path.exists(anno_path):
        download_file_path = utils.cached_download(anno_url)
        ext = os.path.splitext(anno_url)[1]
        if split in ['train', 'val']:
            utils.extractall(download_file_path, data_dir, ext)
        elif split in ['valminusminival', 'minival']:
            utils.extractall(download_file_path, annos_root, ext)
    return data_dir
def get_camvid():
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(
            os.path.join(
                download.get_dataset_directory('pfnet/chainercv/.lock'),
                'camvid.lock')):
        data_root = download.get_dataset_directory(root)
        download_file_path = utils.cached_download(url)
        if len(glob.glob(os.path.join(data_root, '*'))) != 9:
            utils.extractall(
                download_file_path, data_root, os.path.splitext(url)[1])
        data_dir = os.path.join(data_root, 'SegNet-Tutorial-master/CamVid')
        if os.path.exists(data_dir):
            for fn in glob.glob(os.path.join(data_dir, '*')):
                shutil.move(fn, os.path.join(data_root, os.path.basename(fn)))
            shutil.rmtree(os.path.dirname(data_dir))
    return data_root
def gen_action_labels(out_fn="epic_kitchen_action_labels.py"):
    data_dir = download.get_dataset_directory(root)
    images_root = osp.join(data_dir, "object_detection_images")
    annos_root = osp.join(
        data_dir, "annotations-{version}".format(version=version))
    anno_fn = "EPIC_verb_classes.csv"
    gen_labels(
        osp.join(annos_root, anno_fn), out_fn,
        "epic_kitchen_action", "verb_id", "class_key")
def get_kaggle():
    root = 'pfnet/chainer/pubchem'
    cache_root = download.get_dataset_directory(root)
    fname = 'pubchem.h5'
    cache_path = os.path.join(cache_root, fname)
    dataset = download.cache_or_load_file(cache_path, creator, loader)
    N = len(dataset)
    return D.split_dataset_random(dataset, int(N * 0.75))
def _get_sintel():
    data_root = download.get_dataset_directory(root)
    if os.path.exists(os.path.join(data_root, 'training')):
        # skip downloading
        return data_root

    download_file_path = utils.cached_download(url)
    ext = os.path.splitext(url)[1]
    utils.extractall(download_file_path, data_root, ext)
    return data_root
def _get_cifar(name, withlabel, ndim, scale):
    root = download.get_dataset_directory(
        os.path.join('pfnet', 'chainer', 'cifar'))
    npz_path = os.path.join(root, '{}.npz'.format(name))
    url = 'https://www.cs.toronto.edu/~kriz/{}-python.tar.gz'.format(name)

    def creator(path):
        archive_path = download.cached_download(url)

        if name == 'cifar-10':
            train_x = numpy.empty((5, 10000, 3072), dtype=numpy.uint8)
            train_y = numpy.empty((5, 10000), dtype=numpy.uint8)
            test_y = numpy.empty(10000, dtype=numpy.uint8)

            dir_name = '{}-batches-py'.format(name)

            with tarfile.open(archive_path, 'r:gz') as archive:
                # training set
                for i in range(5):
                    file_name = '{}/data_batch_{}'.format(dir_name, i + 1)
                    d = _pickle_load(archive.extractfile(file_name))
                    train_x[i] = d['data']
                    train_y[i] = d['labels']

                # test set
                file_name = '{}/test_batch'.format(dir_name)
                d = _pickle_load(archive.extractfile(file_name))
                test_x = d['data']
                test_y[...] = d['labels']  # copy to array

            train_x = train_x.reshape(50000, 3072)
            train_y = train_y.reshape(50000)
        else:  # name == 'cifar-100'
            def load(archive, file_name):
                d = _pickle_load(archive.extractfile(file_name))
                x = d['data'].reshape((-1, 3072))
                y = numpy.array(d['fine_labels'], dtype=numpy.uint8)
                return x, y

            with tarfile.open(archive_path, 'r:gz') as archive:
                train_x, train_y = load(archive, 'cifar-100-python/train')
                test_x, test_y = load(archive, 'cifar-100-python/test')

        numpy.savez_compressed(path, train_x=train_x, train_y=train_y,
                               test_x=test_x, test_y=test_y)
        return {'train_x': train_x, 'train_y': train_y,
                'test_x': test_x, 'test_y': test_y}

    raw = download.cache_or_load_file(npz_path, creator, numpy.load)
    train = _preprocess_cifar(raw['train_x'], raw['train_y'],
                              withlabel, ndim, scale)
    test = _preprocess_cifar(raw['test_x'], raw['test_y'],
                             withlabel, ndim, scale)
    return train, test
def _get_zinc250k_filepath():
    """Construct a filepath which stores ZINC250k dataset in csv

    This method does not check if the file is already downloaded or not.

    Returns (str): filepath for ZINC250k dataset

    """
    cache_root = download.get_dataset_directory(_root)
    cache_path = os.path.join(cache_root, file_name_250k)
    return cache_path
def get_cub():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'CUB_200_2011')
    if os.path.exists(base_path):
        # skip downloading
        return base_path

    download_file_path = utils.cached_download(url)
    ext = os.path.splitext(url)[1]
    utils.extractall(download_file_path, data_root, ext)
    return base_path
def _get_online_products():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'Stanford_Online_Products')
    if os.path.exists(base_path):
        # skip downloading
        return base_path

    download_file_path = utils.cached_download(url)
    ext = os.path.splitext(url)[1]
    utils.extractall(download_file_path, data_root, ext)
    return base_path
def get_cub_mask():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'segmentations')
    if os.path.exists(base_path):
        # skip downloading
        return base_path

    download_file_path_mask = utils.cached_download(mask_url)
    ext_mask = os.path.splitext(mask_url)[1]
    utils.extractall(download_file_path_mask, data_root, ext_mask)
    return base_path
def get_cub_prob_map():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'segmentations')
    if os.path.exists(base_path):
        # skip downloading
        return base_path

    prob_map_download_file_path = utils.cached_download(prob_map_url)
    prob_map_ext = os.path.splitext(prob_map_url)[1]
    utils.extractall(
        prob_map_download_file_path, data_root, prob_map_ext)
    return base_path
def get_camvid():
    data_root = download.get_dataset_directory(root)
    download_file_path = utils.cached_download(url)
    if len(glob.glob(os.path.join(data_root, '*'))) != 9:
        utils.extractall(
            download_file_path, data_root, os.path.splitext(url)[1])
    data_dir = os.path.join(data_root, 'SegNet-Tutorial-master/CamVid')
    if os.path.exists(data_dir):
        for fn in glob.glob(os.path.join(data_dir, '*')):
            shutil.move(fn, os.path.join(data_root, os.path.basename(fn)))
        shutil.rmtree(os.path.dirname(data_dir))
    return data_root
def _get_molnet_filepath(file_name):
    """Construct a filepath which stores MoleculeNet dataset in csv

    This method does not check if the file is already downloaded or not.

    Args:
        file_name (str): file name of MoleculeNet dataset

    Returns (str): filepath for one of MoleculeNet dataset

    """
    cache_root = download.get_dataset_directory(_root)
    cache_path = os.path.join(cache_root, file_name)
    return cache_path
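# _get_molnet_filepath pairs with cached_download in the usual way:
# build the destination path first, fetch only if missing. A sketch
# with a placeholder URL:
file_path = _get_molnet_filepath('delaney-processed.csv')
if not os.path.exists(file_path):
    downloaded = download.cached_download(
        'https://example.com/molnet/delaney-processed.csv')
    shutil.copy(downloaded, file_path)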
def _get_pdbbind_time_filepath(file_name):
    """Construct a filepath which stores year table in csv.

    This method does not check if the file is already downloaded or not.

    Args:
        file_name (str): file name of year table

    Returns (str): filepath for one of year table

    """
    cache_root = download.get_dataset_directory('pfnet/chainer/molnet')
    cache_path = os.path.join(cache_root, file_name)
    return cache_path
def _retrieve_ptb_words(name, url):
    def creator(path):
        vocab = _retrieve_word_vocabulary()
        words = _load_words(url)
        x = numpy.empty(len(words), dtype=numpy.int32)
        for i, word in enumerate(words):
            x[i] = vocab[word]
        numpy.savez_compressed(path, x=x)
        return {'x': x}

    root = download.get_dataset_directory('pfnet/chainer/ptb')
    path = os.path.join(root, name)
    loaded = download.cache_or_load_file(path, creator, numpy.load)
    return loaded['x']
def get_sbd():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'benchmark_RELEASE/dataset')
    train_voc2012_file = os.path.join(base_path, 'train_voc2012.txt')
    if os.path.exists(train_voc2012_file):
        # skip downloading
        return base_path

    download_file_path = utils.cached_download(url)
    ext = os.path.splitext(url)[1]
    utils.extractall(download_file_path, data_root, ext)

    six.moves.urllib.request.urlretrieve(
        train_voc2012_url, train_voc2012_file)
    _generate_voc2012_txt(base_path)
    return base_path
def __init__(self, data_dir='auto'):
    if data_dir == 'auto':
        data_dir = download.get_dataset_directory(
            'pfnet/chainercv/cityscapes')
    img_dir = os.path.join(data_dir, os.path.join('leftImg8bit', 'test'))
    if not os.path.exists(img_dir):
        raise ValueError(
            'Cityscapes dataset does not exist at the expected location. '
            'Please download it from https://www.cityscapes-dataset.com/. '
            'Then place directory leftImg8bit at {}.'.format(
                os.path.join(data_dir, 'leftImg8bit')))

    self.img_paths = []
    for city_dname in sorted(glob.glob(os.path.join(img_dir, '*'))):
        for img_path in sorted(glob.glob(
                os.path.join(city_dname, '*_leftImg8bit.png'))):
            self.img_paths.append(img_path)
def get_voc(year, split):
    if year not in urls:
        raise ValueError
    key = year
    if split == 'test' and year == '2007':
        key = '2007_test'

    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'VOCdevkit/VOC{}'.format(year))
    split_file = os.path.join(
        base_path, 'ImageSets/Main/{}.txt'.format(split))
    if os.path.exists(split_file):
        # skip downloading
        return base_path

    download_file_path = utils.cached_download(urls[key])
    ext = os.path.splitext(urls[key])[1]
    utils.extractall(download_file_path, data_root, ext)
    return base_path
def __init__(self, n_layers):
    root = download.get_dataset_directory('pfnet/chainer/models/')
    caffemodel_path = os.path.join(
        root, 'ResNet-{}-model.caffemodel'.format(n_layers))
    if not os.path.exists(caffemodel_path):
        if n_layers == 50:
            cache_path = download.cached_download(self.URLS['resnet50'])
        elif n_layers == 101:
            cache_path = download.cached_download(self.URLS['resnet101'])
        elif n_layers == 152:
            cache_path = download.cached_download(self.URLS['resnet152'])
        else:
            # without this guard, cache_path would be unbound below
            raise ValueError(
                'n_layers must be 50, 101 or 152, but {} was given'.format(
                    n_layers))
        shutil.move(cache_path, caffemodel_path)
    super(ResNet, self).__init__(
        os.path.basename(caffemodel_path), n_layers=n_layers)
    self._children.remove('fc6')
    del self.fc6
    del self.functions['fc6']
    del self.functions['prob']
    self.train = True
def _retrieve_cifar(name):
    root = download.get_dataset_directory('pfnet/chainer/cifar')
    path = os.path.join(root, '{}.npz'.format(name))
    url = 'https://www.cs.toronto.edu/~kriz/{}-python.tar.gz'.format(name)

    def creator(path):
        archive_path = download.cached_download(url)

        train_x = numpy.empty((5, 10000, 3072), dtype=numpy.uint8)
        train_y = numpy.empty((5, 10000), dtype=numpy.uint8)
        test_y = numpy.empty(10000, dtype=numpy.uint8)

        dir_name = '{}-batches-py'.format(name)

        with tarfile.open(archive_path, 'r:gz') as archive:
            # training set
            for i in range(5):
                file_name = '{}/data_batch_{}'.format(dir_name, i + 1)
                d = pickle.load(archive.extractfile(file_name))
                train_x[i] = d['data']
                train_y[i] = d['labels']

            # test set
            file_name = '{}/test_batch'.format(dir_name)
            d = pickle.load(archive.extractfile(file_name))
            test_x = d['data']
            test_y[...] = d['labels']  # copy to array

        train_x = train_x.reshape(50000, 3072)
        train_y = train_y.reshape(50000)

        numpy.savez_compressed(path, train_x=train_x, train_y=train_y,
                               test_x=test_x, test_y=test_y)
        return {'train_x': train_x, 'train_y': train_y,
                'test_x': test_x, 'test_y': test_y}

    return download.cache_or_load_file(path, creator, numpy.load)
def _retrieve_cifar_100():
    root = download.get_dataset_directory('pfnet/chainer/cifar')
    path = os.path.join(root, 'cifar-100.npz')
    url = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'

    def creator(path):
        def load(archive, file_name):
            d = _pickle_load(archive.extractfile(file_name))
            x = d['data'].reshape((-1, 3072))
            y = numpy.array(d['fine_labels'], dtype=numpy.uint8)
            return x, y

        archive_path = download.cached_download(url)
        with tarfile.open(archive_path, 'r:gz') as archive:
            train_x, train_y = load(archive, 'cifar-100-python/train')
            test_x, test_y = load(archive, 'cifar-100-python/test')

        numpy.savez_compressed(path, train_x=train_x, train_y=train_y,
                               test_x=test_x, test_y=test_y)
        return {'train_x': train_x, 'train_y': train_y,
                'test_x': test_x, 'test_y': test_y}

    return download.cache_or_load_file(path, creator, numpy.load)
def _retrieve_svhn(name, url):
    root = download.get_dataset_directory('pfnet/chainer/svhn')
    path = os.path.join(root, name)
    return download.cache_or_load_file(
        path, lambda path: _make_npz(path, url), numpy.load)
def setUp(self):
    self.root = download.get_dataset_directory(
        os.path.join('pfnet', 'chainer', 'cifar'))
def _retrieve_fashion_mnist(name, urls):
    root = download.get_dataset_directory('pfnet/chainer/fashion-mnist')
    path = os.path.join(root, name)
    return download.cache_or_load_file(
        path, lambda path: make_npz(path, urls), numpy.load)
def get_ade20k(root, url):
    data_root = download.get_dataset_directory(root)
    cache_path = utils.cached_download(url)
    utils.extractall(cache_path, data_root, os.path.splitext(url)[1])
    return data_root