Example 1
    def setUp(self):
        self.mnist_root = download.get_dataset_directory(
            os.path.join('pfnet', 'chainer', 'mnist'))
        self.kuzushiji_mnist_root = download.get_dataset_directory(
            os.path.join('pfnet', 'chainer', 'kuzushiji_mnist'))
        self.fashion_mnist_root = download.get_dataset_directory(
            os.path.join('pfnet', 'chainer', 'fashion-mnist'))
Example 2
def download_and_store_model(alg, url, env, model_type):
    """Downloads a model file and puts it under model directory.

    It downloads a file from the URL and puts it under model directory.
    If there is already a file at the destination path,
    it just returns the path without downloading the same file.
    Args:
        alg (string): String representation of algorithm used in MODELS dict.
        url (string): URL to download from.
        env (string): Environment in which pretrained model was trained.
        model_type (string): Either `best` or `final`.
    Returns:
        string: Path to the downloaded file.
        bool: whether the model was already cached.
    """
    with filelock.FileLock(
            os.path.join(
                get_dataset_directory(
                    os.path.join('pfnet', 'chainerrl', '.lock')),
                'models.lock')):
        root = get_dataset_directory(
            os.path.join('pfnet', 'chainerrl', 'models', alg, env))
        url_basepath = os.path.join(url, alg, env)
        file = model_type + ".zip"
        path = os.path.join(root, file)
        is_cached = os.path.exists(path)
        if not is_cached:
            cache_path = cached_download(os.path.join(url_basepath, file))
            os.rename(cache_path, path)
            with zipfile.ZipFile(path, 'r') as zip_ref:
                zip_ref.extractall(root)
        return os.path.join(root, model_type), is_cached
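A hedged usage sketch of the function above; the algorithm name, base URL, and environment below are placeholders, not entries from any real MODELS dict.

# Hypothetical call; 'dqn', the URL, and the env name are placeholders.
model_dir, was_cached = download_and_store_model(
    alg='dqn',
    url='https://example.com/models',
    env='BreakoutNoFrameskip-v4',
    model_type='best')
# model_dir -> <dataset root>/pfnet/chainerrl/models/dqn/BreakoutNoFrameskip-v4/best
# was_cached -> False on the first call, True on later calls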
Example 3
    def setUp(self):
        self.mnist_root = download.get_dataset_directory(
            os.path.join('pfnet', 'chainer', 'mnist'))
        self.kuzushiji_mnist_root = download.get_dataset_directory(
            os.path.join('pfnet', 'chainer', 'kuzushiji_mnist'))
        self.fashion_mnist_root = download.get_dataset_directory(
            os.path.join('pfnet', 'chainer', 'fashion-mnist'))
Example 4
def get_voc(year, split):
    if year not in urls:
        raise ValueError
    key = year

    if split == 'test' and year == '2007':
        key = '2007_test'

    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(
            os.path.join(
                download.get_dataset_directory('pfnet/chainercv/.lock'),
                'voc.lock')):
        data_root = download.get_dataset_directory(root)
        base_path = os.path.join(data_root, 'VOCdevkit/VOC{}'.format(year))
        split_file = os.path.join(base_path,
                                  'ImageSets/Main/{}.txt'.format(split))
        if os.path.exists(split_file):
            # skip downloading
            return base_path

        download_file_path = utils.cached_download(urls[key])
        ext = os.path.splitext(urls[key])[1]
        utils.extractall(download_file_path, data_root, ext)
    return base_path
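The comment above points at a recurring idiom in these examples: take a process-wide file lock first, then re-check whether the target already exists before downloading, so that concurrent workers (for example ChainerMN processes) do the work only once. A minimal standalone sketch of that idiom, with placeholder directory names and a stubbed fetch step rather than any real ChainerCV helper:

import os

import filelock
from chainer.dataset import download


def prepare_under_lock(marker, fetch):
    # Placeholder directories; only the lock-then-check structure matters here.
    lock_dir = download.get_dataset_directory('pfnet/example/.lock')
    with filelock.FileLock(os.path.join(lock_dir, 'example.lock')):
        data_root = download.get_dataset_directory('pfnet/example')
        if not os.path.exists(os.path.join(data_root, marker)):
            fetch(data_root)  # only the first process to reach this line downloads
    return data_root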
Example 5
def download_model(url):
    """Downloads a model file and puts it under model directory.

    It downloads a file from the URL and puts it under model directory.
    For exmaple, if :obj:`url` is `http://example.com/subdir/model.npz`,
    the pretrained weights file will be saved to
    `$CHAINER_DATASET_ROOT/pfnet/chainercv/models/model.npz`.
    If there is already a file at the destination path,
    it just returns the path without downloading the same file.

    Args:
        url (string): URL to download from.

    Returns:
        string: Path to the downloaded file.

    """
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(
            os.path.join(
                get_dataset_directory(
                    os.path.join('pfnet', 'chainercv', '.lock')),
                'models.lock')):
        root = get_dataset_directory(
            os.path.join('pfnet', 'chainercv', 'models'))
        basename = os.path.basename(url)
        path = os.path.join(root, basename)
        if not os.path.exists(path):
            cache_path = cached_download(url)
            os.rename(cache_path, path)
        return path
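A short usage sketch; the URL below is a placeholder. On the first call the file is fetched and renamed into $CHAINER_DATASET_ROOT/pfnet/chainercv/models/, and later calls simply return the cached path.

# Placeholder URL; typically followed by loading the weights into a model.
path = download_model('http://example.com/subdir/model.npz')
# chainer.serializers.load_npz(path, model)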
Example 6
def get_ade20k(root, url):
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(
            os.path.join(
                download.get_dataset_directory('pfnet/chainercv/.lock'),
                'ade20k.lock')):
        data_root = download.get_dataset_directory(root)
        if os.path.exists(os.path.join(data_root, 'ADEChallengeData2016')):
            return data_root
        cache_path = utils.cached_download(url)
        utils.extractall(cache_path, data_root, os.path.splitext(url)[1])
    return data_root
Example 7
def get_cub():
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(os.path.join(download.get_dataset_directory(
            'pfnet/chainercv/.lock'), 'cub.lock')):
        data_root = download.get_dataset_directory(root)
        base_path = os.path.join(data_root, 'CUB_200_2011')
        if os.path.exists(base_path):
            # skip downloading
            return base_path

        download_file_path = utils.cached_download(url)
        ext = os.path.splitext(url)[1]
        utils.extractall(download_file_path, data_root, ext)
    return base_path
Example 8
def get_cub_prob_map():
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(os.path.join(download.get_dataset_directory(
            'pfnet/chainercv/.lock'), 'cub.lock')):
        data_root = download.get_dataset_directory(root)
        base_path = os.path.join(data_root, 'segmentations')
        if os.path.exists(base_path):
            # skip downloading
            return base_path

        prob_map_download_file_path = utils.cached_download(prob_map_url)
        prob_map_ext = os.path.splitext(prob_map_url)[1]
        utils.extractall(
            prob_map_download_file_path, data_root, prob_map_ext)
    return base_path
Example 9
def get_sequences(split, map_name):
    if split == 'train':
        splits = ['train']
    elif split == 'val':
        splits = ['test']
    elif split == 'trainval':
        if map_name in ['c2', 'c3', 'c4']:
            splits = ['train', 'test']
        else:
            splits = ['all']
    else:
        raise ValueError

    seq_map = []
    data_root = download.get_dataset_directory(root)
    seq_path = os.path.join(
        data_root, 'motchallenge-devkit/motchallenge/seqmaps')
    for sp in splits:
        seqmap_path = os.path.join(
            seq_path, '{0}-{1}.txt'.format(map_name, sp))
        with open(seqmap_path, 'r') as f:
            seq_m = f.read().split('\n')
        seq_map.extend(seq_m[1:-1])
    if map_name == 'c9':
        seq_map = ['{}-DPM'.format(x) for x in seq_map]
    return seq_map
Example 10
def _retrieve_word_vocabulary():
    def creator(path):
        words = _load_words(_train_url)
        vocab = {}
        index = 0
        with open(path, 'w') as f:
            for word in words:
                if word not in vocab:
                    vocab[word] = index
                    index += 1
                    f.write(word + '\n')

        return vocab

    def loader(path):
        vocab = {}
        with open(path) as f:
            for i, word in enumerate(f):
                vocab[word.strip()] = i
        return vocab

    root = download.get_dataset_directory('txt')
    path = os.path.join(root, 'vocab.txt')
    print(root)
    return download.cache_or_load_file(path, creator, loader)
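The creator/loader pair above follows the contract of chainer.dataset.download.cache_or_load_file: if the cache file already exists it is loaded, otherwise the creator builds it. A simplified sketch of that contract (an approximation; the real implementation also writes through a temporary file so concurrent callers do not corrupt the cache):

import os


def cache_or_load_file_sketch(path, creator, loader):
    # Simplified approximation of the behaviour relied on above.
    if os.path.exists(path):
        return loader(path)   # cache hit: just load the file
    return creator(path)      # cache miss: creator writes the file and returns the data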
Example 11
def get_facade():
    root = download.get_dataset_directory('study_chainer/facade')
    npz_path = os.path.join(root, 'base.npz')
    url = 'http://cmp.felk.cvut.cz/~tylecr1/facade/CMP_facade_DB_base.zip'

    def creator(path):
        archive_path = download.cached_download(url)

        images = []
        labels = []

        with zipfile.ZipFile(archive_path, 'r') as archive:
            for i in range(1, 378 + 1):
                image_name = 'base/cmp_b{:04d}.jpg'.format(i)
                label_name = 'base/cmp_b{:04d}.png'.format(i)

                image = Image.open(io.BytesIO(archive.read(image_name)))
                image = np.asarray(image)
                images.append(image)
                label = Image.open(io.BytesIO(archive.read(label_name)))
                label = np.asarray(label)
                labels.append(label)

        np.savez_compressed(path, images=images, labels=labels)
        return {'images': images, 'labels': labels}

    raw = download.cache_or_load_file(npz_path, creator, np.load)
    return raw['images'], raw['labels']
Example 12
def get_sbd(data_dir=None):
    if data_dir is None:
        data_dir = download.get_dataset_directory(root)
    label_dir = osp.join(data_dir, 'fcis_label')
    if not osp.exists(label_dir):
        os.makedirs(label_dir)

    fcn.data.cached_download(
        url=val_url,
        path=osp.join(label_dir, 'val.txt'),
        md5='905db61182fcaaf6b981af6ae6dd7ff2'
    )
    fcn.data.cached_download(
        url=train_url,
        path=osp.join(label_dir, 'train.txt'),
        md5='79bff800c5f0b1ec6b21080a3c066722'
    )

    base_path = osp.join(data_dir, 'benchmark_RELEASE/dataset')
    if osp.exists(base_path):
        return base_path

    download_file_path = utils.cached_download(url)
    ext = osp.splitext(url)[1]
    utils.extractall(download_file_path, data_dir, ext)

    return base_path
Example 13
def _check_pretrained_model(n_fg_class, pretrained_model, models):
    if pretrained_model in models:
        model = models[pretrained_model]
        if n_fg_class and n_fg_class != model['n_fg_class']:
            raise ValueError('n_fg_class mismatch')
        n_fg_class = model['n_fg_class']

        root = get_dataset_directory('pfnet/chainercv/models')
        basename = os.path.basename(model['url'])
        path = os.path.join(root, basename)
        if not os.path.exists(path):
            download_file = download.cached_download(model['url'])
            os.rename(download_file, path)

        if not _available:
            warnings.warn(
                'cv2 is not installed on your environment. '
                'Pretrained models are trained with cv2. '
                'The performance may change with the Pillow backend.',
                RuntimeWarning)
    elif pretrained_model:
        path = pretrained_model
    else:
        path = None

    return n_fg_class, path
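A hypothetical usage sketch for the helper above; the models dict entry, its URL, and the class count are placeholders rather than real ChainerCV values.

# Placeholder registry entry; _check_pretrained_model resolves n_fg_class from it
# and returns the local path of the (possibly freshly downloaded) weights file.
models = {
    'voc0712': {'n_fg_class': 20, 'url': 'http://example.com/model.npz'},
}
n_fg_class, path = _check_pretrained_model(None, 'voc0712', models)
# n_fg_class == 20; path points into pfnet/chainercv/models/ under the dataset root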
Example 14
def _retrieve_emnist(name, archives):
    # the path to store the cached file to
    root = download.get_dataset_directory('pfnet/chainer/emnist')
    path = os.path.join(root, name)
    return download.cache_or_load_file(path,
                                       lambda path: _make_npz(path, archives),
                                       numpy.load)
Example 15
    def __init__(self, data_dir=None, label_resolution=None, split='train',
                 ignore_labels=True):
        if data_dir is None:
            data_dir = download.get_dataset_directory(
                'pfnet/chainercv/cityscapes')

        img_dir = os.path.join(data_dir, os.path.join('leftImg8bit', split))
        resol = label_resolution
        label_dir = os.path.join(data_dir, resol)
        if not os.path.exists(img_dir) or not os.path.exists(label_dir):
            raise ValueError(
                'Cityscapes dataset does not exist at the expected location. '
                'Please download it from https://www.cityscapes-dataset.com/. '
                'Then place directory leftImg8bit at {} and {} at {}.'.format(
                    os.path.join(data_dir, 'leftImg8bit'), resol, label_dir))

        self.ignore_labels = ignore_labels

        self.label_paths = list()
        self.img_paths = list()
        city_dnames = list()
        for dname in glob.glob(os.path.join(label_dir, '*')):
            if split in dname:
                for city_dname in glob.glob(os.path.join(dname, '*')):
                    for label_path in glob.glob(
                            os.path.join(city_dname, '*_labelIds.png')):
                        self.label_paths.append(label_path)
                        city_dnames.append(os.path.basename(city_dname))
        for city_dname, label_path in zip(city_dnames, self.label_paths):
            label_path = os.path.basename(label_path)
            img_path = label_path.replace(
                '{}_labelIds'.format(resol), 'leftImg8bit')
            img_path = os.path.join(img_dir, city_dname, img_path)
            self.img_paths.append(img_path)
Example 16
def get_coco(split, img_split, year, mode):
    data_dir = download.get_dataset_directory(root)
    annos_root = os.path.join(data_dir, 'annotations')
    img_root = os.path.join(data_dir, 'images')
    created_img_root = os.path.join(img_root, '{}{}'.format(img_split, year))
    img_url = img_urls[year][img_split]
    if mode == 'instances':
        anno_url = instances_anno_urls[year][split]
        anno_path = os.path.join(annos_root,
                                 'instances_{}{}.json'.format(split, year))
    elif mode == 'panoptic':
        anno_url = panoptic_anno_url
        anno_path = os.path.join(annos_root,
                                 'panoptic_{}{}.json'.format(split, year))
    else:
        raise ValueError('invalid mode {}'.format(mode))

    if not os.path.exists(created_img_root):
        download_file_path = utils.cached_download(img_url)
        ext = os.path.splitext(img_url)[1]
        utils.extractall(download_file_path, img_root, ext)
    if not os.path.exists(anno_path):
        download_file_path = utils.cached_download(anno_url)
        ext = os.path.splitext(anno_url)[1]
        if split in ['train', 'val']:
            utils.extractall(download_file_path, data_dir, ext)
        elif split in ['valminusminival', 'minival']:
            utils.extractall(download_file_path, annos_root, ext)

    if mode == 'panoptic':
        pixelmap_path = os.path.join(annos_root,
                                     'panoptic_{}{}'.format(split, year))
        if not os.path.exists(pixelmap_path):
            utils.extractall(pixelmap_path + '.zip', annos_root, '.zip')
    return data_dir
Example 17
def _get_tox21_filepath(dataset_type):
    """Returns a file path in which the tox21 dataset is cached.

    This function returns a file path in which `dataset_type`
    of the tox21 dataset is cached.
    Note that this function does not check if the dataset has actually
    been downloaded or not.

    Args:
        dataset_type(str): Name of the target dataset type.
            Either 'train', 'val', or 'test'.

    Returns (str): file path for the tox21 dataset

    """
    if dataset_type not in _config:
        raise ValueError("Invalid dataset type '{}'. Accepted values are "
                         "'train', 'val' or 'test'.".format(dataset_type))

    c = _config[dataset_type]
    sdffile = c['filename']

    cache_root = download.get_dataset_directory(_root)
    cache_path = os.path.join(cache_root, sdffile)
    return cache_path
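As the docstring says, the helper only builds the cache path; a hedged sketch of how a caller might guard the actual download (the download step here is just a placeholder comment):

import os

path = _get_tox21_filepath('train')
if not os.path.exists(path):
    pass  # fetch the SDF file into `path` here before reading it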
Example 18
def _retrieve_cifar_100():
    root = download.get_dataset_directory('pfnet/chainer/cifar')
    path = os.path.join(root, 'cifar-100.npz')
    url = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'

    def creator(path):
        def load(archive, file_name):
            d = _pickle_load(archive.extractfile(file_name))
            x = d['data'].reshape((-1, 3072))
            y = numpy.array(d['fine_labels'], dtype=numpy.uint8)
            return x, y

        archive_path = download.cached_download(url)
        with tarfile.open(archive_path, 'r:gz') as archive:
            train_x, train_y = load(archive, 'cifar-100-python/train')
            test_x, test_y = load(archive, 'cifar-100-python/test')

        numpy.savez_compressed(path,
                               train_x=train_x,
                               train_y=train_y,
                               test_x=test_x,
                               test_y=test_y)
        return {
            'train_x': train_x,
            'train_y': train_y,
            'test_x': test_x,
            'test_y': test_y
        }

    return download.cache_or_load_file(path, creator, numpy.load)
Example 19
def get_fashion_landmark():
    data_dir = download.get_dataset_directory(root)
    url = 'https://drive.google.com/uc?id=0B7EVK8r0v71pSU9nOXVDMk9WbWM'
    img_root = os.path.join(data_dir, 'img')
    anno_root = os.path.join(data_dir, 'Anno')
    eval_root = os.path.join(data_dir, 'Eval')
    download_file_path = cached_gdown_download(url)
    if not os.path.exists(img_root):
        utils.extractall(download_file_path, data_dir, '.zip')

    landmark_annotation_url = 'https://drive.google.com/uc?id='\
        '0B7EVK8r0v71pZ3pGVFZ0YjZVTjg'
    download_file_path = cached_gdown_download(landmark_annotation_url)
    try:
        os.makedirs(anno_root)
    except OSError:
        if not os.path.exists(anno_root):
            raise
    shutil.copy(download_file_path,
                os.path.join(anno_root, 'list_landmarks.txt'))

    eval_list_url = 'https://drive.google.com/uc?id='\
        '0B7EVK8r0v71pakJzTEM0a2Q4Qm8'
    download_file_path = cached_gdown_download(eval_list_url)
    try:
        os.makedirs(eval_root)
    except OSError:
        if not os.path.exists(eval_root):
            raise
    shutil.copy(download_file_path,
                os.path.join(eval_root, 'list_eval_partition.txt'))
    return data_dir
Example 20
def download_model(url):
    """Downloads a model file and puts it under model directory.

    It downloads a file from the URL and puts it under model directory.
    For exmaple, if :obj:`url` is `http://example.com/subdir/model.npz`,
    the pretrained weights file will be saved to
    `$CHAINER_DATASET_ROOT/pfnet/AU_rcnn/models/model.npz`.
    If there is already a file at the destination path,
    it just returns the path without downloading the same file.

    Args:
        url (str): URL to download from.

    Returns:
        str: Path to the downloaded file.

    """
    root = get_dataset_directory(
        os.path.join('pfnet', 'AU_rcnn', 'models'))
    basename = os.path.basename(url)
    path = os.path.join(root, basename)
    if not os.path.exists(path):
        cache_path = cached_download(url)
        os.rename(cache_path, path)
    return path
Example 21
def get_ade20k(root, url):
    data_root = download.get_dataset_directory(root)
    if os.path.exists(os.path.join(data_root, 'ADEChallengeData2016')):
        return data_root
    cache_path = utils.cached_download(url)
    utils.extractall(cache_path, data_root, os.path.splitext(url)[1])
    return data_root
Example 22
    def __init__(self,
                 data_dir='auto',
                 mode='train',
                 year='2012',
                 use_difficult=False,
                 use_cache=False,
                 delete_cache=False):
        if data_dir == 'auto' and year in voc_utils.urls:
            data_dir = voc_utils.get_pascal_voc(year)

        if mode not in ['train', 'trainval', 'val']:
            warnings.warn(
                'please pick mode from \'train\', \'trainval\', \'val\'')

        id_list_file = os.path.join(data_dir,
                                    'ImageSets/Main/{0}.txt'.format(mode))

        self.ids = [id_.strip() for id_ in open(id_list_file)]

        self.data_dir = data_dir
        self.use_difficult = use_difficult

        # cache objects
        data_root = download.get_dataset_directory(voc_utils.root)

        pkl_file = os.path.join(
            data_root, 'detection_objects_{}_{}.pkl'.format(year, mode))
        self.objects = cache_load(pkl_file,
                                  self._collect_objects,
                                  delete_cache,
                                  use_cache,
                                  args=(self.data_dir, self.ids,
                                        self.use_difficult))
        self.keys = self.objects.keys()
Example 23
def _retrieve(name_npz, name_caffemodel, model):
    root = download.get_dataset_directory('pfnet/chainer/models/')
    path = os.path.join(root, name_npz)
    path_caffemodel = os.path.join(root, name_caffemodel)
    return download.cache_or_load_file(
        path, lambda path: _make_npz(path, path_caffemodel, model),
        lambda path: npz.load_npz(path, model))
Example 24
def _retrieve(n_layers, name_npz, name_caffemodel, model):
    root = download.get_dataset_directory('pfnet/chainer/models/')
    path = os.path.join(root, name_npz)
    path_caffemodel = os.path.join(root, name_caffemodel)
    return download.cache_or_load_file(
        path, lambda path: _make_npz(path, path_caffemodel, model, n_layers),
        lambda path: npz.load_npz(path, model))
Example 25
def get_coco(split, img_split, data_dir=None):
    url = img_urls[img_split]
    if data_dir is None:
        data_dir = download.get_dataset_directory(root)
    img_root = os.path.join(data_dir, 'images')
    created_img_root = os.path.join(img_root, img_split)
    if 'test' in split:
        annos_root = data_dir
    else:
        annos_root = os.path.join(data_dir, 'annotations')
    if 'test' in split:
        anno_prefix = 'image_info'
    else:
        anno_prefix = 'instances'
    anno_fn = os.path.join(annos_root,
                           '{0}_{1}.json'.format(anno_prefix, split))
    if not os.path.exists(created_img_root):
        download_file_path = utils.cached_download(url)
        ext = os.path.splitext(url)[1]
        utils.extractall(download_file_path, img_root, ext)
    if not os.path.exists(anno_fn):
        anno_url = anno_urls[split]
        download_file_path = utils.cached_download(anno_url)
        ext = os.path.splitext(anno_url)[1]
        utils.extractall(download_file_path, annos_root, ext)
    return data_dir
Example 26
def get_coco(split, year, mode='person_keypoints'):
    if year not in ['2017']:
        raise ValueError
    if split not in ['train', 'val']:
        raise ValueError
    data_dir = download.get_dataset_directory(root)
    annos_root = os.path.join(data_dir, 'annotations')
    img_root = os.path.join(data_dir, 'images')
    created_img_root = os.path.join(img_root, '{}{}'.format(split, year))
    img_url = img_urls[year][split]

    if mode == 'person_keypoints':
        anno_url = person_keypoints_anno_urls[year][split]
        anno_path = os.path.join(
            annos_root, 'person_keypoints_{}{}.json'.format(split, year))
    else:
        raise ValueError('invalid mode {}'.format(mode))

    if not os.path.exists(created_img_root):
        download_file_path = utils.cached_download(img_url)
        ext = os.path.splitext(img_url)[1]
        utils.extractall(download_file_path, img_root, ext)
    if not os.path.exists(anno_path):
        download_file_path = utils.cached_download(anno_url)
        ext = os.path.splitext(anno_url)[1]
        if split in ['train', 'val']:
            utils.extractall(download_file_path, data_dir, ext)
        elif split in ['valminusminival', 'minival']:
            utils.extractall(download_file_path, annos_root, ext)
    return data_dir
Example 27
def get_camvid():
    # To support ChainerMN, the target directory should be locked.
    with filelock.FileLock(
            os.path.join(
                download.get_dataset_directory('pfnet/chainercv/.lock'),
                'camvid.lock')):
        data_root = download.get_dataset_directory(root)
        download_file_path = utils.cached_download(url)
        if len(glob.glob(os.path.join(data_root, '*'))) != 9:
            utils.extractall(download_file_path, data_root,
                             os.path.splitext(url)[1])
        data_dir = os.path.join(data_root, 'SegNet-Tutorial-master/CamVid')
        if os.path.exists(data_dir):
            for fn in glob.glob(os.path.join(data_dir, '*')):
                shutil.move(fn, os.path.join(data_root, os.path.basename(fn)))
            shutil.rmtree(os.path.dirname(data_dir))
    return data_root
Example 28
def gen_action_labels(out_fn="epic_kitchen_action_labels.py"):
    data_dir = download.get_dataset_directory(root)
    images_root = osp.join(data_dir, "object_detection_images")
    annos_root = osp.join(data_dir,
                          "annotations-{version}".format(version=version))
    anno_fn = "EPIC_verb_classes.csv"

    gen_labels(osp.join(annos_root, anno_fn), out_fn, "epic_kitchen_action",
               "verb_id", "class_key")
Example 29
def get_kaggle():
    root = 'pfnet/chainer/pubchem'
    cache_root = download.get_dataset_directory(root)
    fname = 'pubchem.h5'
    cache_path = os.path.join(cache_root, fname)

    dataset = download.cache_or_load_file(cache_path, creator, loader)
    N = len(dataset)
    return D.split_dataset_random(dataset, int(N * 0.75))
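For reference, chainer.datasets.split_dataset_random returns a pair of subsets, so the call above yields roughly a 75%/25% split; a small sketch of consuming it:

# get_kaggle() returns (train_subset, rest_subset).
train, test = get_kaggle()
# len(train) is about 75% of the full dataset, len(test) the remaining 25%.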
Example 30
def _get_sintel():
    data_root = download.get_dataset_directory(root)
    if os.path.exists(os.path.join(data_root, 'training')):
        # skip downloading
        return data_root

    download_file_path = utils.cached_download(url)
    ext = os.path.splitext(url)[1]
    utils.extractall(download_file_path, data_root, ext)
    return data_root
Example 31
def _get_cifar(name, withlabel, ndim, scale):
    root = download.get_dataset_directory(os.path.join('pfnet', 'chainer',
                                                       'cifar'))
    npz_path = os.path.join(root, '{}.npz'.format(name))
    url = 'https://www.cs.toronto.edu/~kriz/{}-python.tar.gz'.format(name)

    def creator(path):
        archive_path = download.cached_download(url)

        if name == 'cifar-10':
            train_x = numpy.empty((5, 10000, 3072), dtype=numpy.uint8)
            train_y = numpy.empty((5, 10000), dtype=numpy.uint8)
            test_y = numpy.empty(10000, dtype=numpy.uint8)

            dir_name = '{}-batches-py'.format(name)

            with tarfile.open(archive_path, 'r:gz') as archive:
                # training set
                for i in range(5):
                    file_name = '{}/data_batch_{}'.format(dir_name, i + 1)
                    d = _pickle_load(archive.extractfile(file_name))
                    train_x[i] = d['data']
                    train_y[i] = d['labels']

                # test set
                file_name = '{}/test_batch'.format(dir_name)
                d = _pickle_load(archive.extractfile(file_name))
                test_x = d['data']
                test_y[...] = d['labels']  # copy to array

            train_x = train_x.reshape(50000, 3072)
            train_y = train_y.reshape(50000)
        else:
            # name == 'cifar-100'
            def load(archive, file_name):
                d = _pickle_load(archive.extractfile(file_name))
                x = d['data'].reshape((-1, 3072))
                y = numpy.array(d['fine_labels'], dtype=numpy.uint8)
                return x, y

            with tarfile.open(archive_path, 'r:gz') as archive:
                train_x, train_y = load(archive, 'cifar-100-python/train')
                test_x, test_y = load(archive, 'cifar-100-python/test')

        numpy.savez_compressed(path, train_x=train_x, train_y=train_y,
                               test_x=test_x, test_y=test_y)
        return {'train_x': train_x, 'train_y': train_y,
                'test_x': test_x, 'test_y': test_y}

    raw = download.cache_or_load_file(npz_path, creator, numpy.load)
    train = _preprocess_cifar(raw['train_x'], raw['train_y'], withlabel,
                              ndim, scale)
    test = _preprocess_cifar(raw['test_x'], raw['test_y'], withlabel, ndim,
                             scale)
    return train, test
Example 32
def _get_zinc250k_filepath():
    """Construct a filepath which stores ZINC250k dataset in csv

    This method does not check if the file is already downloaded or not.

    Returns (str): filepath for ZINC250k dataset

    """
    cache_root = download.get_dataset_directory(_root)
    cache_path = os.path.join(cache_root, file_name_250k)
    return cache_path
Example 33
def get_cub():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'CUB_200_2011')
    if os.path.exists(base_path):
        # skip downloading
        return base_path

    download_file_path = utils.cached_download(url)
    ext = os.path.splitext(url)[1]
    utils.extractall(download_file_path, data_root, ext)
    return base_path
Example 34
def _get_online_products():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'Stanford_Online_Products')
    if os.path.exists(base_path):
        # skip downloading
        return base_path

    download_file_path = utils.cached_download(url)
    ext = os.path.splitext(url)[1]
    utils.extractall(download_file_path, data_root, ext)
    return base_path
Example 35
def get_cub():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'CUB_200_2011')
    if os.path.exists(base_path):
        # skip downloading
        return base_path

    download_file_path = utils.cached_download(url)
    ext = os.path.splitext(url)[1]
    utils.extractall(download_file_path, data_root, ext)
    return base_path
Example 36
def get_cub_mask():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'segmentations')
    if os.path.exists(base_path):
        # skip downloading
        return base_path

    download_file_path_mask = utils.cached_download(mask_url)
    ext_mask = os.path.splitext(mask_url)[1]
    utils.extractall(download_file_path_mask, data_root, ext_mask)
    return base_path
Example 37
def _get_online_products():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'Stanford_Online_Products')
    if os.path.exists(base_path):
        # skip downloading
        return base_path

    download_file_path = utils.cached_download(url)
    ext = os.path.splitext(url)[1]
    utils.extractall(download_file_path, data_root, ext)
    return base_path
Example 38
def get_cub_prob_map():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'segmentations')
    if os.path.exists(base_path):
        # skip downloading
        return base_path

    prob_map_download_file_path = utils.cached_download(prob_map_url)
    prob_map_ext = os.path.splitext(prob_map_url)[1]
    utils.extractall(
        prob_map_download_file_path, data_root, prob_map_ext)
    return base_path
Example 39
def get_camvid():
    data_root = download.get_dataset_directory(root)
    download_file_path = utils.cached_download(url)
    if len(glob.glob(os.path.join(data_root, '*'))) != 9:
        utils.extractall(
            download_file_path, data_root, os.path.splitext(url)[1])
    data_dir = os.path.join(data_root, 'SegNet-Tutorial-master/CamVid')
    if os.path.exists(data_dir):
        for fn in glob.glob(os.path.join(data_dir, '*')):
            shutil.move(fn, os.path.join(data_root, os.path.basename(fn)))
        shutil.rmtree(os.path.dirname(data_dir))
    return data_root
Example 40
def _get_molnet_filepath(file_name):
    """Construct a filepath which stores MoleculeNet dataset in csv

    This method does not check if the file is already downloaded or not.

    Args:
        file_name (str): file name of MoleculeNet dataset

    Returns (str): filepath for one of MoleculeNet dataset

    """
    cache_root = download.get_dataset_directory(_root)
    cache_path = os.path.join(cache_root, file_name)
    return cache_path
Example 41
def _get_pdbbind_time_filepath(file_name):
    """Construct a filepath which stores year table in csv.

    This method does not check if the file is already downloaded or not.

    Args:
        file_name(str): file name of year table

    Returns(str): filepath for one of year table

    """
    cache_root = download.get_dataset_directory('pfnet/chainer/molnet')
    cache_path = os.path.join(cache_root, file_name)
    return cache_path
Example 42
def _retrieve_ptb_words(name, url):
    def creator(path):
        vocab = _retrieve_word_vocabulary()
        words = _load_words(url)
        x = numpy.empty(len(words), dtype=numpy.int32)
        for i, word in enumerate(words):
            x[i] = vocab[word]

        numpy.savez_compressed(path, x=x)
        return {'x': x}

    root = download.get_dataset_directory('pfnet/chainer/ptb')
    path = os.path.join(root, name)
    loaded = download.cache_or_load_file(path, creator, numpy.load)
    return loaded['x']
Example 43
def get_sbd():
    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'benchmark_RELEASE/dataset')

    train_voc2012_file = os.path.join(base_path, 'train_voc2012.txt')
    if os.path.exists(train_voc2012_file):
        # skip downloading
        return base_path

    download_file_path = utils.cached_download(url)
    ext = os.path.splitext(url)[1]
    utils.extractall(download_file_path, data_root, ext)

    six.moves.urllib.request.urlretrieve(train_voc2012_url, train_voc2012_file)
    _generate_voc2012_txt(base_path)

    return base_path
Example 44
    def __init__(self, data_dir='auto'):
        if data_dir == 'auto':
            data_dir = download.get_dataset_directory(
                'pfnet/chainercv/cityscapes')

        img_dir = os.path.join(data_dir, os.path.join('leftImg8bit', 'test'))
        if not os.path.exists(img_dir):
            raise ValueError(
                'Cityscapes dataset does not exist at the expected location. '
                'Please download it from https://www.cityscapes-dataset.com/. '
                'Then place directory leftImg8bit at {}.'.format(
                    os.path.join(data_dir, 'leftImg8bit')))

        self.img_paths = []
        for city_dname in sorted(glob.glob(os.path.join(img_dir, '*'))):
            for img_path in sorted(glob.glob(
                    os.path.join(city_dname, '*_leftImg8bit.png'))):
                self.img_paths.append(img_path)
Example 45
def get_voc(year, split):
    if year not in urls:
        raise ValueError
    key = year

    if split == 'test' and year == '2007':
        key = '2007_test'

    data_root = download.get_dataset_directory(root)
    base_path = os.path.join(data_root, 'VOCdevkit/VOC{}'.format(year))
    split_file = os.path.join(base_path, 'ImageSets/Main/{}.txt'.format(split))
    if os.path.exists(split_file):
        # skip downloading
        return base_path

    download_file_path = utils.cached_download(urls[key])
    ext = os.path.splitext(urls[key])[1]
    utils.extractall(download_file_path, data_root, ext)
    return base_path
Example 46
    def __init__(self, n_layers):
        root = download.get_dataset_directory('pfnet/chainer/models/')
        caffemodel_path = os.path.join(
            root, 'ResNet-{}-model.caffemodel'.format(n_layers))
        if not os.path.exists(caffemodel_path):
            if n_layers == 50:
                cache_path = download.cached_download(self.URLS['resnet50'])
            elif n_layers == 101:
                cache_path = download.cached_download(self.URLS['resnet101'])
            elif n_layers == 152:
                cache_path = download.cached_download(self.URLS['resnet152'])
            else:
                raise ValueError('n_layers must be 50, 101, or 152')
            shutil.move(cache_path, caffemodel_path)
        super(ResNet, self).__init__(
            os.path.basename(caffemodel_path), n_layers=n_layers)

        self._children.remove('fc6')
        del self.fc6
        del self.functions['fc6']
        del self.functions['prob']
        self.train = True
Example 47
def _retrieve_cifar(name):
    root = download.get_dataset_directory('pfnet/chainer/cifar')
    path = os.path.join(root, '{}.npz'.format(name))
    url = 'https://www.cs.toronto.edu/~kriz/{}-python.tar.gz'.format(name)

    def creator(path):
        archive_path = download.cached_download(url)

        train_x = numpy.empty((5, 10000, 3072), dtype=numpy.uint8)
        train_y = numpy.empty((5, 10000), dtype=numpy.uint8)
        test_y = numpy.empty(10000, dtype=numpy.uint8)

        dir_name = '{}-batches-py'.format(name)

        with tarfile.open(archive_path, 'r:gz') as archive:
            # training set
            for i in range(5):
                file_name = '{}/data_batch_{}'.format(dir_name, i + 1)
                d = pickle.load(archive.extractfile(file_name))
                train_x[i] = d['data']
                train_y[i] = d['labels']

            # test set
            file_name = '{}/test_batch'.format(dir_name)
            d = pickle.load(archive.extractfile(file_name))
            test_x = d['data']
            test_y[...] = d['labels']  # copy to array

        train_x = train_x.reshape(50000, 3072)
        train_y = train_y.reshape(50000)

        numpy.savez_compressed(path, train_x=train_x, train_y=train_y,
                               test_x=test_x, test_y=test_y)
        return {'train_x': train_x, 'train_y': train_y,
                'test_x': test_x, 'test_y': test_y}

    return download.cache_or_load_file(path, creator, numpy.load)
Example 48
def _retrieve_cifar_100():
    root = download.get_dataset_directory('pfnet/chainer/cifar')
    path = os.path.join(root, 'cifar-100.npz')
    url = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'

    def creator(path):

        def load(archive, file_name):
            d = _pickle_load(archive.extractfile(file_name))
            x = d['data'].reshape((-1, 3072))
            y = numpy.array(d['fine_labels'], dtype=numpy.uint8)
            return x, y

        archive_path = download.cached_download(url)
        with tarfile.open(archive_path, 'r:gz') as archive:
            train_x, train_y = load(archive, 'cifar-100-python/train')
            test_x, test_y = load(archive, 'cifar-100-python/test')

        numpy.savez_compressed(path, train_x=train_x, train_y=train_y,
                               test_x=test_x, test_y=test_y)
        return {'train_x': train_x, 'train_y': train_y,
                'test_x': test_x, 'test_y': test_y}

    return download.cache_or_load_file(path, creator, numpy.load)
Example 49
def _retrieve_svhn(name, url):
    root = download.get_dataset_directory('pfnet/chainer/svhn')
    path = os.path.join(root, name)
    return download.cache_or_load_file(
        path, lambda path: _make_npz(path, url), numpy.load)
Example 50
    def setUp(self):
        self.root = download.get_dataset_directory(
            os.path.join('pfnet', 'chainer', 'cifar'))
Example 51
def _retrieve_fashion_mnist(name, urls):
    root = download.get_dataset_directory('pfnet/chainer/fashion-mnist')
    path = os.path.join(root, name)
    return download.cache_or_load_file(
        path, lambda path: make_npz(path, urls), numpy.load)
Example 52
def get_ade20k(root, url):
    data_root = download.get_dataset_directory(root)
    cache_path = utils.cached_download(url)
    utils.extractall(cache_path, data_root, os.path.splitext(url)[1])
    return data_root