def CASAS_download(directory, datasets):
    """Download CASAS datasets to directory.

    Each requested dataset is fetched with ``default_downloader`` and its
    zip archive is extracted in place. Unknown dataset names are reported
    (with the list of available names) rather than raising.

    Args:
        directory (:obj:`str`): path to directory to store the downloaded
            datasets.
        datasets (:obj:`tuple` of :obj:`str`): list of datasets to download.
    """
    for dataset in datasets:
        filename = dataset_dict.get(dataset)
        if filename is None:
            print('Cannot find dataset %s' % dataset)
            print('Here are the available datasets:')
            for key in dataset_dict:
                print('  * %s' % key)
        else:
            # Download zipped files
            default_downloader(directory=directory,
                               urls=[master_url + filename],
                               filenames=[filename],
                               clear=False)
            # Expand it in place. The context manager guarantees the
            # archive handle is closed even if extractall() raises,
            # fixing the handle leak in the previous explicit close().
            file_path = os.path.join(directory, filename)
            if os.path.exists(file_path):
                with zipfile.ZipFile(file_path, 'r') as zip_ref:
                    zip_ref.extractall(directory)
Exemple #2
0
    def test_convert(self):
        """Converting before the raw files exist raises IOError; after
        downloading them, the converted HDF5 file has the expected shapes."""
        tempdir = self.tempdir

        with remember_cwd():
            os.chdir(tempdir)
            # No raw files yet: conversion must fail.
            assert_raises(IOError,
                          adult.convert_adult,
                          directory=tempdir,
                          output_directory=tempdir)

            base = ('https://archive.ics.uci.edu/ml/'
                    'machine-learning-databases/adult/')
            names = ['adult.data', 'adult.test']
            default_downloader(directory=tempdir,
                               urls=[base + name for name in names],
                               filenames=names)

            adult.convert_adult(directory=tempdir, output_directory=tempdir)

        output_path = os.path.join(tempdir, "adult.hdf5")

        with h5py.File(output_path, 'r') as h5:
            # 30162 training + 15060 test rows, 104 feature columns.
            assert h5['features'].shape == (30162 + 15060, 104)
            assert h5['targets'].shape[0] == h5['features'].shape[0]
def CASAS_download(directory, datasets):
    """Download CASAS datasets to directory.

    Each requested dataset is fetched with ``default_downloader`` and its
    zip archive is extracted in place. Unknown dataset names are reported
    (with the list of available names) rather than raising.

    Args:
        directory (:obj:`str`): path to directory to store the downloaded
            datasets.
        datasets (:obj:`tuple` of :obj:`str`): list of datasets to download.
    """
    for dataset in datasets:
        filename = dataset_dict.get(dataset)
        if filename is None:
            print('Cannot find dataset %s' % dataset)
            print('Here are the available datasets:')
            for key in dataset_dict:
                print('  * %s' % key)
        else:
            # Download zipped files
            default_downloader(directory=directory,
                               urls=[master_url + filename],
                               filenames=[filename],
                               clear=False)
            # Expand it in place. The context manager guarantees the
            # archive handle is closed even if extractall() raises,
            # fixing the handle leak in the previous explicit close().
            file_path = os.path.join(directory, filename)
            if os.path.exists(file_path):
                with zipfile.ZipFile(file_path, 'r') as zip_ref:
                    zip_ref.extractall(directory)
Exemple #4
0
 def test_default_downloader_clear(self):
     """With clear=True the downloader deletes the named file instead of
     fetching anything."""
     target = os.path.join(self.tempdir, 'tmp.data')
     # Create an empty file for the downloader to remove.
     open(target, 'a').close()
     default_downloader(directory=self.tempdir, clear=True,
                        urls=[None], filenames=['tmp.data'])
     assert not os.path.isfile(target)
Exemple #5
0
    def test_convert(self):
        """Conversion fails with IOError before the raw adult files exist,
        and produces an HDF5 file with the expected shapes afterwards."""
        tempdir = self.tempdir

        with remember_cwd():
            os.chdir(tempdir)
            # Raw files absent: conversion must raise.
            assert_raises(IOError,
                          adult.convert_adult,
                          directory=tempdir,
                          output_directory=tempdir)

            prefix = ('https://archive.ics.uci.edu/ml/'
                      'machine-learning-databases/adult/')
            filenames = ['adult.data', 'adult.test']
            default_downloader(
                directory=tempdir,
                urls=[prefix + f for f in filenames],
                filenames=filenames)

            adult.convert_adult(directory=tempdir,
                                output_directory=tempdir)

        output_file = os.path.join(tempdir, "adult.hdf5")

        with h5py.File(output_file, 'r') as h5:
            # 30162 training rows + 15060 test rows, 104 feature columns.
            assert h5['features'].shape == (30162 + 15060, 104)
            assert h5['targets'].shape[0] == h5['features'].shape[0]
Exemple #6
0
 def test_default_downloader_save_no_filename(self):
     """When filenames=[None] the file name is derived from the URL and
     the downloaded content matches the mock payload."""
     default_downloader(directory=self.tempdir,
                        clear=False,
                        urls=[mock_url],
                        filenames=[None])
     with open(self.filepath, 'rb') as downloaded:
         assert_equal(downloaded.read(), mock_content)
Exemple #7
0
def svhn_downloader(which_format, directory, clear=False):
    """Download the SVHN 'train', 'test' and 'extra' files.

    ``which_format`` selects the archive flavour: 1 for the full ``.tar.gz``
    images, 2 for the cropped ``_32x32.mat`` files. Any other value raises
    KeyError, matching the original behavior.
    """
    suffixes = {1: '.tar.gz', 2: '_32x32.mat'}
    suffix = suffixes[which_format]
    splits = ('train', 'test', 'extra')
    filenames = ['{}{}'.format(split, suffix) for split in splits]
    default_downloader(directory=directory,
                       urls=[None] * len(filenames),
                       filenames=filenames,
                       url_prefix='http://ufldl.stanford.edu/housenumbers/',
                       clear=clear)
def silhouettes_downloader(size, **kwargs):
    """Download the Caltech-101 Silhouettes file for ``size`` (16 or 28).

    Raises:
        ValueError: if ``size`` is neither 16 nor 28.
    """
    if size not in (16, 28):
        raise ValueError("size must be 16 or 28")

    filename = FILENAME.format(size)
    url = BASE_URL + filename
    default_downloader(urls=[url], filenames=[filename], **kwargs)
 def test_default_downloader_save_no_url_url_prefix(self):
     """urls=[None] plus url_prefix must reconstruct the iris URL and
     download a file whose MD5 matches the known hash."""
     iris_path = os.path.join(self.tempdir, 'iris.data')
     default_downloader(directory=self.tempdir, clear=False,
                        urls=[None], filenames=['iris.data'],
                        url_prefix=iris_url[:-9])
     with open(iris_path, 'r') as downloaded:
         digest = hashlib.md5(
             downloaded.read().encode('utf-8')).hexdigest()
     assert digest == iris_hash
 def test_default_downloader_save_no_filename(self):
     """filenames=[None] derives 'iris.data' from the URL; the file's MD5
     must match the known iris hash."""
     iris_path = os.path.join(self.tempdir, 'iris.data')
     default_downloader(DummyArgs(directory=self.tempdir, clear=False,
                                  urls=[iris_url], filenames=[None]))
     with open(iris_path, 'r') as downloaded:
         digest = hashlib.md5(
             downloaded.read().encode('utf-8')).hexdigest()
     assert digest == iris_hash
     os.remove(iris_path)
Exemple #11
0
def svhn_downloader(which_format, directory, clear=False):
    """Download SVHN train/test/extra files in the requested format.

    Format 1 is the full-image ``.tar.gz`` archive, format 2 the cropped
    ``_32x32.mat`` files; any other value raises KeyError, as before.
    """
    file_suffix = {1: '.tar.gz', 2: '_32x32.mat'}[which_format]
    names = []
    for split in ('train', 'test', 'extra'):
        names.append(split + file_suffix)
    default_downloader(
        directory=directory,
        urls=[None for _ in names],
        filenames=names,
        url_prefix='http://ufldl.stanford.edu/housenumbers/',
        clear=clear)
Exemple #12
0
def silhouettes_downloader(size, **kwargs):
    """Fetch the Caltech-101 Silhouettes dataset at resolution ``size``.

    Only sizes 16 and 28 are published; anything else raises ValueError
    before any network activity is attempted.
    """
    if size != 16 and size != 28:
        raise ValueError("size must be 16 or 28")

    name = FILENAME.format(size)
    default_downloader(urls=[BASE_URL + name],
                       filenames=[name],
                       **kwargs)
Exemple #13
0
def ensure_dataset_ready(basename, version, url_dir, kerosenedir):
    """Make sure the dataset file exists under ``kerosenedir``.

    Downloads the file if it is not already present (the downloader also
    creates directories as needed), points fuel's data path at
    ``kerosenedir``, and returns the local filename.
    """
    filename, url = paths_from_metadata(basename, version, url_dir)
    target = os.path.join(kerosenedir, filename)
    if not os.path.isfile(target):
        # File missing: fetch it (directories are created if needed).
        default_downloader(kerosenedir, [url], [filename])
    # Override fuel's centralized location temporarily.
    fuel.config.data_path = kerosenedir
    return filename
Exemple #14
0
def ensure_dataset_ready(basename, version, url_dir, kerosenedir=None):
    """Make sure the dataset file exists locally and return its filename.

    Downloads the file if it is not already present (the downloader also
    creates directories as needed) and points fuel's data path at the
    storage directory.

    Args:
        basename: dataset base name passed to ``paths_from_metadata``.
        version: dataset version passed to ``paths_from_metadata``.
        url_dir: remote directory passed to ``paths_from_metadata``.
        kerosenedir: destination directory. Defaults to
            ``~/.kerosene/datasets``, the previously hard-coded location,
            so existing callers are unaffected.
    """
    # setup names
    filename, url = paths_from_metadata(basename, version, url_dir)
    if kerosenedir is None:
        kerosenedir = os.path.expanduser(
            os.path.join('~', '.kerosene', 'datasets'))
    filetarget = os.path.join(kerosenedir, filename)
    # if file is not present, download it (also creates directories if needed)
    if not os.path.isfile(filetarget):
        default_downloader(kerosenedir, [url], [filename])
    # override fuel's centralized location temporarily
    fuel.config.data_path = kerosenedir
    return filename
Exemple #15
0
 def test_default_downloader_save_no_filename(self):
     """Passing filenames=[None] must derive 'iris.data' from the URL and
     save content whose MD5 matches the known hash."""
     iris_path = os.path.join(self.tempdir, 'iris.data')
     dummy = DummyArgs(directory=self.tempdir,
                       clear=False,
                       urls=[iris_url],
                       filenames=[None])
     default_downloader(dummy)
     with open(iris_path, 'r') as downloaded:
         content = downloaded.read()
     assert hashlib.md5(content.encode('utf-8')).hexdigest() == iris_hash
     os.remove(iris_path)
Exemple #16
0
def download_model(model_name):
    """Download, decompress and install a named model.

    The compressed model is fetched into a temporary directory,
    decompressed there, atomically renamed into the platzoo directory
    (prevents half-installed files), and symlinked under ``model_name``.

    Args:
        model_name: key into ``model_download_table``. Unknown names print
            a failure message and exit the process with status 1.
    """
    # see if this is a known model
    if model_name not in model_download_table:
        print("Failure: unknown model {}".format(model_name))
        sys.exit(1)

    # resolve url
    model_url = model_download_table[model_name]
    platzoo_dir = get_platzoo_dir()
    local_gz_filename = model_url.split("/")[-1]
    temp_dir = tempfile.mkdtemp()

    # try/finally guarantees the temp directory is removed even when the
    # download or decompression fails (resolves the old failure-cleanup TODO)
    try:
        # download
        default_downloader(temp_dir, [model_url], [local_gz_filename])

        if local_gz_filename.endswith(".gz"):
            local_filename = local_gz_filename[:-3]
        else:
            local_filename = "{}.2".format(local_gz_filename)
        # convert to absolute paths
        final_local_filepath = os.path.join(platzoo_dir, local_filename)
        final_local_linkpath = os.path.join(platzoo_dir, model_name)
        temp_gz_filepath = os.path.join(temp_dir, local_gz_filename)
        temp_filepath = os.path.join(temp_dir, local_filename)

        # decompress the file to temporary location
        print("Decompressing {}".format(model_name))
        with open(temp_filepath, 'wb') as f_out, \
                gzip.open(temp_gz_filepath, 'rb') as f_in:
            shutil.copyfileobj(f_in, f_out)

        # atomic rename (prevents half-downloaded files)
        print("Installing {}".format(model_name))
        os.rename(temp_filepath, final_local_filepath)
        # symlink, removing old first if necessary
        if os.path.exists(final_local_linkpath):
            os.remove(final_local_linkpath)
        os.symlink(local_filename, final_local_linkpath)
    finally:
        # cleanup temp directory
        shutil.rmtree(temp_dir)
Exemple #17
0
    def test_download_and_convert(self):
        """Conversion without raw data raises IOError; after downloading,
        the converted HDF5 contents match the raw iris data.

        The working-directory change is wrapped in try/finally so the cwd
        is restored even when an assertion or download fails (previously a
        mid-test failure left the process chdir'd into the temp dir).
        """
        tempdir = self.tempdir

        cwd = os.getcwd()
        os.chdir(tempdir)
        try:
            assert_raises(IOError,
                          iris.convert_iris,
                          directory=tempdir,
                          output_directory=tempdir)

            default_downloader(
                directory=tempdir,
                urls=[
                    'https://archive.ics.uci.edu/ml/machine-learning-databases/'
                    'iris/iris.data'
                ],
                filenames=['iris.data'])

            classes = {
                b'Iris-setosa': 0,
                b'Iris-versicolor': 1,
                b'Iris-virginica': 2
            }
            data = numpy.loadtxt(os.path.join(tempdir, 'iris.data'),
                                 converters={4: lambda x: classes[x]},
                                 delimiter=',')
            features = data[:, :-1].astype('float32')
            targets = data[:, -1].astype('uint8').reshape((-1, 1))

            iris.convert_iris(directory=tempdir, output_directory=tempdir)
        finally:
            os.chdir(cwd)

        output_file = "iris.hdf5"
        output_file = os.path.join(tempdir, output_file)
        with h5py.File(output_file, 'r') as h5:
            assert numpy.allclose(h5['features'], features)
            assert numpy.allclose(h5['targets'], targets)
Exemple #18
0
    def test_download_and_convert(self):
        """Converting with no raw data raises IOError; after downloading,
        the converted HDF5 contents equal the raw iris values."""
        tempdir = self.tempdir

        with remember_cwd():
            os.chdir(tempdir)

            # Raw file not yet present: conversion must fail.
            assert_raises(IOError,
                          iris.convert_iris,
                          directory=tempdir,
                          output_directory=tempdir)

            default_downloader(
                directory=tempdir,
                urls=['https://archive.ics.uci.edu/ml/'
                      'machine-learning-databases/iris/iris.data'],
                filenames=['iris.data'])

            # Map class labels to integer targets for loadtxt.
            label_map = {
                b'Iris-setosa': 0,
                b'Iris-versicolor': 1,
                b'Iris-virginica': 2}
            raw = numpy.loadtxt(
                os.path.join(tempdir, 'iris.data'),
                converters={4: lambda x: label_map[x]},
                delimiter=',')
            features = raw[:, :-1].astype('float32')
            targets = raw[:, -1].astype('uint8').reshape((-1, 1))

            iris.convert_iris(directory=tempdir,
                              output_directory=tempdir)

        output_file = os.path.join(tempdir, "iris.hdf5")
        with h5py.File(output_file, 'r') as h5:
            assert numpy.allclose(h5['features'], features)
            assert numpy.allclose(h5['targets'], targets)
Exemple #19
0
def downloader_wrapper(format, directory, **kwargs):
    """Download the LFW archive for ``format`` plus the shared files.

    Builds the download list locally instead of inserting into the
    module-level ``files`` list: the old in-place ``files.insert(0, ...)``
    accumulated an extra archive entry on every call, so a second call
    downloaded stale formats and duplicated work.
    """
    # Fresh list per call: the format-specific archive first, then the
    # shared files (``files`` itself is left untouched).
    download_list = ["{}.tgz".format(resolve_filename(format))] + list(files)
    urls = ['http://vis-www.cs.umass.edu/lfw/' + name
            for name in download_list]
    default_downloader(directory, urls=urls, filenames=download_list,
                       **kwargs)
Exemple #20
0
 def test_default_downloader_save_no_url_url_prefix(self):
     """urls=[None] plus url_prefix must rebuild the mock URL and save
     the expected mock content."""
     default_downloader(directory=self.tempdir, clear=False,
                        urls=[None], filenames=[mock_filename],
                        url_prefix=mock_url[:-9])
     with open(self.filepath, 'rb') as downloaded:
         assert_equal(downloaded.read(), mock_content)
Exemple #21
0
 def download(cls, directory=None):
     """Download this class's files into ``directory`` (cwd by default)."""
     target = directory or os.getcwd()
     return default_downloader(target, cls.urls, cls.filenames)
Exemple #22
0
def downloader_wrapper(format, directory, **kwargs):
    """Download the LFW archive for ``format`` plus the shared files.

    Builds the download list locally instead of inserting into the
    module-level ``files`` list: the old in-place ``files.insert(0, ...)``
    accumulated an extra archive entry on every call, so repeated calls
    downloaded stale formats and duplicated work.
    """
    # Fresh list per call: the format-specific archive first, then the
    # shared files (``files`` itself is left untouched).
    download_list = ["{}.tgz".format(resolve_filename(format))] + list(files)
    urls = ['http://vis-www.cs.umass.edu/lfw/' + name
            for name in download_list]
    default_downloader(directory, urls=urls, filenames=download_list,
                       **kwargs)