def fetch_outline(name):
    r"""Fetch the outline of a glacier as a GeoJSON file"""
    names = get_glacier_names()
    if name not in names:
        raise ValueError("Glacier name '%s' not in %s" % (name, names))
    downloader = pooch.HTTPDownloader(progressbar=True)
    return outlines.fetch(name + ".geojson", downloader=downloader)

def get_csirml6():
    url = "https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/23875943/CSIRML6_CO2_19822019_figshare.nc"
    fname = pooch.retrieve(
        url, None,
        fname="CSIRML6_CO2_19822019_figshare.nc",
        path='../data-in/',
        downloader=pooch.HTTPDownloader(progressbar=True))
    xda = xr.open_dataset(fname).spco2
    return xda

def _earthdata_downloader(url, output_file, dataset):
    username = os.environ.get('EARTHDATA_USERNAME')
    if username is None:
        username = input('EarthData username: ')
    password = os.environ.get('EARTHDATA_PASSWORD')
    if password is None:
        password = getpass('EarthData password: ')
    login = requests.get(url)
    downloader = pooch.HTTPDownloader(auth=(username, password))
    downloader(login.url, output_file, dataset)

def __call__(self, url, output_file, dataset):
    auth = self._get_credentials()
    downloader = pooch.HTTPDownloader(auth=auth, progressbar=True)
    try:
        login = requests.get(url)
        downloader(login.url, output_file, dataset)
    except requests.exceptions.HTTPError as error:
        if 'Unauthorized' in str(error):
            pooch.get_logger().error('Wrong username/password!')
            self._username = None
            self._password = None
        raise error

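# Usage sketch (not from the original source): pooch invokes a custom
# downloader as ``downloader(url, output_file, pooch_instance)``, so an
# instance of the class above can be passed anywhere pooch accepts a
# ``downloader``. The class name ``EarthDataDownloader`` and the URL are
# assumptions; the snippet above only shows the __call__ method.
earthdata_downloader = EarthDataDownloader()
fname = pooch.retrieve(
    url='https://example.com/measures/antarctic_velocity.nc',  # hypothetical URL
    known_hash=None,
    downloader=earthdata_downloader,
)
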
def get_mpiulbsomffn():
    url = 'https://www.ncei.noaa.gov/data/oceans/ncei/ocads/data/0209633/MPI-ULB-SOM_FFN_clim.nc'
    fname = pooch.retrieve(
        url, None,
        fname='MPIULB-SOMFFN_clim.nc',
        path='../data-in/',
        downloader=pooch.HTTPDownloader(progressbar=True))
    xds = xr.open_dataset(fname)
    xda = xds.pco2.where(xds.pco2 > 0).coarsen(lat=4, lon=4).mean()
    xda = xda.rename('mpiulb_somffn').rename(time='month')
    return xda

def _cautious_downloader(url, output_file, pooch):
    # NOTE: the third parameter is the Pooch instance and shadows the module
    # name, so the pooch module is referenced via the alias ``ppooch``
    # (i.e., ``import pooch as ppooch`` in the original module).
    if pooch.allow_download:
        delattr(pooch, "allow_download")
        # HTTPDownloader() requires tqdm, a HyperSpy dependency, so
        # adding it to our dependencies doesn't cost anything
        download = ppooch.HTTPDownloader(progressbar=True)
        download(url, output_file, pooch)
    else:
        raise ValueError(
            "The dataset must be (re)downloaded from the kikuchipy-data "
            "repository on GitHub (https://github.com/pyxem/kikuchipy-data) "
            "to your local cache with the pooch Python package. Pass "
            "`allow_download=True` to allow this download.")

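# A minimal sketch (not from the original source) of how the guard above
# might be driven: attach an ``allow_download`` flag to the Pooch instance
# before fetching, so ``_cautious_downloader`` can decide whether the
# download is permitted and then remove the flag. ``fetch_example_dataset``
# and the registry entry are hypothetical.
import pooch as ppooch

def fetch_example_dataset(filename, allow_download=False):
    fetcher = ppooch.create(
        path=ppooch.os_cache("kikuchipy"),
        base_url="https://github.com/pyxem/kikuchipy-data/raw/master/",
        registry={filename: None},  # None skips hash verification
    )
    fetcher.allow_download = allow_download  # consumed by _cautious_downloader
    return fetcher.fetch(filename, downloader=_cautious_downloader)
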
def get_mpisomffn():
    url = 'https://www.nodc.noaa.gov/archive/arc0105/0160558/5.5/data/0-data/MPI_SOM-FFN_v2020/spco2_MPI-SOM_FFN_v2020.nc'
    fname = pooch.retrieve(
        url, None,
        fname='MPI-SOMFFN_v2020.nc',
        path='../data-in/',
        downloader=pooch.HTTPDownloader(progressbar=True))
    xds = xr.open_dataset(fname, drop_variables='date')
    xda = xds.spco2_raw.resample(time='1MS').mean()
    xda = xda.rename('mpi_somffn')
    return xda

def _earthdata_downloader(url, output_file, dataset):
    username = os.environ.get('EARTHDATA_USERNAME')
    if username is None:
        username = input('EarthData username: ')
    password = os.environ.get('EARTHDATA_PASSWORD')
    if password is None:
        password = getpass('EarthData password: ')
    downloader = pooch.HTTPDownloader(auth=(username, password),
                                      progressbar=True)
    try:
        login = requests.get(url)
        downloader(login.url, output_file, dataset)
    except requests.exceptions.HTTPError as error:
        if 'Unauthorized' in str(error):
            pooch.get_logger().error('Wrong username/password!')
        raise error

def __init__(self, cache_dir=None):
    import json
    import pooch
    import pkg_resources
    if cache_dir is None:
        if os.path.isdir(soxs_cfg.get("soxs", "soxs_data_dir")):
            cache_dir = soxs_cfg.get("soxs", "soxs_data_dir")
        else:
            cache_dir = pooch.os_cache("soxs")
    self._registry = json.load(
        pkg_resources.resource_stream("soxs", "file_hash_registry.json"))
    self.pooch_obj = pooch.create(
        path=cache_dir,
        registry=self._registry,
        env="SOXS_DATA_DIR",
        base_url="https://hea-www.cfa.harvard.edu/soxs/soxs_responses/",
    )
    self.dl = pooch.HTTPDownloader(progressbar=True)

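# A minimal sketch, not part of the original class: a fetch helper that pairs
# the Pooch instance with the stored progress-bar downloader. The method name
# ``get_file`` is hypothetical; ``fname`` must be a key in the hash registry
# loaded in __init__.
def get_file(self, fname):
    return self.pooch_obj.fetch(fname, downloader=self.dl)
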
def get_niesfnn():
    url = 'https://ndownloader.figshare.com/files/23907317?private_link=6dfc21bc1a2c51da8081'
    fname = pooch.retrieve(
        url, None,
        fname='NIES-FNN_v2020.nc',
        path='../data-in/',
        downloader=pooch.HTTPDownloader(progressbar=True))
    xds = xr.open_dataset(fname, drop_variables='date')
    yymm = np.meshgrid(xds.year, xds.month)
    years_months = np.c_[([y.flatten() for y in yymm])].T
    time = [pd.Timestamp(f'{y}-{m}') for y, m in years_months]
    xda = xr.DataArray(
        xds.co2.values.reshape(len(time), xds.lat.size, xds.lon.size),
        coords=dict(time=time, lat=xds.lat, lon=xds.lon),
        dims=['time', 'lat', 'lon'])
    return xda

def fetch_network_data(net_name: str = "karate_club",
                       net_type: str = "igraph") -> object:
    """
    Load the required network from the remote repository

    :param net_name: network name
    :param net_type: desired graph object among "networkx" and "igraph". Default, igraph.
    :return: a graph object

    :Example:

    >>> from cdlib import datasets
    >>> G = datasets.fetch_network_data(net_name="karate_club", net_type="igraph")
    """
    download = pooch.HTTPDownloader(progressbar=True)
    fname = __networks.fetch(f"{net_name}.csv.gz",
                             processor=pooch.Decompress(),
                             downloader=download)

    if net_type == "networkx":
        g = nx.Graph()
        with open(fname) as f:
            for line in f:
                line = line.replace(" ", "\t").split("\t")
                g.add_edge(int(line[0]), int(line[1]))
    else:
        if ig is None:
            raise ModuleNotFoundError(
                "Optional dependency not satisfied: install python-igraph "
                "to use the selected feature.")
        edges = []
        with open(fname) as f:
            for line in f:
                line = line.replace(" ", "\t").split("\t")
                edges.append((int(line[0]), int(line[1])))
        g = ig.Graph.TupleList(edges)

    return g

def get_somffn_flux_params():
    url = 'https://www.nodc.noaa.gov/archive/arc0105/0160558/5.5/data/0-data/MPI_SOM-FFN_v2020/spco2_MPI-SOM_FFN_v2020.nc'
    fname = pooch.retrieve(
        url, None,
        fname='MPI-SOMFFN_v2020.nc',
        path='../data-in/',
        downloader=pooch.HTTPDownloader(progressbar=True))
    drop = [
        'date', 'dco2', 'spco2_raw', 'spco2_smoothed',
        'fgco2_raw', 'fgco2_smoothed',
        'time_bnds', 'lat_bnds', 'lon_bnds',
    ]
    xds = xr.open_dataset(fname, drop_variables=drop)
    attrs = {k: xds[k].attrs for k in xds}
    xds = xds.resample(time='1MS').mean()
    for k in xds:
        xds[k].attrs = attrs[k]
    xds.attrs = {}
    return xds

def get_jmamlr():
    url = 'http://www.data.jma.go.jp/gmd/kaiyou/data/english/co2_flux/grid/{name}'
    xds = []
    for t in pd.date_range('1990-01', '2019', freq='1AS', closed='left'):
        fname = 'JMA_co2map_{t:%Y}.ZIP'.format(t=t)
        fname = pooch.retrieve(
            url.format(t=t, name=fname), None,
            fname=fname,
            path='../data-in/JMA-MLR/',
            processor=pooch.Unzip(),
            downloader=pooch.HTTPDownloader(progressbar=True))[0]
        xda = xr.open_dataset(fname, decode_times=False).pCO2s
        y0, y1 = str(t.year), str(t.year + 1)
        time = pd.date_range(y0, y1, freq='1MS', closed='left')
        xda = xda.assign_coords(time=time)
        xds += xda,
    xda = (xr.concat(xds, dim='time')
             .assign_coords(lon=(xda.lon - 180) % 360 - 180)
             .sortby('lon'))
    return xda

def get_jenamls():
    url = 'http://www.bgc-jena.mpg.de/CarboScope/oc/INVERSION/OUTPUT/oc_v1.7_pCO2_daily.nc'
    username = '******'  # credentials redacted in the source
    password = '******'
    fname = pooch.retrieve(
        url, None,
        fname='Jena-MLS_v1.7_pCO2.nc',
        path='../data-in/',
        downloader=pooch.HTTPDownloader(progressbar=True,
                                        auth=(username, password)))
    xds = xr.open_dataset(fname)
    xda = xds.pCO2.resample(mtime='1MS').mean('mtime')
    xda = xda.rename("jena_mls")
    xda = (xda.interp(
               lat=np.arange(-89.5, 90),
               lon=np.arange(-179.5, 180),
               method='nearest')
           .roll(lon=180, roll_coords=False)
           .interpolate_na('lon', limit=20)
           .roll(lon=-180, roll_coords=False)
           .rename(mtime='time'))
    return xda

def fetch_ground_truth_data(net_name="karate_club", graph=None):
    """
    Load the required ground truth clustering from the remote repository

    :param net_name: network name
    :param graph: the graph object associated to the ground truth (optional)
    :return: a NodeClustering object

    :Example:

    >>> from cdlib import datasets
    >>> gt_coms = datasets.fetch_ground_truth_data(net_name="karate_club")
    """
    download = pooch.HTTPDownloader(progressbar=True)
    fname = __ground_truths.fetch(f"{net_name}.json.gz",
                                  processor=pooch.Decompress(),
                                  downloader=download)
    gt = read_community_json(fname)
    if graph is not None:
        gt.graph = graph
    return gt

def fetch_bedmap2():
    downloader = pooch.HTTPDownloader(progressbar=True)
    filenames = bedmap2.fetch('bedmap2_tiff.zip',
                              processor=pooch.Unzip(),
                              downloader=downloader)
    return [f for f in filenames if os.path.splitext(f)[1] == '.tif']

def fetch_larsen_outline():
    r"""Fetch an outline of the Larsen C Ice Shelf"""
    downloader = pooch.HTTPDownloader(progressbar=True)
    return larsen_outline.fetch('larsen.geojson', downloader=downloader)

def fetch_larsen_outline():
    return larsen_outline.fetch(
        'larsen.geojson',
        downloader=pooch.HTTPDownloader(progressbar=True))

INPUTDATA = pooch.create(
    # This is still the default in case the environment variable isn't defined
    path=INPUTDATA_DIR,
    version_dev='master',
    base_url='https://svn-ccsm-inputdata.cgd.ucar.edu/trunk/',
    # The name of the environment variable that can overwrite the path argument
    env='CESMDATAROOT',
)
INPUTDATA.load_registry(
    pkg_resources.resource_stream('pop_tools', 'inputdata_registry.txt'))

if tqdm is not None:
    downloader = pooch.HTTPDownloader(progressbar=True, verify=False,
                                      allow_redirects=True)
else:
    downloader = pooch.HTTPDownloader(verify=False, allow_redirects=True)

grid_def_file = pkg_resources.resource_filename('pop_tools',
                                                'pop_grid_definitions.yaml')
input_templates_dir = pkg_resources.resource_filename('pop_tools',
                                                      'input_templates')

with open(grid_def_file) as f:
    grid_defs = yaml.safe_load(f)


def get_grid(grid_name, scrip=False):
    """Return a xarray.Dataset() with POP grid variables.

def data_path(subject, path=None, force_update=False, update_path=None, *,
              verbose=None):
    """Get path to local copy of LIMO dataset URL.

    This is a low-level function useful for getting a local copy of the
    remote LIMO dataset :footcite:`Rousselet2016`. The complete dataset is
    available at datashare.is.ed.ac.uk/.

    Parameters
    ----------
    subject : int
        Subject to download. Must be of :class:`int` in the range from 1
        to 18 (inclusive).
    path : None | str
        Location of where to look for the LIMO data storing directory.
        If None, the environment variable or config parameter
        ``MNE_DATASETS_LIMO_PATH`` is used. If it doesn't exist, the
        "~/mne_data" directory is used. If the LIMO dataset is not found
        under the given path, the data will be automatically downloaded to
        the specified folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If True, set the MNE_DATASETS_LIMO_PATH in mne-python config to the
        given path. If None, the user is prompted.
    %(verbose)s

    Returns
    -------
    path : str
        Local path to the given data file.

    Notes
    -----
    For example, one could do:

        >>> from mne.datasets import limo
        >>> limo.data_path(subject=1, path=os.getenv('HOME') + '/datasets')  # doctest:+SKIP

    This would download the LIMO data file to the 'datasets' folder, and
    prompt the user to save the 'datasets' path to the mne-python config, if
    it isn't there already.

    References
    ----------
    .. footbibliography::
    """  # noqa: E501
    import pooch

    downloader = pooch.HTTPDownloader(progressbar=True)  # use tqdm

    # local storage patch
    config_key = 'MNE_DATASETS_LIMO_PATH'
    name = 'LIMO'
    subj = f'S{subject}'
    path = _get_path(path, config_key, name)
    base_path = op.join(path, 'MNE-limo-data')
    subject_path = op.join(base_path, subj)

    # the remote URLs are in the form of UUIDs:
    urls = dict(
        S18={'Yr.mat': '5cf839833a4d9500178a6ff8',
             'LIMO.mat': '5cf83907e650a2001ad592e4'},
        S17={'Yr.mat': '5cf838e83a4d9500168aeb76',
             'LIMO.mat': '5cf83867a542b80019c87602'},
        S16={'Yr.mat': '5cf83857e650a20019d5778f',
             'LIMO.mat': '5cf837dc3a4d9500188a64fe'},
        S15={'Yr.mat': '5cf837cce650a2001ad591e8',
             'LIMO.mat': '5cf83758a542b8001ac7d11d'},
        S14={'Yr.mat': '5cf837493a4d9500198a938f',
             'LIMO.mat': '5cf836e4a542b8001bc7cc53'},
        S13={'Yr.mat': '5cf836d23a4d9500178a6df7',
             'LIMO.mat': '5cf836543a4d9500168ae7cb'},
        S12={'Yr.mat': '5cf83643d4c7d700193e5954',
             'LIMO.mat': '5cf835193a4d9500178a6c92'},
        S11={'Yr.mat': '5cf8356ea542b8001cc81517',
             'LIMO.mat': '5cf834f7d4c7d700163daab8'},
        S10={'Yr.mat': '5cf833b0e650a20019d57454',
             'LIMO.mat': '5cf83204e650a20018d59eb2'},
        S9={'Yr.mat': '5cf83201a542b8001cc811cf',
            'LIMO.mat': '5cf8316c3a4d9500168ae13b'},
        S8={'Yr.mat': '5cf8326ce650a20017d60373',
            'LIMO.mat': '5cf8316d3a4d9500198a8dc5'},
        S7={'Yr.mat': '5cf834a03a4d9500168ae59b',
            'LIMO.mat': '5cf83069e650a20017d600d7'},
        S6={'Yr.mat': '5cf830e6a542b80019c86a70',
            'LIMO.mat': '5cf83057a542b80019c869ca'},
        S5={'Yr.mat': '5cf8115be650a20018d58041',
            'LIMO.mat': '5cf80c0bd4c7d700193e213c'},
        S4={'Yr.mat': '5cf810c9a542b80019c8450a',
            'LIMO.mat': '5cf80bf83a4d9500198a6eb4'},
        S3={'Yr.mat': '5cf80c55d4c7d700163d8f52',
            'LIMO.mat': '5cf80bdea542b80019c83cab'},
        S2={'Yr.mat': '5cde827123fec40019e01300',
            'LIMO.mat': '5cde82682a50c4001677c259'},
        S1={'Yr.mat': '5d6d3071536cf5001a8b0c78',
            'LIMO.mat': '5d6d305f6f41fc001a3151d8'},
    )
    # these can't be in the registry file (mne/data/dataset_checksums.txt)
    # because of filename duplication
    hashes = dict(
        S18={'Yr.mat': 'md5:87f883d442737971a80fc0a35d057e51',
             'LIMO.mat': 'md5:8b4879646f65d7876fa4adf2e40162c5'},
        S17={'Yr.mat': 'md5:7b667ec9eefd7a9996f61ae270e295ee',
             'LIMO.mat': 'md5:22eaca4e6fad54431fd61b307fc426b8'},
        S16={'Yr.mat': 'md5:c877afdb4897426421577e863a45921a',
             'LIMO.mat': 'md5:86672d7afbea1e8c39305bc3f852c8c2'},
        S15={'Yr.mat': 'md5:eea9e0140af598fefc08c886a6f05de5',
             'LIMO.mat': 'md5:aed5cb71ddbfd27c6a3ac7d3e613d07f'},
        S14={'Yr.mat': 'md5:8bd842cfd8588bd5d32e72fdbe70b66e',
             'LIMO.mat': 'md5:1e07d1f36f2eefad435a77530daf2680'},
        S13={'Yr.mat': 'md5:d7925d2af7288b8a5186dfb5dbb63d34',
             'LIMO.mat': 'md5:ba891015d2f9e447955fffa9833404ca'},
        S12={'Yr.mat': 'md5:0e1d05beaa4bf2726e0d0671b78fe41e',
             'LIMO.mat': 'md5:423fd479d71097995b6614ecb11df9ad'},
        S11={'Yr.mat': 'md5:1b0016fb9832e43b71f79c1992fcbbb1',
             'LIMO.mat': 'md5:1a281348c2a41ee899f42731d30cda70'},
        S10={'Yr.mat': 'md5:13c66f60e241b9a9cc576eaf1b55a417',
             'LIMO.mat': 'md5:3c4b41e221eb352a21bbef1a7e006f06'},
        S9={'Yr.mat': 'md5:3ae1d9c3a1d9325deea2f2dddd1ab507',
            'LIMO.mat': 'md5:5e204e2a4bcfe4f535b4b1af469b37f7'},
        S8={'Yr.mat': 'md5:7e9adbca4e03d8d7ce8ea07ccecdc8fd',
            'LIMO.mat': 'md5:88313c21d34428863590e586b2bc3408'},
        S7={'Yr.mat': 'md5:6b5290a6725ecebf1022d5d2789b186d',
            'LIMO.mat': 'md5:8c769219ebc14ce3f595063e84bfc0a9'},
        S6={'Yr.mat': 'md5:420c858a8340bf7c28910b7b0425dc5d',
            'LIMO.mat': 'md5:9cf4e1a405366d6bd0cc6d996e32fd63'},
        S5={'Yr.mat': 'md5:946436cfb474c8debae56ffb1685ecf3',
            'LIMO.mat': 'md5:241fac95d3a79d2cea081391fb7078bd'},
        S4={'Yr.mat': 'md5:c8216af78ac87b739e86e57b345cafdd',
            'LIMO.mat': 'md5:8e10ef36c2e075edc2f787581ba33459'},
        S3={'Yr.mat': 'md5:ff02e885b65b7b807146f259a30b1b5e',
            'LIMO.mat': 'md5:59b5fb3a9749003133608b5871309e2c'},
        S2={'Yr.mat': 'md5:a4329022e57fd07ceceb7d1735fd2718',
            'LIMO.mat': 'md5:98b284b567f2dd395c936366e404f2c6'},
        S1={'Yr.mat': 'md5:076c0ae78fb71d43409c1877707df30e',
            'LIMO.mat': 'md5:136c8cf89f8f111a11f531bd9fa6ae69'},
    )
    # create the download manager
    # (``root_url`` is assumed to be defined elsewhere in the original
    # module; it is not part of this snippet)
    fetcher = pooch.create(
        path=subject_path,
        base_url='',
        version=None,  # Data versioning is decoupled from MNE-Python version.
        registry=hashes[subj],
        urls={key: f'{root_url}{uuid}' for key, uuid in urls[subj].items()},
        retry_if_failed=2  # 2 retries = 3 total attempts
    )
    # use our logger level for pooch's logger too
    pooch.get_logger().setLevel(logger.getEffectiveLevel())
    # fetch the data
    for fname in ('LIMO.mat', 'Yr.mat'):
        destination = op.join(subject_path, fname)
        if force_update and op.isfile(destination):
            os.remove(destination)
        # fetch the remote file (if local file missing or has hash mismatch)
        fetcher.fetch(fname=fname, downloader=downloader)
    # update path in config if desired
    _do_path_update(path, update_path, config_key, name)
    return base_path

def fetch_dataset(
    dataset_params,
    processor=None,
    path=None,
    force_update=False,
    update_path=True,
    download=True,
    check_version=False,
    return_version=False,
    accept=False,
    auth=None,
    token=None,
):
    """Fetch an MNE-compatible dataset.

    Parameters
    ----------
    dataset_params : list of dict | dict
        The dataset name(s) and corresponding parameters to download the
        dataset(s). The dataset parameters must contain the following keys:
        ``archive_name``, ``url``, ``folder_name``, ``hash``,
        ``config_key`` (optional). See Notes.
    processor : None | "unzip" | "untar" | instance of pooch.Unzip | instance of pooch.Untar
        What to do after downloading the file. ``"unzip"`` and ``"untar"``
        will decompress the downloaded file in place; for custom extraction
        (e.g., only extracting certain files from the archive) pass an
        instance of :class:`pooch.Unzip` or :class:`pooch.Untar`. If
        ``None`` (the default), the files are left as-is.
    path : None | str
        Directory in which to put the dataset. If ``None``, the dataset
        location is determined by first checking whether
        ``dataset_params['config_key']`` is defined, and if so, whether that
        config key exists in the MNE-Python config file. If so, the
        configured path is used; if not, the location is set to the value of
        the ``MNE_DATA`` config key (if it exists), or ``~/mne_data``
        otherwise.
    force_update : bool
        Force update of the dataset even if a local copy exists.
        Default is False.
    update_path : bool | None
        If True (default), set the mne-python config to the given path.
        If None, the user is prompted.
    download : bool
        If False and the dataset has not been downloaded yet, it will not be
        downloaded and the path will be returned as ``''`` (empty string).
        This is mostly used for testing purposes and can be safely ignored
        by most users.
    check_version : bool
        Whether to check the version of the dataset or not. Each version of
        the dataset is stored in the root with a ``version.txt`` file.
    return_version : bool
        Whether or not to return the version of the dataset. Defaults to
        False.
    accept : bool
        Some MNE-supplied datasets require acceptance of an additional
        license. Default is ``False``.
    auth : tuple | None
        Optional authentication tuple containing the username and
        password/token, passed to :class:`pooch.HTTPDownloader` (e.g.,
        ``auth=('foo', 012345)``).
    token : str | None
        Optional authentication token passed to
        :class:`pooch.HTTPDownloader`.

    Returns
    -------
    data_path : str
        The path to the fetched dataset.
    version : str
        Only returned if ``return_version`` is True.

    See Also
    --------
    mne.get_config
    mne.set_config
    mne.datasets.has_dataset

    Notes
    -----
    The ``dataset_params`` argument must contain the following keys:

    - ``archive_name``: The name of the (possibly compressed) file to
      download
    - ``url``: URL from which the file can be downloaded
    - ``folder_name``: the subfolder within the ``MNE_DATA`` folder in
      which to save and uncompress (if needed) the file(s)
    - ``hash``: the cryptographic hash type of the file followed by a colon
      and then the hash value (examples: "sha256:19uheid...",
      "md5:upodh2io...")
    - ``config_key`` (optional): key passed to :func:`mne.set_config` to
      store the on-disk location of the downloaded dataset (e.g.,
      ``"MNE_DATASETS_EEGBCI_PATH"``). This will only work for the provided
      datasets listed :ref:`here <datasets>`; do not use for user-defined
      datasets.

    An example would look like::

        {'dataset_name': 'sample',
         'archive_name': 'MNE-sample-data-processed.tar.gz',
         'hash': 'md5:12b75d1cb7df9dfb4ad73ed82f61094f',
         'url': 'https://osf.io/86qa2/download?version=5',
         'folder_name': 'MNE-sample-data',
         'config_key': 'MNE_DATASETS_SAMPLE_PATH'}

    For datasets where a single (possibly compressed) file must be
    downloaded, pass a single :class:`dict` as ``dataset_params``. For
    datasets where multiple files must be downloaded and (optionally)
    uncompressed separately, pass a list of dicts.
    """  # noqa E501
    # import pooch library for handling the dataset downloading
    # pooch = _soft_import("pooch", "dataset downloading", strict=True)  # JG_MOD
    import pooch

    if auth is not None:
        if len(auth) != 2:
            raise RuntimeError("auth should be a 2-tuple consisting "
                               "of a username and password/token.")

    # processor to uncompress files
    if processor == "untar":
        processor = pooch.Untar(extract_dir=path)
    elif processor == "unzip":
        processor = pooch.Unzip(extract_dir=path)

    if isinstance(dataset_params, dict):
        dataset_params = [dataset_params]

    # extract configuration parameters
    names = [params["dataset_name"] for params in dataset_params]
    name = names[0]
    dataset_dict = dataset_params[0]
    config_key = dataset_dict.get('config_key', None)
    folder_name = dataset_dict["folder_name"]

    # get download path for specific dataset
    path = _get_path(path=path, key=config_key, name=name)

    # get the actual path to each dataset folder name
    final_path = op.join(path, folder_name)

    # handle BrainStorm datasets with nested folders for datasets
    if name.startswith("bst_"):
        final_path = op.join(final_path, name)

    # additional condition: check for version.txt and parse it
    # check if testing or misc data is outdated; if so, redownload it
    want_version = RELEASES.get(name, None)
    want_version = _FAKE_VERSION if name == "fake" else want_version

    # get the version of the dataset and then check if the version is outdated
    data_version = _dataset_version(final_path, name)
    outdated = (want_version is not None and
                LooseVersion(want_version) > LooseVersion(data_version))

    if outdated:
        logger.info(f"Dataset {name} version {data_version} out of date, "
                    f"latest version is {want_version}")

    # return empty string if outdated dataset and we don't want to download
    if (not force_update) and outdated and not download:
        return ("", data_version) if return_version else ""

    # reasons to bail early (hf_sef has separate code for this):
    if ((not force_update) and (not outdated) and
            (not name.startswith("hf_sef_"))):
        # ...if target folder exists (otherwise pooch downloads every
        # time because we don't save the archive files after unpacking, so
        # pooch can't check its checksum)
        if op.isdir(final_path):
            if config_key is not None:
                _do_path_update(path, update_path, config_key, name)
            return (final_path, data_version) if return_version else final_path
        # ...if download=False (useful for debugging)
        elif not download:
            return ("", data_version) if return_version else ""
        # ...if user didn't accept the license
        elif name.startswith("bst_"):
            if accept or "--accept-brainstorm-license" in sys.argv:
                answer = "y"
            else:
                # If they don't have stdin, just accept the license
                # https://github.com/mne-tools/mne-python/issues/8513#issuecomment-726823724  # noqa: E501
                answer = _safe_input("%sAgree (y/[n])? " % _bst_license_text,
                                     use="y")
            if answer.lower() != "y":
                raise RuntimeError("You must agree to the license to use "
                                   "this dataset")

    # downloader & processors
    download_params = dict(progressbar=True)  # use tqdm
    if name == "fake":
        download_params["progressbar"] = False
    if auth is not None:
        download_params["auth"] = auth
    if token is not None:
        download_params["headers"] = {"Authorization": f"token {token}"}
    downloader = pooch.HTTPDownloader(**download_params)

    # make mappings from archive names to urls and to checksums
    urls = dict()
    registry = dict()
    for idx, this_name in enumerate(names):
        this_dataset = dataset_params[idx]
        archive_name = this_dataset["archive_name"]
        dataset_url = this_dataset["url"]
        dataset_hash = this_dataset["hash"]
        urls[archive_name] = dataset_url
        registry[archive_name] = dataset_hash

    # create the download manager
    fetcher = pooch.create(
        path=final_path if processor is None else path,
        base_url="",  # Full URLs are given in the `urls` dict.
        version=None,  # Data versioning is decoupled from MNE-Python version.
        urls=urls,
        registry=registry,
        retry_if_failed=2,  # 2 retries = 3 total attempts
    )

    # use our logger level for pooch's logger too
    pooch.get_logger().setLevel(logger.getEffectiveLevel())

    for idx in range(len(names)):
        # fetch and unpack the data
        archive_name = dataset_params[idx]["archive_name"]
        fetcher.fetch(fname=archive_name, downloader=downloader,
                      processor=processor)
        # after unpacking, remove the archive file
        if processor is not None:
            os.remove(op.join(path, archive_name))

    # remove version number from "misc" and "testing" datasets folder names
    if name == "misc":
        rmtree(final_path, ignore_errors=True)
        os.replace(op.join(path, MISC_VERSIONED), final_path)
    elif name == "testing":
        rmtree(final_path, ignore_errors=True)
        os.replace(op.join(path, TESTING_VERSIONED), final_path)

    # maybe update the config
    if config_key is not None:
        old_name = "brainstorm" if name.startswith("bst_") else name
        _do_path_update(path, update_path, config_key, old_name)

    # compare the version of the dataset and mne
    data_version = _dataset_version(path, name)
    # 0.7 < 0.7.git should be False, therefore strip
    if check_version and (LooseVersion(data_version) <
                          LooseVersion(mne_version.strip(".git"))):
        warn("The {name} dataset (version {current}) is older than "
             "mne-python (version {newest}). If the examples fail, "
             "you may need to update the {name} dataset by using "
             "mne.datasets.{name}.data_path(force_update=True)".format(
                 name=name, current=data_version, newest=mne_version))

    return (final_path, data_version) if return_version else final_path
