def fnirs_data_path(path, subject, accept):
    datapath = op.join(path, "NIRS", "subject {:02d}".format(subject))
    if not op.isfile(op.join(datapath, "mrk.mat")):
        # fNIRS
        if not op.isfile(op.join(path, "fNIRS.zip")):
            if not accept:
                raise AttributeError(
                    "You must accept the license terms to download this dataset; "
                    "set accept=True when instantiating the dataset."
                )
            retrieve(
                "http://doc.ml.tu-berlin.de/hBCI/NIRS/NIRS_01-29.zip",
                None,
                fname="fNIRS.zip",
                path=path,
            )
        if not op.isdir(op.join(path, "NIRS")):
            os.makedirs(op.join(path, "NIRS"))
        with z.ZipFile(op.join(path, "fNIRS.zip"), "r") as f:
            f.extractall(op.join(path, "NIRS"))
        os.remove(op.join(path, "fNIRS.zip"))
    return [op.join(datapath, fn) for fn in ["cnt.mat", "mrk.mat"]]
def get_mpisomffn():
    url = 'https://www.nodc.noaa.gov/archive/arc0105/0160558/5.5/data/0-data/MPI_SOM-FFN_v2020/spco2_MPI-SOM_FFN_v2020.nc'
    fname = pooch.retrieve(url, None,
                           fname='MPI-SOMFFN_v2020.nc',
                           path='../data-in/',
                           downloader=pooch.HTTPDownloader(progressbar=True))

    xds = xr.open_dataset(fname, drop_variables='date')
    xda = xds.spco2_raw.resample(time='1MS').mean()
    xda = xda.rename('mpi_somffn')

    return xda
def get_mpiulbsomffn():
    url = 'https://www.ncei.noaa.gov/data/oceans/ncei/ocads/data/0209633/MPI-ULB-SOM_FFN_clim.nc'
    fname = pooch.retrieve(url, None,
                           fname='MPIULB-SOMFFN_clim.nc',
                           path='../data-in/',
                           downloader=pooch.HTTPDownloader(progressbar=True))

    xds = xr.open_dataset(fname)
    xda = xds.pco2.where(xds.pco2 > 0).coarsen(lat=4, lon=4).mean()
    xda = xda.rename('mpiulb_somffn').rename(time='month')

    return xda
def data_dl(url, sign, path=None, force_update=False, verbose=None):
    """Download file from url to specified path.

    This function should replace data_path as MNE will no longer support
    downloading datasets. This version uses Pooch.

    Parameters
    ----------
    url : str
        Path to remote location of data.
    sign : str
        Signifier of dataset.
    path : None | str
        Location of where to look for the data storing location.
        If None, the environment variable or config parameter
        ``MNE_DATASETS_(signifier)_PATH`` is used. If it doesn't exist, the
        "~/mne_data" directory is used. If the dataset is not found under
        the given path, the data will be automatically downloaded to the
        specified folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    verbose : bool, str, int, or None
        If not None, override default verbose level (see :func:`mne.verbose`).

    Returns
    -------
    path : list of str
        Local path to the given data file. This path is contained inside a
        list of length one, for compatibility.
    """
    path = get_dataset_path(sign, path)
    key_dest = "MNE-{:s}-data".format(sign.lower())
    destination = _url_to_local_path(url, osp.join(path, key_dest))

    # Fetch the file
    if not osp.isfile(destination) or force_update:
        if osp.isfile(destination):
            os.remove(destination)
        if not osp.isdir(osp.dirname(destination)):
            os.makedirs(osp.dirname(destination))
        known_hash = None
    else:
        known_hash = file_hash(destination)
    dlpath = retrieve(url, known_hash,
                      fname=osp.basename(url),
                      path=osp.dirname(destination))
    return dlpath
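A minimal usage sketch (hedged: the URL and dataset signifier below are hypothetical, and the call assumes the surrounding module provides the helpers used above, such as get_dataset_path and _url_to_local_path):

if __name__ == "__main__":
    # Download a single file into the MNE-style cache for a hypothetical dataset.
    local_file = data_dl(
        url="https://example.org/data/subject01.mat",  # hypothetical URL
        sign="EXAMPLE",                                # hypothetical dataset signifier
        force_update=False,
    )
    print(local_file)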
def get_woa_basins():
    url = (
        "https://iridl.ldeo.columbia.edu/"
        "SOURCES/.NOAA/.NODC/.WOA09/.Masks/.basin/data.nc"
    )
    fname = pooch.retrieve(url, None)
    xda = (
        xr.open_dataset(fname)
        .rename({'X': 'lon', 'Y': 'lat', 'Z': 'depth'})
        .transpose('depth', 'lat', 'lon')
        .basin
        .assign_coords(lon=(np.arange(0.5, 360) - 180) % 360 - 180)
        .sortby('lon')
        .sel(depth=0)
        .drop('depth'))
    return xda
def __getitem__(self, item):
    try:
        import pooch
    except ImportError:
        raise ModuleNotFoundError(
            "pooch must be installed to load example data"
        )
    try:
        dataset, known_hash = self.registry[item]
    except KeyError:
        raise KeyError(f"'{item}' is not a valid example dataset")
    return Path(
        pooch.retrieve(url=self.base_url + dataset, known_hash=known_hash)
    )
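For context, a hypothetical sketch of the kind of container this __getitem__ could live on; the class name, base URL, and registry entries below are illustrative assumptions, not taken from the original source:

from pathlib import Path

import pooch


class ExampleRegistry:
    """Hypothetical container showing the attributes __getitem__ relies on."""

    # Illustrative assumptions only; the real base_url and registry entries
    # come from the package the method above was taken from.
    base_url = "https://example.org/sample-data/"
    registry = {
        "tiny": ("tiny.nc", None),  # (remote file name, known_hash; None skips verification)
    }

    def __getitem__(self, item):
        dataset, known_hash = self.registry[item]
        return Path(pooch.retrieve(url=self.base_url + dataset, known_hash=known_hash))


# local_path = ExampleRegistry()["tiny"]  # -> Path to the cached local copy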
def get_example_data(outdir='./'):
    """
    Get example data sets and configuration files

    Parameters
    ----------
    outdir : str or Path, optional
        Location to extract the example files into.  They will be put at
        ``outdir/pyglider-example-data/``.  Default is to unpack in the
        current directory.
    """
    zipfile = pooch.retrieve(
        "https://github.com/c-proof/pyglider-example-data/archive/refs/heads/main.zip",
        known_hash=None)
    with ZipFile(zipfile, 'r') as zipObj:
        # Extract all the contents of zip file in outdir
        zipObj.extractall(outdir)
def download_and_read_noaa_mbl(noaa_mbl_url):
    import re

    import numpy as np  # needed below for np.rad2deg / np.arcsin
    import pandas as pd
    import pooch

    # save to temporary location with pooch
    fname = pooch.retrieve(noaa_mbl_url, None)

    # find start line
    is_mbl_surface = False
    for start_line, line in enumerate(open(fname)):
        if re.findall('MBL.*SURFACE', line):
            is_mbl_surface = True
        if not line.startswith('#'):
            break
    if not is_mbl_surface:
        raise Exception(
            'The file at the provided url is not an MBL SURFACE file. '
            'Please check that you have provided the surface url. '
        )

    # read fixed width file CO2
    df = pd.read_fwf(fname, skiprows=start_line, header=None, index_col=0)
    df.index.name = 'date'
    # every second line is uncertainty
    df = df.iloc[:, ::2]
    # latitude is given as sin(lat)
    df.columns = np.rad2deg(np.arcsin(np.linspace(-1, 1, 41)))

    # resolve time properly
    year = (df.index.values - (df.index.values % 1)).astype(int)
    day_of_year = ((df.index.values - year) * 365 + 1).astype(int)
    date_strings = ['{}-{:03d}'.format(*a) for a in zip(year, day_of_year)]
    date = pd.to_datetime(date_strings, format='%Y-%j')
    df = df.set_index(date)

    # renaming indexes (have to stack for that)
    df = df.stack()
    index = df.index.set_names(['time', 'lat'])
    df = df.set_axis(index)
    df.source = noaa_mbl_url

    return df
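Since the function returns a pandas Series with a (time, lat) MultiIndex, a hedged follow-on sketch of turning the result into a gridded xarray object (the URL below is a placeholder, not one from the original):

xco2 = download_and_read_noaa_mbl("https://example.org/co2_GHGreference_surface.txt")  # placeholder URL
xco2_gridded = xco2.to_xarray()  # DataArray with dims ('time', 'lat'), ready for interpolation or plotting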
def _get_southern_ocean_subregions(
        url='https://github.com/RECCAP2-ocean/shared-resources/raw/master/regions/RECCAP2_region_masks_all.nc',
        dest='../data/regions/'):
    import pooch
    import xarray as xr
    import pandas as pd
    from pathlib import Path as posixpath
    import itertools

    fname = pooch.retrieve(url, None, posixpath(url).name, dest)
    ds = xr.open_dataset(fname)
    mask = ds.southern

    atlantic = (((mask.lon > 290) | (mask.lon <= 20)) & (mask > 0)).astype(int) * 1
    indian = (((mask.lon > 20) & (mask.lon <= 147)) & (mask > 0)).astype(int) * 2
    pacific = (((mask.lon > 147) & (mask.lon <= 290)) & (mask > 0)).astype(int) * 3

    mask = xr.Dataset()
    mask['biomes'] = ds.southern.copy()
    mask['basins'] = (pacific + atlantic + indian).transpose('lat', 'lon')
    mask['subregions'] = (
        (mask.basins * 3 + mask.biomes - 3)
        .where(lambda a: a > 0)
        .fillna(0)
        .astype(int))

    basin = ['ATL', 'IND', 'PAC']
    biome = ['STSS', 'SPSS', 'ICE']
    names = ['-'.join(l) for l in itertools.product(basin, biome)]
    mask['names'] = xr.DataArray(names, coords={'idx': range(1, 10)}, dims=('idx'))
    mask['names'].attrs['description'] = 'Names for the subregions'
    mask['subregions'].attrs['description'] = '(basins * 3 + biomes - 3)'
    mask['basins'].attrs['description'] = 'Atlantic = 1, Indian = 2, Pacific = 3'
    mask['biomes'].attrs['description'] = (
        'Biomes based on Fay and McKinley (2014), STSS=1, SPSS=2, ICE=3')
    mask.attrs['source'] = url
    mask.attrs['date'] = pd.Timestamp.today().strftime('%Y-%m-%d')

    return mask
def pytest_configure():
    fnames = pooch.retrieve(
        url="https://zenodo.org/record/5832607/files/Data.tar.gz?download=1",
        processor=Untar(),
        known_hash="98b2bfadefa62dd223224c797354f9266b54143c2af3c4b6fe676d8547e7d5ee",
    )
    symlink_args = dict(
        src=f"{os.path.commonpath(fnames)}",
        dst="./oceanspy/tests/Data",
        target_is_directory=True,
    )
    try:
        print(f"Linking {symlink_args['src']!r} to {symlink_args['dst']!r}")
        os.symlink(**symlink_args)
    except FileExistsError:
        os.unlink("./oceanspy/tests/Data")
        os.symlink(**symlink_args)
def _data_dl(url, destination, force_update=False, verbose=None):
    # Code taken from moabb due to a problem with ':' occurring in paths
    # (on Windows ':' is forbidden in folder names), see
    # moabb/datasets/download.py
    from pooch import file_hash, retrieve  # keep soft dependency

    if not osp.isfile(destination) or force_update:
        if osp.isfile(destination):
            os.remove(destination)
        if not osp.isdir(osp.dirname(destination)):
            os.makedirs(osp.dirname(destination))
        known_hash = None
    else:
        known_hash = file_hash(destination)
    data_path = retrieve(url, known_hash,
                         fname=osp.basename(url),
                         path=osp.dirname(destination))
    return data_path
def get_niesfnn():
    url = 'https://ndownloader.figshare.com/files/23907317?private_link=6dfc21bc1a2c51da8081'
    fname = pooch.retrieve(url, None,
                           fname='NIES-FNN_v2020.nc',
                           path='../data-in/',
                           downloader=pooch.HTTPDownloader(progressbar=True))

    xds = xr.open_dataset(fname, drop_variables='date')

    yymm = np.meshgrid(xds.year, xds.month)
    years_months = np.c_[([y.flatten() for y in yymm])].T
    time = [pd.Timestamp(f'{y}-{m}') for y, m in years_months]

    xda = xr.DataArray(
        xds.co2.values.reshape(len(time), xds.lat.size, xds.lon.size),
        coords=dict(time=time, lat=xds.lat, lon=xds.lon),
        dims=['time', 'lat', 'lon'])

    return xda
def get_somffn_flux_params():
    url = 'https://www.nodc.noaa.gov/archive/arc0105/0160558/5.5/data/0-data/MPI_SOM-FFN_v2020/spco2_MPI-SOM_FFN_v2020.nc'
    fname = pooch.retrieve(url, None,
                           fname='MPI-SOMFFN_v2020.nc',
                           path='../data-in/',
                           downloader=pooch.HTTPDownloader(progressbar=True))

    drop = [
        'date', 'dco2', 'spco2_raw', 'spco2_smoothed',
        'fgco2_raw', 'fgco2_smoothed',
        'time_bnds', 'lat_bnds', 'lon_bnds'
    ]
    xds = xr.open_dataset(fname, drop_variables=drop)

    attrs = {k: xds[k].attrs for k in xds}
    xds = xds.resample(time='1MS').mean()
    for k in xds:
        xds[k].attrs = attrs[k]
    xds.attrs = {}

    return xds
def create_seamask():
    from pooch import retrieve
    from pandas import Timestamp
    from xarray import open_dataset
    from numpy import arange

    date = Timestamp('2010-01-01')
    url = (f"https://www.ncei.noaa.gov/data/"
           f"sea-surface-temperature-optimum-interpolation"
           f"/v2.1/access/avhrr/{date:%Y%m}/"
           f"oisst-avhrr-v02r01.{date:%Y%m%d}.nc")
    fname = retrieve(url, None)

    mask = (
        open_dataset(fname).sst
        .isel(time=0, zlev=0)
        .drop(['time', 'zlev'])
        .interp(lat=arange(-89.5, 90, 1), lon=arange(0.5, 360))
        .notnull()
        .rename('seamask')
        .assign_attrs(dict(description=(
            "sea mask based on OISSTv2 coverage on "
            "2010-01-01 where True is sea and False is land"))))

    return mask
def get_jenamls():
    url = 'http://www.bgc-jena.mpg.de/CarboScope/oc/INVERSION/OUTPUT/oc_v1.7_pCO2_daily.nc'
    username = '******'
    password = '******'
    fname = pooch.retrieve(url, None,
                           fname='Jena-MLS_v1.7_pCO2.nc',
                           path='../data-in/',
                           downloader=pooch.HTTPDownloader(
                               progressbar=True,
                               auth=(username, password)))

    xds = xr.open_dataset(fname)
    xda = xds.pCO2.resample(mtime='1MS').mean('mtime')
    xda = xda.rename("jena_mls")
    xda = (xda
           .interp(lat=np.arange(-89.5, 90),
                   lon=np.arange(-179.5, 180),
                   method='nearest')
           .roll(lon=180, roll_coords=False)
           .interpolate_na('lon', limit=20)
           .roll(lon=-180, roll_coords=False)
           .rename(mtime='time'))

    return xda
def get_jmamlr():
    url = 'http://www.data.jma.go.jp/gmd/kaiyou/data/english/co2_flux/grid/{name}'
    xds = []
    for t in pd.date_range('1990-01', '2019', freq='1AS', closed='left'):
        fname = 'JMA_co2map_{t:%Y}.ZIP'.format(t=t)
        fname = pooch.retrieve(
            url.format(t=t, name=fname), None,
            fname=fname,
            path='../data-in/JMA-MLR/',
            processor=pooch.Unzip(),
            downloader=pooch.HTTPDownloader(progressbar=True))[0]

        xda = xr.open_dataset(fname, decode_times=False).pCO2s
        y0, y1 = str(t.year), str(t.year + 1)
        time = pd.date_range(y0, y1, freq='1MS', closed='left')
        xda = xda.assign_coords(time=time)
        xds += xda,

    xda = (xr.concat(xds, dim='time')
           .assign_coords(lon=(xda.lon - 180) % 360 - 180)
           .sortby('lon'))

    return xda
def open_dataset(
    name,
    cache=True,
    cache_dir=None,
    *,
    engine=None,
    **kws,
):
    """
    Open a dataset from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"air_temperature"``: NCEP reanalysis subset
    * ``"air_temperature_gradient"``: NCEP reanalysis subset with approximate x,y gradients
    * ``"basin_mask"``: Dataset with ocean basins marked using integers
    * ``"ASE_ice_velocity"``: MEaSUREs InSAR-Based Ice Velocity of the Amundsen Sea Embayment, Antarctica, Version 1
    * ``"rasm"``: Output of the Regional Arctic System Model (RASM)
    * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output
    * ``"tiny"``: small synthetic dataset with a 1D data variable
    * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK
    * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data
    * ``"ersstv5"``: NOAA's Extended Reconstructed Sea Surface Temperature monthly averages

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'air_temperature'
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls
    **kws : dict, optional
        Passed to xarray.open_dataset

    See Also
    --------
    tutorial.load_dataset
    open_dataset
    load_dataset
    """
    try:
        import pooch
    except ImportError as e:
        raise ImportError(
            "tutorial.open_dataset depends on pooch to download and manage datasets."
            " To proceed please install pooch."
        ) from e

    logger = pooch.get_logger()
    logger.setLevel("WARNING")

    cache_dir = _construct_cache_dir(cache_dir)
    if name in external_urls:
        url = external_urls[name]
    else:
        path = pathlib.Path(name)
        if not path.suffix:
            # process the name
            default_extension = ".nc"
            if engine is None:
                _check_netcdf_engine_installed(name)
            path = path.with_suffix(default_extension)
        elif path.suffix == ".grib":
            if engine is None:
                engine = "cfgrib"

        url = f"{base_url}/raw/{version}/{path.name}"

    # retrieve the file
    filepath = pooch.retrieve(url=url, known_hash=None, path=cache_dir)
    ds = _open_dataset(filepath, engine=engine, **kws)
    if not cache:
        ds = ds.load()
        pathlib.Path(filepath).unlink()

    return ds
def open_rasterio(
    name,
    engine=None,
    cache=True,
    cache_dir=None,
    **kws,
):
    """
    Open a rasterio dataset from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"RGB.byte"``: TIFF file derived from USGS Landsat 7 ETM imagery.
    * ``"shade"``: TIFF file derived from USGS SRTM 90 data

    ``RGB.byte`` and ``shade`` are downloaded from the ``rasterio`` repository [1]_.

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'RGB.byte'
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls
    **kws : dict, optional
        Passed to xarray.open_rasterio

    See Also
    --------
    xarray.open_rasterio

    References
    ----------
    .. [1] https://github.com/rasterio/rasterio
    """
    try:
        import pooch
    except ImportError as e:
        raise ImportError(
            "tutorial.open_rasterio depends on pooch to download and manage datasets."
            " To proceed please install pooch."
        ) from e

    logger = pooch.get_logger()
    logger.setLevel("WARNING")

    cache_dir = _construct_cache_dir(cache_dir)
    url = external_rasterio_urls.get(name)
    if url is None:
        raise ValueError(f"unknown rasterio dataset: {name}")

    # retrieve the file
    filepath = pooch.retrieve(url=url, known_hash=None, path=cache_dir)
    arr = _open_rasterio(filepath, **kws)
    if not cache:
        arr = arr.load()
        pathlib.Path(filepath).unlink()

    return arr
import os  # needed below for os.path.dirname

import numpy as np
import pandas as pd
import pooch
import tensorflow as tf
from pooch import retrieve
from rdkit import RDLogger

from alfabet import _model_files_baseurl
from alfabet.drawing import draw_bde

RDLogger.DisableLog('rdApp.*')

model_files = retrieve(
    _model_files_baseurl + 'model.tar.gz',
    known_hash='sha256:f1c2b9436f2d18c76b45d95140e6a08c096250bd5f3e2b412492ca27ab38ad0c',
    processor=pooch.Untar(extract_dir='model'))

model = tf.keras.models.load_model(os.path.dirname(model_files[0]))

bde_dft = pd.read_csv(
    retrieve(
        _model_files_baseurl + 'bonds_for_neighbors.csv.gz',
        known_hash='sha256:d4fb825c42d790d4b2b4bd5dc2d87c844932e2da82992a31d7521ce51395adb1'
    ))


def validate_inputs(inputs: dict) -> (bool, np.array, np.array):
    """
    Check the given SMILES to ensure it's present in the model's
import pyproj
import pooch
import numpy as np
import xarray as xr
import verde as vd
import boule as bl
import harmonica as hm
import matplotlib.pyplot as plt

print("Harmonica version: {}".format(hm.__version__))

# Fetch gravity data and DEM
data = hm.datasets.fetch_south_africa_gravity()
url = "https://github.com/fatiando/transform21/raw/main/data/bushveld_topography.nc"
fname = pooch.retrieve(url, known_hash=None, fname="bushveld_topography.nc")
topography = xr.load_dataset(fname).bedrock

# Project the dataset coordinates
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
easting, northing = projection(data.longitude.values, data.latitude.values)
data = data.assign(easting=easting)
data = data.assign(northing=northing)

# Cut the datasets to a very small region to run the script faster
region_deg = (28, 29, -26, -25)
inside = vd.inside((data.longitude, data.latitude), region_deg)
data = data[inside]
topography = topography.sel(longitude=slice(*region_deg[:2]),
                            latitude=slice(*region_deg[2:]))

# Compute gravity disturbance
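The fragment stops at the gravity-disturbance step; a hedged sketch of how that step could continue with boule (it assumes the South Africa gravity table exposes latitude, elevation, and gravity columns, which is how harmonica's sample dataset is laid out):

# Subtract WGS84 normal gravity, evaluated at the observation heights, from the
# observed gravity to obtain the gravity disturbance (units of mGal).
normal_gravity = bl.WGS84.normal_gravity(data.latitude, data.elevation)
data = data.assign(gravity_disturbance=data.gravity - normal_gravity)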
def data_path(url, path=None, force_update=False, update_path=None, *,
              verbose=None):
    """Get path to local copy of EEGMMI dataset URL.

    This is a low-level function useful for getting a local copy of a
    remote EEGBCI dataset :footcite:`SchalkEtAl2004` which is available
    at PhysioNet :footcite:`GoldbergerEtAl2000`.

    Parameters
    ----------
    url : str
        The dataset to use.
    path : None | str
        Location of where to look for the EEGBCI data storing location.
        If None, the environment variable or config parameter
        ``MNE_DATASETS_EEGBCI_PATH`` is used. If it doesn't exist, the
        "~/mne_data" directory is used. If the EEGBCI dataset is not found
        under the given path, the data will be automatically downloaded to
        the specified folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If True, set the MNE_DATASETS_EEGBCI_PATH in mne-python config to
        the given path. If None, the user is prompted.
    %(verbose)s

    Returns
    -------
    path : list of Path
        Local path to the given data file. This path is contained inside a
        list of length one, for compatibility.

    Notes
    -----
    For example, one could do:

        >>> from mne.datasets import eegbci
        >>> url = 'http://www.physionet.org/physiobank/database/eegmmidb/'
        >>> eegbci.data_path(url, os.getenv('HOME') + '/datasets') # doctest:+SKIP

    This would download the given EEGBCI data file to the 'datasets' folder,
    and prompt the user to save the 'datasets' path to the mne-python config,
    if it isn't there already.

    References
    ----------
    .. footbibliography::
    """  # noqa: E501
    import pooch

    key = 'MNE_DATASETS_EEGBCI_PATH'
    name = 'EEGBCI'
    path = _get_path(path, key, name)
    fname = 'MNE-eegbci-data'
    destination = _url_to_local_path(url, op.join(path, fname))
    destinations = [destination]

    # Fetch the file
    if not op.isfile(destination) or force_update:
        if op.isfile(destination):
            os.remove(destination)
        if not op.isdir(op.dirname(destination)):
            os.makedirs(op.dirname(destination))
        pooch.retrieve(
            url=url,
            known_hash=None,  # required by pooch; file hashes are not tracked here
            path=op.dirname(destination),
            fname=op.basename(destination),
        )

    # Offer to update the path
    _do_path_update(path, update_path, key, name)
    destinations = [_mne_path(dest) for dest in destinations]
    return destinations
    # (Tail of the bond featurizer referenced below: it encodes the bonded atom
    # symbols, the bond type/conjugation, and the ring size as a string token.)
    if not flipped:
        atoms = "{}-{}".format(
            *tuple((bond.GetBeginAtom().GetSymbol(),
                    bond.GetEndAtom().GetSymbol())))
    else:
        atoms = "{}-{}".format(
            *tuple((bond.GetEndAtom().GetSymbol(),
                    bond.GetBeginAtom().GetSymbol())))
    btype = str((bond.GetBondType(), bond.GetIsConjugated()))
    ring = 'R{}'.format(get_ring_size(bond, max_size=6)) if bond.IsInRing() else ''
    return " ".join([atoms, btype, ring]).strip()


preprocessor = nfp.SmilesBondIndexPreprocessor(
    atom_features=atom_featurizer,
    bond_features=bond_featurizer,
    explicit_hs=True,
    output_dtype='int64'
)

preprocessor.from_json(retrieve(
    _model_files_baseurl + 'preprocessor.json',
    known_hash='412d15ca4d0e8b5030e9b497f566566922818ff355b8ee677a91dd23696878ac'))


def get_features(smiles: str, **kwargs) -> dict:
    return preprocessor(smiles, train=False, **kwargs)
def _update_sleep_temazepam_records(fname=TEMAZEPAM_SLEEP_RECORDS):
    """Helper function to download Physionet's temazepam dataset records."""
    import pooch
    pd = _check_pandas_installed()
    tmp = _TempDir()

    # Download subjects info.
    subjects_fname = op.join(tmp, 'ST-subjects.xls')
    pooch.retrieve(url=TEMAZEPAM_RECORDS_URL,
                   known_hash=f"sha1:{TEMAZEPAM_RECORDS_URL_SHA1}",
                   path=tmp,
                   fname=op.basename(subjects_fname))

    # Load and massage the checksums.
    # NOTE: sha1sums_fname is assumed to be defined (and its file downloaded)
    # elsewhere in the original module; it is not part of this snippet.
    sha1_df = pd.read_csv(sha1sums_fname, sep=' ', header=None,
                          names=['sha', 'fname'], engine='python')
    select_age_records = (sha1_df.fname.str.startswith('ST') &
                          sha1_df.fname.str.endswith('edf'))
    sha1_df = sha1_df[select_age_records]
    sha1_df['id'] = [name[:6] for name in sha1_df.fname]

    # Load and massage the data.
    data = pd.read_excel(subjects_fname, header=[0, 1])
    data = data.set_index(('Subject - age - sex', 'Nr'))
    data.index.name = 'subject'
    data.columns.names = [None, None]
    data = (data.set_index([('Subject - age - sex', 'Age'),
                            ('Subject - age - sex', 'M1/F2')], append=True)
                .stack(level=0).reset_index())

    data = data.rename(columns={('Subject - age - sex', 'Age'): 'age',
                                ('Subject - age - sex', 'M1/F2'): 'sex',
                                'level_3': 'drug'})
    data['id'] = ['ST7{:02d}{:1d}'.format(s, n)
                  for s, n in zip(data.subject, data['night nr'])]

    data = pd.merge(sha1_df, data, how='outer', on='id')
    data['record type'] = (data.fname.str.split('-', expand=True)[1]
                               .str.split('.', expand=True)[0]
                               .astype('category'))

    data = data.set_index(['id', 'subject', 'age', 'sex', 'drug',
                           'lights off', 'night nr', 'record type']).unstack()
    data.columns = [l1 + '_' + l2 for l1, l2 in data.columns]
    data = data.reset_index().drop(columns=['id'])

    data['sex'] = (data.sex.astype('category')
                       .cat.rename_categories({1: 'male', 2: 'female'}))

    data['drug'] = data['drug'].str.split(expand=True)[0]
    data['subject_orig'] = data['subject']
    data['subject'] = data.index // 2  # to make sure index is from 0 to 21

    # Save the data.
    data.to_csv(fname, index=False)
def test_constants(tmp_path):
    """Test that MNE's constants match the FIFF dictionary definitions."""
    tmp_path = str(tmp_path)  # old pytest...
    fname = 'fiff.zip'
    dest = op.join(tmp_path, fname)
    pooch.retrieve(url='https://codeload.github.com/'
                       f'{REPO}/fiff-constants/zip/{COMMIT}',
                   path=tmp_path,
                   fname=fname,
                   known_hash=None)
    names = list()
    with zipfile.ZipFile(dest, 'r') as ff:
        for name in ff.namelist():
            if 'Dictionary' in name:
                ff.extract(name, tmp_path)
                names.append(op.basename(name))
                shutil.move(op.join(tmp_path, name),
                            op.join(tmp_path, names[-1]))
    names = sorted(names)
    assert names == ['DictionaryIOD.txt', 'DictionaryIOD_MNE.txt',
                     'DictionaryStructures.txt',
                     'DictionaryTags.txt', 'DictionaryTags_MNE.txt',
                     'DictionaryTypes.txt', 'DictionaryTypes_MNE.txt']

    # IOD (MEGIN and MNE)
    fif = dict(iod=dict(), tags=dict(), types=dict(), defines=dict())
    con = dict(iod=dict(), tags=dict(), types=dict(), defines=dict())
    fiff_version = None
    for name in ['DictionaryIOD.txt', 'DictionaryIOD_MNE.txt']:
        with open(op.join(tmp_path, name), 'rb') as fid:
            for line in fid:
                line = line.decode('latin1').strip()
                if line.startswith('# Packing revision'):
                    assert fiff_version is None
                    fiff_version = line.split()[-1]
                if (line.startswith('#') or line.startswith('alias') or
                        len(line) == 0):
                    continue
                line = line.split('"')
                assert len(line) in (1, 2, 3)
                desc = '' if len(line) == 1 else line[1]
                line = line[0].split()
                assert len(line) in (2, 3)
                if len(line) == 2:
                    kind, id_ = line
                else:
                    kind, id_, tagged = line
                    assert tagged in ('tagged',)
                id_ = int(id_)
                if id_ not in iod_dups:
                    assert id_ not in fif['iod']
                fif['iod'][id_] = [kind, desc]

    # Tags (MEGIN)
    with open(op.join(tmp_path, 'DictionaryTags.txt'), 'rb') as fid:
        for line in fid:
            line = line.decode('ISO-8859-1').strip()
            if (line.startswith('#') or line.startswith('alias') or
                    line.startswith(':') or len(line) == 0):
                continue
            line = line.split('"')
            assert len(line) in (1, 2, 3), line
            desc = '' if len(line) == 1 else line[1]
            line = line[0].split()
            assert len(line) == 4, line
            kind, id_, dtype, unit = line
            id_ = int(id_)
            val = [kind, dtype, unit]
            assert id_ not in fif['tags'], (fif['tags'].get(id_), val)
            fif['tags'][id_] = val

    # Tags (MNE)
    with open(op.join(tmp_path, 'DictionaryTags_MNE.txt'), 'rb') as fid:
        for li, line in enumerate(fid):
            line = line.decode('ISO-8859-1').strip()
            # ignore continuation lines (*)
            if (line.startswith('#') or line.startswith('alias') or
                    line.startswith(':') or line.startswith('*') or
                    len(line) == 0):
                continue
            # weird syntax around line 80:
            if line in ('/*', '"'):
                continue
            line = line.split('"')
            assert len(line) in (1, 2, 3), line
            if len(line) == 3 and len(line[2]) > 0:
                l2 = line[2].strip()
                assert l2.startswith('/*') and l2.endswith('*/'), l2
            desc = '' if len(line) == 1 else line[1]
            line = line[0].split()
            assert len(line) == 3, (li + 1, line)
            kind, id_, dtype = line
            unit = '-'
            id_ = int(id_)
            val = [kind, dtype, unit]
            if id_ not in tag_dups:
                assert id_ not in fif['tags'], (fif['tags'].get(id_), val)
            fif['tags'][id_] = val

    # Types and enums
    in_ = None
    re_prim = re.compile(r'^primitive\((.*)\)\s*(\S*)\s*"(.*)"$')
    re_enum = re.compile(r'^enum\((\S*)\)\s*".*"$')
    re_enum_entry = re.compile(r'\s*(\S*)\s*(\S*)\s*"(.*)"$')
    re_defi = re.compile(r'#define\s*(\S*)\s*(\S*)\s*"(.*)"$')
    used_enums = list()
    for extra in ('', '_MNE'):
        with open(op.join(tmp_path, 'DictionaryTypes%s.txt'
                          % (extra,)), 'rb') as fid:
            for li, line in enumerate(fid):
                line = line.decode('ISO-8859-1').strip()
                if in_ is None:
                    p = re_prim.match(line)
                    e = re_enum.match(line)
                    d = re_defi.match(line)
                    if p is not None:
                        t, s, d = p.groups()
                        s = int(s)
                        assert s not in fif['types']
                        fif['types'][s] = [t, d]
                    elif e is not None:
                        # entering an enum
                        this_enum = e.group(1)
                        if this_enum not in fif:
                            used_enums.append(this_enum)
                            fif[this_enum] = dict()
                            con[this_enum] = dict()
                        in_ = fif[this_enum]
                    elif d is not None:
                        t, s, d = d.groups()
                        s = int(s)
                        fif['defines'][t] = [s, d]
                    else:
                        assert not line.startswith('enum(')
                else:  # in an enum
                    if line == '{':
                        continue
                    elif line == '}':
                        in_ = None
                        continue
                    t, s, d = re_enum_entry.match(line).groups()
                    s = int(s)
                    if t != 'ecg' and s != 3:  # ecg defined the same way
                        assert s not in in_
                    in_[s] = [t, d]

    #
    # Assertions
    #

    # Version
    mne_version = '%d.%d' % (FIFF.FIFFC_MAJOR_VERSION,
                             FIFF.FIFFC_MINOR_VERSION)
    assert fiff_version == mne_version
    unknowns = list()

    # Assert that all our constants are in the FIF def
    assert 'FIFFV_SSS_JOB_NOTHING' in dir(FIFF)
    for name in sorted(dir(FIFF)):
        if name.startswith('_') or name in _dir_ignore_names:
            continue
        check = None
        val = getattr(FIFF, name)
        if name in fif['defines']:
            assert fif['defines'][name][0] == val
        elif name.startswith('FIFFC_'):
            # Checked above
            assert name in ('FIFFC_MAJOR_VERSION', 'FIFFC_MINOR_VERSION',
                            'FIFFC_VERSION')
        elif name.startswith('FIFFB_'):
            check = 'iod'
        elif name.startswith('FIFFT_'):
            check = 'types'
        elif name.startswith('FIFFV_'):
            if name.startswith('FIFFV_MNE_') and name.endswith('_ORI'):
                check = 'mne_ori'
            elif name.startswith('FIFFV_MNE_') and name.endswith('_COV'):
                check = 'covariance_type'
            elif name.startswith('FIFFV_MNE_COORD'):
                check = 'coord'  # weird wrapper
            elif name.endswith('_CH') or '_QUAT_' in name or name in \
                    ('FIFFV_DIPOLE_WAVE', 'FIFFV_GOODNESS_FIT',
                     'FIFFV_HPI_ERR', 'FIFFV_HPI_G', 'FIFFV_HPI_MOV'):
                check = 'ch_type'
            elif name.startswith('FIFFV_SUBJ_'):
                check = name.split('_')[2].lower()
            elif name in ('FIFFV_POINT_LPA', 'FIFFV_POINT_NASION',
                          'FIFFV_POINT_RPA', 'FIFFV_POINT_INION'):
                check = 'cardinal_point'
            else:
                for check in used_enums:
                    if name.startswith('FIFFV_' + check.upper()):
                        break
                else:
                    if name not in _tag_ignore_names:
                        raise RuntimeError('Could not find %s' % (name,))
                assert check in used_enums, name
            if 'SSS' in check:
                raise RuntimeError
        elif name.startswith('FIFF_UNIT'):  # units and multipliers
            check = name.split('_')[1].lower()
        elif name.startswith('FIFF_'):
            check = 'tags'
        else:
            unknowns.append((name, val))
        if check is not None and name not in _tag_ignore_names:
            assert val in fif[check], '%s: %s, %s' % (check, val, name)
            if val in con[check]:
                msg = "%s='%s' ?" % (name, con[check][val])
                assert _aliases.get(name) == con[check][val], msg
            else:
                con[check][val] = name
    unknowns = '\n\t'.join('%s (%s)' % u for u in unknowns)
    assert len(unknowns) == 0, 'Unknown types\n\t%s' % unknowns

    # Assert that all the FIF defs are in our constants
    assert set(fif.keys()) == set(con.keys())
    for key in sorted(set(fif.keys()) - {'defines'}):
        this_fif, this_con = fif[key], con[key]
        assert len(set(this_fif.keys())) == len(this_fif)
        assert len(set(this_con.keys())) == len(this_con)
        missing_from_con = sorted(set(this_con.keys()) - set(this_fif.keys()))
        assert missing_from_con == [], key
        if key not in _ignore_incomplete_enums:
            missing_from_fif = sorted(set(this_fif.keys()) -
                                      set(this_con.keys()))
            assert missing_from_fif == [], key

    # Assert that `coil_def.dat` has accurate descriptions of all enum(coil)
    coil_def = _read_coil_defs()
    coil_desc = np.array([c['desc'] for c in coil_def])
    coil_def = np.array([(c['coil_type'], c['accuracy'])
                         for c in coil_def], int)
    mask = (coil_def[:, 1] == FWD.COIL_ACCURACY_ACCURATE)
    coil_def = coil_def[mask, 0]
    coil_desc = coil_desc[mask]
    bad_list = []
    for key in fif['coil']:
        if key not in _missing_coil_def and key not in coil_def:
            bad_list.append((' %s,' % key).ljust(10) + ' # ' +
                            fif['coil'][key][1])
    assert len(bad_list) == 0, \
        '\nIn fiff-constants, missing from coil_def:\n' + '\n'.join(bad_list)
    # Assert that enum(coil) has all `coil_def.dat` entries
    for key, desc in zip(coil_def, coil_desc):
        if key not in fif['coil']:
            bad_list.append((' %s,' % key).ljust(10) + ' # ' + desc)
    assert len(bad_list) == 0, \
        'In coil_def, missing from fiff-constants:\n' + '\n'.join(bad_list)
def default_absorbers(Tatm,
                      ozone_file='apeozone_cam3_5_54.nc',
                      verbose=True,
                      ):
    '''Initialize a dictionary of well-mixed radiatively active gases.

    All values are volumetric mixing ratios.

    Ozone is set to a climatology. All other gases are assumed well-mixed:

    - CO2
    - CH4
    - N2O
    - O2
    - CFC11
    - CFC12
    - CFC22
    - CCL4

    Specific values are based on the AquaPlanet Experiment protocols,
    except for O2, which is set to the realistic value 0.21
    (affects the RRTMG scheme).
    '''
    absorber_vmr = {}
    absorber_vmr['CO2'] = 348. / 1E6
    absorber_vmr['CH4'] = 1650. / 1E9
    absorber_vmr['N2O'] = 306. / 1E9
    absorber_vmr['O2'] = 0.21
    absorber_vmr['CFC11'] = 0.
    absorber_vmr['CFC12'] = 0.
    absorber_vmr['CFC22'] = 0.
    absorber_vmr['CCL4'] = 0.

    # Ozone: start with all zeros, interpolate to data if we can
    xTatm = Tatm.to_xarray()
    O3 = 0. * xTatm
    if ozone_file is not None:
        ozonepath_http = _datapath_http + 'ozone/' + ozone_file
        ozonefilehandle = pooch.retrieve(
            url=ozonepath_http,
            known_hash="bc659bfa129fafa4ed9368bb19278ae15724a5a66599affd317c143ba511ff84")
        ozonedata = xr.open_dataset(ozonefilehandle)
        #  zonal and time average
        ozone_zon = ozonedata.OZONE.mean(dim=('time', 'lon')).transpose('lat', 'lev')
        if ('lat' in xTatm.dims):
            O3source = ozone_zon
        else:
            weight = np.cos(np.deg2rad(ozonedata.lat))
            ozone_global = (ozone_zon * weight).mean(dim='lat') / weight.mean(dim='lat')
            O3source = ozone_global
        try:
            O3 = O3source.interp_like(xTatm)
            # There will be NaNs for gridpoints outside the ozone file domain
            assert not np.any(np.isnan(O3))
        except:
            warnings.warn(
                'Some grid points are beyond the bounds of the ozone file. '
                'Ozone values will be extrapolated.')
            try:
                # passing fill_value='extrapolate' to the underlying scipy
                # interpolator will result in extrapolation instead of NaNs
                O3 = O3source.interp_like(xTatm,
                                          kwargs={'fill_value': 'extrapolate'})
                assert not np.any(np.isnan(O3))
            except:
                warnings.warn(
                    'Interpolation of ozone data failed. '
                    'Setting O3 to zero instead.')
                O3 = 0. * xTatm
    absorber_vmr['O3'] = O3.values
    return absorber_vmr
def open_dataset(
    name,
    engine=None,
    cache=True,
    cache_dir=None,
    **kws,
):
    """
    Open a dataset from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'air_temperature'
    engine : str, optional
        The engine to use.
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls
    kws : dict, optional
        Passed to xarray.open_dataset

    Notes
    -----
    Available datasets:

    * ``"air_temperature"``
    * ``"rasm"``
    * ``"ROMS_example"``
    * ``"tiny"``
    * ``"era5-2mt-2019-03-uk.grib"``
    * ``"RGB.byte"``: example rasterio file from https://github.com/mapbox/rasterio

    See Also
    --------
    xarray.open_dataset
    """
    try:
        import pooch
    except ImportError:
        raise ImportError("using the tutorial data requires pooch")

    if isinstance(cache_dir, pathlib.Path):
        cache_dir = os.fspath(cache_dir)
    elif cache_dir is None:
        cache_dir = pooch.os_cache(_default_cache_dir_name)

    if name in external_urls:
        engine_, url = external_urls[name]
        if engine is None:
            engine = engine_
    else:
        # process the name
        default_extension = ".nc"
        path = pathlib.Path(name)
        if not path.suffix:
            path = path.with_suffix(default_extension)

        url = f"{base_url}/raw/{version}/{path.name}"

    _open = overrides.get(engine, _open_dataset)
    # retrieve the file
    filepath = pooch.retrieve(url=url, known_hash=None, path=cache_dir)
    ds = _open(filepath, engine=engine, **kws)
    if not cache:
        ds = ds.load()
        pathlib.Path(filepath).unlink()

    return ds
def open_dataset(
    name,
    cache=True,
    cache_dir=None,
    **kws,
):
    """
    Open a dataset from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"air_temperature"``: NCEP reanalysis subset
    * ``"rasm"``: Output of the Regional Arctic System Model (RASM)
    * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output
    * ``"tiny"``: small synthetic dataset with a 1D data variable
    * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK
    * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'air_temperature'
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls
    **kws : dict, optional
        Passed to xarray.open_dataset

    See Also
    --------
    xarray.open_dataset
    """
    try:
        import pooch
    except ImportError:
        raise ImportError("using the tutorial data requires pooch")

    logger = pooch.get_logger()
    logger.setLevel("WARNING")

    cache_dir = _construct_cache_dir(cache_dir)
    if name in external_urls:
        url = external_urls[name]
    else:
        # process the name
        default_extension = ".nc"
        path = pathlib.Path(name)
        if not path.suffix:
            path = path.with_suffix(default_extension)

        url = f"{base_url}/raw/{version}/{path.name}"

    # retrieve the file
    filepath = pooch.retrieve(url=url, known_hash=None, path=cache_dir)
    ds = _open_dataset(filepath, **kws)
    if not cache:
        ds = ds.load()
        pathlib.Path(filepath).unlink()

    return ds
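A hedged usage sketch for these tutorial-style loaders (dataset names come from the docstring lists above; it assumes the module-level constants such as base_url, version, and external_urls are defined as in the host package):

ds = open_dataset("air_temperature")        # downloads once, then reuses the local cache
ds_tmp = open_dataset("tiny", cache=False)  # loads into memory, then removes the downloaded file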
def fetch_hcp_mmp_parcellation(subjects_dir=None, combine=True, *,
                               accept=False, verbose=None):
    """Fetch the HCP-MMP parcellation.

    This will download and install the HCP-MMP parcellation
    :footcite:`GlasserEtAl2016` files for FreeSurfer's fsaverage
    :footcite:`Mills2016` to the specified directory.

    Parameters
    ----------
    subjects_dir : str | None
        The subjects directory to use. The file will be placed in
        ``subjects_dir + '/fsaverage/label'``.
    combine : bool
        If True, also produce the combined/reduced set of 23 labels per
        hemisphere as ``HCPMMP1_combined.annot``
        :footcite:`GlasserEtAl2016supp`.
    %(accept)s
    %(verbose)s

    Notes
    -----
    Use of this parcellation is subject to terms of use on the
    `HCP-MMP webpage <https://balsa.wustl.edu/WN56>`_.

    References
    ----------
    .. footbibliography::
    """
    import pooch

    subjects_dir = get_subjects_dir(subjects_dir, raise_error=True)
    destination = op.join(subjects_dir, 'fsaverage', 'label')
    fnames = [op.join(destination, '%s.HCPMMP1.annot' % hemi)
              for hemi in ('lh', 'rh')]
    urls = dict(lh='https://ndownloader.figshare.com/files/5528816',
                rh='https://ndownloader.figshare.com/files/5528819')
    hashes = dict(lh='46a102b59b2fb1bb4bd62d51bf02e975',
                  rh='75e96b331940227bbcb07c1c791c2463')
    if not all(op.isfile(fname) for fname in fnames):
        if accept or '--accept-hcpmmp-license' in sys.argv:
            answer = 'y'
        else:
            answer = _safe_input('%s\nAgree (y/[n])? ' % _hcp_mmp_license_text)
        if answer.lower() != 'y':
            raise RuntimeError('You must agree to the license to use this '
                               'dataset')
    for hemi, fpath in zip(('lh', 'rh'), fnames):
        if not op.isfile(fpath):
            fname = op.basename(fpath)
            pooch.retrieve(url=urls[hemi],
                           known_hash=f"md5:{hashes[hemi]}",
                           path=destination,
                           fname=fname)
    if combine:
        fnames = [op.join(destination, '%s.HCPMMP1_combined.annot' % hemi)
                  for hemi in ('lh', 'rh')]
        if all(op.isfile(fname) for fname in fnames):
            return
        # otherwise, let's make them
        logger.info('Creating combined labels')
        groups = OrderedDict([
            ('Primary Visual Cortex (V1)', ('V1',)),
            ('Early Visual Cortex', ('V2', 'V3', 'V4')),
            ('Dorsal Stream Visual Cortex',
             ('V3A', 'V3B', 'V6', 'V6A', 'V7', 'IPS1')),
            ('Ventral Stream Visual Cortex',
             ('V8', 'VVC', 'PIT', 'FFC', 'VMV1', 'VMV2', 'VMV3')),
            ('MT+ Complex and Neighboring Visual Areas',
             ('V3CD', 'LO1', 'LO2', 'LO3', 'V4t', 'FST', 'MT', 'MST', 'PH')),
            ('Somatosensory and Motor Cortex', ('4', '3a', '3b', '1', '2')),
            ('Paracentral Lobular and Mid Cingulate Cortex',
             ('24dd', '24dv', '6mp', '6ma', 'SCEF', '5m', '5L', '5mv')),
            ('Premotor Cortex',
             ('55b', '6d', '6a', 'FEF', '6v', '6r', 'PEF')),
            ('Posterior Opercular Cortex',
             ('43', 'FOP1', 'OP4', 'OP1', 'OP2-3', 'PFcm')),
            ('Early Auditory Cortex',
             ('A1', 'LBelt', 'MBelt', 'PBelt', 'RI')),
            ('Auditory Association Cortex',
             ('A4', 'A5', 'STSdp', 'STSda', 'STSvp', 'STSva', 'STGa', 'TA2')),
            ('Insular and Frontal Opercular Cortex',
             ('52', 'PI', 'Ig', 'PoI1', 'PoI2', 'FOP2', 'FOP3', 'MI', 'AVI',
              'AAIC', 'Pir', 'FOP4', 'FOP5')),
            ('Medial Temporal Cortex',
             ('H', 'PreS', 'EC', 'PeEc', 'PHA1', 'PHA2', 'PHA3')),
            ('Lateral Temporal Cortex',
             ('PHT', 'TE1p', 'TE1m', 'TE1a', 'TE2p', 'TE2a', 'TGv', 'TGd',
              'TF')),
            ('Temporo-Parieto-Occipital Junction',
             ('TPOJ1', 'TPOJ2', 'TPOJ3', 'STV', 'PSL')),
            ('Superior Parietal Cortex',
             ('LIPv', 'LIPd', 'VIP', 'AIP', 'MIP', '7PC', '7AL', '7Am', '7PL',
              '7Pm')),
            ('Inferior Parietal Cortex',
             ('PGp', 'PGs', 'PGi', 'PFm', 'PF', 'PFt', 'PFop', 'IP0', 'IP1',
              'IP2')),
            ('Posterior Cingulate Cortex',
             ('DVT', 'ProS', 'POS1', 'POS2', 'RSC', 'v23ab', 'd23ab', '31pv',
              '31pd', '31a', '23d', '23c', 'PCV', '7m')),
            ('Anterior Cingulate and Medial Prefrontal Cortex',
             ('33pr', 'p24pr', 'a24pr', 'p24', 'a24', 'p32pr', 'a32pr', 'd32',
              'p32', 's32', '8BM', '9m', '10v', '10r', '25')),
            ('Orbital and Polar Frontal Cortex',
             ('47s', '47m', 'a47r', '11l', '13l', 'a10p', 'p10p', '10pp',
              '10d', 'OFC', 'pOFC')),
            ('Inferior Frontal Cortex',
             ('44', '45', 'IFJp', 'IFJa', 'IFSp', 'IFSa', '47l', 'p47r')),
            ('DorsoLateral Prefrontal Cortex',
             ('8C', '8Av', 'i6-8', 's6-8', 'SFL', '8BL', '9p', '9a', '8Ad',
              'p9-46v', 'a9-46v', '46', '9-46d')),
            ('???', ('???',))])
        assert len(groups) == 23
        labels_out = list()

        for hemi in ('lh', 'rh'):
            labels = read_labels_from_annot('fsaverage', 'HCPMMP1', hemi=hemi,
                                            subjects_dir=subjects_dir,
                                            sort=False)
            label_names = [
                '???' if label.name.startswith('???') else
                label.name.split('_')[1] for label in labels]
            used = np.zeros(len(labels), bool)
            for key, want in groups.items():
                assert '\t' not in key
                these_labels = [li for li, label_name in enumerate(label_names)
                                if label_name in want]
                assert not used[these_labels].any()
                assert len(these_labels) == len(want)
                used[these_labels] = True
                these_labels = [labels[li] for li in these_labels]
                # take a weighted average to get the color
                # (here color == task activation)
                w = np.array([len(label.vertices) for label in these_labels])
                w = w / float(w.sum())
                color = np.dot(w, [label.color for label in these_labels])
                these_labels = sum(these_labels,
                                   Label([], subject='fsaverage', hemi=hemi))
                these_labels.name = key
                these_labels.color = color
                labels_out.append(these_labels)
            assert used.all()
        assert len(labels_out) == 46
        for hemi, side in (('lh', 'left'), ('rh', 'right')):
            table_name = './%s.fsaverage164.label.gii' % (side,)
            write_labels_to_annot(labels_out, 'fsaverage', 'HCPMMP1_combined',
                                  hemi=hemi, subjects_dir=subjects_dir,
                                  sort=False, table_name=table_name)
def test_load_model_compressed_remote_fail():
    with pytest.raises(Exception):
        model_file = pooch.retrieve(url="https://nowhere.zip", known_hash=None)
        geo_model = gp.load_model(name='error', path=model_file)
def read_noaa_mbl_url(noaa_mbl_url, dest):
    """Downloads the url and reads in the MBL surface file.

    Args:
        noaa_mbl_url (str): the address for the noaa surface file
        dest (str): the destination to which the raw file will be saved

    Returns:
        pd.Series: multi-indexed series of xCO2 with (time, lat) as coords.
    """
    import re
    from pathlib import Path

    import numpy as np
    import pandas as pd
    import pooch

    # save to temporary location with pooch
    print(
        f"[SeaFlux] Downloading {noaa_mbl_url} to {dest} and reading in as pd.DataFrame"
    )
    dest = Path(dest)
    fname = pooch.retrieve(
        url=noaa_mbl_url,
        known_hash=None,
        path=str(dest.parent),
        fname=str(dest.name),
    )

    # find start line
    is_mbl_surface = False
    for start_line, line in enumerate(open(fname)):
        if re.findall("MBL.*SURFACE", line):
            is_mbl_surface = True
        if not line.startswith("#"):
            break
    if not is_mbl_surface:
        raise Exception(
            "The file at the provided url is not an MBL SURFACE file. "
            "Please check that you have provided the surface url. "
        )

    # read fixed width file CO2
    df = pd.read_fwf(fname, skiprows=start_line, header=None, index_col=0)
    df.index.name = "date"
    # every second line is uncertainty
    df = df.iloc[:, ::2]
    # latitude is given as sin(lat)
    df.columns = np.rad2deg(np.arcsin(np.linspace(-1, 1, 41)))

    # resolve time properly
    year = (df.index.values - (df.index.values % 1)).astype(int)
    day_of_year = ((df.index.values - year) * 365 + 1).astype(int)
    date_strings = ["{}-{:03d}".format(*a) for a in zip(year, day_of_year)]
    date = pd.to_datetime(date_strings, format="%Y-%j")
    df = df.set_index(date)
    df = df.iloc[:-1]  # remove the last value that is for 2020-01-01

    # renaming indexes (have to stack for that)
    df = df.stack()
    index = df.index.set_names(["time", "lat"])
    df = df.set_axis(index)
    df.source = noaa_mbl_url

    return df