def test_conf_auth():
    """A server started with SecretAuth must reject unauthenticated clients."""
    conf = {'auth': {'class': 'intake.auth.secret.SecretAuth',
                     'kwargs': {'secret': 'test'}}}
    with temp_conf(conf) as fn:
        env = os.environ.copy()
        env['INTAKE_CONF_FILE'] = fn
        with server(env=env, wait=5000):
            info_url = 'http://localhost:5000/v1/info'
            # Raw HTTP without the secret header -> forbidden.
            assert requests.get(info_url).status_code == 403
            # Raw HTTP with the correct secret header -> OK.
            assert requests.get(info_url,
                                headers={'intake-secret': 'test'}).ok
            # Catalog client with no credentials fails to connect.
            with pytest.raises(Exception):
                intake.Catalog('intake://localhost:5000')
            # Credentials supplied via storage_options -> catalog loads.
            cat = intake.Catalog(
                'intake://localhost:5000',
                storage_options={'headers': {'intake-secret': 'test'}})
            assert 'entry1' in cat
def bundle(request, intake_server, example_data, tmp_path):  # noqa
    """Parametrized fixture: serialize one run to msgpack, then expose it
    through either a local YAML catalog or the remote intake server."""
    writer = Serializer(tmp_path)
    uid, docs = example_data
    for name, doc in docs:
        writer(name, doc)
    writer.close()

    # Write a catalog file pointing at the msgpack artifacts just produced.
    fullname = os.path.join(TMP_DIR, YAML_FILENAME)
    with open(fullname, 'w') as f:
        f.write(f'''
plugins:
  source:
    - module: intake_bluesky
sources:
  xyz:
    description: Some imaginary beamline
    driver: intake_bluesky.msgpack.BlueskyMsgpackCatalog
    container: catalog
    args:
      paths: {[str(path) for path in writer.artifacts['all']]}
      handler_registry:
        NPY_SEQ: ophyd.sim.NumpySeqHandler
    metadata:
      beamline: "00-ID"
''')
    time.sleep(2)  # give the already-running server time to see the new file

    if request.param == 'local':
        cat = intake.Catalog(fullname)
    elif request.param == 'remote':
        cat = intake.Catalog(intake_server, page_size=10)
    else:
        raise ValueError
    return types.SimpleNamespace(cat=cat, uid=uid, docs=docs)
def test_yaml_cat(temp_db):
    """A YAML catalog entry builds a SQLCatalog from an env-var-templated URI."""
    table, uri = temp_db
    os.environ['TEST_SQLITE_URI'] = uri  # used in catalog default
    cat = intake.Catalog(os.path.join(here, 'cat.yaml'))
    assert 'tables' in cat

    sub_cat = cat.tables()
    assert isinstance(sub_cat, SQLCatalog)
    assert 'temp' in list(sub_cat)

    # Reading the table back must round-trip the reference frame.
    loaded = cat.tables.temp.read()
    assert df.equals(loaded)
def __init__(self, master_catalog_path, output_dir, remote_url_base,
             actually_load=False):
    """Set up the catalog processor.

    Parameters
    ----------
    master_catalog_path : str
        Path to the master intake catalog YAML file.
    output_dir : str
        Directory where generated output is written.
    remote_url_base : str
        Base URL prepended when building remote links for entries.
    actually_load : bool, optional
        When True, entries are actually loaded rather than only inspected
        (default False) — presumably a dry-run switch; confirm with callers.
    """
    self.master_catalog_path = master_catalog_path
    self.output_dir = output_dir
    self.remote_url_base = remote_url_base
    self.actually_load = actually_load
    # Open the master catalog eagerly so failures surface at construction.
    self.cat = intake.Catalog(master_catalog_path)
def bundle(request, intake_server, example_data, db_factory):  # noqa
    """Parametrized fixture: insert one run into fresh Mongo databases, then
    expose it through either a local YAML catalog or the remote server."""
    fullname = os.path.join(TMP_DIR, YAML_FILENAME)
    mds_db = db_factory()
    assets_db = db_factory()
    serializer = Serializer(mds_db, assets_db)
    uid, docs = example_data
    for name, doc in docs:
        serializer(name, doc)

    def extract_uri(db):
        # Build a mongodb:// URI from a live pymongo Database object.
        return f'mongodb://{db.client.address[0]}:{db.client.address[1]}/{db.name}'

    # Point a catalog file at the two freshly-populated databases.
    with open(fullname, 'w') as f:
        f.write(f'''
plugins:
  source:
    - module: intake_bluesky
sources:
  xyz:
    description: Some imaginary beamline
    driver: intake_bluesky.mongo_normalized.BlueskyMongoCatalog
    container: catalog
    args:
      metadatastore_db: {extract_uri(mds_db)}
      asset_registry_db: {extract_uri(assets_db)}
      handler_registry:
        NPY_SEQ: ophyd.sim.NumpySeqHandler
    metadata:
      beamline: "00-ID"
''')
    time.sleep(2)  # give the already-running server time to see the new file

    if request.param == 'local':
        cat = intake.Catalog(fullname)
    elif request.param == 'remote':
        cat = intake.Catalog(intake_server, page_size=10)
    else:
        raise ValueError
    return types.SimpleNamespace(cat=cat, uid=uid, docs=docs)
def load(self, variable=None, names=None, depth=5,
         catfile=DEFAULT_INTAKE_CAT, auth_token=None):
    """Loads datasets from given parameters.

    Parameters
    ----------
    variable : str, optional
        Variable Name. If input files have only one non-coordinate
        variable, that variable's name is used by default.
    names : list of str, optional
        List of dataset names. Entries may be either the full dotted
        catalog path or just its final component (the short name).
        By default, all xarray-container entries are loaded.
    depth : int, optional
        Depth of catalog search (default: 5)
    catfile : str, optional
        Path to catalogue metadata file, can be a remote URL. The
        pangeo Intake master catalogue is used by default.
    auth_token : str, optional
        Path to credentials key file to use for accessing cloud
        storage buckets.

    Returns
    -------
    datasets : list
        xarray DataArray objects.
    """
    import intake
    if auth_token:
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = auth_token
    cat = intake.Catalog(catfile)
    meta = cat.walk(depth=depth)
    # Keep only entries that open as xarray containers.
    sel = [name for name, ent in meta.items() if ent.container == 'xarray']
    names = sel if not names else names
    entries = [cat[name] for name in sel]
    shortnames = [name.split('.')[-1] for name in sel]
    # BUG FIX: the original compared *short* names against `names`, which
    # defaults to the full dotted paths in `sel`, so nested catalog entries
    # never matched and the default call returned no datasets.  Accept a
    # match on either the full path or the short name; explicit short-name
    # lists keep working as before.
    dset_dict = {
        short: ent.to_dask()
        for full, short, ent in zip(sel, shortnames, entries)
        if short in names or full in names
    }
    return self._prep_datasets(variable, dset_dict)
def test_open_styles(tmp_path_catalog):
    """Every public way of opening a YAML catalog yields the same entries."""
    expected = list(intake.Catalog(tmp_path_catalog))
    glob_pattern = os.path.join(os.path.dirname(tmp_path_catalog), "*")

    # Single path and list-of-paths forms of open_catalog.
    assert list(intake.open_catalog(tmp_path_catalog)) == expected
    assert list(intake.open_catalog([tmp_path_catalog])) == expected

    # Glob pattern dispatches to the multi-file catalog driver.
    globbed = intake.open_catalog(glob_pattern)
    assert list(globbed) == expected
    assert type(globbed).name == 'yaml_files_cat'

    # A bare directory also dispatches to the multi-file driver.
    from_dir = intake.open_catalog(os.path.dirname(tmp_path_catalog))
    assert list(from_dir) == expected
    assert type(from_dir).name == 'yaml_files_cat'

    # Explicit driver entry points agree as well.
    assert list(intake.open_yaml_file_cat(tmp_path_catalog)) == expected
    assert list(intake.open_yaml_files_cat([tmp_path_catalog])) == expected
    assert list(intake.open_yaml_files_cat(glob_pattern)) == expected
def get_master_catalog():
    """Open the repository's master intake catalog.

    TODO: replace the hard-coded relative path with an environment variable.
    """
    catalog_path = os.path.join(os.path.dirname(__file__),
                                '../intake-catalogs/master.yaml')
    return intake.Catalog(catalog_path)
source[0] = rendered  # NOTE(review): tail of an unseen rstjinja() handler — writes the jinja-rendered page back


# https://pypi.python.org/pypi/sphinx-bootstrap-theme/
def setup(app):
    """Sphinx extension hook: register stylesheets and the jinja pre-processor."""
    app.add_stylesheet("pangeo-style.css")  # also can be a full URL
    app.add_stylesheet(
        "https://netdna.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css"
    )
    app.add_stylesheet("example_gallery_styles_patched.css")
    app.connect("source-read", rstjinja)


import intake

catalog = intake.Catalog('../gce/catalog.yaml')

# a hack to get our custom people data into sphinx
import yaml

with open('data/people.yml') as people_data_file:
    # FIX: yaml.load() without an explicit Loader is unsafe on untrusted input
    # and raises TypeError on PyYAML >= 6; safe_load is correct for plain data.
    people = yaml.safe_load(people_data_file)
people.sort(key=lambda x: x['last_name'].lower())

with open('data/deployments.yml') as deployments_data_file:
    deployments = yaml.safe_load(deployments_data_file)

# Extra template variables made available to every Sphinx page.
html_context = {
    'people': people,
    'deployments': deployments,
    'catalog': catalog
}
def test_old_usage():
    """``intake.Catalog`` is still exposed and aliases the real Catalog class."""
    cat = intake.Catalog()
    assert isinstance(cat, intake.Catalog)
    # The top-level name must be the very same object as the canonical class.
    assert intake.Catalog is intake.catalog.base.Catalog
def _find_entries(name, catalog_url): """ Function used by from_catalog to decode xarray or xmitgcm catalogs. It is also used by conf.py in docs to create dataset.rst Parameters ---------- name: str Name of the oceandataset to open. catalog_url: str or None Path from which to read the catalog. If None, use SciServer's catalogs. Returns ------- cat, entries, url, intake_switch """ # Check parameters if catalog_url is None: # pragma: no cover url = ('https://raw.githubusercontent.com/malmans2/oceanspy/' 'master/sciserver_catalogs/datasets_list.yaml') f = _urllib.request.urlopen(url) SCISERVER_DATASETS = _yaml.safe_load(f)['datasets']['sciserver'] if name not in SCISERVER_DATASETS: raise ValueError('[{}] is not available on SciServer.' ' Here is a list of available oceandatasets: {}.' ''.format(name, SCISERVER_DATASETS)) else: _check_instance({'catalog_url': catalog_url}, 'str') # Read catatog try: if catalog_url is None: url = ('https://raw.githubusercontent.com/malmans2/oceanspy/' 'master/sciserver_catalogs/catalog_xarray.yaml') else: url = catalog_url cat = _intake.Catalog(url) entries = [entry for entry in cat if name in entry] if len(entries) == 0: raise ValidationError('', '') intake_switch = True except ValidationError: if catalog_url is None: url = ('https://raw.githubusercontent.com/malmans2/oceanspy/' 'master/sciserver_catalogs/catalog_xmitgcm.yaml') else: url = catalog_url # Is it an url? try: f = _urllib.request.urlopen(url) cat = _yaml.safe_load(f) except ValueError: with open(url) as f: cat = _yaml.safe_load(f) entries = [entry for entry in cat if name in entry] intake_switch = False # Error if not available if len(entries) == 0: raise ValueError('[{}] is not in the catalog.'.format(name)) else: return cat, entries, url, intake_switch
Run this script at the beginning of each month to build new conda downloads badges from the previous month. """ import os from yaml import safe_load import requests import datetime import intake import colorcet as cc import numpy as np here = os.path.abspath(os.path.dirname(__file__)) cache_path = os.path.join(here, '..', 'doc', '_static', 'cache') cat = intake.Catalog( 'https://raw.githubusercontent.com/ContinuumIO/anaconda-package-data/master/catalog/anaconda_package_data.yaml' ) colors = cc.palette_n.rainbow[-20:80:-1] top_of_colormap = 1e6 step = len(colors) / np.log10(top_of_colormap) today = datetime.date.today() first = today.replace(day=1) last_month = first - datetime.timedelta(days=1) try: monthly = cat.anaconda_package_data_by_month( year=last_month.year, month=last_month.month, columns=['pkg_name', 'counts']).to_dask() except:
# The script uses 1/48 deg MITgcm model run to construct smaller files for regional data # Import Modules import xarray as xr import xrft import dask.array as da from dask.distributed import Client, LocalCluster from xgcm import Grid import numpy as np from xmitgcm import llcreader import bottleneck import intake # Load grid data from web cat = intake.Catalog( "https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean/llc4320.yaml" ) dsgrid = cat["LLC4320_grid"].to_dask() dsgrid = llcreader.faces_dataset_to_latlon(dsgrid, metric_vector_pairs=[('dxC', 'dyC'), ('dyG', 'dxG') ]) model = llcreader.ECCOPortalLLC4320Model() #ds = model.get_dataset(varnames=['U','V'], k_levels=[1,3,5,10,30],type='latlon') ds = model.get_dataset(varnames=['U', 'V'], k_levels=[1], type='latlon') #,k_chunksize=5) ds = xr.merge([ds, dsgrid]) ds = ds.isel(time=np.arange(0, 1)) #len(ds.time), 12))
def test_old_usage():
    """Constructing via the deprecated ``intake.Catalog`` alias must warn."""
    with pytest.warns(UserWarning):
        intake.Catalog()