Example #1
0
def test_conf_auth():
    """A server configured with SecretAuth rejects anonymous requests and
    accepts the shared secret, both over raw HTTP and via intake.Catalog."""
    conf = {
        'auth': {
            'class': 'intake.auth.secret.SecretAuth',
            'kwargs': {'secret': 'test'}
        }
    }
    with temp_conf(conf) as conf_file:
        environment = os.environ.copy()
        environment['INTAKE_CONF_FILE'] = conf_file
        with server(env=environment, wait=5000):
            # raw HTTP request without credentials is refused
            response = requests.get('http://localhost:5000/v1/info')
            assert response.status_code == 403
            # the same endpoint succeeds once the secret header is supplied
            response = requests.get('http://localhost:5000/v1/info',
                                    headers={'intake-secret': 'test'})
            assert response.ok

            # opening a catalog without credentials fails
            with pytest.raises(Exception):
                intake.Catalog('intake://localhost:5000')

            # passing the secret through storage_options works
            cat = intake.Catalog('intake://localhost:5000',
                                 storage_options={'headers': {
                                     'intake-secret': 'test'
                                 }})
            assert 'entry1' in cat
Example #2
0
def bundle(request, intake_server, example_data, tmp_path):  # noqa
    """Pytest fixture body: serialize the example documents to disk, write a
    YAML catalog pointing at the resulting msgpack artifacts, and return a
    namespace holding an open catalog plus the original uid/docs.

    ``request.param`` selects how the catalog is opened: ``'local'`` reads
    the YAML file directly, ``'remote'`` connects to the running intake
    server fixture; any other value raises ``ValueError``.
    """
    serializer = Serializer(tmp_path)
    uid, docs = example_data
    # feed every (name, doc) pair through the serializer, then flush to disk
    for name, doc in docs:
        serializer(name, doc)
    serializer.close()

    fullname = os.path.join(TMP_DIR, YAML_FILENAME)
    # the `paths` arg interpolates the list of msgpack files just written
    with open(fullname, 'w') as f:
        f.write(f'''
plugins:
  source:
    - module: intake_bluesky
sources:
  xyz:
    description: Some imaginary beamline
    driver: intake_bluesky.msgpack.BlueskyMsgpackCatalog
    container: catalog
    args:
      paths: {[str(path) for path in serializer.artifacts['all']]}
      handler_registry:
        NPY_SEQ: ophyd.sim.NumpySeqHandler
    metadata:
      beamline: "00-ID"
        ''')

    # give the intake server time to notice the new catalog file
    time.sleep(2)

    if request.param == 'local':
        cat = intake.Catalog(os.path.join(TMP_DIR, YAML_FILENAME))
    elif request.param == 'remote':
        cat = intake.Catalog(intake_server, page_size=10)
    else:
        raise ValueError
    return types.SimpleNamespace(cat=cat, uid=uid, docs=docs)
def test_yaml_cat(temp_db):
    """A YAML catalog entry yields an SQLCatalog whose 'temp' table equals df."""
    _table, uri = temp_db
    # the catalog's default connection string reads this environment variable
    os.environ['TEST_SQLITE_URI'] = uri
    cat = intake.Catalog(os.path.join(here, 'cat.yaml'))
    assert 'tables' in cat
    sub_cat = cat.tables()
    assert isinstance(sub_cat, SQLCatalog)
    assert 'temp' in list(sub_cat)
    loaded = cat.tables.temp.read()
    assert df.equals(loaded)
 def __init__(self,
              master_catalog_path,
              output_dir,
              remote_url_base,
              actually_load=False):
     """Record the configuration and eagerly open the master intake catalog.

     Parameters
     ----------
     master_catalog_path :
         Path handed straight to ``intake.Catalog``.
     output_dir :
         Output directory, stored for use elsewhere in the class.
     remote_url_base :
         Base URL prefix, stored for use elsewhere in the class.
     actually_load : bool, optional
         Flag stored for later use (default False).
     """
     self.actually_load = actually_load
     self.remote_url_base = remote_url_base
     self.output_dir = output_dir
     self.master_catalog_path = master_catalog_path
     # open the catalog up front so a bad path fails at construction time
     self.cat = intake.Catalog(master_catalog_path)
def bundle(request, intake_server, example_data, db_factory):  # noqa
    """Pytest fixture body: store the example documents in two MongoDB
    databases, write a YAML catalog pointing at them, and return a namespace
    holding an open catalog plus the original uid/docs.

    ``request.param`` selects how the catalog is opened: ``'local'`` reads
    the YAML file directly, ``'remote'`` connects to the running intake
    server fixture; any other value raises ``ValueError``.
    """
    fullname = os.path.join(TMP_DIR, YAML_FILENAME)
    # separate databases for the metadatastore and the asset registry
    mds_db = db_factory()
    assets_db = db_factory()
    serializer = Serializer(mds_db, assets_db)
    uid, docs = example_data
    for name, doc in docs:
        serializer(name, doc)

    def extract_uri(db):
        # rebuild a mongodb:// URI from the client's (host, port) and db name
        return f'mongodb://{db.client.address[0]}:{db.client.address[1]}/{db.name}'

    with open(fullname, 'w') as f:
        f.write(f'''
plugins:
  source:
    - module: intake_bluesky
sources:
  xyz:
    description: Some imaginary beamline
    driver: intake_bluesky.mongo_normalized.BlueskyMongoCatalog
    container: catalog
    args:
      metadatastore_db: {extract_uri(mds_db)}
      asset_registry_db: {extract_uri(assets_db)}
      handler_registry:
        NPY_SEQ: ophyd.sim.NumpySeqHandler
    metadata:
      beamline: "00-ID"
        ''')

    # give the intake server time to notice the new catalog file
    time.sleep(2)

    if request.param == 'local':
        cat = intake.Catalog(os.path.join(TMP_DIR, YAML_FILENAME))
    elif request.param == 'remote':
        cat = intake.Catalog(intake_server, page_size=10)
    else:
        raise ValueError
    return types.SimpleNamespace(cat=cat, uid=uid, docs=docs)
Example #6
0
    def load(self,
             variable=None,
             names=None,
             depth=5,
             catfile=DEFAULT_INTAKE_CAT,
             auth_token=None):
        """Loads datasets from given parameters.

        Parameters
        ----------
        variable : str, optional
            Variable Name. If input files have only one non-coordinate variable,
            that variable's name is used by default.
        names : list of str, optional
            List of dataset short names (the last dotted component of a
            catalog entry). By default every xarray entry found is loaded.
        depth : int, optional
            Depth of catalog search (default: 5)
        catfile : str, optional
            Path to catalogue metadata file, can be a remote URL. The pangeo
            Intake master catalogue is used by default.
        auth_token : str, optional
            Path to credentials key file to use for accessing cloud storage
            buckets.

        Returns
        -------
        datasets : list
            xarray DataArray objects.
        """
        import intake
        if auth_token:
            # google-cloud libraries pick up credentials from this env var
            os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = auth_token
        cat = intake.Catalog(catfile)
        meta = cat.walk(depth=depth)
        # keep only entries that load into xarray containers
        sel = [name for name, ent in meta.items() if ent.container == 'xarray']
        entries = [cat[name] for name in sel]
        # the last dotted component is the user-facing dataset name
        shortnames = [name.split('.')[-1] for name in sel]
        # Fix: default `names` to the short names. The filter below compares
        # short names, so the previous default (the full dotted names in
        # `sel`) silently dropped every nested catalog entry.
        names = shortnames if not names else names
        dset_dict = {
            name: ent.to_dask()
            for name, ent in zip(shortnames, entries) if name in names
        }
        return self._prep_datasets(variable, dset_dict)
Example #7
0
def test_open_styles(tmp_path_catalog):
    """Every documented way of opening a YAML catalog yields the same entries."""
    reference = intake.Catalog(tmp_path_catalog)
    expected = list(reference)
    containing_dir = os.path.dirname(tmp_path_catalog)

    # single path, via the generic opener
    assert list(intake.open_catalog(tmp_path_catalog)) == expected
    # list of paths
    assert list(intake.open_catalog([tmp_path_catalog])) == expected
    # glob pattern produces a multi-file catalog
    globbed = intake.open_catalog(os.path.join(containing_dir, "*"))
    assert list(globbed) == expected
    assert type(globbed).name == 'yaml_files_cat'
    # a bare directory behaves like the glob
    from_dir = intake.open_catalog(containing_dir)
    assert list(from_dir) == expected
    assert type(from_dir).name == 'yaml_files_cat'
    # explicit single-file and multi-file openers
    assert list(intake.open_yaml_file_cat(tmp_path_catalog)) == expected
    assert list(intake.open_yaml_files_cat([tmp_path_catalog])) == expected
    assert list(intake.open_yaml_files_cat(
        os.path.join(containing_dir, "*"))) == expected
Example #8
0
def get_master_catalog():
    """Open the repository's master intake catalog.

    TODO: replace the hard-wired relative path with an environment variable.
    """
    catalog_dir = os.path.dirname(__file__)
    fname = os.path.join(catalog_dir, '../intake-catalogs/master.yaml')
    return intake.Catalog(fname)
Example #9
0
    source[0] = rendered


# https://pypi.python.org/pypi/sphinx-bootstrap-theme/
def setup(app):
    """Sphinx extension hook: register stylesheets and the jinja source hook."""
    stylesheets = (
        "pangeo-style.css",  # also can be a full URL
        "https://netdna.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css",
        "example_gallery_styles_patched.css",
    )
    # NOTE(review): app.add_stylesheet is deprecated in newer Sphinx
    # (replaced by add_css_file) — confirm the Sphinx version pinned here.
    for sheet in stylesheets:
        app.add_stylesheet(sheet)
    app.connect("source-read", rstjinja)


import intake

# Loaded at import time so Sphinx templates can reference catalog entries.
catalog = intake.Catalog('../gce/catalog.yaml')

# a hack to get our custom people data into sphinx
import yaml

# Fix: yaml.load without an explicit Loader is deprecated (PyYAML >= 5.1)
# and can execute arbitrary Python tags; these files only contain plain
# data, so safe_load is the correct call.
with open('data/people.yml') as people_data_file:
    people = yaml.safe_load(people_data_file)
people.sort(key=lambda x: x['last_name'].lower())

with open('data/deployments.yml') as deployments_data_file:
    deployments = yaml.safe_load(deployments_data_file)

# Exposed to every page template rendered by Sphinx.
html_context = {
    'people': people,
    'deployments': deployments,
    'catalog': catalog
}
Example #10
0
def test_old_usage():
    """intake.Catalog is still the base Catalog class and is instantiable."""
    assert intake.Catalog is intake.catalog.base.Catalog
    assert isinstance(intake.Catalog(), intake.Catalog)
def _find_entries(name, catalog_url):
    """
    Function used by from_catalog to decode xarray or xmitgcm catalogs.
    It is also used by conf.py in docs to create dataset.rst

    Parameters
    ----------
    name: str
        Name of the oceandataset to open.
    catalog_url: str or None
        Path from which to read the catalog.
        If None, use SciServer's catalogs.

    Returns
    -------
    cat, entries, url, intake_switch
        ``cat`` is an intake catalog object (when ``intake_switch`` is True)
        or a plain dict parsed from YAML (when it is False); ``entries`` are
        the matching entry names; ``url`` is the catalog location actually
        used.
    """
    # Check parameters
    if catalog_url is None:  # pragma: no cover
        # With no explicit catalog, validate `name` against the published
        # list of SciServer datasets before trying to open anything.
        url = ('https://raw.githubusercontent.com/malmans2/oceanspy/'
               'master/sciserver_catalogs/datasets_list.yaml')
        f = _urllib.request.urlopen(url)
        SCISERVER_DATASETS = _yaml.safe_load(f)['datasets']['sciserver']
        if name not in SCISERVER_DATASETS:
            raise ValueError('[{}] is not available on SciServer.'
                             ' Here is a list of available oceandatasets: {}.'
                             ''.format(name, SCISERVER_DATASETS))
    else:
        _check_instance({'catalog_url': catalog_url}, 'str')

    # Read catalog
    # First try the intake (xarray) catalog; if `name` is not found there,
    # raise a dummy ValidationError to fall through to the xmitgcm branch.
    try:
        if catalog_url is None:
            url = ('https://raw.githubusercontent.com/malmans2/oceanspy/'
                   'master/sciserver_catalogs/catalog_xarray.yaml')
        else:
            url = catalog_url
        cat = _intake.Catalog(url)
        entries = [entry for entry in cat if name in entry]
        if len(entries) == 0:
            raise ValidationError('', '')
        intake_switch = True
    except ValidationError:
        # xmitgcm fallback: the catalog is parsed as raw YAML, not intake.
        if catalog_url is None:
            url = ('https://raw.githubusercontent.com/malmans2/oceanspy/'
                   'master/sciserver_catalogs/catalog_xmitgcm.yaml')
        else:
            url = catalog_url

        # Is it an url?
        # urlopen raises ValueError for non-URL strings, in which case the
        # catalog is read from the local filesystem instead.
        try:
            f = _urllib.request.urlopen(url)
            cat = _yaml.safe_load(f)
        except ValueError:
            with open(url) as f:
                cat = _yaml.safe_load(f)
        entries = [entry for entry in cat if name in entry]
        intake_switch = False

    # Error if not available
    if len(entries) == 0:
        raise ValueError('[{}] is not in the catalog.'.format(name))
    else:
        return cat, entries, url, intake_switch
Example #12
0
Run this script at the beginning of each month to build new conda downloads badges
from the previous month.
"""

import os
from yaml import safe_load
import requests
import datetime
import intake
import colorcet as cc
import numpy as np

here = os.path.abspath(os.path.dirname(__file__))
# Badge output goes into the docs' static cache directory.
cache_path = os.path.join(here, '..', 'doc', '_static', 'cache')
cat = intake.Catalog(
    'https://raw.githubusercontent.com/ContinuumIO/anaconda-package-data/master/catalog/anaconda_package_data.yaml'
)

# NOTE(review): presumably maps log10(download count) onto a reversed
# rainbow palette slice, capped at top_of_colormap — confirm against the
# badge-coloring code further down (truncated here).
colors = cc.palette_n.rainbow[-20:80:-1]
top_of_colormap = 1e6
step = len(colors) / np.log10(top_of_colormap)

# "Last month" is the day before the first of the current month.
today = datetime.date.today()
first = today.replace(day=1)
last_month = first - datetime.timedelta(days=1)
try:
    monthly = cat.anaconda_package_data_by_month(
        year=last_month.year,
        month=last_month.month,
        columns=['pkg_name', 'counts']).to_dask()
except:
Example #13
0
# The script uses 1/48 deg MITgcm model run to construct smaller files for regional data

# Import Modules
import xarray as xr
import xrft
import dask.array as da
from dask.distributed import Client, LocalCluster
from xgcm import Grid
import numpy as np
from xmitgcm import llcreader
import bottleneck
import intake

# Load grid data from web
cat = intake.Catalog(
    "https://raw.githubusercontent.com/pangeo-data/pangeo-datastore/master/intake-catalogs/ocean/llc4320.yaml"
)
dsgrid = cat["LLC4320_grid"].to_dask()
# NOTE(review): presumably converts the faceted LLC grid layout into a
# regular lat/lon dataset, pairing the metric terms that swap roles across
# faces — confirm against llcreader docs.
dsgrid = llcreader.faces_dataset_to_latlon(dsgrid,
                                           metric_vector_pairs=[('dxC', 'dyC'),
                                                                ('dyG', 'dxG')
                                                                ])

model = llcreader.ECCOPortalLLC4320Model()
#ds = model.get_dataset(varnames=['U','V'], k_levels=[1,3,5,10,30],type='latlon')
# Velocity components at the first vertical level only.
ds = model.get_dataset(varnames=['U', 'V'], k_levels=[1],
                       type='latlon')  #,k_chunksize=5)

# Attach the grid variables to the velocity dataset.
ds = xr.merge([ds, dsgrid])
# Restrict to the first time step (the full range is commented out).
ds = ds.isel(time=np.arange(0, 1))  #len(ds.time), 12))
Example #14
0
def test_old_usage():
    """Instantiating the legacy intake.Catalog entry point emits a UserWarning."""
    with pytest.warns(UserWarning):
        intake.Catalog()