コード例 #1
0
ファイル: util.py プロジェクト: wjh179/cmip6hack-precip
def load_data_catalog():
    """Open the CMIP6 intake-esm catalog matching the current host.

    Returns:
        The datastore opened from ``../catalogs/glade-cmip6.json`` when
        ``is_ncar_host()`` is truthy, otherwise the one opened from
        ``../catalogs/pangeo-cmip6.json``.
    """
    catalog_path = ("../catalogs/glade-cmip6.json"
                    if is_ncar_host() else "../catalogs/pangeo-cmip6.json")
    return intake.open_esm_datastore(catalog_path)
コード例 #2
0
def createDataDict(this_experiment_id, this_variable_id, this_table_id,
                   this_grid_label):
    """Search the host-appropriate CMIP6 catalog and load the matches.

    Args:
        this_experiment_id: Value used for the ``experiment_id`` search key.
        this_variable_id: Value used for the ``variable_id`` search key.
        this_table_id: Value used for the ``table_id`` search key.
        this_grid_label: Value used for the ``grid_label`` search key.

    Returns:
        A 3-tuple ``(dataset_info, dset_dict, modelnames)``: the ``df`` of
        the search result, the result of ``to_dataset_dict`` on it, and a
        list of the distinct ``source_id`` values (built from a ``set``, so
        its order is not defined).
    """
    catalog_path = ("../catalogs/glade-cmip6.json" if util.is_ncar_host()
                    else "../catalogs/pangeo-cmip6.json")
    col = intake.open_esm_datastore(catalog_path)

    search_terms = {
        'experiment_id': this_experiment_id,
        'table_id': this_table_id,
        'variable_id': this_variable_id,
        'grid_label': this_grid_label,
    }
    cat = col.search(**search_terms)
    dataset_info = cat.df

    # Both option sets disable time decoding when the datasets are opened.
    zarr_options = {'consolidated': True, 'decode_times': False}
    cdf_options = {'chunks': {}, 'decode_times': False}
    dset_dict = cat.to_dataset_dict(zarr_kwargs=zarr_options,
                                    cdf_kwargs=cdf_options)

    modelnames = list(set(cat.df['source_id']))

    return dataset_info, dset_dict, modelnames
コード例 #3
0
def test_serialize_to_json():
    """Serialize a catalog with ``catalog_type='dict'`` and reload it.

    The reloaded catalog's ``df`` must equal the original's.
    """
    name = 'test_serialize_dict'
    with TemporaryDirectory() as local_store:
        original = intake.open_esm_datastore(catalog_dict_records)
        original.serialize(name=name,
                           directory=local_store,
                           catalog_type='dict')
        # The test expects the serialized catalog at <directory>/<name>.json.
        json_path = os.path.join(local_store, name + '.json')
        reloaded = intake.open_esm_datastore(json_path)
        pd.testing.assert_frame_equal(original.df, reloaded.df)
コード例 #4
0
def test_invalid_derivedcat(query, regex):
    """Opening a catalog with this derived-variable registry must fail.

    A derived variable ``'FOO'`` is registered with the supplied ``query``;
    opening ``catalog_dict_records`` with that registry is expected to raise
    ``ValueError`` whose message matches ``regex``.
    """
    derived_registry = intake_esm.DerivedVariableRegistry()

    @derived_registry.register(variable='FOO', query=query)
    def func(ds):
        # Derived variable: sum of the FLNS and FLUT variables.
        ds['FOO'] = ds.FLNS + ds.FLUT
        return ds

    with pytest.raises(ValueError, match=regex):
        intake.open_esm_datastore(catalog_dict_records,
                                  registry=derived_registry)
コード例 #5
0
def test_serialize_to_csv():
    """Serialize a catalog subset with ``catalog_type='file'`` and reload it.

    Checks that the reloaded ``df`` equals the subset's ``df`` and that the
    reloaded catalog's ``esmcol_data['id']`` equals the serialized name.
    """
    name = 'CMIP6-MRI-ESM2-0'
    full_catalog = intake.open_esm_datastore(cdf_col_sample_cmip6)
    with TemporaryDirectory() as local_store:
        subset = full_catalog.search(source_id='MRI-ESM2-0')
        subset.serialize(name=name,
                         directory=local_store,
                         catalog_type='file')
        reloaded = intake.open_esm_datastore(f'{local_store}/{name}.json')
        pd.testing.assert_frame_equal(subset.df, reloaded.df)
        assert reloaded.esmcol_data['id'] == name
コード例 #6
0
def get_cmip6_catalogue():
    """
    Open the CMIP6 catalogue that matches the current host.

    Returns the datastore opened from ``../../catalogs/glade-cmip6.json``
    when ``is_ncar_host()`` is truthy, otherwise the one opened from
    ``../../catalogs/pangeo-cmip6.json``.
    """
    catalogue_path = ("../../catalogs/glade-cmip6.json" if is_ncar_host()
                      else "../../catalogs/pangeo-cmip6.json")
    return intake.open_esm_datastore(catalogue_path)
コード例 #7
0
def test_catalog_serialize(tmp_path, catalog_type):
    """Serialize a catalog subset into ``tmp_path`` and reload it.

    The frames are compared after resetting their indexes, and the reloaded
    catalog's ``esmcat.id`` must equal the serialized name.
    """
    name = 'CMIP6-MRI-ESM2-0'
    full_catalog = intake.open_esm_datastore(cdf_col_sample_cmip6)
    subset = full_catalog.search(source_id='MRI-ESM2-0')
    subset.serialize(name=name,
                     directory=tmp_path,
                     catalog_type=catalog_type)

    reloaded = intake.open_esm_datastore(f'{tmp_path}/{name}.json')
    # Index labels are dropped on both sides before comparing.
    expected_df = subset.df.reset_index(drop=True)
    actual_df = reloaded.df.reset_index(drop=True)
    pd.testing.assert_frame_equal(expected_df, actual_df)
    assert reloaded.esmcat.id == name
コード例 #8
0
def test_to_dataset_dict_s3():
    """Load one dataset from the ``zarr_col_aws_cesm`` catalog anonymously.

    Skipped when ``s3fs`` cannot be imported.
    """
    pytest.importorskip('s3fs')
    catalog = intake.open_esm_datastore(zarr_col_aws_cesm)
    subset = catalog.search(variable='RAIN', experiment='20C')
    datasets = subset.to_dataset_dict(storage_options={'anon': True})
    # Any one entry is enough; only its type is checked.
    ds = datasets.popitem()[1]
    assert isinstance(ds, xr.Dataset)
コード例 #9
0
def test_to_dataset_dict_aggfalse(esmcol_path, query):
    """With ``aggregate=False`` there is one dataset per catalog row."""
    cat = intake.open_esm_datastore(esmcol_path).search(**query)
    expected_count = len(cat.df)
    dsets = cat.to_dataset_dict(aggregate=False,
                                zarr_kwargs={'consolidated': True})
    assert len(dsets) == expected_count
コード例 #10
0
def test_df_property():
    """Assigning to ``df`` replaces the catalog's frame.

    After keeping only the first two rows, the catalog reports length 1 and
    its ``df`` has two rows.
    """
    catalog = intake.open_esm_datastore(catalog_dict_records)
    assert len(catalog.df) == 5

    first_two_rows = catalog.df.iloc[0:2, :]
    catalog.df = first_two_rows

    assert isinstance(catalog.df, pd.DataFrame)
    assert len(catalog) == 1
    assert len(catalog.df) == 2
コード例 #11
0
def test_getitem(key, decode_times):
    """Indexing a catalog by key yields a loadable group data source.

    The source is called with ``cdf_kwargs`` and converted with
    ``to_dask``; the dataset's ``member_id`` values must match those in the
    source's ``df``.
    """
    catalog = intake.open_esm_datastore(cdf_col_sample_cmip6)
    source = catalog[key]
    assert isinstance(source, intake_esm.source.ESMGroupDataSource)

    open_options = {'chunks': {}, 'decode_times': decode_times}
    ds = source(cdf_kwargs=open_options).to_dask()
    assert isinstance(ds, xr.Dataset)

    members_in_catalog = set(source.df['member_id'])
    members_in_dataset = set(ds['member_id'].values)
    assert members_in_catalog == members_in_dataset
コード例 #12
0
def test_ipython_key_completions():
    """Every expected name appears in ``_ipython_key_completions_()``."""
    expected_names = (
        'df',
        'to_dataset_dict',
        'from_df',
        'keys',
        'serialize',
        'search',
        'unique',
        'nunique',
        'update_aggregation',
        'key_template',
        'groupby_attrs',
        'variable_column_name',
        'aggregations',
        'agg_columns',
        'aggregation_dict',
        'path_column_name',
        'data_format',
        'format_column_name',
    )
    catalog = intake.open_esm_datastore(cdf_col_sample_cmip6)
    completions = catalog._ipython_key_completions_()
    missing = [name for name in expected_names if name not in completions]
    assert not missing
コード例 #13
0
def test_repr_html(url):
    """The HTML repr mentions ``'unique'`` and every column of ``df``."""
    catalog = intake.open_esm_datastore(url)
    html = catalog._repr_html_()
    assert 'unique' in html
    column_names = catalog.df.columns.tolist()
    assert all(column in html for column in column_names)
コード例 #14
0
def plot_30_Jahre_Klimatologie_von(var,
                                   scen,
                                   time,
                                   mod='GFDL-ESM4',
                                   table_id='Amon'):
    """Plot a time-mean map of ``var`` centred on ``time``.

    The name is German for "plot 30-year climatology of". The function
    searches the Pangeo CMIP6 catalog for member ``'r1i1p1f1'``, opens the
    matches through ``open_delayed`` and ``dask.compute``, selects the time
    slice ``str(time - 15)`` to ``str(time + 15)``, averages over ``'time'``
    and draws the result on a Robinson-projection axis with coastlines.

    Args:
        var: Variable id to search for and plot.
        scen: Experiment id to search for and plot.
        time: Central value of the selection window; the code applies
            ``- 15`` / ``+ 15`` to it and converts the results with ``str``.
        mod: Source id (model) to search for and plot.
        table_id: Table id used in the search.

    Returns:
        An empty tuple.
    """
    from collections import defaultdict

    col = intake.open_esm_datastore(
        "https://storage.googleapis.com/cmip6/pangeo-cmip6.json")
    map_data = col.search(
        require_all_on=["source_id"],
        experiment_id=[scen],
        table_id=table_id,
        variable_id=[var],
        source_id=mod,
        member_id='r1i1p1f1',
    )

    # Nested mapping: source_id -> experiment_id -> delayed dataset.
    dsets = defaultdict(dict)
    grouped = map_data.df.groupby(by=['source_id', 'experiment_id'])
    for (source_id, experiment_id), df in grouped:
        dsets[source_id][experiment_id] = open_delayed(df)

    dsets_ = dask.compute(dict(dsets))[0]

    _fig, ax = plt.subplots(nrows=1,
                            ncols=1,
                            figsize=(8, 4),
                            subplot_kw={'projection': ccrs.Robinson()})

    window = slice(str(time - 15), str(time + 15))
    climatology = dsets_[mod][scen].sel(time=window).mean('time')
    climatology[var].plot(ax=ax,
                          transform=ccrs.PlateCarree(),
                          cbar_kwargs=dict(shrink=0.5))
    ax.coastlines()

    return ()
コード例 #15
0
ファイル: intake_loader.py プロジェクト: lecjlg/forest
def _load_from_intake(experiment_id, table_id, grid_label, variable_id,
                      institution_id, activity_id, member_id):
    """
    Load one variable from the intake catalogue at ``URL`` as an iris cube.

    The arguments relate to the CMIP6 parameters of a dataset. The CMIP6
    reference is the ESGF servers, which can be accessed here:
    https://esgf-index1.ceda.ac.uk/search/cmip6-ceda/

    The catalogue is searched on ``experiment_id``, ``table_id``,
    ``grid_label``, ``institution_id``, ``member_id`` and ``variable_id``.
    One dataset is taken from the result, its ``variable_id`` variable is
    converted with ``to_iris()``, an ``air_pressure`` coordinate (if
    present) is converted to hPa, and the cube is passed through
    ``iris.util.squeeze`` before being returned.

    NOTE(review): ``activity_id`` is accepted but never passed to the
    search - confirm whether that is intended.
    NOTE(review): the previous docstring said this function is cached; no
    caching decorator is visible on this definition - verify at the
    definition site.
    """
    search_terms = {
        'experiment_id': experiment_id,
        'table_id': table_id,
        'grid_label': grid_label,
        'institution_id': institution_id,
        'member_id': member_id,
        'variable_id': variable_id,
    }
    cat = intake.open_esm_datastore(URL).search(**search_terms)

    zarr_options = {'consolidated': True, 'decode_times': False}
    cdf_options = {'chunks': {}, 'decode_times': False}
    dset_dict = cat.to_dataset_dict(zarr_kwargs=zarr_options,
                                    cdf_kwargs=cdf_options)

    # The search is expected to produce exactly one entry, so take whichever
    # item popitem() yields and build the cube from it.
    _label, dataset = dset_dict.popitem()
    cube = dataset[variable_id].to_iris()

    if any(coord.name() == 'air_pressure' for coord in cube.coords()):
        cube.coord('air_pressure').convert_units('hPa')

    # Squeezing is intended to drop the member dimension.
    return iris.util.squeeze(cube)
コード例 #16
0
    def load(self, query, catfile=DEFAULT_INTAKE_ESM_CAT, **kwargs):
        """Search an intake-esm catalogue and prepare the matching datasets.

        Parameters
        ----------
        query : dict
            Key, value pairs passed as keyword arguments to the catalogue
            search. Its ``'variable_id'`` entry (``None`` when absent) is
            handed on to ``_prep_datasets``.
        catfile : str, optional
            Catalogue metadata file to open. Defaults to
            ``DEFAULT_INTAKE_ESM_CAT``.
        **kwargs : dict, optional
            Keyword arguments forwarded to ``to_dataset_dict()`` on the
            search result.

        Returns
        -------
        datasets
            Whatever ``self._prep_datasets(variable, dset_dict)`` returns.
        """
        import intake

        matches = intake.open_esm_datastore(catfile).search(**query)
        dset_dict = matches.to_dataset_dict(**kwargs)
        return self._prep_datasets(query.get('variable_id'), dset_dict)
コード例 #17
0
def test_to_xarray_cesm_netcdf(chunks, expected_chunks):
    """``cdf_kwargs`` chunking is reflected in the loaded ``SHF`` data."""
    catalog = intake.open_esm_datastore(cdf_col)
    subset = catalog.search(variable=['SHF'],
                            member_id=[1, 3, 9],
                            experiment=['20C', 'RCP85'])
    datasets = subset.to_dataset_dict(cdf_kwargs={'chunks': chunks})
    ds = datasets.popitem()[1]
    assert ds['SHF'].data.chunksize == expected_chunks
コード例 #18
0
def test_to_xarray_cmip(chunks, expected_chunks):
    """``cdf_kwargs`` chunking is reflected in the loaded ``hfls`` data."""
    search_terms = {
        'variable': ['hfls'],
        'frequency': 'mon',
        'modeling_realm': 'atmos',
        'model': ['CNRM-CM5'],
    }
    subset = intake.open_esm_datastore(esmcol_path).search(**search_terms)
    datasets = subset.to_dataset_dict(cdf_kwargs={'chunks': chunks})
    ds = datasets.popitem()[1]
    assert ds['hfls'].data.chunksize == expected_chunks
コード例 #19
0
def test_to_dataset_dict_skip_error():
    """``skip_on_error`` decides whether load failures raise or are skipped.

    With ``skip_on_error=False`` the call must raise
    ``ESMDataSourceError``; with ``skip_on_error=True`` it must return an
    empty mapping.
    """
    cat = intake.open_esm_datastore(catalog_dict_records)

    def invalid_open_kwargs():
        # A fresh dict per call, as in the original two literals. The key
        # 'backend_kwargsd' is kept verbatim; presumably the misspelling is
        # what makes opening fail - confirm.
        return {'backend_kwargsd': {'storage_options': {'anon': True}}}

    with pytest.raises(intake_esm.source.ESMDataSourceError):
        cat.to_dataset_dict(xarray_open_kwargs=invalid_open_kwargs(),
                            skip_on_error=False)

    dsets = cat.to_dataset_dict(xarray_open_kwargs=invalid_open_kwargs(),
                                skip_on_error=True)

    assert len(dsets) == 0
コード例 #20
0
def create_data_dict(this_experiment_id, this_variable_id,
                     this_table_id, this_grid_label):
    """Search the Pangeo CMIP6 catalog and load the matching datasets.

    Opens ``DIR_CATALOG + "pangeo-cmip6.json"``, searches it with the given
    ids and loads the result with time decoding disabled.

    Args:
        this_experiment_id: The string ID for the experiment.
                            Can be list of strings.
        this_variable_id: The string ID for this variable (e.g. 'tas').
        this_table_id: ID for the table (e.g. 'Amon').
        this_grid_label: String label of the reference grid (e.g. 'gn').
    Returns:
        dataset_info: The ``df`` of the search result.
        dset_dict: The data dictionary returned by ``to_dataset_dict``.
        modelnames: List of the distinct source ids in the search result
                    (built from a ``set``, so its order is not defined).
    """
    col = intake.open_esm_datastore(DIR_CATALOG + "pangeo-cmip6.json")

    search_terms = {
        'experiment_id': this_experiment_id,
        'table_id': this_table_id,
        'variable_id': this_variable_id,
        'grid_label': this_grid_label,
    }
    cat = col.search(**search_terms)
    dataset_info = cat.df

    zarr_options = {'consolidated': True, 'decode_times': False}
    cdf_options = {'chunks': {}, 'decode_times': False}
    dset_dict = cat.to_dataset_dict(zarr_kwargs=zarr_options,
                                    cdf_kwargs=cdf_options)

    modelnames = list(set(cat.df['source_id']))

    return dataset_info, dset_dict, modelnames
コード例 #21
0
def test_to_dask(path, query, xarray_open_kwargs):
    """``to_dask`` on a search result returns a dask-backed dataset.

    The dataset must have a ``member_id`` dimension, at least one dask key,
    and a non-empty ``time`` encoding.
    """
    subset = intake.open_esm_datastore(path).search(**query)
    ds = subset.to_dask(xarray_open_kwargs=xarray_open_kwargs)
    assert 'member_id' in ds.dims
    assert len(ds.__dask_keys__()) > 0
    assert ds.time.encoding
コード例 #22
0
def get_ERA5_zstore_list(year: str = None) -> list:
    """Return the ``zstore`` entries of the ERA5 catalog.

    Args:
        year: When not ``None``, only entries that contain this substring
            are kept.

    Returns:
        A list built from the catalog's ``df.zstore`` column, optionally
        filtered by ``year``.
    """
    catalog = intake.open_esm_datastore(
        "https://cmip6downscaling.blob.core.windows.net/cmip6/ERA5_catalog.json"
    )
    all_stores = list(catalog.df.zstore)
    if year is None:
        return all_stores
    return [store for store in all_stores if year in store]
コード例 #23
0
def test_serialize():
    """Serialize a subset of the remote Pangeo CMIP6 catalog and reload it.

    The reloaded ``df`` must equal the subset's ``df`` and the reloaded
    catalog's ``_col_data['id']`` must equal the serialized name.
    """
    name = 'cmip6_bcc_esm1'
    search_terms = {
        'source_id': 'BCC-ESM1',
        'grid_label': 'gn',
        'table_id': 'Amon',
        'experiment_id': 'historical',
    }
    with TemporaryDirectory() as local_store:
        remote_catalog = intake.open_esm_datastore(
            'https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json'
        )
        subset = remote_catalog.search(**search_terms)
        subset.serialize(name=name, directory=local_store)

        reloaded = intake.open_esm_datastore(
            f'{local_store}/cmip6_bcc_esm1.json')
        pd.testing.assert_frame_equal(subset.df, reloaded.df)

        assert reloaded._col_data['id'] == name
コード例 #24
0
def test_init(capsys, url):
    """A catalog opens from ``url``; its repr contains 'catalog with'."""
    catalog = intake.open_esm_datastore(url)
    assert isinstance(catalog.df, pd.DataFrame)

    print(repr(catalog))
    # Read the printed repr back through pytest's capture fixture:
    # https://docs.pytest.org/en/latest/capture.html#accessing-captured-output-from-a-test-function
    printed = capsys.readouterr().out
    assert 'catalog with' in printed
コード例 #25
0
def test_to_aggregations_off(esmcol_path, query):
    """Clearing ``groupby_attrs`` gives one key per row of the subset.

    The subset's ``_grouped`` is then a plain ``DataFrame``, while the
    parent catalog's ``_grouped`` is still a ``DataFrameGroupBy``.
    """
    parent = intake.open_esm_datastore(esmcol_path)
    subset = parent.search(**query)
    row_count = len(subset.df)

    subset.groupby_attrs = []

    assert len(subset.keys()) == row_count
    assert isinstance(subset._grouped, pd.DataFrame)
    groupby_type = pd.core.groupby.generic.DataFrameGroupBy
    assert isinstance(parent._grouped, groupby_type)
コード例 #26
0
def test_progressbar(progressbar):
    """``to_dataset_dict`` runs with the given ``progressbar`` setting.

    There are no assertions; the test passes if nothing raises.
    """
    search_terms = {
        'variable': ['hfls'],
        'frequency': 'mon',
        'modeling_realm': 'atmos',
        'model': ['CNRM-CM5'],
    }
    catalog = intake.open_esm_datastore(cdf_col_sample_cmip5)
    subset = catalog.search(**search_terms)
    subset.to_dataset_dict(cdf_kwargs={'chunks': {}},
                           progressbar=progressbar)
コード例 #27
0
def test_catalog_with_registry_search():
    """Searching for the variable ``'FOO'`` narrows the derived catalog.

    Both catalogs report length 1; the ``derivedcat`` length goes from 2 on
    the full catalog to 1 on the search result.
    """
    full_cat = intake.open_esm_datastore(catalog_dict_records,
                                         registry=registry)
    foo_cat = full_cat.search(variable='FOO')

    assert len(full_cat) == 1
    assert len(foo_cat) == 1

    assert len(full_cat.derivedcat) == 2
    assert len(foo_cat.derivedcat) == 1
コード例 #28
0
def test_to_dataset_dict_nocache(esmcol_path, query):
    """Two identical searches load distinct dataset objects."""
    col = intake.open_esm_datastore(esmcol_path)
    zarr_options = {'consolidated': True}

    first_cat = col.search(**query)
    _, first_ds = first_cat.to_dataset_dict(
        zarr_kwargs=dict(zarr_options)).popitem()

    second_cat = col.search(**query)
    _, second_ds = second_cat.to_dataset_dict(
        zarr_kwargs=dict(zarr_options)).popitem()

    # Both objects are alive here, so comparing ids is an identity check.
    assert id(first_ds) != id(second_ds)
コード例 #29
0
def test_to_dataset_dict_aggfalse(path, query):
    """With ``aggregate=False`` there is one dataset per catalog row."""
    cat = intake.open_esm_datastore(path).search(**query)
    expected_count = len(cat.df)
    open_options = {'chunks': {'time': 1}}
    dsets = cat.to_dataset_dict(xarray_open_kwargs=open_options,
                                aggregate=False)
    assert len(dsets) == expected_count
コード例 #30
0
def test_to_collection(path, query, xarray_open_kwargs):
    """``to_collection`` returns an ``xc.Collection`` of dask datasets.

    One dataset is popped from the collection and must have a ``member_id``
    dimension, at least one dask key, and a non-empty ``time`` encoding.
    """
    subset = intake.open_esm_datastore(path).search(**query)
    coll = subset.to_collection(xarray_open_kwargs=xarray_open_kwargs)
    ds = coll.popitem()[1]
    assert 'member_id' in ds.dims
    assert len(ds.__dask_keys__()) > 0
    assert ds.time.encoding
    assert isinstance(coll, xc.Collection)