def main(cfg):
    """Run the diagnostic."""
    cfg = get_default_settings(cfg)
    diag = check_cfg(cfg)
    sns.set(**cfg.get('seaborn_settings', {}))

    # Collect input datasets from the recipe and from ancestor files,
    # then validate them
    datasets = list(cfg['input_data'].values())
    datasets.extend(io.netcdf_to_metadata(cfg, pattern=cfg.get('pattern')))
    datasets = deepcopy(datasets)
    check_input_data(datasets)
    datasets_by_model = group_metadata(datasets, 'dataset')

    # Look up the diagnostic function by name and evaluate it to obtain
    # the X axis of the emergent constraint
    diag_func = globals()[diag]
    (diag_data, var_attrs, attrs) = diag_func(datasets_by_model, cfg)
    attrs.update(get_global_attributes(datasets, cfg))

    # Save the scalar results as netCDF
    netcdf_path = get_diagnostic_filename(diag, cfg)
    io.save_scalar_data(diag_data, netcdf_path, var_attrs, attributes=attrs)
    logger.info("Found data:\n%s", pformat(diag_data))

    # Record provenance for the written file
    provenance_record = ec.get_provenance_record(
        {diag: attrs}, [diag], caption=attrs['plot_xlabel'],
        ancestors=[d['filename'] for d in datasets])
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, provenance_record)
def get_input_data(cfg, pattern=None, check_mlr_attributes=True, ignore=None):
    """Get input data and check MLR attributes if desired.

    Use ``input_data`` and ancestors to get all relevant input files.

    Parameters
    ----------
    cfg : dict
        Recipe configuration.
    pattern : str, optional
        Pattern matched against ancestor file names.
    check_mlr_attributes : bool, optional (default: True)
        If ``True``, only returns datasets with valid MLR attributes. If
        ``False``, returns all found datasets.
    ignore : list of dict, optional
        Ignore specific datasets by specifying multiple :obj:`dict`s of
        metadata. By setting an attribute to ``None``, ignore all datasets
        which do not have that attribute.

    Returns
    -------
    list of dict
        List of input datasets.

    Raises
    ------
    ValueError
        No input data found or at least one dataset has invalid attributes.

    """
    logger.debug("Extracting input files")
    all_data = list(cfg['input_data'].values())
    all_data.extend(io.netcdf_to_metadata(cfg, pattern=pattern))
    all_data = deepcopy(all_data)

    if ignore is None:
        valid_data = all_data
    else:
        # Collect every dataset matched by any of the 'ignore' entries and
        # drop those from the result
        logger.info("Ignoring files with %s", ignore)
        ignored_datasets = []
        for kwargs in ignore:
            ignored_datasets.extend(_get_datasets(all_data, **kwargs))
        valid_data = [
            dataset for dataset in all_data
            if dataset not in ignored_datasets
        ]

    if not valid_data:
        raise ValueError("No input data found")
    if check_mlr_attributes and not datasets_have_mlr_attributes(
            valid_data, log_level='error'):
        raise ValueError("At least one input dataset does not have valid "
                         "MLR attributes")
    logger.debug("Found files:")
    logger.debug(pformat([d['filename'] for d in valid_data]))
    return valid_data
def get_psi(cfg):
    """Get time-dependent ``psi`` data."""
    cubes = {}
    obs_datasets = []
    metadata = io.netcdf_to_metadata(cfg, pattern='psi_*.nc')
    for (dataset_name, [dataset_info]) in group_metadata(
            metadata, 'dataset').items():
        # Reduce to annual means (the cubes already carry a 'year' coord)
        cube = iris.load_cube(dataset_info['filename'])
        cubes[dataset_name] = cube.aggregated_by('year', iris.analysis.MEAN)
        if dataset_info['project'] == 'OBS':
            obs_datasets.append(dataset_name)
    return (cubes, obs_datasets)
def main(cfg):
    """Run the diagnostic."""
    all_metadata = cfg['input_data'].values()
    input_data = (select_metadata(all_metadata, short_name='tas') +
                  select_metadata(all_metadata, short_name='tasa'))
    if not input_data:
        raise ValueError("This diagnostics needs 'tas' or 'tasa' variable")

    # Annual-mean near-surface temperature for every dataset; remember
    # which datasets are observations
    tas_cubes = {}
    tas_obs = []
    for (dataset, [data]) in group_metadata(input_data, 'dataset').items():
        cube = iris.load_cube(data['filename'])
        iris.coord_categorisation.add_year(cube, 'time')
        tas_cubes[dataset] = cube.aggregated_by('year', iris.analysis.MEAN)
        if data['project'] == 'OBS':
            tas_obs.append(dataset)

    # Annual-mean time-dependent psi data from ancestor files
    psi_cubes = {}
    psi_obs = []
    for (dataset, [data]) in group_metadata(
            io.netcdf_to_metadata(cfg, pattern='psi_*.nc'),
            'dataset').items():
        cube = iris.load_cube(data['filename'])
        psi_cubes[dataset] = cube.aggregated_by('year', iris.analysis.MEAN)
        if data['project'] == 'OBS':
            psi_obs.append(dataset)

    # Scalar psi, ECS and lambda of the models
    (psi_cube, ecs_cube, lambda_cube) = get_external_cubes(cfg)

    # Plots
    for obs_dataset in tas_obs:
        logger.info("Observation for tas: %s", obs_dataset)
        plot_temperature_anomaly(cfg, tas_cubes, lambda_cube, obs_dataset)
    for obs_dataset in psi_obs:
        logger.info("Observation for psi: %s", obs_dataset)
        plot_psi(cfg, psi_cubes, lambda_cube, obs_dataset)
        obs_cube = psi_cubes[obs_dataset]
        plot_emergent_relationship(cfg, psi_cube, ecs_cube, lambda_cube,
                                   obs_cube)
        plot_pdf(cfg, psi_cube, ecs_cube, obs_cube)
        plot_cdf(cfg, psi_cube, ecs_cube, obs_cube)

        # Report the observationally constrained ECS range
        ecs_range = get_ecs_range(cfg, psi_cube, ecs_cube, obs_cube)
        logger.info("Observational constraint: Ψ = (%.2f ± %.2f) K",
                    np.mean(obs_cube.data), np.std(obs_cube.data))
        logger.info(
            "Constrained ECS range: (%.2f - %.2f) K with best "
            "estimate %.2f K", ecs_range[1], ecs_range[2], ecs_range[0])
def test_netcdf_to_metadata(mock_walk, mock_load_cube, mock_logger,
                            mock_get_all_ancestors, cubes, walk_out, root,
                            output, n_logger):
    """Test cube to metadata."""
    # Ancestor files are every file reported by the (mocked) os.walk
    ancestors = [
        os.path.join(files_root, filename)
        for (files_root, _, filenames) in walk_out
        for filename in filenames
    ]
    mock_get_all_ancestors.return_value = ancestors
    mock_walk.return_value = walk_out
    mock_load_cube.side_effect = cubes
    if isinstance(output, type):
        # 'output' is an exception class: the call is expected to raise
        with pytest.raises(output):
            io.netcdf_to_metadata({}, pattern=root, root=root)
    else:
        # Normalize expected metadata the same way netcdf_to_metadata does
        for dataset in output:
            dataset['short_name'] = dataset.pop('var_name')
            dataset.setdefault('standard_name', None)
        metadata = io.netcdf_to_metadata({}, pattern=root, root=root)
        assert metadata == output
    assert mock_logger.error.call_count == n_logger
def test_netcdf_to_metadata(mock_walk, mock_load_cube, mock_logger,
                            mock_get_all_ancestors, root):
    """Test cube to metadata."""
    attrs = [
        {
            'dataset': 'model',
            'filename': 'path/to/model1.nc',
            'project': 'CMIP42',
        },
        {
            'dataset': 'model',
            'filename': 'path/to/model1.yml',
            'project': 'CMIP42',
        },
        {
            'dataset': 'model',
            'filename': 'path/to/model2.nc',
        },
        {
            'dataset': 'model',
            'filename': 'path/to/model3.nc',
            'project': 'CMIP42',
        },
        {
            'dataset': 'model',
            'filename': 'path/to/model4.nc',
            'project': 'CMIP42',
        },
    ]
    var_attrs = [
        {
            'long_name': LONG_NAME,
            'var_name': SHORT_NAME,
            'units': UNITS,
        },
        {
            'long_name': LONG_NAME,
            'var_name': SHORT_NAME,
            'units': UNITS,
        },
        {
            'long_name': LONG_NAME,
            'var_name': SHORT_NAME,
        },
        {
            'long_name': LONG_NAME,
            'var_name': SHORT_NAME,
            'standard_name': STANDARD_NAME,
            'units': UNITS,
        },
        {
            'long_name': LONG_NAME,
            'var_name': SHORT_NAME,
            'standard_name': None,
            'units': UNITS,
        },
    ]
    # Only the *.nc files yield cubes (index 1 is a YAML file)
    cubes = [
        iris.cube.Cube(0, attributes=attrs[idx], **var_attrs[idx])
        for idx in (0, 2, 3, 4)
    ]
    walk_output = [
        ('path/to', [], ['model1.nc', 'model1.yml']),
        ('path/to', ['d'], ['model2.nc', 'model3.nc', 'model4.nc']),
    ]
    # Expected metadata: datasets 0, 3 and 4 survive (2 lacks 'units' and
    # 'project'); keys are normalized like netcdf_to_metadata does
    output = deepcopy([{**attrs[idx], **var_attrs[idx]} for idx in (0, 3, 4)])
    for out in output:
        out['short_name'] = out.pop('var_name')
        out.setdefault('standard_name', None)
    mock_get_all_ancestors.return_value = [a['filename'] for a in attrs]
    mock_walk.return_value = walk_output
    mock_load_cube.side_effect = cubes
    metadata = io.netcdf_to_metadata({}, pattern=root, root=root)
    assert metadata == output
    mock_logger.warning.assert_called()