def write_cube(cfg, cube, data):
    """Write cube (check for MLR attributes and existing files first)."""
    if not mlr.datasets_have_mlr_attributes([data], log_level='error'):
        raise ValueError(
            f"Cannot write cube {cube.summary(shorten=True)} using metadata "
            f"{data}")

    # Get new path
    new_path = data['filename']
    if os.path.exists(new_path):
        now = datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S%f")
        data['filename'] = new_path.replace('.nc', f'_{now}.nc')

    # Provenance
    ancestors = [data.pop('original_filename')]
    opts = [opt for opt in cfg if opt in globals()]
    caption = (f"{cube.long_name} for {mlr.get_alias(data)} preprocessed with "
               f"operations {opts}.")
    record = {
        'ancestors': ancestors,
        'authors': ['schlund_manuel'],
        'caption': caption,
        'references': ['schlund20jgr'],
    }
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(data['filename'], record)

    # Write file
    io.metadata_to_netcdf(cube, data)

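
# Hypothetical usage sketch for write_cube above (the helper name and the
# '_processed' suffix are invented for illustration): the metadata dict is
# expected to carry the MLR attributes checked by
# mlr.datasets_have_mlr_attributes, an 'original_filename' entry pointing at
# the ancestor file, and 'filename' set to the desired output path.
import iris


def _example_write_cube_call(cfg, data):
    """Illustrative only: load the ancestor cube and hand it to write_cube."""
    cube = iris.load_cube(data['filename'])
    data['original_filename'] = data['filename']
    data['filename'] = data['filename'].replace('.nc', '_processed.nc')
    write_cube(cfg, cube, data)
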
def main(cfg):
    """Run the diagnostic."""
    input_data = (
        select_metadata(cfg['input_data'].values(), short_name='tas') +
        select_metadata(cfg['input_data'].values(), short_name='tasa'))
    if not input_data:
        raise ValueError("This diagnostic needs the variable 'tas' or 'tasa'")

    # Calculate psi for every dataset
    psis = {}
    psi_attrs = {
        'short_name': 'psi',
        'long_name': 'Temperature variability metric',
        'units': 'K',
    }
    grouped_data = group_metadata(input_data, 'dataset')
    for (dataset, [data]) in grouped_data.items():
        logger.info("Processing %s", dataset)
        cube = iris.load_cube(data['filename'])
        iris.coord_categorisation.add_year(cube, 'time')
        cube = cube.aggregated_by('year', iris.analysis.MEAN)
        psi_cube = calculate_psi(cube, cfg)
        data.update(psi_attrs)
        data.pop('standard_name', '')

        # Provenance
        caption = ("Temporal evolution of temperature variability metric psi "
                   "between {start_year} and {end_year} for {dataset}.".format(
                       **data))
        provenance_record = get_provenance_record(caption, [data['filename']])
        out_path = get_diagnostic_filename('psi_' + dataset, cfg)
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(out_path, provenance_record)

        # Save psi for every dataset
        data['filename'] = out_path
        io.metadata_to_netcdf(psi_cube, data)

        # Save averaged psi
        psis[dataset] = np.mean(psi_cube.data)

    # Save averaged psis for every dataset in one file
    out_path = get_diagnostic_filename('psi', cfg)
    io.save_scalar_data(psis, out_path, psi_attrs,
                        attributes=psi_cube.attributes)

    # Provenance
    caption = "{long_name} for multiple climate models.".format(**psi_attrs)
    ancestor_files = [d['filename'] for d in input_data]
    provenance_record = get_provenance_record(caption, ancestor_files)
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(out_path, provenance_record)

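
# For context: diagnostics like main(cfg) above are normally started through
# ESMValTool's shared run_diagnostic() context manager, which sets up logging
# and provenance and passes the run configuration. A minimal sketch of that
# standard entry point (assuming main lives in a regular diagnostic script):
from esmvaltool.diag_scripts.shared import run_diagnostic

if __name__ == '__main__':
    with run_diagnostic() as config:
        main(config)
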
def test_metadata_to_netcdf(mock_logger, mock_save, metadata, cube, output):
    """Test writing a cube with metadata to a netcdf file."""
    if isinstance(output, type):
        with pytest.raises(output):
            io.metadata_to_netcdf(cube, metadata)
        assert not mock_save.called
        return
    io.metadata_to_netcdf(cube, metadata)
    if metadata.get('standard_name') == INVALID_STANDARD_NAME:
        mock_logger.warning.assert_called()
        assert 'invalid_standard_name' in output.attributes
    else:
        mock_logger.warning.assert_not_called()
        assert 'invalid_standard_name' not in output.attributes
    save_args = (output, metadata['filename'])
    assert mock_save.call_args_list == [mock.call(*save_args)]

def test_metadata_to_netcdf(mock_logger, mock_save, attrs, var_attrs, cube,
                            output):
    """Test writing a cube with metadata to a netcdf file."""
    wrong_name = 'I_am_an_invalid_standard_name'
    metadata = deepcopy({**attrs, **var_attrs})
    metadata['short_name'] = metadata.pop('var_name')
    if metadata['dataset'] == 'a':
        metadata['standard_name'] = wrong_name
    io.metadata_to_netcdf(cube, metadata)
    if metadata.get('standard_name') == wrong_name:
        mock_logger.debug.assert_called()
    else:
        mock_logger.debug.assert_not_called()
    if output is None:
        mock_logger.warning.assert_called()
        assert not mock_save.called
    else:
        mock_logger.warning.assert_not_called()
        save_args = (output, attrs['filename'])
        assert mock_save.call_args_list == [mock.call(*save_args)]

def rescale_labels(cfg, y_data, y_mean, y_std):
    """Rescale labels."""
    input_data = _get_input_data(cfg)
    labels_to_rescale = select_metadata(input_data,
                                        var_type='label_to_rescale')
    _check_datasets(labels_to_rescale, 'label_to_rescale')

    # Get groups
    groups = []
    for dataset in labels_to_rescale:
        group = _get_group(dataset, cfg['group_by_attributes'])
        groups.append(group)
        dataset['group__for__rescaling'] = group
    groups.sort()
    if set(groups) != set(y_data.index):
        raise ValueError(
            f"Expected identical groups for 'label' and 'label_to_rescale' "
            f"data, got\n'label': {y_data.index.values}\n'label_to_rescale': "
            f"{np.array(groups)}")

    # Rescale data
    ref_cube = _get_ref_cube(labels_to_rescale)
    for dataset in labels_to_rescale:
        cube = iris.load_cube(dataset['filename'])
        rescaling_factor = (
            y_mean / y_data.loc[dataset['group__for__rescaling']].values)
        logger.info("Rescaling '%s' with factor %.2f",
                    dataset['group__for__rescaling'], rescaling_factor)
        rescaled_cube = cube.copy(cube.data * rescaling_factor)

        # Adapt metadata
        rescaled_dataset = deepcopy(dataset)
        rescaled_dataset['var_type'] = 'label'
        rescaled_dataset['rescaled'] = 'using emergent relationship'
        if '_label' in dataset['filename']:
            rescaled_dataset['filename'] = dataset['filename'].replace(
                '_label_to_rescale', '_rescaled_label')
        else:
            rescaled_dataset['filename'] = dataset['filename'].replace(
                '.nc', '_rescaled_label.nc')

        # Save data
        rescaled_dataset['filename'] = mlr.get_new_path(
            cfg, rescaled_dataset['filename'])
        io.metadata_to_netcdf(rescaled_cube, rescaled_dataset)

        # Provenance
        record = {
            'ancestors': [dataset['filename']] + _get_ec_ancestors(cfg),
            'authors': ['schlund_manuel'],
            'caption': f"Rescaled {rescaled_cube.long_name} for "
                       f"{mlr.get_alias(rescaled_dataset)} using emergent "
                       f"relationship.",
            'references': ['schlund20jgr'],
            'themes': ['EC'],
        }
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(rescaled_dataset['filename'], record)

    # Rescale MMM to estimate error
    logger.debug("Estimating error using global error %e", y_std)
    mmm_cube = _get_mmm_cube(labels_to_rescale)
    error_cube = ref_cube.copy(mmm_cube.data * y_std / y_data.mean().values)
    error_dataset = _get_error_dataset(cfg, labels_to_rescale)
    io.metadata_to_netcdf(error_cube, error_dataset)

    # Provenance
    record = {
        'ancestors': ([d['filename'] for d in labels_to_rescale] +
                      _get_ec_ancestors(cfg)),
        'authors': ['schlund_manuel'],
        'caption': f"Rescaled {error_cube.long_name} using emergent "
                   f"relationship.",
        'references': ['schlund20jgr'],
        'themes': ['EC'],
    }
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(error_dataset['filename'], record)
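
# Toy numbers (invented, for illustration only) for the rescaling factor used
# in rescale_labels above: if the constrained estimate y_mean is 3.0 K and a
# model group's unconstrained label value is 4.0 K, its 'label_to_rescale'
# field is multiplied by 3.0 / 4.0 = 0.75, pulling its mean towards y_mean.
import numpy as np

y_mean_example = 3.0
model_label_example = 4.0
rescaling_factor_example = y_mean_example / model_label_example  # 0.75
field_example = np.array([3.8, 4.0, 4.2])
rescaled_field_example = field_example * rescaling_factor_example
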