def write_cube(cfg, cube, data):
    """Write cube (check for MLR attributes and existing files first)."""
    if not mlr.datasets_have_mlr_attributes([data], log_level='error'):
        raise ValueError(
            f"Cannot write cube {cube.summary(shorten=True)} using metadata "
            f"{data}")

    # Get new path
    new_path = data['filename']
    if os.path.exists(new_path):
        now = datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S%f")
        data['filename'] = new_path.replace('.nc', f'_{now}.nc')

    # Provenance
    ancestors = [data.pop('original_filename')]
    opts = [opt for opt in cfg if opt in globals()]
    caption = (f"{cube.long_name} for {mlr.get_alias(data)} preprocessed with "
               f"operations {opts}.")
    record = {
        'ancestors': ancestors,
        'authors': ['schlund_manuel'],
        'caption': caption,
        'references': ['schlund20jgr'],
    }
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(data['filename'], record)

    # Write file
    io.metadata_to_netcdf(cube, data)
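# Hedged usage sketch (not from the source): write_cube expects a metadata
# dict that passes mlr.datasets_have_mlr_attributes alongside the usual
# ESMValTool keys. All values below, and the 'var_type'/'tag' keys, are
# illustrative assumptions only; cfg and cube come from the diagnostic itself.
def _example_write_cube_call(cfg, cube):
    example_data = {
        'filename': '/work/diag/tas_preprocessed.nc',   # target output path
        'original_filename': '/work/preproc/tas.nc',    # recorded as ancestor
        'dataset': 'EXAMPLE-MODEL',
        'short_name': 'tas',
        'var_type': 'feature',   # assumed MLR attribute
        'tag': 'TAS',            # assumed MLR attribute
    }
    write_cube(cfg, cube, example_data)

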
def main(cfg):
    """Run the diagnostic."""
    input_data = (
        select_metadata(cfg['input_data'].values(), short_name='tas') +
        select_metadata(cfg['input_data'].values(), short_name='tasa'))
    if not input_data:
        raise ValueError("This diagnostics needs 'tas' or 'tasa' variable")

    # Calculate psi for every dataset
    psis = {}
    psi_attrs = {
        'short_name': 'psi',
        'long_name': 'Temperature variability metric',
        'units': 'K',
    }
    grouped_data = group_metadata(input_data, 'dataset')
    for (dataset, [data]) in grouped_data.items():
        logger.info("Processing %s", dataset)
        cube = iris.load_cube(data['filename'])
        iris.coord_categorisation.add_year(cube, 'time')
        cube = cube.aggregated_by('year', iris.analysis.MEAN)
        psi_cube = calculate_psi(cube, cfg)
        data.update(psi_attrs)
        data.pop('standard_name', '')

        # Provenance
        caption = ("Temporal evolution of temperature variability metric psi "
                   "between {start_year} and {end_year} for {dataset}.".format(
                       **data))
        provenance_record = get_provenance_record(caption, [data['filename']])
        out_path = get_diagnostic_filename('psi_' + dataset, cfg)
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(out_path, provenance_record)

        # Save psi for every dataset
        data['filename'] = out_path
        io.metadata_to_netcdf(psi_cube, data)

        # Save averaged psi
        psis[dataset] = np.mean(psi_cube.data)

    # Save averaged psis for every dataset in one file
    out_path = get_diagnostic_filename('psi', cfg)
    io.save_scalar_data(psis,
                        out_path,
                        psi_attrs,
                        attributes=psi_cube.attributes)

    # Provenance
    caption = "{long_name} for mutliple climate models.".format(**psi_attrs)
    ancestor_files = [d['filename'] for d in input_data]
    provenance_record = get_provenance_record(caption, ancestor_files)
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(out_path, provenance_record)
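# Note on the loop in main: group_metadata(input_data, 'dataset') returns a
# dict mapping each value of the 'dataset' attribute to a *list* of metadata
# dicts, so the `for (dataset, [data]) in ...` destructuring assumes exactly
# one entry per dataset. A minimal illustration with made-up metadata (the
# values are assumptions, not from the source):
def _example_grouping():
    example_input = [
        {'dataset': 'MODEL-A', 'short_name': 'tas', 'filename': 'a_tas.nc'},
        {'dataset': 'MODEL-B', 'short_name': 'tasa', 'filename': 'b_tasa.nc'},
    ]
    grouped = group_metadata(example_input, 'dataset')
    # grouped == {'MODEL-A': [{...'tas'...}], 'MODEL-B': [{...'tasa'...}]}
    return grouped

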
def test_metadata_to_netcdf(mock_logger, mock_save, metadata, cube, output):
    """Test metadata to cube."""
    if isinstance(output, type):
        with pytest.raises(output):
            io.metadata_to_netcdf(cube, metadata)
        assert not mock_save.called
        return
    io.metadata_to_netcdf(cube, metadata)
    if metadata.get('standard_name') == INVALID_STANDARD_NAME:
        mock_logger.warning.assert_called()
        assert 'invalid_standard_name' in output.attributes
    else:
        mock_logger.warning.assert_not_called()
        assert 'invalid_standard_name' not in output.attributes
    save_args = (output, metadata['filename'])
    assert mock_save.call_args_list == [mock.call(*save_args)]
def test_metadata_to_netcdf(mock_logger, mock_save, attrs, var_attrs, cube,
                            output):
    """Test metadata to cube."""
    wrong_name = 'I_am_an_invalid_standard_name'
    metadata = deepcopy({**attrs, **var_attrs})
    metadata['short_name'] = metadata.pop('var_name')
    if metadata['dataset'] == 'a':
        metadata['standard_name'] = wrong_name
    io.metadata_to_netcdf(cube, metadata)
    if metadata.get('standard_name') == wrong_name:
        mock_logger.debug.assert_called()
    else:
        mock_logger.debug.assert_not_called()
    if output is None:
        mock_logger.warning.assert_called()
        assert not mock_save.called
    else:
        mock_logger.warning.assert_not_called()
        save_args = (output, attrs['filename'])
        assert mock_save.call_args_list == [mock.call(*save_args)]
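# The two tests above rely on pytest parametrization and mock patching that is
# not shown in this excerpt; mock, pytest, and io are assumed to be imported as
# in the tests themselves. A minimal sketch of the assumed harness (the patch
# targets and parameter list are guesses, not the source's actual fixtures).
# Stacked mock.patch decorators inject mocks bottom-up, which is why
# mock_logger precedes mock_save in the signatures above.
@pytest.mark.parametrize('metadata,cube,output', [])  # cases elided
@mock.patch.object(io.iris, 'save', autospec=True)    # -> mock_save
@mock.patch.object(io, 'logger', autospec=True)       # -> mock_logger
def test_metadata_to_netcdf_sketch(mock_logger, mock_save, metadata, cube,
                                   output):
    """Hypothetical harness showing how the mocks are injected."""
    io.metadata_to_netcdf(cube, metadata)
    mock_save.assert_called_once()

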
def rescale_labels(cfg, y_data, y_mean, y_std):
    """Rescale labels."""
    input_data = _get_input_data(cfg)
    labels_to_rescale = select_metadata(input_data,
                                        var_type='label_to_rescale')
    _check_datasets(labels_to_rescale, 'label_to_rescale')

    # Get groups
    groups = []
    for dataset in labels_to_rescale:
        group = _get_group(dataset, cfg['group_by_attributes'])
        groups.append(group)
        dataset['group__for__rescaling'] = group

    groups.sort()
    if set(groups) != set(y_data.index):
        raise ValueError(
            f"Expected identical groups for 'label' and 'label_to_rescale' "
            f"data, got\n'label': {y_data.index.values}\n'label_to_rescale': "
            f"{np.array(groups)}")

    # Rescale data
    ref_cube = _get_ref_cube(labels_to_rescale)
    for dataset in labels_to_rescale:
        cube = iris.load_cube(dataset['filename'])
        rescaling_factor = (
            y_mean / y_data.loc[dataset['group__for__rescaling']].values)
        logger.info("Rescaling '%s' with factor %.2f",
                    dataset['group__for__rescaling'], rescaling_factor)
        rescaled_cube = cube.copy(cube.data * rescaling_factor)

        # Adapt metadata
        rescaled_dataset = deepcopy(dataset)
        rescaled_dataset['var_type'] = 'label'
        rescaled_dataset['rescaled'] = 'using emergent relationship'
        if '_label_to_rescale' in dataset['filename']:
            rescaled_dataset['filename'] = dataset['filename'].replace(
                '_label_to_rescale', '_rescaled_label')
        else:
            rescaled_dataset['filename'] = dataset['filename'].replace(
                '.nc', '_rescaled_label.nc')

        # Save data
        rescaled_dataset['filename'] = mlr.get_new_path(
            cfg, rescaled_dataset['filename'])
        io.metadata_to_netcdf(rescaled_cube, rescaled_dataset)

        # Provenance
        record = {
            'ancestors': [dataset['filename']] + _get_ec_ancestors(cfg),
            'authors': ['schlund_manuel'],
            'caption':
            f"Rescaled {rescaled_cube.long_name} for "
            f"{mlr.get_alias(rescaled_dataset)} using emergent "
            f"relationship.",
            'references': ['schlund20jgr'],
            'themes': ['EC'],
        }
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(rescaled_dataset['filename'], record)

    # Rescale MMM to estimate error
    logger.debug("Estimating error using global error %e", y_std)
    mmm_cube = _get_mmm_cube(labels_to_rescale)
    error_cube = ref_cube.copy(mmm_cube.data * y_std / y_data.mean().values)
    error_dataset = _get_error_dataset(cfg, labels_to_rescale)
    io.metadata_to_netcdf(error_cube, error_dataset)

    # Provenance
    record = {
        'ancestors':
        ([d['filename'] for d in labels_to_rescale] + _get_ec_ancestors(cfg)),
        'authors': ['schlund_manuel'],
        'caption':
        f"Rescaled {error_cube.long_name} using emergent "
        f"relationship.",
        'references': ['schlund20jgr'],
        'themes': ['EC'],
    }
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(error_dataset['filename'], record)
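

# Worked toy example of the rescaling arithmetic above (made-up numbers, not
# from the source): each 'label_to_rescale' field is multiplied by
# y_mean / y_data[group], pulling it towards the emergent-constraint estimate
# y_mean, and the error field is the multi-model mean scaled by the relative
# uncertainty y_std / y_data.mean().
def _example_rescaling_arithmetic():
    import numpy as np
    import pandas as pd

    y_data = pd.Series({'MODEL-A': 2.0, 'MODEL-B': 4.0})  # unconstrained
    y_mean = 3.0                                           # constrained mean
    y_std = 0.6                                            # constrained error

    factors = y_mean / y_data
    # factors: MODEL-A -> 1.5, MODEL-B -> 0.75

    mmm_field = np.array([2.5, 3.5])                 # toy multi-model-mean
    error_field = mmm_field * y_std / y_data.mean()  # relative error = 0.2
    return factors, error_field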