def _write_xy_provenance(cfg, cubes, plot_path, title, *attrs):
    """Write provenance information for X-Y plots.

    Saves the plotted cube(s) alongside the plot and logs a provenance
    record that links the netCDF file, the plot and its ancestor files.
    """
    cubes = cubes.copy()
    if isinstance(cubes, iris.cube.Cube):
        # Normalize a single cube to a CubeList so saving/joining works
        cubes = iris.cube.CubeList([cubes])

    # Collect all ancestor filenames ('|'-separated lists are split up)
    ancestors = [
        filename
        for attr in attrs
        for filename in attr['filename'].split('|')
    ]

    netcdf_path = mlr.get_new_path(cfg, plot_path)
    io.iris_save(cubes, netcdf_path)

    # Build caption from the long names of all plotted cubes
    joined_long_names = ' and '.join(cube.long_name for cube in cubes)
    suffix = f" for {title}." if title else '.'
    caption = f"Line plot of {joined_long_names}{suffix}"

    record = {
        'ancestors': ancestors,
        'authors': ['schlund_manuel'],
        'caption': caption,
        'plot_file': plot_path,
        'plot_types': ['line'],
        'references': ['schlund20jgr'],
    }
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, record)
def postprocess_mean(cfg, cube, data):
    """Postprocess mean prediction cube.

    Collapses the cube, converts units, saves it under a new path and
    records provenance for the result.
    """
    logger.info("Postprocessing mean prediction cube %s",
                cube.summary(shorten=True))

    # Collapse over the regular dimensions and fix units in place
    cube = _collapse_regular_cube(cfg, cube)
    _convert_units(cfg, cube)

    # Persist the postprocessed cube
    output_path = mlr.get_new_path(cfg, data['filename'])
    io.iris_save(cube, output_path)
    logger.info("Mean prediction: %s %s", cube.data, cube.units)

    _write_provenance(cfg, "Postprocessed", cube, output_path,
                      [data['filename']])
def _write_provenance(cfg, data_frame, plot_path, title, ancestors,
                      **cube_kwargs):
    """Write provenance information.

    Converts the given pandas object to a cube, saves it next to the
    plot and logs a provenance record for the netCDF file.
    """
    # Convert the pandas object into an iris cube for saving
    out_cube = ec.pandas_object_to_cube(data_frame, **cube_kwargs)
    nc_path = mlr.get_new_path(cfg, plot_path)
    io.iris_save(out_cube, nc_path)

    provenance_record = {
        'ancestors': ancestors,
        'authors': ['schlund_manuel'],
        'caption': f"Boxplot of {title}.",
        'plot_file': plot_path,
        'plot_types': ['box'],
        'references': ['schlund20jgr'],
    }
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(nc_path, provenance_record)
def _write_map_provenance(cfg, cube, plot_path, title, *attrs):
    """Write provenance information for map plots.

    Saves a copy of the plotted cube and logs a provenance record that
    links the netCDF file, the plot and its ancestor files.
    """
    cube = cube.copy()

    # Collect all ancestor filenames ('|'-separated lists are split up)
    ancestors = [
        filename
        for attr in attrs
        for filename in attr['filename'].split('|')
    ]

    netcdf_path = mlr.get_new_path(cfg, plot_path)
    io.iris_save(cube, netcdf_path)

    provenance_record = {
        'ancestors': ancestors,
        'authors': ['schlund_manuel'],
        'caption': f"Geographical distribution of {cube.long_name} for "
                   f"{title}.",
        'plot_file': plot_path,
        'plot_types': ['geo'],
        'references': ['schlund20jgr'],
    }
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, provenance_record)
def postprocess_errors(cfg, ref_cube, error_datasets, cov_estim_datasets):
    """Postprocess errors.

    Combines the given error datasets into a squared-error cube (optionally
    augmented by the variance taken from the covariance dataset) and derives
    lower/upper error bounds, an estimated real error and the real error.
    """
    logger.info(
        "Postprocessing errors using mean prediction cube %s as reference",
        ref_cube.summary(shorten=True))

    # Separate the covariance dataset (if present) from the other errors
    cov_cube, error_datasets = _get_covariance_dataset(error_datasets,
                                                       ref_cube)

    # Combine remaining error datasets into a single squared-error cube
    squared_error_cube = mlr.get_squared_error_cube(ref_cube, error_datasets)

    # Optionally add the variance (diagonal of the covariance matrix) to the
    # squared errors
    if cfg.get('add_var_from_cov', True) and cov_cube is not None:
        variance = np.ma.empty(ref_cube.shape, dtype=ref_cube.dtype)
        ref_mask = np.ma.getmaskarray(ref_cube.data)
        variance[ref_mask] = np.ma.masked
        # Only unmasked cells correspond to rows/columns of the covariance
        variance[~ref_mask] = np.diagonal(cov_cube.data.copy())
        squared_error_cube.data += variance
        logger.debug(
            "Added variance calculated from covariance to squared error "
            "datasets")
        # Make sure the error-bound branch below still runs even if no
        # plain error datasets were given
        if not error_datasets:
            error_datasets = True

    # Common basename for all error output files
    basepath = mlr.get_new_path(cfg, ref_cube.attributes['filename'])
    basepath = basepath.replace('.nc', '_error.nc')

    # Lower and upper error bounds
    if error_datasets:
        _calculate_lower_error_bound(cfg, squared_error_cube, basepath)
        _calculate_upper_error_bound(cfg, squared_error_cube, basepath)

    # Estimated real error using the estimated covariance
    if cov_estim_datasets:
        _estimate_real_error(cfg, squared_error_cube,
                             cov_estim_datasets[0], basepath)

    # Real error from the true covariance
    if cov_cube is not None:
        _calculate_real_error(cfg, ref_cube, cov_cube, basepath)
def rescale_labels(cfg, y_data, y_mean, y_std):
    """Rescale labels.

    Rescales every ``label_to_rescale`` dataset with its group-specific
    factor derived from the emergent relationship, saves the rescaled data
    with provenance, and finally rescales the multi-model mean to estimate
    the error.
    """
    input_data = _get_input_data(cfg)
    labels_to_rescale = select_metadata(input_data,
                                        var_type='label_to_rescale')
    _check_datasets(labels_to_rescale, 'label_to_rescale')

    # Assign a rescaling group to every dataset and check that the groups
    # match the index of the label data
    for dataset in labels_to_rescale:
        dataset['group__for__rescaling'] = _get_group(
            dataset, cfg['group_by_attributes'])
    groups = sorted(
        dataset['group__for__rescaling'] for dataset in labels_to_rescale)
    if set(groups) != set(y_data.index):
        raise ValueError(
            f"Expected identical groups for 'label' and 'label_to_rescale' "
            f"data, got\n'label': {y_data.index.values}\n'label_to_rescale': "
            f"{np.array(groups)}")

    # Rescale each dataset with its group-specific factor
    ref_cube = _get_ref_cube(labels_to_rescale)
    for dataset in labels_to_rescale:
        cube = iris.load_cube(dataset['filename'])
        group = dataset['group__for__rescaling']
        rescaling_factor = y_mean / y_data.loc[group].values
        logger.info("Rescaling '%s' with factor %.2f", group,
                    rescaling_factor)
        rescaled_cube = cube.copy(cube.data * rescaling_factor)

        # Adapt metadata of the rescaled dataset
        rescaled_dataset = deepcopy(dataset)
        rescaled_dataset['var_type'] = 'label'
        rescaled_dataset['rescaled'] = 'using emergent relationship'
        if '_label' in dataset['filename']:
            rescaled_dataset['filename'] = dataset['filename'].replace(
                '_label_to_rescale', '_rescaled_label')
        else:
            rescaled_dataset['filename'] = dataset['filename'].replace(
                '.nc', '_rescaled_label.nc')

        # Save rescaled data
        rescaled_dataset['filename'] = mlr.get_new_path(
            cfg, rescaled_dataset['filename'])
        io.metadata_to_netcdf(rescaled_cube, rescaled_dataset)

        # Provenance
        provenance_record = {
            'ancestors': [dataset['filename']] + _get_ec_ancestors(cfg),
            'authors': ['schlund_manuel'],
            'caption': f"Rescaled {rescaled_cube.long_name} for "
                       f"{mlr.get_alias(rescaled_dataset)} using emergent "
                       f"relationship.",
            'references': ['schlund20jgr'],
            'themes': ['EC'],
        }
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(rescaled_dataset['filename'],
                                  provenance_record)

    # Rescale MMM to estimate error
    logger.debug("Estimating error using global error %e", y_std)
    mmm_cube = _get_mmm_cube(labels_to_rescale)
    error_cube = ref_cube.copy(mmm_cube.data * y_std / y_data.mean().values)
    error_dataset = _get_error_dataset(cfg, labels_to_rescale)
    io.metadata_to_netcdf(error_cube, error_dataset)

    # Provenance
    provenance_record = {
        'ancestors': ([d['filename'] for d in labels_to_rescale] +
                      _get_ec_ancestors(cfg)),
        'authors': ['schlund_manuel'],
        'caption': f"Rescaled {error_cube.long_name} using emergent "
                   f"relationship.",
        'references': ['schlund20jgr'],
        'themes': ['EC'],
    }
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(error_dataset['filename'], provenance_record)