def _get_reference_reader(val_run):
    ref_reader = create_reader(val_run.reference_configuration.dataset,
                               val_run.reference_configuration.version)

    # we do the dance with the filtering below because filtering may actually
    # replace the original reader, see ISMN network selection
    ref_reader = setup_filtering(
        ref_reader,
        list(val_run.reference_configuration.filters.all()),
        list(val_run.reference_configuration.parametrisedfilter_set.all()),
        val_run.reference_configuration.dataset,
        val_run.reference_configuration.variable)

    # unwrap any adapters to get at the underlying base reader
    while hasattr(ref_reader, 'cls'):
        ref_reader = ref_reader.cls

    return ref_reader
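
# --- Illustrative sketch, not part of the original module ---
# setup_filtering() may wrap the reader in an adapter that keeps the wrapped
# reader in a 'cls' attribute (pytesmo-style adapters follow this pattern);
# the while-loop in _get_reference_reader() walks down that chain to the base
# reader. A minimal, self-contained demonstration of the pattern, using a
# hypothetical _Adapter class:

def _demo_unwrap_reader_chain():
    class _Adapter:
        """Stand-in for an adapter that stores the wrapped reader in `.cls`."""
        def __init__(self, cls):
            self.cls = cls

    class _BaseReader:
        pass

    base = _BaseReader()
    reader = _Adapter(_Adapter(base))  # doubly wrapped, as stacked filters may do
    while hasattr(reader, 'cls'):
        reader = reader.cls
    assert reader is base  # the loop recovers the underlying base reader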
def create_jobs(validation_run):
    jobs = []
    total_points = 0

    # reuse the helper above: create the reference reader, apply the filters,
    # and unwrap any adapters down to the base reader
    ref_reader = _get_reference_reader(validation_run)

    # if we've got data on a grid, process one cell at a time
    if isinstance(ref_reader, GriddedBase):
        cells = ref_reader.grid.get_cells()

        for cell in cells:
            gpis, lons, lats = ref_reader.grid.grid_points_for_cell(cell)
            gpis, lons, lats, index = _geographic_subsetting(
                gpis, lons, lats, validation_run.min_lat, validation_run.min_lon,
                validation_run.max_lat, validation_run.max_lon)

            if isinstance(gpis, np.ma.MaskedArray):
                gpis = gpis.compressed()
                lons = lons.compressed()
                lats = lats.compressed()

            if len(gpis) > 0:
                jobs.append((gpis, lons, lats))
                total_points += len(gpis)

    # if we've got ISMN data, process one network at a time
    elif isinstance(ref_reader, ISMN_Interface):
        depth_from, depth_to = get_depths_params(
            validation_run.reference_configuration.parametrisedfilter_set.all())

        ids = ref_reader.get_dataset_ids(
            variable=validation_run.reference_configuration.variable.pretty_name,
            min_depth=depth_from,
            max_depth=depth_to)
        mdata = ref_reader.metadata[ids]
        networks = np.unique(mdata['network'])

        for network in networks:
            net_ids = mdata['network'] == network
            net_data = mdata[net_ids]
            lons = net_data['longitude']
            lats = net_data['latitude']
            gpis = ids[net_ids]

            gpis, lons, lats, index = _geographic_subsetting(
                gpis, lons, lats, validation_run.min_lat, validation_run.min_lon,
                validation_run.max_lat, validation_run.max_lon)

            if len(gpis) > 0:
                jobs.append((gpis, lons, lats))
                total_points += len(gpis)

    else:
        raise ValueError(
            "Don't know how to get gridpoints and generate jobs for reader {}"
            .format(ref_reader))

    return total_points, jobs
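
# --- Illustrative sketch, not part of the original module ---
# _geographic_subsetting() may return masked arrays when the bounding box cuts
# into a grid cell; the .compressed() calls in create_jobs() drop the masked
# points before a job is emitted. A small self-contained demonstration of that
# step with synthetic data:

def _demo_compress_masked_gpis():
    import numpy as np

    mask = [False, True, False, True]
    gpis = np.ma.MaskedArray([1, 2, 3, 4], mask=mask)
    lons = np.ma.MaskedArray([10.0, 11.0, 12.0, 13.0], mask=mask)
    lats = np.ma.MaskedArray([45.0, 46.0, 47.0, 48.0], mask=mask)

    # .compressed() returns only the unmasked values as a plain ndarray
    gpis, lons, lats = gpis.compressed(), lons.compressed(), lats.compressed()
    assert list(gpis) == [1, 3]  # only points inside the bounding box remain
    return gpis, lons, lats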
def create_pytesmo_validation(validation_run):
    ds_list = []
    ref_name = None
    scaling_ref_name = None

    ds_num = 1
    for dataset_config in validation_run.dataset_configurations.all():
        reader = create_reader(dataset_config.dataset, dataset_config.version)
        reader = setup_filtering(
            reader,
            list(dataset_config.filters.all()),
            list(dataset_config.parametrisedfilter_set.all()),
            dataset_config.dataset,
            dataset_config.variable)

        if validation_run.anomalies == ValidationRun.MOVING_AVG_35_D:
            reader = AnomalyAdapter(
                reader,
                window_size=35,
                columns=[dataset_config.variable.pretty_name])
        if validation_run.anomalies == ValidationRun.CLIMATOLOGY:
            # make sure our baseline period is in UTC and without timezone information
            anomalies_baseline = [
                validation_run.anomalies_from.astimezone(tz=pytz.UTC).replace(tzinfo=None),
                validation_run.anomalies_to.astimezone(tz=pytz.UTC).replace(tzinfo=None)
            ]
            reader = AnomalyClimAdapter(
                reader,
                columns=[dataset_config.variable.pretty_name],
                timespan=anomalies_baseline)

        if (validation_run.reference_configuration and
                (dataset_config.id == validation_run.reference_configuration.id)):
            # the reference is always named "0-..."
            dataset_name = '{}-{}'.format(0, dataset_config.dataset.short_name)
        else:
            dataset_name = '{}-{}'.format(ds_num, dataset_config.dataset.short_name)
            ds_num += 1

        ds_list.append((dataset_name, {
            'class': reader,
            'columns': [dataset_config.variable.pretty_name]
        }))

        if (validation_run.reference_configuration and
                (dataset_config.id == validation_run.reference_configuration.id)):
            ref_name = dataset_name
            ref_short_name = validation_run.reference_configuration.dataset.short_name
        if (validation_run.scaling_ref and
                (dataset_config.id == validation_run.scaling_ref.id)):
            scaling_ref_name = dataset_name

    datasets = dict(ds_list)
    ds_num = len(ds_list)

    period = None
    if validation_run.interval_from is not None and validation_run.interval_to is not None:
        # as long as pytesmo can't deal with timezones, normalise the validation
        # period to UTC; can be removed once pytesmo supports timezones
        startdate = validation_run.interval_from.astimezone(UTC).replace(tzinfo=None)
        enddate = validation_run.interval_to.astimezone(UTC).replace(tzinfo=None)
        period = [startdate, enddate]

    upscale_parms = None
    if validation_run.upscaling_method != "none":
        __logger.debug("Upscaling option is active")
        upscale_parms = {
            "upscaling_method": validation_run.upscaling_method,
            "temporal_stability": validation_run.temporal_stability,
        }
        upscaling_lut = create_upscaling_lut(
            validation_run=validation_run,
            datasets=datasets,
            ref_name=ref_name,
        )
        upscale_parms["upscaling_lut"] = upscaling_lut
        __logger.debug("Lookup table for non-reference datasets " +
                       ", ".join(upscaling_lut.keys()) + " created")
        __logger.debug("{}".format(upscaling_lut))

    datamanager = DataManager(
        datasets,
        ref_name=ref_name,
        period=period,
        read_ts_names='read',
        upscale_parms=upscale_parms,
    )
    ds_names = get_dataset_names(datamanager.reference_name,
                                 datamanager.datasets,
                                 n=ds_num)

    # choose the metadata template according to which reference dataset is used
    if ref_short_name == 'ISMN':
        metadata_template = METADATA_TEMPLATE['ismn_ref']
    else:
        metadata_template = METADATA_TEMPLATE['other_ref']

    pairwise_metrics = PairwiseIntercomparisonMetrics(
        metadata_template=metadata_template,
        calc_kendall=False,
    )
    metric_calculators = {(ds_num, 2): pairwise_metrics.calc_metrics}

    if (len(ds_names) >= 3) and (validation_run.tcol is True):
        tcol_metrics = TripleCollocationMetrics(
            ref_name,
            metadata_template=metadata_template,
        )
        metric_calculators.update({(ds_num, 3): tcol_metrics.calc_metrics})

    if validation_run.scaling_method == validation_run.NO_SCALING:
        scaling_method = None
    else:
        scaling_method = validation_run.scaling_method

    __logger.debug(f"Scaling method: {scaling_method}")
    __logger.debug(f"Scaling dataset: {scaling_ref_name}")

    val = Validation(
        datasets=datamanager,
        temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "H")),
        spatial_ref=ref_name,
        scaling=scaling_method,
        scaling_ref=scaling_ref_name,
        metrics_calculators=metric_calculators,
        period=period)

    return val
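
# --- Illustrative sketch, not part of the original module ---
# In pytesmo, the metrics_calculators dict is keyed by tuples of
# (total number of datasets, datasets per metric call): (ds_num, 2) applies
# the pairwise metrics to every dataset pair against the reference, and
# (ds_num, 3) applies triple collocation to every triple. A hedged usage
# sketch tying the functions above together; `run` is assumed to be a
# ValidationRun instance, and additional keyword arguments to Validation.calc
# (e.g. column renaming) may apply in practice:

def _demo_run_validation(run):
    total_points, jobs = create_jobs(run)
    val = create_pytesmo_validation(run)

    results = []
    for gpis, lons, lats in jobs:
        # pytesmo's Validation.calc temporally matches and scales the time
        # series for the given points, then applies every registered
        # metric calculator
        results.append(val.calc(gpis, lons, lats))
    return total_points, results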