def calculate_event_metrics(proc_fx_obs, categories, metrics):
    """
    Compute event metrics over the processed forecast/observation pair,
    grouped by each of the requested categories.

    Parameters
    ----------
    proc_fx_obs : datamodel.ProcessedForecastObservation
    categories : list of str
        Categories (e.g. 'total', 'month', 'weekday') to group by when
        computing metrics.
    metrics : list of str
        Names of the metrics to compute.

    Returns
    -------
    solarforecastarbiter.datamodel.MetricResult
        All computed metric values, organized by category.

    Raises
    ------
    RuntimeError
        If the forecast or observation timeseries is empty, or if no
        metrics were requested.
    """
    result_dict = {
        'name': proc_fx_obs.name,
        'forecast_id': proc_fx_obs.original.forecast.forecast_id,
        'observation_id': proc_fx_obs.original.observation.observation_id
    }

    fx_series = proc_fx_obs.forecast_values
    obs_series = proc_fx_obs.observation_values

    # Guard clauses: nothing can be computed without data or metrics.
    if fx_series.empty:
        raise RuntimeError("No Forecast timeseries data.")
    if obs_series.empty:
        raise RuntimeError("No Observation timeseries data.")
    if not metrics:
        raise RuntimeError("No metrics specified.")

    # Align forecast and observation on one frame so groupby sees both.
    paired = pd.concat(
        {'forecast': fx_series, 'observation': obs_series}, axis=1)

    def _pretty_label(category, label):
        # Map numeric calendar groups to abbreviations, e.g. 1 -> 'Jan'
        # for months, 0 -> 'Mon' for weekdays; other labels pass through.
        if category == 'month':
            return calendar.month_abbr[label]
        if category == 'weekday':
            return calendar.day_abbr[label]
        return label

    computed_values = []
    for category in set(categories):
        if category == 'total':
            # Special category: a single bin holding every timestamp.
            grouper = lambda x: 0  # NOQA: E731
        else:
            # Group on a datetime-index attribute (month, weekday, ...).
            grouper = getattr(paired.index, category)

        for metric_name in set(metrics):
            for group_label, subset in paired.groupby(grouper):
                metric_result = _apply_event_metric_func(
                    metric_name, subset.forecast, subset.observation)
                computed_values.append(datamodel.MetricValue(
                    category, metric_name,
                    str(_pretty_label(category, group_label)),
                    metric_result))

    result_dict['values'] = _sort_metrics_vals(
        computed_values, datamodel.ALLOWED_EVENT_METRICS)
    return datamodel.MetricResult.from_dict(result_dict)
def calculate_deterministic_metrics(processed_fx_obs, categories, metrics):
    """
    Compute deterministic metrics over the processed forecast/observation
    pair, grouped by each of the requested categories.

    Normalization comes from the input object's ``normalization_factor``.
    If ``processed_fx_obs.uncertainty`` is not ``None``, it is passed as
    the deadband to metrics that support one.

    Parameters
    ----------
    processed_fx_obs : datamodel.ProcessedForecastObservation
    categories : list of str
        Categories (e.g. 'total', 'month', 'weekday') to group by when
        computing metrics.
    metrics : list of str
        Names of the metrics to compute.

    Returns
    -------
    solarforecastarbiter.datamodel.MetricResult
        All computed metric values, organized by category.

    Raises
    ------
    RuntimeError
        If the forecast or observation timeseries is empty, or if no
        metrics were requested.
    """
    result_dict = {
        'name': processed_fx_obs.name,
        'forecast_id': processed_fx_obs.original.forecast.forecast_id,
    }
    # The original pair references either an observation or an aggregate;
    # record whichever id exists.
    try:
        result_dict['observation_id'] = \
            processed_fx_obs.original.observation.observation_id
    except AttributeError:
        result_dict['aggregate_id'] = \
            processed_fx_obs.original.aggregate.aggregate_id

    fx_series = processed_fx_obs.forecast_values
    obs_series = processed_fx_obs.observation_values

    # Reference forecast from the processed pair, when present. NaN is
    # used in its place to avoid issues with None and deadband masking.
    reference = processed_fx_obs.reference_forecast_values
    if reference is None:
        reference = np.nan

    # Guard clauses: nothing can be computed without data or metrics.
    if fx_series.empty:
        raise RuntimeError("No forecast timeseries data.")
    if obs_series.empty:
        raise RuntimeError("No observation timeseries data.")
    if not metrics:
        raise RuntimeError("No metrics specified.")

    # Single frame so groupby sees forecast, observation and reference.
    paired = pd.DataFrame({'forecast': fx_series,
                           'observation': obs_series,
                           'reference': reference})

    normalization = processed_fx_obs.normalization_factor
    deadband = processed_fx_obs.uncertainty

    # Keep `groupby` consistent with `interval_label`: for 'ending'
    # labels, nudge the index back so the last interval lands in the
    # preceding bin.
    if processed_fx_obs.interval_label == "ending":
        paired.index -= pd.Timedelta("1ns")

    def _pretty_label(category, label):
        # Map numeric calendar groups to abbreviations, e.g. 1 -> 'Jan'
        # for months, 0 -> 'Mon' for weekdays; other labels pass through.
        if category == 'month':
            return calendar.month_abbr[label]
        if category == 'weekday':
            return calendar.day_abbr[label]
        return label

    computed_values = []
    for category in set(categories):
        if category == 'total':
            # Special category: a single bin holding every timestamp.
            grouper = lambda x: 0  # NOQA: E731
        else:
            # Group on a datetime-index attribute (month, weekday, ...).
            grouper = getattr(paired.index, category)

        for metric_name in metrics:
            for group_label, subset in paired.groupby(grouper):
                metric_result = _apply_deterministic_metric_func(
                    metric_name, subset.forecast, subset.observation,
                    ref_fx=subset.reference, normalization=normalization,
                    deadband=deadband)
                computed_values.append(datamodel.MetricValue(
                    category, metric_name,
                    str(_pretty_label(category, group_label)),
                    metric_result))

    result_dict['values'] = _sort_metrics_vals(
        computed_values, datamodel.ALLOWED_DETERMINISTIC_METRICS)
    return datamodel.MetricResult.from_dict(result_dict)
def _calculate_probabilistic_metrics_from_df(data_df, categories, metrics,
                                             interval_label):
    """
    Compute probabilistic metrics from a pre-assembled timeseries frame,
    grouped by each of the requested categories.

    Parameters
    ----------
    data_df : pandas.DataFrame
        All timeseries values on a shared index; columns are a
        MultiIndex whose second level distinguishes 'forecast',
        'probability', 'observation' and (optionally)
        'reference_forecast' / 'reference_probability'.
    categories : list of str
        Categories (e.g. 'total', 'month', 'weekday') to group by.
    metrics : list of str
        Names of the metrics to compute.
    interval_label : str

    Returns
    -------
    list of tuples of datamodel.MetricValue
        All computed metric values by category; each tuple is associated
        with a datamodel.ProbabilisticForecastConstantValue.
    """
    results = []

    # Keep `groupby` consistent with `interval_label`: for 'ending'
    # labels, nudge the index back so the last interval lands in the
    # preceding bin.
    if interval_label == "ending":
        data_df.index -= pd.Timedelta("1ns")

    def _pretty_label(category, label):
        # Map numeric calendar groups to abbreviations, e.g. 1 -> 'Jan'
        # for months, 0 -> 'Mon' for weekdays; other labels pass through.
        if category == 'month':
            return calendar.month_abbr[label]
        if category == 'weekday':
            return calendar.day_abbr[label]
        return label

    for category in set(categories):
        if category == 'total':
            # Special category: a single bin holding every timestamp.
            grouper = lambda x: 0  # NOQA: E731
        else:
            # Group on a datetime-index attribute (month, weekday, ...).
            grouper = getattr(data_df.index, category)

        for metric_name in set(metrics):
            for group_label, subset in data_df.groupby(grouper):
                # Reference columns are optional; fall back to NaN when
                # absent so downstream masking still works.
                try:
                    ref_fx_vals = subset.xs(
                        'reference_forecast', level=1, axis=1).to_numpy()
                    ref_fx_prob_vals = subset.xs(
                        'reference_probability', level=1, axis=1).to_numpy()
                    # Collapse single-column 2D arrays to 1D vectors.
                    if ref_fx_vals.size == ref_fx_vals.shape[0]:
                        ref_fx_vals = ref_fx_vals.T[0]
                        ref_fx_prob_vals = ref_fx_prob_vals.T[0]
                except KeyError:
                    ref_fx_vals = np.nan
                    ref_fx_prob_vals = np.nan

                fx_vals = subset.xs('forecast', level=1, axis=1).to_numpy()
                fx_prob_vals = subset.xs(
                    'probability', level=1, axis=1).to_numpy()
                # Collapse single-column 2D arrays to 1D vectors.
                if fx_vals.size == fx_vals.shape[0]:
                    fx_vals = fx_vals.T[0]
                    fx_prob_vals = fx_prob_vals.T[0]
                obs_vals = subset[(None, 'observation')].to_numpy()

                metric_result = _apply_probabilistic_metric_func(
                    metric_name, fx_vals, fx_prob_vals, obs_vals,
                    ref_fx=ref_fx_vals, ref_fx_prob=ref_fx_prob_vals)

                results.append(datamodel.MetricValue(
                    category, metric_name,
                    str(_pretty_label(category, group_label)),
                    metric_result))

    return results
def calculate_summary_statistics(processed_fx_obs, categories):
    """
    Compute summary statistics for the processed data, grouped by the
    requested categories, using every metric defined in
    :py:mod:`.summary`.

    Parameters
    ----------
    processed_fx_obs : datamodel.ProcessedForecastObservation
    categories : list of str
        Categories (e.g. 'total', 'month', 'weekday') to group by.

    Returns
    -------
    solarforecastarbiter.datamodel.MetricResult
        All computed statistics, organized by category.

    Raises
    ------
    RuntimeError
        If there is no data to summarize.
    """
    result_dict = {
        'name': processed_fx_obs.name,
        'forecast_id': processed_fx_obs.original.forecast.forecast_id,
        'is_summary': True
    }
    # The original pair references either an observation or an aggregate;
    # record whichever id exists.
    try:
        result_dict['observation_id'] = \
            processed_fx_obs.original.observation.observation_id
    except AttributeError:
        result_dict['aggregate_id'] = \
            processed_fx_obs.original.aggregate.aggregate_id

    # Observations are always summarized; forecast (and reference
    # forecast) columns are added only for deterministic forecasts.
    frame_data = {'observation': processed_fx_obs.observation_values}
    if _is_deterministic_forecast(processed_fx_obs):
        frame_data['forecast'] = processed_fx_obs.forecast_values
        reference = processed_fx_obs.reference_forecast_values
        if reference is not None:
            frame_data['reference_forecast'] = reference

    summary_frame = pd.DataFrame(frame_data)
    if summary_frame.empty:
        raise RuntimeError('No data to calculate summary statistics for.')

    # Keep `groupby` consistent with `interval_label`: for 'ending'
    # labels, nudge the index back so the last interval lands in the
    # preceding bin.
    if processed_fx_obs.interval_label == "ending":
        summary_frame.index -= pd.Timedelta("1ns")

    def _pretty_label(category, label):
        # Map numeric calendar groups to abbreviations, e.g. 1 -> 'Jan'
        # for months, 0 -> 'Mon' for weekdays; other labels pass through.
        if category == 'month':
            return calendar.month_abbr[label]
        if category == 'weekday':
            return calendar.day_abbr[label]
        return label

    computed_values = []
    for category in set(categories):
        grouper = _index_category(category, summary_frame)
        for group_label, subset in summary_frame.groupby(grouper):
            stats_by_column = _calculate_summary_for_frame(subset)
            label_str = str(_pretty_label(category, group_label))
            # One MetricValue per (column, statistic) pair, named like
            # 'forecast_mean' or 'observation_max'.
            computed_values.extend(
                datamodel.MetricValue(
                    category, f'{column}_{stat_name}', label_str, stat_val)
                for column, stats in stats_by_column.items()
                for stat_name, stat_val in stats.items()
            )

    # Sort against every allowed (column, statistic) combination.
    allowed = {
        f'{column}_{stat_name}': order
        for stat_name, order in datamodel.ALLOWED_SUMMARY_STATISTICS.items()
        for column in ('forecast', 'observation', 'reference_forecast')
    }
    result_dict['values'] = _sort_metrics_vals(computed_values, allowed)
    return datamodel.MetricResult.from_dict(result_dict)