def _counts_to_validation_results(counts, before_resample):
    return [
        datamodel.ValidationResult(
            flag=k, count=int(v), before_resample=before_resample)
        for k, v in counts.items()
    ]
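# A minimal usage sketch for the helper above. The shape of ``counts``
# (flag name -> number of flagged points) is an assumption based on how
# validation counts appear elsewhere in this module; the example relies on
# the module's existing ``datamodel`` import:
_example_results = _counts_to_validation_results(
    {'USER FLAGGED': 3, 'NIGHTTIME': 120}, before_resample=True)
assert all(r.before_resample for r in _example_results)
assert _example_results[0].count == 3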
def test_render_pdf_special_chars(
        ac_power_observation_metadata, ac_power_forecast_metadata, dash_url,
        fail_pdf, preprocessing_result_types, report_metrics):
    if shutil.which('pdflatex') is None:  # pragma: no cover
        pytest.skip('pdflatex must be on PATH to generate PDF reports')
    quality_flag_filter = datamodel.QualityFlagFilter(
        (
            "USER FLAGGED",
        )
    )
    forecast = ac_power_forecast_metadata.replace(
        name="ac_power forecast (why,) ()'-_,")
    observation = ac_power_observation_metadata.replace(
        name="ac_power observations ()'-_,")
    fxobs = datamodel.ForecastObservation(forecast, observation)
    tz = 'America/Phoenix'
    start = pd.Timestamp('20190401 0000', tz=tz)
    end = pd.Timestamp('20190404 2359', tz=tz)
    report_params = datamodel.ReportParameters(
        name="NREL MIDC OASIS GHI Forecast Analysis ()'-_,",
        start=start,
        end=end,
        object_pairs=(fxobs,),
        metrics=("mae", "rmse", "mbe", "s"),
        categories=("total", "date", "hour"),
        filters=(quality_flag_filter,)
    )
    report = datamodel.Report(
        report_id="56c67770-9832-11e9-a535-f4939feddd83",
        report_parameters=report_params
    )
    qflags = list(
        f.quality_flags for f in report.report_parameters.filters
        if isinstance(f, datamodel.QualityFlagFilter)
    )
    qflags = list(qflags[0])
    ser_index = pd.date_range(
        start, end,
        freq=to_offset(forecast.interval_length),
        name='timestamp')
    ser = pd.Series(
        np.repeat(100, len(ser_index)), name='value', index=ser_index)
    pfxobs = datamodel.ProcessedForecastObservation(
        forecast.name,
        fxobs,
        forecast.interval_value_type,
        forecast.interval_length,
        forecast.interval_label,
        valid_point_count=len(ser),
        validation_results=tuple(datamodel.ValidationResult(
            flag=f, count=0) for f in qflags),
        preprocessing_results=tuple(datamodel.PreprocessingResult(
            name=t, count=0) for t in preprocessing_result_types),
        forecast_values=ser,
        observation_values=ser
    )
    figs = datamodel.RawReportPlots(
        (
            datamodel.PlotlyReportFigure.from_dict(
                {
                    'name': 'mae tucson ac_power',
                    'spec': '{"data":[{"x":[1],"y":[1],"type":"bar"}]}',
                    'pdf': fail_pdf,
                    'figure_type': 'bar',
                    'category': 'total',
                    'metric': 'mae',
                    'figure_class': 'plotly',
                }
            ),), '4.5.3',
    )
    raw = datamodel.RawReport(
        generated_at=report.report_parameters.end,
        timezone=tz,
        plots=figs,
        metrics=report_metrics(report),
        processed_forecasts_observations=(pfxobs,),
        versions=(('test', 'test_with_underscore?'),),
        messages=(datamodel.ReportMessage(
            message="Failed to make metrics for ac_power forecast ()'-_,",
            step='',
            level='',
            function=''),))
    rr = report.replace(raw_report=raw)
    rendered = template.render_pdf(rr, dash_url)
    assert rendered.startswith(b'%PDF')
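# The test above exercises report names containing characters that LaTeX
# treats as special. A minimal sketch of the kind of escaping a PDF renderer
# must perform (a hypothetical helper, not the project's actual
# implementation; backslash and brace handling are omitted for brevity):
def _escape_latex_example(text):
    # replace each LaTeX-special character with its escaped form
    for old, new in [
            ('&', r'\&'), ('%', r'\%'), ('$', r'\$'), ('#', r'\#'),
            ('_', r'\_'),
            ('~', r'\textasciitilde{}'), ('^', r'\textasciicircum{}')]:
        text = text.replace(old, new)
    return text


assert _escape_latex_example("ac_power forecast ()'-_,") == \
    r"ac\_power forecast ()'-\_,"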
def process_forecast_observations(forecast_observations, filters,
                                  forecast_fill_method, start, end,
                                  data, timezone, costs=tuple()):
    """
    Convert ForecastObservations into ProcessedForecastObservations
    applying any filters and resampling to align forecast and observation.

    Parameters
    ----------
    forecast_observations : list of solarforecastarbiter.datamodel.ForecastObservation, solarforecastarbiter.datamodel.ForecastAggregate
        Pairs to process
    filters : list of solarforecastarbiter.datamodel.BaseFilter
        Filters to apply to each pair.
    forecast_fill_method : str
        Indicates what process to use for handling missing forecasts.
        Currently supports: 'drop', 'forward', and bool or numeric values.
    start : pandas.Timestamp
        Start date and time for assessing forecast performance.
    end : pandas.Timestamp
        End date and time for assessing forecast performance.
    data : dict
        Dict with keys that are the Forecast/Observation/Aggregate object
        and values that are the corresponding pandas.Series/DataFrame for
        the object. Keys must also include all Forecast objects assigned
        to the ``reference_forecast`` attributes of the
        ``forecast_observations``.
    timezone : str
        Timezone that data should be converted to
    costs : tuple of :py:class:`solarforecastarbiter.datamodel.Cost`
        Costs that are referenced by any pairs. Pairs and costs are
        matched by the Cost name.

    Returns
    -------
    tuple of ProcessedForecastObservation

    Notes
    -----
    The logic is as follows. For each forecast, observation pair in
    ``forecast_observations``:

    1. Remove observation data points with ``quality_flag`` in filters.
       Remaining observation series is discontinuous.
    2. Fill missing forecast data points according to
       ``forecast_fill_method``.
    3. Fill missing reference forecast data points according to
       ``forecast_fill_method``.
    4. Resample observations to match forecast intervals. A minimum of 10%
       of the observation intervals within a forecast interval must be
       valid (not flagged or previously missing) else the resampled
       observation is NaN.
    5. Drop remaining NaN observation and forecast values.
    6. Align observations to match forecast times. Observation times for
       which there is not a matching forecast time are dropped.
    7. Create
       :py:class:`~solarforecastarbiter.datamodel.ProcessedForecastObservation`
       with resampled, aligned data and metadata.
    """  # NOQA: E501
    if not all([
            isinstance(filter_, datamodel.QualityFlagFilter)
            for filter_ in filters
    ]):
        logger.warning(
            'Only filtering on Quality Flag is currently implemented')
    forecast_fill_map = FORECAST_FILL_STRING_MAP.copy()
    if forecast_fill_method not in forecast_fill_map.keys():
        forecast_fill_map.update({
            forecast_fill_method:
            FORECAST_FILL_CONST_STRING.format(forecast_fill_method)
        })  # NOQA
    qfilter = _merge_quality_filters(filters)
    costs_dict = {c.name: c for c in costs}
    validated_observations = {}
    processed_fxobs = {}
    for fxobs in forecast_observations:
        # validate observation or aggregate data
        if fxobs.data_object not in validated_observations:
            try:
                obs_ser, counts = apply_validation(data[fxobs.data_object],
                                                   qfilter, exclude)
            except Exception as e:
                logger.error('Failed to validate data for %s. %s',
                             fxobs.data_object.name, e)
                # store empty data in validated_observations
                preproc_results = (datamodel.PreprocessingResult(
                    name=VALIDATION_RESULT_TOTAL_STRING, count=-1), )
                validated_observations[fxobs.data_object] = (pd.Series(
                    [], name='value', index=pd.DatetimeIndex(
                        [], name='timestamp', tz='UTC'), dtype=float),
                    (), preproc_results)
            else:
                # store validated data in validated_observations
                val_results = tuple(
                    datamodel.ValidationResult(flag=k, count=v)
                    for k, v in counts.items())
                preproc_results = (datamodel.PreprocessingResult(
                    name=VALIDATION_RESULT_TOTAL_STRING,
                    count=(len(data[fxobs.data_object]) - len(obs_ser))), )
                validated_observations[fxobs.data_object] = (
                    obs_ser, val_results, preproc_results)

        obs_ser, val_results, preproc_results = (
            validated_observations[fxobs.data_object])

        # Apply fill to forecasts
        fx_ser = data[fxobs.forecast]
        fx_ser, count = apply_fill(fx_ser, fxobs.forecast,
                                   forecast_fill_method, start, end)
        preproc_results += (datamodel.PreprocessingResult(
            name=FILL_RESULT_TOTAL_STRING.format(
                '', forecast_fill_map[forecast_fill_method]),
            count=int(count)), )
        if fxobs.reference_forecast is not None:
            ref_ser = data[fxobs.reference_forecast]
            ref_ser, count = apply_fill(ref_ser, fxobs.reference_forecast,
                                        forecast_fill_method, start, end)
            preproc_results += (datamodel.PreprocessingResult(
                name=FILL_RESULT_TOTAL_STRING.format(
                    "Reference ", forecast_fill_map[forecast_fill_method]),
                count=int(count)), )
        else:
            ref_ser = None

        # Resample and align and create processed pair
        try:
            forecast_values, observation_values, ref_fx_values, results = \
                resample_and_align(fxobs, fx_ser, obs_ser, ref_ser, timezone)
            preproc_results += tuple(
                datamodel.PreprocessingResult(name=k, count=int(v))
                for k, v in results.items())
        except Exception as e:
            logger.error(
                'Failed to resample and align data for pair (%s, %s): %s',
                fxobs.forecast.name, fxobs.data_object.name, e)
        else:
            logger.info('Processed data successfully for pair (%s, %s)',
                        fxobs.forecast.name, fxobs.data_object.name)
            name = _name_pfxobs(processed_fxobs.keys(), fxobs.forecast)
            cost_name = fxobs.cost
            cost = costs_dict.get(cost_name)
            if cost_name is not None and cost is None:
                logger.warning(
                    'Cannot calculate cost metrics for %s, cost parameters '
                    'not supplied for cost: %s', name, cost_name)
            processed = datamodel.ProcessedForecastObservation(
                name=name,
                original=fxobs,
                interval_value_type=fxobs.forecast.interval_value_type,
                interval_length=fxobs.forecast.interval_length,
                interval_label=fxobs.forecast.interval_label,
                valid_point_count=len(forecast_values),
                validation_results=val_results,
                preprocessing_results=preproc_results,
                forecast_values=forecast_values,
                observation_values=observation_values,
                reference_forecast_values=ref_fx_values,
                normalization_factor=fxobs.normalization,
                uncertainty=fxobs.uncertainty,
                cost=cost)
            processed_fxobs[name] = processed
    return tuple(processed_fxobs.values())
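# A self-contained sketch of the fill-label bookkeeping used above. The
# values of FORECAST_FILL_STRING_MAP and FORECAST_FILL_CONST_STRING are
# assumed for illustration; the real constants are defined elsewhere in
# this module:
_FILL_STRING_MAP_EXAMPLE = {'drop': 'Dropped', 'forward': 'Forward filled'}
_FILL_CONST_STRING_EXAMPLE = 'Filled with {0}'


def _fill_label_example(method):
    # known methods map to fixed labels; any other value is treated as a
    # constant fill value and interpolated into the label
    return _FILL_STRING_MAP_EXAMPLE.get(
        method, _FILL_CONST_STRING_EXAMPLE.format(method))


assert _fill_label_example('forward') == 'Forward filled'
assert _fill_label_example('99.0') == 'Filled with 99.0'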
def process_forecast_observations(forecast_observations, filters, data,
                                  timezone):
    """
    Convert ForecastObservations into ProcessedForecastObservations
    applying any filters and resampling to align forecast and observation.

    Parameters
    ----------
    forecast_observations : list of solarforecastarbiter.datamodel.ForecastObservation, solarforecastarbiter.datamodel.ForecastAggregate
        Pairs to process
    filters : list of solarforecastarbiter.datamodel.BaseFilter
        Filters to apply to each pair.
    data : dict
        Dict with keys that are the Forecast/Observation/Aggregate object
        and values that are the corresponding pandas.Series/DataFrame for
        the object. Keys must also include all Forecast objects assigned
        to the ``reference_forecast`` attributes of the
        ``forecast_observations``.
    timezone : str
        Timezone that data should be converted to

    Returns
    -------
    tuple of ProcessedForecastObservation
    """  # NOQA
    if not all([
            isinstance(filter_, datamodel.QualityFlagFilter)
            for filter_ in filters
    ]):
        logger.warning(
            'Only filtering on Quality Flag is currently implemented')
    qfilter = _merge_quality_filters(filters)
    validated_observations = {}
    processed_fxobs = {}
    for fxobs in forecast_observations:
        # validate observation or aggregate data
        if fxobs.data_object not in validated_observations:
            try:
                obs_ser, counts = apply_validation(data[fxobs.data_object],
                                                   qfilter, exclude)
            except Exception as e:
                logger.error('Failed to validate data for %s. %s',
                             fxobs.data_object.name, e)
                # store empty data in validated_observations
                preproc_results = (datamodel.PreprocessingResult(
                    name=VALIDATION_RESULT_TOTAL_STRING, count=-1), )
                validated_observations[fxobs.data_object] = (pd.Series(
                    [], name='value', index=pd.DatetimeIndex(
                        [], name='timestamp', tz='UTC'), dtype=float),
                    (), preproc_results)
            else:
                # store validated data in validated_observations
                val_results = tuple(
                    datamodel.ValidationResult(flag=k, count=v)
                    for k, v in counts.items())
                preproc_results = (datamodel.PreprocessingResult(
                    name=VALIDATION_RESULT_TOTAL_STRING,
                    count=len(data[fxobs.data_object]) - len(obs_ser)), )
                validated_observations[fxobs.data_object] = (
                    obs_ser, val_results, preproc_results)

        obs_ser, val_results, preproc_results = (
            validated_observations[fxobs.data_object])

        # resample and align observations to forecast, create
        # ProcessedForecastObservation
        fx_ser = data[fxobs.forecast]
        if fxobs.reference_forecast is not None:
            ref_ser = data[fxobs.reference_forecast]
        else:
            ref_ser = None
        try:
            forecast_values, observation_values, ref_fx_values, results = \
                resample_and_align(fxobs, fx_ser, obs_ser, ref_ser, timezone)
            preproc_results += tuple(
                datamodel.PreprocessingResult(name=k, count=v)
                for k, v in results.items())
        except Exception as e:
            logger.error(
                'Failed to resample and align data for pair (%s, %s): %s',
                fxobs.forecast.name, fxobs.data_object.name, e)
        else:
            logger.info('Processed data successfully for pair (%s, %s)',
                        fxobs.forecast.name, fxobs.data_object.name)
            name = _name_pfxobs(processed_fxobs.keys(), fxobs.forecast.name)
            processed = datamodel.ProcessedForecastObservation(
                name=name,
                original=fxobs,
                interval_value_type=fxobs.forecast.interval_value_type,
                interval_length=fxobs.forecast.interval_length,
                interval_label=fxobs.forecast.interval_label,
                valid_point_count=len(forecast_values),
                validation_results=val_results,
                preprocessing_results=preproc_results,
                forecast_values=forecast_values,
                observation_values=observation_values,
                reference_forecast_values=ref_fx_values,
                normalization_factor=fxobs.normalization,
                uncertainty=fxobs.uncertainty)
            processed_fxobs[name] = processed
    return tuple(processed_fxobs.values())
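# Both implementations above cache validation per data_object so that an
# observation shared by several forecast pairs is validated only once. A
# generic, self-contained sketch of that memoization pattern (names here
# are illustrative, not the project's API):
def _process_pairs_example(pairs, validate):
    validated = {}
    out = []
    for fx, obs in pairs:
        if obs not in validated:
            # expensive validation runs once per distinct observation
            validated[obs] = validate(obs)
        out.append((fx, validated[obs]))
    return out


_calls = []
_result = _process_pairs_example(
    [('fx_a', 'obs_1'), ('fx_b', 'obs_1'), ('fx_c', 'obs_2')],
    lambda o: _calls.append(o) or o.upper())
assert _calls == ['obs_1', 'obs_2']  # obs_1 validated only once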
def process_forecast_observations(forecast_observations, filters,
                                  forecast_fill_method, start, end,
                                  data, timezone, costs=tuple()):
    """
    Convert ForecastObservations into ProcessedForecastObservations
    applying any filters and resampling to align forecast and observation.

    Parameters
    ----------
    forecast_observations : list of solarforecastarbiter.datamodel.ForecastObservation, solarforecastarbiter.datamodel.ForecastAggregate
        Pairs to process
    filters : list of solarforecastarbiter.datamodel.BaseFilter
        Filters to apply to each pair.
    forecast_fill_method : str
        Indicates what process to use for handling missing forecasts.
        Currently supports: 'drop', 'forward', and bool or numeric values.
    start : pandas.Timestamp
        Start date and time for assessing forecast performance.
    end : pandas.Timestamp
        End date and time for assessing forecast performance.
    data : dict
        Dict with keys that are the Forecast/Observation/Aggregate object
        and values that are the corresponding pandas.Series/DataFrame for
        the object. Keys must also include all Forecast objects assigned
        to the ``reference_forecast`` attributes of the
        ``forecast_observations``.
    timezone : str
        Timezone that data should be converted to
    costs : tuple of :py:class:`solarforecastarbiter.datamodel.Cost`
        Costs that are referenced by any pairs. Pairs and costs are
        matched by the Cost name.

    Returns
    -------
    tuple of ProcessedForecastObservation

    Notes
    -----
    In the case where the `interval_label` of the `obs` and `fx` do not
    match, this function currently returns a `ProcessedForecastObservation`
    object with an `interval_label` the same as the `fx`, regardless of
    whether the `interval_length` of the `fx` and `obs` are the same or
    different.

    The processing logic is as follows. For each forecast, observation
    pair in ``forecast_observations``:

    1. Fill missing forecast data points according to
       ``forecast_fill_method``.
    2. Fill missing reference forecast data points according to
       ``forecast_fill_method``.
    3. Remove observation data points with ``quality_flag`` in filters.
       Remaining observation series is discontinuous.
    4. Resample observations to match forecast intervals. If at least 10%
       of the observation intervals within a forecast interval are valid
       (not missing or matching ``filters``), the interval value is
       computed from all subintervals. Otherwise the resampled observation
       is NaN.
    5. Drop NaN observation values.
    6. Align observations to match forecast times. Observation times for
       which there is not a matching forecast time are dropped.
    7. Create
       :py:class:`~solarforecastarbiter.datamodel.ProcessedForecastObservation`
       with resampled, aligned data and metadata.
    """  # NOQA: E501
    if not all([isinstance(filter_, datamodel.QualityFlagFilter)
                for filter_ in filters]):
        logger.warning(
            'Only filtering on Quality Flag is currently implemented. '
            'Other filters will be discarded.')
        filters = [
            f for f in filters if isinstance(f, datamodel.QualityFlagFilter)]

    # create string for tracking forecast fill results.
    # this approach supports known methods or filling with constant values.
    forecast_fill_str = FORECAST_FILL_STRING_MAP.get(
        forecast_fill_method,
        FORECAST_FILL_CONST_STRING.format(forecast_fill_method)
    )

    costs_dict = {c.name: c for c in costs}

    # accumulate ProcessedForecastObservations in a dict.
    # use a dict so we can keep track of existing names and avoid repeats.
    processed_fxobs = {}

    for fxobs in forecast_observations:
        # accumulate PreprocessingResults from various stages in a list
        preproc_results = []

        # extract fx and obs data from data dict
        try:
            fx_data = data[fxobs.forecast]
        except KeyError as e:
            logger.error('Failed to find data for forecast %s: %s',
                         fxobs.forecast.name, e)
            continue
        try:
            obs_data = data[fxobs.data_object]
        except KeyError as e:
            logger.error('Failed to find data for observation %s: %s',
                         fxobs.data_object.name, e)
            continue

        # Apply fill to forecast and reference forecast
        fx_data, count = apply_fill(fx_data, fxobs.forecast,
                                    forecast_fill_method, start, end)
        preproc_results.append(datamodel.PreprocessingResult(
            name=FILL_RESULT_TOTAL_STRING.format('', forecast_fill_str),
            count=int(count)))
        ref_data = data.get(fxobs.reference_forecast, None)
        try:
            check_reference_forecast_consistency(fxobs, ref_data)
        except ValueError as e:
            logger.error('Incompatible reference forecast and data: %s', e)
            continue
        if fxobs.reference_forecast is not None:
            ref_data, count = apply_fill(ref_data, fxobs.reference_forecast,
                                         forecast_fill_method, start, end)
            preproc_results.append(datamodel.PreprocessingResult(
                name=FILL_RESULT_TOTAL_STRING.format(
                    "Reference ", forecast_fill_str),
                count=int(count)))

        # filter and resample observation/aggregate data
        try:
            forecast_values, observation_values, counts = filter_resample(
                fxobs, fx_data, obs_data, filters)
        except Exception as e:
            # should figure out the specific exception types to catch
            logger.error(
                'Failed to filter and resample data for pair (%s, %s): %s',
                fxobs.forecast.name, fxobs.data_object.name, e)
            continue

        # convert validation counts into ValidationResults
        val_results = tuple(datamodel.ValidationResult(flag=k, count=int(v))
                            for k, v in counts.items())
        # this count value no longer makes sense because the first object
        # is at a different interval than the second.
        # might need to add a 'total' to counts, exclude from the
        # ValidationResult comprehension above, and use it here.
        preproc_obs_results = datamodel.PreprocessingResult(
            name=VALIDATION_RESULT_TOTAL_STRING,
            count=(len(data[fxobs.data_object]) - len(observation_values)))
        preproc_results.append(preproc_obs_results)

        # Align and create processed pair
        try:
            forecast_values, observation_values, ref_fx_values, results = \
                align(fxobs, forecast_values, observation_values, ref_data,
                      timezone)
            preproc_results.extend(
                [datamodel.PreprocessingResult(name=k, count=int(v))
                 for k, v in results.items()])
        except Exception as e:
            logger.error('Failed to align data for pair (%s, %s): %s',
                         fxobs.forecast.name, fxobs.data_object.name, e)
            continue

        logger.info('Processed data successfully for pair (%s, %s)',
                    fxobs.forecast.name, fxobs.data_object.name)
        name = _name_pfxobs(processed_fxobs.keys(), fxobs.forecast)
        cost_name = fxobs.cost
        cost = costs_dict.get(cost_name)
        if cost_name is not None and cost is None:
            logger.warning(
                'Cannot calculate cost metrics for %s, cost parameters '
                'not supplied for cost: %s', name, cost_name)
        processed = datamodel.ProcessedForecastObservation(
            name=name,
            original=fxobs,
            interval_value_type=fxobs.forecast.interval_value_type,
            interval_length=fxobs.forecast.interval_length,
            interval_label=fxobs.forecast.interval_label,
            valid_point_count=len(forecast_values),
            validation_results=val_results,
            preprocessing_results=tuple(preproc_results),
            forecast_values=forecast_values,
            observation_values=observation_values,
            reference_forecast_values=ref_fx_values,
            normalization_factor=fxobs.normalization,
            uncertainty=fxobs.uncertainty,
            cost=cost
        )
        processed_fxobs[name] = processed
    return tuple(processed_fxobs.values())
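# A self-contained sketch of the resampling rule described in the Notes
# above: observations are averaged up to the forecast interval, and an
# interval is kept only if at least 10% of its subintervals are valid.
# The frequencies and the threshold placement here are illustrative:
import numpy as np
import pandas as pd

_obs = pd.Series(
    [1.0] + [np.nan] * 11,
    index=pd.date_range('2019-04-01 00:00', periods=12, freq='5min'))
# resample 5 minute observations to 30 minute forecast intervals
_resampled = _obs.resample('30min').mean()
_valid_fraction = _obs.notna().resample('30min').mean()
# drop intervals where fewer than 10% of subintervals were valid
_resampled = _resampled.where(_valid_fraction >= 0.1)
# the first interval keeps its single valid point (1/6 of subintervals
# valid); the second interval (0/6 valid) becomes NaN
assert _resampled.iloc[0] == 1.0 and np.isnan(_resampled.iloc[1])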