# Example 1
def test_render_pdf_special_chars(
        ac_power_observation_metadata, ac_power_forecast_metadata, dash_url,
        fail_pdf, preprocessing_result_types, report_metrics):
    """Report PDF rendering should survive LaTeX-hostile characters in
    forecast, observation, report and message names."""
    if shutil.which('pdflatex') is None:  # pragma: no cover
        pytest.skip('pdflatex must be on PATH to generate PDF reports')

    # Names deliberately contain parentheses, quotes, commas, hyphens
    # and underscores that must be escaped in the LaTeX template.
    fx = ac_power_forecast_metadata.replace(
        name="ac_power forecast (why,)  ()'-_,")
    obs = ac_power_observation_metadata.replace(
        name="ac_power observations  ()'-_,")
    pair = datamodel.ForecastObservation(fx, obs)

    tz = 'America/Phoenix'
    start = pd.Timestamp('20190401 0000', tz=tz)
    end = pd.Timestamp('20190404 2359', tz=tz)
    report = datamodel.Report(
        report_id="56c67770-9832-11e9-a535-f4939feddd83",
        report_parameters=datamodel.ReportParameters(
            name="NREL MIDC OASIS GHI Forecast Analysis  ()'-_,",
            start=start,
            end=end,
            object_pairs=(pair,),
            metrics=("mae", "rmse", "mbe", "s"),
            categories=("total", "date", "hour"),
            filters=(datamodel.QualityFlagFilter(("USER FLAGGED",)),),
        ),
    )

    # Flags from the first (only) quality-flag filter of the report.
    flag_names = [
        list(f.quality_flags)
        for f in report.report_parameters.filters
        if isinstance(f, datamodel.QualityFlagFilter)
    ][0]

    index = pd.date_range(
        start, end,
        freq=to_offset(fx.interval_length),
        name='timestamp')
    values = pd.Series(
        np.repeat(100, len(index)), name='value', index=index)

    processed = datamodel.ProcessedForecastObservation(
        name=fx.name,
        original=pair,
        interval_value_type=fx.interval_value_type,
        interval_length=fx.interval_length,
        interval_label=fx.interval_label,
        valid_point_count=len(values),
        validation_results=tuple(
            datamodel.ValidationResult(flag=flag, count=0)
            for flag in flag_names),
        preprocessing_results=tuple(
            datamodel.PreprocessingResult(name=step, count=0)
            for step in preprocessing_result_types),
        forecast_values=values,
        observation_values=values,
    )

    plots = datamodel.RawReportPlots(
        (
            datamodel.PlotlyReportFigure.from_dict({
                'name': 'mae tucson ac_power',
                'spec': '{"data":[{"x":[1],"y":[1],"type":"bar"}]}',
                'pdf': fail_pdf,
                'figure_type': 'bar',
                'category': 'total',
                'metric': 'mae',
                'figure_class': 'plotly',
            }),
        ),
        '4.5.3',
    )
    raw = datamodel.RawReport(
        generated_at=report.report_parameters.end,
        timezone=tz,
        plots=plots,
        metrics=report_metrics(report),
        processed_forecasts_observations=(processed,),
        versions=(('test', 'test_with_underscore?'),),
        messages=(
            datamodel.ReportMessage(
                message="Failed to make metrics for ac_power forecast ()'-_,",
                step='', level='', function=''),
        ))

    rendered = template.render_pdf(report.replace(raw_report=raw), dash_url)
    # A valid PDF file begins with the %PDF magic bytes.
    assert rendered.startswith(b'%PDF')
def process_forecast_observations(forecast_observations, filters, data,
                                  timezone):
    """
    Convert ForecastObservations into ProcessedForecastObservations
    applying any filters and resampling to align forecast and observation.

    Parameters
    ----------
    forecast_observations : list of solarforecastarbiter.datamodel.ForecastObservation, solarforecastarbiter.datamodel.ForecastAggregate
        Pairs to process
    filters : list of solarforecastarbiter.datamodel.BaseFilter
        Filters to apply to each pair.
    data : dict
        Dict with keys that are the Forecast/Observation/Aggregate object
        and values that are the corresponding pandas.Series/DataFrame for
        the object. Keys must also include all Forecast objects assigned
        to the ``reference_forecast`` attributes of the
        ``forecast_observations``.
    timezone : str
        Timezone that data should be converted to

    Returns
    -------
    list of ProcessedForecastObservation
    """  # NOQA
    # Warn (but proceed) when unsupported filter types are present; only
    # quality-flag filters take part in the validation below.
    if not all([
            isinstance(filter_, datamodel.QualityFlagFilter)
            for filter_ in filters
    ]):
        logger.warning(
            'Only filtering on Quality Flag is currently implemented')
    # Collapse all quality-flag filters into a single merged filter.
    qfilter = _merge_quality_filters(filters)
    # Cache of validation output keyed by the observation/aggregate object,
    # so data shared by several pairs is validated only once.
    validated_observations = {}
    # Results keyed by (deduplicated) pair name; dict preserves order.
    processed_fxobs = {}
    for fxobs in forecast_observations:
        # validate observation or aggregate data
        if fxobs.data_object not in validated_observations:
            try:
                # NOTE(review): 'exclude' is not defined in this view —
                # presumably a module-level helper; confirm.
                obs_ser, counts = apply_validation(data[fxobs.data_object],
                                                   qfilter, exclude)
            except Exception as e:
                logger.error('Failed to validate data for %s. %s',
                             fxobs.data_object.name, e)
                # store empty data in validated_observations
                # count=-1 marks validation failure for this data object
                preproc_results = (datamodel.PreprocessingResult(
                    name=VALIDATION_RESULT_TOTAL_STRING, count=-1), )
                validated_observations[fxobs.data_object] = (pd.Series(
                    [],
                    name='value',
                    index=pd.DatetimeIndex([], name='timestamp', tz='UTC'),
                    dtype=float), (), preproc_results)
            else:
                # store validated data in validated_observations
                val_results = tuple(
                    datamodel.ValidationResult(flag=k, count=v)
                    for k, v in counts.items())
                # total number of points removed by validation
                preproc_results = (datamodel.PreprocessingResult(
                    name=VALIDATION_RESULT_TOTAL_STRING,
                    count=len(data[fxobs.data_object]) - len(obs_ser)), )
                validated_observations[fxobs.data_object] = (obs_ser,
                                                             val_results,
                                                             preproc_results)

        obs_ser, val_results, preproc_results = (
            validated_observations[fxobs.data_object])

        # resample and align observations to forecast, create
        # ProcessedForecastObservation
        fx_ser = data[fxobs.forecast]
        if fxobs.reference_forecast is not None:
            ref_ser = data[fxobs.reference_forecast]
        else:
            ref_ser = None
        try:
            forecast_values, observation_values, ref_fx_values, results = \
                resample_and_align(fxobs, fx_ser, obs_ser, ref_ser, timezone)
            # += rebinds the local tuple; the cached tuple stored in
            # validated_observations is not mutated
            preproc_results += tuple(
                datamodel.PreprocessingResult(name=k, count=v)
                for k, v in results.items())
        except Exception as e:
            # best-effort: skip this pair but continue with the others
            logger.error(
                'Failed to resample and align data for pair (%s, %s): %s',
                fxobs.forecast.name, fxobs.data_object.name, e)
        else:
            logger.info('Processed data successfully for pair (%s, %s)',
                        fxobs.forecast.name, fxobs.data_object.name)
            # _name_pfxobs presumably deduplicates against names already
            # used — TODO confirm (helper not in view)
            name = _name_pfxobs(processed_fxobs.keys(), fxobs.forecast.name)
            processed = datamodel.ProcessedForecastObservation(
                name=name,
                original=fxobs,
                interval_value_type=fxobs.forecast.interval_value_type,
                interval_length=fxobs.forecast.interval_length,
                interval_label=fxobs.forecast.interval_label,
                valid_point_count=len(forecast_values),
                validation_results=val_results,
                preprocessing_results=preproc_results,
                forecast_values=forecast_values,
                observation_values=observation_values,
                reference_forecast_values=ref_fx_values,
                normalization_factor=fxobs.normalization,
                uncertainty=fxobs.uncertainty)
            processed_fxobs[name] = processed
    return tuple(processed_fxobs.values())
# Example 3
def process_forecast_observations(forecast_observations,
                                  filters,
                                  forecast_fill_method,
                                  start,
                                  end,
                                  data,
                                  timezone,
                                  costs=tuple()):
    """
    Convert ForecastObservations into ProcessedForecastObservations
    applying any filters and resampling to align forecast and observation.

    Parameters
    ----------
    forecast_observations : list of solarforecastarbiter.datamodel.ForecastObservation, solarforecastarbiter.datamodel.ForecastAggregate
        Pairs to process
    filters : list of solarforecastarbiter.datamodel.BaseFilter
        Filters to apply to each pair.
    forecast_fill_method : str
        Indicates what process to use for handling missing forecasts.
        Currently supports : 'drop', 'forward', and bool or numeric value.
    start : pandas.Timestamp
        Start date and time for assessing forecast performance.
    end : pandas.Timestamp
        End date and time for assessing forecast performance.
    data : dict
        Dict with keys that are the Forecast/Observation/Aggregate object
        and values that are the corresponding pandas.Series/DataFrame for
        the object. Keys must also include all Forecast objects assigned
        to the ``reference_forecast`` attributes of the
        ``forecast_observations``.
    timezone : str
        Timezone that data should be converted to
    costs : tuple of :py:class:`solarforecastarbiter.datamodel.Cost`
        Costs that are referenced by any pairs. Pairs and costs are matched
        by the Cost name.

    Returns
    -------
    list of ProcessedForecastObservation

    Notes
    -----
    The logic is as follows.

    For each forecast, observation pair in ``forecast_observations``:

      1. Remove observation data points with ``quality_flag`` in filters.
         Remaining observation series is discontinuous.
      2. Fill missing forecast data points according to
         ``forecast_fill_method``.
      3. Fill missing reference forecast data points according to
         ``forecast_fill_method``.
      4. Resample observations to match forecast intervals. A minimum of 10% of
         the observation intervals within a forecast interval must be valid
         (not flagged or previously missing) else the resampled observation is
         NaN.
      5. Drop remaining NaN observation and forecast values.
      6. Align observations to match forecast times. Observation times for
         which there is not a matching forecast time are dropped.
      7. Create :py:class:`~solarforecastarbiter.datamodel.ProcessedForecastObservation`
         with resampled, aligned data and metadata.
    """  # NOQA: E501
    # Warn (but proceed) when unsupported filter types are present; only
    # quality-flag filters take part in the validation below.
    if not all([
            isinstance(filter_, datamodel.QualityFlagFilter)
            for filter_ in filters
    ]):
        logger.warning(
            'Only filtering on Quality Flag is currently implemented')
    # Map the fill method to its display string; an unrecognized method is
    # treated as a constant fill value and formatted accordingly.
    forecast_fill_map = FORECAST_FILL_STRING_MAP.copy()
    if forecast_fill_method not in forecast_fill_map.keys():
        forecast_fill_map.update({
            forecast_fill_method:
            FORECAST_FILL_CONST_STRING.format(forecast_fill_method)
        })  # NOQA
    # Collapse all quality-flag filters into a single merged filter.
    qfilter = _merge_quality_filters(filters)
    # Costs are matched to pairs by cost name.
    costs_dict = {c.name: c for c in costs}
    # Cache of validation output keyed by the observation/aggregate object,
    # so data shared by several pairs is validated only once.
    validated_observations = {}
    processed_fxobs = {}
    for fxobs in forecast_observations:
        # validate observation or aggregate data
        if fxobs.data_object not in validated_observations:
            try:
                # NOTE(review): 'exclude' is not defined in this view —
                # presumably a module-level helper; confirm.
                obs_ser, counts = apply_validation(data[fxobs.data_object],
                                                   qfilter, exclude)
            except Exception as e:
                logger.error('Failed to validate data for %s. %s',
                             fxobs.data_object.name, e)
                # store empty data in validated_observations
                # count=-1 marks validation failure for this data object
                preproc_results = (datamodel.PreprocessingResult(
                    name=VALIDATION_RESULT_TOTAL_STRING, count=-1), )
                validated_observations[fxobs.data_object] = (pd.Series(
                    [],
                    name='value',
                    index=pd.DatetimeIndex([], name='timestamp', tz='UTC'),
                    dtype=float), (), preproc_results)
            else:
                # store validated data in validated_observations
                val_results = tuple(
                    datamodel.ValidationResult(flag=k, count=v)
                    for k, v in counts.items())
                # total number of points removed by validation
                preproc_results = (datamodel.PreprocessingResult(
                    name=VALIDATION_RESULT_TOTAL_STRING,
                    count=(len(data[fxobs.data_object]) - len(obs_ser))), )
                validated_observations[fxobs.data_object] = (obs_ser,
                                                             val_results,
                                                             preproc_results)

        obs_ser, val_results, preproc_results = (
            validated_observations[fxobs.data_object])

        # Apply fill to forecasts
        fx_ser = data[fxobs.forecast]
        fx_ser, count = apply_fill(fx_ser, fxobs.forecast,
                                   forecast_fill_method, start, end)
        # += rebinds the local tuple; the cached tuple stored in
        # validated_observations is not mutated
        preproc_results += (datamodel.PreprocessingResult(
            name=FILL_RESULT_TOTAL_STRING.format(
                '', forecast_fill_map[forecast_fill_method]),
            count=int(count)), )
        if fxobs.reference_forecast is not None:
            # fill the reference forecast with the same method
            ref_ser = data[fxobs.reference_forecast]
            ref_ser, count = apply_fill(ref_ser, fxobs.reference_forecast,
                                        forecast_fill_method, start, end)
            preproc_results += (datamodel.PreprocessingResult(
                name=FILL_RESULT_TOTAL_STRING.format(
                    "Reference ", forecast_fill_map[forecast_fill_method]),
                count=int(count)), )
        else:
            ref_ser = None

        # Resample and align and create processed pair
        try:
            forecast_values, observation_values, ref_fx_values, results = \
                resample_and_align(fxobs, fx_ser, obs_ser, ref_ser, timezone)
            preproc_results += tuple(
                datamodel.PreprocessingResult(name=k, count=int(v))
                for k, v in results.items())
        except Exception as e:
            # best-effort: skip this pair but continue with the others
            logger.error(
                'Failed to resample and align data for pair (%s, %s): %s',
                fxobs.forecast.name, fxobs.data_object.name, e)
        else:
            logger.info('Processed data successfully for pair (%s, %s)',
                        fxobs.forecast.name, fxobs.data_object.name)
            # _name_pfxobs presumably deduplicates against names already
            # used — TODO confirm (helper not in view)
            name = _name_pfxobs(processed_fxobs.keys(), fxobs.forecast)
            cost_name = fxobs.cost
            cost = costs_dict.get(cost_name)
            # warn when a pair references a cost that was not supplied
            if cost_name is not None and cost is None:
                logger.warning(
                    'Cannot calculate cost metrics for %s, cost parameters '
                    'not supplied for cost: %s', name, cost_name)
            processed = datamodel.ProcessedForecastObservation(
                name=name,
                original=fxobs,
                interval_value_type=fxobs.forecast.interval_value_type,
                interval_length=fxobs.forecast.interval_length,
                interval_label=fxobs.forecast.interval_label,
                valid_point_count=len(forecast_values),
                validation_results=val_results,
                preprocessing_results=preproc_results,
                forecast_values=forecast_values,
                observation_values=observation_values,
                reference_forecast_values=ref_fx_values,
                normalization_factor=fxobs.normalization,
                uncertainty=fxobs.uncertainty,
                cost=cost)
            processed_fxobs[name] = processed
    return tuple(processed_fxobs.values())
def process_forecast_observations(forecast_observations,
                                  filters,
                                  forecast_fill_method,
                                  start,
                                  end,
                                  data,
                                  timezone,
                                  costs=tuple()):
    """Create ProcessedForecastObservations from ForecastObservations.

    Each (forecast, observation) pair is filled, filtered, resampled and
    aligned so forecast and observation values share a common index.

    Parameters
    ----------
    forecast_observations : list of solarforecastarbiter.datamodel.ForecastObservation, solarforecastarbiter.datamodel.ForecastAggregate
        Pairs to process
    filters : list of solarforecastarbiter.datamodel.BaseFilter
        Filters to apply to each pair. Only quality-flag filters are
        supported; any others are discarded with a warning.
    forecast_fill_method : str
        How missing forecasts are handled: 'drop', 'forward', or a
        bool/numeric constant used as the fill value.
    start : pandas.Timestamp
        Start date and time for assessing forecast performance.
    end : pandas.Timestamp
        End date and time for assessing forecast performance.
    data : dict
        Maps each Forecast/Observation/Aggregate object (including every
        ``reference_forecast``) to its pandas.Series/DataFrame.
    timezone : str
        Timezone that data should be converted to
    costs : tuple of :py:class:`solarforecastarbiter.datamodel.Cost`
        Costs referenced by pairs; matched to pairs by the Cost name.

    Returns
    -------
    tuple of ProcessedForecastObservation

    Notes
    -----
    When the `interval_label` of the `obs` and `fx` differ, the returned
    `ProcessedForecastObservation` carries the `interval_label` of the
    `fx`, regardless of whether their `interval_length` values match.

    For each pair: forecast (and reference forecast) gaps are filled per
    ``forecast_fill_method``; observation points matching the quality-flag
    ``filters`` are removed; observations are resampled to forecast
    intervals (an interval needs at least 10% valid subintervals, else it
    is NaN); NaN observations are dropped; and observations are aligned
    to matching forecast times. Pairs failing any stage are logged and
    skipped.
    """  # NOQA: E501
    # Discard any filter type we cannot handle, warning once.
    supported_filters = [
        f for f in filters if isinstance(f, datamodel.QualityFlagFilter)
    ]
    if len(supported_filters) != len(filters):
        logger.warning(
            'Only filtering on Quality Flag is currently implemented. '
            'Other filters will be discarded.')
        filters = supported_filters
    # Display string for the fill method; an unrecognized method is
    # treated as a constant fill value and formatted accordingly.
    forecast_fill_str = FORECAST_FILL_STRING_MAP.get(
        forecast_fill_method,
        FORECAST_FILL_CONST_STRING.format(forecast_fill_method))
    # Costs are matched to pairs by cost name.
    costs_dict = {cost.name: cost for cost in costs}
    # Keyed by pair name so duplicate names can be detected and avoided.
    processed_fxobs = {}
    for fxobs in forecast_observations:
        # PreprocessingResults from the successive stages accumulate here.
        preproc_results = []

        # Pull forecast and observation series out of the data dict;
        # a missing entry skips the pair.
        try:
            fx_data = data[fxobs.forecast]
        except KeyError as err:
            logger.error('Failed to find data for forecast %s: %s',
                         fxobs.forecast.name, err)
            continue
        try:
            obs_data = data[fxobs.data_object]
        except KeyError as err:
            logger.error('Failed to find data for observation %s: %s',
                         fxobs.data_object.name, err)
            continue

        # Fill gaps in the forecast (and reference forecast, if any).
        fx_data, n_filled = apply_fill(fx_data, fxobs.forecast,
                                       forecast_fill_method, start, end)
        preproc_results.append(datamodel.PreprocessingResult(
            name=FILL_RESULT_TOTAL_STRING.format('', forecast_fill_str),
            count=int(n_filled)))

        ref_data = data.get(fxobs.reference_forecast, None)
        try:
            check_reference_forecast_consistency(fxobs, ref_data)
        except ValueError as err:
            logger.error('Incompatible reference forecast and data: %s', err)
            continue
        if fxobs.reference_forecast is not None:
            ref_data, n_filled = apply_fill(ref_data,
                                            fxobs.reference_forecast,
                                            forecast_fill_method, start, end)
            preproc_results.append(datamodel.PreprocessingResult(
                name=FILL_RESULT_TOTAL_STRING.format(
                    "Reference ", forecast_fill_str),
                count=int(n_filled)))

        # Filter observations by quality flag and resample to the
        # forecast's intervals.
        try:
            forecast_values, observation_values, val_results = filter_resample(
                fxobs, fx_data, obs_data, filters)
        except Exception as err:
            # should figure out the specific exception types to catch
            logger.error(
                'Failed to filter and resample data for pair (%s, %s): %s',
                fxobs.forecast.name, fxobs.data_object.name, err)
            continue

        # The discard totals feed both the validation results table and
        # the preprocessing summary table.
        discarded_before = _search_validation_results(
            val_results, 'TOTAL DISCARD BEFORE RESAMPLE')
        if discarded_before is None:
            logger.warning(
                'TOTAL DISCARD BEFORE RESAMPLE not available for pair '
                '(%s, %s)', fxobs.forecast.name, fxobs.data_object.name)
        else:
            preproc_results.append(datamodel.PreprocessingResult(
                name='Observation Values Discarded Before Resampling',
                count=int(discarded_before)))

        discarded_after = _search_validation_results(
            val_results, 'TOTAL DISCARD AFTER RESAMPLE')
        if discarded_after is None:
            logger.warning(
                'TOTAL DISCARD AFTER RESAMPLE not available for pair (%s, %s)',
                fxobs.forecast.name, fxobs.data_object.name)
        else:
            preproc_results.append(datamodel.PreprocessingResult(
                name='Resampled Observation Values Discarded',
                count=int(discarded_after)))

        # Align observation times to forecast times and build the pair.
        try:
            forecast_values, observation_values, ref_fx_values, results = \
                align(fxobs, forecast_values, observation_values, ref_data,
                      timezone)
            preproc_results.extend(
                datamodel.PreprocessingResult(name=key, count=int(value))
                for key, value in results.items())
        except Exception as err:
            logger.error('Failed to align data for pair (%s, %s): %s',
                         fxobs.forecast.name, fxobs.data_object.name, err)
            continue

        logger.info('Processed data successfully for pair (%s, %s)',
                    fxobs.forecast.name, fxobs.data_object.name)
        name = _name_pfxobs(processed_fxobs.keys(), fxobs.forecast)
        cost_name = fxobs.cost
        cost = costs_dict.get(cost_name)
        # Warn when a pair references a cost that was not supplied.
        if cost_name is not None and cost is None:
            logger.warning(
                'Cannot calculate cost metrics for %s, cost parameters '
                'not supplied for cost: %s', name, cost_name)
        processed_fxobs[name] = datamodel.ProcessedForecastObservation(
            name=name,
            original=fxobs,
            interval_value_type=fxobs.forecast.interval_value_type,
            interval_length=fxobs.forecast.interval_length,
            interval_label=fxobs.forecast.interval_label,
            valid_point_count=len(forecast_values),
            validation_results=val_results,
            preprocessing_results=tuple(preproc_results),
            forecast_values=forecast_values,
            observation_values=observation_values,
            reference_forecast_values=ref_fx_values,
            normalization_factor=fxobs.normalization,
            uncertainty=fxobs.uncertainty,
            cost=cost)
    return tuple(processed_fxobs.values())