Example #1
def _process_single_group(session, run_for, group, run_time):
    logger.info('Computing forecasts for group %s at %s', run_for, run_time)
    errors = _verify_nwp_forecasts_compatible(group)
    if errors:
        logger.error(
            'Not all forecasts compatible in group with %s. '
            'The following parameters may differ: %s', run_for, errors)
        return
    try:
        key_fx = group.loc[run_for].forecast
    except KeyError:
        logger.error(
            'Forecast, %s, that others are piggybacking on not '
            'found', run_for)
        return
    model_str = group.loc[run_for].model
    model = getattr(models, model_str)
    issue_time = group.loc[run_for].next_issue_time
    if issue_time is None:
        issue_time = utils.get_next_issue_time(key_fx, run_time)
    try:
        nwp_result = run_nwp(key_fx, model, run_time, issue_time)
    except FileNotFoundError as e:
        logger.error('Could not process group of %s, %s', run_for, str(e))
        return
    for fx_id, fx in group['forecast'].items():
        fx_vals = getattr(nwp_result, fx.variable)
        if fx_vals is None:
            logger.warning('No forecast produced for %s in group with %s',
                           fx_id, run_for)
            continue
        logger.info('Posting values %s for %s:%s issued at %s', len(fx_vals),
                    fx.name, fx_id, issue_time)
        _post_forecast_values(session, fx, fx_vals, model_str)
def test_get_next_issue_time(single_forecast, runtime, expected):
    fx = replace(single_forecast,
                 issue_time_of_day=dt.time(5, 0),
                 run_length=pd.Timedelta('6h'),
                 lead_time_to_start=pd.Timedelta('1h'))
    out = utils.get_next_issue_time(fx, runtime)
    assert out == expected
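# Hedged illustration of the schedule the test above exercises: with
# issue_time_of_day=05:00 and run_length=6h, issue times recur at 05, 11,
# 17, and 23 UTC, and get_next_issue_time rounds a run time up to the next
# one. The run time below is an illustrative assumption, not a value from
# the test parametrization.
import pandas as pd

run_time = pd.Timestamp('20190501T0200Z')
candidate_issues = [pd.Timestamp(f'20190501T{h:02d}00Z') for h in (5, 11, 17, 23)]
next_issue = min(t for t in candidate_issues if t >= run_time)
assert next_issue == pd.Timestamp('20190501T0500Z')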
def find_reference_nwp_forecasts(forecasts, run_time=None):
    """
    Sort through all *forecasts* to find those that should be generated
    by the Arbiter from NWP models. The forecast must have a *model* key
    in *extra_parameters* (formatted as a JSON string). If *piggyback_on*
    is also defined in *extra_parameters*, it should be the forecast_id
    of another forecast that has the same parameters, including the site,
    except for the variable.

    Parameters
    ----------
    forecasts : list of datamodel.Forecasts
        The forecasts that should be filtered to find references.
    run_time : pandas.Timestamp or None, default None
        The run_time at which forecast generation is taking place. If not
        None, the next issue time for each forecast is added to the output.

    Returns
    -------
    pandas.DataFrame
        NWP reference forecasts with index of forecast_id and columns
        (forecast, piggyback_on, model, next_issue_time).
    """
    df_vals = []
    for fx in forecasts:
        # more explicit than filter()
        if not _is_reference_forecast(fx.extra_parameters):
            logger.debug('Forecast %s is not labeled as a reference forecast',
                         fx.forecast_id)
            continue

        try:
            extra_parameters = json.loads(fx.extra_parameters)
        except json.JSONDecodeError:
            logger.warning(
                'Failed to decode extra_parameters for %s: %s as JSON',
                fx.name, fx.forecast_id)
            continue

        try:
            model = extra_parameters['model']
        except KeyError:
            logger.error(
                'Forecast, %s: %s, has no model. Cannot make forecast.',
                fx.name, fx.forecast_id)
            continue

        if run_time is not None:
            next_issue_time = utils.get_next_issue_time(fx, run_time)
        else:
            next_issue_time = None
        piggyback_on = extra_parameters.get('piggyback_on', fx.forecast_id)
        df_vals.append((fx.forecast_id, fx, piggyback_on, model,
                        next_issue_time))

    forecast_df = pd.DataFrame(
        df_vals, columns=['forecast_id', 'forecast', 'piggyback_on', 'model',
                          'next_issue_time']
        ).set_index('forecast_id')
    return forecast_df
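# A minimal sketch of the extra_parameters JSON that find_reference_nwp_forecasts
# looks for. The model name and forecast_id below are illustrative assumptions;
# only the keys mirror what the function reads.
import json

key_fx_extra_parameters = json.dumps({
    'is_reference_forecast': True,
    'model': 'gfs_quarter_deg_hourly_to_hourly_mean',
})
piggyback_fx_extra_parameters = json.dumps({
    'is_reference_forecast': True,
    'model': 'gfs_quarter_deg_hourly_to_hourly_mean',
    # forecast_id of the key forecast whose NWP run this one reuses
    'piggyback_on': '11c20780-76ae-4b11-bef1-7a75bdc784e3',
})
# find_reference_nwp_forecasts(forecasts, run_time) would then return a
# DataFrame indexed by forecast_id with columns
# (forecast, piggyback_on, model, next_issue_time).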
Example #4
def _nwp_issue_time_generator(fx, gap_start, gap_end):
    # max_run_time is the latest forecast issue time that will generate
    # forecast values that end before gap_end
    max_run_time = utils.find_next_issue_time_from_last_forecast(
        fx, gap_end - pd.Timedelta('1ns'))
    # next_issue_time is the forecast issue time that will generate forecast
    # values at gap_start
    next_issue_time = utils.find_next_issue_time_from_last_forecast(
        fx, gap_start)
    while next_issue_time < max_run_time:
        yield next_issue_time
        next_issue_time = utils.get_next_issue_time(
            fx, next_issue_time + pd.Timedelta('1ns'))
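# Self-contained illustration of the issue times the generator above is meant
# to produce: for a forecast issued every run_length, the runs needed to fill
# a gap are those whose values fall in [gap_start, gap_end). The 6 hour
# cadence and gap bounds are illustrative assumptions that ignore
# lead_time_to_start.
import pandas as pd

run_length = pd.Timedelta('6h')
gap_start = pd.Timestamp('20200601T0600Z')
gap_end = pd.Timestamp('20200602T0000Z')
issue_times = pd.date_range(start=gap_start, end=gap_end - pd.Timedelta('1ns'),
                            freq=run_length)
# -> 06:00, 12:00, 18:00 UTC on 2020-06-01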
def restrict_forecast_upload_window(extra_parameters, get_forecast,
                                    first_time):
    """
    Check that the first_time falls within the upload window before the
    next initialization time of the forecast, as determined from the
    current time.
    Accounts for forecast lead_time_to_start and interval_label.
    Requires 'read' permission on the forecast in question.

    Parameters
    ----------
    extra_parameters : str
        The extra_parameters string for the forecast. If
        '"restrict_upload": true' is not found in the string, no restriction
        occurs and this function returns immediately.
    get_forecast : func
        Function to get the forecast from the database.
    first_time : datetime-like
        First timestamp in the posted forecast timeseries.

    Raises
    ------
    NotFoundException
        When the user does not have 'read' permission for the forecast or
        it doesn't exist.
    BadAPIRequest
        If the first_time of the timeseries is not consistent with the
        next initialization time of the forecast.
    """
    if not _restrict_in_extra(extra_parameters):
        return

    try:
        fx_dict = get_forecast().copy()
    except (StorageAuthError, NotFoundException):
        raise NotFoundException(
            errors={'404': 'Cannot read forecast or forecast does not exist'})
    # we don't care about the axis or constant values for probabilistic
    fx_dict['site'] = Site('name', 0, 0, 0, 'UTC')
    fx = Forecast.from_dict(fx_dict)
    next_issue_time = fx_utils.get_next_issue_time(fx,
                                                   _current_utc_timestamp())
    expected_start = next_issue_time + fx.lead_time_to_start
    if fx.interval_label == 'ending':
        expected_start += fx.interval_length
    if first_time != expected_start:
        raise BadAPIRequest(
            errors={
                'issue_time':
                (f'Currently only accepting forecasts issued for {next_issue_time}.'
                 f' Expecting forecast series to start at {expected_start}.')
            })
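# Hedged numeric example of the expected_start check above: with a next issue
# time of 13:00Z, lead_time_to_start of 1 hour, interval_length of 5 minutes,
# and interval_label == 'ending', the first posted timestamp must be 14:05Z.
# All of these values are illustrative assumptions.
import pandas as pd

next_issue_time = pd.Timestamp('20200101T1300Z')
expected_start = next_issue_time + pd.Timedelta('1h')   # lead_time_to_start
expected_start += pd.Timedelta('5min')                  # 'ending' label shift
assert expected_start == pd.Timestamp('20200101T1405Z')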
Example #6
def process_nwp_forecast_groups(session, run_time, forecast_df):
    """
    Groups NWP forecasts based on piggyback_on, calculates the forecast as
    appropriate for *run_time*, and uploads the values to the API.

    Parameters
    ----------
    session : io.api.APISession
        API session for uploading forecast values
    run_time : pandas.Timestamp
        Run time of the forecast. Also used along with the forecast metadata
        to determine the issue_time of the forecast.
    forecast_df : pandas.DataFrame
        Dataframe of the forecast objects as produced by
        :py:func:`solarforecastarbiter.reference_forecasts.main.find_reference_nwp_forecasts`.
    """  # NOQA
    for run_for, group in forecast_df.groupby('piggyback_on'):
        logger.info('Computing forecasts for group %s', run_for)
        errors = _verify_nwp_forecasts_compatible(group)
        if errors:
            logger.error(
                'Not all forecasts compatible in group with %s. '
                'The following parameters may differ: %s', run_for, errors)
            continue
        try:
            key_fx = group.loc[run_for].forecast
        except KeyError:
            logger.error(
                'Forecast, %s, that others are piggybacking on not '
                'found', run_for)
            continue
        model_str = group.loc[run_for].model
        model = getattr(models, model_str)
        issue_time = group.loc[run_for].next_issue_time
        if issue_time is None:
            issue_time = utils.get_next_issue_time(key_fx, run_time)
        try:
            nwp_result = run_nwp(key_fx, model, run_time, issue_time)
        except FileNotFoundError as e:
            logger.error('Could not process group of %s, %s', run_for, str(e))
            continue
        for fx_id, fx in group['forecast'].items():
            fx_vals = getattr(nwp_result, fx.variable)
            if fx_vals is None:
                logger.warning('No forecast produced for %s in group with %s',
                               fx_id, run_for)
                continue
            logger.info('Posting values %s for %s:%s issued at %s',
                        len(fx_vals), fx.name, fx_id, issue_time)
            _post_forecast_values(session, fx, fx_vals, model_str)
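# A minimal sketch of chaining the pieces above: find the reference NWP
# forecasts, then compute and post each piggyback group. The APISession token
# is a placeholder and the surrounding wiring is an assumption based on this
# module's docstrings.
import pandas as pd
from solarforecastarbiter.io import api

session = api.APISession('TOKEN')  # placeholder access token
forecasts = session.list_forecasts()
run_time = pd.Timestamp.now(tz='UTC')
forecast_df = find_reference_nwp_forecasts(forecasts, run_time)
process_nwp_forecast_groups(session, run_time, forecast_df)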
Example #7
def _issue_time_generator(observation, fx, obs_mint, obs_maxt, next_issue_time,
                          max_run_time):
    # now find all the run times that can be made based on the
    # last observation timestamp
    while next_issue_time <= max_run_time:
        data_start, data_end = utils.get_data_start_end(
            observation, fx, next_issue_time, next_issue_time)
        if data_end > obs_maxt:
            break

        if data_start > obs_mint:
            yield next_issue_time
        next_issue_time = utils.get_next_issue_time(
            fx, next_issue_time + pd.Timedelta('1ns'))
Example #8
def generate_reference_persistence_forecast_parameters(session, forecasts,
                                                       observations,
                                                       max_run_time):
    """Sort through all *forecasts* to find those that should be generated
    by the Arbiter from persisting Observation values. The forecast
    must have ``'is_reference_persistence_forecast': true`` and an
    observation_id in Forecast.extra_parameters (formatted as a JSON
    string). A boolean value for "index_persistence" in
    Forecast.extra_parameters controls whether the persistence
    forecast should be made adjusting for clear-sky/AC power index or
    not.

    Parameters
    ----------
    session : solarforecastarbiter.io.api.APISession
    forecasts : list of datamodel.Forecasts
        The forecasts that should be filtered to find references.
    observations : list of datamodel.Observations
        Observations that are available for fetching values and making
        persistence forecasts.
    max_run_time : pandas.Timestamp
        The maximum run time/issue time for any forecasts. Usually now.

    Returns
    -------
    generator of (Forecast, Observation, next_issue_time, index)

    """
    user_info = session.get_user_info()
    observation_dict = {obs.observation_id: obs for obs in observations}
    for fx in forecasts:
        if not _is_reference_persistence_forecast(fx.extra_parameters):
            logger.debug(
                'Forecast %s is not labeled as a reference '
                'persistence forecast', fx.forecast_id)
            continue

        if not fx.provider == user_info['organization']:
            logger.debug("Forecast %s is not in user's organization",
                         fx.forecast_id)
            continue

        try:
            extra_parameters = json.loads(fx.extra_parameters)
        except json.JSONDecodeError:
            logger.warning(
                'Failed to decode extra_parameters for %s: %s as JSON',
                fx.name, fx.forecast_id)
            continue

        try:
            observation_id = extra_parameters['observation_id']
        except KeyError:
            logger.error(
                'Forecast, %s: %s, has no observation_id to base forecasts'
                ' off of. Cannot make persistence forecast.', fx.name,
                fx.forecast_id)
            continue
        if observation_id not in observation_dict:
            logger.error(
                'Observation %s not in set of given observations.'
                ' Cannot generate persistence forecast for %s: %s.',
                observation_id, fx.name, fx.forecast_id)
            continue
        observation = observation_dict[observation_id]

        index = extra_parameters.get('index_persistence', False)
        obs_mint, obs_maxt = session.get_observation_time_range(observation_id)
        if pd.isna(obs_maxt):  # no observations to use anyway
            logger.info(
                'No observation values to use for %s: %s from observation %s',
                fx.name, fx.forecast_id, observation_id)
            continue

        fx_mint, fx_maxt = session.get_forecast_time_range(fx.forecast_id)
        # find the next issue time for the forecast based on the last value
        # in the forecast series
        if pd.isna(fx_maxt):
            # if there is no forecast yet, go back a bit from the last
            # observation. Don't use the start of observations, since it
            # could really stress the workers if we have a few years of
            # data before deciding to make a persistence fx
            next_issue_time = utils.get_next_issue_time(
                fx, obs_maxt - fx.run_length)
        else:
            next_issue_time = utils.find_next_issue_time_from_last_forecast(
                fx, fx_maxt)

        # now find all the run times that can be made based on the
        # last observation timestamp
        while next_issue_time <= max_run_time:
            data_start, data_end = utils.get_data_start_end(
                observation, fx, next_issue_time)
            if data_end > obs_maxt:
                break

            if data_start > obs_mint:
                yield (fx, observation, next_issue_time, index)
            next_issue_time = utils.get_next_issue_time(
                fx, next_issue_time + pd.Timedelta('1ns'))
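# Hedged numeric illustration of the bootstrap branch above: when no forecast
# values exist yet (fx_maxt is NaT), the first issue time is seeded from the
# last observation minus run_length instead of the start of the observation
# record. The timestamps and 1 hour run_length are illustrative assumptions.
import pandas as pd

run_length = pd.Timedelta('1h')
obs_maxt = pd.Timestamp('20200514T1230Z')
fx_maxt = pd.NaT
if pd.isna(fx_maxt):
    seed_time = obs_maxt - run_length  # 2020-05-14 11:30Z
# utils.get_next_issue_time(fx, seed_time) would then round this up to the
# forecast's issue schedule.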
Example #9
def generate_reference_persistence_forecast_parameters(session, forecasts,
                                                       observations,
                                                       max_run_time):
    """Sort through all *forecasts* to find those that should be generated
    by the Arbiter from persisting Observation values. The forecast
    must have ``'is_reference_persistence_forecast': true`` and an
    observation_id in Forecast.extra_parameters (formatted as a JSON
    string). A boolean value for "index_persistence" in
    Forecast.extra_parameters controls whether the persistence
    forecast should be made adjusting for clear-sky/AC power index or
    not.

    Parameters
    ----------
    session : solarforecastarbiter.io.api.APISession
    forecasts : list of datamodel.Forecasts
        The forecasts that should be filtered to find references.
    observations : list of datamodel.Observations
        Observations that are available for fetching values and making
        persistence forecasts.
    max_run_time : pandas.Timestamp
        The maximum run time/issue time for any forecasts. Usually now.

    Returns
    -------
    generator of (Forecast, Observation, index, data_start, issue_times)
    """
    user_info = session.get_user_info()
    observation_dict = {obs.observation_id: obs for obs in observations}
    out = namedtuple(
        'PersistenceParameters',
        ['forecast', 'observation', 'index', 'data_start', 'issue_times'])

    for fx in forecasts:
        obs_ind_mint_maxt = _ref_persistence_check(fx, observation_dict,
                                                   user_info, session)
        if obs_ind_mint_maxt is None:
            continue
        observation, index, obs_mint, obs_maxt = obs_ind_mint_maxt
        # probably split this out to generate issues times for only gaps vs
        # latest
        if isinstance(fx, datamodel.ProbabilisticForecast):
            fx_mint, fx_maxt = \
                session.get_probabilistic_forecast_constant_value_time_range(
                    fx.constant_values[0].forecast_id)
        else:
            fx_mint, fx_maxt = session.get_forecast_time_range(fx.forecast_id)
        # find the next issue time for the forecast based on the last value
        # in the forecast series
        if pd.isna(fx_maxt):
            # if there is no forecast yet, go back a bit from the last
            # observation. Don't use the start of observations, since it
            # could really stress the workers if we have a few years of
            # data before deciding to make a persistence fx
            next_issue_time = utils.get_next_issue_time(
                fx, obs_maxt - fx.run_length)
        else:
            next_issue_time = utils.find_next_issue_time_from_last_forecast(
                fx, fx_maxt)

        data_start, _ = utils.get_data_start_end(observation, fx,
                                                 next_issue_time,
                                                 next_issue_time)
        issue_times = tuple(
            _issue_time_generator(observation, fx, obs_mint, obs_maxt,
                                  next_issue_time, max_run_time))

        if len(issue_times) == 0:
            continue

        yield out(fx, observation, index, data_start, issue_times)
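# Hedged consumption sketch for the namedtuple-yielding variant above. The
# session setup and 'TOKEN' placeholder are assumptions; only the field names
# of the yielded PersistenceParameters come from the code.
import pandas as pd
from solarforecastarbiter.io import api

session = api.APISession('TOKEN')  # placeholder access token
forecasts = session.list_forecasts()
observations = session.list_observations()
for params in generate_reference_persistence_forecast_parameters(
        session, forecasts, observations, pd.Timestamp.now(tz='UTC')):
    for issue_time in params.issue_times:
        # one persistence run per issue time, using data from
        # params.data_start onward and the clear-sky index flag params.index
        pass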