Code example #1
def test_rolling_forecast_with_refitting(caplog):
    """Also rolling forecasting, but with re-fitting the model in between.
    We'll test if the expected number of re-fittings happened.
    Also, the model we end up with should not be the one we started with."""
    caplog.set_level(logging.DEBUG, logger="timetomodel.forecasting")
    model, specs = test_utils.create_dummy_model_state(
        DATA_START, data_range_in_hours=192
    ).split()
    start = DATA_START + timedelta(hours=70)
    end = DATA_START + timedelta(hours=190)
    forecasts, final_model_state = forecasting.make_rolling_forecasts(start, end, specs)
    expected_values = specs.outcome_var.load_series(
        time_window=(start, end),
        expected_frequency=timedelta(hours=1),
        check_time_window=True,
    ).loc[start:end][:-1]
    for forecast, expected_value in zip(forecasts, expected_values):
        assert abs(forecast - expected_value) < TOLERANCE
    refitting_logs = [
        log for log in caplog.records if "Fitting new model" in log.message
    ]
    remodel_frequency_in_hours = int(specs.remodel_frequency.total_seconds() / 3600)
    expected_log_times = [remodel_frequency_in_hours]
    while max(expected_log_times) < 190:
        expected_log_times.append(max(expected_log_times) + remodel_frequency_in_hours)
    assert len(refitting_logs) == len([elt for elt in expected_log_times if elt >= 70])
    assert model is not final_model_state.model
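
Note on the expected refit times: the while-loop builds the multiples of the remodel frequency up to and including the first one at or past hour 190. An equivalent, more compact construction (a sketch reusing the names from the test above):

remodel_h = int(specs.remodel_frequency.total_seconds() // 3600)
# Multiples of the remodel frequency, up to the first one at or past hour 190,
# restricted to refits falling inside the forecast window (hour 70 onward).
expected_refits = [t for t in range(remodel_h, 190 + remodel_h, remodel_h) if t >= 70]
assert len(refitting_logs) == len(expected_refits)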
Code example #2
def test_rolling_forecast():
    """Using the simple linear model, create a rolling forecast"""
    model, specs = test_utils.create_dummy_model_state(
        DATA_START, data_range_in_hours=24).split()
    start = DATA_START + timedelta(hours=18)
    end = DATA_START + timedelta(hours=20)
    forecasts = forecasting.make_rolling_forecasts(start, end, specs)[0]
    expected_values = specs.outcome_var.load_series(
        expected_frequency=timedelta(hours=1)).loc[start:end][:-1]
    for forecast, expected_value in zip(forecasts, expected_values):
        assert abs(forecast - expected_value) < TOLERANCE
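
make_rolling_forecasts returns a (forecasts, model_state) pair, which is why the test indexes [0]; the forecasts cover the half-open window [start, end), mirrored by the [:-1] on the inclusive .loc[start:end] slice. A sketch, assuming the hourly frequency used in these tests:

forecasts, model_state = forecasting.make_rolling_forecasts(start, end, specs)
# One forecast per hourly step in [start, end): here, hours 18 and 19.
assert len(forecasts) == int((end - start) / timedelta(hours=1))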
Code example #3
def test_rolling_forecast():
    """Using the simple linear model, create a rolling forecast"""
    model, specs = test_utils.create_dummy_model_state(
        DATA_START, data_range_in_hours=24
    ).split()
    h0 = 3  # first 3 hours can't be predicted, lacking the lagged outcome variable
    hn = 26  # only 2 additional forecasts can be made, because the lowest lag is 2 hours
    start = DATA_START + timedelta(hours=h0)
    end = DATA_START + timedelta(hours=hn)
    forecasts = forecasting.make_rolling_forecasts(start, end, specs)[0]
    expected_values = range(h0, hn)
    for forecast, expected_value in zip(forecasts, expected_values):
        assert abs(forecast - expected_value) < TOLERANCE
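
The window arithmetic behind h0 and hn can be spelled out; a sketch in which the lag values are taken from the comments above, not read from the specs:

data_range_h = 24   # hours of dummy data created by create_dummy_model_state
highest_lag_h = 3   # the first 3 hours lack the lagged outcome variable
lowest_lag_h = 2    # a forecast for hour h only needs the outcome at hour h - 2
h0 = highest_lag_h                  # 3: first forecastable hour
hn = data_range_h + lowest_lag_h    # 26: end of the forecastable window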
Code example #4
def make_rolling_viewpoint_forecasts(
    sensor_id: int,
    horizon: timedelta,
    start: datetime,
    end: datetime,
    custom_model_params: dict = None,
) -> int:
    """Build forecasting model specs, make rolling-viewpoint forecasts, and save the forecasts made.

    Each individual forecast is a belief about a time interval.
    Rolling-viewpoint forecasts share the same belief horizon (the duration between belief time and knowledge time).
    Model specs are also retrained in a rolling fashion, but with their own frequency, set in custom_model_params.
    See the timely-beliefs lib for relevant terminology.

    Parameters
    ----------
    :param sensor_id: int
        To identify which sensor to forecast
    :param horizon: timedelta
        duration between the end of each interval and the time at which the belief about that interval is formed
    :param start: datetime
        start of forecast period, i.e. start time of the first interval to be forecast
    :param end: datetime
        end of forecast period, i.e. end time of the last interval to be forecast
    :param custom_model_params: dict
        params to pass through to the model specs configurator,
        e.g. outcome_var_transformation; advisable only for testing.
    :returns: int
        the number of forecasts made
    """
    # https://docs.sqlalchemy.org/en/13/faq/connections.html#how-do-i-use-engines-connections-sessions-with-python-multiprocessing-or-os-fork
    db.engine.dispose()

    rq_job = get_current_job()

    # find out which model to run, fall back to latest recommended
    model_search_term = rq_job.meta.get("model_search_term", "linear-OLS")

    # find sensor
    sensor = Sensor.query.filter_by(id=sensor_id).one_or_none()

    click.echo(
        "Running Forecasting Job %s: %s for %s on model '%s', from %s to %s" %
        (rq_job.id, sensor, horizon, model_search_term, start, end))

    if hasattr(sensor, "market_type"):
        ex_post_horizon = None  # Todo: until we sort out the ex_post_horizon, use all available price data
    else:
        ex_post_horizon = timedelta(hours=0)

    # Make model specs
    model_configurator = lookup_model_specs_configurator(model_search_term)
    model_specs, model_identifier, fallback_model_search_term = model_configurator(
        sensor=sensor,
        forecast_start=as_server_time(start),
        forecast_end=as_server_time(end),
        forecast_horizon=horizon,
        ex_post_horizon=ex_post_horizon,
        custom_model_params=custom_model_params,
    )
    model_specs.creation_time = server_now()

    rq_job.meta["model_identifier"] = model_identifier
    rq_job.meta["fallback_model_search_term"] = fallback_model_search_term
    rq_job.save()

    # before we run the model, check whether the horizon is supported and enough data is available
    if horizon not in supported_horizons():
        raise InvalidHorizonException("Invalid horizon on job %s: %s" %
                                      (rq_job.id, horizon))

    query_window = get_query_window(
        model_specs.start_of_training,
        end,
        [lag * model_specs.frequency for lag in model_specs.lags],
    )
    check_data_availability(
        sensor,
        TimedBelief,
        start,
        end,
        query_window,
        horizon,
    )

    data_source = get_data_source(
        data_source_name="Seita (%s)" %
        rq_job.meta.get("model_identifier", "unknown model"),
        data_source_type="forecasting script",
    )

    forecasts, model_state = make_rolling_forecasts(
        start=as_server_time(start),
        end=as_server_time(end),
        model_specs=model_specs,
    )
    click.echo("Job %s made %d forecasts." % (rq_job.id, len(forecasts)))

    ts_value_forecasts = [
        TimedBelief(
            event_start=dt,
            belief_horizon=horizon,
            event_value=value,
            sensor=sensor,
            source=data_source,
        ) for dt, value in forecasts.items()
    ]
    bdf = tb.BeliefsDataFrame(ts_value_forecasts)
    save_to_db(bdf)
    db.session.commit()

    return len(forecasts)
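
Since the function reads job metadata via get_current_job(), it is meant to run inside an RQ worker. A minimal sketch of enqueuing it (the queue name "forecasting" and the Redis connection are assumptions, not taken from the code above):

from datetime import datetime, timedelta
from redis import Redis
from rq import Queue

queue = Queue("forecasting", connection=Redis())  # hypothetical queue name
job = queue.enqueue(
    make_rolling_viewpoint_forecasts,
    sensor_id=1,
    horizon=timedelta(hours=6),
    start=datetime(2015, 2, 8),
    end=datetime(2015, 2, 9),
)
job.meta["model_search_term"] = "linear-OLS"  # read back via get_current_job()
job.save_meta()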
Code example #5
def populate_time_series_forecasts(  # noqa: C901
    db: SQLAlchemy,
    sensor_ids: List[int],
    horizons: List[timedelta],
    forecast_start: datetime,
    forecast_end: datetime,
    event_resolution: Optional[timedelta] = None,
):
    training_and_testing_period = timedelta(days=30)

    click.echo(
        "Populating the database %s with time series forecasts of %s ahead ..."
        % (db.engine,
           infl_eng.join([naturaldelta(horizon) for horizon in horizons])))

    # Set a data source for the forecasts
    data_source = DataSource.query.filter_by(name="Seita",
                                             type="demo script").one_or_none()

    # List all sensors for which to forecast.
    sensors = Sensor.query.filter(Sensor.id.in_(sensor_ids)).all()
    if not sensors:
        click.echo("No such sensors in db, so I will not add any forecasts.")
        return

    # Make a model for each sensor and horizon, make rolling forecasts and save to database.
    # We cannot use (faster) bulk save, as forecasts might become regressors in other forecasts.
    for sensor in sensors:
        for horizon in horizons:
            try:
                default_model = lookup_model_specs_configurator()
                model_specs, model_identifier, model_fallback = default_model(
                    sensor=sensor,
                    forecast_start=forecast_start,
                    forecast_end=forecast_end,
                    forecast_horizon=horizon,
                    custom_model_params=dict(
                        training_and_testing_period=training_and_testing_period,
                        event_resolution=event_resolution,
                    ),
                )
                click.echo(
                    "Computing forecasts of %s ahead for sensor %s, "
                    "from %s to %s with a training and testing period of %s, using %s ..."
                    % (
                        naturaldelta(horizon),
                        sensor.id,
                        forecast_start,
                        forecast_end,
                        naturaldelta(training_and_testing_period),
                        model_identifier,
                    ))
                model_specs.creation_time = forecast_start
                forecasts, model_state = make_rolling_forecasts(
                    start=forecast_start,
                    end=forecast_end,
                    model_specs=model_specs)
                # Upsample to sensor resolution if needed
                if forecasts.index.freq > pd.Timedelta(
                        sensor.event_resolution):
                    forecasts = model_specs.outcome_var.resample_data(
                        forecasts,
                        time_window=(forecasts.index.min(),
                                     forecasts.index.max()),
                        expected_frequency=sensor.event_resolution,
                    )
            except (NotEnoughDataException, MissingData, NaNData) as e:
                click.echo("Skipping forecasts for sensor %s: %s" %
                           (sensor, str(e)))
                continue

            beliefs = [
                TimedBelief(
                    event_start=ensure_local_timezone(dt,
                                                      tz_name=LOCAL_TIME_ZONE),
                    belief_horizon=horizon,
                    event_value=value,
                    sensor=sensor,
                    source=data_source,
                ) for dt, value in forecasts.items()
            ]

            print("Saving %s %s-forecasts for %s..." %
                  (len(beliefs), naturaldelta(horizon), sensor.id))
            for belief in beliefs:
                db.session.add(belief)

    click.echo("DB now has %d forecasts" %
               db.session.query(TimedBelief).filter(
                   TimedBelief.belief_horizon > timedelta(hours=0)).count())
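
A hypothetical invocation, e.g. from a CLI command or a test fixture (the sensor ids, horizons and dates are illustrative only):

from datetime import datetime, timedelta
import pytz

populate_time_series_forecasts(
    db,
    sensor_ids=[1, 2],
    horizons=[timedelta(hours=6), timedelta(hours=48)],
    forecast_start=datetime(2015, 2, 8, tzinfo=pytz.utc),
    forecast_end=datetime(2015, 2, 9, tzinfo=pytz.utc),
)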
Code example #6
def populate_time_series_forecasts(  # noqa: C901
    db: SQLAlchemy,
    generic_asset_type: str = None,
    generic_asset_name: str = None,
    from_date: str = "2015-02-08",
    to_date: str = "2015-12-31",
):
    start = ensure_local_timezone(datetime.strptime(from_date, "%Y-%m-%d"),
                                  tz_name=LOCAL_TIME_ZONE)
    end = ensure_local_timezone(
        datetime.strptime(to_date, "%Y-%m-%d") + timedelta(days=1),
        tz_name=LOCAL_TIME_ZONE,
    )
    training_and_testing_period = timedelta(days=30)
    horizons = (
        timedelta(hours=1),
        timedelta(hours=6),
        timedelta(hours=24),
        timedelta(hours=48),
    )

    click.echo(
        "Populating the database %s with time series forecasts of %s ahead ..."
        % (db.engine,
           infl_eng.join([naturaldelta(horizon) for horizon in horizons])))

    # Set a data source for the forecasts
    data_source = DataSource.query.filter_by(name="Seita",
                                             type="demo script").one_or_none()

    # List all generic assets for which to forecast.
    # If no asset name is given, look across the given asset type (or all types).
    # If an asset name is given, the asset type is also required to look it up.
    generic_assets = []
    if generic_asset_name is None:
        if generic_asset_type is None or generic_asset_type == "WeatherSensor":
            sensors = WeatherSensor.query.all()
            generic_assets.extend(sensors)
        if generic_asset_type is None or generic_asset_type == "Asset":
            assets = Asset.query.all()
            generic_assets.extend(assets)
        if generic_asset_type is None or generic_asset_type == "Market":
            markets = Market.query.all()
            generic_assets.extend(markets)
    else:
        if generic_asset_type is None:
            click.echo(
                "If you specify --asset-name, please also specify --asset-type, so we can look it up."
            )
            return
        if generic_asset_type == "WeatherSensor":
            sensors = WeatherSensor.query.filter(
                WeatherSensor.name == generic_asset_name).one_or_none()
            if sensors is not None:
                generic_assets.append(sensors)
        if generic_asset_type == "Asset":
            assets = Asset.query.filter(
                Asset.name == generic_asset_name).one_or_none()
            if assets is not None:
                generic_assets.append(assets)
        if generic_asset_type == "Market":
            markets = Market.query.filter(
                Market.name == generic_asset_name).one_or_none()
            if markets is not None:
                generic_assets.append(markets)
    if not generic_assets:
        click.echo("No such assets in db, so I will not add any forecasts.")
        return

    # Make a model for each asset and horizon, make rolling forecasts and save to database.
    # We cannot use (faster) bulk save, as forecasts might become regressors in other forecasts.
    for generic_asset in generic_assets:
        for horizon in horizons:
            try:
                default_model = lookup_model_specs_configurator()
                model_specs, model_identifier, model_fallback = default_model(
                    generic_asset=generic_asset,
                    forecast_start=start,
                    forecast_end=end,
                    forecast_horizon=horizon,
                    custom_model_params=dict(
                        training_and_testing_period=training_and_testing_period
                    ),
                )
                click.echo(
                    "Computing forecasts of %s ahead for %s, "
                    "from %s to %s with a training and testing period of %s, using %s ..."
                    % (
                        naturaldelta(horizon),
                        generic_asset.name,
                        start,
                        end,
                        naturaldelta(training_and_testing_period),
                        model_identifier,
                    ))
                model_specs.creation_time = start
                forecasts, model_state = make_rolling_forecasts(
                    start=start, end=end, model_specs=model_specs)
            except (NotEnoughDataException, MissingData, NaNData) as e:
                click.echo("Skipping forecasts for asset %s: %s" %
                           (generic_asset, str(e)))
                continue
            """
            import matplotlib.pyplot as plt
            plt.plot(
                model_state.specs.outcome_var.load_series().loc[
                    pd.date_range(start, end=end, freq="15T")
                ],
                label="y",
            )
            plt.plot(forecasts, label="y^hat")
            plt.legend()
            plt.show()
            """

            beliefs = []
            if isinstance(generic_asset, Asset):
                beliefs = [
                    Power(
                        datetime=ensure_local_timezone(
                            dt, tz_name=LOCAL_TIME_ZONE),
                        horizon=horizon,
                        value=value,
                        asset_id=generic_asset.id,
                        data_source_id=data_source.id,
                    ) for dt, value in forecasts.items()
                ]
            elif isinstance(generic_asset, Market):
                beliefs = [
                    Price(
                        datetime=ensure_local_timezone(
                            dt, tz_name=LOCAL_TIME_ZONE),
                        horizon=horizon,
                        value=value,
                        market_id=generic_asset.id,
                        data_source_id=data_source.id,
                    ) for dt, value in forecasts.items()
                ]
            elif isinstance(generic_asset, WeatherSensor):
                beliefs = [
                    Weather(
                        datetime=ensure_local_timezone(
                            dt, tz_name=LOCAL_TIME_ZONE),
                        horizon=horizon,
                        value=value,
                        sensor_id=generic_asset.id,
                        data_source_id=data_source.id,
                    ) for dt, value in forecasts.items()
                ]

            print("Saving %s %s-forecasts for %s..." %
                  (len(beliefs), naturaldelta(horizon), generic_asset.name))
            for belief in beliefs:
                db.session.add(belief)

    click.echo("DB now has %d Power Forecasts" %
               db.session.query(Power).filter(
                   Power.horizon > timedelta(hours=0)).count())
    click.echo("DB now has %d Price Forecasts" %
               db.session.query(Price).filter(
                   Price.horizon > timedelta(hours=0)).count())
    click.echo("DB now has %d Weather Forecasts" %
               db.session.query(Weather).filter(
                   Weather.horizon > timedelta(hours=0)).count())
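
The three isinstance branches above differ only in the belief class and the name of the id field. A table-driven sketch of the same dispatch (not the project's actual code, and it assumes exact types, whereas isinstance would also accept subclasses):

BELIEF_SPEC = {
    Asset: (Power, "asset_id"),
    Market: (Price, "market_id"),
    WeatherSensor: (Weather, "sensor_id"),
}
belief_class, id_field = BELIEF_SPEC[type(generic_asset)]
beliefs = [
    belief_class(
        datetime=ensure_local_timezone(dt, tz_name=LOCAL_TIME_ZONE),
        horizon=horizon,
        value=value,
        data_source_id=data_source.id,
        **{id_field: generic_asset.id},
    ) for dt, value in forecasts.items()
]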
Code example #7
def make_forecasts(
    asset_id: int,
    timed_value_type: str,
    horizon: timedelta,
    start: datetime,
    end: datetime,
    custom_model_params: dict = None,
) -> int:
    """
    Build forecasting model specs, make rolling forecasts, save the forecasts made.
    Each individual forecast is a belief about an interval.
    Returns the number of forecasts made.

    Parameters
    ----------
    :param asset_id: int
        To identify which asset to forecast
    :param timed_value_type: str
        This should go away after a refactoring - we now use it to create the DB entry for the forecasts
    :param horizon: timedelta
        duration between the end of each interval and the time at which the belief about that interval is formed
    :param start: datetime
        start of forecast period, i.e. start time of the first interval to be forecast
    :param end: datetime
        end of forecast period, i.e. end time of the last interval to be forecast
    :param custom_model_params: dict
        params to pass through to the model specs configurator,
        e.g. outcome_var_transformation; advisable only for testing.
    """
    # https://docs.sqlalchemy.org/en/13/faq/connections.html#how-do-i-use-engines-connections-sessions-with-python-multiprocessing-or-os-fork
    db.engine.dispose()

    rq_job = get_current_job()

    # find out which model to run, fall back to latest recommended
    model_search_term = rq_job.meta.get("model_search_term", "linear-OLS")

    # find asset
    asset = get_asset(asset_id, timed_value_type)

    click.echo(
        "Running Forecasting Job %s: %s for %s on model '%s', from %s to %s"
        % (rq_job.id, asset, horizon, model_search_term, start, end)
    )

    if hasattr(asset, "market_type"):
        ex_post_horizon = None  # Todo: until we sort out the ex_post_horizon, use all available price data
    else:
        ex_post_horizon = timedelta(hours=0)

    # Make model specs
    model_configurator = lookup_model_specs_configurator(model_search_term)
    model_specs, model_identifier, fallback_model_search_term = model_configurator(
        generic_asset=asset,
        forecast_start=as_server_time(start),
        forecast_end=as_server_time(end),
        forecast_horizon=horizon,
        ex_post_horizon=ex_post_horizon,
        custom_model_params=custom_model_params,
    )
    model_specs.creation_time = server_now()

    rq_job.meta["model_identifier"] = model_identifier
    rq_job.meta["fallback_model_search_term"] = fallback_model_search_term
    rq_job.save()

    # before we run the model, check whether the horizon is supported and enough data is available
    if horizon not in supported_horizons():
        raise InvalidHorizonException(
            "Invalid horizon on job %s: %s" % (rq_job.id, horizon)
        )

    query_window = get_query_window(
        model_specs.start_of_training,
        end,
        [lag * model_specs.frequency for lag in model_specs.lags],
    )
    check_data_availability(
        asset,
        determine_asset_value_class_by_asset(asset),
        start,
        end,
        query_window,
        horizon,
    )

    data_source = get_data_source(
        data_source_name="Seita (%s)"
        % rq_job.meta.get("model_identifier", "unknown model"),
        data_source_type="forecasting script",
    )

    forecasts, model_state = make_rolling_forecasts(
        start=as_server_time(start),
        end=as_server_time(end),
        model_specs=model_specs,
    )
    click.echo("Job %s made %d forecasts." % (rq_job.id, len(forecasts)))

    ts_value_forecasts = [
        make_timed_value(timed_value_type, asset_id, dt, value, horizon, data_source.id)
        for dt, value in forecasts.items()
    ]

    try:
        save_to_session(ts_value_forecasts)
    except IntegrityError as e:
        current_app.logger.warning(e)
        click.echo("Rolling back due to IntegrityError")
        db.session.rollback()

        if current_app.config.get("FLEXMEASURES_MODE", "") == "play":
            click.echo("Saving again, with overwrite=True")
            save_to_session(ts_value_forecasts, overwrite=True)

    db.session.commit()

    return len(forecasts)
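
The IntegrityError handling above generalizes to a small helper; a sketch (save_with_overwrite_fallback is hypothetical, not part of the codebase):

def save_with_overwrite_fallback(objects, allow_overwrite: bool) -> None:
    # Mirrors the inline logic above: log, roll back, and retry with
    # overwrite when allowed; otherwise the error is swallowed before commit.
    try:
        save_to_session(objects)
    except IntegrityError as e:
        current_app.logger.warning(e)
        db.session.rollback()
        if allow_overwrite:
            save_to_session(objects, overwrite=True)

# Equivalent call for the code above:
save_with_overwrite_fallback(
    ts_value_forecasts,
    allow_overwrite=current_app.config.get("FLEXMEASURES_MODE", "") == "play",
)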