def test_rolling_forecast_with_refitting(caplog):
    """Also a rolling forecast, but with re-fitting the model in between.
    We test whether the expected number of re-fittings happened.
    Also, the model we end up with should not be the one we started with."""
    caplog.set_level(logging.DEBUG, logger="timetomodel.forecasting")
    model, specs = test_utils.create_dummy_model_state(
        DATA_START, data_range_in_hours=192
    ).split()
    start = DATA_START + timedelta(hours=70)
    end = DATA_START + timedelta(hours=190)
    forecasts, final_model_state = forecasting.make_rolling_forecasts(
        start, end, specs
    )
    expected_values = specs.outcome_var.load_series(
        time_window=(start, end),
        expected_frequency=timedelta(hours=1),
        check_time_window=True,
    ).loc[start:end][:-1]
    for forecast, expected_value in zip(forecasts, expected_values):
        assert abs(forecast - expected_value) < TOLERANCE

    refitting_logs = [
        log for log in caplog.records if "Fitting new model" in log.message
    ]
    remodel_frequency_in_hours = int(specs.remodel_frequency.total_seconds() / 3600)
    expected_log_times = [remodel_frequency_in_hours]
    while max(expected_log_times) < 190:
        expected_log_times.append(
            max(expected_log_times) + remodel_frequency_in_hours
        )
    assert len(refitting_logs) == len([elt for elt in expected_log_times if elt >= 70])
    assert model is not final_model_state.model
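# --- Usage sketch (not a test; illustration only) ---------------------------------
# The re-fitting cadence above comes from specs.remodel_frequency. The ModelState
# returned by make_rolling_forecasts can be split back into a model and specs,
# mirroring create_dummy_model_state(...).split(). This is a hedged sketch that
# reuses DATA_START and test_utils from this module; it is not part of the suite.
def _example_rolling_forecast_with_refitting():
    _, specs = test_utils.create_dummy_model_state(
        DATA_START, data_range_in_hours=192
    ).split()
    print(specs.remodel_frequency)  # cadence at which make_rolling_forecasts re-fits
    forecasts, final_state = forecasting.make_rolling_forecasts(
        DATA_START + timedelta(hours=70), DATA_START + timedelta(hours=190), specs
    )
    refitted_model, final_specs = final_state.split()  # last fitted model + its specs
    print(forecasts.head())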
def test_rolling_forecast():
    """Using the simple linear model, create a rolling forecast"""
    model, specs = test_utils.create_dummy_model_state(
        DATA_START, data_range_in_hours=24
    ).split()
    start = DATA_START + timedelta(hours=18)
    end = DATA_START + timedelta(hours=20)
    forecasts = forecasting.make_rolling_forecasts(start, end, specs)[0]
    expected_values = specs.outcome_var.load_series(
        expected_frequency=timedelta(hours=1)
    ).loc[start:end][:-1]
    for forecast, expected_value in zip(forecasts, expected_values):
        assert abs(forecast - expected_value) < TOLERANCE
def test_rolling_forecast():
    """Using the simple linear model, create a rolling forecast"""
    model, specs = test_utils.create_dummy_model_state(
        DATA_START, data_range_in_hours=24
    ).split()
    h0 = 3  # the first 3 hours can't be predicted, lacking the lagged outcome variable
    hn = 26  # only 2 additional forecasts can be made, because the lowest lag is 2 hours
    start = DATA_START + timedelta(hours=h0)
    end = DATA_START + timedelta(hours=hn)
    forecasts = forecasting.make_rolling_forecasts(start, end, specs)[0]
    expected_values = range(h0, hn)
    for forecast, expected_value in zip(forecasts, expected_values):
        assert abs(forecast - expected_value) < TOLERANCE
def make_rolling_viewpoint_forecasts(
    sensor_id: int,
    horizon: timedelta,
    start: datetime,
    end: datetime,
    custom_model_params: dict = None,
) -> int:
    """Build forecasting model specs, make rolling-viewpoint forecasts, and save the forecasts made.

    Each individual forecast is a belief about a time interval.
    Rolling-viewpoint forecasts share the same belief horizon (the duration between belief time and knowledge time).
    Model specs are also retrained in a rolling fashion, but with their own frequency set in custom_model_params.
    See the timely-beliefs lib for relevant terminology.

    Parameters
    ----------
    :param sensor_id: int
        To identify which sensor to forecast
    :param horizon: timedelta
        duration between the end of each interval and the time at which the belief about that interval is formed
    :param start: datetime
        start of forecast period, i.e. start time of the first interval to be forecast
    :param end: datetime
        end of forecast period, i.e. end time of the last interval to be forecast
    :param custom_model_params: dict
        pass in params which will be passed to the model specs configurator,
        e.g. outcome_var_transformation; only advisable to be used for testing.
    :returns: int
        the number of forecasts made
    """
    # https://docs.sqlalchemy.org/en/13/faq/connections.html#how-do-i-use-engines-connections-sessions-with-python-multiprocessing-or-os-fork
    db.engine.dispose()

    rq_job = get_current_job()

    # find out which model to run, fall back to latest recommended
    model_search_term = rq_job.meta.get("model_search_term", "linear-OLS")

    # find sensor
    sensor = Sensor.query.filter_by(id=sensor_id).one_or_none()

    click.echo(
        "Running Forecasting Job %s: %s for %s on model '%s', from %s to %s"
        % (rq_job.id, sensor, horizon, model_search_term, start, end)
    )

    if hasattr(sensor, "market_type"):
        ex_post_horizon = None  # Todo: until we sorted out the ex_post_horizon, use all available price data
    else:
        ex_post_horizon = timedelta(hours=0)

    # Make model specs
    model_configurator = lookup_model_specs_configurator(model_search_term)
    model_specs, model_identifier, fallback_model_search_term = model_configurator(
        sensor=sensor,
        forecast_start=as_server_time(start),
        forecast_end=as_server_time(end),
        forecast_horizon=horizon,
        ex_post_horizon=ex_post_horizon,
        custom_model_params=custom_model_params,
    )
    model_specs.creation_time = server_now()

    rq_job.meta["model_identifier"] = model_identifier
    rq_job.meta["fallback_model_search_term"] = fallback_model_search_term
    rq_job.save()

    # before we run the model, check if horizon is okay and enough data is available
    if horizon not in supported_horizons():
        raise InvalidHorizonException(
            "Invalid horizon on job %s: %s" % (rq_job.id, horizon)
        )
    query_window = get_query_window(
        model_specs.start_of_training,
        end,
        [lag * model_specs.frequency for lag in model_specs.lags],
    )
    check_data_availability(
        sensor,
        TimedBelief,
        start,
        end,
        query_window,
        horizon,
    )

    data_source = get_data_source(
        data_source_name="Seita (%s)"
        % rq_job.meta.get("model_identifier", "unknown model"),
        data_source_type="forecasting script",
    )

    forecasts, model_state = make_rolling_forecasts(
        start=as_server_time(start),
        end=as_server_time(end),
        model_specs=model_specs,
    )
    click.echo("Job %s made %d forecasts." % (rq_job.id, len(forecasts)))

    ts_value_forecasts = [
        TimedBelief(
            event_start=dt,
            belief_horizon=horizon,
            event_value=value,
            sensor=sensor,
            source=data_source,
        )
        for dt, value in forecasts.items()
    ]
    bdf = tb.BeliefsDataFrame(ts_value_forecasts)
    save_to_db(bdf)
    db.session.commit()

    return len(forecasts)
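# --- Usage sketch (illustration only) ----------------------------------------------
# make_rolling_viewpoint_forecasts is meant to run as an RQ job: it reads rq_job.meta
# for the model search term. Below is a hedged sketch of enqueueing it. The queue
# name, Redis connection and the example sensor id / dates are assumptions, not taken
# from this code. In practice the job meta is set when the job is created; setting it
# right after enqueueing, as here, could race with a fast worker and only serves to
# show where the model search term is read from.
def _example_enqueue_rolling_viewpoint_forecasts():
    from datetime import datetime, timedelta

    from redis import Redis
    from rq import Queue

    queue = Queue("forecasting", connection=Redis())  # assumed queue name
    job = queue.enqueue(
        make_rolling_viewpoint_forecasts,
        sensor_id=1,  # hypothetical sensor
        horizon=timedelta(hours=6),
        start=datetime(2015, 2, 8),
        end=datetime(2015, 2, 9),
    )
    job.meta["model_search_term"] = "linear-OLS"  # the job falls back to this anyway
    job.save_meta()
    return job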
def populate_time_series_forecasts(  # noqa: C901
    db: SQLAlchemy,
    sensor_ids: List[int],
    horizons: List[timedelta],
    forecast_start: datetime,
    forecast_end: datetime,
    event_resolution: Optional[timedelta] = None,
):
    training_and_testing_period = timedelta(days=30)

    click.echo(
        "Populating the database %s with time series forecasts of %s ahead ..."
        % (db.engine, infl_eng.join([naturaldelta(horizon) for horizon in horizons]))
    )

    # Set a data source for the forecasts
    data_source = DataSource.query.filter_by(
        name="Seita", type="demo script"
    ).one_or_none()

    # List all sensors for which to forecast.
    sensors = Sensor.query.filter(Sensor.id.in_(sensor_ids)).all()
    if not sensors:
        click.echo("No such sensors in db, so I will not add any forecasts.")
        return

    # Make a model for each sensor and horizon, make rolling forecasts and save to database.
    # We cannot use (faster) bulk save, as forecasts might become regressors in other forecasts.
    for sensor in sensors:
        for horizon in horizons:
            try:
                default_model = lookup_model_specs_configurator()
                model_specs, model_identifier, model_fallback = default_model(
                    sensor=sensor,
                    forecast_start=forecast_start,
                    forecast_end=forecast_end,
                    forecast_horizon=horizon,
                    custom_model_params=dict(
                        training_and_testing_period=training_and_testing_period,
                        event_resolution=event_resolution,
                    ),
                )
                click.echo(
                    "Computing forecasts of %s ahead for sensor %s, "
                    "from %s to %s with a training and testing period of %s, using %s ..."
                    % (
                        naturaldelta(horizon),
                        sensor.id,
                        forecast_start,
                        forecast_end,
                        naturaldelta(training_and_testing_period),
                        model_identifier,
                    )
                )
                model_specs.creation_time = forecast_start
                forecasts, model_state = make_rolling_forecasts(
                    start=forecast_start, end=forecast_end, model_specs=model_specs
                )
                # Upsample to sensor resolution if needed
                if forecasts.index.freq > pd.Timedelta(sensor.event_resolution):
                    forecasts = model_specs.outcome_var.resample_data(
                        forecasts,
                        time_window=(forecasts.index.min(), forecasts.index.max()),
                        expected_frequency=sensor.event_resolution,
                    )
            except (NotEnoughDataException, MissingData, NaNData) as e:
                click.echo("Skipping forecasts for sensor %s: %s" % (sensor, str(e)))
                continue

            beliefs = [
                TimedBelief(
                    event_start=ensure_local_timezone(dt, tz_name=LOCAL_TIME_ZONE),
                    belief_horizon=horizon,
                    event_value=value,
                    sensor=sensor,
                    source=data_source,
                )
                for dt, value in forecasts.items()
            ]

            print(
                "Saving %s %s-forecasts for %s..."
                % (len(beliefs), naturaldelta(horizon), sensor.id)
            )
            for belief in beliefs:
                db.session.add(belief)

    click.echo(
        "DB now has %d forecasts"
        % db.session.query(TimedBelief)
        .filter(TimedBelief.belief_horizon > timedelta(hours=0))
        .count()
    )
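# --- Usage sketch (illustration only) ----------------------------------------------
# A hedged sketch of invoking populate_time_series_forecasts from a script or CLI
# command. The sensor ids, horizons and forecast window are placeholders; a Flask app
# context with a configured SQLAlchemy `db` is assumed. Naive datetimes are used for
# brevity; in practice these should be timezone-aware.
def _example_populate_forecasts():
    from datetime import datetime, timedelta

    populate_time_series_forecasts(
        db=db,
        sensor_ids=[1, 2],  # hypothetical sensors
        horizons=[timedelta(hours=6), timedelta(hours=24)],
        forecast_start=datetime(2015, 2, 8),
        forecast_end=datetime(2015, 3, 1),
    )
    db.session.commit()  # the helper adds beliefs to the session but does not commit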
def populate_time_series_forecasts(  # noqa: C901
    db: SQLAlchemy,
    generic_asset_type: str = None,
    generic_asset_name: str = None,
    from_date: str = "2015-02-08",
    to_date: str = "2015-12-31",
):
    start = ensure_local_timezone(
        datetime.strptime(from_date, "%Y-%m-%d"), tz_name=LOCAL_TIME_ZONE
    )
    end = ensure_local_timezone(
        datetime.strptime(to_date, "%Y-%m-%d") + timedelta(days=1),
        tz_name=LOCAL_TIME_ZONE,
    )
    training_and_testing_period = timedelta(days=30)
    horizons = (
        timedelta(hours=1),
        timedelta(hours=6),
        timedelta(hours=24),
        timedelta(hours=48),
    )

    click.echo(
        "Populating the database %s with time series forecasts of %s ahead ..."
        % (db.engine, infl_eng.join([naturaldelta(horizon) for horizon in horizons]))
    )

    # Set a data source for the forecasts
    data_source = DataSource.query.filter_by(
        name="Seita", type="demo script"
    ).one_or_none()

    # List all generic assets for which to forecast.
    # Look into the asset type if no asset name is given.
    # If an asset name is given, look up that single asset (the asset type is then also required).
    generic_assets = []
    if generic_asset_name is None:
        if generic_asset_type is None or generic_asset_type == "WeatherSensor":
            sensors = WeatherSensor.query.all()
            generic_assets.extend(sensors)
        if generic_asset_type is None or generic_asset_type == "Asset":
            assets = Asset.query.all()
            generic_assets.extend(assets)
        if generic_asset_type is None or generic_asset_type == "Market":
            markets = Market.query.all()
            generic_assets.extend(markets)
    else:
        if generic_asset_type is None:
            click.echo(
                "If you specify --asset-name, please also specify --asset-type, so we can look it up."
            )
            return
        if generic_asset_type == "WeatherSensor":
            sensors = WeatherSensor.query.filter(
                WeatherSensor.name == generic_asset_name
            ).one_or_none()
            if sensors is not None:
                generic_assets.append(sensors)
        if generic_asset_type == "Asset":
            assets = Asset.query.filter(
                Asset.name == generic_asset_name
            ).one_or_none()
            if assets is not None:
                generic_assets.append(assets)
        if generic_asset_type == "Market":
            markets = Market.query.filter(
                Market.name == generic_asset_name
            ).one_or_none()
            if markets is not None:
                generic_assets.append(markets)
    if not generic_assets:
        click.echo("No such assets in db, so I will not add any forecasts.")
        return

    # Make a model for each asset and horizon, make rolling forecasts and save to database.
    # We cannot use (faster) bulk save, as forecasts might become regressors in other forecasts.
    for generic_asset in generic_assets:
        for horizon in horizons:
            try:
                default_model = lookup_model_specs_configurator()
                model_specs, model_identifier, model_fallback = default_model(
                    generic_asset=generic_asset,
                    forecast_start=start,
                    forecast_end=end,
                    forecast_horizon=horizon,
                    custom_model_params=dict(
                        training_and_testing_period=training_and_testing_period
                    ),
                )
                click.echo(
                    "Computing forecasts of %s ahead for %s, "
                    "from %s to %s with a training and testing period of %s, using %s ..."
                    % (
                        naturaldelta(horizon),
                        generic_asset.name,
                        start,
                        end,
                        naturaldelta(training_and_testing_period),
                        model_identifier,
                    )
                )
                model_specs.creation_time = start
                forecasts, model_state = make_rolling_forecasts(
                    start=start, end=end, model_specs=model_specs
                )
            except (NotEnoughDataException, MissingData, NaNData) as e:
                click.echo(
                    "Skipping forecasts for asset %s: %s" % (generic_asset, str(e))
                )
                continue
            """
            import matplotlib.pyplot as plt
            plt.plot(
                model_state.specs.outcome_var.load_series().loc[
                    pd.date_range(start, end=end, freq="15T")
                ],
                label="y",
            )
            plt.plot(forecasts, label="y^hat")
            plt.legend()
            plt.show()
            """

            beliefs = []
            if isinstance(generic_asset, Asset):
                beliefs = [
                    Power(
                        datetime=ensure_local_timezone(dt, tz_name=LOCAL_TIME_ZONE),
                        horizon=horizon,
                        value=value,
                        asset_id=generic_asset.id,
                        data_source_id=data_source.id,
                    )
                    for dt, value in forecasts.items()
                ]
            elif isinstance(generic_asset, Market):
                beliefs = [
                    Price(
                        datetime=ensure_local_timezone(dt, tz_name=LOCAL_TIME_ZONE),
                        horizon=horizon,
                        value=value,
                        market_id=generic_asset.id,
                        data_source_id=data_source.id,
                    )
                    for dt, value in forecasts.items()
                ]
            elif isinstance(generic_asset, WeatherSensor):
                beliefs = [
                    Weather(
                        datetime=ensure_local_timezone(dt, tz_name=LOCAL_TIME_ZONE),
                        horizon=horizon,
                        value=value,
                        sensor_id=generic_asset.id,
                        data_source_id=data_source.id,
                    )
                    for dt, value in forecasts.items()
                ]

            print(
                "Saving %s %s-forecasts for %s..."
                % (len(beliefs), naturaldelta(horizon), generic_asset.name)
            )
            for belief in beliefs:
                db.session.add(belief)

    click.echo(
        "DB now has %d Power Forecasts"
        % db.session.query(Power).filter(Power.horizon > timedelta(hours=0)).count()
    )
    click.echo(
        "DB now has %d Price Forecasts"
        % db.session.query(Price).filter(Price.horizon > timedelta(hours=0)).count()
    )
    click.echo(
        "DB now has %d Weather Forecasts"
        % db.session.query(Weather)
        .filter(Weather.horizon > timedelta(hours=0))
        .count()
    )
def make_forecasts(
    asset_id: int,
    timed_value_type: str,
    horizon: timedelta,
    start: datetime,
    end: datetime,
    custom_model_params: dict = None,
) -> int:
    """
    Build forecasting model specs, make rolling forecasts, and save the forecasts made.

    Each individual forecast is a belief about an interval.
    Returns the number of forecasts made.

    Parameters
    ----------
    :param asset_id: int
        To identify which asset to forecast
    :param timed_value_type: str
        This should go away after a refactoring - we now use it to create the DB entry for the forecasts
    :param horizon: timedelta
        duration between the end of each interval and the time at which the belief about that interval is formed
    :param start: datetime
        start of forecast period, i.e. start time of the first interval to be forecast
    :param end: datetime
        end of forecast period, i.e. end time of the last interval to be forecast
    :param custom_model_params: dict
        pass in params which will be passed to the model specs configurator,
        e.g. outcome_var_transformation; only advisable to be used for testing.
    """
    # https://docs.sqlalchemy.org/en/13/faq/connections.html#how-do-i-use-engines-connections-sessions-with-python-multiprocessing-or-os-fork
    db.engine.dispose()

    rq_job = get_current_job()

    # find out which model to run, fall back to latest recommended
    model_search_term = rq_job.meta.get("model_search_term", "linear-OLS")

    # find asset
    asset = get_asset(asset_id, timed_value_type)

    click.echo(
        "Running Forecasting Job %s: %s for %s on model '%s', from %s to %s"
        % (rq_job.id, asset, horizon, model_search_term, start, end)
    )

    if hasattr(asset, "market_type"):
        ex_post_horizon = None  # Todo: until we sorted out the ex_post_horizon, use all available price data
    else:
        ex_post_horizon = timedelta(hours=0)

    # Make model specs
    model_configurator = lookup_model_specs_configurator(model_search_term)
    model_specs, model_identifier, fallback_model_search_term = model_configurator(
        generic_asset=asset,
        forecast_start=as_server_time(start),
        forecast_end=as_server_time(end),
        forecast_horizon=horizon,
        ex_post_horizon=ex_post_horizon,
        custom_model_params=custom_model_params,
    )
    model_specs.creation_time = server_now()

    rq_job.meta["model_identifier"] = model_identifier
    rq_job.meta["fallback_model_search_term"] = fallback_model_search_term
    rq_job.save()

    # before we run the model, check if horizon is okay and enough data is available
    if horizon not in supported_horizons():
        raise InvalidHorizonException(
            "Invalid horizon on job %s: %s" % (rq_job.id, horizon)
        )
    query_window = get_query_window(
        model_specs.start_of_training,
        end,
        [lag * model_specs.frequency for lag in model_specs.lags],
    )
    check_data_availability(
        asset,
        determine_asset_value_class_by_asset(asset),
        start,
        end,
        query_window,
        horizon,
    )

    data_source = get_data_source(
        data_source_name="Seita (%s)"
        % rq_job.meta.get("model_identifier", "unknown model"),
        data_source_type="forecasting script",
    )

    forecasts, model_state = make_rolling_forecasts(
        start=as_server_time(start),
        end=as_server_time(end),
        model_specs=model_specs,
    )
    click.echo("Job %s made %d forecasts." % (rq_job.id, len(forecasts)))

    ts_value_forecasts = [
        make_timed_value(timed_value_type, asset_id, dt, value, horizon, data_source.id)
        for dt, value in forecasts.items()
    ]

    try:
        save_to_session(ts_value_forecasts)
    except IntegrityError as e:
        current_app.logger.warning(e)
        click.echo("Rolling back due to IntegrityError")
        db.session.rollback()

        if current_app.config.get("FLEXMEASURES_MODE", "") == "play":
            click.echo("Saving again, with overwrite=True")
            save_to_session(ts_value_forecasts, overwrite=True)

    db.session.commit()

    return len(forecasts)