def test_drop_unchanged_beliefs(setup_beliefs):
    """Trying to save beliefs that are already in the database shouldn't raise an error.

    Even after updating the belief time, we expect to persist only the older belief time.
    """

    # Set a reference for the number of beliefs stored and their belief times
    sensor = Sensor.query.filter_by(name="epex_da").one_or_none()
    bdf = sensor.search_beliefs(most_recent_beliefs_only=False)
    num_beliefs_before = len(bdf)
    belief_times_before = bdf.belief_times

    # See what happens when storing all existing beliefs verbatim
    save_to_db(bdf)

    # Verify that no new beliefs were saved
    bdf = sensor.search_beliefs(most_recent_beliefs_only=False)
    assert len(bdf) == num_beliefs_before

    # See what happens when storing all beliefs with their belief time updated
    bdf = tb_utils.replace_multi_index_level(
        bdf, "belief_time", bdf.belief_times + pd.Timedelta("1H"))
    save_to_db(bdf)

    # Verify that no new beliefs were saved
    bdf = sensor.search_beliefs(most_recent_beliefs_only=False)
    assert len(bdf) == num_beliefs_before
    assert list(bdf.belief_times) == list(belief_times_before)
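# A minimal plain-pandas sketch (not from the codebase above) of the level replacement
# that tb_utils.replace_multi_index_level performs in the test: the MultiIndex is
# rebuilt with the belief_time level shifted by one hour, leaving the values untouched.
# The timestamps and event value below are made up for illustration.
import pandas as pd

idx = pd.MultiIndex.from_arrays(
    [
        [pd.Timestamp("2021-03-28 16:00+00:00")],
        [pd.Timestamp("2021-03-27 09:00+00:00")],
    ],
    names=["event_start", "belief_time"],
)
df = pd.DataFrame({"event_value": [42.0]}, index=idx)

# Rebuild the index with one level replaced; all other levels are carried over
df.index = pd.MultiIndex.from_arrays(
    [
        df.index.get_level_values("event_start"),
        df.index.get_level_values("belief_time") + pd.Timedelta("1H"),
    ],
    names=df.index.names,
)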
def test_do_not_drop_changed_probabilistic_belief(setup_beliefs):
    """Trying to save a changed probabilistic belief should result in saving the whole belief.

    For example, given a belief that defines both cp=0.2 and cp=0.5,
    if that belief becomes more certain (e.g. cp=0.3 and cp=0.5),
    we expect to see the full new belief stored, rather than just the cp=0.3 value.
    """

    # Set a reference for the number of beliefs stored
    sensor = Sensor.query.filter_by(name="epex_da").one_or_none()
    bdf = sensor.search_beliefs(source="ENTSO-E",
                                most_recent_beliefs_only=False)
    num_beliefs_before = len(bdf)

    # See what happens when storing a belief with more certainty one hour later
    old_belief = bdf.loc[(bdf.index.get_level_values("event_start") ==
                          pd.Timestamp("2021-03-28 16:00:00+00:00"))
                         & (bdf.index.get_level_values("belief_time") ==
                            pd.Timestamp("2021-03-27 9:00:00+00:00"))]
    new_belief = tb_utils.replace_multi_index_level(old_belief,
                                                    "cumulative_probability",
                                                    pd.Index([0.3, 0.5]))
    new_belief = tb_utils.replace_multi_index_level(
        new_belief, "belief_time",
        new_belief.belief_times + pd.Timedelta("1H"))
    save_to_db(new_belief)

    # Verify that the whole probabilistic belief was added
    bdf = sensor.search_beliefs(source="ENTSO-E",
                                most_recent_beliefs_only=False)
    num_beliefs_after = len(bdf)
    assert num_beliefs_after == num_beliefs_before + len(new_belief)
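# A minimal sketch of how one probabilistic belief spans multiple rows, distinguished
# by the cumulative_probability index level; the test above verifies that such rows
# are saved as a whole belief. The timestamps and values are made up for illustration.
import pandas as pd

idx = pd.MultiIndex.from_product(
    [[pd.Timestamp("2021-03-28 16:00+00:00")], [0.2, 0.5]],
    names=["event_start", "cumulative_probability"],
)
belief = pd.DataFrame({"event_value": [41.0, 45.0]}, index=idx)
print(belief)  # two rows, together describing one belief about one event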
def resample_sensor_data(
    sensor_ids: List[int],
    event_resolution_in_minutes: int,
    start_str: Optional[str] = None,
    end_str: Optional[str] = None,
    skip_integrity_check: bool = False,
):
    """Assign a new event resolution to an existing sensor and resample its data accordingly."""
    event_resolution = timedelta(minutes=event_resolution_in_minutes)
    event_starts_after = pd.Timestamp(
        start_str)  # note that "" or None becomes NaT
    event_ends_before = pd.Timestamp(end_str)
    for sensor_id in sensor_ids:
        sensor = Sensor.query.get(sensor_id)
        if sensor.event_resolution == event_resolution:
            print(f"{sensor} already has the desired event resolution.")
            continue
        df_original = sensor.search_beliefs(
            most_recent_beliefs_only=False,
            event_starts_after=event_starts_after,
            event_ends_before=event_ends_before,
        ).sort_values("event_start")
        df_resampled = df_original.resample_events(
            event_resolution).sort_values("event_start")
        if not skip_integrity_check:
            message = ""
            if sensor.event_resolution < event_resolution:
                message += f"Downsampling {sensor} to {event_resolution} will result in a loss of data. "
            click.confirm(
                message +
                f"Data before:\n{df_original}\nData after:\n{df_resampled}\nMean before: {df_original['event_value'].mean()}\nMean after: {df_resampled['event_value'].mean()}\nContinue?",
                abort=True,
            )

        # Update sensor
        sensor.event_resolution = event_resolution
        db.session.add(sensor)

        # Update sensor data
        query = TimedBelief.query.filter(TimedBelief.sensor == sensor)
        if not pd.isnull(event_starts_after):
            query = query.filter(TimedBelief.event_start >= event_starts_after)
        if not pd.isnull(event_ends_before):
            query = query.filter(TimedBelief.event_start +
                                 sensor.event_resolution <= event_ends_before)
        query.delete()
        save_to_db(df_resampled, bulk_save_objects=True)
    db.session.commit()
    print("Successfully resampled sensor data.")
def test_do_not_drop_beliefs_copied_by_another_source(setup_beliefs):
    """Trying to copy beliefs from one source to another should double the number of beliefs."""

    # Set a reference for the number of beliefs stored
    sensor = Sensor.query.filter_by(name="epex_da").one_or_none()
    bdf = sensor.search_beliefs(most_recent_beliefs_only=False)
    num_beliefs_before = len(bdf)

    # See what happens when storing all beliefs with their source updated
    new_source = DataSource(name="Not Seita", type="demo script")
    bdf = tb_utils.replace_multi_index_level(
        bdf, "source", pd.Index([new_source] * num_beliefs_before))
    save_to_db(bdf)

    # Verify that all the new beliefs were added
    bdf = sensor.search_beliefs(most_recent_beliefs_only=False)
    num_beliefs_after = len(bdf)
    assert num_beliefs_after == 2 * num_beliefs_before
def make_schedule(
    sensor_id: int,
    start: datetime,
    end: datetime,
    belief_time: datetime,
    resolution: timedelta,
    soc_at_start: Optional[float] = None,
    soc_targets: Optional[pd.Series] = None,
    soc_min: Optional[float] = None,
    soc_max: Optional[float] = None,
    roundtrip_efficiency: Optional[float] = None,
    price_sensor: Optional[Sensor] = None,
) -> bool:
    """Preferably, a starting soc is given.
    Otherwise, we try to retrieve the current state of charge from the asset (if that is the valid one at the start).
    Otherwise, we set the starting soc to 0 (some assets don't use the concept of a state of charge,
    and without soc targets and limits the starting soc doesn't matter).
    """
    # https://docs.sqlalchemy.org/en/13/faq/connections.html#how-do-i-use-engines-connections-sessions-with-python-multiprocessing-or-os-fork
    db.engine.dispose()

    rq_job = get_current_job()

    # find sensor
    sensor = Sensor.query.filter_by(id=sensor_id).one_or_none()

    if rq_job:
        click.echo("Running Scheduling Job %s: %s, from %s to %s" %
                   (rq_job.id, sensor, start, end))

    if soc_at_start is None:
        if (start == sensor.get_attribute("soc_datetime")
                and sensor.get_attribute("soc_in_mwh") is not None):
            soc_at_start = sensor.get_attribute("soc_in_mwh")
        else:
            soc_at_start = 0

    if soc_targets is None:
        soc_targets = pd.Series(np.nan,
                                index=pd.date_range(start,
                                                    end,
                                                    freq=resolution,
                                                    closed="right"))

    if sensor.generic_asset.generic_asset_type.name == "battery":
        consumption_schedule = schedule_battery(
            sensor,
            start,
            end,
            resolution,
            soc_at_start,
            soc_targets,
            soc_min,
            soc_max,
            roundtrip_efficiency,
            price_sensor=price_sensor,
        )
    elif sensor.generic_asset.generic_asset_type.name in (
            "one-way_evse",
            "two-way_evse",
    ):
        consumption_schedule = schedule_charging_station(
            sensor,
            start,
            end,
            resolution,
            soc_at_start,
            soc_targets,
            soc_min,
            soc_max,
            roundtrip_efficiency,
            price_sensor=price_sensor,
        )
    else:
        raise ValueError(
            "Scheduling is not (yet) supported for asset type %s." %
            sensor.generic_asset.generic_asset_type)

    data_source = get_data_source(
        data_source_name="Seita",
        data_source_type="scheduling script",
    )
    if rq_job:
        click.echo("Job %s made schedule." % rq_job.id)

    ts_value_schedule = [
        TimedBelief(
            event_start=dt,
            belief_time=belief_time,
            event_value=-value,
            sensor=sensor,
            source=data_source,
        ) for dt, value in consumption_schedule.items()
    ]  # For consumption schedules, positive values denote consumption. For the db, consumption is negative.
    bdf = tb.BeliefsDataFrame(ts_value_schedule)
    save_to_db(bdf)
    db.session.commit()

    return True
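# A hypothetical direct call to make_schedule (the id, times and soc value are made
# up); in practice this function typically runs as a queued job. Here we schedule a
# battery sensor for 24 hours at a 15-minute resolution, starting at 2.5 MWh:
from datetime import datetime, timedelta

import pytz

make_schedule(
    sensor_id=1,
    start=datetime(2021, 3, 28, 16, tzinfo=pytz.utc),
    end=datetime(2021, 3, 29, 16, tzinfo=pytz.utc),
    belief_time=datetime(2021, 3, 28, 15, tzinfo=pytz.utc),
    resolution=timedelta(minutes=15),
    soc_at_start=2.5,
)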
def make_rolling_viewpoint_forecasts(
    sensor_id: int,
    horizon: timedelta,
    start: datetime,
    end: datetime,
    custom_model_params: dict = None,
) -> int:
    """Build forecasting model specs, make rolling-viewpoint forecasts, and save the forecasts made.

    Each individual forecast is a belief about a time interval.
    Rolling-viewpoint forecasts share the same belief horizon (the duration between belief time and knowledge time).
    Model specs are also retrained in a rolling fashion, but at their own frequency, which is set in custom_model_params.
    See the timely-beliefs lib for relevant terminology.

    Parameters
    ----------
    :param sensor_id: int
        To identify which sensor to forecast
    :param horizon: timedelta
        duration between the end of each interval and the time at which the belief about that interval is formed
    :param start: datetime
        start of forecast period, i.e. start time of the first interval to be forecast
    :param end: datetime
        end of forecast period, i.e. end time of the last interval to be forecast
    :param custom_model_params: dict
        params to pass through to the model specs configurator,
        e.g. outcome_var_transformation; only advisable for testing.
    :returns: int
        the number of forecasts made
    """
    # https://docs.sqlalchemy.org/en/13/faq/connections.html#how-do-i-use-engines-connections-sessions-with-python-multiprocessing-or-os-fork
    db.engine.dispose()

    rq_job = get_current_job()

    # find out which model to run, fall back to latest recommended
    model_search_term = rq_job.meta.get("model_search_term", "linear-OLS")

    # find sensor
    sensor = Sensor.query.filter_by(id=sensor_id).one_or_none()

    click.echo(
        "Running Forecasting Job %s: %s for %s on model '%s', from %s to %s" %
        (rq_job.id, sensor, horizon, model_search_term, start, end))

    if hasattr(sensor, "market_type"):
        ex_post_horizon = None  # Todo: until we sort out the ex_post_horizon, use all available price data
    else:
        ex_post_horizon = timedelta(hours=0)

    # Make model specs
    model_configurator = lookup_model_specs_configurator(model_search_term)
    model_specs, model_identifier, fallback_model_search_term = model_configurator(
        sensor=sensor,
        forecast_start=as_server_time(start),
        forecast_end=as_server_time(end),
        forecast_horizon=horizon,
        ex_post_horizon=ex_post_horizon,
        custom_model_params=custom_model_params,
    )
    model_specs.creation_time = server_now()

    rq_job.meta["model_identifier"] = model_identifier
    rq_job.meta["fallback_model_search_term"] = fallback_model_search_term
    rq_job.save()

    # before we run the model, check if horizon is okay and enough data is available
    if horizon not in supported_horizons():
        raise InvalidHorizonException("Invalid horizon on job %s: %s" %
                                      (rq_job.id, horizon))

    query_window = get_query_window(
        model_specs.start_of_training,
        end,
        [lag * model_specs.frequency for lag in model_specs.lags],
    )
    check_data_availability(
        sensor,
        TimedBelief,
        start,
        end,
        query_window,
        horizon,
    )

    data_source = get_data_source(
        data_source_name="Seita (%s)" %
        rq_job.meta.get("model_identifier", "unknown model"),
        data_source_type="forecasting script",
    )

    forecasts, model_state = make_rolling_forecasts(
        start=as_server_time(start),
        end=as_server_time(end),
        model_specs=model_specs,
    )
    click.echo("Job %s made %d forecasts." % (rq_job.id, len(forecasts)))

    ts_value_forecasts = [
        TimedBelief(
            event_start=dt,
            belief_horizon=horizon,
            event_value=value,
            sensor=sensor,
            source=data_source,
        ) for dt, value in forecasts.items()
    ]
    bdf = tb.BeliefsDataFrame(ts_value_forecasts)
    save_to_db(bdf)
    db.session.commit()

    return len(forecasts)
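# A hypothetical way to run make_rolling_viewpoint_forecasts (the queue name, id and
# times are made up). The function reads rq_job.meta, so it is meant to run as an RQ
# job rather than be called directly:
from datetime import datetime, timedelta

import pytz
from redis import Redis
from rq import Queue

queue = Queue("forecasting", connection=Redis())
job = queue.enqueue(
    make_rolling_viewpoint_forecasts,
    sensor_id=1,
    horizon=timedelta(hours=6),
    start=datetime(2021, 3, 28, 16, tzinfo=pytz.utc),
    end=datetime(2021, 3, 29, 16, tzinfo=pytz.utc),
)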