Beispiel #1
0
def monitor_tasks(task):
    """
    Check, for every given task, that its latest recorded run is recent enough and succeeded.
    If not, alert someone (via send_monitoring_alert).

    :param task: iterable of indexable entries, where entry[0] is the task name
                 and entry[1] is the acceptable interval since the latest run, in minutes.

    An alert is sent for a task when:
    - no latest run is recorded for it at all,
    - its latest run is within the acceptable interval, but its status is False, or
    - its latest run is older than the acceptable interval.

    A task without a recorded run does not stop the monitoring of the remaining tasks.
    """
    for t in task:
        task_name = t[0]
        app.logger.info(f"Checking latest run of task {task_name} ...")
        latest_run: LatestTaskRun = LatestTaskRun.query.get(task_name)
        if latest_run is None:
            msg = f"Task {task_name} has no last run and thus cannot be monitored. Is it configured properly?"
            send_monitoring_alert(task_name, msg)
            # Move on to the next task; returning here would leave the rest unchecked.
            continue
        now = server_now()
        acceptable_interval = timedelta(minutes=t[1])
        # check if latest run was recently enough
        if latest_run.datetime >= now - acceptable_interval:
            # latest run time is okay, let's check the status
            if latest_run.status is False:
                msg = f"A failure has been reported on task {task_name}."
                send_monitoring_alert(task_name, msg, latest_run)
        else:
            msg = (
                f"Task {task_name}'s latest run time is outside of the acceptable range "
                f"({acceptable_interval}).")
            app.logger.error(msg)
            send_monitoring_alert(task_name, msg, latest_run)
    app.logger.info("Done checking task runs ...")
Beispiel #2
0
    def load_bdf(sensor_data: dict) -> BeliefsDataFrame:
        """
        Turn the de-serialized and validated data into a BeliefsDataFrame.

        :param sensor_data: dict with the keys "values", "duration", "start" and "sensor",
                            and optionally "prior" or "horizon".
        :returns: a BeliefsDataFrame whose events start at sensor_data["start"] and are
                  spaced by duration / number of values.
        :raises ValidationError: if the current user has no matching data source,
                                 or if no values were passed.

        Belief timing: "prior" (if present) is used as the belief time, otherwise "horizon"
        is used as the belief horizon, otherwise the belief time is the current server time.
        """
        source = DataSource.query.filter(
            DataSource.user_id == current_user.id
        ).one_or_none()
        if not source:
            raise ValidationError(
                f"User {current_user.id} is not an accepted data source."
            )

        num_values = len(sensor_data["values"])
        if num_values == 0:
            # Dividing the duration by zero values would raise an unhandled ZeroDivisionError.
            raise ValidationError(
                "Cannot derive an event resolution without any values."
            )
        # NOTE(review): presumably "duration" is a timedelta at this point - confirm against the schema.
        event_resolution = sensor_data["duration"] / num_values
        dt_index = pd.date_range(
            sensor_data["start"],
            periods=num_values,
            freq=event_resolution,
        )
        s = pd.Series(sensor_data["values"], index=dt_index)

        # Work out what the recording time should be
        belief_timing = {}
        if "prior" in sensor_data:
            belief_timing["belief_time"] = sensor_data["prior"]
        elif "horizon" in sensor_data:
            belief_timing["belief_horizon"] = sensor_data["horizon"]
        else:
            belief_timing["belief_time"] = server_now()
        return BeliefsDataFrame(
            s,
            source=source,
            sensor=sensor_data["sensor"],
            **belief_timing,
        )
Beispiel #3
0
        def decorated_service(*args, **kwargs):
            """
            Unpack 'prior' from the request form and pass it on to the wrapped function as kwargs["prior"].

            - If no form can be obtained from the request, an invalid_method response is returned.
            - If 'prior' is given, it is parsed. With ex_post set, a prior before
              start + duration leads to an invalid_horizon response.
            - If 'prior' is missing, it becomes the current server time when infer_missing is set
              (or infer_missing_play is set and FLEXMEASURES_MODE is "play"), and None otherwise.
            """
            form = get_form_from_request(request)
            if form is None:
                current_app.logger.warning(
                    "Unsupported request method for unpacking 'prior' from request."
                )
                return invalid_method(request.method)

            if "prior" not in form:
                should_infer = infer_missing is True or (
                    infer_missing_play is True
                    and current_app.config.get("FLEXMEASURES_MODE", "") == "play"
                )
                # Either the server infers a missing prior, or it is left out
                # (a horizon may still be inferred by the server)
                kwargs["prior"] = server_now() if should_infer else None
                return fn(*args, **kwargs)

            prior = parse_isodate_str(form["prior"])
            if ex_post is True:
                start = parse_isodate_str(form["start"])
                duration = parse_duration(form["duration"], start)
                # todo: validate start and duration (refactor already duplicate code from period_required and optional_horizon_accepted)
                # todo: take into account knowledge horizon function
                knowledge_time = start + duration
                if prior < knowledge_time:
                    return invalid_horizon(
                        "Meter data can only be observed after the fact."
                    )

            kwargs["prior"] = prior
            return fn(*args, **kwargs)
Beispiel #4
0
def test_naturalized_datetime_str(
    app,
    monkeypatch,
    dt_tz,
    now,
    server_tz,
    delta_in_h,
    exp_result,
):
    """
    Check naturalized_datetime_str for a datetime lying delta_in_h hours before `now`.

    `now` may be the string "server_now", in which case the server time is looked up
    after the server timezone has been patched. exp_result may be a tuple of acceptable outcomes.
    """
    monkeypatch.setitem(app.config, "FLEXMEASURES_TIMEZONE", server_tz)

    # resolved here (not in the parameters) as it needs the (patched) app context
    if now == "server_now":
        now = server_now()
    # naive datetimes are assumed to be in UTC
    if now.tzinfo is None:
        now = now.replace(tzinfo=pytz.utc)

    h_ago = None
    if delta_in_h is not None:
        h_ago = now - timedelta(hours=delta_in_h)
        if dt_tz is not None:
            h_ago = h_ago.astimezone(pytz.timezone(dt_tz))

    rendered = naturalized_datetime_str(h_ago, now=now)
    if isinstance(exp_result, tuple):
        assert rendered in exp_result
    else:
        assert rendered == exp_result
Beispiel #5
0
def make_rolling_viewpoint_forecasts(
    sensor_id: int,
    horizon: timedelta,
    start: datetime,
    end: datetime,
    custom_model_params: dict = None,
) -> int:
    """Build forecasting model specs, make rolling-viewpoint forecasts, and save the forecasts made.

    Each individual forecast is a belief about a time interval.
    Rolling-viewpoint forecasts share the same belief horizon (the duration between belief time and knowledge time).
    Model specs are also retrained in a rolling fashion, but with its own frequency set in custom_model_params.
    See the timely-beliefs lib for relevant terminology.

    The model to use is read from the current job's meta data ("model_search_term"),
    with "linear-OLS" as fallback. The identifier of the chosen model and its
    fallback search term are written back to the job's meta data.

    Parameters
    ----------
    :param sensor_id: int
        To identify which sensor to forecast
    :param horizon: timedelta
        duration between the end of each interval and the time at which the belief about that interval is formed
    :param start: datetime
        start of forecast period, i.e. start time of the first interval to be forecast
    :param end: datetime
        end of forecast period, i.e end time of the last interval to be forecast
    :param custom_model_params: dict
        pass in params which will be passed to the model specs configurator,
        e.g. outcome_var_transformation, only advisable to be used for testing.
    :returns: int
        the number of forecasts made
    :raises InvalidHorizonException: if the horizon is not among the supported horizons
    """
    # Dispose of the engine's connections before doing any work, see
    # https://docs.sqlalchemy.org/en/13/faq/connections.html#how-do-i-use-engines-connections-sessions-with-python-multiprocessing-or-os-fork
    db.engine.dispose()

    job = get_current_job()

    # which model to run (falls back to the latest recommended one)
    search_term = job.meta.get("model_search_term", "linear-OLS")

    # look up the sensor to forecast
    sensor = Sensor.query.filter_by(id=sensor_id).one_or_none()

    click.echo(
        "Running Forecasting Job %s: %s for %s on model '%s', from %s to %s"
        % (job.id, sensor, horizon, search_term, start, end)
    )

    # Todo: until we sorted out the ex_post_horizon, use all available price data
    ex_post_horizon = None if hasattr(sensor, "market_type") else timedelta(hours=0)

    # Configure the model specs
    configure_specs = lookup_model_specs_configurator(search_term)
    model_specs, model_identifier, fallback_search_term = configure_specs(
        sensor=sensor,
        forecast_start=as_server_time(start),
        forecast_end=as_server_time(end),
        forecast_horizon=horizon,
        ex_post_horizon=ex_post_horizon,
        custom_model_params=custom_model_params,
    )
    model_specs.creation_time = server_now()

    job.meta["model_identifier"] = model_identifier
    job.meta["fallback_model_search_term"] = fallback_search_term
    job.save()

    # Before running the model: is the horizon okay, and is enough data available?
    if horizon not in supported_horizons():
        raise InvalidHorizonException(
            "Invalid horizon on job %s: %s" % (job.id, horizon)
        )

    training_start = model_specs.start_of_training
    lag_durations = [lag * model_specs.frequency for lag in model_specs.lags]
    query_window = get_query_window(training_start, end, lag_durations)
    check_data_availability(sensor, TimedBelief, start, end, query_window, horizon)

    source_name = "Seita (%s)" % job.meta.get("model_identifier", "unknown model")
    data_source = get_data_source(
        data_source_name=source_name,
        data_source_type="forecasting script",
    )

    forecasts, _model_state = make_rolling_forecasts(
        start=as_server_time(start),
        end=as_server_time(end),
        model_specs=model_specs,
    )
    number_of_forecasts = len(forecasts)
    click.echo("Job %s made %d forecasts." % (job.id, number_of_forecasts))

    # One belief per forecast, all sharing the same belief horizon
    beliefs = []
    for event_start, event_value in forecasts.items():
        beliefs.append(
            TimedBelief(
                event_start=event_start,
                belief_horizon=horizon,
                event_value=event_value,
                sensor=sensor,
                source=data_source,
            )
        )
    save_to_db(tb.BeliefsDataFrame(beliefs))
    db.session.commit()

    return number_of_forecasts
def make_forecasts(
    asset_id: int,
    timed_value_type: str,
    horizon: timedelta,
    start: datetime,
    end: datetime,
    custom_model_params: dict = None,
) -> int:
    """
    Build forecasting model specs, make rolling forecasts, save the forecasts made.
    Each individual forecast is a belief about an interval.
    Returns the number of forecasts made.

    The model to use is read from the current job's meta data ("model_search_term"),
    with "linear-OLS" as fallback. If saving hits an IntegrityError, the session is
    rolled back, and only if FLEXMEASURES_MODE is "play" the forecasts are saved again
    with overwrite=True.

    Parameters
    ----------
    :param asset_id: int
        To identify which asset to forecast
    :param timed_value_type: str
        This should go away after a refactoring - we now use it to create the DB entry for the forecasts
    :param horizon: timedelta
        duration between the end of each interval and the time at which the belief about that interval is formed
    :param start: datetime
        start of forecast period, i.e. start time of the first interval to be forecast
    :param end: datetime
        end of forecast period, i.e end time of the last interval to be forecast
    :param custom_model_params: dict
        pass in params which will be passed to the model specs configurator,
        e.g. outcome_var_transformation, only advisable to be used for testing.
    :raises InvalidHorizonException: if the horizon is not among the supported horizons
    """
    # Dispose of the engine's connections before doing any work, see
    # https://docs.sqlalchemy.org/en/13/faq/connections.html#how-do-i-use-engines-connections-sessions-with-python-multiprocessing-or-os-fork
    db.engine.dispose()

    job = get_current_job()

    # which model to run (falls back to the latest recommended one)
    search_term = job.meta.get("model_search_term", "linear-OLS")

    # look up the asset to forecast
    asset = get_asset(asset_id, timed_value_type)

    click.echo(
        "Running Forecasting Job %s: %s for %s on model '%s', from %s to %s"
        % (job.id, asset, horizon, search_term, start, end)
    )

    # Todo: until we sorted out the ex_post_horizon, use all available price data
    ex_post_horizon = None if hasattr(asset, "market_type") else timedelta(hours=0)

    # Configure the model specs
    configure_specs = lookup_model_specs_configurator(search_term)
    model_specs, model_identifier, fallback_search_term = configure_specs(
        generic_asset=asset,
        forecast_start=as_server_time(start),
        forecast_end=as_server_time(end),
        forecast_horizon=horizon,
        ex_post_horizon=ex_post_horizon,
        custom_model_params=custom_model_params,
    )
    model_specs.creation_time = server_now()

    job.meta["model_identifier"] = model_identifier
    job.meta["fallback_model_search_term"] = fallback_search_term
    job.save()

    # Before running the model: is the horizon okay, and is enough data available?
    if horizon not in supported_horizons():
        raise InvalidHorizonException(
            "Invalid horizon on job %s: %s" % (job.id, horizon)
        )

    training_start = model_specs.start_of_training
    lag_durations = [lag * model_specs.frequency for lag in model_specs.lags]
    query_window = get_query_window(training_start, end, lag_durations)
    value_class = determine_asset_value_class_by_asset(asset)
    check_data_availability(asset, value_class, start, end, query_window, horizon)

    source_name = "Seita (%s)" % job.meta.get("model_identifier", "unknown model")
    data_source = get_data_source(
        data_source_name=source_name,
        data_source_type="forecasting script",
    )

    forecasts, _model_state = make_rolling_forecasts(
        start=as_server_time(start),
        end=as_server_time(end),
        model_specs=model_specs,
    )
    number_of_forecasts = len(forecasts)
    click.echo("Job %s made %d forecasts." % (job.id, number_of_forecasts))

    # One timed value per forecast
    timed_values = []
    for event_start, event_value in forecasts.items():
        timed_values.append(
            make_timed_value(
                timed_value_type,
                asset_id,
                event_start,
                event_value,
                horizon,
                data_source.id,
            )
        )

    try:
        save_to_session(timed_values)
    except IntegrityError as integrity_error:
        current_app.logger.warning(integrity_error)
        click.echo("Rolling back due to IntegrityError")
        db.session.rollback()

        in_play_mode = current_app.config.get("FLEXMEASURES_MODE", "") == "play"
        if in_play_mode:
            click.echo("Saving again, with overwrite=True")
            save_to_session(timed_values, overwrite=True)

    db.session.commit()

    return number_of_forecasts