Ejemplo n.º 1
0
def get_revenues_costs_data(
    power_data: pd.DataFrame,
    prices_data: pd.DataFrame,
    power_forecast_data: pd.DataFrame,
    prices_forecast_data: pd.DataFrame,
    metrics: Dict[str, float],
    unit_factor: float,
    resolution: str,
    showing_individual_traces: bool,
) -> Tuple[pd.DataFrame, pd.DataFrame, dict]:
    """Compute revenues/costs data. These data are purely derivative from power and prices.

    Observations and forecasts are each obtained by multiplying the power frame
    with the matching prices frame. The unit factor is used when multiplying
    quantities and prices, e.g. when multiplying quantities in kWh with prices
    in EUR/MWh, use a unit factor of 0.001.

    For the forecasts, a simple confidence band ("yhat_upper" / "yhat_lower")
    is derived from the WAPE metric.

    :param power_data: power observations (needs an "event_value" column)
    :param prices_data: price observations
    :param power_forecast_data: power forecasts
    :param prices_forecast_data: price forecasts
    :param metrics: dict of metrics; it is updated in place and also returned
    :param unit_factor: factor aligning the units of power and prices
    :param resolution: resolution string, passed to time_utils.resolution_to_hour_factor
    :param showing_individual_traces: if True, no source label is set on the observations
    :returns: revenue/cost observations, revenue/cost forecasts (either might be
              an empty DataFrame) and the metrics dict, with these keys set:
              - realised_revenues_costs (sum of observed values)
              - expected_revenues_costs (sum of forecast values)
              - mae_revenues_costs (mean absolute error)
              - mape_revenues_costs (mean absolute percentage error)
              - wape_revenues_costs (weighted absolute percentage error)
              A metric is NaN when one of the frames it depends on is empty.
    """
    # The same factor applies to observations and forecasts, so compute it once.
    multiplication_factor = (
        time_utils.resolution_to_hour_factor(resolution) * unit_factor
    )

    # Realised revenues/costs
    rev_cost_data = multiply_dataframe_with_deterministic_beliefs(
        power_data,
        prices_data,
        result_source=(
            None
            if showing_individual_traces
            else "Calculated from power and price data"
        ),
        multiplication_factor=multiplication_factor,
    )
    if power_data.empty or prices_data.empty:
        # np.nan (lowercase) works on all NumPy versions; np.NaN was removed in NumPy 2.0
        metrics["realised_revenues_costs"] = np.nan
    else:
        metrics["realised_revenues_costs"] = np.nansum(
            rev_cost_data["event_value"].values
        )

    # Expected revenues/costs
    rev_cost_forecasts = multiply_dataframe_with_deterministic_beliefs(
        power_forecast_data,
        prices_forecast_data,
        result_source="Calculated from power and price data",
        multiplication_factor=multiplication_factor,
    )
    if power_forecast_data.empty or prices_forecast_data.empty:
        metrics["expected_revenues_costs"] = np.nan
        metrics["mae_revenues_costs"] = np.nan
        metrics["mape_revenues_costs"] = np.nan
        metrics["wape_revenues_costs"] = np.nan
    else:
        observed = rev_cost_data["event_value"]
        forecast = rev_cost_forecasts["event_value"]
        metrics["expected_revenues_costs"] = np.nansum(forecast)
        metrics["mae_revenues_costs"] = calculations.mean_absolute_error(
            observed, forecast
        )
        metrics["mape_revenues_costs"] = calculations.mean_absolute_percentage_error(
            observed, forecast
        )
        metrics["wape_revenues_costs"] = calculations.weighted_absolute_percentage_error(
            observed, forecast
        )

        # Todo: compute confidence interval properly - this is just a simple heuristic
        wape = metrics["wape_revenues_costs"]
        rev_cost_forecasts["yhat_upper"] = rev_cost_forecasts["event_value"] * (1 + wape)
        rev_cost_forecasts["yhat_lower"] = rev_cost_forecasts["event_value"] * (1 - wape)
    return rev_cost_data, rev_cost_forecasts, metrics
Ejemplo n.º 2
0
def _has_event_values(df: pd.DataFrame) -> bool:
    """Return True if at least one entry in the "event_value" column is not null."""
    return not df["event_value"].isnull().values.all()


def get_weather_data(
    assets: List[Asset],
    metrics: dict,
    sensor_type: WeatherSensorType,
    query_window: Tuple[datetime, datetime],
    resolution: str,
    forecast_horizon: timedelta,
) -> Tuple[pd.DataFrame, pd.DataFrame, str, Sensor, dict]:
    """Get most recent weather data and forecast weather data for the requested forecast horizon.

    The 50 sensors closest to the first asset are queried, and the closest one
    that has any observation or forecast in the query window is selected.
    If none of them has data, the very closest sensor is selected. The returned
    data always belongs to the returned sensor.

    :param assets: assets to find weather data for (only the first one is used for now)
    :param metrics: dict of metrics; it is updated in place and also returned
    :param sensor_type: type of weather sensor to look for; if falsy, nothing is looked up
    :param query_window: start and end of the time window to query
    :param resolution: resolution string, passed on to the data search
    :param forecast_horizon: minimum belief horizon for forecasts
    :returns: weather observations, weather forecasts (either might be an empty DataFrame),
              the name of the sensor type ("" if no sensor type was given),
              the selected weather sensor (None if no sensor was found)
              and the metrics dict. If a sensor was found, these keys are set:
              - realised_weather (mean of observed values)
              - expected_weather (mean of forecast values)
              - mae_weather (mean absolute error)
              - mape_weather (mean absolute percentage error)
              - wape_weather (weighted absolute percentage error)
              If no sensor type was given or no sensor was found, the metrics are left untouched.
    """

    # Todo: for now we only collect weather data for a single asset
    asset = assets[0]

    weather_data = tb.BeliefsDataFrame(columns=["event_value"])
    weather_forecast_data = tb.BeliefsDataFrame(columns=["event_value"])
    if not sensor_type:
        return weather_data, weather_forecast_data, "", None, metrics

    # Find the 50 closest weather sensors
    sensor_type_name = sensor_type.name
    closest_sensors = Sensor.find_closest(
        generic_asset_type_name=asset.generic_asset.generic_asset_type.name,
        sensor_name=sensor_type_name,
        n=50,
        object=asset,
    )
    if not closest_sensors:
        return weather_data, weather_forecast_data, sensor_type_name, None, metrics

    # Collect the weather data for the requested time window
    sensor_names = [sensor.name for sensor in closest_sensors]

    # Get weather data
    weather_bdf_dict: Dict[str, tb.BeliefsDataFrame] = TimedBelief.search(
        sensor_names,
        event_starts_after=query_window[0],
        event_ends_before=query_window[1],
        resolution=resolution,
        horizons_at_least=None,
        horizons_at_most=timedelta(hours=0),
        sum_multiple=False,
    )
    weather_df_dict: Dict[str, pd.DataFrame] = {
        name: simplify_index(
            bdf, index_levels_to_columns=["belief_horizon", "source"]
        )
        for name, bdf in weather_bdf_dict.items()
    }

    # Get weather forecasts
    weather_forecast_bdf_dict: Dict[str, tb.BeliefsDataFrame] = TimedBelief.search(
        sensor_names,
        event_starts_after=query_window[0],
        event_ends_before=query_window[1],
        resolution=resolution,
        horizons_at_least=forecast_horizon,
        horizons_at_most=None,
        source_types=["user", "forecasting script", "script"],
        sum_multiple=False,
    )
    weather_forecast_df_dict: Dict[str, pd.DataFrame] = {
        name: simplify_index(
            bdf, index_levels_to_columns=["belief_horizon", "source"]
        )
        for name, bdf in weather_forecast_bdf_dict.items()
    }

    # Take the closest weather sensor which contains some data for the selected time window.
    # Fall back to the very closest sensor if none of them contains data.
    closest_sensor = closest_sensors[0]
    for sensor in closest_sensors:
        if _has_event_values(weather_df_dict[sensor.name]) or _has_event_values(
            weather_forecast_df_dict[sensor.name]
        ):
            closest_sensor = sensor
            break

    # Look up the data by the selected sensor (not by a leftover loop variable),
    # so that the returned data and the returned sensor always belong together.
    weather_data = weather_df_dict[closest_sensor.name]
    weather_forecast_data = weather_forecast_df_dict[closest_sensor.name]

    # Calculate the weather metrics
    # (np.nan works on all NumPy versions; np.NaN was removed in NumPy 2.0)
    if not weather_data.empty:
        metrics["realised_weather"] = weather_data["event_value"].mean()
    else:
        metrics["realised_weather"] = np.nan
    # Forecast errors are only computed when both frames have the same size
    if (
        not weather_forecast_data.empty
        and weather_forecast_data.size == weather_data.size
    ):
        observed = weather_data["event_value"]
        forecast = weather_forecast_data["event_value"]
        metrics["expected_weather"] = forecast.mean()
        metrics["mae_weather"] = calculations.mean_absolute_error(observed, forecast)
        metrics["mape_weather"] = calculations.mean_absolute_percentage_error(
            observed, forecast
        )
        metrics["wape_weather"] = calculations.weighted_absolute_percentage_error(
            observed, forecast
        )
    else:
        metrics["expected_weather"] = np.nan
        metrics["mae_weather"] = np.nan
        metrics["mape_weather"] = np.nan
        metrics["wape_weather"] = np.nan
    return (
        weather_data,
        weather_forecast_data,
        sensor_type_name,
        closest_sensor,
        metrics,
    )
Ejemplo n.º 3
0
def _loaded_power_data_to_df(resource: Resource, individual_traces: bool) -> pd.DataFrame:
    """Turn the power data currently loaded on the resource into a simple DataFrame.

    With individual traces, the per-sensor frames are joined into one frame,
    which remembers -per frame- the sensor name as source, and the result is
    additionally indexed by source. Otherwise, the aggregated data is used.
    """
    if individual_traces:
        # In this case, resource.power_data is a dict of BeliefsDataFrames (by sensor name)
        bdf = pd.concat(
            [
                set_bdf_source(sensor_bdf, sensor_name)
                for sensor_name, sensor_bdf in resource.power_data.items()
            ]
        )
    else:
        # Here, we aggregate all rows together
        bdf = resource.aggregate_power_data
    df: pd.DataFrame = simplify_index(
        bdf, index_levels_to_columns=["belief_horizon", "source"]
    )
    if individual_traces:
        # In this case, we keep on indexing by source (as we have more than one)
        df.set_index("source", append=True, inplace=True)
    return df


def get_power_data(
    resource: Union[str, Resource],  # name or instance
    show_consumption_as_positive: bool,
    showing_individual_traces_for: str,
    metrics: dict,
    query_window: Tuple[datetime, datetime],
    resolution: str,
    forecast_horizon: timedelta,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, dict]:
    """Get power data and metrics.

    :param resource: name of a resource, or a Resource instance
    :param show_consumption_as_positive: if True, the sign of all event values is flipped
    :param showing_individual_traces_for: "power", "schedules" or "none":
        - "power": observations per sensor, no forecasts, no schedules
        - "schedules": schedules per sensor, no observations, no forecasts
        - "none": aggregated observations, forecasts and schedules
    :param metrics: dict of metrics; it is updated in place and also returned
    :param query_window: start and end of the time window to query
    :param resolution: resolution string, passed on when loading sensor data
    :param forecast_horizon: minimum belief horizon for forecasts
    :returns: power observations, power forecasts and power schedules
              (each might be an empty DataFrame) and the metrics dict,
              with these keys set:
              - realised_power_in_mwh (sum of observed values)
              - expected_power_in_mwh (sum of forecast values)
              - mae_power_in_mwh (mean absolute error)
              - mape_power (mean absolute percentage error)
              - wape_power (weighted absolute percentage error)
              A metric is NaN when the data it depends on is missing.

    Todo: Power schedules ignore horizon.
    """
    if isinstance(resource, str):
        resource = Resource(resource)

    default_columns = ["event_value", "belief_horizon", "source"]

    # NB each load below replaces the data held by the resource,
    # so each result is read out before the next load.

    # Get power data
    if showing_individual_traces_for != "schedules":
        resource.load_sensor_data(
            sensor_types=[Power],
            start=query_window[0],
            end=query_window[-1],
            resolution=resolution,
            belief_horizon_window=(None, timedelta(hours=0)),
            exclude_source_types=["scheduling script"],
        )
        power_df = _loaded_power_data_to_df(
            resource, individual_traces=showing_individual_traces_for == "power"
        )
    else:
        power_df = pd.DataFrame(columns=default_columns)

    # Get power forecast
    if showing_individual_traces_for == "none":
        power_forecast_bdf: tb.BeliefsDataFrame = resource.load_sensor_data(
            sensor_types=[Power],
            start=query_window[0],
            end=query_window[-1],
            resolution=resolution,
            belief_horizon_window=(forecast_horizon, None),
            exclude_source_types=["scheduling script"],
        ).aggregate_power_data
        power_forecast_df: pd.DataFrame = simplify_index(
            power_forecast_bdf,
            index_levels_to_columns=["belief_horizon", "source"],
        )
    else:
        power_forecast_df = pd.DataFrame(columns=default_columns)

    # Get power schedule
    if showing_individual_traces_for != "power":
        resource.load_sensor_data(
            sensor_types=[Power],
            start=query_window[0],
            end=query_window[-1],
            resolution=resolution,
            belief_horizon_window=(None, None),
            source_types=["scheduling script"],
        )
        power_schedule_df = _loaded_power_data_to_df(
            resource, individual_traces=showing_individual_traces_for == "schedules"
        )
    else:
        power_schedule_df = pd.DataFrame(columns=default_columns)

    if show_consumption_as_positive:
        for df in (power_df, power_forecast_df, power_schedule_df):
            df["event_value"] *= -1

    # Calculate the power metrics
    # (np.nan works on all NumPy versions; np.NaN was removed in NumPy 2.0)
    power_hour_factor = time_utils.resolution_to_hour_factor(resolution)
    realised_power_in_mwh = pd.Series(
        power_df["event_value"] * power_hour_factor
    ).values

    if not power_df.empty:
        metrics["realised_power_in_mwh"] = np.nansum(realised_power_in_mwh)
    else:
        metrics["realised_power_in_mwh"] = np.nan
    # Forecast errors are only computed when both frames have the same size
    if not power_forecast_df.empty and power_forecast_df.size == power_df.size:
        expected_power_in_mwh = pd.Series(
            power_forecast_df["event_value"] * power_hour_factor
        ).values
        metrics["expected_power_in_mwh"] = np.nansum(expected_power_in_mwh)
        metrics["mae_power_in_mwh"] = calculations.mean_absolute_error(
            realised_power_in_mwh, expected_power_in_mwh
        )
        metrics["mape_power"] = calculations.mean_absolute_percentage_error(
            realised_power_in_mwh, expected_power_in_mwh
        )
        metrics["wape_power"] = calculations.weighted_absolute_percentage_error(
            realised_power_in_mwh, expected_power_in_mwh
        )
    else:
        metrics["expected_power_in_mwh"] = np.nan
        metrics["mae_power_in_mwh"] = np.nan
        metrics["mape_power"] = np.nan
        metrics["wape_power"] = np.nan
    return power_df, power_forecast_df, power_schedule_df, metrics
Ejemplo n.º 4
0
def get_prices_data(
    metrics: dict,
    market_sensor: Sensor,
    query_window: Tuple[datetime, datetime],
    resolution: str,
    forecast_horizon: timedelta,
) -> Tuple[pd.DataFrame, pd.DataFrame, dict]:
    """Get price data and metrics.

    :param metrics: dict of metrics; it is updated in place and also returned
    :param market_sensor: sensor recording the prices; if None, an empty sensor name is searched for
    :param query_window: start and end of the time window to query
    :param resolution: resolution string, passed on to the data search
    :param forecast_horizon: minimum belief horizon for forecasts
    :returns: price observations, price forecasts (either might be an empty DataFrame)
              and the metrics dict, with these keys set:
              - realised_unit_price (mean of observed values)
              - expected_unit_price (mean of forecast values)
              - mae_unit_price (mean absolute error)
              - mape_unit_price (mean absolute percentage error)
              - wape_unit_price (weighted absolute percentage error)
              A metric is NaN when the data it depends on is missing.
    """

    market_name = "" if market_sensor is None else market_sensor.name

    # Search arguments shared by the observation and forecast queries
    window_kwargs = dict(
        event_starts_after=query_window[0],
        event_ends_before=query_window[1],
        resolution=resolution,
    )

    # Get price data
    price_bdf: tb.BeliefsDataFrame = TimedBelief.search(
        [market_name],
        horizons_at_least=None,
        horizons_at_most=timedelta(hours=0),
        **window_kwargs,
    )
    price_df: pd.DataFrame = simplify_index(
        price_bdf, index_levels_to_columns=["belief_horizon", "source"]
    )

    # np.nan works on all NumPy versions; np.NaN was removed in NumPy 2.0
    if not price_bdf.empty:
        metrics["realised_unit_price"] = price_df["event_value"].mean()
    else:
        metrics["realised_unit_price"] = np.nan

    # Get price forecast
    price_forecast_bdf: tb.BeliefsDataFrame = TimedBelief.search(
        [market_name],
        horizons_at_least=forecast_horizon,
        horizons_at_most=None,
        source_types=["user", "forecasting script", "script"],
        **window_kwargs,
    )
    price_forecast_df: pd.DataFrame = simplify_index(
        price_forecast_bdf,
        index_levels_to_columns=["belief_horizon", "source"],
    )

    # Calculate the price metrics
    # Forecast errors are only computed when both frames have the same size
    if not price_forecast_df.empty and price_forecast_df.size == price_df.size:
        observed = price_df["event_value"]
        forecast = price_forecast_df["event_value"]
        metrics["expected_unit_price"] = forecast.mean()
        metrics["mae_unit_price"] = calculations.mean_absolute_error(
            observed, forecast
        )
        metrics["mape_unit_price"] = calculations.mean_absolute_percentage_error(
            observed, forecast
        )
        metrics["wape_unit_price"] = calculations.weighted_absolute_percentage_error(
            observed, forecast
        )
    else:
        metrics["expected_unit_price"] = np.nan
        metrics["mae_unit_price"] = np.nan
        metrics["mape_unit_price"] = np.nan
        metrics["wape_unit_price"] = np.nan
    return price_df, price_forecast_df, metrics