def test_empty_dataframe(self):
    """An empty load frame yields a completeness of 0.0, homogenised or not."""

    def _empty_load_frame():
        # Fresh, row-less frame with a single "load" column on a datetime index.
        return pd.DataFrame(data={"load": []}, index=pd.DatetimeIndex([]))

    with_homogenise = calc_completeness(_empty_load_frame())
    without_homogenise = calc_completeness(
        _empty_load_frame(),
        homogenise=False,
    )

    for observed in (with_homogenise, without_homogenise):
        self.assertAlmostEqual(observed, 0.0)
def test_homogenise_timeindex_incomplete(self):
    """A gap in the time index lowers completeness only when homogenising."""
    timestamps = pd.to_datetime([
        "2019-01-01 10:00:00",
        "2019-01-01 10:05:00",
        # 10:10:00 is deliberately left out to create a gap
        "2019-01-01 10:15:00",
        "2019-01-01 10:20:00",
    ])
    gappy_frame = pd.DataFrame({"aggregated": [10, 20, 30, 40]}, index=timestamps)

    homogenised = calc_completeness(gappy_frame, homogenise=True)
    not_homogenised = calc_completeness(gappy_frame, homogenise=False)

    # Four of the five expected timestamps are present when the index is homogenised.
    self.assertAlmostEqual(homogenised, 0.8)
    self.assertAlmostEqual(not_homogenised, 1)
def test_homogenise_timeindex_complete(self):
    """A gap-free, evenly spaced time index gives a completeness of 1."""
    timestamps = pd.to_datetime([
        "2019-01-01 10:00:00",
        "2019-01-01 10:05:00",
        "2019-01-01 10:10:00",
    ])
    regular_frame = pd.DataFrame({"aggregated": [10, 20, 30]}, index=timestamps)

    result = calc_completeness(regular_frame)

    self.assertAlmostEqual(result, 1)
def test_timedelayed_incomplete_dataframe(self):
    """Time-delayed columns with one unexpected NaN give a completeness of 5/6."""
    nan = np.nan
    columns = {
        "T-15min": [1, nan, nan],
        # Per the original test note, the first NaN is the unexpected one.
        "T-30min": [2, nan, nan],
    }
    frame = pd.DataFrame(data=columns, index=[0, 1, 3])

    result = calc_completeness(frame, time_delayed=True)

    self.assertAlmostEqual(result, 1 - 1 / 6, places=3)
def test_timedelayed_dataframe(self):
    """With time_delayed=True this NaN pattern counts as fully complete."""
    nan = np.nan
    columns = {
        "T-15min": [1, nan, nan],
        "T-30min": [2, 3, nan],
    }
    frame = pd.DataFrame(data=columns, index=[0, 1, 3])

    result = calc_completeness(frame, time_delayed=True)

    self.assertEqual(result, 1)
def test_weighted_dataframe(self):
    """Column weights are applied when averaging per-column completeness.

    "col1" is half filled (weight 1) and "col2" is fully filled (weight 2),
    so the expected weighted completeness is (1 * 0.5 + 2 * 1) / 3.
    """
    df = pd.DataFrame(
        index=[0, 1],
        data={"col1": [1, np.nan], "col2": [3, 4]},
    )
    weights = [1, 2]

    completeness = calc_completeness(df, weights)

    # The expected value is not exactly representable as a float, so compare
    # with a tolerance, as the other fractional tests in this class do.
    self.assertAlmostEqual(completeness, (1 * 0.5 + 2 * 1) / 3)
def test_timedelayed_advanced_dataframe(self):
    """Weights and time-delayed columns are combined into one completeness.

    Two time-delayed columns (weight 1 each) are mixed with a regular column
    "col1" (weight 2) that has one NaN in three rows.
    """
    df = pd.DataFrame(
        index=[0, 1, 3],
        data={
            "T-15min": [1, np.nan, np.nan],
            "T-30min": [2, 3, np.nan],
            "col1": [1, np.nan, 2],
        },
    )
    weights = [1, 1, 2]

    completeness = calc_completeness(df, weights, time_delayed=True)

    # The expected value involves 2 / 3, which has no exact float
    # representation; compare with a tolerance rather than exact equality.
    self.assertAlmostEqual(completeness, (1 + 1 + 2 / 3 * 2) / 4)
def test_calc_completeness_no_negatives(self):
    """Regression test: day-based lags must not produce negative weights.

    Added after a bug: with time_delayed=True the "T-7d" column got a negative
    weight, falsely resulting in a very low completeness.
    """
    df = pd.DataFrame(
        index=[0, 1, 3],
        data={
            "T-15min": [1, np.nan, np.nan],
            "T-7d": [2, 3, 4],
            "T-24d": [4, 5, 6],
            "col1": [1, np.nan, 2],
        },
    )

    completeness = calc_completeness(df, time_delayed=True)

    # 11 / 12 is not exactly representable as a float; compare with a
    # tolerance so the test does not depend on the summation order used.
    self.assertAlmostEqual(completeness, 11 / 12.0)
def test_incomplete_dataframe(self):
    """One NaN among three values in a single column gives 2/3 completeness."""
    df = pd.DataFrame(index=[0, 1, 2], data={"col1": [1, np.nan, 3]})

    completeness = calc_completeness(df)

    # 2 / 3 has no exact float representation; compare with a tolerance.
    self.assertAlmostEqual(completeness, 2 / 3)
def test_APX_missing(self):
    """An all-NaN "APX" column of 2 * 96 rows gives a time-delayed completeness of 1/2."""
    n_rows = 2 * 96
    frame = pd.DataFrame(
        data={"APX": [np.nan] * n_rows},
        index=range(n_rows),
    )

    result = calc_completeness(frame, time_delayed=True)

    self.assertEqual(result, 1 / 2)
def test_complete_dataframe(self):
    """A frame without any missing values has a completeness of exactly 1.0."""
    filled_frame = pd.DataFrame(data={"col1": [1, 1]}, index=[0, 1])

    result = calc_completeness(filled_frame)

    self.assertEqual(result, 1.0)
def calc_kpi_for_specific_pid(
    pid: int,
    realised: pd.DataFrame,
    predicted_load: pd.DataFrame,
    basecase: pd.DataFrame,
) -> dict:
    """Calculate key performance indicators (KPIs) for one prediction job.

    The realised load is resampled to 15 minutes (interpolating at most 3
    consecutive missing values), joined on index with the predicted load and
    the basecase, and a set of KPIs is computed for every forecast horizon
    found in ``predicted_load``.

    The metrics include, among others:
        - RMSE,
        - bias,
        - NSME (model efficiency, between -inf and 1)
        - Mean absolute Error

    If the completeness of the realised load, or of the forecast of a specific
    horizon, is below 0.7, a warning is logged and the KPIs of that horizon are
    passed to ``set_incomplete_kpi_to_nan``.

    Args:
        pid: Id of the prediction job; used in the raised errors and in log
            records.
        realised: Realised load. Its index must support ``resample`` and its
            min/max must offer ``to_pydatetime``; it is expected to provide
            the ``load`` column used by the metrics.
        predicted_load: Forecasts with columns named ``forecast_<t_ahead>``
            and ``stdev_<t_ahead>``; the part after the first underscore
            identifies the horizon.
        basecase: Basecase load with a ``load`` column (renamed to
            ``basecase`` internally). May be empty, in which case the
            basecase-related values are computed on missing data.

    Returns:
        Dictionary with one entry per t_ahead, each a dictionary of KPIs. Each
        entry also includes ``date`` (end of the realised data) and
        ``window_days`` (rounded length of the realised period in days) for
        clarification.

    Raises:
        NoPredictedLoadError: When ``predicted_load`` has no rows.
        NoRealisedLoadError: When ``realised`` has no rows.

    Example:
        To get the rMAE for the 24 hours ahead prediction: kpis['24h']['rMAE']
    """
    COMPLETENESS_REALISED_THRESHOLDS = 0.7
    COMPLETENESS_PREDICTED_LOAD_THRESHOLD = 0.7

    log = structlog.get_logger(__name__)

    # NOTE: len() is used on purpose; DataFrame truthiness raises ValueError.
    if len(predicted_load) == 0:
        raise NoPredictedLoadError(pid)
    if len(realised) == 0:
        raise NoRealisedLoadError(pid)

    # The evaluated window is taken from the realised data before resampling.
    start_time = realised.index.min().to_pydatetime()
    end_time = realised.index.max().to_pydatetime()

    # Completeness is measured on the raw data, before gaps are interpolated.
    completeness_realised = validation.calc_completeness(realised)

    # Interpolate missing data if needed ("15min" is the non-deprecated
    # spelling of the former "15T" offset alias).
    realised = realised.resample("15min").interpolate(limit=3)

    completeness_predicted_load = validation.calc_completeness(predicted_load)

    # Combine the forecast and the realised to make sure indices are matched
    combined = pd.merge(realised, predicted_load, left_index=True, right_index=True)

    # Add basecase (per the original note: load in the same period 7 days ago).
    # If the basecase is empty, use a dummy frame so the column still exists.
    if len(basecase) == 0:
        basecase = pd.DataFrame(columns=["load"])
    basecase = basecase.rename(columns={"load": "basecase"})
    combined = combined.merge(basecase, how="left", left_index=True, right_index=True)

    load = combined["load"]
    basecase_load = combined["basecase"]

    # A constant load is only reported; the KPIs are still calculated.
    if load.nunique() == 1:
        log.warning(
            "The load is constant! KPIs will still be calculated, but relative metrics"
            " will be nan")

    # Extract the unique t_aheads from the predicted_load columns.
    # dict.fromkeys de-duplicates while keeping column order, so the order of
    # the output dictionary is deterministic (a set would not guarantee that).
    t_aheads = list(
        dict.fromkeys(col.split("_")[1] for col in predicted_load.columns))

    date = pd.to_datetime(end_time)
    window_days = np.round(
        (end_time - start_time).total_seconds() / 60.0 / 60.0 / 24.0)

    # The basecase mean does not depend on the horizon; compute it once.
    basecase_mean = np.mean(basecase_load)

    kpis = {}

    log.info("Start calculating kpis")
    for t_ahead_h in t_aheads:
        fc = combined["forecast_" + t_ahead_h]  # load predictions
        st = combined["stdev_" + t_ahead_h]  # standard deviations of load predictions

        completeness_predicted_load_specific_hor = validation.calc_completeness(
            fc.to_frame(name=t_ahead_h))

        kpis[t_ahead_h] = {
            "RMSE": metrics.rmse(load, fc),
            "bias": metrics.bias(load, fc),
            "NSME": metrics.nsme(load, fc),
            "MAE": metrics.mae(load, fc),
            "rMAE": metrics.r_mae(load, fc),
            "rMAE_highest": metrics.r_mae_highest(load, fc),
            "rMNE_highest": metrics.r_mne_highest(load, fc),
            "rMPE_highest": metrics.r_mpe_highest(load, fc),
            "rMAE_lowest": metrics.r_mae_lowest(load, fc),
            "skill_score_basecase": metrics.skill_score(
                load,
                basecase_load,
                basecase_mean,
            ),
            "skill_score": metrics.skill_score(load, fc, basecase_mean),
            "skill_score_positive_peaks": metrics.skill_score_positive_peaks(
                load, fc, basecase_mean),
            "skill_score_positive_peaks_basecase": metrics.skill_score_positive_peaks(
                load,
                basecase_load,
                basecase_mean,
            ),
            "franks_skill_score": metrics.franks_skill_score(
                load, fc, basecase_load),
            "franks_skill_score_peaks": metrics.franks_skill_score_peaks(
                load, fc, basecase_load),
            "load_range": load.max() - load.min(),
            "frac_in_1sdev": metrics.frac_in_stdev(load, fc, st),
            "frac_in_2sdev": metrics.frac_in_stdev(load, fc, 2 * st),
            "completeness_realised": completeness_realised,
            "completeness_predicted": completeness_predicted_load_specific_hor,
            "date": date,
            "window_days": window_days,
        }

        if completeness_realised < COMPLETENESS_REALISED_THRESHOLDS:
            log.warning(
                "Completeness realised load too low",
                prediction_id=pid,
                start_time=start_time,
                end_time=end_time,
                completeness=completeness_realised,
                completeness_threshold=COMPLETENESS_REALISED_THRESHOLDS,
            )
            set_incomplete_kpi_to_nan(kpis, t_ahead_h)

        if (completeness_predicted_load_specific_hor
                < COMPLETENESS_PREDICTED_LOAD_THRESHOLD):
            log.warning(
                "Completeness predicted load of specific horizon too low",
                prediction_id=pid,
                horizon=t_ahead_h,
                start_time=start_time,
                end_time=end_time,
                # Report the value that actually triggered this warning; the
                # completeness over all horizons is kept as extra context.
                completeness=completeness_predicted_load_specific_hor,
                completeness_all_horizons=completeness_predicted_load,
                completeness_threshold=COMPLETENESS_PREDICTED_LOAD_THRESHOLD,
            )
            set_incomplete_kpi_to_nan(kpis, t_ahead_h)

    return kpis