# Example no. 1
# 0
def _compute_metrics(df: pd.DataFrame, metrics: List[str]) -> pd.DataFrame:
    """
    Compute regression metrics for one data frame.

    :param df: The data frame containing the actuals column (named by the
               ACTUALS constant) and the predictions column (named by the
               PREDICTIONS constant).
    :param metrics: The list of metric names to compute.
    :return: A data frame with two columns - metric_name and metric -
             sorted by metric_name and re-indexed from 0.
    """
    scores = scoring.score_regression(
        y_test=df[ACTUALS], y_pred=df[PREDICTIONS], metrics=metrics
    )
    # Build the long-format result with a deterministic row order.
    metrics_df = pd.DataFrame(
        list(scores.items()), columns=["metric_name", "metric"]
    )
    return metrics_df.sort_values(["metric_name"]).reset_index(drop=True)
def compute_metrics(fcst_df, metric_name=None, ts_id_colnames=None):
    """
    Calculate regression metrics per time-series grain.

    :param fcst_df: Forecast data frame. Must contain 2 columns:
                    'actual_level' and 'predicted_level'.
    :param metric_name: (optional) If given, only rows for this metric name
                        are returned.
    :param ts_id_colnames: (optional) List of grain (time-series id) column
                           names. If None or empty, the whole frame is
                           scored as a single series.
    :return: Data frame with columns metric_name, metric and grain, one row
             per (grain, metric) pair.
    """
    if not ts_id_colnames:
        # No grain columns: score the whole frame as one dummy series.
        # Use assign() so the caller's data frame is not mutated.
        ts_id_colnames = ["TS_ID"]
        fcst_df = fcst_df.assign(**{ts_id_colnames[0]: "dummy"})
    metrics_list = []
    for grain, df in fcst_df.groupby(ts_id_colnames):
        try:
            scores = scoring.score_regression(
                y_test=df["actual_level"],
                y_pred=df["predicted_level"],
                metrics=list(constants.Metric.SCALAR_REGRESSION_SET),
            )
        except Exception:
            # A failure on one grain must not abort the remaining grains;
            # that grain simply contributes no metric rows.
            msg = "{}: metrics calculation failed.".format(grain)
            print(msg)
            scores = {}
        one_grain_metrics_df = (
            pd.DataFrame(list(scores.items()),
                         columns=["metric_name", "metric"])
            .sort_values(["metric_name"])
            .reset_index(drop=True)
        )
        # Normalize the groupby key: depending on the pandas version a
        # single grain column yields a scalar or a 1-tuple. Record the
        # grain VALUE(S) (not the column name) joined with '|'.
        grain_tuple = grain if isinstance(grain, tuple) else (grain,)
        one_grain_metrics_df["grain"] = "|".join(str(g) for g in grain_tuple)

        metrics_list.append(one_grain_metrics_df)
    # collect into a data frame
    grain_metrics = pd.concat(metrics_list)
    if metric_name is not None:
        grain_metrics = grain_metrics.query("metric_name == @metric_name")
    return grain_metrics
        max_horizon,
        freq,
    )

# Show the assembled results frame, then the actual and predicted series.
print(df_all)

print("target values:::")
print(df_all[target_column_name])
print("predicted values:::")
print(df_all["predicted"])

# Use the AutoML scoring module
metric_names = list(constants.REGRESSION_SCALAR_SET)
actuals = np.array(df_all[target_column_name])
predictions = np.array(df_all["predicted"])
scores = scoring.score_regression(actuals, predictions, metric_names)

print("scores:")
print(scores)

# Record every computed metric on the run history.
for metric, metric_value in scores.items():
    run.log(metric, metric_value)

print("Simple forecasting model")
# Recompute a few headline metrics directly for the printout.
squared_err = mean_squared_error(df_all[target_column_name],
                                 df_all["predicted"])
rmse = np.sqrt(squared_err)
print("[Test Data] \nRoot Mean squared error: %.2f" % rmse)
mae = mean_absolute_error(df_all[target_column_name], df_all["predicted"])
print("mean_absolute_error score: %.2f" % mae)
mape = MAPE(df_all[target_column_name], df_all["predicted"])
print("MAPE: %.2f" % mape)