#%%
from utils import *
from utils.metrics import regression_report
from data_processing import Data, evaluate_by_label, fill_label

import pandas as pd
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor

if __name__ == "__main__":
    # Build the Data object with use_dummies and normalize both off, then call
    # train_test_split_by_date with "actual_adr" as the only label column and
    # test_ratio=0.3.
    data = Data(use_dummies=False, normalize=False)
    split_frames = data.train_test_split_by_date(["actual_adr"], test_ratio=0.3)
    X_train_df, X_test_df, y_train_df, y_test_df = split_frames

    # Convert the frames to NumPy arrays; the target is the single
    # "actual_adr" column of each label frame.
    X_train = X_train_df.to_numpy()
    X_test = X_test_df.to_numpy()
    y_train = y_train_df["actual_adr"].to_numpy()
    y_test = y_test_df["actual_adr"].to_numpy()
    print(f"X_train shape {X_train.shape}, y_train shape {y_train.shape}")
    print(f"X_test shape {X_test.shape}, y_test shape {y_test.shape}")

    #%% fit on the training split, report on the test split
    eval_reg = HistGradientBoostingRegressor(random_state=1129)
    eval_reg.fit(X_train, y_train)

    banner = "-" * 10
    print(banner, "regression report", banner)
    # regression_report receives the true targets, the predictions for the
    # test split, and the number of feature columns.
    y_pred = eval_reg.predict(X_test)
    n_features = X_test.shape[1]
    report = regression_report(y_test, y_pred, n_features)
    print(report)
# Example #2
# 0
#%%
from utils import *
from utils.metrics import regression_report
from data_processing import Data, evaluate_by_label, fill_label

import pandas as pd
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor

if __name__ == "__main__":
    # Build the Data object with use_dummies and normalize both off, then call
    # train_test_split_by_date with "revenue" as the only label column and
    # test_ratio=0.3.
    data = Data(use_dummies=False, normalize=False)
    split_frames = data.train_test_split_by_date(["revenue"], test_ratio=0.3)
    X_train_df, X_test_df, y_train_df, y_test_df = split_frames

    # Convert the frames to NumPy arrays; the target is the single "revenue"
    # column of each label frame.
    X_train = X_train_df.to_numpy()
    X_test = X_test_df.to_numpy()
    y_train = y_train_df["revenue"].to_numpy()
    y_test = y_test_df["revenue"].to_numpy()
    print(f"X_train shape {X_train.shape}, y_train shape {y_train.shape}")
    print(f"X_test shape {X_test.shape}, y_test shape {y_test.shape}")

    #%% fit on the training split, report on the test split
    eval_reg = HistGradientBoostingRegressor(random_state=1129)
    eval_reg.fit(X_train, y_train)

    banner = "-" * 10
    print(banner, "regression report", banner)
    # regression_report receives the true targets, the predictions for the
    # test split, and the number of feature columns.
    y_pred = eval_reg.predict(X_test)
    n_features = X_test.shape[1]
    report = regression_report(y_test, y_pred, n_features)
    print(report)
    # NOTE(review): from here down to the `return`, the code reads like the
    # tail of a function whose `def` line is not visible in this file:
    # `return` is not valid inside this `if __name__` body, and `revenue_pred`
    # is never assigned in the lines above. Confirm where this fragment
    # belongs before relying on it.

    # Attach the predictions to a copy of the test features under the
    # "pred_revenue" column, leaving X_test_df itself untouched.
    pred_df = X_test_df.copy()
    pred_df["pred_revenue"] = revenue_pred
    # presumably aggregates per-row predictions into per-day labels; verify
    # against Data.to_label.
    pred_label_df = data.to_label(pred_df)
    # Load data/revenue_per_day.csv indexed by its "arrival_date" column; it is
    # passed alongside the predicted labels to the evaluation calls below.
    true_label_df = pd.read_csv("data/revenue_per_day.csv",
                                index_col="arrival_date")

    # NOTE(review): `evaluate_by_label2` is not among the explicit imports
    # (only `evaluate_by_label` is); it may arrive via `from utils import *`
    # -- confirm. `report` must support .append here (assumed to be a list).
    report.append("[ label evaluation ]")
    report.append(evaluate_by_label2(pred_label_df, true_label_df, "label"))
    report.append("[ revenue_per_day evaluation ]")
    report.append(evaluate_by_label2(pred_label_df, true_label_df, "revenue"))
    # Join the collected report entries into one newline-terminated string.
    return "\n".join(report) + "\n"


#%% data
# Build the Data object with use_dummies and normalize both off, then call
# train_test_split_by_date with three label columns and test_ratio=0.3.
data = Data(use_dummies=False, normalize=False)
X_train_df, X_test_df, y_train_df, y_test_df = data.train_test_split_by_date(
    ["revenue", "is_canceled", "adr"], test_ratio=0.3)
print(f"X_train shape {X_train_df.shape}, y_train shape {y_train_df.shape}")
print(f"X_test shape {X_test_df.shape}, y_test shape {y_test_df.shape}")

# `main` is defined outside this section; it is handed the estimator class
# (not an instance) plus the four split frames, and its return value is
# printed below.
report = main(
    HistGradientBoostingRegressor,
    X_train_df,
    X_test_df,
    y_train_df,
    y_test_df,
    nsplit=2,
)
# NOTE(review): clfs/regs are not used in the lines visible here; presumably
# they are consumed further down -- confirm before removing.
clfs, regs = get_models()
print(report)
# The message names the file that the open() call right after it appends to
# (it previously said "Ensemble_w_Pred_Report.txt", which is not that file).
print("*Save result to Ensemble_w_Pred_Report2.txt")
with open(f"Ensemble_w_Pred_Report2.txt", "a") as ofile: