#%% evaluate performance with training data
# Fit a separate model on the training split only, then score its
# predictions for X_test against y_test.
eval_reg = HistGradientBoostingRegressor(random_state=1129)
eval_reg.fit(X_train, y_train)

divider = "-" * 10
print(divider, "regression report", divider)
test_pred = eval_reg.predict(X_test)
# The third argument is the column count of X_test (presumably the number
# of features used by the report — confirm against regression_report).
report = regression_report(y_test, test_pred, X_test.shape[1])
print(report)

print(divider, "evaluation of label", divider)
label_df = data.get_true_label(
    columns=["adr", "revenue", "is_canceled", "label"])
pred_label_df = data.predict_label(eval_reg, X_test_df)

# The same predicted/true frames are compared on two different targets.
print("[ label evaluation ]")
report_label = evaluate_by_label(pred_label_df, label_df, target="label")
print(report_label)

print("[ revenue_per_day evaluation ]")
report_revenue = evaluate_by_label(pred_label_df, label_df, target="revenue")
print(report_revenue)

#%% training with all data
# Refit from scratch on everything returned by data.processing, using the
# same random_state as the evaluation model above.
X_df, y_df = data.processing(["revenue"])
reg = HistGradientBoostingRegressor(random_state=1129)
reg.fit(X_df.to_numpy(), y_df["revenue"].to_numpy())

#%% fill predict label to csv
# NOTE(review): only the prediction step is visible here; the CSV write
# itself is not part of this view.
test_X_df = data.processing_test_data("data/test.csv")
predict_df = data.predict_label(reg, test_X_df)
# NOTE(review): the next two statements look like the tail of a loop over
# fitted regressors whose header lies outside this view — confirm their
# nesting before relying on this layout.
revenue_pred = reg.predict(X_df)
revenue_preds.append(revenue_pred)

# Combine the collected predictions into one by averaging element-wise.
revenue_pred = np.sum(revenue_preds, axis=0) / len(revenue_preds)

# print report
# Every piece is computed first, in the original call order, and the text
# is assembled in a single step afterwards.
y_test = y_test_df["revenue"].to_numpy()
reg_report = regression_report(y_test, revenue_pred, X_test_df.shape[1])

# Attach the averaged prediction to a copy so X_test_df stays untouched.
pred_df = X_test_df.copy()
pred_df["pred_revenue"] = revenue_pred
pred_label_df = data.to_label(pred_df)
true_label_df = data.get_true_label(
    columns=["adr", "revenue", "is_canceled", "label"])

report = "\n".join([
    "[ revenue_per_order evaluation ]",
    reg_report,
    "[ label evaluation ]",
    evaluate_by_label(pred_label_df, true_label_df, "label"),
    "[ revenue_per_day evaluation ]",
    evaluate_by_label(pred_label_df, true_label_df, "revenue"),
]) + "\n"
print(report)

# training with all data
X_df, y_df = data.processing(["revenue", "is_canceled", "adr"])
regs = split_train(regressor, X_df, y_df, nsplit)
test_X_df = data.processing_test_data()
save_output(regs, test_X_df)