def _print_banner(title):
    """Print ``title`` framed by two runs of ten dashes."""
    rule = "-" * 10
    print(rule, title, rule)


def _print_label_report(heading, predicted_df, truth_df, target):
    """Print ``heading``, evaluate ``target`` via ``evaluate_by_label``, then print and return the result."""
    print(heading)
    result = evaluate_by_label(predicted_df, truth_df, target=target)
    print(result)
    return result


# Fit the evaluation regressor; eval_reg, X_train and y_train come from
# outside this excerpt.
eval_reg.fit(X_train, y_train)

_print_banner("regression report")
y_test_pred = eval_reg.predict(X_test)
n_test_columns = X_test.shape[1]
report = regression_report(y_test, y_test_pred, n_test_columns)
print(report)

_print_banner("evaluation of label")
true_label_columns = ["adr", "revenue", "is_canceled", "label"]
label_df = data.get_true_label(columns=true_label_columns)
pred_label_df = data.predict_label(eval_reg, X_test_df)

# The same predicted/true frames are scored twice: once on the "label"
# column and once on the "revenue" column.
report_label = _print_label_report(
    "[ label evaluation ]", pred_label_df, label_df, "label"
)
report_revenue = _print_label_report(
    "[ revenue_per_day evaluation ]", pred_label_df, label_df, "revenue"
)

#%% training with all data
X_df, y_df = data.processing(["revenue"])
all_features = X_df.to_numpy()
all_revenue = y_df["revenue"].to_numpy()
reg = HistGradientBoostingRegressor(random_state=1129)
reg.fit(all_features, all_revenue)

#%% fill predict label to csv
test_X_df = data.processing_test_data("data/test.csv")
predict_df = data.predict_label(reg, test_X_df)
fill_label(predict_df, "data/test_nolabel.csv")
# Regression metrics for eval_reg on the held-out split (eval_reg, X_test and
# y_test come from outside this excerpt).
dash_rule = "-" * 10
print(dash_rule, "regression report", dash_rule)
held_out_prediction = eval_reg.predict(X_test)
report = regression_report(y_test, held_out_prediction, X_test.shape[1])
print(report)

print(dash_rule, "evaluation of label", dash_rule)
label_df = data.get_true_label(
    columns=["adr", "revenue", "is_canceled", "label"],
)
# reg_out="adr" is forwarded unchanged to data.predict_label.
pred_label_df = data.predict_label(eval_reg, X_test_df, reg_out="adr")

#%%
def _show_label_evaluation(heading, target):
    """Print ``heading``, evaluate ``target`` on the current label frames, print and return the result."""
    print(heading)
    evaluation = evaluate_by_label(pred_label_df, label_df, target=target)
    print(evaluation)
    return evaluation


report_label = _show_label_evaluation("[ label evaluation ]", "label")
report_revenue = _show_label_evaluation(
    "[ revenue_per_day evaluation ]", "revenue"
)

#%% training with all data
X_df, y_df = data.processing(["actual_adr"])
reg = HistGradientBoostingRegressor(random_state=1129)
full_features = X_df.to_numpy()
full_target = y_df["actual_adr"].to_numpy()
reg.fit(full_features, full_target)

#%% fill predict label to csv
test_X_df = data.processing_test_data("data/test.csv")
# pred_label_df is rebound here to the predictions for the test CSV.
pred_label_df = data.predict_label(reg, test_X_df, reg_out="adr")
fill_label(pred_label_df, "data/test_nolabel.csv")
#%%
# %%
# Import order is kept as-is: the star import comes first, so the torch
# aliases below take precedence over any same-named exports from utils.
from utils import *
from data_processing import Data
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

#%%
# Build the feature/target arrays for the "reservation_status" target.
data = Data(use_dummies=False)
X_df, y_df = data.processing(target="reservation_status")
X_np = X_df.to_numpy()
y_np = y_df.to_numpy()
reservation_status_cats = data.get_y_cats()
print(f"X_np's shape: {X_np.shape}")
print(f"y_np's shape: {y_np.shape}")

# Three loaders are requested with the fractions [0.7, 0.2, 0.1] and a batch
# size of 64.
loader_source = LoadData(X_y=(X_np, y_np), X_y_dtype=("float", "long"))
train_loader, val_loader, test_loader = loader_source.get_dataloader(
    [0.7, 0.2, 0.1],
    batch_size=64,
)

# %% start from here!
if __name__ == "__main__":
    # setting: the model is sized from the width of X_np and the number of
    # target categories.
    input_width = X_np.shape[1]
    category_count = len(reservation_status_cats)
    model = Input1DModel(input_width, category_count)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    loss_func = nn.CrossEntropyLoss()
    modelwrapper = ModelWrapper(model, loss_func, optimizer)

    # training: `model` is rebound to whatever ModelWrapper.train returns.
    model = modelwrapper.train(
        train_loader,
        val_loader,
        max_epochs=50,
    )
#%%
from utils import *
from data_processing import Data

# start from here!
if __name__ == "__main__":
    data = Data(use_dummies=False, normalize=False)

    # (target column, quick_test mode) pairs, run in this order: the
    # classifier check first, then the two regressor checks.
    quick_test_plan = (
        ("is_canceled", "classifier"),
        ("adr", "regressor"),
        ("revenue", "regressor"),
    )

    for target_column, test_mode in quick_test_plan:
        # Each round reprocesses the data for a single target column and
        # hands the resulting arrays to a fresh MLModelWrapper.
        X_df, y_df = data.processing([target_column])
        mlmodelwrapper = MLModelWrapper(X_df.to_numpy(), y_df.to_numpy())
        mlmodelwrapper.quick_test(test_mode)
# Classification metrics for eval_clf; copies of the held-out arrays are
# passed, exactly as in the original call.
banner_rule = "-" * 10
print(banner_rule, "classification report", banner_rule)
canceled_truth = y_test_canceled.copy()
canceled_prediction = eval_clf.predict(X_test.copy())
report = classification_report(canceled_truth, canceled_prediction)
print(report)

#%%
# Combine classifier and regressor output via predict(), then convert it to
# labels with data.to_label.
pred_df = predict(eval_clf, eval_reg, X_test_df)
pred_label_df = data.to_label(pred_df)
true_columns = ["adr", "revenue", "is_canceled", "label"]
label_df = data.get_true_label(columns=true_columns)


def _evaluate_and_print(heading, target):
    """Print ``heading``, evaluate ``target`` on the current label frames, print and return the result."""
    print(heading)
    outcome = evaluate_by_label(pred_label_df, label_df, target=target)
    print(outcome)
    return outcome


report_label = _evaluate_and_print("[ label evaluation ]", "label")
report_revenue = _evaluate_and_print("[ revenue_per_day evaluation ]", "revenue")

#%% training with all data
X_df, y_df = data.processing(["adr", "is_canceled"])

# Regressor on the "adr" column.
reg = HistGradientBoostingRegressor(random_state=1126)
adr_target = y_df["adr"].to_numpy()
reg.fit(X_df.to_numpy(), adr_target)

# Classifier on the "is_canceled" column, with the same random_state.
clf = HistGradientBoostingClassifier(random_state=1126)
canceled_target = y_df["is_canceled"].to_numpy()
clf.fit(X_df.to_numpy(), canceled_target)

#%% fill predict label to csv
test_X_df = data.processing_test_data("data/test.csv")
pred_df = predict(clf, reg, test_X_df)
pred_label_df = data.to_label(pred_df)
fill_label(pred_label_df, "data/test_nolabel.csv")