예제 #1
0
    # Fit the evaluation regressor on the training split.
    # NOTE(review): eval_reg, X_train, y_train, X_test, y_test, X_test_df and
    # data are bound above this excerpt; confirm how they are built there.
    eval_reg.fit(X_train, y_train)

    print("-" * 10, "regression report", "-" * 10)
    # Compare y_test with the predictions for X_test; the third argument is
    # the size of X_test's second axis.
    report = regression_report(y_test, eval_reg.predict(X_test),
                               X_test.shape[1])
    print(report)

    print("-" * 10, "evaluation of label", "-" * 10)
    # Reference frame restricted to the four listed columns.
    label_df = data.get_true_label(
        columns=["adr", "revenue", "is_canceled", "label"])
    # Predicted counterpart, produced from eval_reg applied to X_test_df.
    pred_label_df = data.predict_label(eval_reg, X_test_df)

    # Evaluate the same prediction/reference pair twice: once on the "label"
    # target and once on the "revenue" target.
    print("[ label evaluation ]")
    report_label = evaluate_by_label(pred_label_df, label_df, target="label")
    print(report_label)
    print("[ revenue_per_day evaluation ]")
    report_revenue = evaluate_by_label(pred_label_df,
                                       label_df,
                                       target="revenue")
    print(report_revenue)

    #%% training with all data
    # Re-run the processing step with "revenue" as the requested target and
    # fit a fresh regressor on everything it returns.
    X_df, y_df = data.processing(["revenue"])
    # random_state is pinned, presumably so repeated runs are reproducible.
    reg = HistGradientBoostingRegressor(random_state=1129)
    reg.fit(X_df.to_numpy(), y_df["revenue"].to_numpy())

    #%% fill predict label to csv
    # Process data/test.csv, predict with the regressor fitted just above, and
    # hand the result to fill_label together with the data/test_nolabel.csv
    # path. NOTE(review): presumably fill_label writes into that CSV; verify.
    test_X_df = data.processing_test_data("data/test.csv")
    predict_df = data.predict_label(reg, test_X_df)
    fill_label(predict_df, "data/test_nolabel.csv")
    # NOTE(review): this section repeats the report/evaluation steps with an
    # adr-based regressor; it looks like the tail of a separate script, so
    # y_test, eval_reg, X_test, X_test_df and data are expected to be bound
    # before this point. Confirm against the full file.
    print("-" * 10, "regression report", "-" * 10)
    # Compare y_test with the predictions for X_test; the third argument is
    # the size of X_test's second axis.
    report = regression_report(y_test, eval_reg.predict(X_test),
                               X_test.shape[1])
    print(report)

    print("-" * 10, "evaluation of label", "-" * 10)
    # Reference frame restricted to the four listed columns.
    label_df = data.get_true_label(
        columns=["adr", "revenue", "is_canceled", "label"])
    # reg_out="adr" is forwarded to predict_label; presumably it declares that
    # the regressor's output is adr. TODO confirm in Data.predict_label.
    pred_label_df = data.predict_label(eval_reg, X_test_df, reg_out="adr")

    #%%
    # Evaluate the same prediction/reference pair on the "label" target and
    # then on the "revenue" target.
    print("[ label evaluation ]")
    report_label = evaluate_by_label(pred_label_df, label_df, target="label")
    print(report_label)
    print("[ revenue_per_day evaluation ]")
    report_revenue = evaluate_by_label(pred_label_df,
                                       label_df,
                                       target="revenue")
    print(report_revenue)

    #%% training with all data
    # Re-run the processing step with "actual_adr" as the requested target and
    # fit a fresh regressor on everything it returns.
    X_df, y_df = data.processing(["actual_adr"])
    # random_state is pinned, presumably so repeated runs are reproducible.
    reg = HistGradientBoostingRegressor(random_state=1129)
    reg.fit(X_df.to_numpy(), y_df["actual_adr"].to_numpy())

    #%% fill predict label to csv
    # Process data/test.csv, predict with the regressor fitted just above
    # (again with reg_out="adr"), and hand the result to fill_label together
    # with the data/test_nolabel.csv path.
    test_X_df = data.processing_test_data("data/test.csv")
    pred_label_df = data.predict_label(reg, test_X_df, reg_out="adr")
    fill_label(pred_label_df, "data/test_nolabel.csv")

#%%
# %%
from utils import *
from data_processing import Data

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

#%%
# Build the feature/target frames with "reservation_status" as the target.
data = Data(use_dummies=False)
X_df, y_df = data.processing(target="reservation_status")

# NumPy copies of both frames, plus whatever category listing the Data object
# reports for the target.
X_np = X_df.to_numpy()
y_np = y_df.to_numpy()
reservation_status_cats = data.get_y_cats()

print(f"X_np's shape: {X_np.shape}")
print(f"y_np's shape: {y_np.shape}")

# Three loaders requested with a [0.7, 0.2, 0.1] split list and batches of 64.
train_loader, val_loader, test_loader = LoadData(
    X_y=(X_np, y_np),
    X_y_dtype=("float", "long"),
).get_dataloader(
    [0.7, 0.2, 0.1],
    batch_size=64,
)

# %% start from here!
# Script entry point: build the network, loss and optimizer, then train.
if __name__ == "__main__":
    # Setup. The model receives the size of X_np's second axis and the number
    # of entries in reservation_status_cats (presumably input width and class
    # count; verify against Input1DModel's signature).
    model = Input1DModel(X_np.shape[1], len(reservation_status_cats))
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    modelwrapper = ModelWrapper(model, loss_func, optimizer)

    # Training: up to 50 epochs using the train and validation loaders.
    # `model` is rebound to whatever ModelWrapper.train returns.
    model = modelwrapper.train(train_loader, val_loader, max_epochs=50)
#%%
from utils import *
from data_processing import Data

# start from here!
if __name__ == "__main__":
    data = Data(use_dummies=False, normalize=False)

    # Each entry pairs a target column with the quick_test mode to run on it;
    # entries are processed in the listed order (classifier first, then the
    # two regressor targets).
    quick_test_plan = (
        ("is_canceled", "classifier"),
        ("adr", "regressor"),
        ("revenue", "regressor"),
    )
    for target_column, model_kind in quick_test_plan:
        X_df, y_df = data.processing([target_column])
        mlmodelwrapper = MLModelWrapper(X_df.to_numpy(), y_df.to_numpy())
        mlmodelwrapper.quick_test(model_kind)
예제 #5
0
    # NOTE(review): eval_clf, eval_reg, X_test, y_test_canceled, X_test_df and
    # data are bound above this excerpt; confirm how they are built there.
    print("-" * 10, "classification report", "-" * 10)
    # Copies of the targets and features are passed, so the originals cannot
    # be modified by these calls.
    report = classification_report(
        y_test_canceled.copy(), eval_clf.predict(X_test.copy())
    )
    print(report)

    #%%
    # Combine the classifier and regressor through predict(), convert the
    # result with data.to_label, and fetch the reference frame restricted to
    # the four listed columns.
    pred_df = predict(eval_clf, eval_reg, X_test_df)
    pred_label_df = data.to_label(pred_df)
    label_df = data.get_true_label(columns=["adr", "revenue", "is_canceled", "label"])

    # Evaluate the same prediction/reference pair on the "label" target and
    # then on the "revenue" target.
    print("[ label evaluation ]")
    report_label = evaluate_by_label(pred_label_df, label_df, target="label")
    print(report_label)
    print("[ revenue_per_day evaluation ]")
    report_revenue = evaluate_by_label(pred_label_df, label_df, target="revenue")
    print(report_revenue)

    #%% training with all data
    # Request both targets from one processing call, then fit one regressor
    # on "adr" and one classifier on "is_canceled" using the same features.
    X_df, y_df = data.processing(["adr", "is_canceled"])
    # random_state is pinned on both models, presumably for reproducibility.
    reg = HistGradientBoostingRegressor(random_state=1126)
    reg.fit(X_df.to_numpy(), y_df["adr"].to_numpy())
    clf = HistGradientBoostingClassifier(random_state=1126)
    clf.fit(X_df.to_numpy(), y_df["is_canceled"].to_numpy())

    #%% fill predict label to csv
    # Process data/test.csv, predict with the two models fitted just above,
    # convert with data.to_label, and hand the result to fill_label together
    # with the data/test_nolabel.csv path.
    test_X_df = data.processing_test_data("data/test.csv")
    pred_df = predict(clf, reg, test_X_df)
    pred_label_df = data.to_label(pred_df)
    fill_label(pred_label_df, "data/test_nolabel.csv")