# Code example #1
0
import os
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.externals import joblib

from data_utils import DataUtils

# Run configuration.
# DATA_CATEGORY options as noted by the original author: "basic_only" | "acidic only"
# (presumably "acidic_only" -- TODO confirm against DataUtils.get_regression_data).
DATA_CATEGORY = "basic_only"
# Feature-set identifier forwarded verbatim to DataUtils.
FEATURE_TYPE = "morgan+macc"

# The dataset is located relative to this script rather than the working directory.
cur_dir = os.path.dirname(__file__)
_csv_path = os.path.join(cur_dir, "data/pKaInWater.csv")
d_utils = DataUtils(filepath=_csv_path)
X_data, y_data = d_utils.get_regression_data(
    data_category=DATA_CATEGORY,
    feature_type=FEATURE_TYPE,
)

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
seed = 7
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    random_state=seed,
)

# Report the split sizes between two separator banners.
_banner = "\n ========================= \n"
print(_banner)
print("X_train.shape:", X_train.shape, "X_test.shape", X_test.shape)
print(_banner)


def model_evaluation(model, x_input, y_input):
    """Score a model's predictions against known targets.

    Args:
        model: Object exposing a ``predict`` method; it is called once as
            ``model.predict(x_input)``.
        x_input: Feature data handed unchanged to ``model.predict``.
        y_input: True target values the predictions are compared with.

    Returns:
        A ``(rmse, r2)`` tuple: the square root of the mean squared error
        and the R^2 score of the predictions.
    """
    predictions = model.predict(x_input)
    mse = mean_squared_error(y_true=y_input, y_pred=predictions)
    return np.sqrt(mse), r2_score(y_true=y_input, y_pred=predictions)