# -*- coding: utf-8
import os
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, f1_score, precision_score, recall_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.externals import joblib

from data_utils import DataUtils

cur_dir = os.path.dirname(__file__)
d_utils = DataUtils(filepath=os.path.join(cur_dir, "data/pKaInWater.csv"))
X_data, y_data_acidic, y_data_basic = d_utils.get_classification_data(
    feature_type="morgan+macc")
y_data = np.array([y_data_acidic, y_data_basic]).T

# train test split
seed = 7
X_train, X_test, y_train, y_test = train_test_split(X_data,
                                                    y_data,
                                                    test_size=0.2,
                                                    random_state=seed)
print("\n ========================= \n")
print("X_train.shape:", X_train.shape, "X_test.shape", X_test.shape)
print("\n ========================= \n")


def model_evaluation(model, x_input, y_input):
    y_pred = model.predict(x_input)
    print(classification_report(y_true=y_input, y_pred=y_pred))