class CrossValidation:
    """Tune a model by grid search with stratified k-fold cross-validation.

    Attributes:
        metrics: A ``Metrics`` instance used to report evaluation results.
        clf: A ``GridSearchCV`` object wrapping the model. It searches the
            given hyperparameter grid using a shuffled ``StratifiedKFold``
            splitter with ``kfold`` splits.
    """

    def __init__(self, model, hyperparameters, kfold):
        """Build the grid search.

        Args:
            model: The estimator handed to ``GridSearchCV``.
            hyperparameters: The parameter grid handed to ``GridSearchCV``.
            kfold: Number of splits for ``StratifiedKFold``.
        """
        self.metrics = Metrics()
        cross_validation = StratifiedKFold(n_splits=kfold, shuffle=True)
        self.clf = GridSearchCV(
            model,
            hyperparameters,
            cv=cross_validation,
            n_jobs=-1,
            verbose=1,
        )

    def fit_and_predict(self, x_train, y_train, x_test, y_test, metrics):
        """Run the grid search once and report one metric on the test data.

        Args:
            x_train: Training features passed to ``self.clf.fit``.
            y_train: Training targets passed to ``self.clf.fit``.
            x_test: Features the best estimator predicts on.
            y_test: Ground-truth targets passed to the metric.
            metrics: ``"accuracy"``, ``"confusion_matrix"`` or ``"roc"``.
                Any other value fits the search but reports nothing.
        """
        # Fit exactly once. The splitter shuffles without a fixed seed, so a
        # second fit would repeat the whole search and could select a
        # different best estimator than the one used for the predictions.
        best_estimator = self.clf.fit(x_train, y_train).best_estimator_

        if metrics == "accuracy":
            prediction = best_estimator.predict(x_test)
            self.metrics.accuracy(self.clf, y=y_test, pred=prediction)
        elif metrics == "confusion_matrix":
            prediction = best_estimator.predict(x_test)
            self.metrics.confusion_matrix(
                self.clf, y=y_test, pred=prediction)
        elif metrics == "roc":
            prob = best_estimator.predict_proba(x_test)
            # Second column of predict_proba; presumably the positive-class
            # probability of a binary classifier -- confirm against Metrics.
            self.metrics.plot_roc(self.clf, y=y_test, prob=prob[:, 1])

    def get_score(self, x_test, y_test):
        """Return ``self.clf.score`` as a percentage rounded to 2 decimals."""
        return round(self.clf.score(x_test, y_test) * 100, 2)
class AbstractClassifier:
    """Parent class of all project classifiers.

    Attributes:
        model: The classifier model to train and evaluate.
        metrics: A ``Metrics`` instance used to evaluate the model.
        X_train: The features of the training data.
        Y_train: The targets of the training data (ground-truth labels).
        X_test: The features of the testing data.
        Y_test: The targets of the testing data (ground-truth labels).
    """

    def __init__(self, model, mode='0'):
        """Store the model and load the preprocessed train/test data.

        Args:
            model: The classifier model to wrap.
            mode: ``'0'`` loads the data through
                ``DataPreprocessing().naive_preprocessing_data()``; ``'1'``
                through ``DataPreprocessing().advanced_preprocessing_data()``.
                The integers ``0`` and ``1`` are accepted as well.

        Raises:
            ValueError: If ``mode`` is not one of the supported values.
        """
        mode_key = str(mode)
        # Fail fast: an unknown mode used to leave the data attributes unset,
        # which only surfaced later as an AttributeError in train/evaluate.
        if mode_key not in ('0', '1'):
            raise ValueError(
                "Unsupported preprocessing mode {!r}; expected '0' or '1'."
                .format(mode))

        self.model = model
        self.metrics = Metrics()

        preprocessing = DataPreprocessing()
        if mode_key == '0':
            data = preprocessing.naive_preprocessing_data()
        else:
            data = preprocessing.advanced_preprocessing_data()
        self.X_train, self.Y_train, self.X_test, self.Y_test = data

    def train(self):
        """Fit the model on the training data."""
        self.model.fit(self.X_train, self.Y_train)

    def predict(self, x):
        """Return the model's predictions for the features ``x``."""
        return self.model.predict(x)

    def evaluate(self, label="Training", metrics="accuracy"):
        """Report one metric on the training or the testing data.

        Args:
            label: ``'Training'`` selects the training data; any other value
                selects the testing data. The label is forwarded to the
                metric unchanged.
            metrics: ``"accuracy"``, ``"confusion_matrix"`` or ``"roc"``.
                Any other value reports nothing.
        """
        if label == 'Training':
            x, y = self.X_train, self.Y_train
        else:
            x, y = self.X_test, self.Y_test

        if metrics == "accuracy":
            self.metrics.accuracy(self.model, y, x, label)
        elif metrics == "confusion_matrix":
            self.metrics.confusion_matrix(self.model, y, x, label)
        elif metrics == "roc":
            self.metrics.plot_roc(self.model, y, x, label)

    def tunning_model(self, hyperparameters, kfold, metrics):
        """Tune the model with ``CrossValidation`` and return its test score.

        Args:
            hyperparameters: The parameter grid passed to ``CrossValidation``.
            kfold: Number of cross-validation folds.
            metrics: Metric name forwarded to
                ``CrossValidation.fit_and_predict``.

        Returns:
            The value of ``CrossValidation.get_score`` on the testing data.
        """
        cross_validate_model = CrossValidation(
            self.model, hyperparameters, kfold)
        cross_validate_model.fit_and_predict(
            self.X_train, self.Y_train, self.X_test, self.Y_test, metrics)
        return cross_validate_model.get_score(self.X_test, self.Y_test)