def GridSearch(self, model=modelMap.get('svm'), grid=None):
    """Fit a grid search for ``model``, pickle it, and print a summary.

    The estimator class named by ``model.modelClass`` is imported from
    ``model.libPath`` and instantiated with ``model.params``.  Unless a
    ``grid`` object is supplied, a 5-fold ``GridSearchCV`` over a fixed
    ``C`` / ``gamma`` grid is built around it.  The search object is fitted
    on the training data (skipping the first ``self.adhocDependentColsLen``
    columns), stored on ``self.grid`` and pickled to ``gridPath``.

    Args:
        model: Model descriptor exposing ``modelClass``, ``libPath``,
            ``type`` and ``params``.
        grid: Optional search object to fit instead of building a new
            ``GridSearchCV``.

    Side effects:
        Sets ``self.param_grid`` and ``self.grid``, writes ``gridPath``,
        and prints the scores, best parameters and elapsed time.
    """
    import importlib
    import time

    # time.time must be *called*; storing the function object made the
    # final elapsed-time subtraction raise TypeError.
    started = time.time()

    modelClassName = model.modelClass
    modelClass = getattr(importlib.import_module(model.libPath),
                         modelClassName)
    logger.info('Training the %s -- %s' % (modelClassName, model.type))
    _classifier = modelClass(**model.params)

    # The parameter grid is hard-coded and does not depend on ``model``.
    self.param_grid = {'C': [10, 100], 'gamma': [0.01, 0.1]}
    if grid is not None:
        self.grid = grid
    else:
        self.grid = GridSearchCV(
            estimator=_classifier, param_grid=self.param_grid, cv=5)
    self.grid.fit(self.X_train[..., self.adhocDependentColsLen:],
                  self.y_train)

    with open(gridPath, 'wb') as saved_grid:
        pickle.dump(self.grid, saved_grid,
                    protocol=pickle.HIGHEST_PROTOCOL)

    # score() requires the evaluation data; calling it with no arguments
    # raised TypeError.
    # NOTE(review): assumes self.X_test exists and has the same column
    # layout as self.X_train -- confirm against the class's data setup.
    test_score = self.grid.score(
        self.X_test[..., self.adhocDependentColsLen:], self.y_test)
    print("Test set score: {:.2f}".format(test_score))
    print("best cross-validation score: {:.2f}".format(
        self.grid.best_score_))
    print("best parameter: {}".format(self.grid.best_params_))
    print("best estimator: {}".format(self.grid.best_estimator_))
    print("it took {} seconds".format(time.time() - started))
def moreMetrics(self, model=modelMap.get('svm')):
    """Print accuracy and log loss for ``self.result`` and plot the log.

    Both metrics are computed from ``self.y_test`` against ``self.result``.
    A row ``[model.modelClass, accuracy * 100, log loss]`` is added to the
    module-level ``log`` DataFrame (columns ``log_cols``), and two
    horizontal bar charts of the accumulated log are shown: one for
    accuracy, one for log loss.

    Args:
        model: Model descriptor; only ``model.modelClass`` is read here.

    Side effects:
        Rebinds the global ``log``, prints to stdout and opens two
        matplotlib figures.
    """
    global log

    print("=" * 30)
    print(model.modelClass)
    print('****Results****')

    acc = accuracy_score(self.y_test, self.result)
    print("Accuracy: {:.4%}".format(acc))
    ll = log_loss(self.y_test, self.result)
    print("Log Loss: {}".format(ll))

    log_entry = pd.DataFrame([[model.modelClass, acc * 100, ll]],
                             columns=log_cols)
    # DataFrame.append was removed in pandas 2.0; concat of the two frames
    # is its documented equivalent and works on older versions as well.
    log = pd.concat([log, log_entry])
    print("=" * 30)

    # horizontal bar plots
    import seaborn as sns

    sns.set_color_codes("muted")
    sns.barplot(x='Accuracy', y='Classifier', data=log, color="b")
    plt.xlabel('Accuracy %')
    plt.title('Classifier Accuracy')
    plt.show()

    sns.set_color_codes("muted")
    sns.barplot(x='Log Loss', y='Classifier', data=log, color="g")
    plt.xlabel('Log Loss')
    plt.title('Classifier Log Loss')
    plt.show()
def getCrossValidationScore(self, model=modelMap.get('svm')):
    """Print the mean 5-fold cross-validation score of a fresh estimator.

    The estimator class is looked up by name (``model.modelClass``) in the
    module ``model.libPath`` and built with ``model.params``.  It is then
    cross-validated on the training data, skipping the first
    ``self.adhocDependentColsLen`` columns of ``self.X_train``.

    Args:
        model: Model descriptor exposing ``modelClass``, ``libPath``,
            ``type`` and ``params``.
    """
    from sklearn.model_selection import cross_val_score
    import importlib

    class_name = model.modelClass
    estimator_module = importlib.import_module(model.libPath)
    estimator_cls = getattr(estimator_module, class_name)
    logger.info('Training the %s -- %s' % (class_name, model.type))

    estimator = estimator_cls(**model.params)
    features = self.X_train[..., self.adhocDependentColsLen:]
    fold_scores = cross_val_score(estimator, features, self.y_train, cv=5)

    mean_score = np.mean(fold_scores)
    print("Mean cross-validation accuracy: {:.2f}".format(mean_score))
def modelTraining(self, model=modelMap.get('svm'), saved=None):
    """Train (or adopt) a classifier for ``model`` and pickle it.

    The estimator class named by ``model.modelClass`` is imported from
    ``model.libPath`` and instantiated with ``model.params``.  If ``saved``
    is given it is used as the classifier as-is; otherwise the new
    estimator is fitted on the training data, skipping the first
    ``self.adhocDependentColsLen`` columns of ``self.X_train``.  The
    resulting classifier is stored on ``self.classifier`` and written to
    ``model.picklePath``.

    Args:
        model: Model descriptor exposing ``modelClass``, ``libPath``,
            ``type``, ``params`` and ``picklePath``.
        saved: Optional already-trained classifier to use instead of
            fitting a new one.
    """
    import importlib

    modelClassName = model.modelClass
    modelClass = getattr(importlib.import_module(model.libPath),
                         modelClassName)
    logger.info('Training the %s -- %s' % (modelClassName, model.type))
    self.classifier = modelClass(**model.params)

    # Compare against None explicitly: ``saved or ...`` depended on the
    # truthiness of the estimator object, so a supplied estimator that
    # evaluates falsy would have been silently retrained.
    if saved is not None:
        self.classifier = saved
    else:
        self.classifier = self.classifier.fit(
            self.X_train[..., self.adhocDependentColsLen:], self.y_train)

    logger.info('Creating a pickle of trained classifier')
    with open(model.picklePath, 'wb') as saved_predictor:
        pickle.dump(self.classifier, saved_predictor,
                    protocol=pickle.HIGHEST_PROTOCOL)
def getSavedPredictor(predictor='svm'):
    """Load and return the pickled predictor registered as ``predictor``.

    Args:
        predictor: Key into ``modelMap``; the matching entry's
            ``picklePath`` is the file that gets unpickled.

    Returns:
        The object stored in that pickle file.
    """
    pickle_path = modelMap.get(predictor).picklePath
    # NOTE: unpickling can execute arbitrary code; only load files that
    # this project wrote itself.
    with open(pickle_path, 'rb') as pickle_file:
        return pickle.load(pickle_file)