def GridSearch(self, model=modelMap.get('svm'), grid=None):
        """Grid-search hyper-parameters for *model*, pickle the search, print a report.

        The estimator class named by ``model.modelClass`` is imported from
        ``model.libPath`` and instantiated with ``model.params``.  Unless a
        ready-made ``grid`` is supplied, a 5-fold ``GridSearchCV`` over the
        fixed ``C`` / ``gamma`` grid stored in ``self.param_grid`` is built
        around it.  The search is fitted on ``self.X_train`` (skipping the
        first ``self.adhocDependentColsLen`` entries along the last axis) and
        ``self.y_train``, pickled to ``gridPath``, and a summary is printed.

        Args:
            model: Descriptor exposing ``modelClass``, ``libPath``, ``type``
                and ``params``.  The default is looked up in ``modelMap``
                once, when the method is defined.
            grid: Optional search object to use instead of building a new
                ``GridSearchCV``.  It is fitted again here.
        """
        import importlib
        import time

        # Call time.time(); the bare function object cannot be subtracted later.
        st = time.time()

        modelClassName = model.modelClass
        modelClass = getattr(importlib.import_module(model.libPath),
                             modelClassName)
        logger.info('Training the %s -- %s' % (modelClassName, model.type))
        _classifier = modelClass(**model.params)

        # NOTE(review): this grid is hard-coded to C/gamma regardless of
        # which model was requested -- confirm that is intended.
        self.param_grid = {'C': [10, 100], 'gamma': [0.01, 0.1]}
        if grid is None:
            grid = GridSearchCV(
                estimator=_classifier, param_grid=self.param_grid, cv=5)
        self.grid = grid
        self.grid.fit(self.X_train[..., self.adhocDependentColsLen:],
                      self.y_train)

        with open(gridPath, 'wb') as saved_grid:
            pickle.dump(self.grid,
                        saved_grid,
                        protocol=pickle.HIGHEST_PROTOCOL)

        # score() needs the evaluation data; calling it bare raises TypeError.
        # NOTE(review): assumes self.X_test exists and has the same column
        # layout as self.X_train -- confirm against the class initialiser.
        test_score = self.grid.score(
            self.X_test[..., self.adhocDependentColsLen:], self.y_test)
        print("Test set score: {:.2f}".format(test_score))
        print("best cross-validation score: {:.2f}".format(
            self.grid.best_score_))
        print("best parameter: {}".format(self.grid.best_params_))
        print("best estimator: {}".format(self.grid.best_estimator_))
        print("it took {} seconds".format(time.time() - st))
    def moreMetrics(self, model=modelMap.get('svm')):
        """Print accuracy / log-loss for ``self.result`` and plot the running log.

        The metrics are computed against ``self.y_test``, appended as one row
        to the module-level ``log`` DataFrame (columns given by ``log_cols``),
        and the whole log is then shown as two horizontal bar charts, one per
        metric.

        Args:
            model: Descriptor whose ``modelClass`` labels the new log row.
                The default is looked up in ``modelMap`` once, when the
                method is defined.
        """
        global log

        print("=" * 30)
        print(model.modelClass)

        print('****Results****')
        acc = accuracy_score(self.y_test, self.result)
        print("Accuracy: {:.4%}".format(acc))
        # NOTE(review): self.result is also passed to accuracy_score, so it
        # looks like hard predictions; log_loss is normally computed from
        # probabilities -- confirm this is the intended input.
        ll = log_loss(self.y_test, self.result)
        print("Log Loss: {}".format(ll))

        log_entry = pd.DataFrame([[model.modelClass, acc * 100, ll]],
                                 columns=log_cols)
        # DataFrame.append was removed in pandas 2.0; concat is the
        # equivalent that works on both old and new versions.
        log = pd.concat([log, log_entry])

        print("=" * 30)

        # horizontal bar plots, one per metric
        import seaborn as sns
        sns.set_color_codes("muted")

        charts = (
            ('Accuracy', 'b', 'Accuracy %', 'Classifier Accuracy'),
            ('Log Loss', 'g', 'Log Loss', 'Classifier Log Loss'),
        )
        for column, color, xlabel, title in charts:
            sns.barplot(x=column, y='Classifier', data=log, color=color)
            plt.xlabel(xlabel)
            plt.title(title)
            plt.show()
    def getCrossValidationScore(self, model=modelMap.get('svm')):
        """Print the mean 5-fold cross-validation score of *model* on the training data.

        A fresh estimator is built from ``model.libPath`` /
        ``model.modelClass`` with ``model.params`` and evaluated with
        ``cross_val_score`` on ``self.X_train`` (skipping the first
        ``self.adhocDependentColsLen`` entries along the last axis) against
        ``self.y_train``.

        Args:
            model: Descriptor exposing ``modelClass``, ``libPath``, ``type``
                and ``params``.  The default is looked up in ``modelMap``
                once, when the method is defined.
        """
        from sklearn.model_selection import cross_val_score
        import importlib

        class_name = model.modelClass
        library = importlib.import_module(model.libPath)
        estimator_cls = getattr(library, class_name)
        logger.info('Training the %s -- %s' % (class_name, model.type))
        estimator = estimator_cls(**model.params)

        features = self.X_train[..., self.adhocDependentColsLen:]
        fold_scores = cross_val_score(estimator, features, self.y_train, cv=5)

        mean_score = np.mean(fold_scores)
        print("Mean cross-validation accuracy: {:.2f}".format(mean_score))
    def modelTraining(self, model=modelMap.get('svm'), saved=None):
        """Train (or adopt) the classifier for *model* and pickle it.

        When ``saved`` is given it becomes ``self.classifier`` unchanged.
        Otherwise the class named by ``model.modelClass`` is imported from
        ``model.libPath``, instantiated with ``model.params`` and fitted on
        ``self.X_train`` (skipping the first ``self.adhocDependentColsLen``
        entries along the last axis) and ``self.y_train``.  In both cases the
        resulting classifier is pickled to ``model.picklePath``.

        Args:
            model: Descriptor exposing ``modelClass``, ``libPath``, ``type``,
                ``params`` and ``picklePath``.  The default is looked up in
                ``modelMap`` once, when the method is defined.
            saved: Optional already-trained classifier to use instead of
                fitting a new one.
        """
        import importlib

        modelClassName = model.modelClass
        modelClass = getattr(importlib.import_module(model.libPath),
                             modelClassName)

        logger.info('Training the %s -- %s' % (modelClassName, model.type))

        # Compare against None explicitly: `saved or ...` relies on the
        # estimator's truthiness, and estimators that define __len__ can be
        # falsy (or raise), which would silently retrain and overwrite the
        # pickle.
        if saved is not None:
            self.classifier = saved
        else:
            self.classifier = modelClass(**model.params)
            # Keep the object returned by fit(), as the original code did.
            self.classifier = self.classifier.fit(
                self.X_train[..., self.adhocDependentColsLen:], self.y_train)

        logger.info('Creating a pickle of trained classifier')
        with open(model.picklePath, 'wb') as saved_predictor:
            pickle.dump(self.classifier,
                        saved_predictor,
                        protocol=pickle.HIGHEST_PROTOCOL)
Example #5
0
def getSavedPredictor(predictor='svm'):
    """Return the unpickled predictor stored for the ``modelMap`` entry *predictor*.

    Args:
        predictor: Key into ``modelMap``; the entry's ``picklePath`` names
            the file to load.

    Returns:
        Whatever object was pickled at that path.
    """
    pickle_path = modelMap.get(predictor).picklePath
    # NOTE: pickle.load executes arbitrary code from the file; only point
    # this at pickle files produced by this project.
    with open(pickle_path, 'rb') as pickle_file:
        return pickle.load(pickle_file)