Code Example #1
    def fit(self, train_x, train_y, folds=3):
        # Try every sklearn classifier that has an entry in
        # model_param_map, tuning each with a cross-validated grid search.
        estimators = all_estimators(type_filter="classifier")
        for name, ClassifierClass in estimators:
            if name in model_param_map:
                param_grid = model_param_map[name]
                grid_clf = GridSearchCV(
                    ClassifierClass(),
                    param_grid,
                    cv=folds,
                    scoring="accuracy",
                    verbose=0,
                    n_jobs=-1,
                )
                start = time.time()
                grid_clf.fit(train_x, train_y)
                end = time.time()
                # Wrap the winning estimator and its scores for ranking
                # (Jaccard and F1 are computed on the training data).
                clf = SimpleClassifier()
                clf.metrics["Training Accuracy"] = grid_clf.best_score_
                pred_y = grid_clf.predict(train_x)
                clf.metrics["Jaccard Score"] = jaccard_score(train_y,
                                                             pred_y,
                                                             average="macro")
                clf.metrics["F1 Score"] = f1_score(train_y,
                                                   pred_y,
                                                   average="macro")
                clf.sk_model = grid_clf.best_estimator_
                clf.name = name
                clf.attributes = grid_clf.best_params_
                clf.train_duration = grid_clf.refit_time_
                clf.gridsearch_duration = end - start
                self.ranked_list.append(clf)
        # Sort models best-first on the configured metric.
        self.ranked_list.sort(reverse=True,
                              key=lambda clf: clf.metrics[self.metric])
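Code Example #1 leans on a module-level model_param_map and a few imports that the excerpt does not show. The following is a minimal sketch of that scaffolding; the import paths are the standard sklearn ones, but the grid entries here are illustrative assumptions, not the project's actual grids.

import time

from sklearn.metrics import f1_score, jaccard_score
from sklearn.model_selection import GridSearchCV
from sklearn.utils import all_estimators

# Hypothetical parameter grid map: keys must match the class names
# yielded by all_estimators(); values are GridSearchCV param grids.
model_param_map = {
    "LogisticRegression": {"C": [0.1, 1.0, 10.0], "max_iter": [1000]},
    "RandomForestClassifier": {"n_estimators": [50, 100],
                               "max_depth": [None, 10]},
    "KNeighborsClassifier": {"n_neighbors": [3, 5, 7]},
}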
Code Example #2
    def simple_classifier(self):
        clf = SimpleClassifier()
        clf.fit(self.x, self.y)
        clf.save()
        clf2 = SimpleClassifier()
        clf2.load("simple_classifier.zip")
        print("\nSuccessfully saved/loaded SimpleClassifier:\n", clf2)
        os.remove("simple_classifier.zip")
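save() and load() never appear in these excerpts; the test only shows the round trip through "simple_classifier.zip". Purely as an illustration, here is one way such a pair could be implemented with joblib; the project's actual serialization format is not shown and may well differ.

import os
import zipfile

import joblib


class SimpleClassifier:
    # ... attributes such as sk_model are set in fit() ...

    def save(self, filename="simple_classifier.zip"):
        # Serialize the fitted sklearn model, then zip the dump.
        joblib.dump(self.sk_model, "sk_model.joblib")
        with zipfile.ZipFile(filename, "w") as archive:
            archive.write("sk_model.joblib")
        os.remove("sk_model.joblib")

    def load(self, filename):
        # Unzip and restore the serialized sklearn model.
        with zipfile.ZipFile(filename) as archive:
            archive.extract("sk_model.joblib")
        self.sk_model = joblib.load("sk_model.joblib")
        os.remove("sk_model.joblib")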
Code Example #3
    def fit(self, train_x, train_y, folds=3):
        """Trains all classification models from
        parameter grid by running model algorithm search.

        Creates a ranked list of models based on selected
        scoring metric.

        Parameters
        ----------
        train_x : numpy.ndarray
            The features for training classification model
        train_y : numpy.ndarray
            The corresponding label for feature array
        folds : int, optional
            The number of folds for cross validation
        """

        estimators = all_estimators(type_filter="classifier")
        for name, ClassifierClass in estimators:
            if name in model_param_map:
                param_grid = model_param_map[name]
                grid_clf = GridSearchCV(
                    ClassifierClass(),
                    param_grid,
                    cv=folds,
                    scoring="accuracy",
                    verbose=0,
                    n_jobs=-1,
                )
                start = time.time()
                try:
                    grid_clf.fit(train_x, train_y)
                except Exception as error:
                    self.logger.warning(
                        f"{name} failed with error: {error}")
                    continue
                end = time.time()
                clf = SimpleClassifier()
                clf.metrics["Training Accuracy"] = grid_clf.best_score_
                pred_y = grid_clf.predict(train_x)
                clf.metrics["Jaccard Score"] = jaccard_score(train_y,
                                                             pred_y,
                                                             average="macro")
                clf.metrics["F1 Score"] = f1_score(train_y,
                                                   pred_y,
                                                   average="macro")
                clf.sk_model = grid_clf.best_estimator_
                clf.name = name
                clf.attributes = grid_clf.best_params_
                clf.train_duration = grid_clf.refit_time_
                clf.gridsearch_duration = end - start
                self.ranked_list.append(clf)
        self.ranked_list.sort(reverse=True,
                              key=lambda clf: clf.metrics[self.metric])
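For orientation, a short usage sketch of this fit(). The call pattern mirrors the tests later in this section; the ranked_list walk assumes the attribute is public and initialized by the constructor, as the appends in fit() imply.

from sklearn import datasets

# Assumes SimpleClassifier is importable from the surrounding module.
iris = datasets.load_iris()

clf = SimpleClassifier()
clf.fit(iris.data, iris.target)

# ranked_list holds one wrapped model per successful grid search,
# sorted best-first on the configured metric.
for model in clf.ranked_list:
    print(model.name, model.metrics["F1 Score"])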
Code Example #4
    def test_iris(self):
        """
        Test SimpleClassifier against sklearn iris dataset

        Expected
        -----------------
        model : Not None
        training accuracy : > 0.0
        accuracy score : > 0.95
        """
        iris = datasets.load_iris()
        true_x = iris.data
        true_y = iris.target

        clf = SimpleClassifier()
        clf.fit(true_x, true_y)
        self.assertIsNotNone(clf.sk_model)
        self.assertTrue(clf.metrics["Training Accuracy"] > 0.0)

        pred_y = clf.predict(true_x)
        self.assertTrue(accuracy_score(true_y, pred_y) > 0.95)
Code Example #5
    def test_init(self):
        """
        Test initialization of SimpleClassifier

        Expected
        -----------------
        name : "Empty Model"
        """
        clf = SimpleClassifier()
        self.assertEqual(clf.name, "Empty Model")
Code Example #6
    def test_digits(self):
        """
        Test SimpleClassifier against sklearn digits dataset

        Expected
        -----------------
        model : Not None
        training accuracy : > 0.95
        accuracy score : > 0.95
        """
        digits = datasets.load_digits()
        true_x = digits.data
        true_y = digits.target

        clf = SimpleClassifier()
        clf.fit(true_x, true_y)
        self.assertIsNotNone(clf.sk_model)
        self.assertTrue(clf.metrics["Training Accuracy"] > 0.95)

        pred_y = clf.predict(true_x)
        self.assertTrue(accuracy_score(true_y, pred_y) > 0.95)
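The assertion style (self.assertEqual, self.assertTrue) marks these tests as unittest.TestCase methods. Below is a sketch of the module scaffolding they assume; the test-class name and the import path for SimpleClassifier are hypothetical.

import unittest

from sklearn import datasets
from sklearn.metrics import accuracy_score

from simple_classifier import SimpleClassifier  # hypothetical import path


class TestSimpleClassifier(unittest.TestCase):
    # test_init, test_iris, and test_digits from the excerpts
    # in this section live at this indentation level.
    pass


if __name__ == "__main__":
    unittest.main()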
Code Example #7
    def test_init(self):
        clf = SimpleClassifier()
        self.assertEqual(clf.name, "Empty Model")
Code Example #8
    def simple_classifier(self):
        clf = SimpleClassifier()
        clf.fit(self.x, self.y)
        print(clf)
Code Example #9
    def fit(self, train_x, train_y, folds=3):
        """Trains all classification models from
        parameter grid by running model algorithm search.

        Creates a ranked list of models based on selected
        scoring metric.

        Parameters
        ----------
        train_x : numpy.ndarray
            The features for training classification model
        train_y : numpy.ndarray
            The corresponding label for feature array
        folds : int, optional
            The number of folds for cross validation
        """
        log = logging.getLogger(__name__)
        log.setLevel(logging.INFO)
        log.addHandler(custom_logging.TqdmLoggingHandler())
        with tqdm(
                total=(len(model_param_map)),
                desc="Fitting Models",
                unit=" Algorithm",
                ncols=100,
        ) as progressbar:
            estimators = all_estimators(type_filter="classifier")
            for name, ClassifierClass in estimators:
                if name in model_param_map:
                    param_grid = model_param_map[name]
                    grid_clf = GridSearchCV(
                        ClassifierClass(),
                        param_grid,
                        cv=folds,
                        scoring="accuracy",
                        verbose=0,
                        n_jobs=-1,
                    )
                    # Advance the bar once per attempted algorithm,
                    # so failed fits still count toward the total.
                    progressbar.update(1)
                    start = time.time()
                    try:
                        grid_clf.fit(train_x, train_y)
                    except Exception as error:
                        log.info(f"{name} failed with error: {error}")
                        continue
                    end = time.time()
                    clf = SimpleClassifier()
                    clf.metrics["Training Accuracy"] = grid_clf.best_score_
                    pred_y = grid_clf.predict(train_x)
                    clf.metrics["Jaccard Score"] = jaccard_score(
                        train_y, pred_y, average="macro")
                    clf.metrics["F1 Score"] = f1_score(train_y,
                                                       pred_y,
                                                       average="macro")
                    clf.sk_model = grid_clf.best_estimator_
                    clf.name = name
                    clf.attributes = grid_clf.best_params_
                    clf.train_duration = grid_clf.refit_time_
                    clf.gridsearch_duration = end - start
                    self.ranked_list.append(clf)
            self.ranked_list.sort(reverse=True,
                                  key=lambda clf: clf.metrics[self.metric])
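custom_logging.TqdmLoggingHandler is referenced here but not shown. The standard pattern for such a handler, documented by tqdm itself, routes log records through tqdm.write() so messages print above the bar instead of corrupting its in-place redraw. A minimal sketch of what custom_logging could contain; the project's actual handler may differ.

import logging

from tqdm import tqdm


class TqdmLoggingHandler(logging.Handler):
    """Route log records through tqdm.write() so that log lines
    do not break an active progress bar's redraw."""

    def emit(self, record):
        try:
            msg = self.format(record)
            tqdm.write(msg)
            self.flush()
        except Exception:
            self.handleError(record)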