def fit(self, train_x, train_y, folds=3):
    """Grid-search every mapped classifier and rank the fitted models by metric."""
    estimators = all_estimators(type_filter="classifier")
    for name, ClassifierClass in estimators:
        if name in model_param_map:
            param_grid = model_param_map[name]
            grid_clf = GridSearchCV(
                ClassifierClass(),
                param_grid,
                cv=folds,
                scoring="accuracy",
                verbose=0,
                n_jobs=-1,
            )
            start = time.time()
            grid_clf.fit(train_x, train_y)
            end = time.time()

            # Wrap the best estimator and its scores in a SimpleClassifier record.
            clf = SimpleClassifier()
            clf.metrics["Training Accuracy"] = grid_clf.best_score_
            pred_y = grid_clf.predict(train_x)
            clf.metrics["Jaccard Score"] = jaccard_score(train_y, pred_y, average="macro")
            clf.metrics["F1 Score"] = f1_score(train_y, pred_y, average="macro")
            clf.sk_model = grid_clf.best_estimator_
            clf.name = name
            clf.attributes = grid_clf.best_params_
            clf.train_duration = grid_clf.refit_time_
            clf.gridsearch_duration = end - start
            self.ranked_list.append(clf)
    self.ranked_list.sort(reverse=True, key=lambda clf: clf.metrics[self.metric])
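
# --- Hedged sketch (not from the source): model_param_map is referenced above
# but never shown in these snippets. A minimal illustrative version might look
# like the dict below, keyed by the class names that sklearn.utils.all_estimators
# returns; the exact estimators and grids used by the project are assumptions.
model_param_map = {
    "LogisticRegression": {"C": [0.1, 1.0, 10.0], "max_iter": [1000]},
    "KNeighborsClassifier": {"n_neighbors": [3, 5, 7]},
    "RandomForestClassifier": {"n_estimators": [50, 100], "max_depth": [None, 10]},
}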

def simple_classifier(self):
    """Fit, save, reload, and print a SimpleClassifier, then remove the archive."""
    clf = SimpleClassifier()
    clf.fit(self.x, self.y)
    clf.save()

    clf2 = SimpleClassifier()
    clf2.load("simple_classifier.zip")
    print("\nSuccessfully saved/loaded SimpleClassifier:\n", clf2)
    os.remove("simple_classifier.zip")
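
# --- Hedged sketch (not from the source): SimpleClassifier.save/load are called
# above but not shown. Judging only by the "simple_classifier.zip" filename, a
# minimal pickle-in-a-zip round trip could look like this; the real archive
# layout and any extra metadata it stores are assumptions.
import pickle
import zipfile

def save(self, path="simple_classifier.zip"):
    # Serialize the fitted sklearn model into a single-entry zip archive.
    with zipfile.ZipFile(path, "w") as archive:
        archive.writestr("model.pkl", pickle.dumps(self.sk_model))

def load(self, path="simple_classifier.zip"):
    # Restore the fitted sklearn model from the archive written by save().
    with zipfile.ZipFile(path, "r") as archive:
        self.sk_model = pickle.loads(archive.read("model.pkl"))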

def fit(self, train_x, train_y, folds=3):
    """Trains every classifier in the parameter grid by running a model
    algorithm search, then builds a ranked list of the fitted models,
    ordered by the selected scoring metric.

    Parameters
    ----------
    train_x : numpy.ndarray
        The feature array used to train each classification model
    train_y : numpy.ndarray
        The corresponding labels for the feature array
    folds : int, optional
        The number of folds for cross-validation
    """
    estimators = all_estimators(type_filter="classifier")
    for name, ClassifierClass in estimators:
        if name in model_param_map:
            param_grid = model_param_map[name]
            grid_clf = GridSearchCV(
                ClassifierClass(),
                param_grid,
                cv=folds,
                scoring="accuracy",
                verbose=0,
                n_jobs=-1,
            )
            start = time.time()
            try:
                grid_clf.fit(train_x, train_y)
            except Exception as error:
                # Skip estimators that cannot handle this data instead of
                # aborting the whole search.
                self.logger.warning(f"{name} failed with error: {error}")
                continue
            end = time.time()

            clf = SimpleClassifier()
            clf.metrics["Training Accuracy"] = grid_clf.best_score_
            pred_y = grid_clf.predict(train_x)
            clf.metrics["Jaccard Score"] = jaccard_score(train_y, pred_y, average="macro")
            clf.metrics["F1 Score"] = f1_score(train_y, pred_y, average="macro")
            clf.sk_model = grid_clf.best_estimator_
            clf.name = name
            clf.attributes = grid_clf.best_params_
            clf.train_duration = grid_clf.refit_time_
            clf.gridsearch_duration = end - start
            self.ranked_list.append(clf)
    self.ranked_list.sort(reverse=True, key=lambda clf: clf.metrics[self.metric])
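
# --- Illustrative aside (not from the source): the "macro" averaging used for
# the Jaccard and F1 scores above computes each metric per class and then takes
# the unweighted mean, so minority classes count as much as majority ones.
# A tiny runnable check:
from sklearn.metrics import f1_score, jaccard_score

y_true = [0, 0, 0, 1, 1, 2]
y_pred = [0, 0, 1, 1, 1, 2]
print(f1_score(y_true, y_pred, average="macro"))       # per-class F1, then mean
print(jaccard_score(y_true, y_pred, average="macro"))  # per-class IoU, then mean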

def test_iris(self):
    """
    Test SimpleClassifier against sklearn iris dataset

    Expected
    -----------------
    model : Not None
    training accuracy : > 0.0
    accuracy score : > 0.95
    """
    iris = datasets.load_iris()
    true_x = iris.data
    true_y = iris.target
    clf = SimpleClassifier()
    clf.fit(true_x, true_y)
    self.assertIsNotNone(clf.sk_model)
    self.assertTrue(clf.metrics["Training Accuracy"] > 0.0)
    pred_y = clf.predict(true_x)
    self.assertTrue(accuracy_score(true_y, pred_y) > 0.95)

def test_init(self):
    """
    Test initialization of SimpleClassifier

    Expected
    -----------------
    name : "Empty Model"
    """
    clf = SimpleClassifier()
    self.assertEqual(clf.name, "Empty Model")

def test_digits(self):
    """
    Test SimpleClassifier against sklearn digits dataset

    Expected
    -----------------
    model : Not None
    training accuracy : > 0.95
    accuracy score : > 0.95
    """
    digits = datasets.load_digits()
    true_x = digits.data
    true_y = digits.target
    clf = SimpleClassifier()
    clf.fit(true_x, true_y)
    self.assertIsNotNone(clf.sk_model)
    self.assertTrue(clf.metrics["Training Accuracy"] > 0.95)
    pred_y = clf.predict(true_x)
    self.assertTrue(accuracy_score(true_y, pred_y) > 0.95)

def test_init(self):
    clf = SimpleClassifier()
    self.assertEqual(clf.name, "Empty Model")

def simple_classifier(self):
    """Fit a SimpleClassifier on the example data and print the result."""
    clf = SimpleClassifier()
    clf.fit(self.x, self.y)
    print(clf)

def fit(self, train_x, train_y, folds=3):
    """Trains every classifier in the parameter grid by running a model
    algorithm search, then builds a ranked list of the fitted models,
    ordered by the selected scoring metric.

    Parameters
    ----------
    train_x : numpy.ndarray
        The feature array used to train each classification model
    train_y : numpy.ndarray
        The corresponding labels for the feature array
    folds : int, optional
        The number of folds for cross-validation
    """
    log = logging.getLogger(__name__)
    log.setLevel(logging.INFO)
    log.addHandler(custom_logging.TqdmLoggingHandler())
    with tqdm(
        total=len(model_param_map),
        desc="Fitting Models",
        unit=" Algorithm",
        ncols=100,
    ) as progressbar:
        estimators = all_estimators(type_filter="classifier")
        for name, ClassifierClass in estimators:
            if name in model_param_map:
                param_grid = model_param_map[name]
                grid_clf = GridSearchCV(
                    ClassifierClass(),
                    param_grid,
                    cv=folds,
                    scoring="accuracy",
                    verbose=0,
                    n_jobs=-1,
                )
                progressbar.update(1)
                start = time.time()
                try:
                    grid_clf.fit(train_x, train_y)
                except Exception as error:
                    # Skip estimators that cannot handle this data instead of
                    # aborting the whole search.
                    log.info(f"{name} failed with error: {error}")
                    continue
                end = time.time()

                clf = SimpleClassifier()
                clf.metrics["Training Accuracy"] = grid_clf.best_score_
                pred_y = grid_clf.predict(train_x)
                clf.metrics["Jaccard Score"] = jaccard_score(train_y, pred_y, average="macro")
                clf.metrics["F1 Score"] = f1_score(train_y, pred_y, average="macro")
                clf.sk_model = grid_clf.best_estimator_
                clf.name = name
                clf.attributes = grid_clf.best_params_
                clf.train_duration = grid_clf.refit_time_
                clf.gridsearch_duration = end - start
                self.ranked_list.append(clf)
    self.ranked_list.sort(reverse=True, key=lambda clf: clf.metrics[self.metric])
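
# --- Hedged sketch (not from the source): custom_logging.TqdmLoggingHandler is
# used above but not shown. The standard recipe for such a handler routes log
# records through tqdm.write so messages print above the progress bar instead
# of corrupting it; the project's actual handler may differ.
import logging
from tqdm import tqdm

class TqdmLoggingHandler(logging.Handler):
    def emit(self, record):
        try:
            tqdm.write(self.format(record))  # print without breaking the bar
            self.flush()
        except Exception:
            self.handleError(record)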