class TestClassifiers(unittest.TestCase):
    '''
    Integration tests for the classifier objective functions.

    Each test builds an MLAutomator limited to one entry of
    ``specific_algos``, runs ``find_best_algorithm()`` and checks the
    value reported in ``best_algo``.

    To increase test speed:
        - iterations set to 1
        - num_cv_folds=2
    '''

    iters = 1
    folds = 2

    @classmethod
    def setUpClass(cls):
        '''Load the dataset once for every test in this class.'''
        # Loading here (instead of in the class body) keeps importing this
        # module free of file I/O and turns a missing dataset into a
        # reported test error rather than an import failure.
        cls.x, cls.y = clf_prep('pima-indians-diabetes.csv')
        print('Data prepped')

    def _assert_best_algo(self, algo_key, expected_name):
        '''
        Run a single-algorithm search and check the reported winner.

        Args:
            algo_key: key passed as the only entry of ``specific_algos``.
            expected_name: value ``best_algo`` must equal after the search.
        '''
        automator = MLAutomator(
            self.x,
            self.y,
            iterations=self.iters,
            specific_algos=[algo_key],
            num_cv_folds=self.folds,
        )
        automator.find_best_algorithm()
        self.assertEqual(automator.best_algo, expected_name)

    def test_objective01_xgboost(self):
        self._assert_best_algo('01', 'xgboost_classifier')

    def test_objective02_sgd_classifier(self):
        self._assert_best_algo('02', 'SGDClassifier')

    def test_objective03_random_forest(self):
        # Previously named test_objective03_sgd_classifier (copy-paste);
        # the expectation has always been the random forest label.
        self._assert_best_algo('03', 'RandomForestClassifier')

    def test_objective04_bag_of_svc(self):
        self._assert_best_algo('04', 'SVC')

    def test_objective05_naive_bayes(self):
        self._assert_best_algo('05', 'GaussianNB')

    def test_objective06_logistic_regression(self):
        self._assert_best_algo('06', 'LogisticRegression')

    def test_objective07_knn(self):
        # NOTE(review): scikit-learn spells the estimator
        # 'KNeighborsClassifier'; confirm this label matches what
        # MLAutomator actually stores in best_algo.
        self._assert_best_algo('07', 'KNeighborClassifier')
class TestMLAutomator(unittest.TestCase):
    '''
    Tests for fitting, saving and loading the best pipeline.

    All tests share one automator and one temporary directory. unittest
    runs test methods in alphabetical order by default, so the sequence is
    fit -> dump -> load.

    NOTE(review): test_model_load reads '<directory>/pipeline.joblib',
    presumably written by test_model_dump; confirm before running the
    tests individually or in a different order.
    '''

    @classmethod
    def setUpClass(cls):
        '''Run the algorithm search once and create the scratch directory.'''
        # Doing this here instead of in the class body keeps module import
        # free of file I/O and model training.
        cls.x, cls.y = clf_prep('pima-indians-diabetes.csv')
        cls.automator = MLAutomator(
            cls.x,
            cls.y,
            iterations=2,
            specific_algos=['01'],
        )
        cls.automator.find_best_algorithm()
        # Created last: tearDownClass is skipped when setUpClass raises, so
        # nothing that can fail may come after the directory exists.
        cls.directory = mkdtemp()

    @classmethod
    def tearDownClass(cls):
        '''Remove the temporary directory and anything saved into it.'''
        import shutil

        shutil.rmtree(cls.directory, ignore_errors=True)

    def test_fit_best_pipeline(self):
        self.automator.fit_best_pipeline()
        self.assertIsNotNone(self.automator.best_pipeline)
        print(self.automator.best_pipeline)

    def test_model_dump(self):
        self.automator.save_best_pipeline(self.directory)

    def test_model_load(self):
        self.automator.load_best_pipeline(
            filename=self.directory + '/pipeline.joblib'
        )
class TestMLAutomator(unittest.TestCase):
    '''
    Tests for MLAutomator construction defaults and the key/space helpers.
    '''

    @classmethod
    def setUpClass(cls):
        '''Load the dataset once; it is only read by the tests.'''
        # Loading here instead of in the class body keeps module import
        # free of file I/O.
        cls.x, cls.y = clf_prep('pima-indians-diabetes.csv')

    def setUp(self):
        # A fresh automator per test, so the initial-state assertions do
        # not depend on what other tests did to a shared instance.
        self.automator = MLAutomator(self.x, self.y)

    def test_automator_initialization(self):
        '''
        Test that all class properties are being initialized properly.
        '''
        automator = self.automator
        self.assertEqual(automator.best, 0)
        self.assertEqual(automator.count, 0)
        self.assertIsNone(automator.start_time)
        self.assertIsNone(automator.objective)
        self.assertIsNone(automator.keys)
        self.assertEqual(automator.master_results, [])
        self.assertEqual(automator.type, 'classifier')
        self.assertEqual(automator.score_metric, 'accuracy')
        self.assertEqual(automator.iterations, 25)
        self.assertEqual(automator.num_cv_folds, 10)
        self.assertEqual(automator.repeats, 1)

    def test_get_obj_key_list(self):
        self.assertIsNotNone(classifiers().keys())
        self.assertIsNotNone(regressors().keys())

    def test_get_keys(self):
        for key in ALGORITHM_KEYS.keys():
            # subTest reports which key failed instead of stopping at the
            # first failure without context.
            with self.subTest(key=key):
                keys = get_keys(key)
                self.assertIsNotNone(keys)
                print(keys)

    def test_get_space_regressors(self):
        for key in regressors().keys():
            with self.subTest(key=key):
                self.assertIsNotNone(get_space(self.automator, key))

    def test_get_space_classifiers(self):
        for key in classifiers().keys():
            with self.subTest(key=key):
                self.assertIsNotNone(get_space(self.automator, key))

    def test_user_feedback_went_best_space_not_evaluated(self):
        '''
        Check that the reporting/saving/fitting methods return None when
        called on an automator that has not run a search.
        '''
        self.assertIsNone(self.automator.print_best_space())
        self.assertIsNone(self.automator.save_best_model())
        self.assertIsNone(self.automator.fit_best_model())
# Example script: search for the best algorithm on the Boston housing data
# and print the best search space that was found.
from data.utilities import clf_prep
from mlautomator.mlautomator import MLAutomator


def main():
    # NOTE(review): boston_housing.csv is presumably a regression dataset,
    # yet it is loaded with clf_prep and a default MLAutomator -- confirm
    # this is intended.
    features, target = clf_prep('boston_housing.csv')
    search = MLAutomator(features, target, iterations=20)
    search.find_best_algorithm()
    search.print_best_space()


if __name__ == '__main__':
    main()
# Example script: run a 200-iteration search limited to algorithm '01' on
# GOLD_D.csv, scored with neg_log_loss, and print the best space found.
from data.utilities import clf_prep
from mlautomator.mlautomator import MLAutomator


def main():
    features, target = clf_prep('GOLD_D.csv')
    search = MLAutomator(
        features,
        target,
        iterations=200,
        specific_algos=['01'],
        score_metric='neg_log_loss',
    )
    search.find_best_algorithm()
    search.print_best_space()
    # fit_best_model() is not called by this script.


if __name__ == '__main__':
    main()
# Example script: build an MLAutomator for the Pima Indians diabetes data
# and print it without running the algorithm search.
from data.utilities import clf_prep
from mlautomator.mlautomator import MLAutomator


def main():
    features, target = clf_prep('pima-indians-diabetes.csv')
    search = MLAutomator(features, target, iterations=30)
    # find_best_algorithm() is not called here, so print_best_space() runs
    # on an automator that has not performed a search.
    search.print_best_space()
    print(search)


if __name__ == '__main__':
    main()