def setUp(self):
    """Prepare the fixture used by every test of this case.

    Points the framework at the mock project path, creates a project,
    reads and saves the mock dataset, saves a 5-fold subdataset, saves a
    ``logistic_regression`` approach whose script is a copy of the example
    approach, and finally runs that approach.
    """
    set_project_path(testenv.MOCK_PROJECT_PATH)

    # Test-environment constants mirrored on the instance
    self.path_to_dataset = testenv.MOCK_DATASET
    self.path_to_test_dir = testenv.TEST_PATH
    self.aux_project_name = testenv.MOCK_PROJECT_NAME
    self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
    self.project_default_name = testenv.DEFAULT_PROJECT_NAME

    # Project
    self.p = Project(path=self.path_to_test_dir, name=self.aux_project_name)

    # Dataset
    self.ds = Dataset.read_file(path=self.path_to_dataset)
    self.ds.save()

    # Subdataset
    self.sbds = SubDataset(self.ds, method="k_fold", by=5)
    self.sbds.save()

    # Approach: the example script is copied to the approach's script path
    approach_dir = str(Path(testenv.TEST_PATH, "lr"))
    self.approach = Approach(
        self.p, "logistic_regression", self.sbds, path=approach_dir
    )
    shutil.copyfile(testenv.APPROACH_EXAMPLE, str(self.approach.script_path))
    self.approach.save()

    # Runs: import the runnable approach class and execute it
    runnable_cls = import_from(
        "test.lr.logistic_regression", "LogisticRegressionApproach"
    )
    runnable_cls().run()
def setUp(self):
    """Prepare the multiclass fixture used by every test of this case.

    Creates a project under the mock project path, reads and saves the
    iris dataset, saves a 5-fold subdataset, saves a ``decision_tree``
    approach whose script is a copy of the iris approach, and runs it.
    """
    set_project_path(testenv.MOCK_PROJECT_PATH)

    # Test-environment constants mirrored on the instance
    self.path_to_dataset = testenv.IRIS_DATASET
    self.path_to_test_dir = testenv.TEST_PATH
    self.aux_project_name = testenv.MOCK_PROJECT_NAME
    self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
    self.project_default_name = testenv.DEFAULT_PROJECT_NAME

    # Project
    self.p = Project(path=self.path_to_test_dir, name=self.aux_project_name)

    # Dataset
    self.ds = Dataset.read_file(path=self.path_to_dataset)
    self.ds.save()

    # Subdataset
    self.sbds = SubDataset(self.ds, method="k_fold", by=5)
    self.sbds.save()

    # Approach: the iris script is copied to the approach's script path
    approach_dir = str(Path(testenv.TEST_PATH, "dt"))
    self.approach = Approach(
        self.p, "decision_tree", self.sbds, path=approach_dir
    )
    shutil.copyfile(testenv.IRIS_APPROACH, str(self.approach.script_path))
    self.approach.save()

    # Runs: import the runnable approach class and execute it
    runnable_cls = import_from("test.dt.decision_tree", "DecisionTreeApproach")
    runnable_cls().run()
def generate_subdataset(dataset, method, by):
    """Create and save a subdataset of a stored dataset, then print its id.

    Parameters
    ----------
    dataset
        Value handed to ``Dataset.load`` to obtain the parent dataset.
    method
        Split method; ``"k_fold"`` makes ``by`` an integer, any other
        value makes it a float.
    by
        Split size, converted with ``int`` or ``float`` depending on
        ``method``.
    """
    # The conversion happens before the dataset is loaded.
    split_by = int(by) if method == "k_fold" else float(by)

    parent = Dataset.load(dataset)
    subdataset = SubDataset(dataset=parent, method=method, by=split_by)
    subdataset.save()
    print("Subdataset with id {} created".format(subdataset.id))
class ResultReportTest(unittest.TestCase):
    """Tests for ResultReport on the logistic-regression example approach."""

    def setUp(self):
        """Create project, dataset, 5-fold subdataset and approach, then run it."""
        set_project_path(testenv.MOCK_PROJECT_PATH)
        self.path_to_dataset = testenv.MOCK_DATASET
        self.path_to_test_dir = testenv.TEST_PATH
        self.aux_project_name = testenv.MOCK_PROJECT_NAME
        self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
        self.project_default_name = testenv.DEFAULT_PROJECT_NAME

        # Generate a project
        self.p = Project(path=self.path_to_test_dir, name=self.aux_project_name)

        # Add a dataset
        self.ds = Dataset.read_file(path=self.path_to_dataset)
        self.ds.save()

        # Generate subdataset
        self.sbds = SubDataset(self.ds, method="k_fold", by=5)
        self.sbds.save()

        # Set approach: the example script is copied to its script path
        self.approach = Approach(
            self.p,
            "logistic_regression",
            self.sbds,
            path=str(Path(testenv.TEST_PATH, "lr")),
        )
        shutil.copyfile(testenv.APPROACH_EXAMPLE, str(self.approach.script_path))
        self.approach.save()

        # Generate runs
        import_from(
            "test.lr.logistic_regression", "LogisticRegressionApproach"
        )().run()

    def tearDown(self):
        """Remove the mock projects created by setUp."""
        testenv.delete_mock_projects()

    def test_create_result_report(self):
        """The report dataframe has one column per requested metric."""
        metrics = ["recall", "precision", "f1"]
        r = ResultReport(
            approach=Approach.load(self.approach.id),
            metrics=[recall, precision, f1],
        )
        df = r.as_dataframe()
        # assertIn names the missing metric on failure, unlike
        # assertTrue(all(...)) which only reports "False is not true".
        for m in metrics:
            self.assertIn(m, df.columns)

    def test_using_sklearn_metrics(self):
        """A scikit-learn metric function can be used as a report metric."""
        from sklearn.metrics import classification_report

        r = ResultReport(
            approach=Approach.load(self.approach.id),
            metrics=[classification_report],
        )
        df = r.as_dataframe()
        self.assertIsNotNone(df.classification_report[0])
def test_load_subdataset_with_kfold_method(self):
    """A subdataset loaded by id compares equal to the one it came from.

    Returns
    -------
    The subdataset obtained from ``SubDataset.load``.
    """
    sbds1 = self.test_generate_subdataset_with_train_kfold_method()
    sbds2 = SubDataset.load(sbds1.id)
    # assertEqual uses the same == comparison but shows both operands
    # when the check fails.
    self.assertEqual(sbds1, sbds2)
    return sbds2
class MulticlassResultReportTest(unittest.TestCase):
    """Tests for ResultReport with multiclass metrics on the iris dataset."""

    def setUp(self):
        """Create project, dataset, 5-fold subdataset and approach, then run it."""
        set_project_path(testenv.MOCK_PROJECT_PATH)
        self.path_to_dataset = testenv.IRIS_DATASET
        self.path_to_test_dir = testenv.TEST_PATH
        self.aux_project_name = testenv.MOCK_PROJECT_NAME
        self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
        self.project_default_name = testenv.DEFAULT_PROJECT_NAME

        # Generate a project
        self.p = Project(path=self.path_to_test_dir, name=self.aux_project_name)

        # Add a dataset
        self.ds = Dataset.read_file(path=self.path_to_dataset)
        self.ds.save()

        # Generate subdataset
        self.sbds = SubDataset(self.ds, method="k_fold", by=5)
        self.sbds.save()

        # Set approach: the iris script is copied to its script path
        self.approach = Approach(
            self.p,
            "decision_tree",
            self.sbds,
            path=str(Path(testenv.TEST_PATH, "dt")),
        )
        shutil.copyfile(testenv.IRIS_APPROACH, str(self.approach.script_path))
        self.approach.save()

        # Generate runs
        import_from("test.dt.decision_tree", "DecisionTreeApproach")().run()

    def tearDown(self):
        """Remove the mock projects created by setUp."""
        testenv.delete_mock_projects()

    def test_create_result_report(self):
        """The report dataframe has a column named after each metric function."""
        metrics = [multiclass_recall, multiclass_precision, multiclass_f1]
        r = ResultReport(approach=Approach.load(self.approach.id), metrics=metrics)
        df = r.as_dataframe()
        # assertIn reports which column name is missing on failure.
        for metric_fn in metrics:
            self.assertIn(metric_fn.__name__, df.columns)
def get_subdatasets(self):
    """
    Load every record of the subdatasets collection as a SubDataset

    Returns
    -------
    list(SubDataset)
        One object built with ``SubDataset.load_from_data`` per record
        returned by ``Collections.subdatasets().all()``
    """
    loaded = []
    for record in Collections.subdatasets().all():
        loaded.append(SubDataset.load_from_data(record))
    return loaded
def setUp(self):
    """Prepare a saved project, dataset, subdataset and approach.

    The mock dataset is read with ``first_line_heading=False``, split into
    a 5-fold subdataset, and a ``logistic_regression`` approach is saved
    with the example approach script copied to its script path.
    """
    set_project_path(testenv.MOCK_PROJECT_PATH)

    # Project
    self.p = Project(path=testenv.TEST_PATH, name=testenv.MOCK_PROJECT_NAME)

    # Dataset
    self.ds = Dataset.read_file(
        path=testenv.MOCK_DATASET, first_line_heading=False
    )
    self.ds.save()

    # Subdataset
    self.sbds = SubDataset(self.ds, method="k_fold", by=5)
    self.sbds.save()

    # Approach
    approach_dir = str(Path(testenv.TEST_PATH, "lr"))
    self.approach = Approach(
        self.p, "logistic_regression", self.sbds, path=approach_dir
    )
    shutil.copyfile(testenv.APPROACH_EXAMPLE, str(self.approach.script_path))
    self.approach.save()
class RunGeneratorTest(unittest.TestCase):
    """Tests for generating runs from a runnable approach."""

    def setUp(self):
        """Save a project, dataset, 5-fold subdataset and approach."""
        set_project_path(testenv.MOCK_PROJECT_PATH)

        self.p = Project(path=testenv.TEST_PATH, name=testenv.MOCK_PROJECT_NAME)

        self.ds = Dataset.read_file(
            path=testenv.MOCK_DATASET, first_line_heading=False
        )
        self.ds.save()

        self.sbds = SubDataset(self.ds, method="k_fold", by=5)
        self.sbds.save()

        approach_dir = str(Path(testenv.TEST_PATH, "lr"))
        self.approach = Approach(
            self.p, "logistic_regression", self.sbds, path=approach_dir
        )
        shutil.copyfile(testenv.APPROACH_EXAMPLE, str(self.approach.script_path))
        self.approach.save()

    def tearDown(self):
        """Remove the mock projects created by setUp."""
        testenv.delete_mock_projects()

    def test_generate_runs_from_subdataset(self):
        """Generated runs stored on the approach survive a reload."""
        # The runnable approach class is imported by dotted module name
        runnable_cls = import_from(
            "test.lr.logistic_regression", "LogisticRegressionApproach"
        )
        runnable = runnable_cls()

        # Generate the runs
        generated = RunGenerator.from_runnable_approach(runnable)

        # Attach the runs to the approach and write them to the database
        runnable.approach.runs = generated
        runnable.approach.update()

        # Reload the approach and compare the number of stored runs
        reloaded = Approach.load(runnable.approach.id)
        self.assertEqual(len(reloaded.runs), len(generated))
def setUp(self):
    """Prepare a saved project, dataset, subdataset and ``test_approach``.

    No explicit approach path is given; the example approach script is
    copied to whatever ``script_path`` the approach exposes.
    """
    set_project_path(testenv.MOCK_PROJECT_PATH)

    # Test-environment constants mirrored on the instance
    self.path_to_dataset = testenv.MOCK_DATASET
    self.path_to_test_dir = testenv.TEST_PATH
    self.aux_project_name = testenv.MOCK_PROJECT_NAME
    self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
    self.project_default_name = testenv.DEFAULT_PROJECT_NAME

    # Project
    self.p = Project(path=self.path_to_test_dir, name=self.aux_project_name)

    # Dataset
    self.ds = Dataset.read_file(path=self.path_to_dataset)
    self.ds.save()

    # Subdataset
    self.sbds = SubDataset(self.ds, method="k_fold", by=5)
    self.sbds.save()

    # Approach
    self.approach = Approach(self.p, "test_approach", self.sbds)
    script_target = str(self.approach.script_path)
    shutil.copyfile(testenv.APPROACH_EXAMPLE, script_target)
    self.approach.save()
class ApproachTest(unittest.TestCase):
    """Tests for retrieving the runs stored for an approach."""

    def setUp(self):
        """Save a project, dataset, 5-fold subdataset and approach."""
        set_project_path(testenv.MOCK_PROJECT_PATH)
        self.p = Project(path=testenv.TEST_PATH, name=testenv.MOCK_PROJECT_NAME)
        self.ds = Dataset.read_file(
            path=testenv.MOCK_DATASET, first_line_heading=False
        )
        self.ds.save()
        self.sbds = SubDataset(self.ds, method="k_fold", by=5)
        self.sbds.save()
        self.approach = Approach(
            self.p,
            "logistic_regression",
            self.sbds,
            path=str(Path(testenv.TEST_PATH, "lr")),
        )
        shutil.copyfile(testenv.APPROACH_EXAMPLE, str(self.approach.script_path))
        self.approach.save()

    def tearDown(self):
        """Remove the mock projects created by setUp."""
        testenv.delete_mock_projects()

    def test_get_subdataset_runs(self):
        """Saved runs are returned as Run objects when the approach is reloaded.

        Returns
        -------
        The runs read back from the reloaded approach.
        """
        runnable = import_from(
            "test.lr.logistic_regression", "LogisticRegressionApproach"
        )
        runs = RunGenerator.from_runnable_approach(runnable())
        for run in runs:
            run.save()

        runs = Approach.load(self.approach.id).runs
        # assertGreater reports the actual count when the check fails.
        self.assertGreater(len(runs), 0)
        for run in runs:
            self.assertIsInstance(run, Run)
        return runs
def test_get_latest_subdataset(self):
    """The project's last subdataset is the most recently saved one.

    Two k-fold subdatasets (5 and 10 folds) are saved in that order and
    ``get_last_subdataset`` is expected to return the second.
    """
    project = self.test_load_project()

    dataset = Dataset.read_file(
        path=testenv.MOCK_DATASET, first_line_heading=False
    )
    dataset.save()

    split_method = "k_fold"
    folds = 5

    first = SubDataset(dataset, method=split_method, by=folds)
    first.save()
    second = SubDataset(dataset, method=split_method, by=folds * 2)
    second.save()

    latest = project.get_last_subdataset()
    self.assertEqual(latest.id, second.id)
def test_generate_subdataset(self, method="train_test", by=0.8):
    """
    Build a SubDataset from a freshly saved Dataset.

    A Project is instantiated, the dataset file is read and saved, and a
    SubDataset is created from it using the ``method`` and ``by``
    parameters. The subdataset itself is not saved by this method.

    Asserts
    -------
    - Indices are not None

    Returns
    -------
    SubDataset
        The subdataset that was created
    """
    Project(path=self.path_to_test_dir, name=self.aux_project_name)

    dataset = Dataset.read_file(path=self.path_to_dataset)
    dataset.save()

    subdataset = SubDataset(dataset=dataset, method=method, by=by)
    self.assertIsNotNone(subdataset.indices)
    return subdataset
return lr def inference(model, test_data): output = model.predict(test_data["X"]) return output def my_func(x): print(x[2]) m = learn(train_data=x[0], parameters=x[2]) o = inference(model=m, test_data=x[1]) return o sbs = SubDataset.load(path=subdataset_path) runpool = RunPool(subdataset_path=sbs.path) run_list = [] for run in runpool.iteruns(): run.set_status("finished") run_list.append(run) i = 0 window = int(job_window) while (i * window) < len(run_list): print([ r.get_train_data()["X"].index for r in run_list[i * window:(i + 1) * window] ])
class RunTest(unittest.TestCase):
    """Tests for creating, saving and loading Run objects and for RunPool."""

    def setUp(self):
        """Save a project, dataset, 5-fold subdataset and ``test_approach``."""
        set_project_path(testenv.MOCK_PROJECT_PATH)
        self.path_to_dataset = testenv.MOCK_DATASET
        self.path_to_test_dir = testenv.TEST_PATH
        self.aux_project_name = testenv.MOCK_PROJECT_NAME
        self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
        self.project_default_name = testenv.DEFAULT_PROJECT_NAME
        self.p = Project(path=self.path_to_test_dir, name=self.aux_project_name)
        self.ds = Dataset.read_file(path=self.path_to_dataset)
        self.ds.save()
        self.sbds = SubDataset(self.ds, method="k_fold", by=5)
        self.sbds.save()
        self.approach = Approach(self.p, "test_approach", self.sbds)
        shutil.copyfile(testenv.APPROACH_EXAMPLE, str(self.approach.script_path))
        self.approach.save()

    def tearDown(self):
        """Remove the mock projects created by setUp."""
        testenv.delete_mock_projects()

    def _new_run(self):
        """Construct a Run for the fixture approach with fixed sample parameters."""
        return Run(
            approach_id=self.approach.id,
            subdataset=self.sbds,
            subdataset_set="A",
            run_parameters={"param1": 1, "param2": 2},
        )

    def test_create_run(self):
        """Constructing a Run does not raise."""
        self._new_run()

    def test_create_run_and_save(self):
        """A saved run can be loaded back by approach id and run id.

        Returns
        -------
        The run that was saved.
        """
        run = self._new_run()
        run.save()
        data = Run.load(self.approach.id, run.id)
        self.assertIsNotNone(data)
        return run

    def test_load_run(self):
        """A loaded run has the same id as the run that was saved."""
        run1 = self.test_create_run_and_save()
        run2 = Run.load(self.approach.id, run1.id)
        self.assertEqual(run1.id, run2.id)

    def test_create_runpool(self):
        """Every item yielded by a RunPool is a Run."""
        # Force reload runs from database
        self.approach = Approach.load(self.approach.id)
        runpool = RunPool(self.approach.runs)
        for run in runpool.iteruns():
            self.assertIsInstance(run, Run)

    def test_iterate_all_runs_runpool(self):
        """After a full pass, ``runpool.iter`` equals the number of runs seen."""
        self.approach = Approach.load(self.approach.id)
        runpool = RunPool(self.approach.runs)
        seen = 0
        for run in runpool.iteruns():
            self.assertIsInstance(run, Run)
            run.status = "finished"
            seen += 1
        self.assertEqual(seen, runpool.iter)

    def test_iterate_all_runs_runpool_twice(self):
        """A second pass over the pool yields nothing once all runs are finished."""
        self.approach = Approach.load(self.approach.id)
        runpool = RunPool(self.approach.runs)
        seen = 0
        for run in runpool.iteruns():
            self.assertIsInstance(run, Run)
            run.status = "finished"
            seen += 1
        self.assertEqual(seen, runpool.iter)

        # Collecting the second pass makes a failure show the runs that
        # were unexpectedly yielded.
        second_pass = list(runpool.iteruns())
        self.assertEqual(len(second_pass), 0, second_pass)
def generate_approach(identifier, subdataset_id):
    """Create an approach named ``identifier`` and save it.

    The approach is attached to the project returned by ``Project.load()``
    and to the subdataset loaded from ``subdataset_id``.
    """
    project = Project.load()
    subdataset = SubDataset.load(subdataset_id)
    approach = Approach(project=project, name=identifier, subdataset=subdataset)
    approach.save()
# If project exist delete and recreate it if project_path.is_dir(): proj = Project.load(project_path) clean(proj) proj = Project(name="test_project", path=path_to_project) # add a datasource path_to_dataset = str(Path(r"./test/resources/test_dataset.csv").absolute()) ds = Dataset.read_file(path_to_dataset) ds.set_project_path(proj.path) ds.save() # create subdataset sbds = SubDataset(ds, method="k_fold", by=5) sbds.save() # set apporach example_approach_path = r"./test/resources/approach_example.py" param_path = r"./test/resources/parameters_example.yml" a = Approach(proj, "approach_example", sbds) shutil.copyfile(example_approach_path, str(a.script_path)) shutil.copyfile(param_path, str(a.params_path)) a.save() # generate runs rg = RunGenerator.from_approach(a) # run experiment a.run(kind="single")