def setUp(self):
    """
    Build the logistic regression fixture and execute it once.

    Sets the project path to the mock project, creates a project, saves a
    dataset read from the mock dataset file and a 5-fold subdataset of it,
    registers a "logistic_regression" approach whose script is copied from
    the example approach, and finally runs the imported approach class.
    """
    set_project_path(testenv.MOCK_PROJECT_PATH)

    # Names and locations reused by the test methods
    self.aux_project_name = testenv.MOCK_PROJECT_NAME
    self.project_default_name = testenv.DEFAULT_PROJECT_NAME
    self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
    self.path_to_test_dir = testenv.TEST_PATH
    self.path_to_dataset = testenv.MOCK_DATASET

    # Project that owns everything created below
    self.p = Project(path=self.path_to_test_dir, name=self.aux_project_name)

    # Dataset read from file and persisted
    self.ds = Dataset.read_file(path=self.path_to_dataset)
    self.ds.save()

    # 5-fold subdataset derived from the dataset
    self.sbds = SubDataset(self.ds, method="k_fold", by=5)
    self.sbds.save()

    # Approach stored under <TEST_PATH>/lr, using the example script
    approach_dir = Path(testenv.TEST_PATH, "lr")
    self.approach = Approach(
        self.p,
        "logistic_regression",
        self.sbds,
        path=str(approach_dir),
    )
    script_target = str(self.approach.script_path)
    shutil.copyfile(testenv.APPROACH_EXAMPLE, script_target)
    self.approach.save()

    # Import the approach class and run it (generates runs)
    approach_cls = import_from(
        "test.lr.logistic_regression", "LogisticRegressionApproach"
    )
    approach_cls().run()
def setUp(self):
    """
    Build the decision tree fixture on the iris dataset and execute it once.

    Sets the project path to the mock project, creates a project, saves a
    dataset read from the iris dataset file and a 5-fold subdataset of it,
    registers a "decision_tree" approach whose script is copied from the
    iris approach, and finally runs the imported approach class.
    """
    set_project_path(testenv.MOCK_PROJECT_PATH)

    # Names and locations reused by the test methods
    self.aux_project_name = testenv.MOCK_PROJECT_NAME
    self.project_default_name = testenv.DEFAULT_PROJECT_NAME
    self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
    self.path_to_test_dir = testenv.TEST_PATH
    self.path_to_dataset = testenv.IRIS_DATASET

    # Project that owns everything created below
    self.p = Project(path=self.path_to_test_dir, name=self.aux_project_name)

    # Dataset read from file and persisted
    self.ds = Dataset.read_file(path=self.path_to_dataset)
    self.ds.save()

    # 5-fold subdataset derived from the dataset
    self.sbds = SubDataset(self.ds, method="k_fold", by=5)
    self.sbds.save()

    # Approach stored under <TEST_PATH>/dt, using the iris script
    approach_dir = Path(testenv.TEST_PATH, "dt")
    self.approach = Approach(
        self.p,
        "decision_tree",
        self.sbds,
        path=str(approach_dir),
    )
    script_target = str(self.approach.script_path)
    shutil.copyfile(testenv.IRIS_APPROACH, script_target)
    self.approach.save()

    # Import the approach class and run it (generates runs)
    approach_cls = import_from("test.dt.decision_tree", "DecisionTreeApproach")
    approach_cls().run()
def test_generate_subdataset(self, method="train_test", by=0.8):
    """
    Creates a Project, reads and saves a Dataset, then builds a
    SubDataset from that Dataset using the given method and by
    parameters.

    Asserts
    -------
    - The indices of the subdataset are not None

    Returns
    -------
    The SubDataset instance that was created
    """
    Project(path=self.path_to_test_dir, name=self.aux_project_name)

    dataset = Dataset.read_file(path=self.path_to_dataset)
    dataset.save()

    subdataset = SubDataset(dataset=dataset, method=method, by=by)
    self.assertIsNotNone(subdataset.indices)
    return subdataset
def test_create_dataset(self):
    """
    Builds a new dataset from the dataset file, read without treating
    the first line as a heading.

    Asserts
    -------
    - The created object is an instance of Dataset
    - Its datasource attribute is an instance of FileDatasource

    Returns
    -------
    The Dataset instance created from the dataset file
    """
    Project(name=self.aux_project_name, path=self.path_to_test_dir)

    dataset = Dataset.read_file(
        path=self.path_to_dataset,
        first_line_heading=False,
    )

    self.assertIsInstance(dataset, Dataset)
    self.assertIsInstance(dataset.datasource, FileDatasource)
    return dataset
def test_get_latest_subdataset(self):
    """
    Saves two k-fold subdatasets of the mock dataset, one after the
    other, and asks the loaded project for its last subdataset.

    Asserts
    -------
    - The id of the subdataset returned by the project equals the id of
      the subdataset that was saved second
    """
    project = self.test_load_project()

    dataset = Dataset.read_file(
        path=testenv.MOCK_DATASET,
        first_line_heading=False,
    )
    dataset.save()

    split_method = "k_fold"
    folds = 5

    older = SubDataset(dataset, method=split_method, by=folds)
    older.save()

    newer = SubDataset(dataset, method=split_method, by=folds * 2)
    newer.save()

    latest = project.get_last_subdataset()
    self.assertEqual(latest.id, newer.id)
def setUp(self):
    """
    Build a saved logistic regression approach without running it.

    Sets the project path to the mock project, creates a project, saves a
    dataset read from the mock dataset file (first line not treated as a
    heading) and a 5-fold subdataset of it, then registers a
    "logistic_regression" approach whose script is copied from the
    example approach.
    """
    set_project_path(testenv.MOCK_PROJECT_PATH)

    # Project that owns everything created below
    self.p = Project(path=testenv.TEST_PATH, name=testenv.MOCK_PROJECT_NAME)

    # Dataset read from file and persisted
    self.ds = Dataset.read_file(
        path=testenv.MOCK_DATASET,
        first_line_heading=False,
    )
    self.ds.save()

    # 5-fold subdataset derived from the dataset
    self.sbds = SubDataset(self.ds, method="k_fold", by=5)
    self.sbds.save()

    # Approach stored under <TEST_PATH>/lr, using the example script
    approach_dir = Path(testenv.TEST_PATH, "lr")
    self.approach = Approach(
        self.p,
        "logistic_regression",
        self.sbds,
        path=str(approach_dir),
    )
    script_target = str(self.approach.script_path)
    shutil.copyfile(testenv.APPROACH_EXAMPLE, script_target)
    self.approach.save()
def setUp(self):
    """
    Build a saved "test_approach" approach without running it.

    Sets the project path to the mock project, creates a project, saves a
    dataset read from the mock dataset file and a 5-fold subdataset of it,
    then registers an approach named "test_approach" whose script is
    copied from the example approach.
    """
    set_project_path(testenv.MOCK_PROJECT_PATH)

    # Names and locations reused by the test methods
    self.aux_project_name = testenv.MOCK_PROJECT_NAME
    self.project_default_name = testenv.DEFAULT_PROJECT_NAME
    self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
    self.path_to_test_dir = testenv.TEST_PATH
    self.path_to_dataset = testenv.MOCK_DATASET

    # Project that owns everything created below
    self.p = Project(path=self.path_to_test_dir, name=self.aux_project_name)

    # Dataset read from file and persisted
    self.ds = Dataset.read_file(path=self.path_to_dataset)
    self.ds.save()

    # 5-fold subdataset derived from the dataset
    self.sbds = SubDataset(self.ds, method="k_fold", by=5)
    self.sbds.save()

    # Approach using the example script at its own script path
    self.approach = Approach(self.p, "test_approach", self.sbds)
    script_target = str(self.approach.script_path)
    shutil.copyfile(testenv.APPROACH_EXAMPLE, script_target)
    self.approach.save()
# Locate the project directory under the current working directory
path_to_project = str(Path(".").absolute())
project_name = "test_project"
project_path = Path(path_to_project) / project_name

# An existing project with this name is loaded and cleaned first
if project_path.is_dir():
    proj = Project.load(project_path)
    clean(proj)

# Create the project
proj = Project(name=project_name, path=path_to_project)

# Read the test dataset, attach it to the project and persist it
path_to_dataset = str(Path(r"./test/resources/test_dataset.csv").absolute())
ds = Dataset.read_file(path_to_dataset)
ds.set_project_path(proj.path)
ds.save()

# Derive a 5-fold subdataset and persist it
sbds = SubDataset(ds, method="k_fold", by=5)
sbds.save()

# Register the approach, then copy the example script and parameters
# into the locations the approach expects
example_approach_path = r"./test/resources/approach_example.py"
param_path = r"./test/resources/parameters_example.yml"
a = Approach(proj, "approach_example", sbds)
for source_file, target_file in (
    (example_approach_path, a.script_path),
    (param_path, a.params_path),
):
    shutil.copyfile(source_file, str(target_file))
a.save()
def test_automatically_detect_regression(self):
    """
    Reads the housing dataset with "median_house_value" as label.

    Asserts
    -------
    - The problem type of the dataset is "regression"
    """
    Project(name=self.aux_project_name, path=self.path_to_test_dir)

    housing = Dataset.read_file(
        "test/resources/housing.csv",
        label="median_house_value",
    )
    detected_type = housing.problem_type

    self.assertEqual(detected_type, "regression")
def test_automatically_detect_binary_clf(self):
    """
    Reads the titanic dataset with "Survived" as label.

    Asserts
    -------
    - The problem type of the dataset is "binary_clf"
    """
    Project(name=self.aux_project_name, path=self.path_to_test_dir)

    titanic = Dataset.read_file(
        "test/resources/titanic.csv",
        label="Survived",
    )
    detected_type = titanic.problem_type

    self.assertEqual(detected_type, "binary_clf")
def test_automatically_detect_clf(self):
    """
    Reads the Iris dataset without passing an explicit label.

    Asserts
    -------
    - The problem type of the dataset is "clf"
    """
    Project(name=self.aux_project_name, path=self.path_to_test_dir)

    iris = Dataset.read_file("test/resources/Iris.csv")
    detected_type = iris.problem_type

    self.assertEqual(detected_type, "clf")