def test_create_with_validation_dataset(self):
        """Create an experiment that is validated with a separate dataset."""
        # upload the validation dataset to the project
        cols = ['sepal length', 'sepal width', 'petal length', 'petal width']
        target = 'class'
        df = pd.read_csv('tests/data/test_1_vald.csv')
        dc = DatasetClient(self.project.hid)
        vald_dataset = dc.add_dataset_if_not_exists(df[cols], df[target])
        # set up the experiment client
        ec = ExperimentClient(self.project.hid)
        self.assertIsNotNone(ec)
        # there should be no experiments yet
        experiments = ec.get_experiments()
        self.assertEqual(experiments, [])
        # create new experiment, passing the validation dataset right after
        # the training dataset
        # NOTE(review): 0.72 is passed positionally after the stratify flag --
        # presumably the train split ratio; confirm against the signature of
        # add_experiment_if_not_exists.
        experiment = ec.add_experiment_if_not_exists(
            self.dataset, vald_dataset, self.expt_title, self.project.task,
            self.validation_kfolds, self.validation_shuffle,
            self.validation_stratify, 0.72, self.algorithms, self.metric,
            self.tuning_mode, self.time_constraint, self.create_enseble)
        self.assertIsNotNone(experiment)
        self.assertEqual(experiment.title, self.expt_title)
        # supplying a validation dataset should select this scheme
        self.assertEqual(experiment.validation_scheme, "With dataset")
Example #2
0
 def test_add_existing_dataset(self):
     """Adding the same dataset twice must not create a duplicate."""
     # setup dataset client
     dc = DatasetClient(self.project.hid)
     self.assertIsNotNone(dc)
     # get initial number of datasets
     init_datasets_cnt = len(dc.get_datasets())
     # add dataset
     dc.add_dataset_if_not_exists(self.X, self.y)
     # exactly one dataset should have been added
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), init_datasets_cnt + 1)
     # add the same dataset again
     # it shouldn't be added
     dc.add_dataset_if_not_exists(self.X, self.y)
     # the number of datasets should still be the initial count plus one
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), init_datasets_cnt + 1)
 def test_create_and_delete(self):
     """Add two datasets, delete one, and check the dataset count each time."""
     # setup dataset client
     dc = DatasetClient(self.project.hid)
     self.assertIsNotNone(dc)
     # get initial number of datasets
     init_datasets_cnt = len(dc.get_datasets())
     # add one dataset with a target and one without (y=None)
     my_dataset_1 = dc.add_dataset_if_not_exists(self.X, self.y)
     # the second dataset's handle is not needed, only its effect on the count
     dc.add_dataset_if_not_exists(self.X, y=None)
     # both datasets should be counted
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), init_datasets_cnt + 2)
     # delete the first added dataset
     dc.delete_dataset(my_dataset_1.hid)
     # only one of the added datasets should remain
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), init_datasets_cnt + 1)
Example #4
0
 def test_add_dataset_for_prediction(self):
     """Add a dataset without a target (y is None) and fetch it back by hid."""
     # setup dataset client
     dc = DatasetClient(self.project.hid)
     self.assertIsNotNone(dc)
     # get datasets, there should be none
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), 0)
     # add dataset with features only
     my_dataset = dc.add_dataset_if_not_exists(self.X, None)
     self.assertIsNotNone(my_dataset)
     # now there should be exactly one dataset
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), 1)
     # the dataset fetched by hid should match the one returned on creation
     my_dataset_2 = dc.get_dataset(my_dataset.hid)
     self.assertEqual(my_dataset.hid, my_dataset_2.hid)
     self.assertEqual(my_dataset.title, my_dataset_2.title)
Example #5
0
 def test_add_dataset_for_training(self):
     """Add a dataset with features and target, then fetch it back by hid."""
     # setup dataset client
     dc = DatasetClient(self.project.hid)
     self.assertIsNotNone(dc)
     # get datasets, there should be none
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), 0)
     # add dataset with features and target
     my_dataset = dc.add_dataset_if_not_exists(self.X, self.y)
     self.assertIsNotNone(my_dataset)
     # now there should be exactly one dataset
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), 1)
     # the dataset fetched by hid should match the one returned on creation
     my_dataset_2 = dc.get_dataset(my_dataset.hid)
     self.assertEqual(my_dataset.hid, my_dataset_2.hid)
     self.assertEqual(my_dataset.title, my_dataset_2.title)
     # test __str__ method: assertIn reports the text on failure
     dataset_text = str(my_dataset_2)
     self.assertIn('id', dataset_text)
     self.assertIn('title', dataset_text)
     self.assertIn('file', dataset_text)
 def setUp(self):
     """Create a test project and upload the training dataset."""
     # project title carries the suffix returned by get_postfix()
     project_title = 'Test project-01' + get_postfix()

     # experiment parameters read by the test methods
     self.expt_title = 'Test experiment-01'
     self.algorithms = ['xgb']
     self.metric = 'logloss'
     self.tuning_mode = 'Normal'
     self.time_constraint = 1
     self.create_enseble = False

     # validation parameters
     self.validation_kfolds = 5
     self.validation_shuffle = True
     self.validation_stratify = True
     self.validation_train_split = None

     # create the project for a binary classification task
     self.project_client = ProjectClient()
     self.project = self.project_client.create_project(
         title=project_title, task='bin_class')

     # load the training data and register it in the project
     train_df = pd.read_csv('tests/data/test_1.csv')
     feature_names = [
         'sepal length', 'sepal width', 'petal length', 'petal width',
     ]
     dataset_client = DatasetClient(self.project.hid)
     features = train_df[feature_names]
     labels = train_df['class']
     self.dataset = dataset_client.add_dataset_if_not_exists(features, labels)