def test_create_and_delete(self):
     """Add two datasets, delete one of them, and check the dataset counts.

     All counts are compared against the number of datasets present before
     this test adds anything, so pre-existing datasets do not matter.
     """
     # setup dataset client
     dc = DatasetClient(self.project.hid)
     self.assertIsNotNone(dc)
     # baseline: number of datasets before this test adds any
     init_datasets_cnt = len(dc.get_datasets())
     # add two datasets: one with self.y, one with y=None
     my_dataset_1 = dc.add_dataset_if_not_exists(self.X, self.y)
     # the second result is not needed later, so it is not bound to a name
     dc.add_dataset_if_not_exists(self.X, y=None)
     # both additions must be visible
     self.assertEqual(len(dc.get_datasets()), init_datasets_cnt + 2)
     # delete the first added dataset
     dc.delete_dataset(my_dataset_1.hid)
     # exactly one of the two added datasets must remain
     self.assertEqual(len(dc.get_datasets()), init_datasets_cnt + 1)
 def test_add_existing_dataset(self):
     """Adding the same (X, y) pair twice must not create a second dataset.

     Counts are compared against the number of datasets present before
     this test adds anything.
     """
     # setup dataset client
     dc = DatasetClient(self.project.hid)
     self.assertIsNotNone(dc)
     # baseline: number of datasets before this test adds any
     init_datasets_cnt = len(dc.get_datasets())
     # first addition: the count grows by one
     dc.add_dataset_if_not_exists(self.X, self.y)
     self.assertEqual(len(dc.get_datasets()), init_datasets_cnt + 1)
     # add the same dataset again; it shouldn't be added
     dc.add_dataset_if_not_exists(self.X, self.y)
     # the count must still be baseline + 1 (not necessarily 1 in absolute terms)
     self.assertEqual(len(dc.get_datasets()), init_datasets_cnt + 1)
 def setUp(self):
     """Prepare the fixture: experiment settings, a new project, a dataset.

     Stores the experiment parameters on ``self``, creates a project through
     ``ProjectClient`` and uploads the columns of ``tests/data/test_1.csv``
     as ``self.dataset``. The CSV path is relative to the working directory.
     """
     # project title gets a postfix from get_postfix()
     title = 'Test project-01' + get_postfix()

     # experiment parameters kept on the instance
     self.expt_title = 'Test experiment-01'
     self.validation_kfolds = 5
     self.validation_shuffle = True
     self.validation_stratify = True
     self.validation_train_split = None
     self.algorithms = ['xgb']
     self.metric = 'logloss'
     self.tuning_mode = 'Normal'
     self.time_constraint = 1
     # NOTE(review): 'enseble' looks like a typo of 'ensemble'; the name is
     # kept because code outside this method may read it.
     self.create_enseble = False

     # create the project
     self.project_client = ProjectClient()
     self.project = self.project_client.create_project(
         title=title,
         task='bin_class',
     )

     # load the training data and register it with the project
     frame = pd.read_csv('tests/data/test_1.csv')
     client = DatasetClient(self.project.hid)
     features = frame[
         ['sepal length', 'sepal width', 'petal length', 'petal width']
     ]
     labels = frame['class']
     self.dataset = client.add_dataset_if_not_exists(features, labels)
# Example no. 4
# 0
 def test_add_dataset_for_prediction(self):
     """Add a dataset with no y and check it can be fetched back by hid.

     Expects the project to contain no datasets when the test starts.
     """
     # setup dataset client
     dc = DatasetClient(self.project.hid)
     self.assertIsNotNone(dc)
     # get datasets, there should be none
     self.assertEqual(len(dc.get_datasets()), 0)
     # add dataset (X only, y is None)
     my_dataset = dc.add_dataset_if_not_exists(self.X, None)
     self.assertIsNotNone(my_dataset)
     # exactly one dataset must now exist
     self.assertEqual(len(dc.get_datasets()), 1)
     # fetch it back by hid and compare identifying fields
     my_dataset_2 = dc.get_dataset(my_dataset.hid)
     # fail with a clear message instead of an AttributeError on None
     self.assertIsNotNone(my_dataset_2)
     self.assertEqual(my_dataset.hid, my_dataset_2.hid)
     self.assertEqual(my_dataset.title, my_dataset_2.title)
# Example no. 5
# 0
 def test_add_dataset_for_training(self):
     """Add a dataset with X and y, fetch it back by hid, check its str().

     Expects the project to contain no datasets when the test starts.
     """
     # setup dataset client
     dc = DatasetClient(self.project.hid)
     self.assertIsNotNone(dc)
     # get datasets, there should be none
     self.assertEqual(len(dc.get_datasets()), 0)
     # add dataset
     my_dataset = dc.add_dataset_if_not_exists(self.X, self.y)
     self.assertIsNotNone(my_dataset)
     # exactly one dataset must now exist
     self.assertEqual(len(dc.get_datasets()), 1)
     # fetch it back by hid and compare identifying fields
     my_dataset_2 = dc.get_dataset(my_dataset.hid)
     # fail with a clear message instead of an AttributeError on None
     self.assertIsNotNone(my_dataset_2)
     self.assertEqual(my_dataset.hid, my_dataset_2.hid)
     self.assertEqual(my_dataset.title, my_dataset_2.title)
     # test __str__ method: the text must mention these field names
     description = str(my_dataset_2)
     self.assertIn('id', description)
     self.assertIn('title', description)
     self.assertIn('file', description)
# Example no. 6
# 0
 def test_get_dataset_for_wrong_hid(self):
     """Getting a dataset for a wrong hid should return None."""
     dc = DatasetClient(self.project.hid)
     dataset = dc.get_dataset('some-wrong-hid')
     # assertIsNone reports the actual value on failure
     self.assertIsNone(dataset)
# Example no. 7
# 0
 def test_prepare_data_two_sources_numpy(self):
     """Hashes from _prepare_data must differ with and without y (numpy)."""
     client = DatasetClient(self.project.hid)
     # first call: X and y, both converted to numpy arrays
     x_array = np.array(self.X)
     y_array = np.array(self.y)
     _, hash_with_y = client._prepare_data(x_array, y_array)
     # second call: a fresh array of X only, y is None
     x_only = np.array(self.X)
     _, hash_without_y = client._prepare_data(x_only, None)
     self.assertNotEqual(hash_with_y, hash_without_y)
# Example no. 8
# 0
 def test_prepare_data_two_sources(self):
     """Hashes from _prepare_data must differ with and without y."""
     client = DatasetClient(self.project.hid)
     # only the hash (second element of each result) is compared
     _, hash_with_y = client._prepare_data(self.X, self.y)
     _, hash_without_y = client._prepare_data(self.X, None)
     self.assertNotEqual(hash_with_y, hash_without_y)