def clean_projects():
    """Remove leftover test projects (titles starting with 'Test' and ending with the test postfix)."""
    project_client = ProjectClient()
    projects = project_client.get_projects()
    for proj in projects:
        if proj.title.startswith('Test') and proj.title.endswith(get_postfix()):
            project_client.delete_project(proj.hid)
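# A natural place to call clean_projects() is a module-level teardown, so any
# stray 'Test ...' projects are removed once after the whole suite has run.
# A minimal sketch using the standard unittest hook; wiring it up this way is
# an assumption for illustration, not necessarily how this suite does it.
def tearDownModule():
    clean_projects()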
def test_project_get_unknown_hid(self):
    ''' Test invalid hid value in project get method. '''
    pc = ProjectClient()
    project = pc.get_project(hid='invalid_hid_value')
    self.assertEqual(project, None)
def setUp(self):
    proj_title = 'Test project-01' + get_postfix()
    proj_task = 'bin_class'
    # setup project
    self.project_client = ProjectClient()
    self.project = self.project_client.create_project(title=proj_title, task=proj_task)
    # load data
    df = pd.read_csv('tests/data/test_1.csv')
    cols = ['sepal length', 'sepal width', 'petal length', 'petal width']
    target = 'class'
    self.X = df.loc[:, cols]
    self.y = df[target]
def test_create_and_delete(self):
    '''
    Get the list of projects, add a new project and check that the list
    grows by one, then delete the project and check that the list is back
    to its original length.
    '''
    proj_title = 'Test project-01'
    proj_task = 'bin_class'
    pc = ProjectClient()
    projects_before = pc.get_projects()
    new_project = pc.create_project(title=proj_title, task=proj_task)
    self.assertEqual(new_project.title, proj_title)
    projects_after = pc.get_projects()
    self.assertEqual(len(projects_before) + 1, len(projects_after))
    pc.delete_project(new_project.hid)
    projects_after = pc.get_projects()
    self.assertEqual(len(projects_before), len(projects_after))
def test_create_if_not_exists(self):
    proj_title = 'Test project-02'
    proj_task = 'bin_class'
    pc = ProjectClient()
    project = pc.create_project_if_not_exists(title=proj_title, task=proj_task)
    self.assertNotEqual(project, None)
    pc.delete_project(project.hid)
    project = pc.get_project(hid=project.hid)
    self.assertEqual(project, None)
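# The test above only pins down the contract of create_project_if_not_exists:
# return an existing project whose title and task match, otherwise create one.
# A minimal sketch of that presumed behavior, built solely from client calls
# the tests already use - an illustration, not the library's implementation.
def create_project_if_not_exists_sketch(pc, title, task):
    # reuse an existing project if title and task match
    for proj in pc.get_projects():
        if proj.title == title and proj.task == task:
            return proj
    # otherwise create a fresh one
    return pc.create_project(title=title, task=task)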
def test_project_get(self):
    ''' Test project get method. '''
    proj_title = 'Test project-02'
    proj_task = 'bin_class'
    pc = ProjectClient()
    new_project = pc.create_project(title=proj_title, task=proj_task)
    project = pc.get_project(hid=new_project.hid)
    self.assertEqual(new_project.hid, project.hid)
    self.assertEqual(new_project.title, project.title)
    self.assertEqual(new_project.task, project.task)
    self.assertEqual(new_project.scope, project.scope)
    self.assertEqual(new_project.hardware, project.hardware)
    # test __str__ method
    self.assertTrue('id' in str(new_project))
    self.assertTrue('title' in str(new_project))
    self.assertTrue('task' in str(new_project))
    pc.delete_project(new_project.hid)
    project = pc.get_project(hid=new_project.hid)
    self.assertEqual(project, None)
class ExperimentClientTest(ProjectBasedTest):

    def setUp(self):
        proj_title = 'Test project-01' + get_postfix()
        proj_task = 'bin_class'
        self.expt_title = 'Test experiment-01'
        self.validation_kfolds = 5
        self.validation_shuffle = True
        self.validation_stratify = True
        self.validation_train_split = None
        self.algorithms = ['xgb']
        self.metric = 'logloss'
        self.tuning_mode = 'Normal'
        self.time_constraint = 1
        self.create_ensemble = False
        # setup project
        self.project_client = ProjectClient()
        self.project = self.project_client.create_project(title=proj_title, task=proj_task)
        # add training data
        df = pd.read_csv('tests/data/test_1.csv')
        cols = ['sepal length', 'sepal width', 'petal length', 'petal width']
        target = 'class'
        dc = DatasetClient(self.project.hid)
        self.dataset = dc.add_dataset_if_not_exists(df[cols], df[target])

    def tearDown(self):
        # wait before cleaning, to give models time to initialize
        time.sleep(60)
        # clean
        self.project_client.delete_project(self.project.hid)

    def test_create_with_kfold_cv(self):
        ''' Create experiment test with k-fold CV. '''
        # add experiment
        ec = ExperimentClient(self.project.hid)
        self.assertNotEqual(ec, None)
        # there should be no experiments yet
        experiments = ec.get_experiments()
        self.assertEqual(experiments, [])
        # create new experiment
        experiment = ec.add_experiment_if_not_exists(
            self.dataset, None, self.expt_title, self.project.task,
            self.validation_kfolds, self.validation_shuffle,
            self.validation_stratify, self.validation_train_split,
            self.algorithms, self.metric, self.tuning_mode,
            self.time_constraint, self.create_ensemble)
        self.assertNotEqual(experiment, None)
        self.assertEqual(experiment.title, self.expt_title)
        self.assertEqual(experiment.validation_scheme, "5-fold CV, Shuffle, Stratify")
        self.assertEqual(experiment.metric, self.metric)
        # get all experiments, there should be only one
        experiments = ec.get_experiments()
        self.assertEqual(len(experiments), 1)
        # get experiment by hid, it should be the same one
        experiment_2 = ec.get_experiment(experiment.hid)
        self.assertEqual(experiment_2.hid, experiment.hid)
        self.assertEqual(experiment_2.title, experiment.title)
        self.assertEqual(experiment_2.metric, experiment.metric)
        self.assertEqual(experiment_2.validation_scheme, experiment.validation_scheme)
        self.assertTrue(experiment.equal(experiment_2))
        # test __str__ method
        self.assertTrue('id' in str(experiment_2))
        self.assertTrue('title' in str(experiment_2))
        self.assertTrue('metric' in str(experiment_2))
        self.assertTrue('validation' in str(experiment_2))

    def test_create_with_train_split(self):
        ''' Create experiment with validation by train split. '''
        # add experiment
        ec = ExperimentClient(self.project.hid)
        self.assertNotEqual(ec, None)
        # there should be no experiments yet
        experiments = ec.get_experiments()
        self.assertEqual(experiments, [])
        # create new experiment
        experiment = ec.add_experiment_if_not_exists(
            self.dataset, None, self.expt_title, self.project.task,
            self.validation_kfolds, self.validation_shuffle,
            self.validation_stratify, 0.72,
            self.algorithms, self.metric, self.tuning_mode,
            self.time_constraint, self.create_ensemble)
        self.assertNotEqual(experiment, None)
        self.assertEqual(experiment.title, self.expt_title)
        self.assertEqual(experiment.validation_scheme, "Split 72/28, Shuffle, Stratify")

    def test_create_with_validation_dataset(self):
        ''' Create experiment with validation on a separate dataset. '''
        # add validation dataset
        cols = ['sepal length', 'sepal width', 'petal length', 'petal width']
        target = 'class'
        df = pd.read_csv('tests/data/test_1_vald.csv')
        dc = DatasetClient(self.project.hid)
        vald_dataset = dc.add_dataset_if_not_exists(df[cols], df[target])
        # add experiment
        ec = ExperimentClient(self.project.hid)
        self.assertNotEqual(ec, None)
        # there should be no experiments yet
        experiments = ec.get_experiments()
        self.assertEqual(experiments, [])
        # create new experiment
        experiment = ec.add_experiment_if_not_exists(
            self.dataset, vald_dataset, self.expt_title, self.project.task,
            self.validation_kfolds, self.validation_shuffle,
            self.validation_stratify, 0.72,
            self.algorithms, self.metric, self.tuning_mode,
            self.time_constraint, self.create_ensemble)
        self.assertNotEqual(experiment, None)
        self.assertEqual(experiment.title, self.expt_title)
        self.assertEqual(experiment.validation_scheme, "With dataset")

    def test_create_if_exists(self):
        ''' Create experiment when the experiment is already in the project. '''
        # add experiment
        ec = ExperimentClient(self.project.hid)
        self.assertNotEqual(ec, None)
        # there should be no experiments yet
        experiments = ec.get_experiments()
        self.assertEqual(experiments, [])
        # create new experiment
        experiment = ec.add_experiment_if_not_exists(
            self.dataset, None, self.expt_title, self.project.task,
            self.validation_kfolds, self.validation_shuffle,
            self.validation_stratify, self.validation_train_split,
            self.algorithms, self.metric, self.tuning_mode,
            self.time_constraint, self.create_ensemble)
        self.assertNotEqual(experiment, None)
        # get all experiments, there should be only one
        experiments = ec.get_experiments()
        self.assertEqual(len(experiments), 1)
        # try to create the same experiment again
        experiment_2 = ec.add_experiment_if_not_exists(
            self.dataset, None, self.expt_title, self.project.task,
            self.validation_kfolds, self.validation_shuffle,
            self.validation_stratify, self.validation_train_split,
            self.algorithms, self.metric, self.tuning_mode,
            self.time_constraint, self.create_ensemble)
        self.assertNotEqual(experiment_2, None)
        # get all experiments, there should still be only one
        experiments = ec.get_experiments()
        self.assertEqual(len(experiments), 1)
        # both should be the same
        self.assertEqual(experiment_2.hid, experiment.hid)
        self.assertEqual(experiment_2.title, experiment.title)
        self.assertEqual(experiment_2.metric, experiment.metric)
        self.assertEqual(experiment_2.validation_scheme, experiment.validation_scheme)
        self.assertTrue(experiment.equal(experiment_2))
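# The three creation tests above pin down the human-readable validation_scheme
# strings: "5-fold CV, Shuffle, Stratify", "Split 72/28, Shuffle, Stratify" and
# "With dataset". A hypothetical formatter that reproduces those strings from
# the experiment parameters - a compact summary of the format the tests
# expect, not code that exists in the client.
def format_validation_scheme(vald_dataset, kfolds, shuffle, stratify, train_split):
    if vald_dataset is not None:
        return "With dataset"
    if train_split is not None:
        # e.g. 0.72 -> "Split 72/28"
        scheme = "Split %d/%d" % (round(train_split * 100), round((1 - train_split) * 100))
    else:
        scheme = "%d-fold CV" % kfolds
    if shuffle:
        scheme += ", Shuffle"
    if stratify:
        scheme += ", Stratify"
    return scheme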
class DatasetClientTest(ProjectBasedTest):

    def setUp(self):
        proj_title = 'Test project-01'
        proj_task = 'bin_class'
        # setup project
        self.project_client = ProjectClient()
        self.project = self.project_client.create_project(title=proj_title, task=proj_task)
        # load data
        df = pd.read_csv('tests/data/test_1.csv')
        cols = ['sepal length', 'sepal width', 'petal length', 'petal width']
        target = 'class'
        self.X = df[cols]
        self.y = df[target]

    def tearDown(self):
        # clean
        self.project_client.delete_project(self.project.hid)

    def test_get_datasets(self):
        """ Get empty list of datasets in project. """
        # get datasets
        datasets = DatasetClient(self.project.hid).get_datasets()
        self.assertEqual(datasets, [])

    def test_prepare_data(self):
        """ Test _prepare_data method on numpy array data. """
        dc = DatasetClient(self.project.hid)
        samples = 100
        columns = 10
        X = np.random.rand(samples, columns)
        y = np.random.choice([0, 1], samples, replace=True)
        data, data_hash = dc._prepare_data(X, y)
        self.assertTrue(data is not None)
        self.assertTrue(data_hash is not None)
        self.assertTrue(isinstance(data_hash, str))
        self.assertEqual(11, len(data.columns))
        self.assertTrue('target' in data.columns)
        self.assertTrue('attribute_1' in data.columns)
        self.assertTrue('attribute_10' in data.columns)

    def test_get_dataset_for_wrong_hid(self):
        """ Get dataset for wrong hid, should return None. """
        dc = DatasetClient(self.project.hid)
        dataset = dc.get_dataset('some-wrong-hid')
        self.assertTrue(dataset is None)

    def test_add_dataset_for_training(self):
        # setup dataset client
        dc = DatasetClient(self.project.hid)
        self.assertNotEqual(dc, None)
        # get datasets, there should be none
        datasets = dc.get_datasets()
        self.assertEqual(len(datasets), 0)
        # add dataset
        my_dataset = dc.add_dataset_if_not_exists(self.X, self.y)
        self.assertNotEqual(my_dataset, None)
        # get datasets
        datasets = dc.get_datasets()
        self.assertEqual(len(datasets), 1)
        my_dataset_2 = dc.get_dataset(my_dataset.hid)
        self.assertEqual(my_dataset.hid, my_dataset_2.hid)
        self.assertEqual(my_dataset.title, my_dataset_2.title)
        # test __str__ method
        self.assertTrue('id' in str(my_dataset_2))
        self.assertTrue('title' in str(my_dataset_2))
        self.assertTrue('file' in str(my_dataset_2))

    def test_add_dataset_for_prediction(self):
        # setup dataset client
        dc = DatasetClient(self.project.hid)
        self.assertNotEqual(dc, None)
        # get datasets, there should be none
        datasets = dc.get_datasets()
        self.assertEqual(len(datasets), 0)
        # add dataset without a target column (prediction data)
        my_dataset = dc.add_dataset_if_not_exists(self.X, None)
        self.assertNotEqual(my_dataset, None)
        # get datasets
        datasets = dc.get_datasets()
        self.assertEqual(len(datasets), 1)
        my_dataset_2 = dc.get_dataset(my_dataset.hid)
        self.assertEqual(my_dataset.hid, my_dataset_2.hid)
        self.assertEqual(my_dataset.title, my_dataset_2.title)

    def test_add_existing_dataset(self):
        # setup dataset client
        dc = DatasetClient(self.project.hid)
        self.assertNotEqual(dc, None)
        # get initial number of datasets
        init_datasets_cnt = len(dc.get_datasets())
        # add dataset
        dc.add_dataset_if_not_exists(self.X, self.y)
        # get datasets
        datasets = dc.get_datasets()
        self.assertEqual(len(datasets), init_datasets_cnt + 1)
        # add the same dataset again - it shouldn't be added
        dc.add_dataset_if_not_exists(self.X, self.y)
        # the number of datasets in the project should not change
        datasets = dc.get_datasets()
        self.assertEqual(len(datasets), init_datasets_cnt + 1)
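# test_prepare_data fixes the observable contract of the private _prepare_data
# method: numpy inputs come back as one DataFrame with generated column names
# attribute_1 .. attribute_N plus a 'target' column, together with a string
# hash of the data. A minimal sketch consistent with those assertions; the
# real method lives inside DatasetClient and may differ, e.g. in how the hash
# is computed.
import hashlib

def prepare_data_sketch(X, y=None):
    data = pd.DataFrame(X, columns=['attribute_%d' % (i + 1) for i in range(X.shape[1])])
    if y is not None:
        data['target'] = y
    # hash the serialized frame so identical uploads can be detected
    data_hash = hashlib.md5(data.to_csv(index=False).encode('utf-8')).hexdigest()
    return data, data_hash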
class ResultClientTest(ProjectBasedTest):

    def setUp(self):
        proj_title = 'Test project-01' + get_postfix()
        proj_task = 'bin_class'
        self.expt_title = 'Test experiment-01'
        self.validation_kfolds = 5
        self.validation_shuffle = True
        self.validation_stratify = True
        self.validation_train_split = None
        self.algorithms = ['xgb']
        self.metric = 'logloss'
        self.tuning_mode = 'Normal'
        self.time_constraint = 1
        self.create_ensemble = False
        # setup project
        self.project_client = ProjectClient()
        self.project = self.project_client.create_project(title=proj_title, task=proj_task)
        # load data
        df = pd.read_csv('tests/data/test_1.csv')
        cols = ['sepal length', 'sepal width', 'petal length', 'petal width']
        target = 'class'
        # add dataset
        self.dataset = DatasetClient(self.project.hid).add_dataset_if_not_exists(df[cols], df[target])

    def tearDown(self):
        # clean
        self.project_client.delete_project(self.project.hid)

    def test_get_results_for_wrong_project(self):
        with self.assertRaises(BadRequestException):
            # init result client
            rc = ResultClient('wrong-hid')
            self.assertTrue(rc is not None)
            # get results - should raise an exception
            rc.get_results()

    def test_get_results_for_project(self):
        # init result client
        rc = ResultClient(self.project.hid)
        self.assertNotEqual(rc, None)
        # get results - should be empty
        results = rc.get_results()
        self.assertEqual(results, [])
        # add experiment
        ec = ExperimentClient(self.project.hid)
        # create new experiment
        self.experiment = ec.add_experiment_if_not_exists(
            self.dataset, None, self.expt_title, self.project.task,
            self.validation_kfolds, self.validation_shuffle,
            self.validation_stratify, self.validation_train_split,
            self.algorithms, self.metric, self.tuning_mode,
            self.time_constraint, self.create_ensemble)
        # wait some time till models are initialized
        time.sleep(60)
        # get results - there should be some models now
        results = rc.get_results()
        self.assertNotEqual(len(results), 0)

    def test_get_results_for_experiment(self):
        # init result client
        rc = ResultClient(self.project.hid)
        self.assertNotEqual(rc, None)
        # get results - should be empty
        results = rc.get_results()
        self.assertEqual(results, [])
        # get results for wrong experiment hid
        results = rc.get_results('wrong-hid')
        self.assertEqual(results, [])
        # add experiment
        ec = ExperimentClient(self.project.hid)
        # create new experiment
        self.experiment = ec.add_experiment_if_not_exists(
            self.dataset, None, self.expt_title, self.project.task,
            self.validation_kfolds, self.validation_shuffle,
            self.validation_stratify, self.validation_train_split,
            self.algorithms, self.metric, self.tuning_mode,
            self.time_constraint, self.create_ensemble)
        # wait some time till models are initialized
        time.sleep(60)
        # get results for the experiment - there should be some models now
        results = rc.get_results(self.experiment.hid)
        self.assertNotEqual(len(results), 0)
        # get results for the whole project
        project_results = rc.get_results()
        self.assertNotEqual(project_results, [])
        # get results for a wrong experiment hid -
        # all results from the project should be returned
        results_2 = rc.get_results('wrong-hid')
        self.assertEqual(len(project_results), len(results_2))
        for r in project_results:
            # test __str__ method
            self.assertTrue('id' in str(r))
            self.assertTrue('model' in str(r))
            self.assertTrue('status' in str(r))
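# Both result tests block on a fixed time.sleep(60) before models appear. A
# hedged alternative is to poll get_results until something arrives or a
# deadline passes, which keeps fast runs fast and stops slow runs from failing
# spuriously. wait_for_results is a hypothetical helper, not part of the
# client API.
def wait_for_results(result_client, timeout=120, poll_interval=5):
    deadline = time.time() + timeout
    while time.time() < deadline:
        results = result_client.get_results()
        if results:
            return results
        time.sleep(poll_interval)
    raise TimeoutError('no results appeared within %d seconds' % timeout)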