def test_create_and_delete(self):
    """
    Create a project and verify the project count grows by one, then
    delete it and verify the count returns to the original value.
    """
    proj_title = 'Test project-01'
    proj_task = 'bin_class'
    pc = ProjectClient()
    # snapshot the project list before creating a new one
    projects_before = pc.get_projects()
    new_project = pc.create_project(title=proj_title, task=proj_task)
    self.assertEqual(new_project.title, proj_title)
    projects_after = pc.get_projects()
    self.assertEqual(len(projects_before) + 1, len(projects_after))
    # delete and confirm the list shrinks back to its original size
    pc.delete_project(new_project.hid)
    projects_after = pc.get_projects()
    self.assertEqual(len(projects_before), len(projects_after))
    def test_project_get(self):
        """
        Create a project, fetch it back by hid and verify all fields
        match; after deletion the lookup should return None.
        """
        proj_title = 'Test project-02'
        proj_task = 'bin_class'
        pc = ProjectClient()
        new_project = pc.create_project(title=proj_title, task=proj_task)
        project = pc.get_project(hid=new_project.hid)
        self.assertEqual(new_project.hid, project.hid)
        self.assertEqual(new_project.title, project.title)
        self.assertEqual(new_project.task, project.task)
        self.assertEqual(new_project.scope, project.scope)
        self.assertEqual(new_project.hardware, project.hardware)
        # __str__ should mention the key fields
        self.assertTrue('id' in str(new_project))
        self.assertTrue('title' in str(new_project))
        self.assertTrue('task' in str(new_project))
        # after deletion the project is no longer retrievable
        pc.delete_project(new_project.hid)
        project = pc.get_project(hid=new_project.hid)
        self.assertEqual(project, None)
class ExperimentClientTest(ProjectBasedTest):
    """Tests for ExperimentClient, run against a temporary project."""

    def setUp(self):
        # unique project title to avoid collisions between test runs
        proj_title = 'Test project-01' + get_postfix()
        proj_task = 'bin_class'
        # experiment parameters shared by all tests in this class
        self.expt_title = 'Test experiment-01'
        self.validation_kfolds = 5
        self.validation_shuffle = True
        self.validation_stratify = True
        self.validation_train_split = None
        self.algorithms = ['xgb']
        self.metric = 'logloss'
        self.tuning_mode = 'Normal'
        self.time_constraint = 1
        # NOTE(review): attribute name keeps the original spelling
        # ("enseble") so any external references stay valid
        self.create_enseble = False
        # setup project
        self.project_client = ProjectClient()
        self.project = self.project_client.create_project(title=proj_title,
                                                          task=proj_task)
        # add training data
        df = pd.read_csv('tests/data/test_1.csv')
        cols = ['sepal length', 'sepal width', 'petal length', 'petal width']
        target = 'class'
        dc = DatasetClient(self.project.hid)
        self.dataset = dc.add_dataset_if_not_exists(df[cols], df[target])

    def tearDown(self):
        # wait before clean-up, to have time to initialize models
        time.sleep(60)
        # clean
        self.project_client.delete_project(self.project.hid)

    def test_create_with_kfold_cv(self):
        """Create experiment validated with k-fold CV and verify it."""
        # add experiment
        ec = ExperimentClient(self.project.hid)
        self.assertNotEqual(ec, None)
        # there should be no experiments yet
        experiments = ec.get_experiments()
        self.assertEqual(experiments, [])
        # create new experiment
        experiment = ec.add_experiment_if_not_exists(
            self.dataset, None, self.expt_title, self.project.task,
            self.validation_kfolds, self.validation_shuffle,
            self.validation_stratify, self.validation_train_split,
            self.algorithms, self.metric, self.tuning_mode,
            self.time_constraint, self.create_enseble)
        self.assertNotEqual(experiment, None)
        self.assertEqual(experiment.title, self.expt_title)
        self.assertEqual(experiment.validation_scheme,
                         "5-fold CV, Shuffle, Stratify")
        self.assertEqual(experiment.metric, self.metric)
        # get all experiments, should be only one
        experiments = ec.get_experiments()
        self.assertEqual(len(experiments), 1)
        # get experiment by hid, it should be the same
        experiment_2 = ec.get_experiment(experiment.hid)
        self.assertEqual(experiment_2.hid, experiment.hid)
        self.assertEqual(experiment_2.title, experiment.title)
        self.assertEqual(experiment_2.metric, experiment.metric)
        self.assertEqual(experiment_2.validation_scheme,
                         experiment.validation_scheme)
        self.assertTrue(experiment.equal(experiment_2))
        # test __str__ method
        self.assertTrue('id' in str(experiment_2))
        self.assertTrue('title' in str(experiment_2))
        self.assertTrue('metric' in str(experiment_2))
        self.assertTrue('validation' in str(experiment_2))

    def test_create_with_train_split(self):
        """Create experiment validated by a train/test split."""
        # add experiment
        ec = ExperimentClient(self.project.hid)
        self.assertNotEqual(ec, None)
        # there should be no experiments yet
        experiments = ec.get_experiments()
        self.assertEqual(experiments, [])
        # create new experiment with a 72/28 train split
        experiment = ec.add_experiment_if_not_exists(
            self.dataset, None, self.expt_title, self.project.task,
            self.validation_kfolds, self.validation_shuffle,
            self.validation_stratify, 0.72, self.algorithms, self.metric,
            self.tuning_mode, self.time_constraint, self.create_enseble)
        self.assertNotEqual(experiment, None)
        self.assertEqual(experiment.title, self.expt_title)
        self.assertEqual(experiment.validation_scheme,
                         "Split 72/28, Shuffle, Stratify")

    def test_create_with_validation_dataset(self):
        """Create experiment validated with a separate dataset."""
        # add validation dataset
        cols = ['sepal length', 'sepal width', 'petal length', 'petal width']
        target = 'class'
        df = pd.read_csv('tests/data/test_1_vald.csv')
        dc = DatasetClient(self.project.hid)
        vald_dataset = dc.add_dataset_if_not_exists(df[cols], df[target])
        # add experiment
        ec = ExperimentClient(self.project.hid)
        self.assertNotEqual(ec, None)
        # there should be no experiments yet
        experiments = ec.get_experiments()
        self.assertEqual(experiments, [])
        # create new experiment; the validation dataset takes precedence
        # over the train-split argument
        experiment = ec.add_experiment_if_not_exists(
            self.dataset, vald_dataset, self.expt_title, self.project.task,
            self.validation_kfolds, self.validation_shuffle,
            self.validation_stratify, 0.72, self.algorithms, self.metric,
            self.tuning_mode, self.time_constraint, self.create_enseble)
        self.assertNotEqual(experiment, None)
        self.assertEqual(experiment.title, self.expt_title)
        self.assertEqual(experiment.validation_scheme, "With dataset")

    def test_create_if_exists(self):
        """Creating the same experiment twice must not add a duplicate."""
        # add experiment
        ec = ExperimentClient(self.project.hid)
        self.assertNotEqual(ec, None)
        # there should be no experiments yet
        experiments = ec.get_experiments()
        self.assertEqual(experiments, [])
        # create new experiment
        experiment = ec.add_experiment_if_not_exists(
            self.dataset, None, self.expt_title, self.project.task,
            self.validation_kfolds, self.validation_shuffle,
            self.validation_stratify, self.validation_train_split,
            self.algorithms, self.metric, self.tuning_mode,
            self.time_constraint, self.create_enseble)
        self.assertNotEqual(experiment, None)
        # get all experiments, should be only one
        experiments = ec.get_experiments()
        self.assertEqual(len(experiments), 1)
        # try to create the same experiment
        experiment_2 = ec.add_experiment_if_not_exists(
            self.dataset, None, self.expt_title, self.project.task,
            self.validation_kfolds, self.validation_shuffle,
            self.validation_stratify, self.validation_train_split,
            self.algorithms, self.metric, self.tuning_mode,
            self.time_constraint, self.create_enseble)
        # bug fix: the original re-checked `experiment` here; the second
        # call's result is what must be verified
        self.assertNotEqual(experiment_2, None)
        # get all experiments, should still be only one
        experiments = ec.get_experiments()
        self.assertEqual(len(experiments), 1)
        # both should be the same
        self.assertEqual(experiment_2.hid, experiment.hid)
        self.assertEqual(experiment_2.title, experiment.title)
        self.assertEqual(experiment_2.metric, experiment.metric)
        self.assertEqual(experiment_2.validation_scheme,
                         experiment.validation_scheme)
        self.assertTrue(experiment.equal(experiment_2))
# Exemple #4 (score: 0) -- scraping artifact, kept as a comment
class DatasetClientTest(ProjectBasedTest):
    """Tests for DatasetClient, run against a temporary project."""

    def setUp(self):
        # unique project title to avoid collisions between test runs
        # (consistent with the other test classes in this file)
        proj_title = 'Test project-01' + get_postfix()
        proj_task = 'bin_class'
        # setup project
        self.project_client = ProjectClient()
        self.project = self.project_client.create_project(title=proj_title,
                                                          task=proj_task)
        # load data
        df = pd.read_csv('tests/data/test_1.csv')
        cols = ['sepal length', 'sepal width', 'petal length', 'petal width']
        target = 'class'
        self.X = df[cols]
        self.y = df[target]

    def tearDown(self):
        # clean
        self.project_client.delete_project(self.project.hid)

    def test_get_datasests(self):
        """
        Get empty list of datasets in a fresh project.
        """
        # get datasets
        datasets = DatasetClient(self.project.hid).get_datasets()
        self.assertEqual(datasets, [])

    def test_prepare_data(self):
        """ Test _prepare_data method on numpy array data """
        dc = DatasetClient(self.project.hid)
        samples = 100
        columns = 10
        X = np.random.rand(samples, columns)
        y = np.random.choice([0, 1], samples, replace=True)
        data, data_hash = dc._prepare_data(X, y)
        self.assertTrue(data is not None)
        self.assertTrue(data_hash is not None)
        self.assertTrue(isinstance(data_hash, str))
        # 10 attribute columns plus the target column
        self.assertEqual(11, len(data.columns))
        self.assertTrue('target' in data.columns)
        self.assertTrue('attribute_1' in data.columns)
        self.assertTrue('attribute_10' in data.columns)

    def test_get_dataset_for_wrong_hid(self):
        """ Get dataset for wrong hid should return None """
        dc = DatasetClient(self.project.hid)
        dataset = dc.get_dataset('some-wrong-hid')
        self.assertTrue(dataset is None)

    def test_add_dataset_for_training(self):
        """Add a dataset with a target column and fetch it back by hid."""
        # setup dataset client
        dc = DatasetClient(self.project.hid)
        self.assertNotEqual(dc, None)
        # get datasets, there should be none
        datasets = dc.get_datasets()
        self.assertEqual(len(datasets), 0)
        # add dataset
        my_dataset = dc.add_dataset_if_not_exists(self.X, self.y)
        self.assertNotEqual(my_dataset, None)
        # get datasets
        datasets = dc.get_datasets()
        self.assertEqual(len(datasets), 1)
        my_dataset_2 = dc.get_dataset(my_dataset.hid)
        self.assertEqual(my_dataset.hid, my_dataset_2.hid)
        self.assertEqual(my_dataset.title, my_dataset_2.title)
        # test __str__ method
        self.assertTrue('id' in str(my_dataset_2))
        self.assertTrue('title' in str(my_dataset_2))
        self.assertTrue('file' in str(my_dataset_2))

    def test_add_dataset_for_prediction(self):
        """Add a dataset without a target (prediction-only data)."""
        # setup dataset client
        dc = DatasetClient(self.project.hid)
        self.assertNotEqual(dc, None)
        # get datasets, there should be none
        datasets = dc.get_datasets()
        self.assertEqual(len(datasets), 0)
        # add dataset without target
        my_dataset = dc.add_dataset_if_not_exists(self.X, None)
        self.assertNotEqual(my_dataset, None)
        # get datasets
        datasets = dc.get_datasets()
        self.assertEqual(len(datasets), 1)
        my_dataset_2 = dc.get_dataset(my_dataset.hid)
        self.assertEqual(my_dataset.hid, my_dataset_2.hid)
        self.assertEqual(my_dataset.title, my_dataset_2.title)

    def test_add_existing_dataset(self):
        """Adding the same dataset twice must not create a duplicate."""
        # setup dataset client
        dc = DatasetClient(self.project.hid)
        self.assertNotEqual(dc, None)
        # get initial number of datasets
        init_datasets_cnt = len(dc.get_datasets())
        # add dataset
        dc.add_dataset_if_not_exists(self.X, self.y)
        # get datasets
        datasets = dc.get_datasets()
        self.assertEqual(len(datasets), init_datasets_cnt + 1)
        # add the same dataset again; it shouldn't be added
        dc.add_dataset_if_not_exists(self.X, self.y)
        # dataset count must be unchanged after the duplicate add
        datasets = dc.get_datasets()
        self.assertEqual(len(datasets), init_datasets_cnt + 1)
# Exemple #5 (score: 0) -- scraping artifact, kept as a comment
class ResultClientTest(ProjectBasedTest):
    """Tests for ResultClient, run against a temporary project."""

    def setUp(self):
        # unique project title to avoid collisions between test runs
        proj_title = 'Test project-01' + get_postfix()
        proj_task = 'bin_class'
        # experiment parameters shared by the tests in this class
        self.expt_title = 'Test experiment-01'
        self.validation_kfolds = 5
        self.validation_shuffle = True
        self.validation_stratify = True
        self.validation_train_split = None
        self.algorithms = ['xgb']
        self.metric = 'logloss'
        self.tuning_mode = 'Normal'
        self.time_constraint = 1
        # NOTE(review): attribute name keeps the original spelling
        # ("enseble") so any external references stay valid
        self.create_enseble = False
        # setup project
        self.project_client = ProjectClient()
        self.project = self.project_client.create_project(title=proj_title,
                                                          task=proj_task)
        # load data
        df = pd.read_csv('tests/data/test_1.csv')
        cols = ['sepal length', 'sepal width', 'petal length', 'petal width']
        target = 'class'
        # add dataset
        self.dataset = DatasetClient(
            self.project.hid).add_dataset_if_not_exists(df[cols], df[target])

    def tearDown(self):
        # clean
        self.project_client.delete_project(self.project.hid)

    def test_get_results_for_wrong_project(self):
        """get_results for a non-existing project must raise."""
        # NOTE(review): statements after the raising call never execute;
        # kept inside the context because the constructor itself may be
        # the raising step -- confirm against ResultClient implementation
        with self.assertRaises(BadRequestException):
            rc = ResultClient('wrong-hid')
            self.assertTrue(rc is not None)
            # get results - should raise exception
            rc.get_results()

    def test_get_results_for_project(self):
        """After an experiment starts, the project has some results."""
        # init result client
        rc = ResultClient(self.project.hid)
        self.assertNotEqual(rc, None)
        # get results - should be empty
        results = rc.get_results()
        self.assertEqual(results, [])
        # add experiment
        ec = ExperimentClient(self.project.hid)
        # create new experiment
        self.experiment = ec.add_experiment_if_not_exists(
            self.dataset, None, self.expt_title, self.project.task,
            self.validation_kfolds, self.validation_shuffle,
            self.validation_stratify, self.validation_train_split,
            self.algorithms, self.metric, self.tuning_mode,
            self.time_constraint, self.create_enseble)
        # wait some time till models are initialized
        time.sleep(60)
        # get results - should be some models there
        results = rc.get_results()
        self.assertNotEqual(len(results), 0)

    def test_get_results_for_experiment(self):
        """Results can be filtered by experiment hid; a wrong hid falls
        back to all project results."""
        # init result client
        rc = ResultClient(self.project.hid)
        self.assertNotEqual(rc, None)
        # get results - should be empty
        results = rc.get_results()
        self.assertEqual(results, [])
        # get results for wrong experiment hid
        results = rc.get_results('wrong-hid')
        self.assertEqual(results, [])
        # add experiment
        ec = ExperimentClient(self.project.hid)
        # create new experiment
        self.experiment = ec.add_experiment_if_not_exists(
            self.dataset, None, self.expt_title, self.project.task,
            self.validation_kfolds, self.validation_shuffle,
            self.validation_stratify, self.validation_train_split,
            self.algorithms, self.metric, self.tuning_mode,
            self.time_constraint, self.create_enseble)
        # wait some time till models are initialized
        time.sleep(60)
        # get results for experiment - should be some models there
        results = rc.get_results(self.experiment.hid)
        self.assertNotEqual(len(results), 0)

        # get results for project
        project_results = rc.get_results()
        # bug fix: the original asserted `results` (already checked
        # above) instead of the freshly fetched project results
        self.assertNotEqual(project_results, [])
        # get results for wrong experiment hid
        # all results from project should be returned
        results_2 = rc.get_results('wrong-hid')
        self.assertEqual(len(project_results), len(results_2))

        for r in project_results:
            # test __str__ method
            self.assertTrue('id' in str(r))
            self.assertTrue('model' in str(r))
            self.assertTrue('status' in str(r))