Ejemplo n.º 1
0
 def test_init_config_application_config_loaded(self):
     """
     :test : mlsurvey.Config()
     :condition : app_config.json exists
     :main_result : application config loaded, the value read from
                    app_config['app_section']['value'] is falsy
     """
     config_dir = os.path.join(self.base_directory, 'config')
     config = mls.Config('config.json', directory=config_dir)
     app_value = config.app_config['app_section']['value']
     self.assertFalse(app_value)
Ejemplo n.º 2
0
 def test_get_dataset_dataset_config_obtained(self):
     """
     :test : mlsurvey.Config()
     :condition : complete_config_loaded.json is loaded from the config
                  directory
     :main_result : type and n_samples of DataSet1 are readable under
                    '#refs', and the data is python ready
     """
     config_dir = os.path.join(self.base_directory, 'config')
     config = mls.Config('complete_config_loaded.json',
                         directory=config_dir)
     dataset = config.data['#refs']['datasets']['DataSet1']
     self.assertEqual('NClassRandomClassificationWithNoise',
                      dataset['type'])
     n_samples = config.data['#refs']['datasets']['DataSet1'][
         'parameters']['n_samples']
     self.assertEqual(100, n_samples)
     python_ready = mls.Utils.check_dict_python_ready(config.data)
     self.assertTrue(python_ready)
Ejemplo n.º 3
0
 def test_load_multiple_config_config_loaded(self):
     """
     :test : mlsurvey.Config()
     :condition : multiple_config.json is loaded from the config directory
     :main_result : DataSet1 type is readable under '#refs', the learning
                    process input lists the four dataset references, and
                    the data is python ready
     """
     config_dir = os.path.join(self.base_directory, 'config')
     config = mls.Config('multiple_config.json', directory=config_dir)
     dataset_type = config.data['#refs']['datasets']['DataSet1']['type']
     self.assertEqual('NClassRandomClassificationWithNoise', dataset_type)
     expected_inputs = [
         '@datasets.DataSet1',
         '@datasets.DataSet2',
         '@datasets.DataSet3',
         '@datasets.DataSet4',
     ]
     actual_inputs = config.data['learning_process']['parameters']['input']
     self.assertListEqual(expected_inputs, actual_inputs)
     python_ready = mls.Utils.check_dict_python_ready(config.data)
     self.assertTrue(python_ready)
Ejemplo n.º 4
0
 def store_config(self):
     """
     Load the config.json of every directory in self.list_full_dir,
     compact it, record the directory under the 'location' key and insert
     the resulting data into self.db.

     A percentage is printed after each directory (same console line,
     thanks to the leading carriage return), then self.fill_lists() is
     called once all directories have been processed.
     """
     for position, directory in enumerate(self.list_full_dir, start=1):
         raw_config = mls.FileOperation.load_json_as_dict(
             'config.json', directory, tuple_to_string=True)
         config = mls.Config(config=raw_config)
         config.compact()
         config.data['location'] = directory
         self.db.insert(config.data)
         percent = int(position * 100 / len(self.list_full_dir))
         print('\rAnalyze logs..{}%'.format(percent), end='')
     self.fill_lists()
Ejemplo n.º 5
0
 def test_init_config_file_not_json(self):
     """
     :test : mlsurvey.Config()
     :condition : config file is not a json file
     :main_result : raise ConfigError
     """
     config_dir = os.path.join(self.base_directory, 'config')
     # assertRaises fails the test by itself when no ConfigError is raised
     # and reports which exception was expected, unlike the uninformative
     # "False is not true" produced by assertTrue(False).
     with self.assertRaises(mls.exceptions.ConfigError):
         mls.Config('config_loaded_not_json.json', directory=config_dir)
Ejemplo n.º 6
0
 def test_init_config_file_not_exists(self):
     """
     :test : mlsurvey.Config()
     :condition : Config file not exist
     :main_result : raise FileNotFoundError
     """
     config_dir = os.path.join(self.base_directory, 'config')
     # assertRaises fails the test by itself when no FileNotFoundError is
     # raised and reports which exception was expected, unlike the
     # uninformative "False is not true" produced by assertTrue(False).
     with self.assertRaises(FileNotFoundError):
         mls.Config('config_loaded_not_exists.json', directory=config_dir)
Ejemplo n.º 7
0
 def _init_config_log(cls,
                      config_filename,
                      base_directory,
                      config_directory,
                      mlflow_log=True):
     """
     Create the configuration and logging objects.

     :param config_filename: passed as name to mls.Config
     :param base_directory: root directory; the config directory is joined
                            to it and its 'logs' sub-directory is used as
                            the logging base directory
     :param config_directory: joined to base_directory to locate the config
     :param mlflow_log: forwarded unchanged to mls.Logging
     :return: tuple (config, log), the config having been compacted
     """
     config = mls.Config(
         name=config_filename,
         directory=os.path.join(str(base_directory), str(config_directory)))
     config.compact()
     # logging is set up only once the config is loaded and compacted
     logs_directory = os.path.join(base_directory, 'logs')
     log = mls.Logging(base_dir=logs_directory, mlflow_log=mlflow_log)
     return config, log
Ejemplo n.º 8
0
    def test_run_input_all_should_have_expanded(self):
        """
        :test : mlsurvey.sl.workflows.tasks.ExpandConfigTask.run()
        :condition : config file contains multiple values (more values)
        :main_result : 72 expanded config files are written in the log
                       directory, the first and the 33rd (in sorted file
                       name order) hold the expected input and algorithm
        """
        log, _ = self._run_one_task('full_multiple_config.json')

        # regular files of the log directory that are expanded configs,
        # in sorted name order
        expanded_files = sorted(
            entry for entry in os.listdir(log.directory)
            if os.path.isfile(os.path.join(log.directory, entry))
            and entry.startswith('expand_config'))
        self.assertEqual(72, len(expanded_files))

        expected_first_input = {
            "type": "make_classification",
            "parameters": {
                "n_samples": 100,
                "shuffle": True,
                "noise": 0,
                "random_state": 0
            }
        }
        expected_algorithm_32 = {
            "type": "svm",
            "hyperparameters": {
                "kernel": "rbf",
                "C": 1.0
            }
        }
        configs = [
            mls.Config(file_name, directory=log.directory)
            for file_name in expanded_files
        ]
        first_input = configs[0].data['learning_process']['parameters'][
            'input']
        self.assertDictEqual(expected_first_input, first_input)
        algorithm_32 = configs[32].data['learning_process']['parameters'][
            'algorithm']
        self.assertDictEqual(expected_algorithm_32, algorithm_32)
        self.assertIsInstance(
            configs[0].data['learning_process']['parameters']['input'], dict)
Ejemplo n.º 9
0
    def test_task_expand_config_fairness_should_have_expanded(self):
        """
        :test : mlsurvey.sl.workflows.tasks.ExpandConfigTask.run()
        :condition : config file contains lists in fairness parameters
        :main_result : should expand into 2 config files, the second one
                       (in sorted file name order) holding the expected
                       input with its fairness section
        """
        log, _ = self._run_one_task('multiple_config_multiple_fairness.json')

        # regular files of the log directory that are expanded configs,
        # in sorted name order
        expanded_files = sorted(
            entry for entry in os.listdir(log.directory)
            if os.path.isfile(os.path.join(log.directory, entry))
            and entry.startswith('expand_config'))
        self.assertEqual(2, len(expanded_files))

        expected_second_input = {
            "type": "FileDataSet",
            "parameters": {
                "directory": "files/dataset",
                "filename": "test-fairness.arff"
            },
            "fairness": {
                "protected_attribute": 1,
                "privileged_classes": "x >= 35"
            }
        }
        configs = [
            mls.Config(file_name, directory=log.directory)
            for file_name in expanded_files
        ]
        second_input = configs[1].data['learning_process']['parameters'][
            'input']
        self.assertDictEqual(expected_second_input, second_input)
        self.assertIsInstance(
            configs[0].data['learning_process']['parameters']['input'], dict)
Ejemplo n.º 10
0
    def test_run_input_should_have_expanded(self):
        """
        :test : mlsurvey.sl.workflows.tasks.ExpandConfigTask.run()
        :condition : config file contains multiple values
        :main_result : configs have been expanded: config.json is stored
                       with the expected md5, and each of the 4 expanded
                       files matches both the 'expanded_config' catalog
                       entry and the expected learning process parameters
        """
        log, data_catalog = self._run_one_task('multiple_config.json')
        expanded_config = data_catalog.load('expanded_config')

        stored_config = os.path.join(log.base_dir, log.dir_name,
                                     'config.json')
        self.assertTrue(os.path.isfile(stored_config))
        self.assertEqual(
            '9d21f7582b06adf062e384b6fd3f83bb',
            mls.Utils.md5_file(os.path.join(log.directory, 'config.json')))

        # regular files of the log directory that are expanded configs,
        # in sorted name order
        expanded_files = sorted(
            entry for entry in os.listdir(log.directory)
            if os.path.isfile(os.path.join(log.directory, entry))
            and entry.startswith('expand_config'))
        self.assertEqual(4, len(expanded_files))

        # only the input differs between the expected expanded configs
        expected_inputs = [{
            "type": "NClassRandomClassificationWithNoise",
            "parameters": {
                "n_samples": 100,
                "shuffle": True,
                "random_state": 0,
                "noise": 0
            }
        }, {
            "type": "make_circles",
            "parameters": {
                "n_samples": 100,
                "shuffle": True,
                "noise": 0,
                "random_state": 0,
                "factor": 0.3
            }
        }, {
            "type": "load_iris",
            "parameters": {}
        }, {
            "type": "FileDataSet",
            "parameters": {
                "directory": "files/dataset",
                "filename": "test-fairness.arff"
            },
            "metadata": {
                "y_col_name": "class"
            }
        }]
        d = [{
            "input": expected_input,
            "split": {
                "type": "traintest",
                "parameters": {
                    "test_size": 5,
                    "random_state": 0,
                    "shuffle": True
                }
            },
            "algorithm": {
                "type": "sklearn.neighbors.KNeighborsClassifier",
                "hyperparameters": {
                    "n_neighbors": 2,
                    "algorithm": "auto",
                    "weights": "uniform"
                }
            }
        } for expected_input in expected_inputs]

        for id_file, file_name in enumerate(expanded_files):
            loaded = mls.Config(file_name, directory=log.directory)
            self.assertDictEqual(
                d[id_file],
                expanded_config[id_file]['learning_process']['parameters'])
            self.assertDictEqual(
                d[id_file],
                loaded.data['learning_process']['parameters'])
Ejemplo n.º 11
0
 def test_init_config_with_dictionary_not_python_ready(self):
     """
     :test : mlsurvey.Config()
     :condition : config given as a dictionary whose entry is a tuple
                  encoded with the __type__ / __value__ keys
     :main_result : the entry is read back as the tuple (1, 2, 3) and the
                    data is python ready
     """
     encoded_tuple = {"__type__": "__tuple__", "__value__": "(1, 2, 3)"}
     config = mls.Config(config={'testconfig': encoded_tuple})
     self.assertTupleEqual((1, 2, 3), config.data['testconfig'])
     python_ready = mls.Utils.check_dict_python_ready(config.data)
     self.assertTrue(python_ready)
Ejemplo n.º 12
0
 def test_init_config_with_dictionary(self):
     """
     :test : mlsurvey.Config()
     :condition : config given as a plain dictionary
     :main_result : the dictionary entry is available in config.data and
                    the data is python ready
     """
     source = {'testconfig': 'config loaded'}
     config = mls.Config(config=source)
     loaded_value = config.data['testconfig']
     self.assertEqual('config loaded', loaded_value)
     python_ready = mls.Utils.check_dict_python_ready(config.data)
     self.assertTrue(python_ready)
Ejemplo n.º 13
0
 def test_load_config_file_not_python_ready_config_loaded(self):
     """
     :test : mlsurvey.Config()
     :condition : full_multiple_config.json is loaded from the config
                  directory
     :main_result : the loaded data is python ready
     """
     config_dir = os.path.join(self.base_directory, 'config')
     config = mls.Config('full_multiple_config.json', directory=config_dir)
     python_ready = mls.Utils.check_dict_python_ready(config.data)
     self.assertTrue(python_ready)
Ejemplo n.º 14
0
 def test_load_config_from_other_directory_without_end_slash(self):
     """
     :test : mlsurvey.Config()
     :condition : config_loaded.json is loaded from the 'files' directory,
                  whose path is given without a trailing slash
     :main_result : 'testconfig' holds 'config loaded' and the data is
                    python ready
     """
     files_dir = os.path.join(self.base_directory, 'files')
     config = mls.Config('config_loaded.json', directory=files_dir)
     loaded_value = config.data['testconfig']
     self.assertEqual('config loaded', loaded_value)
     python_ready = mls.Utils.check_dict_python_ready(config.data)
     self.assertTrue(python_ready)
Ejemplo n.º 15
0
 def task_load_data(self):
     """
     Build the configuration from 'config.json' and self.source_directory,
     and keep it in self.config
     """
     loaded_config = mls.Config('config.json', self.source_directory)
     self.config = loaded_config
Ejemplo n.º 16
0
 def test_load_config_default_config_loaded(self):
     """
     :test : mlsurvey.Config()
     :condition : only the config directory is given, no file name
     :main_result : 'testconfig' holds 'config loaded' and the data is
                    python ready
     """
     config_dir = os.path.join(self.base_directory, 'config')
     config = mls.Config(directory=config_dir)
     loaded_value = config.data['testconfig']
     self.assertEqual('config loaded', loaded_value)
     python_ready = mls.Utils.check_dict_python_ready(config.data)
     self.assertTrue(python_ready)
Ejemplo n.º 17
0
 def test_compact_should_compact(self):
     """
     :test : mlsurvey.Config.compact()
     :condition : config file format to compacted config format
     :main_result : transformation ok: the '@...' references of the
                    learning process are replaced by the definitions
                    found under '#refs', and is_compacted() is true
     """

     # Each helper returns a fresh dictionary, so the expected config
     # never shares objects with the config handed to mls.Config.
     def knn_base():
         return {
             'type': 'sklearn.neighbors.KNeighborsClassifier',
             'hyperparameters': {
                 'algorithm': 'auto',
                 'n_neighbors': 2,
                 'weights': 'uniform'
             }
         }

     def n_class_random():
         return {
             'parameters': {
                 'n_samples': [100, 200],
                 'noise': 0,
                 'random_state': 0,
                 'shuffle': True
             },
             'type': 'NClassRandomClassificationWithNoise'
         }

     def traintest(test_size):
         return {
             'parameters': {
                 'random_state': 0,
                 'shuffle': True,
                 'test_size': test_size
             },
             'type': 'traintest'
         }

     base_config = {
         '#refs': {
             'algorithms': {
                 'knn-base': knn_base()
             },
             'datasets': {
                 'DataSetNClassRandom': n_class_random()
             },
             'splits': {
                 'traintest20': traintest(20),
                 'traintest40': traintest(40)
             }
         },
         'learning_process': {
             'parameters': {
                 'algorithm': '@algorithms.knn-base',
                 'input': '@datasets.DataSetNClassRandom',
                 'split': ['@splits.traintest20', '@splits.traintest40']
             },
         },
     }
     expected_config = {
         'learning_process': {
             'parameters': {
                 'algorithm': knn_base(),
                 'input': n_class_random(),
                 'split': [traintest(20), traintest(40)]
             }
         }
     }

     config = mls.Config(config=base_config)
     config.compact()
     self.assertDictEqual(expected_config, config.data)
     compacted = config.is_compacted()
     self.assertTrue(compacted)