def test_init_config_application_config_loaded(self):
    """
    :test : mlsurvey.Config()
    :condition : app_config.json exists
    :main_result : application config loaded
    """
    config = mls.Config('config.json',
                        directory=os.path.join(self.base_directory, 'config'))
    self.assertFalse(config.app_config['app_section']['value'])
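# Hedged illustration (an assumption, not the shipped file): the minimal shape of
# app_config.json that the test above relies on. Only the asserted key is grounded
# in the test; any other keys would vary.
_EXAMPLE_APP_CONFIG = {'app_section': {'value': False}}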
def test_get_dataset_dataset_config_obtained(self):
    config = mls.Config('complete_config_loaded.json',
                        directory=os.path.join(self.base_directory, 'config'))
    self.assertEqual('NClassRandomClassificationWithNoise',
                     config.data['#refs']['datasets']['DataSet1']['type'])
    self.assertEqual(100,
                     config.data['#refs']['datasets']['DataSet1']['parameters']['n_samples'])
    self.assertTrue(mls.Utils.check_dict_python_ready(config.data))
def test_load_multiple_config_config_loaded(self):
    config = mls.Config('multiple_config.json',
                        directory=os.path.join(self.base_directory, 'config'))
    self.assertEqual('NClassRandomClassificationWithNoise',
                     config.data['#refs']['datasets']['DataSet1']['type'])
    self.assertListEqual(['@datasets.DataSet1', '@datasets.DataSet2',
                          '@datasets.DataSet3', '@datasets.DataSet4'],
                         config.data['learning_process']['parameters']['input'])
    self.assertTrue(mls.Utils.check_dict_python_ready(config.data))
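# Hedged sketch of the '#refs' configuration shape exercised by the two tests above.
# Only DataSet1's type, its n_samples value, and the '@datasets.*' reference list are
# grounded in the assertions; everything else is an assumption for illustration, not
# the content of the real multiple_config.json.
_EXAMPLE_REF_CONFIG = {
    '#refs': {
        'datasets': {
            'DataSet1': {
                'type': 'NClassRandomClassificationWithNoise',
                'parameters': {'n_samples': 100}
            }
            # 'DataSet2' ... 'DataSet4' would be defined the same way
        }
    },
    'learning_process': {
        'parameters': {
            'input': ['@datasets.DataSet1', '@datasets.DataSet2',
                      '@datasets.DataSet3', '@datasets.DataSet4']
        }
    }
}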
def store_config(self):
    """ Read all config.json files and store them into a db """
    for i, d in enumerate(self.list_full_dir):
        config_json = mls.FileOperation.load_json_as_dict('config.json', d, tuple_to_string=True)
        config = mls.Config(config=config_json)
        config.compact()
        config.data['location'] = d
        self.db.insert(config.data)
        print('\rAnalyze logs..' + str(int((i + 1) * 100 / len(self.list_full_dir))) + '%', end='')
    self.fill_lists()
def test_init_config_file_not_json(self):
    """
    :test : mlsurvey.Config()
    :condition : config file is not a JSON file
    :main_result : raise ConfigError
    """
    try:
        _ = mls.Config('config_loaded_not_json.json',
                       directory=os.path.join(self.base_directory, 'config'))
        self.assertTrue(False)
    except mls.exceptions.ConfigError:
        self.assertTrue(True)
def test_init_config_file_not_exists(self):
    """
    :test : mlsurvey.Config()
    :condition : config file does not exist
    :main_result : raise FileNotFoundError
    """
    try:
        _ = mls.Config('config_loaded_not_exists.json',
                       directory=os.path.join(self.base_directory, 'config'))
        self.assertTrue(False)
    except FileNotFoundError:
        self.assertTrue(True)
def _init_config_log(cls, config_filename, base_directory, config_directory, mlflow_log=True):
    final_config_directory = os.path.join(str(base_directory), str(config_directory))
    config = mls.Config(name=config_filename, directory=final_config_directory)
    config.compact()
    # init logging
    log = mls.Logging(base_dir=os.path.join(base_directory, 'logs'), mlflow_log=mlflow_log)
    return config, log
def test_run_input_all_should_have_expanded(self):
    """
    :test : mlsurvey.sl.workflows.tasks.ExpandConfigTask.run()
    :condition : config file contains multiple values (more values)
    :main_result : configs have been expanded
    """
    log, data_catalog = self._run_one_task('full_multiple_config.json')
    list_files = [name for name in os.listdir(log.directory)
                  if os.path.isfile(os.path.join(log.directory, name))]
    # keep only the expanded configs
    list_files = list(filter(lambda x: x.startswith('expand_config'), list_files))
    list_files.sort()
    nb_files = len(list_files)
    self.assertEqual(72, nb_files)
    ds0 = {"type": "make_classification",
           "parameters": {"n_samples": 100, "shuffle": True, "noise": 0, "random_state": 0}}
    al32 = {"type": "svm", "hyperparameters": {"kernel": "rbf", "C": 1.0}}
    configs = []
    for id_file, file in enumerate(list_files):
        configs.append(mls.Config(file, directory=log.directory))
    self.assertDictEqual(ds0, configs[0].data['learning_process']['parameters']['input'])
    self.assertDictEqual(al32, configs[32].data['learning_process']['parameters']['algorithm'])
    self.assertIsInstance(configs[0].data['learning_process']['parameters']['input'], dict)
def test_task_expand_config_fairness_should_have_expanded(self):
    """
    :test : mlsurvey.sl.workflows.tasks.ExpandConfigTask.run()
    :condition : config file contains lists in fairness parameters
    :main_result : should expand
    """
    log, data_catalog = self._run_one_task('multiple_config_multiple_fairness.json')
    list_files = [name for name in os.listdir(log.directory)
                  if os.path.isfile(os.path.join(log.directory, name))]
    # keep only the expanded configs
    list_files = list(filter(lambda x: x.startswith('expand_config'), list_files))
    list_files.sort()
    nb_files = len(list_files)
    self.assertEqual(2, nb_files)
    f1 = {"type": "FileDataSet",
          "parameters": {"directory": "files/dataset", "filename": "test-fairness.arff"},
          "fairness": {"protected_attribute": 1, "privileged_classes": "x >= 35"}}
    configs = []
    for id_file, file in enumerate(list_files):
        configs.append(mls.Config(file, directory=log.directory))
    self.assertDictEqual(f1, configs[1].data['learning_process']['parameters']['input'])
    self.assertIsInstance(configs[0].data['learning_process']['parameters']['input'], dict)
def test_run_input_should_have_expanded(self):
    """
    :test : mlsurvey.sl.workflows.tasks.ExpandConfigTask.run()
    :condition : config file contains multiple values
    :main_result : configs have been expanded
    """
    log, data_catalog = self._run_one_task('multiple_config.json')
    expanded_config = data_catalog.load('expanded_config')
    self.assertTrue(os.path.isfile(os.path.join(log.base_dir, log.dir_name, 'config.json')))
    self.assertEqual('9d21f7582b06adf062e384b6fd3f83bb',
                     mls.Utils.md5_file(os.path.join(log.directory, 'config.json')))
    list_files = [name for name in os.listdir(log.directory)
                  if os.path.isfile(os.path.join(log.directory, name))]
    # keep only the expanded configs
    list_files = list(filter(lambda x: x.startswith('expand_config'), list_files))
    list_files.sort()
    nb_files = len(list_files)
    self.assertEqual(4, nb_files)
    d = [{"input": {"type": "NClassRandomClassificationWithNoise",
                    "parameters": {"n_samples": 100, "shuffle": True, "random_state": 0, "noise": 0}},
          "split": {"type": "traintest",
                    "parameters": {"test_size": 5, "random_state": 0, "shuffle": True}},
          "algorithm": {"type": "sklearn.neighbors.KNeighborsClassifier",
                        "hyperparameters": {"n_neighbors": 2, "algorithm": "auto", "weights": "uniform"}}},
         {"input": {"type": "make_circles",
                    "parameters": {"n_samples": 100, "shuffle": True, "noise": 0, "random_state": 0,
                                   "factor": 0.3}},
          "split": {"type": "traintest",
                    "parameters": {"test_size": 5, "random_state": 0, "shuffle": True}},
          "algorithm": {"type": "sklearn.neighbors.KNeighborsClassifier",
                        "hyperparameters": {"n_neighbors": 2, "algorithm": "auto", "weights": "uniform"}}},
         {"input": {"type": "load_iris", "parameters": {}},
          "split": {"type": "traintest",
                    "parameters": {"test_size": 5, "random_state": 0, "shuffle": True}},
          "algorithm": {"type": "sklearn.neighbors.KNeighborsClassifier",
                        "hyperparameters": {"n_neighbors": 2, "algorithm": "auto", "weights": "uniform"}}},
         {"input": {"type": "FileDataSet",
                    "parameters": {"directory": "files/dataset", "filename": "test-fairness.arff"},
                    "metadata": {"y_col_name": "class"}},
          "split": {"type": "traintest",
                    "parameters": {"test_size": 5, "random_state": 0, "shuffle": True}},
          "algorithm": {"type": "sklearn.neighbors.KNeighborsClassifier",
                        "hyperparameters": {"n_neighbors": 2, "algorithm": "auto", "weights": "uniform"}}}]
    configs = []
    for id_file, file in enumerate(list_files):
        configs.append(mls.Config(file, directory=log.directory))
        self.assertDictEqual(d[id_file], expanded_config[id_file]['learning_process']['parameters'])
        self.assertDictEqual(d[id_file], configs[id_file].data['learning_process']['parameters'])
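# Hedged sketch of the expansion idea the ExpandConfigTask tests above check: every
# parameter block that holds a list of alternatives yields one expanded config per
# combination. This is an illustration built on itertools.product, not the actual
# ExpandConfigTask implementation (which also handles nested lists and fairness blocks).
import itertools


def _expand_parameters_sketch(parameters):
    """parameters: dict mapping block name -> value or list of alternative values."""
    keys = list(parameters)
    alternatives = [value if isinstance(value, list) else [value]
                    for value in parameters.values()]
    return [dict(zip(keys, combination))
            for combination in itertools.product(*alternatives)]

# With four alternative 'input' blocks and single 'split' and 'algorithm' blocks,
# this would yield the four parameter dicts listed in 'd' above (4 * 1 * 1 = 4).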
def test_init_config_with_dictionary_not_python_ready(self):
    c = {'testconfig': {"__type__": "__tuple__", "__value__": "(1, 2, 3)"}}
    config = mls.Config(config=c)
    self.assertTupleEqual((1, 2, 3), config.data['testconfig'])
    self.assertTrue(mls.Utils.check_dict_python_ready(config.data))
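# Hedged illustration of the encoding the test above exercises: an entry of the form
# {"__type__": "__tuple__", "__value__": "(1, 2, 3)"} is turned into a real Python
# tuple when the config becomes "python ready". This helper is an assumption for
# illustration (using ast.literal_eval), not mls.Utils' actual code.
import ast


def _decode_tuple_sketch(entry):
    """Return the decoded tuple for a '__tuple__' entry, else the entry unchanged."""
    if isinstance(entry, dict) and entry.get('__type__') == '__tuple__':
        return ast.literal_eval(entry['__value__'])
    return entry

# _decode_tuple_sketch({"__type__": "__tuple__", "__value__": "(1, 2, 3)"}) == (1, 2, 3)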
def test_init_config_with_dictionary(self):
    c = {'testconfig': 'config loaded'}
    config = mls.Config(config=c)
    self.assertEqual('config loaded', config.data['testconfig'])
    self.assertTrue(mls.Utils.check_dict_python_ready(config.data))
def test_load_config_file_not_python_ready_config_loaded(self):
    config = mls.Config('full_multiple_config.json',
                        directory=os.path.join(self.base_directory, 'config'))
    self.assertTrue(mls.Utils.check_dict_python_ready(config.data))
def test_load_config_from_other_directory_without_end_slash(self):
    config = mls.Config('config_loaded.json',
                        directory=os.path.join(self.base_directory, 'files'))
    self.assertEqual('config loaded', config.data['testconfig'])
    self.assertTrue(mls.Utils.check_dict_python_ready(config.data))
def task_load_data(self):
    """ Load config from directory """
    self.config = mls.Config('config.json', self.source_directory)
def test_load_config_default_config_loaded(self):
    config = mls.Config(directory=os.path.join(self.base_directory, 'config'))
    self.assertEqual('config loaded', config.data['testconfig'])
    self.assertTrue(mls.Utils.check_dict_python_ready(config.data))
def test_compact_should_compact(self):
    """
    :test : mlsurvey.Config.compact()
    :condition : config file format to compacted config format
    :main_result : transformation ok
    """
    base_config = {
        '#refs': {
            'algorithms': {
                'knn-base': {
                    'type': 'sklearn.neighbors.KNeighborsClassifier',
                    'hyperparameters': {'algorithm': 'auto', 'n_neighbors': 2, 'weights': 'uniform'}
                }
            },
            'datasets': {
                'DataSetNClassRandom': {
                    'parameters': {'n_samples': [100, 200], 'noise': 0, 'random_state': 0, 'shuffle': True},
                    'type': 'NClassRandomClassificationWithNoise'
                }
            },
            'splits': {
                'traintest20': {
                    'parameters': {'random_state': 0, 'shuffle': True, 'test_size': 20},
                    'type': 'traintest'
                },
                'traintest40': {
                    'parameters': {'random_state': 0, 'shuffle': True, 'test_size': 40},
                    'type': 'traintest'
                }
            }
        },
        'learning_process': {
            'parameters': {
                'algorithm': '@algorithms.knn-base',
                'input': '@datasets.DataSetNClassRandom',
                'split': ['@splits.traintest20', '@splits.traintest40']
            }
        }
    }
    expected_config = {
        'learning_process': {
            'parameters': {
                'algorithm': {
                    'type': 'sklearn.neighbors.KNeighborsClassifier',
                    'hyperparameters': {'algorithm': 'auto', 'n_neighbors': 2, 'weights': 'uniform'}
                },
                'input': {
                    'parameters': {'n_samples': [100, 200], 'noise': 0, 'random_state': 0, 'shuffle': True},
                    'type': 'NClassRandomClassificationWithNoise'
                },
                'split': [
                    {'parameters': {'random_state': 0, 'shuffle': True, 'test_size': 20},
                     'type': 'traintest'},
                    {'parameters': {'random_state': 0, 'shuffle': True, 'test_size': 40},
                     'type': 'traintest'}
                ]
            }
        }
    }
    config = mls.Config(config=base_config)
    config.compact()
    self.assertDictEqual(expected_config, config.data)
    self.assertTrue(config.is_compacted())
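# Hedged sketch of the transformation checked above: compact() replaces each
# '@group.name' reference (or list of references) under 'learning_process' with the
# matching entry from the '#refs' section. This resolver is an illustration only,
# not mls.Config.compact(); it merely reproduces expected_config from base_config
# for the data used in the test.
def _resolve_refs_sketch(config):
    refs = config['#refs']

    def resolve(value):
        if isinstance(value, str) and value.startswith('@'):
            group, name = value[1:].split('.', 1)
            return refs[group][name]
        if isinstance(value, list):
            return [resolve(item) for item in value]
        return value

    parameters = config['learning_process']['parameters']
    return {'learning_process': {'parameters': {key: resolve(value)
                                                for key, value in parameters.items()}}}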