def test_repo_training_test_data(self):
    """Store training and test RawData in a fresh repository and verify them.

    Checks that the stored objects can be read back under their names and
    that each of the three ``add`` calls produced its own commit.
    """
    name_key = repo_objects.RepoInfoKey.NAME
    version_key = repo_objects.RepoInfoKey.VERSION

    def make_raw(name):
        # All fixtures share the same zero-filled 10x1 x/y arrays and differ
        # only in the name they are stored under.
        return RawData(
            np.zeros([10, 1]), ['x_values'],
            np.zeros([10, 1]), ['y_values'],
            repo_info={repo_objects.RepoInfoKey.NAME.value: name})

    # init repository with sample in memory handler
    repo = MLRepo(user='******')
    repo._job_runner = SimpleJobRunner(repo)

    training = make_raw('training_data')
    repo.add(training, category=MLObjectType.TRAINING_DATA)
    training_loaded = repo.get_training_data()
    self.assertEqual(training_loaded.repo_info[name_key],
                     training.repo_info[name_key])

    first_test = make_raw('test_data')
    repo.add(first_test, category=MLObjectType.TEST_DATA)
    first_loaded = repo.get('test_data')
    for key in (name_key, version_key):
        self.assertEqual(first_loaded.repo_info[key],
                         first_test.repo_info[key])

    second_test = make_raw('test_data_2')
    repo.add(second_test, category=MLObjectType.TEST_DATA)
    second_loaded = repo.get('test_data_2')
    self.assertEqual(second_test.repo_info[name_key],
                     second_loaded.repo_info[name_key])

    history = repo.get_commits()
    self.assertEqual(len(history), 3)
    self.assertEqual(history[1].objects['test_data'],
                     first_test.repo_info.version)
    #self.assertEqual(commits[1].objects['repo_mapping'], 1)
    self.assertEqual(history[2].objects['test_data_2'],
                     second_test.repo_info.version)
def test_repo_RawData(self):
    """Round-trip a RawData object through a fresh repository.

    The object is added with a commit message, fetched again by name, and
    its x coordinate names are compared with the original. Exactly one
    commit containing exactly one object is expected afterwards.
    """
    repo = MLRepo(user='******')
    repo._job_runner = SimpleJobRunner(repo)

    info = {repo_objects.RepoInfoKey.NAME.value: 'RawData_Test'}
    stored = repo_objects.RawData(  # pylint: disable=E0602
        np.zeros([10, 1]), ['test_coord'], repo_info=info)
    repo.add(stored, 'test commit', MLObjectType.RAW_DATA)

    loaded = repo.get('RawData_Test')
    self.assertEqual(len(stored.x_coord_names), len(loaded.x_coord_names))
    self.assertEqual(stored.x_coord_names[0], loaded.x_coord_names[0])

    history = repo.get_commits()
    self.assertEqual(len(history), 1)
    self.assertEqual(len(history[0].objects), 1)
class RepoTest(unittest.TestCase):
    """Tests for ``MLRepo``.

    ``setUp`` builds a populated repository in ``self.repository`` before
    every test. A few tests create their own empty repository instead.
    """

    def _setup_measure_config(self):
        """Add a measure configuration with two MAX measures.

        The first measure is restricted to the coordinate ``'y0'``; the
        second one is given without a coordinate list.
        """
        measure_config = repo_objects.MeasureConfiguration(
            [(repo_objects.MeasureConfiguration.MAX, ['y0']),
             repo_objects.MeasureConfiguration.MAX],
            repo_info={RepoInfoKey.NAME.value: 'measure_config'})
        self.repository.add(measure_config,
                            category=MLObjectType.MEASURE_CONFIGURATION,
                            message='adding measure configuration')

    def _add_calibrated_model(self):
        """Run a training on the repository and set the label 'prod'."""
        self.repository.run_training()
        self.repository.set_label('prod')

    def setUp(self):
        '''Setup a complete ML repo with two different test data objects,
        training data, model definition etc.
        '''
        self.repository = MLRepo(user='******')
        job_runner = SimpleJobRunner(self.repository)
        self.repository._job_runner = job_runner

        #### Setup dummy RawData
        # Three identical zero-filled raw data objects, one per DataSet below.
        for raw_name in ('raw_1', 'raw_2', 'raw_3'):
            raw_data = repo_objects.RawData(
                np.zeros([10, 1]), ['x0'],
                np.zeros([10, 1]), ['y0'],
                repo_info={repo_objects.RepoInfoKey.NAME.value: raw_name})
            self.repository.add(raw_data, category=MLObjectType.RAW_DATA)

        ## Setup dummy Test and Training DataSets on RawData
        training_data = DataSet('raw_1', 0, None, repo_info={
            repo_objects.RepoInfoKey.NAME.value: 'training_data_1',
            repo_objects.RepoInfoKey.CATEGORY: MLObjectType.TRAINING_DATA
        })
        test_data_1 = DataSet('raw_2', 0, None, repo_info={
            repo_objects.RepoInfoKey.NAME.value: 'test_data_1',
            repo_objects.RepoInfoKey.CATEGORY: MLObjectType.TEST_DATA
        })
        # test_data_2 is built with the bounds (0, 2); test_DataSet_get_full
        # expects it to contain two rows.
        test_data_2 = DataSet('raw_3', 0, 2, repo_info={
            repo_objects.RepoInfoKey.NAME.value: 'test_data_2',
            repo_objects.RepoInfoKey.CATEGORY: MLObjectType.TEST_DATA
        })
        self.repository.add([training_data, test_data_1, test_data_2])

        ## setup dummy preprocessor
        self.repository.add_preprocessing_transforming_function(
            preprocessor_transforming_function_test,
            repo_name='transform_func')
        self.repository.add_preprocessing_fitting_function(
            preprocessor_fitting_function_test, repo_name='fit_func')
        self.repository.add_preprocessor('test_preprocessor_with_fitting',
                                         'transform_func', 'fit_func',
                                         preprocessor_param=None)

        self.repository.add_eval_function(eval_func_test, 'eval_func')
        self.repository.add_training_function(train_func_test, 'train_func')
        self.repository.add(
            TestClass(
                1,
                2,
                repo_info={
                    repo_objects.RepoInfoKey.NAME.value: 'training_param',  # pylint: disable=E1123
                    repo_objects.RepoInfoKey.CATEGORY:
                    MLObjectType.TRAINING_PARAM
                }))

        ## setup dummy model definition
        self.repository.add_model(
            'model', 'eval_func', 'train_func',
            preprocessors=['test_preprocessor_with_fitting'])

        # setup measure configuration
        self._setup_measure_config()

        # add dummy calibrated model
        self._add_calibrated_model()

    def test_adding_training_data_exception(self):
        '''Tests if adding new training data leads to an exception
        '''
        with self.assertRaises(Exception):
            test_obj = DataSet('raw_data', repo_info={
                repo_objects.RepoInfoKey.CATEGORY:
                MLObjectType.TRAINING_DATA.value,
                'name': 'test_object'
            })
            self.repository.add(test_obj)

    def test_commit_increase_update(self):
        '''Check if updating an object in repository increases commit but
        does not change mapping
        '''
        obj = self.repository.get('raw_1')
        old_num_commits = len(self.repository.get_commits())
        old_version_mapping = self.repository.get('repo_mapping').repo_info[
            RepoInfoKey.VERSION]
        self.repository.add(obj)
        new_num_commits = len(self.repository.get_commits())
        new_version_mapping = self.repository.get('repo_mapping').repo_info[
            RepoInfoKey.VERSION]
        self.assertEqual(old_num_commits + 1, new_num_commits)
        self.assertEqual(old_version_mapping, new_version_mapping)

    def test_commit_increase_add(self):
        '''Check if adding a new object in repository increases commit and
        does also change the mapping
        '''
        obj = DataSet('raw_data_1', 0, None, repo_info={
            RepoInfoKey.NAME.value: 'test...',
            RepoInfoKey.CATEGORY: MLObjectType.TEST_DATA
        })
        old_num_commits = len(self.repository.get_commits())
        old_version_mapping = self.repository.get(
            'repo_mapping').repo_info.version
        self.repository.add(obj)
        new_num_commits = len(self.repository.get_commits())
        new_version_mapping = self.repository.get(
            'repo_mapping').repo_info.version
        self.assertEqual(old_num_commits + 1, new_num_commits)
        # NOTE(review): the mapping versions and the commit list are fetched
        # but never compared, so the "does also change the mapping" part of
        # the docstring is not verified here - TODO confirm the expected
        # relation and add the assertion.
        commits = self.repository.get_commits()

    def test_DataSet_get(self):
        '''Test if getting a DataSet does include all informations from the
        underlying RawData (excluding numpy data)
        '''
        obj = self.repository.get('test_data_1')
        raw_obj = self.repository.get(obj.raw_data)
        for i, coord_name in enumerate(raw_obj.x_coord_names):
            self.assertEqual(coord_name, obj.x_coord_names[i])
        for i, coord_name in enumerate(raw_obj.y_coord_names):
            self.assertEqual(coord_name, obj.y_coord_names[i])

    def test_DataSet_get_full(self):
        '''Test if getting a DataSet does include all informations from the
        underlying RawData (including numpy data)
        '''
        obj = self.repository.get('test_data_1',
                                  version=repo_store.RepoStore.LAST_VERSION,
                                  full_object=True)
        raw_obj = self.repository.get(
            obj.raw_data,
            version=repo_store.RepoStore.LAST_VERSION,
            full_object=True)
        for i, coord_name in enumerate(raw_obj.x_coord_names):
            self.assertEqual(coord_name, obj.x_coord_names[i])
        for i, coord_name in enumerate(raw_obj.y_coord_names):
            self.assertEqual(coord_name, obj.y_coord_names[i])
        self.assertEqual(raw_obj.x_data.shape[0], obj.x_data.shape[0])

        obj = self.repository.get('test_data_2',
                                  version=repo_store.RepoStore.LAST_VERSION,
                                  full_object=True)
        self.assertEqual(obj.x_data.shape[0], 2)

        obj = self.repository.get('training_data_1',
                                  version=repo_store.RepoStore.LAST_VERSION,
                                  full_object=True)
        self.assertEqual(obj.x_data.shape[0], 10)

    def test_repo_RawData(self):
        """Test RawData within repo
        """
        # Uses its own empty repository so the commit count starts at zero.
        repository = MLRepo(user='******')
        job_runner = SimpleJobRunner(repository)
        repository._job_runner = job_runner
        raw_data = repo_objects.RawData(
            np.zeros([10, 1]), ['test_coord'],
            repo_info={  # pylint: disable=E0602
                repo_objects.RepoInfoKey.NAME.value: 'RawData_Test'
            })
        repository.add(raw_data, 'test commit', MLObjectType.RAW_DATA)
        raw_data_2 = repository.get('RawData_Test')
        self.assertEqual(len(raw_data.x_coord_names),
                         len(raw_data_2.x_coord_names))
        self.assertEqual(raw_data.x_coord_names[0],
                         raw_data_2.x_coord_names[0])
        commits = repository.get_commits()
        self.assertEqual(len(commits), 1)
        self.assertEqual(len(commits[0].objects), 1)

    def test_add_model_defaults(self):
        """test add_model using defaults to check whether default logic
        applies correctly
        """
        model_param = TestClass(3, 4, repo_info={
            RepoInfoKey.NAME.value: 'model_param',
            RepoInfoKey.CATEGORY: MLObjectType.MODEL_PARAM.value
        })  # pylint: disable=E1123
        self.repository.add(model_param)
        training_param = TestClass(3, 4, repo_info={
            RepoInfoKey.NAME.value: 'training_param',
            RepoInfoKey.CATEGORY: MLObjectType.TRAINING_PARAM.value
        })  # pylint: disable=E1123
        self.repository.add(training_param)
        self.repository.add_model('model1')
        model = self.repository.get('model1')
        self.assertEqual(model.eval_function, 'eval_func')
        self.assertEqual(model.training_function, 'train_func')
        self.assertEqual(model.training_param, 'training_param')
        self.assertEqual(model.model_param, 'model_param')

    def test_get_history(self):
        """Check that re-adding an object adds one entry to its history."""
        training_data_history = self.repository.get_history('training_data_1')
        self.assertEqual(len(training_data_history), 1)
        training_data = self.repository.get('training_data_1')
        self.repository.add(training_data)
        training_data_history = self.repository.get_history('training_data_1')
        self.assertEqual(len(training_data_history), 2)

    def test_run_eval_defaults(self):
        '''Test running evaluation with default arguments
        '''
        self.repository.run_evaluation()

    def test_run_train_defaults(self):
        '''Test running training with default arguments
        '''
        self.repository.run_training()

    def test_run_measure_defaults(self):
        '''Test running the measures with default arguments
        '''
        # run first the evaluation so that there is at least one evaluation
        self.repository.run_evaluation()
        self.repository.run_measures()

    def test_repo_training_test_data(self):
        """Add training and test RawData to an empty repository and check
        that they can be read back and that each add created one commit.
        """
        # init repository with sample in memory handler
        repository = MLRepo(user='******')
        job_runner = SimpleJobRunner(repository)
        repository._job_runner = job_runner
        training_data = RawData(
            np.zeros([10, 1]), ['x_values'],
            np.zeros([10, 1]), ['y_values'],
            repo_info={repo_objects.RepoInfoKey.NAME.value: 'training_data'})
        repository.add(training_data, category=MLObjectType.TRAINING_DATA)

        training_data_2 = repository.get_training_data()
        self.assertEqual(
            training_data_2.repo_info[repo_objects.RepoInfoKey.NAME],
            training_data.repo_info[repo_objects.RepoInfoKey.NAME])

        test_data = RawData(
            np.zeros([10, 1]), ['x_values'],
            np.zeros([10, 1]), ['y_values'],
            repo_info={repo_objects.RepoInfoKey.NAME.value: 'test_data'})
        repository.add(test_data, category=MLObjectType.TEST_DATA)
        test_data_ref = repository.get('test_data')
        self.assertEqual(
            test_data_ref.repo_info[repo_objects.RepoInfoKey.NAME],
            test_data.repo_info[repo_objects.RepoInfoKey.NAME])
        self.assertEqual(
            test_data_ref.repo_info[repo_objects.RepoInfoKey.VERSION],
            test_data.repo_info[repo_objects.RepoInfoKey.VERSION])

        test_data_2 = RawData(
            np.zeros([10, 1]), ['x_values'],
            np.zeros([10, 1]), ['y_values'],
            repo_info={repo_objects.RepoInfoKey.NAME.value: 'test_data_2'})
        repository.add(test_data_2, category=MLObjectType.TEST_DATA)
        test_data_2_ref = repository.get('test_data_2')
        self.assertEqual(
            test_data_2.repo_info[repo_objects.RepoInfoKey.NAME],
            test_data_2_ref.repo_info[repo_objects.RepoInfoKey.NAME])

        commits = repository.get_commits()
        self.assertEqual(len(commits), 3)
        self.assertEqual(commits[1].objects['test_data'],
                         test_data.repo_info.version)
        #self.assertEqual(commits[1].objects['repo_mapping'], 1)
        self.assertEqual(commits[2].objects['test_data_2'],
                         test_data_2.repo_info.version)
        #self.assertEqual(commits[2].objects['repo_mapping'], 2)

    def test_repo_RegressionTest(self):
        """Create the tests of a regression test definition and run them
        after an evaluation and a measure run.
        """
        regression_test_def = ml_tests.RegressionTestDefinition(
            repo_info={
                RepoInfoKey.NAME: 'regression_test',
                RepoInfoKey.CATEGORY: MLObjectType.TEST_DEFINITION.name
            })
        tests = regression_test_def.create(self.repository)
        # NOTE(review): presumably one test per data set added in setUp
        # (one training and two test data sets) - TODO confirm.
        self.assertEqual(len(tests), 3)
        self.repository.add(regression_test_def)
        self.repository.run_evaluation()
        self.repository.run_measures()
        self.repository.run_tests()

    def test_add_multiple(self):
        """Test adding multiple objects at once
        """
        obj1 = TestClass(5, 4, repo_info={})
        obj1.repo_info.name = 'obj1'
        self.repository.add(obj1, category=MLObjectType.CALIBRATED_MODEL)
        obj2 = TestClass(2, 3, repo_info={})
        obj2.repo_info.name = 'obj2'
        self.repository.add([obj1, obj2],
                            category=MLObjectType.CALIBRATED_MODEL)
        new_obj1 = self.repository.get('obj1')
        self.assertEqual(new_obj1.repo_info.name, 'obj1')
        new_obj2 = self.repository.get('obj2')
        self.assertEqual(new_obj2.repo_info.name, 'obj2')

    def test_delete(self):
        """Test if deletion works and if it considers if there are
        dependencies to other objects
        """
        obj1 = TestClass(5, 4, repo_info={})
        obj1.repo_info.name = 'obj1'
        v1 = self.repository.add(obj1, category=MLObjectType.CALIBRATED_MODEL)
        obj2 = TestClass(2, 3, repo_info={})
        obj2.repo_info.name = 'obj2'
        obj2.repo_info.modification_info = {'obj1': v1}
        v2 = self.repository.add(obj2, category=MLObjectType.CALIBRATED_MODEL)

        # The expected failures are checked with assertRaises. A failure
        # sentinel inside a try block with a bare ``except: pass`` would have
        # its own AssertionError swallowed, so the check could never fail.

        # check if an exception is thrown if one tries to delete obj1
        # although obj2 has a dependency on obj1
        with self.assertRaises(Exception):
            self.repository.delete('obj1', v1)

        # now first delete obj2
        self.repository.delete('obj2', v2)
        # check if obj2 has really been deleted
        with self.assertRaises(Exception):
            self.repository.get('obj2')

        # now, deletion of obj1 should work; an exception raised here
        # propagates and is reported with its original traceback
        self.repository.delete('obj1', v1)
        # check if object really has been deleted
        with self.assertRaises(Exception):
            self.repository.get('obj1')