def test_get_metrics(self):
    """Check that ``_get_metrics`` renders a spec's metrics as a table.

    A model spec is derived from the example dataset spec, given two
    metrics, committed and tagged. The returned text must equal a
    left-aligned ``PrettyTable`` preceded by a ``metrics:`` heading.
    """
    repo_type = MODELS
    entity = 'model-ex'
    specpath = os.path.join('vision-computer', 'images')
    entity_dir = os.path.join(self.test_dir, 'mdata', repo_type, 'metadata', specpath, entity)
    expected_rows = (('metric_1', 0), ('metric_2', 1))

    metadata = Metadata(entity, self.test_dir, config, repo_type)
    metadata.init()

    ensure_path_exists(entity_dir)
    spec_metadata_path = os.path.join(entity_dir, 'model-ex.spec')
    shutil.copy('hdata/dataset-ex.spec', spec_metadata_path)

    # Move the dataset section under the model key and attach the metrics.
    spec_content = yaml_load(spec_metadata_path)
    spec_content[MODEL_SPEC_KEY] = deepcopy(spec_content.pop(DATASET_SPEC_KEY))
    spec_content[MODEL_SPEC_KEY]['metrics'] = dict(expected_rows)
    yaml_save(spec_content, spec_metadata_path)

    sha = metadata.commit(spec_metadata_path, specpath)
    metadata.tag_add('vision-computer__images__model-ex__1')

    metrics = metadata._get_metrics(entity, sha)

    expected_table = PrettyTable()
    expected_table.field_names = ['Name', 'Value']
    for column in ('Name', 'Value'):
        expected_table.align[column] = 'l'
    for metric_name, metric_value in expected_rows:
        expected_table.add_row([metric_name, metric_value])

    self.assertEqual(metrics, '\nmetrics:\n{}'.format(expected_table.get_string()))
def test_diff_refs_modified_file(self):
    """Check that swapping a manifest hash is reported as one modified file.

    The manifest is committed twice: first with ``files_mock``, then with
    one hash entry removed and a new hash added for ``7.jpg``. The diff
    between the two commits must list no added files, no deleted files
    and exactly one modified file.
    """
    repo_type = DATASETS
    mdpath = os.path.join(self.test_dir, '.ml-git', repo_type, 'metadata')
    entity = 'dataset-ex'
    specpath = os.path.join('vision-computer', 'images', entity)

    # Work on a copy so the shared module-level config is left untouched.
    config_test = deepcopy(config)
    config_test['mlgit_path'] = '.ml-git'
    m = Metadata(entity, mdpath, config_test, repo_type)
    m.init()

    ensure_path_exists(os.path.join(mdpath, specpath, entity))
    manifestpath = os.path.join(mdpath, specpath, 'MANIFEST.yaml')
    shutil.copy('hdata/dataset-ex.spec', os.path.join(mdpath, specpath, '{}.spec'.format(entity)))

    # First revision: the unmodified mock manifest.
    yaml_save(files_mock, manifestpath)
    sha1 = m.commit(manifestpath, 'test')

    # Second revision: one hash entry replaced by a new one.
    files_mock_copy = deepcopy(files_mock)
    del files_mock_copy['zdj7WZzR8Tw87Dx3dm76W5aehnT23GSbXbQ9qo73JgtwREGwB']
    files_mock_copy['NewHash'] = {'7.jpg'}
    yaml_save(files_mock_copy, manifestpath)
    sha2 = m.commit(manifestpath, 'test')

    added_files, deleted_files, modified_file = m.diff_refs_with_modified_files(entity, sha1, sha2)

    # assertEqual reports the actual length on failure, unlike assertTrue(len(...) == n).
    self.assertEqual(len(added_files), 0)
    self.assertEqual(len(deleted_files), 0)
    self.assertEqual(len(modified_file), 1)
def test_diff_refs_add_file(self):
    """Check that a new manifest entry is reported as one added file.

    The manifest is committed twice: first with ``files_mock``, then with
    an extra hash entry for ``11.jpg``. The diff between the two commits
    must list exactly one added file and no deleted or modified files.
    """
    repo_type = DATASETS
    mdpath = os.path.join(self.test_dir, '.ml-git', repo_type, 'metadata')
    entity = 'dataset-ex'
    specpath = os.path.join('vision-computer', 'images', entity)

    # Work on a copy so the shared module-level config is left untouched.
    config_test = deepcopy(config)
    config_test['mlgit_path'] = '.ml-git'
    m = Metadata(entity, mdpath, config_test, repo_type)
    m.init()

    ensure_path_exists(os.path.join(mdpath, specpath, entity))
    manifestpath = os.path.join(mdpath, specpath, 'MANIFEST.yaml')
    shutil.copy('hdata/dataset-ex.spec', os.path.join(mdpath, specpath, '{}.spec'.format(entity)))

    # First revision: the unmodified mock manifest.
    yaml_save(files_mock, manifestpath)
    sha1 = m.commit(manifestpath, 'test')

    # Second revision: same manifest plus one new hash entry.
    files_mock_copy = deepcopy(files_mock)
    files_mock_copy['zPaksM5tNewHashQ2VABPvvfC3VW6wFRTWKvFhUW5QaDx6JMoma'] = {'11.jpg'}
    yaml_save(files_mock_copy, manifestpath)
    sha2 = m.commit(manifestpath, 'test')

    added_files, deleted_files, modified_file = m.diff_refs_with_modified_files(entity, sha1, sha2)

    # assertEqual reports the actual length on failure, unlike assertTrue(len(...) == n).
    self.assertEqual(len(added_files), 1)
    self.assertEqual(len(deleted_files), 0)
    self.assertEqual(len(modified_file), 0)
def checkout(self, tag, samples, options):
    """Check out ``tag`` and then any related dataset and labels tags.

    If the metadata path cannot be resolved because of a
    ``RootPathException``, the exception is logged as a warning and the
    repository is initialised on the fly. After the main checkout,
    ``options['with_dataset']`` and ``options['with_labels']`` are set to
    ``False`` before the related checkouts run. A failure in a related
    checkout is logged as an error and not re-raised.
    """
    try:
        metadata_path = get_metadata_path(self.__config)
    except RootPathException as e:
        log.warn(e, class_name=REPOSITORY_CLASS_NAME)
        metadata_path = self._initialize_repository_on_the_fly()

    dt_tag, lb_tag = self._checkout(tag, samples, options)
    options.update({'with_dataset': False, 'with_labels': False})

    # (related tag, repository type to switch to, progress message)
    related_checkouts = (
        (dt_tag, 'dataset', 'Initializing related dataset download'),
        (lb_tag, 'labels', 'Initializing related labels download'),
    )
    for related_tag, related_type, progress_message in related_checkouts:
        if related_tag is None:
            continue
        try:
            self.__repo_type = related_type
            related_metadata = Metadata('', metadata_path, self.__config, self.__repo_type)
            log.info(progress_message, class_name=REPOSITORY_CLASS_NAME)
            if not related_metadata.check_exists():
                related_metadata.init()
            self._checkout(related_tag, samples, options)
        except Exception as e:
            log.error('LocalRepository: [%s]' % e, class_name=REPOSITORY_CLASS_NAME)
def init(self):
    """Initialise the metadata repository for the current repository type.

    Any exception raised while resolving the metadata path or running
    the initialisation is logged as an error and not propagated.
    """
    try:
        Metadata('', get_metadata_path(self.__config), self.__config, self.__repo_type).init()
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
def test_format_data_for_csv(self):
    """Check the CSV header and rows produced by ``_format_data_for_csv``.

    For a single tag carrying an ``accuracy`` metric, the header must be
    the four fixed columns followed by ``accuracy``, and the first output
    row must contain ``accuracy``.
    """
    metric_name = 'accuracy'
    metadata = Metadata('{}-ex'.format(MODELS), self.test_dir, config, MODELS)
    metadata.init()

    csv_header, output_info = metadata._format_data_for_csv(
        [{PERFORMANCE_KEY: {metric_name: 10.0}}]
    )

    self.assertEqual(
        [DATE, TAG, RELATED_DATASET_TABLE_INFO, RELATED_LABELS_TABLE_INFO, metric_name],
        csv_header,
    )
    self.assertIn(metric_name, output_info[0])
def test_get_related_entity_info(self):
    """Check the tag and formatted label from ``_get_related_entity_info``.

    For a spec whose dataset section references ``test__dataset-ex__1``,
    the method must return that tag together with ``dataset-ex - (1)``.
    """
    metadata = Metadata('{}-ex'.format(MODELS), self.test_dir, config, MODELS)
    metadata.init()

    related_tag = 'test__dataset-ex__1'
    related_spec = {
        DATASETS: {
            'tag': related_tag,
            'sha': '7f42830dbd035acb35f41359a5178c72d7cbc12c',
        }
    }

    entity_tag, formatted_info = metadata._get_related_entity_info(related_spec, DATASETS)

    self.assertEqual(related_tag, entity_tag)
    self.assertEqual(formatted_info, 'dataset-ex - (1)')
def test_default_branch(self):
    """Check that ``get_default_branch`` follows the repository's branch.

    A freshly initialised repository must report ``master``; after
    switching to ``main`` through ``self.change_branch`` it must report
    ``main`` instead.
    """
    default_branch_for_empty_repo = 'master'
    new_branch = 'main'
    m = Metadata('', self.test_dir, config, DATASETS)
    m.init()
    try:
        self.assertTrue(m.check_exists())
        self.assertEqual(m.get_default_branch(), default_branch_for_empty_repo)
        self.change_branch(m.path, new_branch)
        self.assertNotEqual(m.get_default_branch(), default_branch_for_empty_repo)
        self.assertEqual(m.get_default_branch(), new_branch)
    finally:
        # Clean up even when an assertion fails, so no repository is left behind.
        clear(m.path)
def test_delete_git_reference(self):
    """Check that ``delete_git_reference`` blanks every remote URL.

    Before the call each remote URL must be non-empty; the call must
    return a truthy value, and afterwards each remote URL must be ``''``.
    """
    metadata = Metadata(spec, self.test_dir, config, repotype)
    metadata.init()

    def remote_urls():
        # Re-open the repository each time so the URLs are read fresh.
        return Repo(metadata.path).remote().urls

    for url_before in remote_urls():
        self.assertNotEqual(url_before, '')

    self.assertTrue(metadata.delete_git_reference())

    for url_after in remote_urls():
        self.assertEqual(url_after, '')
def test_export_metrics(self):
    """Check that ``export_metrics`` writes a CSV file and returns its data.

    The export must create ``<entity>-<PERFORMANCE_KEY>.<CSV>`` inside
    the test directory, and the returned buffer must contain both the
    expected header line and a row ending in the metric value.
    """
    entity_name = '{}-ex'.format(MODELS)
    metadata = Metadata(entity_name, self.test_dir, config, MODELS)
    metadata.init()

    tag_infos = [{PERFORMANCE_KEY: {'accuracy': 10.0}}]
    data = metadata.export_metrics(entity_name, self.test_dir, CSV, tag_infos)

    exported_name = '{}-{}.{}'.format(entity_name, PERFORMANCE_KEY, CSV)
    self.assertTrue(os.path.exists(os.path.join(self.test_dir, exported_name)))

    expected_header = '{},{},{},{},accuracy'.format(
        DATE, TAG, RELATED_DATASET_TABLE_INFO, RELATED_LABELS_TABLE_INFO
    )
    self.assertIn(expected_header, data.getvalue())
    self.assertIn(',,,,10.0', data.getvalue())
def test_get_spec_content_from_ref(self):
    """Check that ``_get_spec_content_from_ref`` returns the committed spec.

    The example spec is copied into the metadata directory, committed
    and tagged. The content read back from the tag's commit, parsed as
    YAML, must equal the spec file on disk.
    """
    mdpath = os.path.join(self.test_dir, 'mdata', DATASETS, 'metadata')
    specpath = 'dataset-ex'
    m = Metadata(specpath, self.test_dir, config, DATASETS)
    m.init()
    ensure_path_exists(os.path.join(mdpath, specpath))

    # Use os.path.join rather than a hard-coded '/' so the path is built
    # the same way as in the sibling tests and on every platform.
    spec_metadata_path = os.path.join(mdpath, specpath, 'dataset-ex.spec')
    shutil.copy('hdata/dataset-ex.spec', spec_metadata_path)

    sha = m.commit(spec_metadata_path, specpath)
    tag = m.tag_add(sha)

    # Path of the spec inside the repository, as stored in git.
    path = 'dataset-ex/dataset-ex.spec'
    content = yaml_load_str(m._get_spec_content_from_ref(tag.commit, path))
    spec_file = yaml_load(spec_metadata_path)
    self.assertEqual(content, spec_file)
def test_create_tag_info_table(self):
    """Check the table produced by ``_create_tag_info_table``.

    The rendered table must match a ``PrettyTable`` with ``Name`` and
    ``Value`` columns that lists the tag info entries followed by the
    metric entries.
    """
    tag_info = {DATE: 'date', RELATED_DATASET_TABLE_INFO: '1', RELATED_LABELS_TABLE_INFO: '2'}
    metrics = {'accuracy': 10.0}

    # Build the expected table first: tag info rows, then metric rows.
    expected_table = PrettyTable()
    expected_table.field_names = ['Name', 'Value']
    for row_name, row_value in list(tag_info.items()) + list(metrics.items()):
        expected_table.add_row([row_name, row_value])

    metadata = Metadata('{}-ex'.format(MODELS), self.test_dir, config, MODELS)
    metadata.init()

    tag_table = metadata._create_tag_info_table(tag_info, metrics)

    self.assertEqual(expected_table.get_string(), tag_table.get_string())
def test_get_specs_to_compare(self):
    """Check the manifest pairs yielded by ``get_specs_to_compare``.

    The example spec is committed and tagged. For every pair yielded,
    the first element must equal the manifest section of the spec on
    disk and the second element must not be ``None``.
    """
    mdpath = os.path.join(self.test_dir, 'mdata', DATASETS, 'metadata')
    specpath = 'dataset-ex'
    m = Metadata(specpath, self.test_dir, config, DATASETS)
    m.init()
    ensure_path_exists(os.path.join(mdpath, specpath))

    # Use os.path.join rather than a hard-coded '/' so the path is built
    # the same way as in the sibling tests and on every platform.
    spec_metadata_path = os.path.join(mdpath, specpath, 'dataset-ex.spec')
    shutil.copy('hdata/dataset-ex.spec', spec_metadata_path)

    sha = m.commit(spec_metadata_path, specpath)
    m.tag_add(sha)

    specs = m.get_specs_to_compare(specpath)
    spec_file = yaml_load(spec_metadata_path)
    for c, v in specs:
        self.assertEqual(c, spec_file[DATASETS]['manifest'])
        # NOTE(review): the second argument of assertIsNotNone is only the
        # failure message, so the dict below is never compared against v.
        # Confirm whether an equality check was intended.
        self.assertIsNotNone(v, {DATASETS: {'manifest': {}}})
def test_last_tag_version(self):
    """Check ``get_last_tag_version`` against a patched tag list.

    With two tags ending in ``__1`` and ``__2`` the reported version must
    be 2; with no tags at all it must be 0. ``Metadata.list_tags`` is
    patched, so the tags never have to exist in the repository.
    """
    from copy import deepcopy  # local import keeps this block self-contained

    spec_path = 'dataset-ex'

    # Work on a copy: writing 'mlgit_path' into the shared module-level
    # config would leak this test's directory into every later test.
    config_test = deepcopy(config)
    config_test['mlgit_path'] = self.test_dir
    m = Metadata('', '', config_test, DATASETS)
    m.init()
    try:
        tag_list = [
            'computer__images__dataset-ex__1',
            'computer__images__dataset-ex__2',
        ]
        with mock.patch('ml_git.metadata.Metadata.list_tags', return_value=tag_list):
            last_version = m.get_last_tag_version(spec_path)
            self.assertEqual(last_version, 2)

        tag_list = []
        with mock.patch('ml_git.metadata.Metadata.list_tags', return_value=tag_list):
            last_version = m.get_last_tag_version(spec_path)
            self.assertEqual(last_version, 0)
    finally:
        # Clean up even when an assertion fails.
        clear(self.test_dir)
def test_get_metrics_without_metrics(self):
    """Check that ``_get_metrics`` returns ``''`` for a spec with no metrics.

    A model spec is derived from the example dataset spec without adding
    a ``metrics`` entry, then committed and tagged.
    """
    repo_type = MODELS
    mdpath = os.path.join(self.test_dir, 'mdata', repo_type, 'metadata')
    specpath = os.path.join('vision-computer', 'images')
    entity = 'model-ex'
    m = Metadata(entity, self.test_dir, config, repo_type)
    m.init()
    ensure_path_exists(os.path.join(mdpath, specpath, entity))

    # Use os.path.join rather than a hard-coded '/' so the path is built
    # the same way as in test_get_metrics and on every platform.
    spec_metadata_path = os.path.join(mdpath, specpath, entity, 'model-ex.spec')
    shutil.copy('hdata/dataset-ex.spec', spec_metadata_path)

    # Move the dataset section under the model key; no metrics are added.
    spec_file = yaml_load(spec_metadata_path)
    spec_file[MODEL_SPEC_KEY] = deepcopy(spec_file[DATASET_SPEC_KEY])
    del spec_file[DATASET_SPEC_KEY]
    yaml_save(spec_file, spec_metadata_path)

    tag = 'vision-computer__images__model-ex__1'
    sha = m.commit(spec_metadata_path, specpath)
    m.tag_add(tag)

    metrics = m._get_metrics(entity, sha)
    self.assertEqual(metrics, '')
def test_init_local_repo(self):
    """Check that a dataset metadata repository exists after ``init``."""
    m = Metadata(spec, self.test_dir, config, DATASETS)
    m.init()
    try:
        self.assertTrue(m.check_exists())
    finally:
        # Clean up even when the assertion fails.
        clear(m.path)
def test_init(self):
    """Check that the metadata repository for ``repotype`` exists after ``init``."""
    m = Metadata(spec, self.test_dir, config, repotype)
    m.init()
    try:
        self.assertTrue(m.check_exists())
    finally:
        # Clean up even when the assertion fails.
        clear(m.path)