Ejemplo n.º 1
0
    def test_get_metrics(self):
        """Check that ``_get_metrics`` renders the spec's metrics as a table.

        A model spec is derived from the dataset fixture, given two metrics,
        committed and tagged; the string returned for that commit must equal
        a left-aligned two-column ``PrettyTable`` prefixed by ``metrics:``.
        """
        entity = 'model-ex'
        repo_type = MODELS
        category_path = os.path.join('vision-computer', 'images')
        metadata_root = os.path.join(self.test_dir, 'mdata', repo_type, 'metadata')

        m = Metadata(entity, self.test_dir, config, repo_type)
        m.init()

        entity_dir = os.path.join(metadata_root, category_path, entity)
        ensure_path_exists(entity_dir)
        spec_metadata_path = os.path.join(entity_dir, 'model-ex.spec')
        shutil.copy('hdata/dataset-ex.spec', spec_metadata_path)

        # Re-key the copied dataset spec as a model spec and attach metrics.
        spec_content = yaml_load(spec_metadata_path)
        spec_content[MODEL_SPEC_KEY] = deepcopy(spec_content[DATASET_SPEC_KEY])
        del spec_content[DATASET_SPEC_KEY]
        spec_content[MODEL_SPEC_KEY]['metrics'] = {'metric_1': 0, 'metric_2': 1}
        yaml_save(spec_content, spec_metadata_path)

        sha = m.commit(spec_metadata_path, category_path)
        m.tag_add('vision-computer__images__model-ex__1')

        metrics = m._get_metrics(entity, sha)

        # Build the table the method is expected to produce.
        expected_table = PrettyTable()
        expected_table.field_names = ['Name', 'Value']
        for column in ('Name', 'Value'):
            expected_table.align[column] = 'l'
        for metric_name, metric_value in (('metric_1', 0), ('metric_2', 1)):
            expected_table.add_row([metric_name, metric_value])
        expected_metrics = '\nmetrics:\n{}'.format(expected_table.get_string())

        self.assertEqual(metrics, expected_metrics)
Ejemplo n.º 2
0
    def test_diff_refs_modified_file(self):
        """Check that swapping a manifest entry is reported as one modification.

        The first commit stores ``files_mock`` as the manifest. The second
        commit stores a copy in which one hash entry is removed and a
        ``'NewHash'`` entry holding ``'7.jpg'`` is added. Diffing the two
        commits must yield no added files, no deleted files and exactly one
        modified file.
        """
        repo_type = DATASETS
        mdpath = os.path.join(self.test_dir, '.ml-git', repo_type, 'metadata')
        entity = 'dataset-ex'
        specpath = os.path.join('vision-computer', 'images', entity)
        # Work on a copy so the shared module-level config is left untouched.
        config_test = deepcopy(config)
        config_test['mlgit_path'] = '.ml-git'
        m = Metadata(entity, mdpath, config_test, repo_type)
        m.init()
        ensure_path_exists(os.path.join(mdpath, specpath, entity))
        manifestpath = os.path.join(mdpath, specpath, 'MANIFEST.yaml')
        shutil.copy('hdata/dataset-ex.spec',
                    os.path.join(mdpath, specpath, '{}.spec'.format(entity)))
        yaml_save(files_mock, manifestpath)
        sha1 = m.commit(manifestpath, 'test')

        files_mock_copy = deepcopy(files_mock)
        del files_mock_copy[
            'zdj7WZzR8Tw87Dx3dm76W5aehnT23GSbXbQ9qo73JgtwREGwB']
        files_mock_copy['NewHash'] = {'7.jpg'}

        yaml_save(files_mock_copy, manifestpath)
        sha2 = m.commit(manifestpath, 'test')

        added_files, deleted_files, modified_file = m.diff_refs_with_modified_files(
            entity, sha1, sha2)
        # assertEqual reports the actual count on failure, unlike
        # assertTrue(len(...) == n), which only reports "False is not true".
        self.assertEqual(len(added_files), 0)
        self.assertEqual(len(deleted_files), 0)
        self.assertEqual(len(modified_file), 1)
Ejemplo n.º 3
0
    def test_diff_refs_add_file(self):
        """Check that a new manifest entry is reported as one added file.

        The first commit stores ``files_mock`` as the manifest. The second
        commit stores a copy with one extra hash entry holding ``'11.jpg'``.
        Diffing the two commits must yield exactly one added file and no
        deleted or modified files.
        """
        repo_type = DATASETS
        mdpath = os.path.join(self.test_dir, '.ml-git', repo_type, 'metadata')
        entity = 'dataset-ex'
        specpath = os.path.join('vision-computer', 'images', entity)
        # Work on a copy so the shared module-level config is left untouched.
        config_test = deepcopy(config)
        config_test['mlgit_path'] = '.ml-git'
        m = Metadata(entity, mdpath, config_test, repo_type)
        m.init()
        ensure_path_exists(os.path.join(mdpath, specpath, entity))
        manifestpath = os.path.join(mdpath, specpath, 'MANIFEST.yaml')
        shutil.copy('hdata/dataset-ex.spec',
                    os.path.join(mdpath, specpath, '{}.spec'.format(entity)))
        yaml_save(files_mock, manifestpath)
        sha1 = m.commit(manifestpath, 'test')

        files_mock_copy = deepcopy(files_mock)
        files_mock_copy[
            'zPaksM5tNewHashQ2VABPvvfC3VW6wFRTWKvFhUW5QaDx6JMoma'] = {
                '11.jpg'
            }
        yaml_save(files_mock_copy, manifestpath)
        sha2 = m.commit(manifestpath, 'test')

        added_files, deleted_files, modified_file = m.diff_refs_with_modified_files(
            entity, sha1, sha2)
        # assertEqual reports the actual count on failure, unlike
        # assertTrue(len(...) == n), which only reports "False is not true".
        self.assertEqual(len(added_files), 1)
        self.assertEqual(len(deleted_files), 0)
        self.assertEqual(len(modified_file), 0)
Ejemplo n.º 4
0
 def checkout(self, tag, samples, options):
     """Check out ``tag`` and then any related dataset and labels tags.

     The metadata path is resolved from the configuration; when that
     raises ``RootPathException`` the error is logged as a warning and the
     repository is initialized on the fly instead. After the main checkout,
     ``options['with_dataset']`` and ``options['with_labels']`` are set to
     ``False`` (presumably so the related checkouts do not pull further
     related entities -- confirm against ``_checkout``). Each non-``None``
     related tag is then checked out under its own repository type, dataset
     first and labels second. Any exception raised by a related checkout is
     logged and does not propagate.

     :param tag: tag of the entity to check out.
     :param samples: forwarded unchanged to every ``_checkout`` call.
     :param options: dict forwarded to ``_checkout``; mutated in place.
     """
     try:
         metadata_path = get_metadata_path(self.__config)
     except RootPathException as root_error:
         log.warn(root_error, class_name=REPOSITORY_CLASS_NAME)
         metadata_path = self._initialize_repository_on_the_fly()

     dt_tag, lb_tag = self._checkout(tag, samples, options)
     options['with_dataset'] = False
     options['with_labels'] = False

     # (related tag, repository type, log message), in processing order.
     related_entities = (
         (dt_tag, 'dataset', 'Initializing related dataset download'),
         (lb_tag, 'labels', 'Initializing related labels download'),
     )
     for related_tag, related_type, start_message in related_entities:
         if related_tag is None:
             continue
         try:
             self.__repo_type = related_type
             related_metadata = Metadata('', metadata_path, self.__config,
                                         self.__repo_type)
             log.info(start_message, class_name=REPOSITORY_CLASS_NAME)
             if not related_metadata.check_exists():
                 related_metadata.init()
             self._checkout(related_tag, samples, options)
         except Exception as error:
             log.error('LocalRepository: [%s]' % error,
                       class_name=REPOSITORY_CLASS_NAME)
Ejemplo n.º 5
0
 def init(self):
     """Initialize the metadata repository for the current repository type.

     Resolves the metadata path from the configuration, builds a
     ``Metadata`` object for it and calls its ``init``. Any exception is
     logged as an error and swallowed; the method always returns ``None``.
     """
     try:
         Metadata(
             '', get_metadata_path(self.__config), self.__config, self.__repo_type
         ).init()
     except Exception as error:
         log.error(error, class_name=REPOSITORY_CLASS_NAME)
Ejemplo n.º 6
0
 def test_format_data_for_csv(self):
     """Check the CSV header and rows built from a single tag's metrics.

     With one tag carrying an ``accuracy`` metric, the header must be the
     four fixed columns followed by ``accuracy``, and the first output row
     must contain an ``accuracy`` entry.
     """
     metric_name = 'accuracy'
     entity_name = '{}-ex'.format(MODELS)
     metadata = Metadata(entity_name, self.test_dir, config, MODELS)
     metadata.init()

     csv_header, output_info = metadata._format_data_for_csv(
         [{PERFORMANCE_KEY: {metric_name: 10.0}}])

     self.assertEqual(
         [DATE, TAG, RELATED_DATASET_TABLE_INFO, RELATED_LABELS_TABLE_INFO, metric_name],
         csv_header)
     self.assertIn(metric_name, output_info[0])
Ejemplo n.º 7
0
 def test_get_related_entity_info(self):
     """Check the tag and display string extracted for a related dataset.

     For a spec whose dataset section carries the tag
     ``test__dataset-ex__1``, the method must return that tag unchanged
     together with the formatted text ``dataset-ex - (1)``.
     """
     dataset_tag = 'test__dataset-ex__1'
     related_spec = {
         DATASETS: {
             'tag': dataset_tag,
             'sha': '7f42830dbd035acb35f41359a5178c72d7cbc12c',
         },
     }
     metadata = Metadata('{}-ex'.format(MODELS), self.test_dir, config, MODELS)
     metadata.init()

     entity_tag, formatted_info = metadata._get_related_entity_info(related_spec, DATASETS)

     self.assertEqual(dataset_tag, entity_tag)
     self.assertEqual(formatted_info, 'dataset-ex - (1)')
Ejemplo n.º 8
0
 def test_default_branch(self):
     """Check that ``get_default_branch`` follows a branch change.

     A freshly initialized repository must report ``master``; after
     ``change_branch`` switches it to ``main`` the reported branch must be
     ``main``. The metadata path is cleared even when an assertion fails.
     """
     default_branch_for_empty_repo = 'master'
     new_branch = 'main'
     m = Metadata('', self.test_dir, config, DATASETS)
     m.init()
     try:
         self.assertTrue(m.check_exists())
         self.assertEqual(m.get_default_branch(), default_branch_for_empty_repo)
         self.change_branch(m.path, new_branch)
         self.assertNotEqual(m.get_default_branch(), default_branch_for_empty_repo)
         self.assertEqual(m.get_default_branch(), new_branch)
     finally:
         # Run the cleanup on failure too, so a failing assertion does not
         # leave the initialized repository behind.
         clear(m.path)
Ejemplo n.º 9
0
    def test_delete_git_reference(self):
        """Check that ``delete_git_reference`` blanks the remote URLs.

        Every URL of the repository's remote must be non-empty after
        ``init`` and empty once ``delete_git_reference`` has returned a
        truthy value.
        """
        m = Metadata(spec, self.test_dir, config, repotype)
        m.init()

        def remote_urls():
            # Re-open the repository each time so the current state is read.
            return Repo(m.path).remote().urls

        for configured_url in remote_urls():
            self.assertNotEqual(configured_url, '')

        self.assertTrue(m.delete_git_reference())

        for remaining_url in remote_urls():
            self.assertEqual(remaining_url, '')
Ejemplo n.º 10
0
    def test_export_metrics(self):
        """Check that exporting metrics as CSV writes a file and returns its text.

        The export must create ``<entity>-<PERFORMANCE_KEY>.<CSV>`` inside
        the test directory, and the returned buffer must contain both the
        expected header line and a row ending in the accuracy value.
        """
        entity_name = '{}-ex'.format(MODELS)
        m = Metadata(entity_name, self.test_dir, config, MODELS)
        m.init()

        data = m.export_metrics(
            entity_name, self.test_dir, CSV, [{PERFORMANCE_KEY: {'accuracy': 10.0}}])

        exported_file = os.path.join(
            self.test_dir, '{}-{}.{}'.format(entity_name, PERFORMANCE_KEY, CSV))
        self.assertTrue(os.path.exists(exported_file))

        expected_header = '{},{},{},{},accuracy'.format(
            DATE, TAG, RELATED_DATASET_TABLE_INFO, RELATED_LABELS_TABLE_INFO)
        exported_text = data.getvalue()
        self.assertIn(expected_header, exported_text)
        self.assertIn(',,,,10.0', exported_text)
Ejemplo n.º 11
0
    def test_get_spec_content_from_ref(self):
        """Check that a spec read back from a commit matches the file on disk.

        The dataset spec fixture is copied into the metadata repository,
        committed and tagged. The content returned by
        ``_get_spec_content_from_ref`` for the tag's commit must parse to the
        same data as the spec file that was committed.
        """
        mdpath = os.path.join(self.test_dir, 'mdata', DATASETS, 'metadata')
        specpath = 'dataset-ex'
        m = Metadata(specpath, self.test_dir, config, DATASETS)
        m.init()
        ensure_path_exists(os.path.join(mdpath, specpath))
        # Build the filesystem path with os.path.join rather than a
        # hard-coded '/' so it uses the platform separator.
        spec_metadata_path = os.path.join(mdpath, specpath, 'dataset-ex.spec')
        shutil.copy('hdata/dataset-ex.spec', spec_metadata_path)

        sha = m.commit(spec_metadata_path, specpath)
        tag = m.tag_add(sha)
        # Path of the spec inside the repository tree; kept as a literal
        # with '/' because it is not an OS filesystem path.
        path = 'dataset-ex/dataset-ex.spec'
        content = yaml_load_str(m._get_spec_content_from_ref(tag.commit, path))
        spec_file = yaml_load(spec_metadata_path)
        self.assertEqual(content, spec_file)
Ejemplo n.º 12
0
    def test_create_tag_info_table(self):
        """Check the table built from tag information plus metrics.

        The table returned by ``_create_tag_info_table`` must render
        identically to a ``Name``/``Value`` table listing the date, the
        related dataset, the related labels and the accuracy metric, in
        that order.
        """
        info_rows = (
            (DATE, 'date'),
            (RELATED_DATASET_TABLE_INFO, '1'),
            (RELATED_LABELS_TABLE_INFO, '2'),
        )
        metrics = {'accuracy': 10.0}

        expected_table = PrettyTable()
        expected_table.field_names = ['Name', 'Value']
        for name, value in info_rows:
            expected_table.add_row([name, value])
        expected_table.add_row(['accuracy', 10.0])

        m = Metadata('{}-ex'.format(MODELS), self.test_dir, config, MODELS)
        m.init()
        tag_table = m._create_tag_info_table(dict(info_rows), metrics)

        self.assertEqual(expected_table.get_string(), tag_table.get_string())
Ejemplo n.º 13
0
    def test_get_specs_to_compare(self):
        """Check the pairs produced by ``get_specs_to_compare``.

        The dataset spec fixture is committed and tagged. For every pair
        yielded, the first element must equal the ``manifest`` section of
        the spec on disk and the second element must not be ``None``.
        """
        mdpath = os.path.join(self.test_dir, 'mdata', DATASETS, 'metadata')
        specpath = 'dataset-ex'
        m = Metadata(specpath, self.test_dir, config, DATASETS)
        m.init()
        ensure_path_exists(os.path.join(mdpath, specpath))
        # Build the filesystem path with os.path.join rather than a
        # hard-coded '/' so it uses the platform separator.
        spec_metadata_path = os.path.join(mdpath, specpath, 'dataset-ex.spec')
        shutil.copy('hdata/dataset-ex.spec', spec_metadata_path)

        sha = m.commit(spec_metadata_path, specpath)
        m.tag_add(sha)
        specs = m.get_specs_to_compare(specpath)
        spec_file = yaml_load(spec_metadata_path)
        for c, v in specs:
            self.assertEqual(c, spec_file[DATASETS]['manifest'])
            # NOTE(review): the dict below is passed as the failure message,
            # not compared against ``v`` -- confirm whether an equality
            # check was intended.
            self.assertIsNotNone(v, {DATASETS: {'manifest': {}}})
Ejemplo n.º 14
0
    def test_last_tag_version(self):
        """Check ``get_last_tag_version`` against mocked tag lists.

        With tags ending in ``__1`` and ``__2`` the last version must be 2;
        with no tags at all it must be 0. ``list_tags`` is patched so no
        real tags are needed.
        """
        spec_path = 'dataset-ex'
        # patch.dict restores the shared module-level config on exit, so
        # the temporary 'mlgit_path' does not leak into other tests.
        with mock.patch.dict(config, {'mlgit_path': self.test_dir}):
            try:
                m = Metadata('', '', config, DATASETS)
                m.init()

                tag_list = [
                    'computer__images__dataset-ex__1',
                    'computer__images__dataset-ex__2'
                ]
                with mock.patch('ml_git.metadata.Metadata.list_tags',
                                return_value=tag_list):
                    last_version = m.get_last_tag_version(spec_path)
                self.assertEqual(last_version, 2)

                tag_list = []
                with mock.patch('ml_git.metadata.Metadata.list_tags',
                                return_value=tag_list):
                    last_version = m.get_last_tag_version(spec_path)
                self.assertEqual(last_version, 0)
            finally:
                # Clean up even when an assertion above fails.
                clear(self.test_dir)
Ejemplo n.º 15
0
    def test_get_metrics_without_metrics(self):
        """Check that ``_get_metrics`` returns '' when the spec has no metrics.

        A model spec is derived from the dataset fixture without adding a
        ``metrics`` section, then committed and tagged; the string returned
        for that commit must be empty.
        """
        repo_type = MODELS
        mdpath = os.path.join(self.test_dir, 'mdata', repo_type, 'metadata')
        specpath = os.path.join('vision-computer', 'images')
        entity = 'model-ex'
        m = Metadata(entity, self.test_dir, config, repo_type)
        m.init()
        ensure_path_exists(os.path.join(mdpath, specpath, entity))
        # Build the filesystem path with os.path.join rather than a
        # hard-coded '/' so it uses the platform separator.
        spec_metadata_path = os.path.join(mdpath, specpath, entity, 'model-ex.spec')
        shutil.copy('hdata/dataset-ex.spec', spec_metadata_path)

        # Re-key the copied dataset spec as a model spec.
        spec_file = yaml_load(spec_metadata_path)
        spec_file[MODEL_SPEC_KEY] = deepcopy(spec_file[DATASET_SPEC_KEY])
        del spec_file[DATASET_SPEC_KEY]
        yaml_save(spec_file, spec_metadata_path)

        tag = 'vision-computer__images__model-ex__1'
        sha = m.commit(spec_metadata_path, specpath)
        m.tag_add(tag)

        metrics = m._get_metrics(entity, sha)

        self.assertEqual(metrics, '')
Ejemplo n.º 16
0
 def test_init_local_repo(self):
     """Check that ``init`` creates a datasets metadata repository.

     After ``init``, ``check_exists`` must be truthy. The metadata path is
     cleared even when the assertion fails.
     """
     m = Metadata(spec, self.test_dir, config, DATASETS)
     m.init()
     try:
         self.assertTrue(m.check_exists())
     finally:
         # Run the cleanup on failure too, so a failing assertion does not
         # leave the initialized repository behind.
         clear(m.path)
Ejemplo n.º 17
0
 def test_init(self):
     """Check that ``init`` creates the metadata repository.

     After ``init``, ``check_exists`` must be truthy. The metadata path is
     cleared even when the assertion fails.
     """
     m = Metadata(spec, self.test_dir, config, repotype)
     m.init()
     try:
         self.assertTrue(m.check_exists())
     finally:
         # Run the cleanup on failure too, so a failing assertion does not
         # leave the initialized repository behind.
         clear(m.path)