Exemple #1
0
 def checkout(self, tag, samples, options):
     """Check out ``tag`` and then the dataset/labels tags related to it.

     The metadata path is read from the configuration; when that raises
     ``RootPathException`` the error is logged as a warning and the
     repository is initialized on the fly instead.

     ``options['with_dataset']`` and ``options['with_labels']`` are set to
     False after the main checkout and before the related checkouts run.
     Errors raised while handling a related tag are logged and do not
     propagate.

     :param tag: tag forwarded to ``self._checkout``.
     :param samples: forwarded unchanged to ``self._checkout``.
     :param options: dict forwarded to ``self._checkout``; mutated in place.
     """
     try:
         metadata_path = get_metadata_path(self.__config)
     except RootPathException as root_error:
         log.warn(root_error, class_name=REPOSITORY_CLASS_NAME)
         metadata_path = self._initialize_repository_on_the_fly()

     dt_tag, lb_tag = self._checkout(tag, samples, options)
     options['with_dataset'] = False
     options['with_labels'] = False

     # (related tag, repo type to switch to, message logged before download)
     related_checkouts = (
         (dt_tag, 'dataset', 'Initializing related dataset download'),
         (lb_tag, 'labels', 'Initializing related labels download'),
     )
     for related_tag, related_type, start_message in related_checkouts:
         if related_tag is None:
             continue
         try:
             self.__repo_type = related_type
             related_metadata = Metadata('', metadata_path, self.__config,
                                         self.__repo_type)
             log.info(start_message, class_name=REPOSITORY_CLASS_NAME)
             if not related_metadata.check_exists():
                 related_metadata.init()
             self._checkout(related_tag, samples, options)
         except Exception as error:
             log.error('LocalRepository: [%s]' % error,
                       class_name=REPOSITORY_CLASS_NAME)
Exemple #2
0
    def tag(self, spec, usr_tag):
        """Attach the user tag ``usr_tag`` to the current tag of ``spec``.

        :param spec: entity name used to look up the refs and the metadata.
        :param usr_tag: user supplied tag, combined with the current tag
            through ``UsrTag``.
        :return: True when the tag was created. False when the refs cannot
            be read, when there is no current tag, when the user tag already
            exists, or when adding the tag fails.
        """
        repo_type = self.__repo_type
        try:
            metadata_path = get_metadata_path(self.__config, repo_type)
            refs_path = get_refs_path(self.__config, repo_type)
            r = Refs(refs_path, spec, repo_type)
            curtag, _ = r.head()
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return False

        if curtag is None:
            log.error('No current tag for [%s]. commit first.' % spec,
                      class_name=REPOSITORY_CLASS_NAME)
            return False
        utag = UsrTag(curtag, usr_tag)

        # Check if usrtag exists before creating it
        log.debug('Check if tag [%s] already exists' % utag,
                  class_name=REPOSITORY_CLASS_NAME)
        m = Metadata(spec, metadata_path, self.__config, repo_type)
        if m._usrtag_exists(utag) is True:
            log.error('Tag [%s] already exists.' % utag,
                      class_name=REPOSITORY_CLASS_NAME)
            return False

        # ensure metadata repository is at the current tag/sha version
        m = Metadata('', metadata_path, self.__config, repo_type)
        m.checkout(curtag)

        # TODO: format to something that could be used for a checkout:
        # format: _._user_.._ + curtag + _.._ + usrtag
        # at checkout with usrtag look for pattern _._ then find usrtag in the list (split on '_.._')
        # adds usrtag to the metadata repository

        m = Metadata(spec, metadata_path, self.__config, repo_type)
        try:
            m.tag_add(utag)
        except Exception as e:
            # Only some exceptions carry a ``stderr`` text with a
            # "fatal:" line; fall back to the exception itself so the
            # handler never fails while reporting the original error.
            stderr = getattr(e, 'stderr', None)
            match = None
            if isinstance(stderr, str):
                match = re.search("stderr: 'fatal:(.*)'$", stderr)
            err = match.group(1) if match else e
            log.error(err, class_name=REPOSITORY_CLASS_NAME)
            return False
        log.info('Create Tag Successfull', class_name=REPOSITORY_CLASS_NAME)
        # checkout at metadata repository at master version
        m.checkout()
        return True
Exemple #3
0
def check_initialized_entity(context, entity_type, entity_name):
    """Call ``context.exit()`` when the ``entity_type`` metadata does not exist.

    The merged configuration is loaded to locate the metadata; an error
    message is logged before exiting.
    """
    cfg = merged_config_load()
    entity_metadata = Metadata(entity_name,
                               get_metadata_path(cfg, entity_type),
                               cfg, entity_type)
    if entity_metadata.check_exists():
        return
    log.error(output_messages['ERROR_NOT_INITIALIZED'] % entity_type)
    context.exit()
Exemple #4
0
    def _checkout_ref(self, ref):
        """Check out ``ref`` in the metadata of the current repository type."""
        entity_type = self.__repo_type
        metadata_dir = get_metadata_path(self.__config, entity_type)
        Metadata('', metadata_dir, self.__config, entity_type).checkout(ref)
Exemple #5
0
 def metadata_exists(self, entity):
     """Switch the repository type to ``entity`` and report whether its metadata exists.

     Note that the repository type stored on the instance is overwritten.
     """
     self.__repo_type = entity
     path = get_metadata_path(self.__config, self.__repo_type)
     return Metadata('', path, self.__config,
                     self.__repo_type).check_exists()
Exemple #6
0
    def log(self, spec, stat=False, fullstat=False):
        """Log the history information of ``spec``.

        When ``stat`` or ``fullstat`` is set, the file count and the
        humanized workspace size taken from the full index are appended to
        the logged text. Any error raised while collecting the log
        information is logged and ends the call.

        :param spec: entity name.
        :param stat: append the workspace totals.
        :param fullstat: append the workspace totals; also forwarded to
            ``Metadata.get_log_info``.
        """
        try:
            entity_type = self.__repo_type
            metadata_dir = get_metadata_path(self.__config, entity_type)
            entity_metadata = Metadata(spec, metadata_dir, self.__config,
                                       entity_type)
            index_path = get_index_path(self.__config, entity_type)
            report = entity_metadata.get_log_info(spec, fullstat)
        except Exception as error:
            log.error(error, class_name=REPOSITORY_CLASS_NAME)
            return

        full_index = FullIndex(spec, index_path)
        if stat or fullstat:
            total_size = full_index.get_total_size()
            count_line = 'Total of files: %s' % full_index.get_total_count()
            size_line = 'Workspace size: %s' % humanize.naturalsize(total_size)
            totals = '------------------------------------------------- \n{}\t{}'.format(
                count_line, size_line)
            report = '{}\n{}'.format(report, totals)

        log.info(report, class_name=REPOSITORY_CLASS_NAME)
Exemple #7
0
    def test_get_metrics(self):
        """``_get_metrics`` renders the spec metrics as a left-aligned table."""
        repo_type = MODELS
        entity = 'model-ex'
        categories = os.path.join('vision-computer', 'images')
        metadata_root = os.path.join(self.test_dir, 'mdata', repo_type, 'metadata')

        metadata = Metadata(entity, self.test_dir, config, repo_type)
        metadata.init()

        entity_dir = os.path.join(metadata_root, categories, entity)
        ensure_path_exists(entity_dir)
        spec_path = os.path.join(entity_dir, 'model-ex.spec')
        shutil.copy('hdata/dataset-ex.spec', spec_path)

        # Turn the copied dataset spec into a model spec carrying metrics.
        spec_content = yaml_load(spec_path)
        spec_content[MODEL_SPEC_KEY] = deepcopy(spec_content[DATASET_SPEC_KEY])
        del spec_content[DATASET_SPEC_KEY]
        spec_content[MODEL_SPEC_KEY]['metrics'] = {'metric_1': 0, 'metric_2': 1}
        yaml_save(spec_content, spec_path)

        sha = metadata.commit(spec_path, categories)
        metadata.tag_add('vision-computer__images__model-ex__1')

        metrics = metadata._get_metrics(entity, sha)

        expected_table = PrettyTable()
        expected_table.field_names = ['Name', 'Value']
        for column in ('Name', 'Value'):
            expected_table.align[column] = 'l'
        for row in (['metric_1', 0], ['metric_2', 1]):
            expected_table.add_row(row)
        expected_metrics = '\nmetrics:\n{}'.format(expected_table.get_string())

        self.assertEqual(metrics, expected_metrics)
Exemple #8
0
    def test_diff_refs_modified_file(self):
        """Swapping one manifest entry between two commits yields one modified file."""
        repo_type = DATASETS
        entity = 'dataset-ex'
        mdpath = os.path.join(self.test_dir, '.ml-git', repo_type, 'metadata')
        specpath = os.path.join('vision-computer', 'images', entity)

        config_test = deepcopy(config)
        config_test['mlgit_path'] = '.ml-git'
        metadata = Metadata(entity, mdpath, config_test, repo_type)
        metadata.init()

        entity_dir = os.path.join(mdpath, specpath)
        ensure_path_exists(os.path.join(entity_dir, entity))
        manifest_path = os.path.join(entity_dir, 'MANIFEST.yaml')
        shutil.copy('hdata/dataset-ex.spec',
                    os.path.join(entity_dir, '{}.spec'.format(entity)))

        # First commit: the untouched mock manifest.
        yaml_save(files_mock, manifest_path)
        first_sha = metadata.commit(manifest_path, 'test')

        # Second commit: one hash removed and a new hash added.
        changed_manifest = deepcopy(files_mock)
        del changed_manifest[
            'zdj7WZzR8Tw87Dx3dm76W5aehnT23GSbXbQ9qo73JgtwREGwB']
        changed_manifest['NewHash'] = {'7.jpg'}
        yaml_save(changed_manifest, manifest_path)
        second_sha = metadata.commit(manifest_path, 'test')

        added, deleted, modified = metadata.diff_refs_with_modified_files(
            entity, first_sha, second_sha)
        self.assertEqual(len(added), 0)
        self.assertEqual(len(deleted), 0)
        self.assertEqual(len(modified), 1)
Exemple #9
0
    def test_diff_refs_add_file(self):
        """Adding one manifest entry between two commits yields one added file."""
        repo_type = DATASETS
        entity = 'dataset-ex'
        mdpath = os.path.join(self.test_dir, '.ml-git', repo_type, 'metadata')
        specpath = os.path.join('vision-computer', 'images', entity)

        config_test = deepcopy(config)
        config_test['mlgit_path'] = '.ml-git'
        metadata = Metadata(entity, mdpath, config_test, repo_type)
        metadata.init()

        entity_dir = os.path.join(mdpath, specpath)
        ensure_path_exists(os.path.join(entity_dir, entity))
        manifest_path = os.path.join(entity_dir, 'MANIFEST.yaml')
        shutil.copy('hdata/dataset-ex.spec',
                    os.path.join(entity_dir, '{}.spec'.format(entity)))

        # First commit: the untouched mock manifest.
        yaml_save(files_mock, manifest_path)
        first_sha = metadata.commit(manifest_path, 'test')

        # Second commit: same manifest plus one extra entry.
        extended_manifest = deepcopy(files_mock)
        new_hash = 'zPaksM5tNewHashQ2VABPvvfC3VW6wFRTWKvFhUW5QaDx6JMoma'
        extended_manifest[new_hash] = {'11.jpg'}
        yaml_save(extended_manifest, manifest_path)
        second_sha = metadata.commit(manifest_path, 'test')

        added, deleted, modified = metadata.diff_refs_with_modified_files(
            entity, first_sha, second_sha)
        self.assertEqual(len(added), 1)
        self.assertEqual(len(deleted), 0)
        self.assertEqual(len(modified), 0)
Exemple #10
0
 def init(self):
     """Initialize the metadata of the current repository type.

     Any exception is logged as an error and not propagated.
     """
     try:
         metadata_dir = get_metadata_path(self.__config)
         Metadata('', metadata_dir, self.__config, self.__repo_type).init()
     except Exception as error:
         log.error(error, class_name=REPOSITORY_CLASS_NAME)
Exemple #11
0
 def test_format_data_for_csv(self):
     """``_format_data_for_csv`` returns the fixed columns followed by the metric names."""
     entity_name = '{}-ex'.format(MODELS)
     metadata = Metadata(entity_name, self.test_dir, config, MODELS)
     metadata.init()

     header, rows = metadata._format_data_for_csv(
         [{PERFORMANCE_KEY: {'accuracy': 10.0}}])

     self.assertEqual(
         [DATE, TAG, RELATED_DATASET_TABLE_INFO, RELATED_LABELS_TABLE_INFO, 'accuracy'],
         header)
     self.assertIn('accuracy', rows[0])
Exemple #12
0
def get_last_entity_version(entity_type, entity_name):
    """Return the last tag version of ``entity_name`` plus one.

    Logs an error and returns None when the ``entity_type`` metadata does
    not exist.
    """
    cfg = merged_config_load()
    entity_metadata = Metadata(entity_name,
                               get_metadata_path(cfg, entity_type),
                               cfg, entity_type)
    if entity_metadata.check_exists():
        return entity_metadata.get_last_tag_version(entity_name) + 1
    log.error(output_messages['ERROR_NOT_INITIALIZED'] % entity_type)
    return None
Exemple #13
0
    def _checkout_ref(self, ref=None):
        """Check out ``ref`` in the metadata repository.

        When ``ref`` is None the branch returned by
        ``Metadata.get_default_branch`` is checked out instead.
        """
        entity_type = self.__repo_type
        metadata_dir = get_metadata_path(self.__config, entity_type)
        entity_metadata = Metadata('', metadata_dir, self.__config, entity_type)
        target = entity_metadata.get_default_branch() if ref is None else ref
        entity_metadata.checkout(target)
Exemple #14
0
 def test_get_related_entity_info(self):
     """``_get_related_entity_info`` returns the related tag and its formatted text."""
     entity_name = '{}-ex'.format(MODELS)
     metadata = Metadata(entity_name, self.test_dir, config, MODELS)
     metadata.init()

     related_tag = 'test__dataset-ex__1'
     spec = {
         DATASETS: {
             'tag': related_tag,
             'sha': '7f42830dbd035acb35f41359a5178c72d7cbc12c',
         },
     }
     entity_tag, formatted_info = metadata._get_related_entity_info(spec, DATASETS)

     self.assertEqual(related_tag, entity_tag)
     self.assertEqual(formatted_info, 'dataset-ex - (1)')
Exemple #15
0
 def repo_remote_del(self, global_conf=False):
     """Remove the git remote of the current repository type.

     ``remote_del`` is only called when ``Metadata.delete_git_reference``
     returns a truthy value. Any exception is logged and not propagated.
     """
     try:
         metadata_dir = get_metadata_path(self.__config)
         entity_metadata = Metadata('', metadata_dir, self.__config,
                                    self.__repo_type)
         if not entity_metadata.delete_git_reference():
             return
         remote_del(self.__repo_type, global_conf)
     except Exception as error:
         log.error(error, class_name=REPOSITORY_CLASS_NAME)
Exemple #16
0
 def repo_remote_add(self, repo_type, mlgit_remote, global_conf=False):
     """Register ``mlgit_remote`` for ``repo_type`` and point the metadata at it.

     The configuration held by the instance is reloaded after the remote
     is added. Any exception is logged and not propagated.
     """
     try:
         remote_add(repo_type, mlgit_remote, global_conf)
         # Reload so the metadata below is built from the updated settings.
         self.__config = config_load()
         metadata_dir = get_metadata_path(self.__config)
         Metadata('', metadata_dir, self.__config,
                  self.__repo_type).remote_set_url(mlgit_remote)
     except Exception as error:
         log.error(error, class_name=REPOSITORY_CLASS_NAME)
Exemple #17
0
 def list_tag(self, spec):
     """Print every tag of ``spec``, one per line.

     Any exception is logged as an error and not propagated.
     """
     entity_type = self.__repo_type
     try:
         metadata_dir = get_metadata_path(self.__config, entity_type)
         entity_metadata = Metadata(spec, metadata_dir, self.__config,
                                    entity_type)
         for tag_name in entity_metadata.list_tags(spec):
             print(tag_name)
     except Exception as error:
         log.error(error, class_name=REPOSITORY_CLASS_NAME)
Exemple #18
0
 def test_get_target_tag(self):
     """``_get_target_tag`` raises for three requests and resolves version 2."""
     tags = [
         'computer__images__dataset-ex__1',
         'computer__images__dataset-ex__2',
         'computer__videos__dataset-ex__1',
     ]
     metadata = Metadata('', self.test_dir, config, DATASETS)

     # (entity, version) pairs that are expected to raise RuntimeError.
     failing_requests = (
         ('dataset-ex', -1),
         ('dataset-ex', 1),
         ('dataset-wrong', 1),
     )
     for entity, version in failing_requests:
         with self.assertRaises(RuntimeError):
             metadata._get_target_tag(tags, entity, version)

     self.assertEqual(metadata._get_target_tag(tags, 'dataset-ex', 2),
                      'computer__images__dataset-ex__2')
     clear(metadata.path)
Exemple #19
0
    def push(self, spec, retry=2, clear_on_fail=False):
        """Push the objects of ``spec`` and, on success, its metadata.

        The call stops early (returning None) when the repository paths
        cannot be resolved, when the git user name or email is not
        configured, when fetching the metadata fails, or when the spec file
        cannot be found. The metadata is pushed only when the object push
        returns 0; the object-store log is then reset.

        :param spec: entity name.
        :param retry: forwarded to ``LocalRepository.push``.
        :param clear_on_fail: forwarded to ``LocalRepository.push``.
        """
        repo_type = self.__repo_type
        try:
            objects_path = get_objects_path(self.__config, repo_type)
            metadata_path = get_metadata_path(self.__config, repo_type)
            refs_path = get_refs_path(self.__config, repo_type)
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return

        met = Metadata(spec, metadata_path, self.__config, repo_type)
        fields = met.git_user_config()
        if None in fields.values():
            log.error(
                'Your name and email address need to be configured in git. '
                'Please see the commands below:',
                class_name=REPOSITORY_CLASS_NAME)

            log.error('git config --global user.name \'Your Name\'',
                      class_name=REPOSITORY_CLASS_NAME)
            # Show a usable placeholder address in the suggested command.
            log.error('git config --global user.email you@example.com',
                      class_name=REPOSITORY_CLASS_NAME)
            return
        if met.fetch() is False:
            return

        ref = Refs(refs_path, spec, repo_type)
        tag, _ = ref.branch()
        categories_path = get_path_with_categories(tag)

        spec_path, spec_file = None, None
        try:
            spec_path, spec_file = search_spec_file(self.__repo_type, spec,
                                                    categories_path)
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)

        if spec_path is None:
            return

        full_spec_path = os.path.join(spec_path, spec_file)

        repo = LocalRepository(self.__config, objects_path, repo_type)
        ret = repo.push(objects_path, full_spec_path, retry, clear_on_fail)

        # ensure first we're on master !
        met.checkout()
        if ret == 0:
            # push metadata spec to LocalRepository git repository
            try:
                met.push()
            except Exception as e:
                log.error(e, class_name=REPOSITORY_CLASS_NAME)
                return
            MultihashFS(objects_path).reset_log()
Exemple #20
0
    def test_delete_git_reference(self):
        """``delete_git_reference`` returns True and leaves every remote url empty."""
        metadata = Metadata(spec, self.test_dir, config, repotype)
        metadata.init()

        # Before: every remote url is non-empty.
        for remote_url in Repo(metadata.path).remote().urls:
            self.assertNotEqual(remote_url, '')

        deleted = metadata.delete_git_reference()
        self.assertTrue(deleted)

        # After: every remote url is the empty string.
        for remote_url in Repo(metadata.path).remote().urls:
            self.assertEqual(remote_url, '')
Exemple #21
0
 def test_default_branch(self):
     """``get_default_branch`` follows the branch the repository is switched to."""
     initial_branch = 'master'
     switched_branch = 'main'

     metadata = Metadata('', self.test_dir, config, DATASETS)
     metadata.init()
     self.assertTrue(metadata.check_exists())
     self.assertEqual(metadata.get_default_branch(), initial_branch)

     self.change_branch(metadata.path, switched_branch)
     self.assertNotEqual(metadata.get_default_branch(), initial_branch)
     self.assertEqual(metadata.get_default_branch(), switched_branch)
     clear(metadata.path)
Exemple #22
0
    def test_clone_empty_config_repo(self):
        """Cloning with empty git urls leaves the metadata non-existent."""
        empty_remote_config = {
            'mlgit_path': './mdata',
            'mlgit_conf': 'config.yaml',
            'verbose': 'info',
        }
        # Every entity type gets its own empty git url entry.
        for entity_type in (DATASETS, LABELS, MODELS):
            empty_remote_config[entity_type] = {'git': ''}

        metadata = Metadata('', self.test_dir, empty_remote_config, DATASETS)
        metadata.clone_config_repo()
        self.assertFalse(metadata.check_exists())
Exemple #23
0
    def test_export_metrics(self):
        """``export_metrics`` writes the CSV file and returns its content."""
        entity_name = '{}-ex'.format(MODELS)
        metadata = Metadata(entity_name, self.test_dir, config, MODELS)
        metadata.init()

        exported = metadata.export_metrics(
            entity_name, self.test_dir, CSV,
            [{PERFORMANCE_KEY: {'accuracy': 10.0}}])

        csv_name = '{}-{}.{}'.format(entity_name, PERFORMANCE_KEY, CSV)
        self.assertTrue(os.path.exists(os.path.join(self.test_dir, csv_name)))

        expected_header = '{},{},{},{},accuracy'.format(
            DATE, TAG, RELATED_DATASET_TABLE_INFO, RELATED_LABELS_TABLE_INFO)
        self.assertIn(expected_header, exported.getvalue())
        self.assertIn(',,,,10.0', exported.getvalue())
Exemple #24
0
 def update(self):
     """Update the metadata of the current repository type.

     A ``GitError`` is reported together with its ``stderr``; any other
     exception is logged as is. Neither is propagated.
     """
     entity_type = self.__repo_type
     try:
         metadata_dir = get_metadata_path(self.__config, entity_type)
         Metadata('', metadata_dir, self.__config, entity_type).update()
     except GitError as git_error:
         message = ('Could not update metadata. Check your remote configuration. %s'
                    % git_error.stderr)
         log.error(message, class_name=REPOSITORY_CLASS_NAME)
     except Exception as error:
         log.error(error, class_name=REPOSITORY_CLASS_NAME)
Exemple #25
0
 def _initialize_repository_on_the_fly(self):
     """Initialize the project from the global settings and return the metadata path.

     :raises RootPathException: when the global configuration file does
         not exist.
     """
     if not os.path.exists(get_global_config_path()):
         raise RootPathException(
             'You are not in an initialized ml-git repository and do not have a global configuration.'
         )

     log.info('Initializing the project with global settings',
              class_name=REPOSITORY_CLASS_NAME)
     init_mlgit()
     save_global_config_in_local()

     metadata_path = get_metadata_path(self.__config)
     metadata_missing = not os.path.exists(metadata_path)
     if metadata_missing:
         Metadata('', metadata_path, self.__config,
                  self.__repo_type).init()
     return metadata_path
Exemple #26
0
    def test_get_spec_content_from_ref(self):
        """The spec read back from the tagged commit equals the spec file on disk."""
        entity = 'dataset-ex'
        metadata_root = os.path.join(self.test_dir, 'mdata', DATASETS, 'metadata')
        metadata = Metadata(entity, self.test_dir, config, DATASETS)
        metadata.init()

        entity_dir = os.path.join(metadata_root, entity)
        ensure_path_exists(entity_dir)
        spec_on_disk = entity_dir + '/dataset-ex.spec'
        shutil.copy('hdata/dataset-ex.spec', spec_on_disk)

        sha = metadata.commit(spec_on_disk, entity)
        tag = metadata.tag_add(sha)

        raw_spec = metadata._get_spec_content_from_ref(
            tag.commit, 'dataset-ex/dataset-ex.spec')
        content = yaml_load_str(raw_spec)
        self.assertEqual(content, yaml_load(spec_on_disk))
Exemple #27
0
    def test_get_specs_to_compare(self):
        """Each pair from ``get_specs_to_compare`` starts with the committed manifest."""
        entity = 'dataset-ex'
        metadata_root = os.path.join(self.test_dir, 'mdata', DATASETS, 'metadata')
        metadata = Metadata(entity, self.test_dir, config, DATASETS)
        metadata.init()

        entity_dir = os.path.join(metadata_root, entity)
        ensure_path_exists(entity_dir)
        spec_on_disk = entity_dir + '/dataset-ex.spec'
        shutil.copy('hdata/dataset-ex.spec', spec_on_disk)

        sha = metadata.commit(spec_on_disk, entity)
        metadata.tag_add(sha)

        spec_pairs = metadata.get_specs_to_compare(entity)
        expected_spec = yaml_load(spec_on_disk)
        for first, second in spec_pairs:
            self.assertEqual(first, expected_spec[DATASETS]['manifest'])
            # NOTE(review): the dict is passed as the failure message of
            # assertIsNotNone, it is not compared with ``second`` - confirm intent.
            self.assertIsNotNone(second, {DATASETS: {'manifest': {}}})
Exemple #28
0
    def test_create_tag_info_table(self):
        """``_create_tag_info_table`` lists the tag info rows followed by the metrics."""
        expected_rows = (
            [DATE, 'date'],
            [RELATED_DATASET_TABLE_INFO, '1'],
            [RELATED_LABELS_TABLE_INFO, '2'],
            ['accuracy', 10.0],
        )
        expected_table = PrettyTable()
        expected_table.field_names = ['Name', 'Value']
        for row in expected_rows:
            expected_table.add_row(row)

        entity_name = '{}-ex'.format(MODELS)
        metadata = Metadata(entity_name, self.test_dir, config, MODELS)
        metadata.init()

        tag_info = {
            DATE: 'date',
            RELATED_DATASET_TABLE_INFO: '1',
            RELATED_LABELS_TABLE_INFO: '2',
        }
        tag_table = metadata._create_tag_info_table(tag_info, {'accuracy': 10.0})

        self.assertEqual(expected_table.get_string(), tag_table.get_string())
Exemple #29
0
    def test_tag_exist(self):
        """``tag_exists`` returns a non-None metadata entry for the prepared spec."""
        entity = 'dataset-ex'
        mdpath = os.path.join(self.test_dir, 'metadata')
        entity_dir = os.path.join(mdpath, entity)
        ensure_path_exists(entity_dir)
        shutil.copy('hdata/dataset-ex.spec', entity_dir + '/dataset-ex.spec')
        yaml_save(files_mock, os.path.join(entity_dir, 'MANIFEST.yaml'))

        # The shared module-level config is pointed at the test directory.
        config['mlgit_path'] = self.test_dir
        metadata = Metadata(entity, mdpath, config, repotype)
        repository = Repository(config, repotype)
        repository.init()

        _, _, tag_metadata = metadata.tag_exists(self.test_dir)
        self.assertIsNotNone(tag_metadata)
Exemple #30
0
    def test_get_tag(self):
        """``get_tag`` with version -1 returns the only tag offered by ``list_tags``."""
        entity = 'dataset-ex'
        mdpath = os.path.join(self.test_dir, 'metadata')
        entity_dir = os.path.join(mdpath, entity)
        ensure_path_exists(entity_dir)
        shutil.copy('hdata/dataset-ex.spec', entity_dir + '/dataset-ex.spec')
        yaml_save(files_mock, os.path.join(entity_dir, 'MANIFEST.yaml'))

        # The shared module-level config is pointed at the test directory.
        config['mlgit_path'] = self.test_dir
        metadata = Metadata(entity, mdpath, config, DATASETS)
        repository = Repository(config, DATASETS)
        repository.init()

        available_tags = ['computer__images__dataset-ex__1']
        with mock.patch('ml_git.metadata.Metadata.list_tags',
                        return_value=available_tags):
            target_tag = metadata.get_tag(entity, -1)

        self.assertEqual(target_tag, available_tags[0])
        clear(metadata.path)