예제 #1
0
    def log(self, spec, stat=False, fullstat=False):
        """Log the history of ``spec``, optionally followed by workspace totals.

        The history text is obtained from ``Metadata.get_log_info``. When
        ``stat`` or ``fullstat`` is truthy, the file count and total size
        reported by the entity's ``FullIndex`` are appended before logging.
        Any exception raised while resolving paths or reading the history is
        logged as an error and ends the call.
        """
        try:
            entity_type = self.__repo_type
            meta = Metadata(spec,
                            get_metadata_path(self.__config, entity_type),
                            self.__config, entity_type)
            index_path = get_index_path(self.__config, entity_type)
            history = meta.get_log_info(spec, fullstat)
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return

        full_index = FullIndex(spec, index_path)
        if not (stat or fullstat):
            # Nothing to append: emit the plain history.
            log.info(history, class_name=REPOSITORY_CLASS_NAME)
            return

        total_size = full_index.get_total_size()
        files_line = 'Total of files: %s' % full_index.get_total_count()
        size_line = 'Workspace size: %s' % humanize.naturalsize(total_size)
        summary_template = '------------------------------------------------- \n{}\t{}'
        summary = summary_template.format(files_line, size_line)

        log.info('{}\n{}'.format(history, summary),
                 class_name=REPOSITORY_CLASS_NAME)
예제 #2
0
 def metadata_exists(self, entity):
     """Return the result of ``Metadata.check_exists`` for ``entity``.

     Side effect: the instance's current repo type is switched to
     ``entity`` before the check is made.
     """
     self.__repo_type = entity
     current_type = self.__repo_type
     path = get_metadata_path(self.__config, current_type)
     return Metadata('', path, self.__config, current_type).check_exists()
예제 #3
0
 def checkout(self, tag, samples, options):
     """Check out ``tag`` and then any related dataset/labels tags.

     If the metadata path cannot be resolved because of a
     ``RootPathException``, the repository is initialized on the fly.
     After the main checkout, ``options['with_dataset']`` and
     ``options['with_labels']`` are set to ``False`` and each related tag
     returned by ``_checkout`` (when not ``None``) is checked out under its
     own repo type. Failures of a related checkout are logged, not raised.
     """
     try:
         metadata_path = get_metadata_path(self.__config)
     except RootPathException as e:
         log.warn(e, class_name=REPOSITORY_CLASS_NAME)
         metadata_path = self._initialize_repository_on_the_fly()

     dt_tag, lb_tag = self._checkout(tag, samples, options)
     options['with_dataset'] = False
     options['with_labels'] = False

     # (related tag, repo type to switch to, message logged before download)
     related_checkouts = (
         (dt_tag, 'dataset', 'Initializing related dataset download'),
         (lb_tag, 'labels', 'Initializing related labels download'),
     )
     for related_tag, entity, start_message in related_checkouts:
         if related_tag is None:
             continue
         try:
             self.__repo_type = entity
             related_metadata = Metadata('', metadata_path, self.__config,
                                         self.__repo_type)
             log.info(start_message, class_name=REPOSITORY_CLASS_NAME)
             if not related_metadata.check_exists():
                 related_metadata.init()
             self._checkout(related_tag, samples, options)
         except Exception as e:
             log.error('LocalRepository: [%s]' % e,
                       class_name=REPOSITORY_CLASS_NAME)
예제 #4
0
    def export(self, bucket, tag, retry):
        """Export the entity version identified by ``tag`` to ``bucket``.

        The tag is validated first, then the metadata repository is checked
        out at ``tag`` and ``LocalRepository.export_tag`` is invoked. The
        metadata repository is always switched back through
        ``_checkout_ref()`` afterwards, even when the export raises.

        Args:
            bucket: Destination passed through to ``export_tag``.
            tag: Tag to export.
            retry: Retry setting passed through to ``export_tag``.

        Returns:
            ``(None, None)`` when validation or the checkout fails;
            ``None`` otherwise.
        """
        try:
            # Both calls are made for validation only; their results are
            # not used further down.
            _categories_path, _spec_name, _ = spec_parse(tag)
            get_root_path()
            if not self._tag_exists(tag):
                return None, None
        except InvalidGitRepositoryError:
            log.error('You are not in an initialized ml-git repository.',
                      class_name=LOCAL_REPOSITORY_CLASS_NAME)
            return None, None
        except Exception as e:
            log.error(e, class_name=LOCAL_REPOSITORY_CLASS_NAME)
            return None, None

        try:
            self._checkout_ref(tag)
        except Exception:
            log.error('Unable to checkout to %s' % tag,
                      class_name=REPOSITORY_CLASS_NAME)
            return None, None

        try:
            local = LocalRepository(
                self.__config,
                get_objects_path(self.__config, self.__repo_type),
                self.__repo_type)
            local.export_tag(
                get_metadata_path(self.__config, self.__repo_type),
                tag, bucket, retry)
        finally:
            # Never leave the metadata repository parked on ``tag``: a
            # failing export previously skipped this restore step.
            self._checkout_ref()
예제 #5
0
    def remote_fsck(self, spec, retries=2, thorough=False, paranoid=False):
        """Run ``LocalRepository.remote_fsck`` for the current tag of ``spec``.

        The metadata repository is checked out at the tag reported by
        ``Refs.branch`` and the spec file is located; afterwards the
        repository is switched back via ``_checkout_ref()``. Errors during
        preparation are logged and end the call; nothing is done either when
        no spec path is found.
        """
        entity_type = self.__repo_type
        try:
            metadata_path = get_metadata_path(self.__config, entity_type)
            objects_path = get_objects_path(self.__config, entity_type)
            current_ref = Refs(get_refs_path(self.__config, entity_type),
                               spec, entity_type)
            tag, _sha = current_ref.branch()

            categories_path = get_path_with_categories(tag)

            self._checkout_ref(tag)
            spec_dir, spec_filename = search_spec_file(
                self.__repo_type, spec, categories_path)
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return

        if spec_dir is None:
            return

        spec_file_path = os.path.join(spec_dir, spec_filename)
        local_repo = LocalRepository(self.__config, objects_path, entity_type)
        local_repo.remote_fsck(metadata_path, tag, spec_file_path, retries,
                               thorough, paranoid)

        # Leave the tag and go back to the default reference.
        self._checkout_ref()
예제 #6
0
    def _checkout_ref(self, ref):
        """Check out ``ref`` in the metadata repository of the current repo type."""
        entity_type = self.__repo_type
        path = get_metadata_path(self.__config, entity_type)
        Metadata('', path, self.__config, entity_type).checkout(ref)
예제 #7
0
def check_initialized_entity(context, entity_type, entity_name):
    """Log an error and call ``context.exit()`` if the entity is not initialized.

    "Initialized" means ``Metadata.check_exists`` is truthy for the metadata
    path of ``entity_type`` in the merged configuration.
    """
    config = merged_config_load()
    entity_metadata = Metadata(entity_name,
                               get_metadata_path(config, entity_type),
                               config, entity_type)
    if entity_metadata.check_exists():
        return
    log.error(output_messages['ERROR_NOT_INITIALIZED'] % entity_type)
    context.exit()
예제 #8
0
 def test_paths(self):
     """Each path helper yields a non-empty value; refs path mentions '.ml-git'."""
     config = config_load()
     path_getters = (get_index_path, get_objects_path, get_cache_path,
                     get_metadata_path)
     for path_getter in path_getters:
         self.assertTrue(len(path_getter(config)) > 0)
     self.assertTrue('.ml-git' in get_refs_path(config))
예제 #9
0
 def init(self):
     """Initialize the metadata repository; log any failure as an error."""
     try:
         Metadata('', get_metadata_path(self.__config), self.__config,
                  self.__repo_type).init()
     except Exception as e:
         log.error(e, class_name=REPOSITORY_CLASS_NAME)
예제 #10
0
def get_last_entity_version(entity_type, entity_name):
    """Return ``Metadata.get_last_tag_version(entity_name) + 1``.

    When the metadata for ``entity_type`` does not exist, an error is logged
    and ``None`` is returned instead.
    """
    config = merged_config_load()
    entity_metadata = Metadata(entity_name,
                               get_metadata_path(config, entity_type),
                               config, entity_type)
    if entity_metadata.check_exists():
        return entity_metadata.get_last_tag_version(entity_name) + 1
    log.error(output_messages['ERROR_NOT_INITIALIZED'] % entity_type)
    return None
예제 #11
0
    def _checkout_ref(self, ref=None):
        """Check out ``ref`` in the metadata repository.

        When ``ref`` is ``None`` the value of ``Metadata.get_default_branch``
        is checked out instead.
        """
        entity_type = self.__repo_type
        meta = Metadata('', get_metadata_path(self.__config, entity_type),
                        self.__config, entity_type)
        target = meta.get_default_branch() if ref is None else ref
        meta.checkout(target)
예제 #12
0
 def repo_remote_add(self, repo_type, mlgit_remote, global_conf=False):
     """Add ``mlgit_remote`` for ``repo_type`` and set it on the metadata repo.

     The configuration is reloaded after ``remote_add`` so that the
     metadata object is built from the updated settings. Any exception is
     logged as an error.
     """
     try:
         remote_add(repo_type, mlgit_remote, global_conf)
         self.__config = config_load()
         meta = Metadata('', get_metadata_path(self.__config),
                         self.__config, self.__repo_type)
         meta.remote_set_url(mlgit_remote)
     except Exception as e:
         log.error(e, class_name=REPOSITORY_CLASS_NAME)
예제 #13
0
 def list_tag(self, spec):
     """Print every tag returned by ``Metadata.list_tags(spec)``, one per line.

     Any exception is logged as an error.
     """
     entity_type = self.__repo_type
     try:
         meta = Metadata(spec,
                         get_metadata_path(self.__config, entity_type),
                         self.__config, entity_type)
         for tag_name in meta.list_tags(spec):
             print(tag_name)
     except Exception as e:
         log.error(e, class_name=REPOSITORY_CLASS_NAME)
예제 #14
0
 def repo_remote_del(self, global_conf=False):
     """Delete the git reference and, if that succeeds, the configured remote.

     ``remote_del`` is only called when ``Metadata.delete_git_reference``
     returns a truthy value. Any exception is logged as an error.
     """
     try:
         meta = Metadata('', get_metadata_path(self.__config),
                         self.__config, self.__repo_type)
         if not meta.delete_git_reference():
             return
         remote_del(self.__repo_type, global_conf)
     except Exception as e:
         log.error(e, class_name=REPOSITORY_CLASS_NAME)
예제 #15
0
    def push(self, spec, retry=2, clear_on_fail=False):
        """Push the entity ``spec`` through ``LocalRepository`` and then its metadata.

        Steps, in order: resolve the objects/metadata/refs paths, verify that
        git user name and email are configured, fetch the metadata repository,
        locate the spec file for the current tag, call
        ``LocalRepository.push`` and, only when it returns ``0``, push the
        metadata and reset the ``MultihashFS`` log.

        Args:
            spec: Entity name.
            retry: Passed through to ``LocalRepository.push``.
            clear_on_fail: Passed through to ``LocalRepository.push``.

        Returns:
            Always ``None``; every failure path logs (where applicable) and
            returns early.
        """
        repo_type = self.__repo_type
        try:
            objects_path = get_objects_path(self.__config, repo_type)
            metadata_path = get_metadata_path(self.__config, repo_type)
            refs_path = get_refs_path(self.__config, repo_type)
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return

        met = Metadata(spec, metadata_path, self.__config, repo_type)
        fields = met.git_user_config()
        # A ``None`` value means at least one git user setting is missing.
        if None in fields.values():
            log.error(
                'Your name and email address need to be configured in git. '
                'Please see the commands below:',
                class_name=REPOSITORY_CLASS_NAME)

            log.error('git config --global user.name \'Your Name\'',
                      class_name=REPOSITORY_CLASS_NAME)
            # NOTE(review): the address in this message looks like an
            # e-mail obfuscation artifact — confirm the intended text.
            log.error('git config --global user.email [email protected]',
                      class_name=REPOSITORY_CLASS_NAME)
            return
        # Only an explicit ``False`` from fetch aborts the push.
        if met.fetch() is False:
            return

        ref = Refs(refs_path, spec, repo_type)
        tag, sha = ref.branch()
        categories_path = get_path_with_categories(tag)

        spec_path, spec_file = None, None
        try:
            spec_path, spec_file = search_spec_file(self.__repo_type, spec,
                                                    categories_path)
        except Exception as e:
            # Logged only; the ``None`` check below ends the call.
            log.error(e, class_name=REPOSITORY_CLASS_NAME)

        if spec_path is None:
            return

        full_spec_path = os.path.join(spec_path, spec_file)

        repo = LocalRepository(self.__config, objects_path, repo_type)
        ret = repo.push(objects_path, full_spec_path, retry, clear_on_fail)

        # Check out with no argument before pushing metadata
        # (presumably the default branch — TODO confirm).
        met.checkout()
        if ret == 0:
            # The object push reported success: push the metadata repository.
            try:
                met.push()
            except Exception as e:
                log.error(e, class_name=REPOSITORY_CLASS_NAME)
                return
            # Reached only when the metadata push did not raise.
            MultihashFS(objects_path).reset_log()
예제 #16
0
 def _fetch(self, tag, samples, retries=2, bare=False):
     """Delegate to ``LocalRepository.fetch`` for ``tag`` and return its result.

     Returns ``None`` (after logging the error) when anything raises.
     """
     entity_type = self.__repo_type
     try:
         objects_path = get_objects_path(self.__config, entity_type)
         metadata_path = get_metadata_path(self.__config, entity_type)
         fetcher = LocalRepository(self.__config, objects_path, entity_type)
         return fetcher.fetch(metadata_path, tag, samples, retries, bare)
     except Exception as e:
         log.error(e, class_name=REPOSITORY_CLASS_NAME)
         return None
예제 #17
0
 def update(self):
     """Call ``Metadata.update`` for the current repo type.

     A ``GitError`` is reported with a hint about the remote configuration
     plus its ``stderr``; any other exception is logged as is.
     """
     entity_type = self.__repo_type
     try:
         meta = Metadata('', get_metadata_path(self.__config, entity_type),
                         self.__config, entity_type)
         meta.update()
     except GitError as error:
         message = ('Could not update metadata. Check your remote configuration. %s'
                    % error.stderr)
         log.error(message, class_name=REPOSITORY_CLASS_NAME)
     except Exception as e:
         log.error(e, class_name=REPOSITORY_CLASS_NAME)
예제 #18
0
 def _initialize_repository_on_the_fly(self):
     """Initialize the project from the global configuration file.

     Returns the metadata path, initializing the metadata there when the
     path does not exist yet. Raises ``RootPathException`` when the global
     configuration file is absent.
     """
     if not os.path.exists(get_global_config_path()):
         raise RootPathException(
             'You are not in an initialized ml-git repository and do not have a global configuration.'
         )

     log.info('Initializing the project with global settings',
              class_name=REPOSITORY_CLASS_NAME)
     init_mlgit()
     save_global_config_in_local()

     metadata_path = get_metadata_path(self.__config)
     metadata_missing = not os.path.exists(metadata_path)
     if metadata_missing:
         Metadata('', metadata_path, self.__config,
                  self.__repo_type).init()
     return metadata_path
예제 #19
0
    def tag(self, spec, usr_tag):
        """Attach the user tag ``usr_tag`` to the current tag of ``spec``.

        The current tag is read from ``Refs.head``; the metadata repository
        is checked out at that tag, the user tag is added, and the
        repository is checked out again with no argument.

        Args:
            spec: Entity name.
            usr_tag: User-supplied tag to combine with the current tag.

        Returns:
            ``True`` when the tag was created. ``False`` when paths or the
            current tag cannot be resolved, or when the user tag already
            exists. ``None`` when ``Metadata.tag_add`` raises (kept as is for
            backward compatibility with existing callers).
        """
        repo_type = self.__repo_type
        try:
            metadata_path = get_metadata_path(self.__config, repo_type)
            refs_path = get_refs_path(self.__config, repo_type)
            r = Refs(refs_path, spec, repo_type)
            curtag, sha = r.head()
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return False

        if curtag is None:
            log.error('No current tag for [%s]. commit first.' % spec,
                      class_name=REPOSITORY_CLASS_NAME)
            return False
        utag = UsrTag(curtag, usr_tag)

        # Check that the user tag does not exist before creating it.
        log.debug('Check if tag [%s] already exists' % utag,
                  class_name=REPOSITORY_CLASS_NAME)
        m = Metadata(spec, metadata_path, self.__config, repo_type)
        if m._usrtag_exists(utag) is True:
            log.error('Tag [%s] already exists.' % utag,
                      class_name=REPOSITORY_CLASS_NAME)
            return False

        # Ensure the metadata repository is at the current tag/sha version.
        m = Metadata('', metadata_path, self.__config, repo_type)
        m.checkout(curtag)

        # TODO: format to something that could be used for a checkout:
        # format: _._user_.._ + curtag + _.._ + usrtag
        # at checkout with usrtag look for pattern _._ then find usrtag in
        # the list (split on '_.._'); adds usrtag to the metadata repository.

        m = Metadata(spec, metadata_path, self.__config, repo_type)
        try:
            m.tag_add(utag)
        except Exception as e:
            # Only git errors expose ``stderr`` with a "fatal:" line. Fall
            # back to the exception itself so the handler cannot raise
            # AttributeError on a missing attribute or a non-matching regex.
            stderr = getattr(e, 'stderr', None)
            match = None
            if isinstance(stderr, str):
                match = re.search(r"stderr: 'fatal:(.*)'$", stderr)
            err = match.group(1) if match else e
            log.error(err, class_name=REPOSITORY_CLASS_NAME)
            return
        log.info('Create Tag Successfull', class_name=REPOSITORY_CLASS_NAME)
        # Check out with no argument once the tag has been added.
        m.checkout()
        return True
예제 #20
0
 def list(self):
     """List the entities managed under the current repo type.

     Raises nothing to the caller: a missing metadata repository, a
     ``GitError`` or any other exception is logged as an error. A
     ``GitError`` whose ``stderr`` says no file matched is reported with a
     friendlier message.
     """
     entity_type = self.__repo_type
     try:
         meta = Metadata('', get_metadata_path(self.__config, entity_type),
                         self.__config, entity_type)
         if not meta.check_exists():
             raise RuntimeError('The %s doesn\'t have been initialized.' %
                                self.__repo_type)
         meta.checkout()
         meta.list(title='ML ' + entity_type)
     except GitError as g:
         message = g.stderr
         if 'did not match any file(s) known' in message:
             message = 'You don\'t have any entity being managed.'
         log.error(message, class_name=REPOSITORY_CLASS_NAME)
     except Exception as e:
         log.error(e, class_name=REPOSITORY_CLASS_NAME)
예제 #21
0
    def fetch_tag(self, tag, samples, retries=2):
        """Check out ``tag`` in the metadata repository and fetch its data.

        When ``_fetch`` reports a falsy result, ``Objects.fsck`` is run with
        ``remove_corrupted=True`` and the metadata repository is checked out
        with no argument. Exceptions are logged and end the call; otherwise
        ``_checkout_ref()`` is called at the end.
        """
        entity_type = self.__repo_type
        try:
            objects_path = get_objects_path(self.__config, entity_type)
            meta = Metadata('', get_metadata_path(self.__config, entity_type),
                            self.__config, entity_type)
            meta.checkout(tag)

            if not self._fetch(tag, samples, retries):
                Objects('', objects_path).fsck(remove_corrupted=True)
                meta.checkout()
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return

        # Switch the metadata repository back once the fetch is over.
        self._checkout_ref()
예제 #22
0
    def show(self, spec):
        """Show ``spec`` as of the tag recorded in its HEAD reference.

        Path-resolution errors are logged and end the call. When
        ``Refs.head`` yields no tag, an informational message is logged and
        nothing is shown.
        """
        entity_type = self.__repo_type
        try:
            metadata_path = get_metadata_path(self.__config, entity_type)
            refs_path = get_refs_path(self.__config, entity_type)
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return

        head_tag, _sha = Refs(refs_path, spec, entity_type).head()
        if head_tag is None:
            log.info('No HEAD for [%s]' % spec,
                     class_name=LOCAL_REPOSITORY_CLASS_NAME)
            return

        meta = Metadata('', metadata_path, self.__config, entity_type)
        # Move to the HEAD tag, show the spec, then check out with no argument.
        meta.checkout(head_tag)
        meta.show(spec)
        meta.checkout()
예제 #23
0
    def __init__(self, config, repo_type=EntityType.MODELS.value):
        """Store the metadata path and git URL for ``repo_type``, then init the base.

        ``self.path`` comes from ``get_metadata_path`` and ``self.git`` from
        ``config[repo_type]['git']``; both are forwarded, with ``repo_type``,
        to the parent initializer.
        """
        self.path = get_metadata_path(config, repo_type)
        entity_settings = config[repo_type]
        self.git = entity_settings['git']

        super(MetadataManager, self).__init__(self.git, self.path, repo_type)
예제 #24
0
    def reset(self, spec, reset_type, head):
        """Reset the entity ``spec`` according to ``reset_type`` and ``head``.

        Args:
            spec: Entity name.
            reset_type: One of ``'--soft'``, ``'--mixed'`` or ``'--hard'``
                (the only values the code branches on).
            head: Compared against the ``HEAD`` and ``HEAD_1`` constants.

        Returns:
            Always ``None``. The call is a no-op for a soft or mixed reset to
            ``HEAD``, and ends early when setup fails, when the metadata
            reset raises, or when a hard reset cannot locate the spec path.
        """
        log.info(output_messages['INFO_INITIALIZING_RESET'] %
                 (reset_type, head),
                 class_name=REPOSITORY_CLASS_NAME)
        # Soft/mixed reset to HEAD: nothing to do.
        if (reset_type == '--soft'
                or reset_type == '--mixed') and head == HEAD:
            return
        try:
            repo_type = self.__repo_type
            metadata_path = get_metadata_path(self.__config, repo_type)
            index_path = get_index_path(self.__config, repo_type)
            refs_path = get_refs_path(self.__config, repo_type)
            object_path = get_objects_path(self.__config, repo_type)
            met = Metadata(spec, metadata_path, self.__config, repo_type)
            ref = Refs(refs_path, spec, repo_type)
            idx = MultihashIndex(spec, index_path, object_path)
            fidx = FullIndex(spec, index_path)
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return

        # Tag before the reset.
        tag = met.get_current_tag()
        categories_path = get_path_with_categories(str(tag))
        # Manifest file as it is before the reset; loaded now so it can be
        # diffed against the manifest after the reset.
        manifest_path = os.path.join(metadata_path, categories_path, spec,
                                     MANIFEST_FILE)
        _manifest = Manifest(manifest_path).load()

        if head == HEAD_1:  # HEAD~1
            try:
                # Reset the metadata repository.
                met.reset()
            except Exception:
                # NOTE(review): the failure is swallowed without logging.
                return

        # Tag after the reset.
        tag_after_reset = met.get_current_tag()
        sha = met.sha_from_tag(tag_after_reset)

        # Update the ml-git ref HEAD.
        ref.update_head(str(tag_after_reset), sha)

        # Locate the spec; the path is needed to reset the workspace in
        # case of --hard.
        path, file = None, None
        try:
            path, file = search_spec_file(self.__repo_type, spec,
                                          categories_path)
        except Exception as e:
            # Logged only; ``path`` stays ``None``.
            log.error(e, class_name=REPOSITORY_CLASS_NAME)

        if reset_type == '--hard' and path is None:
            return

        # Manifest at the same path, read after the reset.
        _manifest_changed = Manifest(manifest_path)

        hash_files, file_names = _manifest_changed.get_diff(_manifest)
        idx_mf = idx.get_index().load()

        if reset_type == '--soft':
            # Add the entries back to the index manifest and mark the file
            # names with status ``Status.a``.
            idx.update_index_manifest(idx_mf)
            idx.update_index_manifest(hash_files)
            fidx.update_index_status(file_names, Status.a.name)

        else:  # --hard or --mixed
            # Drop the changed hashes and the index entries for the files.
            file_names.update(*idx_mf.values())
            objs = MultihashFS(index_path)
            for key_hash in hash_files:
                objs.remove_hash(key_hash)
            idx.remove_manifest()
            fidx.remove_from_index_yaml(file_names)
            fidx.remove_uncommitted()

        if reset_type == '--hard':  # reset workspace
            remove_from_workspace(file_names, path, spec)
예제 #25
0
    def __init__(self, config, type='model'):
        """Store the metadata path and git URL for ``type``, then init the base.

        ``self.path`` comes from ``get_metadata_path`` and ``self.git`` from
        ``config[type]['git']``; both are forwarded to the parent initializer.
        """
        self.path = get_metadata_path(config, type)
        entity_settings = config[type]
        self.git = entity_settings['git']

        super(MetadataManager, self).__init__(self.git, self.path)
예제 #26
0
    def add(self, spec, file_path, bump_version=False, run_fsck=False):
        """Add the files of entity ``spec`` to the ml-git index.

        Args:
            spec: Entity name.
            file_path: Passed through to ``MultihashIndex.add``.
            bump_version: When true, ``increment_version_in_spec`` is called
                on the spec file; a falsy result aborts the call.
            run_fsck: When true, ``self.fsck()`` is run at the end.

        Returns:
            Always ``None``; each validation or failure path returns early,
            most of them after logging an error.
        """
        repo_type = self.__repo_type

        # Whether the config for this entity type declares these keys.
        is_shared_objects = 'objects_path' in self.__config[repo_type]
        is_shared_cache = 'cache_path' in self.__config[repo_type]

        if not validate_config_spec_hash(self.__config):
            log.error(
                '.ml-git/config.yaml invalid. It should look something like this:\n%s'
                % get_yaml_str(
                    get_sample_config_spec('somebucket', 'someprofile',
                                           'someregion')),
                class_name=REPOSITORY_CLASS_NAME)
            return None

        path, file = None, None
        try:

            refs_path = get_refs_path(self.__config, repo_type)
            index_path = get_index_path(self.__config, repo_type)
            metadata_path = get_metadata_path(self.__config, repo_type)
            cache_path = get_cache_path(self.__config, repo_type)
            objects_path = get_objects_path(self.__config, repo_type)
            repo = LocalRepository(self.__config, objects_path, repo_type)
            mutability, check_mutability = repo.get_mutability_from_spec(
                spec, repo_type)
            # A 'sampling' entry under the index metadata blocks new additions.
            sampling_flag = os.path.exists(
                os.path.join(index_path, 'metadata', spec, 'sampling'))
            if sampling_flag:
                log.error(
                    'You cannot add new data to an entity that is based on a checkout with the --sampling option.',
                    class_name=REPOSITORY_CLASS_NAME)
                return

            if not mutability:
                return

            if not check_mutability:
                log.error('Spec mutability cannot be changed.',
                          class_name=REPOSITORY_CLASS_NAME)
                return

            if not self._has_new_data(repo, spec):
                return None

            ref = Refs(refs_path, spec, repo_type)
            tag, sha = ref.branch()

            categories_path = get_path_with_categories(tag)

            path, file = search_spec_file(self.__repo_type, spec,
                                          categories_path)
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return

        if path is None:
            return
        spec_path = os.path.join(path, file)
        if not self._is_spec_valid(spec_path):
            return None

        # Check the metadata state before touching the index, to avoid
        # creating an unstable state.
        log.debug('Repository: check if tag already exists',
                  class_name=REPOSITORY_CLASS_NAME)

        m = Metadata(spec, metadata_path, self.__config, repo_type)

        if not m.check_exists():
            log.error('The %s has not been initialized' % self.__repo_type,
                      class_name=REPOSITORY_CLASS_NAME)
            return

        # Best-effort metadata update: any failure is ignored and the add
        # proceeds with the metadata as it is.
        try:
            m.update()
        except Exception:
            pass

        # Current manifest file for the tag.
        manifest = self._get_current_manifest_file(m, tag)

        try:
            # Add the chunks to the ml-git index.
            log.info('%s adding path [%s] to ml-git index' % (repo_type, path),
                     class_name=REPOSITORY_CLASS_NAME)
            with change_mask_for_routine(is_shared_objects):
                idx = MultihashIndex(spec, index_path, objects_path,
                                     mutability, cache_path)
                idx.add(path, manifest, file_path)

            # Create hard links in the ml-git cache.
            self.create_hard_links_in_cache(cache_path, index_path,
                                            is_shared_cache, mutability, path,
                                            spec)
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return None

        # Optional version bump; a falsy result stops before the metadata
        # is added to the index.
        if bump_version and not increment_version_in_spec(
                spec_path, self.__repo_type):
            return None

        idx.add_metadata(path, file)

        self._check_corrupted_files(spec, repo)

        # Run the file check when requested.
        if run_fsck:
            self.fsck()
예제 #27
0
    def _checkout(self, tag, samples, options):
        """Check out ``tag`` into the workspace and report related entity tags.

        ``options`` must provide the keys ``with_dataset``, ``with_labels``,
        ``retry``, ``force``, ``bare`` and ``version``. When ``tag`` does not
        match ``RGX_TAG_FORMAT`` it is resolved to a tag through
        ``Metadata.get_tag(tag, version)``.

        Args:
            tag: Tag (or value resolvable to a tag) to check out.
            samples: Passed through to the fetch and checkout calls.
            options: Mapping with the keys listed above.

        Returns:
            The ``(dataset_tag, labels_tag)`` tuple produced by
            ``_get_related_tags`` on success, or ``(None, None)`` whenever
            the checkout is skipped or fails. The result is always a
            2-tuple so callers can unpack it unconditionally.

        Raises:
            KeyError: If ``options`` lacks one of the required keys.
        """
        dataset = options['with_dataset']
        labels = options['with_labels']
        retries = options['retry']
        force_get = options['force']
        bare = options['bare']
        version = options['version']
        repo_type = self.__repo_type
        try:
            cache_path = get_cache_path(self.__config, repo_type)
            metadata_path = get_metadata_path(self.__config, repo_type)
            objects_path = get_objects_path(self.__config, repo_type)
            refs_path = get_refs_path(self.__config, repo_type)

            if not re.search(RGX_TAG_FORMAT, tag):
                # Not in tag format: resolve it through the metadata.
                metadata = Metadata(tag, metadata_path, self.__config,
                                    repo_type)
                tag = metadata.get_tag(tag, version)
                if not tag:
                    return None, None
            elif not self._tag_exists(tag):
                return None, None
            categories_path, spec_name, _ = spec_parse(tag)
            root_path = get_root_path()
            ws_path = os.path.join(root_path,
                                   os.sep.join([repo_type, categories_path]))
        except Exception as e:
            log.error(e, class_name=LOCAL_REPOSITORY_CLASS_NAME)
            return None, None

        ref = Refs(refs_path, spec_name, repo_type)
        cur_tag, _ = ref.branch()

        if cur_tag == tag:
            log.info('already at tag [%s]' % tag,
                     class_name=REPOSITORY_CLASS_NAME)
            return None, None

        local_rep = LocalRepository(self.__config, objects_path, repo_type)
        # Stop when local changes exist, unless the caller forces the checkout.
        if not force_get and local_rep.exist_local_changes(spec_name) is True:
            return None, None

        try:
            self._checkout_ref(tag)
        except Exception:
            log.error('Unable to checkout to %s' % tag,
                      class_name=REPOSITORY_CLASS_NAME)
            return None, None

        dataset_tag, labels_tag = self._get_related_tags(
            categories_path, dataset, labels, metadata_path, repo_type,
            spec_name)

        fetch_success = self._fetch(tag, samples, retries, bare)
        if not fetch_success:
            objs = Objects('', objects_path)
            objs.fsck(remove_corrupted=True)
            self._checkout_ref()
            return None, None
        ensure_path_exists(ws_path)

        try:
            spec_index_path = os.path.join(
                get_index_metadata_path(self.__config, repo_type), spec_name)
        except Exception:
            # Was a bare ``return``: callers unpack two values, so a plain
            # ``None`` raised TypeError at the call site.
            return None, None
        self._delete_spec_and_readme(spec_index_path, spec_name)

        try:
            r = LocalRepository(self.__config, objects_path, repo_type)
            r.checkout(cache_path, metadata_path, ws_path, tag, samples, bare)
        except OSError as e:
            self._checkout_ref()
            if e.errno == errno.ENOSPC:
                log.error(
                    'There is not enough space in the disk. Remove some files and try again.',
                    class_name=REPOSITORY_CLASS_NAME)
            else:
                log.error(
                    'An error occurred while creating the files into workspace: %s \n.'
                    % e,
                    class_name=REPOSITORY_CLASS_NAME)
            # Abort for every OSError. The out-of-space branch used to fall
            # through and record the tag as HEAD despite the failed write.
            return None, None
        except Exception as e:
            self._checkout_ref()
            log.error(
                'An error occurred while creating the files into workspace: %s \n.'
                % e,
                class_name=REPOSITORY_CLASS_NAME)
            return None, None

        m = Metadata('', metadata_path, self.__config, repo_type)
        sha = m.sha_from_tag(tag)
        ref.update_head(tag, sha)

        # Switch the metadata repository back after a successful checkout.
        self._checkout_ref()
        return dataset_tag, labels_tag
예제 #28
0
    def _checkout(self,
                  tag,
                  samples,
                  retries=2,
                  force_get=False,
                  dataset=False,
                  labels=False,
                  bare=False):
        """Check out ``tag`` into the workspace and report related entity tags.

        Args:
            tag: Tag to check out; must satisfy ``_tag_exists``.
            samples: Passed through to the fetch and checkout calls.
            retries: Passed through to ``_fetch``.
            force_get: When true, local changes do not block the checkout.
            dataset: When ``True``, look up the related dataset tag.
            labels: When ``True``, look up the related labels tag.
            bare: Passed through to the fetch and checkout calls.

        Returns:
            ``(dataset_tag, labels_tag)`` on success (each ``None`` unless
            requested), or ``(None, None)`` whenever the checkout is skipped
            or fails. The result is always a 2-tuple so callers can unpack
            it unconditionally.
        """
        repo_type = self.__repo_type
        try:
            cache_path = get_cache_path(self.__config, repo_type)
            metadata_path = get_metadata_path(self.__config, repo_type)
            objects_path = get_objects_path(self.__config, repo_type)
            refs_path = get_refs_path(self.__config, repo_type)
            # Find out the actual workspace path to save data.
            if not self._tag_exists(tag):
                return None, None
            categories_path, spec_name, _ = spec_parse(tag)
            dataset_tag = None
            labels_tag = None
            root_path = get_root_path()
            ws_path = os.path.join(root_path,
                                   os.sep.join([repo_type, categories_path]))
            ensure_path_exists(ws_path)
        except Exception as e:
            log.error(e, class_name=LOCAL_REPOSITORY_CLASS_NAME)
            return None, None

        ref = Refs(refs_path, spec_name, repo_type)
        cur_tag, _ = ref.branch()

        if cur_tag == tag:
            log.info('already at tag [%s]' % tag,
                     class_name=REPOSITORY_CLASS_NAME)
            return None, None

        local_rep = LocalRepository(self.__config, objects_path, repo_type)
        # Stop when local changes exist, unless the caller forces the checkout.
        if not force_get and local_rep.exist_local_changes(spec_name) is True:
            return None, None

        try:
            self._checkout_ref(tag)
        except Exception:
            log.error('Unable to checkout to %s' % tag,
                      class_name=REPOSITORY_CLASS_NAME)
            return None, None

        spec_path = os.path.join(metadata_path, categories_path,
                                 spec_name + '.spec')

        if dataset is True:
            dataset_tag = get_entity_tag(spec_path, repo_type, 'dataset')
        if labels is True:
            labels_tag = get_entity_tag(spec_path, repo_type, 'labels')

        fetch_success = self._fetch(tag, samples, retries, bare)

        if not fetch_success:
            objs = Objects('', objects_path)
            objs.fsck(remove_corrupted=True)
            self._checkout_ref('master')
            return None, None

        try:
            spec_index_path = os.path.join(
                get_index_metadata_path(self.__config, repo_type), spec_name)
        except Exception:
            # Was a bare ``return``: every other exit yields a 2-tuple, so a
            # plain ``None`` broke callers that unpack the result.
            return None, None

        # Remove a stale spec file and README from the index metadata.
        if os.path.exists(spec_index_path):
            stale_files = (
                os.path.join(spec_index_path, spec_name + '.spec'),
                os.path.join(spec_index_path, 'README.md'),
            )
            for stale_file in stale_files:
                if os.path.exists(stale_file):
                    os.unlink(stale_file)

        try:
            r = LocalRepository(self.__config, objects_path, repo_type)
            r.checkout(cache_path, metadata_path, objects_path, ws_path, tag,
                       samples, bare)
        except OSError as e:
            self._checkout_ref('master')
            if e.errno == errno.ENOSPC:
                log.error(
                    'There is not enough space in the disk. Remove some files and try again.',
                    class_name=REPOSITORY_CLASS_NAME)
            else:
                log.error(
                    'An error occurred while creating the files into workspace: %s \n.'
                    % e,
                    class_name=REPOSITORY_CLASS_NAME)
            # Abort for every OSError. The out-of-space branch used to fall
            # through and record the tag as HEAD despite the failed write.
            return None, None
        except Exception as e:
            self._checkout_ref('master')
            log.error(
                'An error occurred while creating the files into workspace: %s \n.'
                % e,
                class_name=REPOSITORY_CLASS_NAME)
            return None, None

        m = Metadata('', metadata_path, self.__config, repo_type)
        sha = m.sha_from_tag(tag)
        ref.update_head(tag, sha)

        # Switch the metadata repository back to 'master'.
        self._checkout_ref('master')
        return dataset_tag, labels_tag
예제 #29
0
 def clone_config(self, url, folder=None, track=False):
     """Clone a configuration repository and load its configuration.

     ``url``, ``folder`` and ``track`` are forwarded unchanged to
     ``clone_config_repository``. When that call reports success (a truthy
     result), the configuration is reloaded with ``config_load`` and
     ``clone_config_repo`` is invoked on a ``Metadata`` object built from
     the reloaded configuration. When it does not, nothing else happens.
     """
     cloned = clone_config_repository(url, folder, track)
     if not cloned:
         return

     # The freshly cloned repository carries its own configuration, so it
     # is reloaded before the metadata object is created.
     self.__config = config_load()
     config_metadata_path = get_metadata_path(self.__config)
     metadata = Metadata('', config_metadata_path, self.__config)
     metadata.clone_config_repo()
예제 #30
0
    def commit(self, spec, specs, version=None, run_fsck=False, msg=None):
        """Commit the staged content of ``spec`` and update its HEAD ref.

        Moves the chunks staged in the index to the ml-git objects store,
        commits the entity metadata and points the entity ref at the
        resulting tag.

        Args:
            spec: Name of the entity spec being committed.
            specs: Forwarded as-is to ``Metadata.commit_metadata``
                (presumably the related dataset/labels specs; confirm
                against the caller).
            version: When truthy, written into the spec file with
                ``set_version_in_spec`` before the commit, and the spec is
                re-added to the index metadata.
            run_fsck: When truthy, ``self.fsck()`` is run after the ref
                has been updated.
            msg: Forwarded as-is to ``Metadata.commit_metadata``.

        Returns:
            The tag returned by ``Metadata.commit_metadata`` on success.
            ``None`` on every failure path, so callers can rely on a simple
            truthiness check of the result.
        """
        repo_type = self.__repo_type
        try:
            index_path = get_index_path(self.__config, repo_type)
            objects_path = get_objects_path(self.__config, repo_type)
            metadata_path = get_metadata_path(self.__config, repo_type)
            refs_path = get_refs_path(self.__config, repo_type)
            repo = LocalRepository(self.__config, objects_path, repo_type)
            mutability, check_mutability = repo.get_mutability_from_spec(
                spec, repo_type)

            if not mutability:
                return None

            if not check_mutability:
                log.error('Spec mutability cannot be changed.',
                          class_name=REPOSITORY_CLASS_NAME)
                return None
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return None

        ref = Refs(refs_path, spec, repo_type)

        # Only the current tag is needed here, to derive the categories path.
        tag, _ = ref.branch()
        categories_path = get_path_with_categories(tag)
        manifest_path = os.path.join(metadata_path, categories_path, spec,
                                     MANIFEST_FILE)
        path, file = None, None
        try:
            path, file = search_spec_file(repo_type, spec, categories_path)
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)

        if path is None:
            # Same failure value as every other early exit; a tuple of
            # Nones would be truthy and could be mistaken for a tag.
            return None

        spec_path = os.path.join(path, file)
        idx = MultihashIndex(spec, index_path, objects_path)

        if version:
            set_version_in_spec(version, spec_path, repo_type)
            idx.add_metadata(path, file)

        # Check tag before anything to avoid creating unstable state
        log.debug('Check if tag already exists',
                  class_name=REPOSITORY_CLASS_NAME)
        m = Metadata(spec, metadata_path, self.__config, repo_type)

        if not m.check_exists():
            log.error('The %s has not been initialized' % repo_type,
                      class_name=REPOSITORY_CLASS_NAME)
            return None

        # Only the third element of the result is used; a None value
        # aborts the commit.
        _, _, metadata = m.tag_exists(index_path)
        if metadata is None:
            return None

        log.debug('%s -> %s' % (index_path, objects_path),
                  class_name=REPOSITORY_CLASS_NAME)
        # commit objects in index to ml-git objects
        o = Objects(spec, objects_path)
        changed_files, deleted_files = o.commit_index(index_path, path)

        # A 'bare' marker under the index metadata selects the bare branch.
        bare_mode = os.path.exists(
            os.path.join(index_path, 'metadata', spec, 'bare'))

        if not bare_mode:
            manifest = m.get_metadata_manifest(manifest_path)
            self._remove_deleted_files(idx, index_path, m, manifest, spec,
                                       deleted_files)
            m.remove_files_added_after_base_tag(manifest, path)
        else:
            tag, _ = ref.branch()
            self._checkout_ref(tag)
        # update metadata spec & README.md
        # option --dataset-spec --labels-spec
        tag, sha = m.commit_metadata(index_path, specs, msg, changed_files,
                                     mutability, path)

        # update ml-git ref spec HEAD == to new SHA-1 / tag
        if tag is None:
            return None
        ref = Refs(refs_path, spec, repo_type)
        ref.update_head(tag, sha)

        # Run file check
        if run_fsck:
            self.fsck()

        return tag