def log(self, spec, stat=False, fullstat=False):
    """Print the metadata log for *spec*, optionally appending workspace
    file-count and size statistics."""
    try:
        entity = self.__repo_type
        metadata_dir = get_metadata_path(self.__config, entity)
        metadata = Metadata(spec, metadata_dir, self.__config, entity)
        index_dir = get_index_path(self.__config, entity)
        log_info = metadata.get_log_info(spec, fullstat)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return
    full_index = FullIndex(spec, index_dir)
    if stat or fullstat:
        total_count = full_index.get_total_count()
        total_size = full_index.get_total_size()
        amount_message = 'Total of files: %s' % total_count
        size_message = 'Workspace size: %s' % humanize.naturalsize(total_size)
        workspace_info = '------------------------------------------------- \n{}\t{}' \
            .format(amount_message, size_message)
        log_info = '{}\n{}'.format(log_info, workspace_info)
    log.info(log_info, class_name=REPOSITORY_CLASS_NAME)
def metadata_exists(self, entity):
    """Return True when the metadata repository for *entity* already exists."""
    self.__repo_type = entity
    entity_metadata_path = get_metadata_path(self.__config, self.__repo_type)
    return Metadata('', entity_metadata_path, self.__config,
                    self.__repo_type).check_exists()
def checkout(self, tag, samples, options):
    """Check out *tag* into the workspace, then download any related
    dataset/labels entities referenced by the checked-out spec.

    NOTE(review): assumes `options` carries the keys consumed by
    `_checkout` ('with_dataset', 'with_labels', ...) — confirm with caller.
    """
    try:
        metadata_path = get_metadata_path(self.__config)
    except RootPathException as e:
        # Not inside an initialized ml-git project: try to bootstrap one
        # from the global configuration instead of failing outright.
        log.warn(e, class_name=REPOSITORY_CLASS_NAME)
        metadata_path = self._initialize_repository_on_the_fly()
    dt_tag, lb_tag = self._checkout(tag, samples, options)
    # Related entities must not recurse into their own related entities.
    options['with_dataset'] = False
    options['with_labels'] = False
    if dt_tag is not None:
        try:
            self.__repo_type = 'dataset'
            m = Metadata('', metadata_path, self.__config, self.__repo_type)
            log.info('Initializing related dataset download',
                     class_name=REPOSITORY_CLASS_NAME)
            if not m.check_exists():
                m.init()
            self._checkout(dt_tag, samples, options)
        except Exception as e:
            log.error('LocalRepository: [%s]' % e,
                      class_name=REPOSITORY_CLASS_NAME)
    if lb_tag is not None:
        try:
            self.__repo_type = 'labels'
            m = Metadata('', metadata_path, self.__config, self.__repo_type)
            log.info('Initializing related labels download',
                     class_name=REPOSITORY_CLASS_NAME)
            if not m.check_exists():
                m.init()
            self._checkout(lb_tag, samples, options)
        except Exception as e:
            log.error('LocalRepository: [%s]' % e,
                      class_name=REPOSITORY_CLASS_NAME)
def export(self, bucket, tag, retry):
    """Export the files of entity version *tag* to the external *bucket*.

    Returns (None, None) on every failure path; implicitly returns None
    on success (callers only rely on the failure sentinel).
    """
    try:
        # spec_parse also validates the tag format; get_root_path raises
        # when we are not inside an ml-git project.
        categories_path, spec_name, _ = spec_parse(tag)
        get_root_path()
        if not self._tag_exists(tag):
            return None, None
    except InvalidGitRepositoryError:
        log.error('You are not in an initialized ml-git repository.',
                  class_name=LOCAL_REPOSITORY_CLASS_NAME)
        return None, None
    except Exception as e:
        log.error(e, class_name=LOCAL_REPOSITORY_CLASS_NAME)
        return None, None
    try:
        self._checkout_ref(tag)
    except Exception:
        log.error('Unable to checkout to %s' % tag,
                  class_name=REPOSITORY_CLASS_NAME)
        return None, None
    local = LocalRepository(
        self.__config,
        get_objects_path(self.__config, self.__repo_type),
        self.__repo_type)
    local.export_tag(get_metadata_path(self.__config, self.__repo_type),
                     tag, bucket, retry)
    # Restore the metadata repository to its default branch.
    self._checkout_ref()
def remote_fsck(self, spec, retries=2, thorough=False, paranoid=False):
    """Check (and repair) the remote storage blobs for *spec* at its
    current tag, delegating to LocalRepository.remote_fsck."""
    repo_type = self.__repo_type
    try:
        metadata_path = get_metadata_path(self.__config, repo_type)
        objects_path = get_objects_path(self.__config, repo_type)
        refs_path = get_refs_path(self.__config, repo_type)
        ref = Refs(refs_path, spec, repo_type)
        tag, sha = ref.branch()
        categories_path = get_path_with_categories(tag)
        self._checkout_ref(tag)
        spec_path, spec_file = search_spec_file(self.__repo_type, spec,
                                                categories_path)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return
    if spec_path is None:
        return
    full_spec_path = os.path.join(spec_path, spec_file)
    r = LocalRepository(self.__config, objects_path, repo_type)
    r.remote_fsck(metadata_path, tag, full_spec_path, retries, thorough,
                  paranoid)
    # ensure first we're on master !
    self._checkout_ref()
def _checkout_ref(self, ref):
    """Check out *ref* in the metadata git repository of the current
    entity type."""
    entity = self.__repo_type
    metadata_dir = get_metadata_path(self.__config, entity)
    Metadata('', metadata_dir, self.__config, entity).checkout(ref)
def check_initialized_entity(context, entity_type, entity_name):
    """Abort the CLI *context* when the metadata repository for
    *entity_type* has not been initialized."""
    config = merged_config_load()
    metadata = Metadata(entity_name, get_metadata_path(config, entity_type),
                        config, entity_type)
    if metadata.check_exists():
        return
    log.error(output_messages['ERROR_NOT_INITIALIZED'] % entity_type)
    context.exit()
def test_paths(self):
    """All path getters must return non-empty paths; refs live under .ml-git."""
    config = config_load()
    for path_getter in (get_index_path, get_objects_path, get_cache_path,
                        get_metadata_path):
        self.assertTrue(len(path_getter(config)) > 0)
    self.assertTrue('.ml-git' in get_refs_path(config))
def init(self):
    """Initialize the metadata repository; failures are logged, not raised."""
    try:
        metadata_dir = get_metadata_path(self.__config)
        Metadata('', metadata_dir, self.__config, self.__repo_type).init()
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return
def get_last_entity_version(entity_type, entity_name):
    """Return the version number following the entity's latest tag.

    Logs an error and returns None when the metadata repository for
    *entity_type* is not initialized.
    """
    config = merged_config_load()
    metadata = Metadata(entity_name, get_metadata_path(config, entity_type),
                        config, entity_type)
    if not metadata.check_exists():
        log.error(output_messages['ERROR_NOT_INITIALIZED'] % entity_type)
        return
    return metadata.get_last_tag_version(entity_name) + 1
def _checkout_ref(self, ref=None):
    """Check out *ref* in the metadata repository; when *ref* is None,
    fall back to the repository's default branch."""
    entity = self.__repo_type
    metadata = Metadata('', get_metadata_path(self.__config, entity),
                        self.__config, entity)
    target = ref if ref is not None else metadata.get_default_branch()
    metadata.checkout(target)
def repo_remote_add(self, repo_type, mlgit_remote, global_conf=False):
    """Register *mlgit_remote* for *repo_type* in the config, then point
    the metadata git repository at the new remote URL."""
    try:
        remote_add(repo_type, mlgit_remote, global_conf)
        self.__config = config_load()
        metadata = Metadata('', get_metadata_path(self.__config),
                            self.__config, self.__repo_type)
        metadata.remote_set_url(mlgit_remote)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return
def list_tag(self, spec):
    """Print every tag of *spec* from the metadata repository."""
    entity = self.__repo_type
    try:
        metadata = Metadata(spec, get_metadata_path(self.__config, entity),
                            self.__config, entity)
        for tag in metadata.list_tags(spec):
            print(tag)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return
def repo_remote_del(self, global_conf=False):
    """Remove the configured git remote for the current entity type,
    but only after the metadata repository drops its git reference."""
    try:
        metadata = Metadata('', get_metadata_path(self.__config),
                            self.__config, self.__repo_type)
        if not metadata.delete_git_reference():
            return
        remote_del(self.__repo_type, global_conf)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return
def push(self, spec, retry=2, clear_on_fail=False):
    """Push local committed data for *spec*: blobs to remote storage
    first, then the metadata git repository (only if blobs succeeded)."""
    repo_type = self.__repo_type
    try:
        objects_path = get_objects_path(self.__config, repo_type)
        metadata_path = get_metadata_path(self.__config, repo_type)
        refs_path = get_refs_path(self.__config, repo_type)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return
    met = Metadata(spec, metadata_path, self.__config, repo_type)
    # A git identity is required to create commits in the metadata repo.
    fields = met.git_user_config()
    if None in fields.values():
        log.error(
            'Your name and email address need to be configured in git. '
            'Please see the commands below:',
            class_name=REPOSITORY_CLASS_NAME)
        log.error('git config --global user.name \'Your Name\'',
                  class_name=REPOSITORY_CLASS_NAME)
        log.error('git config --global user.email [email protected]',
                  class_name=REPOSITORY_CLASS_NAME)
        return
    if met.fetch() is False:
        return
    ref = Refs(refs_path, spec, repo_type)
    tag, sha = ref.branch()
    categories_path = get_path_with_categories(tag)
    spec_path, spec_file = None, None
    try:
        spec_path, spec_file = search_spec_file(self.__repo_type, spec,
                                                categories_path)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
    if spec_path is None:
        return
    full_spec_path = os.path.join(spec_path, spec_file)
    repo = LocalRepository(self.__config, objects_path, repo_type)
    # ret == 0 signals that every blob reached the remote storage.
    ret = repo.push(objects_path, full_spec_path, retry, clear_on_fail)
    # ensure first we're on master !
    met.checkout()
    if ret == 0:
        # push metadata spec to LocalRepository git repository
        try:
            met.push()
        except Exception as e:
            log.error(e, class_name=REPOSITORY_CLASS_NAME)
            return
        # Clear the upload log only after a fully successful push.
        MultihashFS(objects_path).reset_log()
def _fetch(self, tag, samples, retries=2, bare=False):
    """Fetch the blobs referenced by *tag* into the local object store.

    Returns LocalRepository.fetch()'s result, or None after logging on error.
    """
    repo_type = self.__repo_type
    try:
        objects_path = get_objects_path(self.__config, repo_type)
        metadata_path = get_metadata_path(self.__config, repo_type)
        # check if no data left untracked/uncommitted. otherwise, stop.
        local_rep = LocalRepository(self.__config, objects_path, repo_type)
        return local_rep.fetch(metadata_path, tag, samples, retries, bare)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return
def update(self):
    """Pull the latest metadata from the remote for the current entity type."""
    entity = self.__repo_type
    try:
        metadata = Metadata('', get_metadata_path(self.__config, entity),
                            self.__config, entity)
        metadata.update()
    except GitError as error:
        log.error(
            'Could not update metadata. Check your remote configuration. %s'
            % error.stderr,
            class_name=REPOSITORY_CLASS_NAME)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
def _initialize_repository_on_the_fly(self):
    """Bootstrap a local ml-git project from the global configuration.

    Returns the metadata path on success; raises RootPathException when
    no global configuration exists to bootstrap from.
    """
    if not os.path.exists(get_global_config_path()):
        raise RootPathException(
            'You are not in an initialized ml-git repository and do not have a global configuration.'
        )
    log.info('Initializing the project with global settings',
             class_name=REPOSITORY_CLASS_NAME)
    init_mlgit()
    save_global_config_in_local()
    metadata_path = get_metadata_path(self.__config)
    if not os.path.exists(metadata_path):
        Metadata('', metadata_path, self.__config, self.__repo_type).init()
    return metadata_path
def tag(self, spec, usr_tag):
    """Create the user tag *usr_tag* on top of the current tag of *spec*.

    Returns True on success, False on any failure (no current tag, tag
    already exists, or git refused to create it).
    """
    repo_type = self.__repo_type
    try:
        metadata_path = get_metadata_path(self.__config, repo_type)
        refs_path = get_refs_path(self.__config, repo_type)
        r = Refs(refs_path, spec, repo_type)
        curtag, sha = r.head()
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return False
    if curtag is None:
        log.error('No current tag for [%s]. commit first.' % spec,
                  class_name=REPOSITORY_CLASS_NAME)
        return False
    utag = UsrTag(curtag, usr_tag)
    # Check if usrtag exists before creating it
    log.debug('Check if tag [%s] already exists' % utag,
              class_name=REPOSITORY_CLASS_NAME)
    m = Metadata(spec, metadata_path, self.__config, repo_type)
    if m._usrtag_exists(utag) is True:
        log.error('Tag [%s] already exists.' % utag,
                  class_name=REPOSITORY_CLASS_NAME)
        return False
    # ensure metadata repository is at the current tag/sha version
    m = Metadata('', metadata_path, self.__config, repo_type)
    m.checkout(curtag)
    # TODO: format to something that could be used for a checkout:
    # format: _._user_.._ + curtag + _.._ + usrtag
    # adds usrtag to the metadata repository
    m = Metadata(spec, metadata_path, self.__config, repo_type)
    try:
        m.tag_add(utag)
    except Exception as e:
        # Fix: the previous code did `re.search(...).group(1)` on
        # `e.stderr` unconditionally — a non-git exception (no `stderr`
        # attribute) or a non-matching message raised AttributeError and
        # masked the real error. Guard both, and return False like every
        # other failure path instead of a bare None.
        stderr = getattr(e, 'stderr', str(e))
        match = re.search("stderr: 'fatal:(.*)'$", stderr)
        err = match.group(1) if match else stderr
        log.error(err, class_name=REPOSITORY_CLASS_NAME)
        return False
    log.info('Create Tag Successfull', class_name=REPOSITORY_CLASS_NAME)
    # checkout at metadata repository at master version
    m.checkout()
    return True
def list(self):
    """Print the tree of entities managed in the metadata repository."""
    repo_type = self.__repo_type
    try:
        metadata_path = get_metadata_path(self.__config, repo_type)
        m = Metadata('', metadata_path, self.__config, repo_type)
        if not m.check_exists():
            raise RuntimeError('The %s doesn\'t have been initialized.' %
                               self.__repo_type)
        m.checkout()
        m.list(title='ML ' + repo_type)
    except GitError as g:
        # Translate git's "unknown pathspec" into a friendlier message:
        # it means the metadata repo has no entities yet.
        error_message = g.stderr
        if 'did not match any file(s) known' in error_message:
            error_message = 'You don\'t have any entity being managed.'
        log.error(error_message, class_name=REPOSITORY_CLASS_NAME)
        return
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return
def fetch_tag(self, tag, samples, retries=2):
    """Check out *tag* in metadata and fetch its blobs; on fetch failure,
    remove corrupted objects and restore the previous checkout."""
    repo_type = self.__repo_type
    try:
        objects_path = get_objects_path(self.__config, repo_type)
        metadata_path = get_metadata_path(self.__config, repo_type)
        m = Metadata('', metadata_path, self.__config, repo_type)
        m.checkout(tag)
        fetch_success = self._fetch(tag, samples, retries)
        if not fetch_success:
            # Drop partially-downloaded / corrupted chunks.
            objs = Objects('', objects_path)
            objs.fsck(remove_corrupted=True)
            m.checkout()
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return
    # restore to master/head
    self._checkout_ref()
def show(self, spec):
    """Print the metadata of *spec* at its current HEAD tag."""
    entity = self.__repo_type
    try:
        metadata_path = get_metadata_path(self.__config, entity)
        refs_path = get_refs_path(self.__config, entity)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return
    tag, sha = Refs(refs_path, spec, entity).head()
    if tag is None:
        log.info('No HEAD for [%s]' % spec,
                 class_name=LOCAL_REPOSITORY_CLASS_NAME)
        return
    metadata = Metadata('', metadata_path, self.__config, entity)
    metadata.checkout(tag)
    metadata.show(spec)
    metadata.checkout()
def __init__(self, config, repo_type=EntityType.MODELS.value):
    # Resolve the on-disk metadata path and the git remote URL for this
    # entity type from the project configuration.
    self.path = get_metadata_path(config, repo_type)
    self.git = config[repo_type]['git']
    super(MetadataManager, self).__init__(self.git, self.path, repo_type)
def reset(self, spec, reset_type, head):
    """Undo the last commit of *spec*.

    reset_type is '--soft', '--mixed' or '--hard'; head is HEAD or HEAD~1.
    The manifest is captured before and after the metadata reset so the
    difference tells which files/hashes to restore or remove.
    """
    log.info(output_messages['INFO_INITIALIZING_RESET'] % (reset_type, head),
             class_name=REPOSITORY_CLASS_NAME)
    # A soft/mixed reset to the current HEAD is a no-op.
    if (reset_type == '--soft' or reset_type == '--mixed') and head == HEAD:
        return
    try:
        repo_type = self.__repo_type
        metadata_path = get_metadata_path(self.__config, repo_type)
        index_path = get_index_path(self.__config, repo_type)
        refs_path = get_refs_path(self.__config, repo_type)
        object_path = get_objects_path(self.__config, repo_type)
        met = Metadata(spec, metadata_path, self.__config, repo_type)
        ref = Refs(refs_path, spec, repo_type)
        idx = MultihashIndex(spec, index_path, object_path)
        fidx = FullIndex(spec, index_path)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return
    # get tag before reset
    tag = met.get_current_tag()
    categories_path = get_path_with_categories(str(tag))
    # current manifest file before reset
    manifest_path = os.path.join(metadata_path, categories_path, spec,
                                 MANIFEST_FILE)
    _manifest = Manifest(manifest_path).load()
    if head == HEAD_1:  # HEAD~1
        try:
            # reset the repo
            met.reset()
        except Exception:
            return
    # get tag after reset
    tag_after_reset = met.get_current_tag()
    sha = met.sha_from_tag(tag_after_reset)
    # update ml-git ref HEAD
    ref.update_head(str(tag_after_reset), sha)
    # # get path to reset workspace in case of --hard
    path, file = None, None
    try:
        path, file = search_spec_file(self.__repo_type, spec, categories_path)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
    if reset_type == '--hard' and path is None:
        return
    # get manifest from metadata after reset
    _manifest_changed = Manifest(manifest_path)
    hash_files, file_names = _manifest_changed.get_diff(_manifest)
    idx_mf = idx.get_index().load()
    if reset_type == '--soft':
        # add in index/metadata/<entity-name>/MANIFEST
        idx.update_index_manifest(idx_mf)
        idx.update_index_manifest(hash_files)
        fidx.update_index_status(file_names, Status.a.name)
    else:  # --hard or --mixed
        # remove hash from index/hashsh/store.log
        file_names.update(*idx_mf.values())
        objs = MultihashFS(index_path)
        for key_hash in hash_files:
            objs.remove_hash(key_hash)
        idx.remove_manifest()
        fidx.remove_from_index_yaml(file_names)
        fidx.remove_uncommitted()
    if reset_type == '--hard':
        # reset workspace
        remove_from_workspace(file_names, path, spec)
def __init__(self, config, type='model'):
    # NOTE(review): the parameter name `type` shadows the builtin, but it
    # is part of the public keyword interface, so it is kept as-is.
    self.path = get_metadata_path(config, type)
    self.git = config[type]['git']
    super(MetadataManager, self).__init__(self.git, self.path)
def add(self, spec, file_path, bump_version=False, run_fsck=False):
    """Add workspace files of *spec* to the ml-git index (chunk the data,
    update the index manifest, and hard-link into the cache)."""
    repo_type = self.__repo_type
    # Shared-objects / shared-cache setups need different file permissions.
    is_shared_objects = 'objects_path' in self.__config[repo_type]
    is_shared_cache = 'cache_path' in self.__config[repo_type]
    if not validate_config_spec_hash(self.__config):
        log.error(
            '.ml-git/config.yaml invalid. It should look something like this:\n%s'
            % get_yaml_str(
                get_sample_config_spec('somebucket', 'someprofile',
                                       'someregion')),
            class_name=REPOSITORY_CLASS_NAME)
        return None
    path, file = None, None
    try:
        refs_path = get_refs_path(self.__config, repo_type)
        index_path = get_index_path(self.__config, repo_type)
        metadata_path = get_metadata_path(self.__config, repo_type)
        cache_path = get_cache_path(self.__config, repo_type)
        objects_path = get_objects_path(self.__config, repo_type)
        repo = LocalRepository(self.__config, objects_path, repo_type)
        mutability, check_mutability = repo.get_mutability_from_spec(
            spec, repo_type)
        # A sampled checkout is a partial view — adding to it would
        # produce an inconsistent version.
        sampling_flag = os.path.exists(
            os.path.join(index_path, 'metadata', spec, 'sampling'))
        if sampling_flag:
            log.error(
                'You cannot add new data to an entity that is based on a checkout with the --sampling option.',
                class_name=REPOSITORY_CLASS_NAME)
            return
        if not mutability:
            return
        if not check_mutability:
            log.error('Spec mutability cannot be changed.',
                      class_name=REPOSITORY_CLASS_NAME)
            return
        if not self._has_new_data(repo, spec):
            return None
        ref = Refs(refs_path, spec, repo_type)
        tag, sha = ref.branch()
        categories_path = get_path_with_categories(tag)
        path, file = search_spec_file(self.__repo_type, spec, categories_path)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return
    if path is None:
        return
    spec_path = os.path.join(path, file)
    if not self._is_spec_valid(spec_path):
        return None
    # Check tag before anything to avoid creating unstable state
    log.debug('Repository: check if tag already exists',
              class_name=REPOSITORY_CLASS_NAME)
    m = Metadata(spec, metadata_path, self.__config, repo_type)
    if not m.check_exists():
        log.error('The %s has not been initialized' % self.__repo_type,
                  class_name=REPOSITORY_CLASS_NAME)
        return
    # Best-effort metadata refresh; failure must not block the add.
    try:
        m.update()
    except Exception:
        pass
    # get version of current manifest file
    manifest = self._get_current_manifest_file(m, tag)
    try:
        # adds chunks to ml-git Index
        log.info('%s adding path [%s] to ml-git index' % (repo_type, path),
                 class_name=REPOSITORY_CLASS_NAME)
        with change_mask_for_routine(is_shared_objects):
            idx = MultihashIndex(spec, index_path, objects_path, mutability,
                                 cache_path)
            idx.add(path, manifest, file_path)
        # create hard links in ml-git Cache
        self.create_hard_links_in_cache(cache_path, index_path,
                                        is_shared_cache, mutability, path,
                                        spec)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return None
    if bump_version and not increment_version_in_spec(
            spec_path, self.__repo_type):
        return None
    idx.add_metadata(path, file)
    self._check_corrupted_files(spec, repo)
    # Run file check
    if run_fsck:
        self.fsck()
def _checkout(self, tag, samples, options):
    """Materialize *tag* (or an entity name resolved to a tag) into the
    workspace.

    Returns the (dataset_tag, labels_tag) referenced by the spec, or
    (None, None) on failure / no-op.
    """
    dataset = options['with_dataset']
    labels = options['with_labels']
    retries = options['retry']
    force_get = options['force']
    bare = options['bare']
    version = options['version']
    repo_type = self.__repo_type
    try:
        cache_path = get_cache_path(self.__config, repo_type)
        metadata_path = get_metadata_path(self.__config, repo_type)
        objects_path = get_objects_path(self.__config, repo_type)
        refs_path = get_refs_path(self.__config, repo_type)
        if not re.search(RGX_TAG_FORMAT, tag):
            # `tag` is an entity name: resolve it to a concrete tag.
            metadata_path = get_metadata_path(self.__config, repo_type)
            metadata = Metadata(tag, metadata_path, self.__config, repo_type)
            tag = metadata.get_tag(tag, version)
            if not tag:
                return None, None
        elif not self._tag_exists(tag):
            return None, None
        categories_path, spec_name, _ = spec_parse(tag)
        root_path = get_root_path()
        ws_path = os.path.join(root_path,
                               os.sep.join([repo_type, categories_path]))
    except Exception as e:
        log.error(e, class_name=LOCAL_REPOSITORY_CLASS_NAME)
        return None, None
    ref = Refs(refs_path, spec_name, repo_type)
    cur_tag, _ = ref.branch()
    if cur_tag == tag:
        log.info('already at tag [%s]' % tag,
                 class_name=REPOSITORY_CLASS_NAME)
        return None, None
    local_rep = LocalRepository(self.__config, objects_path, repo_type)
    # check if no data left untracked/uncommitted. otherwise, stop.
    if not force_get and local_rep.exist_local_changes(spec_name) is True:
        return None, None
    try:
        self._checkout_ref(tag)
    except Exception:
        log.error('Unable to checkout to %s' % tag,
                  class_name=REPOSITORY_CLASS_NAME)
        return None, None
    dataset_tag, labels_tag = self._get_related_tags(
        categories_path, dataset, labels, metadata_path, repo_type, spec_name)
    fetch_success = self._fetch(tag, samples, retries, bare)
    if not fetch_success:
        # Clean up any corrupted chunks and restore the previous checkout.
        objs = Objects('', objects_path)
        objs.fsck(remove_corrupted=True)
        self._checkout_ref()
        return None, None
    ensure_path_exists(ws_path)
    try:
        spec_index_path = os.path.join(
            get_index_metadata_path(self.__config, repo_type), spec_name)
    except Exception:
        return
    self._delete_spec_and_readme(spec_index_path, spec_name)
    try:
        r = LocalRepository(self.__config, objects_path, repo_type)
        r.checkout(cache_path, metadata_path, ws_path, tag, samples, bare)
    except OSError as e:
        self._checkout_ref()
        if e.errno == errno.ENOSPC:
            log.error(
                'There is not enough space in the disk. Remove some files and try again.',
                class_name=REPOSITORY_CLASS_NAME)
        else:
            log.error(
                'An error occurred while creating the files into workspace: %s \n.'
                % e,
                class_name=REPOSITORY_CLASS_NAME)
        return None, None
    except Exception as e:
        self._checkout_ref()
        log.error(
            'An error occurred while creating the files into workspace: %s \n.'
            % e,
            class_name=REPOSITORY_CLASS_NAME)
        return None, None
    m = Metadata('', metadata_path, self.__config, repo_type)
    sha = m.sha_from_tag(tag)
    ref.update_head(tag, sha)
    # restore to master/head
    self._checkout_ref()
    return dataset_tag, labels_tag
def _checkout(self, tag, samples, retries=2, force_get=False, dataset=False,
              labels=False, bare=False):
    """Materialize *tag* into the workspace (older flag-based variant).

    Returns (dataset_tag, labels_tag) from the checked-out spec, or
    (None, None) on failure / no-op.
    """
    repo_type = self.__repo_type
    try:
        cache_path = get_cache_path(self.__config, repo_type)
        metadata_path = get_metadata_path(self.__config, repo_type)
        objects_path = get_objects_path(self.__config, repo_type)
        refs_path = get_refs_path(self.__config, repo_type)
        # find out actual workspace path to save data
        if not self._tag_exists(tag):
            return None, None
        categories_path, spec_name, _ = spec_parse(tag)
        dataset_tag = None
        labels_tag = None
        root_path = get_root_path()
        ws_path = os.path.join(root_path,
                               os.sep.join([repo_type, categories_path]))
        ensure_path_exists(ws_path)
    except Exception as e:
        log.error(e, class_name=LOCAL_REPOSITORY_CLASS_NAME)
        return None, None
    ref = Refs(refs_path, spec_name, repo_type)
    cur_tag, _ = ref.branch()
    if cur_tag == tag:
        log.info('already at tag [%s]' % tag,
                 class_name=REPOSITORY_CLASS_NAME)
        return None, None
    local_rep = LocalRepository(self.__config, objects_path, repo_type)
    # check if no data left untracked/uncommitted. otherwise, stop.
    if not force_get and local_rep.exist_local_changes(spec_name) is True:
        return None, None
    try:
        self._checkout_ref(tag)
    except Exception:
        log.error('Unable to checkout to %s' % tag,
                  class_name=REPOSITORY_CLASS_NAME)
        return None, None
    # Read the related dataset/labels tags from the checked-out spec.
    spec_path = os.path.join(metadata_path, categories_path,
                             spec_name + '.spec')
    if dataset is True:
        dataset_tag = get_entity_tag(spec_path, repo_type, 'dataset')
    if labels is True:
        labels_tag = get_entity_tag(spec_path, repo_type, 'labels')
    fetch_success = self._fetch(tag, samples, retries, bare)
    if not fetch_success:
        # Clean up corrupted chunks and restore the metadata branch.
        objs = Objects('', objects_path)
        objs.fsck(remove_corrupted=True)
        self._checkout_ref('master')
        return None, None
    try:
        spec_index_path = os.path.join(
            get_index_metadata_path(self.__config, repo_type), spec_name)
    except Exception:
        return
    # Remove stale spec/README copies from the index metadata.
    if os.path.exists(spec_index_path):
        if os.path.exists(
                os.path.join(spec_index_path, spec_name + '.spec')):
            os.unlink(os.path.join(spec_index_path, spec_name + '.spec'))
        if os.path.exists(os.path.join(spec_index_path, 'README.md')):
            os.unlink(os.path.join(spec_index_path, 'README.md'))
    try:
        r = LocalRepository(self.__config, objects_path, repo_type)
        r.checkout(cache_path, metadata_path, objects_path, ws_path, tag,
                   samples, bare)
    except OSError as e:
        self._checkout_ref('master')
        if e.errno == errno.ENOSPC:
            log.error(
                'There is not enough space in the disk. Remove some files and try again.',
                class_name=REPOSITORY_CLASS_NAME)
        else:
            log.error(
                'An error occurred while creating the files into workspace: %s \n.'
                % e,
                class_name=REPOSITORY_CLASS_NAME)
        return None, None
    except Exception as e:
        self._checkout_ref('master')
        log.error(
            'An error occurred while creating the files into workspace: %s \n.'
            % e,
            class_name=REPOSITORY_CLASS_NAME)
        return None, None
    m = Metadata('', metadata_path, self.__config, repo_type)
    sha = m.sha_from_tag(tag)
    ref.update_head(tag, sha)
    # restore to master/head
    self._checkout_ref('master')
    return dataset_tag, labels_tag
def clone_config(self, url, folder=None, track=False):
    """Clone a configuration repository from *url* and initialize the
    local metadata from it."""
    if not clone_config_repository(url, folder, track):
        return
    self.__config = config_load()
    metadata = Metadata('', get_metadata_path(self.__config), self.__config)
    metadata.clone_config_repo()
def commit(self, spec, specs, version=None, run_fsck=False, msg=None):
    """Commit indexed chunks of *spec* into .ml-git/objects and record a
    new tag in the metadata repository.

    Returns the new tag on success; returns None (or an error tuple from
    early exits) on failure.
    """
    # Move chunks from index to .ml-git/objects
    repo_type = self.__repo_type
    try:
        index_path = get_index_path(self.__config, repo_type)
        objects_path = get_objects_path(self.__config, repo_type)
        metadata_path = get_metadata_path(self.__config, repo_type)
        refs_path = get_refs_path(self.__config, repo_type)
        repo = LocalRepository(self.__config, objects_path, repo_type)
        mutability, check_mutability = repo.get_mutability_from_spec(
            spec, repo_type)
        if not mutability:
            return
        if not check_mutability:
            log.error('Spec mutability cannot be changed.',
                      class_name=REPOSITORY_CLASS_NAME)
            return
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return
    ref = Refs(refs_path, spec, repo_type)
    tag, sha = ref.branch()
    categories_path = get_path_with_categories(tag)
    manifest_path = os.path.join(metadata_path, categories_path, spec,
                                 MANIFEST_FILE)
    path, file = None, None
    try:
        path, file = search_spec_file(self.__repo_type, spec, categories_path)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
    if path is None:
        return None, None, None
    spec_path = os.path.join(path, file)
    idx = MultihashIndex(spec, index_path, objects_path)
    if version:
        set_version_in_spec(version, spec_path, self.__repo_type)
    idx.add_metadata(path, file)
    # Check tag before anything to avoid creating unstable state
    log.debug('Check if tag already exists',
              class_name=REPOSITORY_CLASS_NAME)
    m = Metadata(spec, metadata_path, self.__config, repo_type)
    if not m.check_exists():
        log.error('The %s has not been initialized' % self.__repo_type,
                  class_name=REPOSITORY_CLASS_NAME)
        return
    full_metadata_path, categories_sub_path, metadata = m.tag_exists(
        index_path)
    if metadata is None:
        return None
    log.debug('%s -> %s' % (index_path, objects_path),
              class_name=REPOSITORY_CLASS_NAME)
    # commit objects in index to ml-git objects
    o = Objects(spec, objects_path)
    changed_files, deleted_files = o.commit_index(index_path, path)
    # A bare add has no workspace files to reconcile against.
    bare_mode = os.path.exists(
        os.path.join(index_path, 'metadata', spec, 'bare'))
    if not bare_mode:
        manifest = m.get_metadata_manifest(manifest_path)
        self._remove_deleted_files(idx, index_path, m, manifest, spec,
                                   deleted_files)
        m.remove_files_added_after_base_tag(manifest, path)
    else:
        tag, _ = ref.branch()
        self._checkout_ref(tag)
    # update metadata spec & README.md
    # option --dataset-spec --labels-spec
    tag, sha = m.commit_metadata(index_path, specs, msg, changed_files,
                                 mutability, path)
    # update ml-git ref spec HEAD == to new SHA-1 / tag
    if tag is None:
        return None
    ref = Refs(refs_path, spec, repo_type)
    ref.update_head(tag, sha)
    # Run file check
    if run_fsck:
        self.fsck()
    return tag