def unlock_file(self, spec, file_path):
    """Unlock ``file_path`` of the entity ``spec`` when its mutability allows it.

    The method validates the repository configuration, locates the spec
    file of the entity, reads its ``mutability`` property and, only when
    that property is valid and not strict, delegates the actual work to
    ``LocalRepository.unlock_file``.

    Every failure is reported through the logger; the method always
    returns ``None``.

    Args:
        spec: Name of the entity spec.
        file_path: Path of the file to unlock, forwarded unchanged to
            ``LocalRepository.unlock_file``.
    """
    repo_type = self.__repo_type

    # A malformed configuration stops everything; show a sample to the user.
    if not validate_config_spec_hash(self.__config):
        sample_config = get_yaml_str(
            get_sample_config_spec('somebucket', 'someprofile', 'someregion'))
        log.error(
            '.ml-git/config.yaml invalid. It should look something like this:\n%s' % sample_config,
            class_name=REPOSITORY_CLASS_NAME)
        return None

    spec_dir, spec_filename = None, None
    try:
        refs_path = get_refs_path(self.__config, repo_type)
        objects_path = get_objects_path(self.__config, repo_type)
        index_path = get_index_path(self.__config, repo_type)
        cache_path = get_cache_path(self.__config, repo_type)

        # Only the tag of the current ref is needed to find the spec file.
        tag, _ = Refs(refs_path, spec, repo_type).branch()
        spec_dir, spec_filename = search_spec_file(
            self.__repo_type, spec, get_path_with_categories(tag))
    except Exception as error:
        log.error(error, class_name=REPOSITORY_CLASS_NAME)
        return

    if spec_dir is None:
        return

    spec_content = yaml_load(os.path.join(spec_dir, spec_filename))

    try:
        mutability = spec_content[repo_type]['mutability']
        if mutability not in Mutability.list():
            log.error('Invalid mutability type.', class_name=REPOSITORY_CLASS_NAME)
            return
    except Exception:
        # No usable 'mutability' entry: the entity is treated as strict,
        # so there is nothing to unlock.
        log.info(
            'The spec does not have the \'mutability\' property set. Default: strict.',
            class_name=REPOSITORY_CLASS_NAME)
        return

    # Strict entities can never be unlocked.
    if mutability == Mutability.STRICT.value:
        log.error(
            'You cannot use this command for this entity because mutability cannot be strict.',
            class_name=REPOSITORY_CLASS_NAME)
        return

    try:
        local_repo = LocalRepository(self.__config, objects_path, repo_type)
        local_repo.unlock_file(spec_dir, file_path, index_path, objects_path, spec, cache_path)
    except Exception as error:
        log.error(error, class_name=REPOSITORY_CLASS_NAME)
        return
def test_get_yaml_str(self):
    """Check the sample config fixture and its YAML serialization."""
    bucket_config = self.yaml_obj_sample[STORAGE_CONFIG_KEY][S3H]['bucket_test']

    # Sanity-check the fixture before comparing the serialized form.
    self.assertEqual(bucket_config['aws-credentials']['profile'], 'profile_test')
    self.assertEqual(bucket_config['region'], 'region_test')

    # get_yaml_str must render the sample object exactly as the sample string.
    self.assertEqual(get_yaml_str(self.yaml_obj_sample), self.yaml_str_sample)
def test_get_yaml_str(self):
    """Check the sample config fixture and its YAML serialization."""
    bucket_config = self.yaml_obj_sample['store']['s3h']['bucket_test']

    # Sanity-check the fixture before comparing the serialized form.
    self.assertEqual(bucket_config['aws-credentials']['profile'], 'profile_test')
    self.assertEqual(bucket_config['region'], 'region_test')

    # get_yaml_str must render the sample object exactly as the sample string.
    self.assertEqual(get_yaml_str(self.yaml_obj_sample), self.yaml_str_sample)
def metadata_print(metadata_file, spec_name):
    """Print every entity section found in a metadata file.

    The file is loaded with ``yaml_load``. For each of the sections
    ``dataset``, ``model`` and ``labels`` that is present, a header line
    ``-- <section> : <spec_name> --`` is printed, followed by the section
    rendered with ``get_yaml_str``. Sections that are missing, or that
    cannot be rendered, are skipped silently.

    Args:
        metadata_file: Path of the metadata file handed to ``yaml_load``.
        spec_name: Entity name shown in the header line.
    """
    md = yaml_load(metadata_file)

    for section in ('dataset', 'model', 'labels'):
        try:
            # A file describes a single entity type, so the other sections
            # are normally absent; skip them instead of printing an empty
            # header.
            content = md[section]
            print('-- %s : %s --' % (section, spec_name))
        except Exception:
            continue

        try:
            print(get_yaml_str(content))
        except Exception:
            # Best effort: a section that cannot be rendered is skipped.
            continue
def metadata_print(metadata_file, spec_name):
    """Print every entity section found in a metadata file.

    The file is loaded with ``yaml_load``. For each entity type returned
    by ``EntityType.to_list()``, the matching key (``get_spec_key``) is
    looked up in the loaded data. When present, a header line
    ``-- <section> : <spec_name> --`` is printed, followed by the section
    rendered with ``get_yaml_str``. Sections that are missing, or that
    cannot be rendered, are skipped silently.

    Args:
        metadata_file: Path of the metadata file handed to ``yaml_load``.
        spec_name: Entity name shown in the header line.
    """
    md = yaml_load(metadata_file)

    for section in EntityType.to_list():
        spec_key = get_spec_key(section)
        try:
            # A file describes a single entity type, so the other sections
            # are normally absent; skip them instead of printing an empty
            # header.
            content = md[spec_key]
            print('-- %s : %s --' % (section, spec_name))
        except Exception:
            continue

        try:
            print(get_yaml_str(content))
        except Exception:
            # Best effort: a section that cannot be rendered is skipped.
            continue
def add(self, spec, file_path, bump_version=False, run_fsck=False):
    """Add the workspace content of the entity ``spec`` to the ml-git index.

    Steps, in order:

    1. Validate the repository configuration.
    2. Resolve the repository paths, read the mutability of the spec and
       refuse entities checked out with ``--sampling``, entities whose
       mutability was changed, and entities without new data.
    3. Locate and validate the spec file and make sure the metadata
       repository has been initialized.
    4. Add the files to the index and create the hard links in the cache.
    5. Optionally bump the version stored in the spec, store the spec in
       the index metadata, check for corrupted files and optionally run
       ``fsck``.

    Every failure is reported through the logger; the method always
    returns ``None``.

    Args:
        spec: Name of the entity spec.
        file_path: Forwarded unchanged to ``MultihashIndex.add``
            (presumably restricts which files are added; confirm
            against ``MultihashIndex``).
        bump_version: When true, ``increment_version_in_spec`` is called
            and the operation stops if it reports a failure.
        run_fsck: When true, ``self.fsck()`` runs after the add.
    """
    repo_type = self.__repo_type
    repo_config = self.__config[repo_type]
    is_shared_objects = 'objects_path' in repo_config
    is_shared_cache = 'cache_path' in repo_config

    if not validate_config_spec_hash(self.__config):
        log.error(
            '.ml-git/config.yaml invalid. It should look something like this:\n%s'
            % get_yaml_str(get_sample_config_spec('somebucket', 'someprofile', 'someregion')),
            class_name=REPOSITORY_CLASS_NAME)
        return None

    path, file = None, None
    try:
        refs_path = get_refs_path(self.__config, repo_type)
        index_path = get_index_path(self.__config, repo_type)
        metadata_path = get_metadata_path(self.__config, repo_type)
        cache_path = get_cache_path(self.__config, repo_type)
        objects_path = get_objects_path(self.__config, repo_type)
        repo = LocalRepository(self.__config, objects_path, repo_type)
        mutability, check_mutability = repo.get_mutability_from_spec(spec, repo_type)

        # A 'sampling' marker in the index metadata means the workspace came
        # from a sampled checkout, so adding to it is rejected.
        sampling_flag = os.path.exists(os.path.join(index_path, 'metadata', spec, 'sampling'))
        if sampling_flag:
            log.error(
                'You cannot add new data to an entity that is based on a checkout with the --sampling option.',
                class_name=REPOSITORY_CLASS_NAME)
            return

        if not mutability:
            return

        if not check_mutability:
            log.error('Spec mutability cannot be changed.', class_name=REPOSITORY_CLASS_NAME)
            return

        if not self._has_new_data(repo, spec):
            return None

        ref = Refs(refs_path, spec, repo_type)
        tag, _ = ref.branch()
        categories_path = get_path_with_categories(tag)
        path, file = search_spec_file(self.__repo_type, spec, categories_path)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return

    if path is None:
        return

    spec_path = os.path.join(path, file)
    if not self._is_spec_valid(spec_path):
        return None

    # Check tag before anything to avoid creating unstable state
    log.debug('Repository: check if tag already exists', class_name=REPOSITORY_CLASS_NAME)

    m = Metadata(spec, metadata_path, self.__config, repo_type)
    if not m.check_exists():
        log.error('The %s has not been initialized' % self.__repo_type, class_name=REPOSITORY_CLASS_NAME)
        return

    try:
        m.update()
    except Exception as e:
        # Updating the metadata is best effort: the add continues with the
        # local state, but the failure is no longer swallowed silently.
        log.debug('Could not update the metadata repository before add: %s' % e,
                  class_name=REPOSITORY_CLASS_NAME)

    # get version of current manifest file
    manifest = self._get_current_manifest_file(m, tag)

    try:
        # adds chunks to ml-git Index
        log.info('%s adding path [%s] to ml-git index' % (repo_type, path), class_name=REPOSITORY_CLASS_NAME)
        with change_mask_for_routine(is_shared_objects):
            idx = MultihashIndex(spec, index_path, objects_path, mutability, cache_path)
            idx.add(path, manifest, file_path)

        # create hard links in ml-git Cache
        self.create_hard_links_in_cache(cache_path, index_path, is_shared_cache, mutability, path, spec)
    except Exception as e:
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
        return None

    if bump_version and not increment_version_in_spec(spec_path, self.__repo_type):
        return None

    idx.add_metadata(path, file)

    self._check_corrupted_files(spec, repo)

    # Run file check
    if run_fsck:
        self.fsck()