def _validate_collection_definition(self, definition, **kwargs):
    """Load a collection definition and check that its required keys exist.

    Parameters
    ----------
    definition : str or dict
        One of: a key of ``FILE_ALIAS_DICT`` (loaded through
        ``load_collection_input_file``), a dictionary (shallow-copied so the
        caller's object is not the one returned), or a path to a YAML file.
    **kwargs
        Forwarded to ``load_collection_input_file`` when ``definition`` is
        an alias; unused otherwise.

    Returns
    -------
    The loaded collection definition.

    Raises
    ------
    ValueError
        If the loaded content is not a mapping, or if the ``name`` or
        ``collection_type`` key is missing (or ``None``).

    Notes
    -----
    ``self.collection_type`` is assigned before validation, so it is set even
    when a ``ValueError`` is raised.
    """
    if isinstance(definition, str) and definition in FILE_ALIAS_DICT:
        input_collection = load_collection_input_file(definition, **kwargs)
    elif isinstance(definition, dict):
        input_collection = definition.copy()
    else:
        # Anything else is treated as a path; I/O and parse errors propagate
        # to the caller unchanged.
        with open(os.path.abspath(definition)) as f:
            input_collection = yaml_load(f)

    # An empty or non-mapping YAML document would otherwise surface as an
    # opaque AttributeError on ``.get`` below.
    if not callable(getattr(input_collection, 'get', None)):
        raise ValueError(
            f'{definition} does not contain a mapping of collection definition keys'
        )

    name = input_collection.get('name', None)
    self.collection_type = input_collection.get('collection_type', None)

    if name is None or self.collection_type is None:
        raise ValueError(
            f'name and/or collection_type keys are missing from {definition}'
        )

    # Only set up the S3 connection once the definition is known to be valid.
    if self.collection_type == 'cesm-aws':
        self._get_s3_connection_info()

    return input_collection
def parse(self, text):
    """Parse catalog YAML after adjusting the sources' default storage mode.

    The text is loaded into a Python structure and handed to
    ``self._set_dal_default_storage_mode`` (its return value is ignored, so it
    presumably edits the structure in place -- confirm against that helper).
    The structure is then dumped back to block-style YAML and passed to the
    parent class's ``parse``, which populates the catalog.
    """
    catalog_spec = yaml_load(text)
    self._set_dal_default_storage_mode(catalog_spec)
    # Delegate the actual catalog construction to the parent YAML parser.
    super().parse(yaml.dump(catalog_spec, default_flow_style=False))
def load_conf(fn=None):
    """Merge settings from a YAML file into the global ``conf`` mapping.

    When ``fn`` is None the path returned by ``cfile()`` is used, i.e. the
    file in the global config directory (per the original note: the
    INTAKE_CONF_DIR env-var if defined, otherwise ~/.intake/).

    A path that is not an existing regular file is ignored silently. A
    failure while parsing or applying the file is logged as a warning rather
    than raised.
    """
    path = cfile() if fn is None else fn
    if not os.path.isfile(path):
        return

    with open(path) as stream:
        try:
            conf.update(yaml_load(stream))
        except Exception as err:
            # Best effort: a broken config file must not stop the caller.
            logger.warning(
                'Failure to load config file "{fn}": {e}'.format(fn=path, e=err)
            )
def parse(self, text):
    """Populate the catalog from YAML text, with a nested-catalog fallback.

    The parent class's ``parse`` is tried first. If it raises
    ``ValidationError``, the text is interpreted as a nested catalog, which
    requires a truthy ``hierarchical_catalog`` entry under ``metadata``.

    Parameters
    ----------
    text : str
        YAML source of the catalog; also stored on ``self.text``.

    Raises
    ------
    AssertionError
        If the parent parser rejected the text and the document does not
        carry a truthy ``metadata.hierarchical_catalog`` entry.
    """
    self.text = text
    data = yaml_load(self.text)
    try:
        # Reuse the default YAMLFileCatalog parser first; its parse() does
        # the heavy lifting of populating the catalog.
        super().parse(text)
    except ValidationError:
        # Fall back to parsing the document as a nested catalog.
        metadata = data.get('metadata') if isinstance(data, dict) else None
        hierarchical = isinstance(metadata, dict) and metadata.get('hierarchical_catalog')
        if not hierarchical:
            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O``; the exception type is the one callers saw before.
            raise AssertionError(
                "nested_yaml_cat requires a `hierarchical_catalog: true` metadata entry"
            )

        entry = self._create_nested_catalog(self.name, data)
        self._entries = entry.entries
        # Values already set on this instance win over the nested entry's,
        # except for the name, where the entry's own name is preferred.
        self.metadata = self.metadata or entry.metadata
        self.name = entry.name or self.name_from_path
        self.description = self.description or entry.description
def load_collection_input_file(
    name=None,
    cache=True,
    cache_dir=_default_cache_dir,
    github_url='https://github.com/NCAR/intake-esm-datastore',
    branch='master',
    extension='collection-input',
):
    """Load collection definition from an online repository.

    Parameters
    ----------
    name: str, default (None)
        Name of the yaml file containing collection definition, without the
        .yml extension. If None, the result of
        ``_get_collection_input_files()`` is returned instead.
    cache: bool, optional
        If True, keep the downloaded files locally for use on later calls.
        If False, the local copies are removed once the definition is loaded.
    cache_dir: str, optional
        The directory in which to search for and cache the downloaded file.
    github_url: str, optional
        Github repository where the collection definition is stored.
    branch: str, optional
        The git branch to download from.
    extension: str, optional
        Subfolder within the repository where the collection definition file
        is stored. If None, files are fetched from the branch root.

    Returns
    -------
    The desired collection definition dictionary

    Raises
    ------
    IOError
        If the locally cached ``.md5`` file differs from the remote one. Both
        local files are deleted first, so a retry downloads fresh copies.
    """
    if name is None:
        return _get_collection_input_files()

    name, ext = os.path.splitext(name)
    if not ext.endswith('.yml'):
        ext += '.yml'
    # Short aliases map onto the real file name in the repository.
    if name in FILE_ALIAS_DICT:
        name = FILE_ALIAS_DICT[name]

    longdir = os.path.expanduser(cache_dir)
    fullname = name + ext
    md5name = name + '.md5'
    localfile = os.sep.join((longdir, fullname))
    md5file = os.sep.join((longdir, md5name))

    url_parts = [github_url, 'raw', branch]
    if extension is not None:
        url_parts.append(extension)
    url = '/'.join(url_parts + [fullname])
    url_md5 = '/'.join(url_parts + [md5name])

    # Download when either file is missing: a data file without its .md5
    # companion (e.g. after an interrupted first download) would otherwise
    # make every later call fail when opening the .md5 file.
    if not (os.path.exists(localfile) and os.path.exists(md5file)):
        os.makedirs(longdir, exist_ok=True)
        urlretrieve(url, localfile)
        urlretrieve(url_md5, md5file)

    # NOTE: only the two .md5 files are compared; no checksum of the data
    # file's own content is computed here.
    with open(md5file, 'r') as f:
        localmd5 = f.read()
    with urlopen(url_md5) as f:
        remotemd5 = f.read().decode('utf-8')

    if localmd5 != remotemd5:
        os.remove(localfile)
        os.remove(md5file)
        msg = (
            ' Try downloading the file again. \n'
            'There was a confliction between your local .md5 file compared to the one in the remote '
            'repository, so the local copy has been removed to resolve the issue. '
        )
        raise IOError(msg)

    with open(localfile) as f:
        d = yaml_load(f)

    if not cache:
        # Leave nothing behind when caching is disabled.
        os.remove(localfile)
        os.remove(md5file)
    return d