Example #1
0
    def _validate_collection_definition(self, definition, **kwargs):
        """Resolve *definition* into a validated collection-definition dict.

        Parameters
        ----------
        definition : str or dict
            Either a known alias (a key of ``FILE_ALIAS_DICT``), a path to
            a local YAML file, or an already-parsed definition dictionary.
        **kwargs
            Forwarded to `load_collection_input_file` for alias lookups.

        Returns
        -------
        dict
            The collection definition.

        Raises
        ------
        ValueError
            If the ``name`` or ``collection_type`` key is missing.
        """
        if isinstance(definition, str) and definition in FILE_ALIAS_DICT:
            # Known alias: fetch the definition from the online repository.
            input_collection = load_collection_input_file(definition, **kwargs)

        elif isinstance(definition, dict):
            # Shallow copy so later mutations don't leak back to the caller.
            input_collection = definition.copy()

        else:
            # Otherwise treat it as a path to a local YAML file.
            # (Removed a no-op try/except that merely re-raised.)
            with open(os.path.abspath(definition)) as f:
                input_collection = yaml_load(f)

        name = input_collection.get('name')
        self.collection_type = input_collection.get('collection_type')
        if self.collection_type == 'cesm-aws':
            # AWS-hosted collections need S3 connection details up front.
            self._get_s3_connection_info()
        if name is None or self.collection_type is None:
            raise ValueError(
                f'name and/or collection_type keys are missing from {definition}'
            )
        return input_collection
Example #2
0
    def parse(self, text):
        """Parse catalog *text* after rewriting sources' storage mode.

        Adjusts each source's default DAL storage mode in the loaded YAML,
        re-serializes it, and hands the result to the parent
        NestedYAMLFileCatalog parser, which populates the catalog.
        """
        parsed = yaml_load(text)

        # Rewrite the sources' default storage mode in place.
        self._set_dal_default_storage_mode(parsed)

        # Delegate the heavy lifting of populating the catalog to the
        # default NestedYAMLFileCatalog YAML parser.
        super().parse(yaml.dump(parsed, default_flow_style=False))
Example #3
0
def load_conf(fn=None):
    """Update the global config from a YAML file.

    If fn is None, looks in the global config directory, which is either
    defined by the INTAKE_CONF_DIR env-var or is ~/.intake/ .
    """
    path = cfile() if fn is None else fn
    # Missing file is not an error: the defaults simply stay in effect.
    if not os.path.isfile(path):
        return
    with open(path) as f:
        try:
            conf.update(yaml_load(f))
        except Exception as e:
            # Best-effort load: a malformed config is logged, never fatal.
            logger.warning('Failure to load config file "{fn}": {e}'
                           ''.format(fn=path, e=e))
    def parse(self, text):
        """Parse catalog *text*, with a nested-catalog fallback.

        First tries the parent YAMLFileCatalog parser; if that raises
        ValidationError, the YAML is re-interpreted as a hierarchical
        (nested) catalog. Nested catalogs must declare
        ``hierarchical_catalog: true`` under ``metadata``.

        Parameters
        ----------
        text : str
            Raw YAML catalog source; also stored on ``self.text``.
        """
        self.text = text
        data = yaml_load(self.text)

        try:
            # Try the default YAMLFileCatalog first
            # Reuse default YAMLFileCatalog YAML parser
            # parse() does the heavy lifting of populating the catalog
            super().parse(text)
        except ValidationError:
            # Fall back: parse as a nested catalog, gated on an explicit
            # metadata flag so malformed flat catalogs still fail loudly.
            # NOTE(review): assert-based validation is stripped under -O;
            # consider raising an exception instead — confirm with callers.
            assert_msg = "nested_yaml_cat requires a `hierarchical_catalog: true` metadata entry"
            assert 'metadata' in data, assert_msg
            assert 'hierarchical_catalog' in data['metadata'], assert_msg
            assert data['metadata']['hierarchical_catalog'], assert_msg

            entry = self._create_nested_catalog(self.name, data)
            # Adopt the nested entry's contents, preferring values already
            # set on this catalog instance where the or-chains allow it.
            self._entries = entry.entries
            self.metadata = self.metadata or entry.metadata
            self.name = entry.name or self.name_from_path
            self.description = self.description or entry.description
def load_collection_input_file(
    name=None,
    cache=True,
    cache_dir=_default_cache_dir,
    github_url='https://github.com/NCAR/intake-esm-datastore',
    branch='master',
    extension='collection-input',
):
    """Load collection definition from an online repository.

    Parameters
    ----------
    name : str, default None
        Name of the yaml file containing the collection definition, without
        the .yml extension. If None, this function returns the available
        collection definitions instead.

    cache : bool, optional
        If True, cache the collection definition locally for later calls.

    cache_dir : str, optional
        The directory in which to search for and cache the downloaded file.

    github_url : str, optional
        Github repository where the collection definition is stored.

    branch : str, optional
        The git branch to download from.

    extension : str, optional
        Subfolder within the repository where the collection definition
        file is stored.

    Returns
    -------
    dict
        The desired collection definition dictionary.

    Raises
    ------
    IOError
        If the cached md5 checksum disagrees with the remote one (the
        stale local copies are removed so a retry can re-download).
    """

    if name is None:
        # No name given: just report what is available.
        return _get_collection_input_files()

    name, ext = os.path.splitext(name)
    # NOTE(review): an input like 'foo.yaml' yields ext '.yaml.yml' here,
    # since '.yaml' does not end with '.yml' — confirm intended inputs.
    if not ext.endswith('.yml'):
        ext += '.yml'

    # Resolve known aliases to canonical file names.
    if name in FILE_ALIAS_DICT:
        name = FILE_ALIAS_DICT[name]

    longdir = os.path.expanduser(cache_dir)
    fullname = name + ext
    localfile = os.path.join(longdir, fullname)
    md5name = name + '.md5'
    md5file = os.path.join(longdir, md5name)

    # Build raw-content URLs for the definition file and its checksum.
    if extension is not None:
        url = '/'.join((github_url, 'raw', branch, extension, fullname))
        url_md5 = '/'.join((github_url, 'raw', branch, extension, md5name))

    else:
        url = '/'.join((github_url, 'raw', branch, fullname))
        url_md5 = '/'.join((github_url, 'raw', branch, md5name))

    # Download both the definition and its checksum on first use.
    if not os.path.exists(localfile):
        os.makedirs(longdir, exist_ok=True)
        urlretrieve(url, localfile)
        urlretrieve(url_md5, md5file)

    with open(md5file, 'r') as f:
        localmd5 = f.read()

    with urlopen(url_md5) as f:
        remotemd5 = f.read().decode('utf-8')

    # Compare checksums ignoring surrounding whitespace: .md5 files
    # conventionally end with a newline, which is not meaningful.
    if localmd5.strip() != remotemd5.strip():
        os.remove(localfile)
        os.remove(md5file)
        msg = """
        Try downloading the file again. There was a conflict between
        your local .md5 file compared to the one in the remote repository,
        so the local copy has been removed to resolve the issue.
        """
        raise IOError(msg)

    with open(localfile) as f:
        d = yaml_load(f)

    if not cache:
        # Caller opted out of caching: drop the downloaded copy.
        os.remove(localfile)

    return d