예제 #1
0
def _resolve_url(base_url, path):
    """
    If path is a URL or an absolute path return URL
    If path is a relative path return base_url joined with path

    >>> _resolve_url('file:///foo/abc', 'bar')
    'file:///foo/bar'
    >>> _resolve_url('file:///foo/abc', 'file:///bar')
    'file:///bar'
    >>> _resolve_url('file:///foo/abc', None)
    'file:///foo/abc'
    >>> _resolve_url('file:///foo/abc', '/bar')
    'file:///bar'
    >>> _resolve_url('http://foo.com/abc/odc-metadata.yaml', 'band-5.tif')
    'http://foo.com/abc/band-5.tif'
    >>> _resolve_url('s3://foo.com/abc/odc-metadata.yaml', 'band-5.tif')
    's3://foo.com/abc/band-5.tif'
    >>> _resolve_url('s3://foo.com/abc/odc-metadata.yaml?something', 'band-5.tif')
    's3://foo.com/abc/band-5.tif'
    """
    if path:
        if is_url(path):
            url_str = path
        elif Path(path).is_absolute():
            url_str = Path(path).as_uri()
        else:
            url_str = urljoin(base_url, path)
    else:
        url_str = base_url
    return url_str
예제 #2
0
def get_metadata_path(possible_path: Union[str, Path]) -> str:
    """
    Locate the metadata document that belongs to an input/dataset path.

    Works for both local files and remote URLs. Candidates are tried in
    this order:

    1. A URL string is returned untouched (no lookup is attempted).
    2. The path itself, if it is a file of a supported document type.
    3. A sibling named ``<name>.agdc-md`` plus any metadata suffix.
    4. For a directory, an ``agdc-metadata`` document inside it.

    :raises ValueError: if none of the candidates exist.
    """
    # URLs must be exact; no investigation or mapping is done for them.
    if isinstance(possible_path, str) and is_url(possible_path):
        return possible_path

    target = Path(possible_path)

    # The caller may have pointed straight at a metadata file.
    if target.is_file() and is_supported_document_type(target):
        return str(target)

    def _candidate_stems():
        # Sibling file with the '.agdc-md' marker appended to the name.
        yield target.parent / (target.name + '.agdc-md')
        # A directory may carry one document describing all contained datasets.
        if target.is_dir():
            yield target / 'agdc-metadata'

    # The generator is lazy, so each candidate is only probed if the
    # previous one was not found.
    for stem in _candidate_stems():
        match = _find_any_metadata_suffix(stem)
        if match:
            return str(match)

    raise ValueError('No metadata found for input %r' % target)
예제 #3
0
def expand_paths_as_uris(
    input_paths: Iterable[str],
) -> Generator[Tuple[str, bool], None, None]:
    """
    Expand the given inputs into URIs, searching directories for documents.

    For each input:

    - a URL is yielded unchanged;
    - a directory is searched recursively, and every entry whose URI has a
      readable document extension (per ``_readable_doc_extension``) is
      yielded;
    - any other path is resolved and yielded as a ``file://`` URI.

    Yields tuples of ``(uri, explicit)``: the URI as a string, and whether
    it was specified explicitly by the user (``True``) or discovered inside
    a directory (``False``).
    """
    for input_ in input_paths:
        if is_url(input_):
            yield input_, True
            continue

        # Resolve first so that as_uri() always receives an absolute path.
        path = Path(input_).resolve()
        if not path.is_dir():
            yield path.as_uri(), True
            continue

        for found_path in path.rglob("*"):
            # Build the URI once; it is used for both the check and the result.
            found_uri = found_path.as_uri()
            if _readable_doc_extension(found_uri) is not None:
                yield found_uri, False
예제 #4
0
def _web_reference(ref: str):
    """
    Load a schema that is referenced via a URL, using a local copy.

    eg. 'http://geojson.org/schemas/Features.json'

    The URL's host and path are mapped to a file beneath ``_SCHEMA_BASE``.
    If that file does not exist and ``ALLOW_INTERNET`` is truthy, the schema
    is downloaded from ``ref`` and saved there before being read.

    :param ref: URL of the schema.
    :returns: the result of ``read_document`` on the local copy.
    :raises ValueError: if ``ref`` is not a URL, or if there is no local
        copy and ``ALLOW_INTERNET`` is falsy.
    """
    if not is_url(ref):
        raise ValueError(f"Expected URL? Got {ref!r}")

    parsed = urllib.parse.urlparse(ref)
    # We used `wget -r` to download the remote schemas locally.
    # It puts into hostname/path folders by default. Eg. 'geojson.org/schema/Feature.json'
    path = _SCHEMA_BASE / f"{parsed.netloc}{parsed.path}"

    if not path.exists():
        if not ALLOW_INTERNET:
            raise ValueError(
                f"No local copy exists of schema {ref!r}.\n"
                f"\tPerhaps we need to add it to ./update.sh in the tests folder?\n"
                f"\t(looked in {path})"
            )
        path.parent.mkdir(parents=True, exist_ok=True)
        # Close the response explicitly instead of leaving the connection
        # open until it happens to be garbage-collected.
        with urlopen(ref) as response:
            path.write_bytes(response.read())

    return read_document(path)
예제 #5
0
def _resolve_url(base_url, path):
    """
    If path is a URL or an absolute path return URL
    If path is a relative path return base_url joined with path

    >>> _resolve_url('file:///foo/abc', 'bar')
    'file:///foo/bar'
    >>> _resolve_url('file:///foo/abc', 'file:///bar')
    'file:///bar'
    >>> _resolve_url('file:///foo/abc', None)
    'file:///foo/abc'
    >>> _resolve_url('file:///foo/abc', '/bar')
    'file:///bar'
    """
    if path:
        if is_url(path):
            url_str = path
        elif Path(path).is_absolute():
            url_str = Path(path).as_uri()
        else:
            url_str = urljoin(base_url, path)
    else:
        url_str = base_url
    return url_str
예제 #6
0
def get_extension(url: str) -> jsonschema.Draft7Validator:
    """
    Load the schema for a STAC extension identified by its URL.

    The document is fetched with ``_web_reference`` and passed to
    ``load_schema_doc`` with the URL as its location.

    :raises ValueError: if ``url`` is not a URL.
    """
    if is_url(url):
        schema_doc = _web_reference(url)
        return load_schema_doc(schema_doc, location=url)

    raise ValueError(
        f"stac extensions are now expected to be URLs in 1.0.0. Got {url!r}"
    )