def _resolve_url(base_url, path): """ If path is a URL or an absolute path return URL If path is a relative path return base_url joined with path >>> _resolve_url('file:///foo/abc', 'bar') 'file:///foo/bar' >>> _resolve_url('file:///foo/abc', 'file:///bar') 'file:///bar' >>> _resolve_url('file:///foo/abc', None) 'file:///foo/abc' >>> _resolve_url('file:///foo/abc', '/bar') 'file:///bar' >>> _resolve_url('http://foo.com/abc/odc-metadata.yaml', 'band-5.tif') 'http://foo.com/abc/band-5.tif' >>> _resolve_url('s3://foo.com/abc/odc-metadata.yaml', 'band-5.tif') 's3://foo.com/abc/band-5.tif' >>> _resolve_url('s3://foo.com/abc/odc-metadata.yaml?something', 'band-5.tif') 's3://foo.com/abc/band-5.tif' """ if path: if is_url(path): url_str = path elif Path(path).is_absolute(): url_str = Path(path).as_uri() else: url_str = urljoin(base_url, path) else: url_str = base_url return url_str
def get_metadata_path(possible_path: Union[str, Path]) -> str:
    """
    Locate the metadata document for a given input/dataset path.

    Handles both remote URLs and local files. Candidates are tried in this
    order, and the first hit is returned as a string:

    1. A URL string is returned untouched; no lookup is attempted.
    2. The path itself, if it is an existing file of a supported document type.
    3. A sibling file named after the input with '.agdc-md' appended
       (e.g. '.agdc-md.yaml').
    4. If the input is a directory, an 'agdc-metadata' file inside it
       (e.g. 'agdc-metadata.yaml') describing all contained datasets.

    :raises ValueError: if no candidate metadata document is found.
    """
    # URLs must be exact: skip any sort of investigation or mapping for them.
    if isinstance(possible_path, str) and is_url(possible_path):
        return possible_path

    dataset_path = Path(possible_path)

    # The caller may have pointed directly at a metadata file.
    if dataset_path.is_file() and is_supported_document_type(dataset_path):
        return str(dataset_path)

    def _fallback_stems():
        # Produced lazily so the directory check only happens once the
        # sibling-file lookup has already failed.
        yield dataset_path.parent / '{}.agdc-md'.format(dataset_path.name)
        if dataset_path.is_dir():
            yield dataset_path / 'agdc-metadata'

    for stem in _fallback_stems():
        match = _find_any_metadata_suffix(stem)
        if match:
            return str(match)

    raise ValueError('No metadata found for input %r' % dataset_path)
def expand_paths_as_uris(
    input_paths: Iterable[str],
) -> Generator[Tuple[str, bool], None, None]:
    """
    Expand user-supplied inputs into document URIs.

    Inputs that are already URLs are passed through untouched. Any other
    input is treated as a local path and resolved to an absolute URI. When
    that path is a directory, it is searched recursively and every inner
    path that ``_readable_doc_extension`` recognises (returns non-None for)
    is yielded in place of the directory itself.

    Yields tuples of ``(uri, was_specified)``: the URI as a string, and
    whether it was specified explicitly by the user (``True``) or was
    discovered inside a directory (``False``).
    """
    for input_ in input_paths:
        if is_url(input_):
            yield input_, True
            continue

        path = Path(input_).resolve()
        if not path.is_dir():
            yield path.as_uri(), True
            continue

        for found_path in path.rglob("*"):
            # Build the URI once: it is both what we test and what we yield.
            found_uri = found_path.as_uri()
            if _readable_doc_extension(found_uri) is not None:
                yield found_uri, False
def _web_reference(ref: str):
    """
    Load the schema document referenced by a URL.

    eg. 'http://geojson.org/schemas/Features.json'

    The document is read from a local copy under ``_SCHEMA_BASE``, laid out
    as ``<hostname>/<path>``. If no local copy exists and ``ALLOW_INTERNET``
    is set, the schema is downloaded into that location first.

    :param ref: URL of the schema.
    :return: whatever ``read_document`` returns for the local copy.
    :raises ValueError: if ``ref`` is not a URL, or if there is no local copy
        and downloading is not allowed.
    """
    if not is_url(ref):
        raise ValueError(f"Expected URL? Got {ref!r}")

    parsed = urllib.parse.urlparse(ref)
    # We used `wget -r` to download the remote schemas locally.
    # It puts into hostname/path folders by default. Eg. 'geojson.org/schema/Feature.json'
    path = _SCHEMA_BASE / f"{parsed.netloc}{parsed.path}"

    if not path.exists():
        if not ALLOW_INTERNET:
            raise ValueError(
                f"No local copy exists of schema {ref!r}.\n"
                f"\tPerhaps we need to add it to ./update.sh in the tests folder?\n"
                f"\t(looked in {path})"
            )
        path.parent.mkdir(parents=True, exist_ok=True)
        # Close the HTTP response deterministically rather than leaving the
        # connection open until garbage collection.
        with urlopen(ref) as response:
            path.write_bytes(response.read())

    return read_document(path)
def _resolve_url(base_url, path): """ If path is a URL or an absolute path return URL If path is a relative path return base_url joined with path >>> _resolve_url('file:///foo/abc', 'bar') 'file:///foo/bar' >>> _resolve_url('file:///foo/abc', 'file:///bar') 'file:///bar' >>> _resolve_url('file:///foo/abc', None) 'file:///foo/abc' >>> _resolve_url('file:///foo/abc', '/bar') 'file:///bar' """ if path: if is_url(path): url_str = path elif Path(path).is_absolute(): url_str = Path(path).as_uri() else: url_str = urljoin(base_url, path) else: url_str = base_url return url_str
def get_extension(url: str) -> jsonschema.Draft7Validator:
    """
    Load the schema for the stac extension identified by ``url``.

    The referenced document is fetched through ``_web_reference`` and handed
    to ``load_schema_doc``, with the URL recorded as its location.

    :raises ValueError: if ``url`` is not a URL.
    """
    if is_url(url):
        schema_doc = _web_reference(url)
        return load_schema_doc(schema_doc, location=url)

    raise ValueError(
        f"stac extensions are now expected to be URLs in 1.0.0. " f"Got {url!r}"
    )