def load_dataset_definition(path):
    """Return the first metadata document found for *path*, wrapped in SimpleDocNav.

    ``path`` may be a string or a ``pathlib.Path``; the metadata file is
    located via ``get_metadata_path``.  Returns ``None`` when the metadata
    file yields no documents.
    """
    dataset_path = path if isinstance(path, pathlib.Path) else pathlib.Path(path)
    metadata_file = get_metadata_path(dataset_path)
    for _unused, document in read_documents(metadata_file):
        return SimpleDocNav(document)
    return None
def load_datasets(datasets, rules):
    """Yield Dataset objects constructed from the paths in *datasets*.

    Every failure mode — missing metadata, a document that doesn't match
    *rules* (BadMatch), an inconsistent dataset, or an unreadable metadata
    file (InvalidDocException) — is logged and the offending entry skipped,
    so one bad dataset never aborts the whole run.
    """
    for dataset_path in datasets:
        metadata_path = get_metadata_path(Path(dataset_path))
        if not metadata_path or not metadata_path.exists():
            _LOG.error('No supported metadata docs found for dataset %s', dataset_path)
            continue
        try:
            for metadata_path, metadata_doc in read_documents(metadata_path):
                uri = metadata_path.absolute().as_uri()
                try:
                    dataset = create_dataset(metadata_doc, uri, rules)
                except BadMatch as e:
                    _LOG.error('Unable to create Dataset for %s: %s', uri, e)
                    continue
                is_consistent, reason = check_dataset_consistent(dataset)
                if is_consistent:
                    yield dataset
                else:
                    _LOG.error("Dataset %s inconsistency: %s", dataset.id, reason)
        except InvalidDocException:
            _LOG.error("Failed reading documents from %s", metadata_path)
            continue
def index_cmd(index, match_rules, dtype, auto_match, dry_run, datasets):
    """CLI entry point: match dataset metadata documents against rules and add them to *index*.

    Exactly one rule source must be given: an explicit rules file
    (``match_rules``), a product type (``dtype``), or ``auto_match``.

    :param index: the datacube index; matched datasets are added via
        ``index.datasets.add`` unless ``dry_run`` is set.
    :param datasets: iterable of dataset paths to process.
    :raises ValueError: when a dataset path has no supported metadata docs.
    """
    if not (match_rules or dtype or auto_match):
        _LOG.error('Must specify one of [--match-rules, --type, --auto-match]')
        return

    if match_rules:
        rules = load_rules_from_file(match_rules, index)
    else:
        assert dtype or auto_match
        rules = load_rules_from_types(index, dtype)
    if rules is None:
        return

    for dataset_path in datasets:
        metadata_path = get_metadata_path(Path(dataset_path))
        if not metadata_path or not metadata_path.exists():
            raise ValueError('No supported metadata docs found for dataset {}'.format(dataset_path))
        for metadata_path, metadata_doc in read_documents(metadata_path):
            uri = metadata_path.absolute().as_uri()
            try:
                dataset = match_dataset(metadata_doc, uri, rules)
            except RuntimeError as e:
                _LOG.error('Unable to create Dataset for %s: %s', uri, e)
                continue
            # BUG FIX: check_dataset_consistent() returns an
            # (is_consistent, reason) tuple (see load_datasets); the old code
            # tested the truthiness of the tuple itself, which is always True,
            # so inconsistent datasets were silently indexed. Unpack it and
            # log the same way load_datasets does.
            is_consistent, reason = check_dataset_consistent(dataset)
            if not is_consistent:
                _LOG.error("Dataset %s inconsistency: %s", dataset.id, reason)
                continue
            _LOG.info('Matched %s', dataset)
            if not dry_run:
                index.datasets.add(dataset)
def test_find_metadata_path():
    """Exercise get_metadata_path() against every supported on-disk layout."""
    tree = util.write_files({
        'directory_dataset': {
            'file1.txt': '',
            'file2.txt': '',
            'agdc-metadata.yaml.gz': ''
        },
        'file_dataset.tif': '',
        'file_dataset.tif.agdc-md.yaml': '',
        'dataset_metadata.yaml': '',
        'no_metadata.tif': '',
    })

    # Passing a metadata file directly returns it unchanged.
    direct = get_metadata_path(tree.joinpath('dataset_metadata.yaml'))
    assert direct.absolute() == tree.joinpath('dataset_metadata.yaml').absolute()

    # A dataset directory resolves to its internal 'agdc-metadata' file.
    in_dir = get_metadata_path(tree.joinpath('directory_dataset'))
    assert in_dir.absolute() == tree.joinpath('directory_dataset', 'agdc-metadata.yaml.gz').absolute()

    # A plain data file resolves to its '*.agdc-md.yaml' sibling.
    sibling = get_metadata_path(tree.joinpath('file_dataset.tif'))
    assert sibling.absolute() == tree.joinpath('file_dataset.tif.agdc-md.yaml').absolute()

    # No metadata anywhere -> ValueError.
    with pytest.raises(ValueError):
        get_metadata_path(tree.joinpath('no_metadata.tif'))

    # A path that does not exist at all -> ValueError.
    with pytest.raises(ValueError):
        get_metadata_path(tree.joinpath('missing-dataset.tif'))
def test_find_metadata_path(): files = util.write_files({ 'directory_dataset': { 'file1.txt': '', 'file2.txt': '', 'agdc-metadata.yaml.gz': '' }, 'file_dataset.tif': '', 'file_dataset.tif.agdc-md.yaml': '', 'dataset_metadata.yaml': '', 'no_metadata.tif': '', }) # A metadata file can be specified directly. path = get_metadata_path(files.joinpath('dataset_metadata.yaml')) assert path.absolute() == files.joinpath('dataset_metadata.yaml').absolute() # A dataset directory will have an internal 'agdc-metadata' file. path = get_metadata_path(files.joinpath('directory_dataset')) assert path.absolute() == files.joinpath('directory_dataset', 'agdc-metadata.yaml.gz').absolute() # Other files can have a sibling file ending in 'agdc-md.yaml' path = get_metadata_path(files.joinpath('file_dataset.tif')) assert path.absolute() == files.joinpath('file_dataset.tif.agdc-md.yaml').absolute() # Lack of metadata raises an error. with pytest.raises(ValueError): get_metadata_path(files.joinpath('no_metadata.tif')) # Nonexistent dataset raises a ValueError. with pytest.raises(ValueError): get_metadata_path(files.joinpath('missing-dataset.tif'))
def test_get_metadata_path():
    """Cover get_metadata_path() for direct files, directories, siblings and URLs."""
    layout = {
        'directory_dataset': {
            'file1.txt': '',
            'file2.txt': '',
            'agdc-metadata.yaml.gz': ''
        },
        'file_dataset.tif': '',
        'file_dataset.tif.agdc-md.yaml': '',
        'dataset_metadata.yaml': '',
        'no_metadata.tif': '',
    }
    out_dir = write_files(layout)
    assert_file_structure(out_dir, layout)

    # A metadata file given directly is returned as-is.
    found = get_metadata_path(out_dir.joinpath('dataset_metadata.yaml'))
    assert Path(found).absolute() == out_dir.joinpath('dataset_metadata.yaml').absolute()

    # A dataset directory resolves to its internal 'agdc-metadata' file.
    found = get_metadata_path(out_dir.joinpath('directory_dataset'))
    assert Path(found).absolute() == out_dir.joinpath('directory_dataset', 'agdc-metadata.yaml.gz').absolute()

    # Other files resolve to a sibling ending in 'agdc-md.yaml'.
    found = get_metadata_path(out_dir.joinpath('file_dataset.tif'))
    assert Path(found).absolute() == out_dir.joinpath('file_dataset.tif.agdc-md.yaml').absolute()

    # URLs are always returned unchanged.
    example_url = 'http://localhost/dataset.yaml'
    assert get_metadata_path(example_url) == example_url

    # No metadata anywhere -> ValueError.
    with pytest.raises(ValueError):
        get_metadata_path(out_dir.joinpath('no_metadata.tif'))

    # A nonexistent dataset path -> ValueError.
    with pytest.raises(ValueError):
        get_metadata_path(out_dir.joinpath('missing-dataset.tif'))
def resolve_doc_files(paths, on_error):
    """Yield the metadata document path for each entry in *paths*.

    Entries whose metadata cannot be located (``get_metadata_path`` raises
    ``ValueError``) are reported through ``on_error(path, exception)`` and
    skipped instead of aborting the iteration.
    """
    for raw_path in paths:
        try:
            metadata = get_metadata_path(Path(raw_path))
        except ValueError as err:
            on_error(raw_path, err)
        else:
            yield metadata