Exemple #1
0
def load_dataset_definition(path):
    """Return the first metadata document of a dataset, wrapped for navigation.

    :param path: location of the dataset; a ``pathlib.Path`` or any value
        accepted by the ``pathlib.Path`` constructor.
    :return: ``SimpleDocNav`` built from the first document that
        ``read_documents`` yields for the resolved metadata path, or ``None``
        when it yields no documents at all.
    """
    dataset_path = path if isinstance(path, pathlib.Path) else pathlib.Path(path)
    metadata_file = get_metadata_path(dataset_path)

    # Only the first document matters; any later ones are never read.
    return next(
        (SimpleDocNav(doc) for _, doc in read_documents(metadata_file)),
        None,
    )
Exemple #2
0
def load_datasets(datasets, rules):
    """Lazily yield a dataset for every usable metadata document.

    For each entry of ``datasets`` the metadata path is resolved with
    ``get_metadata_path``, every document found there is read, and a dataset
    is built with ``create_dataset(doc, uri, rules)``. Problems are reported
    through ``_LOG.error`` and skipped instead of being raised:

    * no metadata path, or the path does not exist: the entry is skipped;
    * ``BadMatch`` from ``create_dataset``: the document is skipped;
    * ``check_dataset_consistent`` reports an inconsistency: the document
      is skipped;
    * ``InvalidDocException`` while reading: the rest of that entry is
      skipped.

    :param datasets: iterable of dataset locations accepted by ``Path``.
    :param rules: passed through unchanged to ``create_dataset``.
    :return: generator of the datasets that passed every check.
    """
    for location in datasets:
        source = get_metadata_path(Path(location))
        if not (source and source.exists()):
            _LOG.error('No supported metadata docs found for dataset %s', location)
            continue

        try:
            # The loop rebinds ``source``, so the read-failure message below
            # reports the most recently yielded path (or the resolved path
            # when nothing was yielded before the failure).
            for source, document in read_documents(source):
                uri = source.absolute().as_uri()

                try:
                    dataset = create_dataset(document, uri, rules)
                except BadMatch as err:
                    _LOG.error('Unable to create Dataset for %s: %s', uri, err)
                    continue

                consistent, why = check_dataset_consistent(dataset)
                if consistent:
                    yield dataset
                else:
                    _LOG.error("Dataset %s inconsistency: %s", dataset.id, why)
        except InvalidDocException:
            _LOG.error("Failed reading documents from %s", source)
Exemple #3
0
def index_cmd(index, match_rules, dtype, auto_match, dry_run, datasets):
    """Match dataset metadata documents against rules and add them to the index.

    Rules come from ``load_rules_from_file(match_rules, index)`` when
    ``match_rules`` is given, otherwise from
    ``load_rules_from_types(index, dtype)``. The command logs an error and
    returns when none of ``match_rules``, ``dtype`` or ``auto_match`` is set,
    and returns silently when rule loading yields ``None``.

    Every document of every entry in ``datasets`` is matched with
    ``match_dataset``. Documents that fail to match (``RuntimeError``) or fail
    the consistency check are logged and skipped. Matched datasets are added
    through ``index.datasets.add`` unless ``dry_run`` is truthy.

    :raises ValueError: when an entry has no metadata path or the path does
        not exist; this stops processing of the remaining entries.
    """
    if not any((match_rules, dtype, auto_match)):
        _LOG.error('Must specify one of [--match-rules, --type, --auto-match]')
        return

    if match_rules:
        rules = load_rules_from_file(match_rules, index)
    else:
        assert dtype or auto_match
        rules = load_rules_from_types(index, dtype)

    if rules is None:
        return

    for location in datasets:
        source = get_metadata_path(Path(location))
        if not (source and source.exists()):
            raise ValueError('No supported metadata docs found for dataset {}'.format(location))

        for doc_path, document in read_documents(source):
            uri = doc_path.absolute().as_uri()

            try:
                dataset = match_dataset(document, uri, rules)
            except RuntimeError as err:
                _LOG.error('Unable to create Dataset for %s: %s', uri, err)
                continue

            # NOTE(review): the sibling loader ``load_datasets`` unpacks
            # check_dataset_consistent() as (is_consistent, reason). If that
            # is the signature in effect here, this truthiness test can never
            # fail -- confirm which version of the helper is in use.
            if check_dataset_consistent(dataset):
                _LOG.info('Matched %s', dataset)
                if not dry_run:
                    index.datasets.add(dataset)
            else:
                _LOG.error("Dataset measurements don't match it's type specification %s", dataset.id)
Exemple #4
0
def test_find_metadata_path():
    """get_metadata_path finds the metadata file for each supported layout."""
    files = util.write_files({
        'directory_dataset': {
            'file1.txt': '',
            'file2.txt': '',
            'agdc-metadata.yaml.gz': ''
        },
        'file_dataset.tif': '',
        'file_dataset.tif.agdc-md.yaml': '',
        'dataset_metadata.yaml': '',
        'no_metadata.tif': '',
    })

    # (input, path components of the metadata file expected back)
    resolvable = (
        # A metadata file can be given directly.
        ('dataset_metadata.yaml', ('dataset_metadata.yaml',)),
        # A dataset directory holds an internal 'agdc-metadata' file.
        ('directory_dataset', ('directory_dataset', 'agdc-metadata.yaml.gz')),
        # Any other file may have a sibling ending in 'agdc-md.yaml'.
        ('file_dataset.tif', ('file_dataset.tif.agdc-md.yaml',)),
    )
    for given, expected_parts in resolvable:
        path = get_metadata_path(files.joinpath(given))
        assert path.absolute() == files.joinpath(*expected_parts).absolute()

    # A file without metadata and a nonexistent dataset both raise ValueError.
    for unresolvable in ('no_metadata.tif', 'missing-dataset.tif'):
        with pytest.raises(ValueError):
            get_metadata_path(files.joinpath(unresolvable))
Exemple #5
0
def test_find_metadata_path():
    """Check metadata discovery for direct, directory and sibling-file layouts."""
    layout = {
        'directory_dataset': {
            'file1.txt': '',
            'file2.txt': '',
            'agdc-metadata.yaml.gz': ''
        },
        'file_dataset.tif': '',
        'file_dataset.tif.agdc-md.yaml': '',
        'dataset_metadata.yaml': '',
        'no_metadata.tif': '',
    }
    files = util.write_files(layout)

    def found_for(name):
        """Absolute metadata path reported for the entry called ``name``."""
        return get_metadata_path(files.joinpath(name)).absolute()

    # Pointing straight at a metadata file returns that same file.
    assert found_for('dataset_metadata.yaml') == files.joinpath('dataset_metadata.yaml').absolute()

    # A dataset directory resolves to the 'agdc-metadata' file inside it.
    assert found_for('directory_dataset') == files.joinpath('directory_dataset', 'agdc-metadata.yaml.gz').absolute()

    # A plain file resolves to its sibling ending in 'agdc-md.yaml'.
    assert found_for('file_dataset.tif') == files.joinpath('file_dataset.tif.agdc-md.yaml').absolute()

    # No metadata available: ValueError.
    with pytest.raises(ValueError):
        get_metadata_path(files.joinpath('no_metadata.tif'))

    # Dataset does not exist at all: ValueError.
    with pytest.raises(ValueError):
        get_metadata_path(files.joinpath('missing-dataset.tif'))
Exemple #6
0
def test_get_metadata_path():
    """get_metadata_path handles files, directories, siblings and URLs."""
    layout = {
        'directory_dataset': {
            'file1.txt': '',
            'file2.txt': '',
            'agdc-metadata.yaml.gz': ''
        },
        'file_dataset.tif': '',
        'file_dataset.tif.agdc-md.yaml': '',
        'dataset_metadata.yaml': '',
        'no_metadata.tif': '',
    }

    out_dir = write_files(layout)
    # Confirm the fixture tree was written as requested before relying on it.
    assert_file_structure(out_dir, layout)

    def located(name):
        """Absolute form of the metadata path reported for ``name``."""
        return Path(get_metadata_path(out_dir.joinpath(name))).absolute()

    # A metadata file can be specified directly.
    assert located('dataset_metadata.yaml') == out_dir.joinpath(
        'dataset_metadata.yaml').absolute()

    # A dataset directory has an internal 'agdc-metadata' file.
    assert located('directory_dataset') == out_dir.joinpath(
        'directory_dataset', 'agdc-metadata.yaml.gz').absolute()

    # Other files can have a sibling file ending in 'agdc-md.yaml'.
    assert located('file_dataset.tif') == out_dir.joinpath(
        'file_dataset.tif.agdc-md.yaml').absolute()

    # A URL is handed back unchanged.
    example_url = 'http://localhost/dataset.yaml'
    assert get_metadata_path(example_url) == example_url

    # Missing metadata and a missing dataset both raise ValueError.
    for unresolvable in ('no_metadata.tif', 'missing-dataset.tif'):
        with pytest.raises(ValueError):
            get_metadata_path(out_dir.joinpath(unresolvable))
Exemple #7
0
def resolve_doc_files(paths, on_error):
    """Lazily yield the metadata path for each entry of ``paths``.

    :param paths: iterable of locations accepted by ``Path``.
    :param on_error: callable invoked as ``on_error(entry, exc)`` whenever a
        ``ValueError`` is raised for an entry; that entry yields nothing and
        iteration continues with the next one.
    :return: generator of the values returned by ``get_metadata_path``.
    """
    for candidate in paths:
        try:
            metadata_file = get_metadata_path(Path(candidate))
            yield metadata_file
        except ValueError as err:
            on_error(candidate, err)