# NOTE: the snippets below are excerpts and omit their imports. They appear to rely on
# roughly the following (module paths are an assumption inferred from how the names
# are used; adjust as needed):
#   import math, pytest, toolz, yaml
#   from pathlib import Path
#   from textwrap import dedent
#   from unittest import mock
#   from datacube.config import LocalConfig
#   from datacube.testutils import (write_files, assert_file_structure,
#                                   dataset_maker, gen_dataset_test_dag)
#   from datacube.utils import SimpleDocNav
#   from datacube.ui.common import (get_metadata_path, ui_path_doc_stream,
#                                   _find_any_metadata_suffix)
#   from datacube.utils.aws import get_aws_settings, mk_boto_session
#   from datacube.utils.rio import activate_rio_env, deactivate_rio_env, get_rio_env
def check_missing_lineage(clirunner, index):
    """
      A -> B
      |    |
      |    v
      +--> C -> D
      |
      +--> E

    Add A with --no-auto-add-lineage: it fails while the lineage datasets are missing,
    and succeeds once they have been indexed.
    """
    ds = SimpleDocNav(gen_dataset_test_dag(44, force_tree=True))
    child_docs = [ds.sources[x].doc for x in ('ae', 'ab', 'ac')]

    prefix = write_files({
        'lineage.yml': yaml.safe_dump_all(child_docs),
        'main.yml': yaml.safe_dump(ds.doc),
    })

    r = clirunner(
        ['dataset', 'add', '--no-auto-add-lineage',
         str(prefix / 'main.yml')])

    assert 'ERROR Following lineage datasets are missing' in r.output
    assert index.datasets.has(ds.id) is False

    # now add lineage and try again
    clirunner(['dataset', 'add', str(prefix / 'lineage.yml')])
    assert index.datasets.has(ds.sources['ae'].id)
    r = clirunner(
        ['dataset', 'add', '--no-auto-add-lineage',
         str(prefix / 'main.yml')])

    assert index.datasets.has(ds.id)
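
# The check_* helpers in this excerpt take `clirunner` and `index` as plain arguments,
# which suggests they are driven by a test that first registers the metadata type and
# the A/B/C/D/E products. A minimal hypothetical driver (the function name is invented;
# the fixture names and `p.metadata`/`p.products` follow the tests further below):
def test_dataset_add_lineage_checks(dataset_add_configs, index_empty, clirunner):
    p = dataset_add_configs
    clirunner(['metadata', 'add', p.metadata])   # register the metadata type(s)
    clirunner(['product', 'add', p.products])    # register the products used by the DAGs
    check_missing_lineage(clirunner, index_empty)
    # ...and similarly for the other check_* helpers defined below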
def test_find_any_metatadata_suffix():
    files = write_files({
        'directory_dataset': {
            'file1.txt': '',
            'file2.txt': '',
            'agdc-metadata.json.gz': ''
        },
        'file_dataset.tif.agdc-md.yaml': '',
        'dataset_metadata.YAML': '',
        'no_metadata.tif': '',
        'ambigous.yml': '',
        'ambigous.yaml': '',
    })

    path = _find_any_metadata_suffix(files.joinpath('dataset_metadata'))
    assert Path(path).absolute() == files.joinpath(
        'dataset_metadata.YAML').absolute()

    path = _find_any_metadata_suffix(
        files.joinpath('directory_dataset', 'agdc-metadata'))
    assert Path(path).absolute() == files.joinpath(
        'directory_dataset', 'agdc-metadata.json.gz').absolute()

    path = _find_any_metadata_suffix(
        files.joinpath('file_dataset.tif.agdc-md'))
    assert Path(path).absolute() == files.joinpath(
        'file_dataset.tif.agdc-md.yaml').absolute()

    # Returns None if no metadata file exists
    path = _find_any_metadata_suffix(files.joinpath('no_metadata'))
    assert path is None

    with pytest.raises(ValueError):
        _find_any_metadata_suffix(files.joinpath('ambigous'))
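
# _find_any_metadata_suffix is only exercised here, not shown. A plausible
# implementation consistent with the assertions above: return the single metadata-like
# file whose name starts with the given base path, None if there is none, and raise
# ValueError when the match is ambiguous. An illustrative sketch, not the real code:
_METADATA_SUFFIXES = ('.yaml', '.yml', '.json', '.yaml.gz', '.yml.gz', '.json.gz')

def _find_any_metadata_suffix_sketch(path):
    candidates = [p for p in path.parent.glob(path.name + '.*')
                  if p.name.lower().endswith(_METADATA_SUFFIXES)]
    if not candidates:
        return None
    if len(candidates) > 1:
        raise ValueError('Multiple metadata files found: %r' % (candidates,))
    return candidates[0]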
def test_find_config():
    files = write_files({
        'base.conf':
        dedent("""\
            [datacube]
            db_hostname: fakehost.test.lan
        """),
        'override.conf':
        dedent("""\
            [datacube]
            db_hostname: overridden.test.lan
            db_database: overridden_db
        """)
    })

    # One config file
    config = LocalConfig.find(paths=[str(files.joinpath('base.conf'))])
    assert config['db_hostname'] == 'fakehost.test.lan'
    # Not set: uses default
    assert config['db_database'] == 'datacube'

    # Now two config files, with the latter overriding earlier options.
    config = LocalConfig.find(paths=[
        str(files.joinpath('base.conf')),
        str(files.joinpath('override.conf'))
    ])
    assert config['db_hostname'] == 'overridden.test.lan'
    assert config['db_database'] == 'overridden_db'
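
# LocalConfig.find merges the given config files in order: later files override options
# from earlier ones, and options set in none of them fall back to built-in defaults
# (here db_database -> 'datacube'), which is exactly what the two blocks above assert.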
def test_rio_env_aws_auto_region(monkeypatch, without_aws_env):
    import datacube.utils.aws

    pp = write_files({"config": """[default]
"""})

    assert (pp / "config").exists()
    monkeypatch.setenv("AWS_CONFIG_FILE", str(pp / "config"))

    assert datacube.utils.aws.botocore_default_region() is None

    aws = dict(aws_secret_access_key='blabla',
               aws_access_key_id='not a real one',
               aws_session_token='faketoo')

    with mock.patch('datacube.utils.aws.ec2_current_region',
                    return_value='TT'):
        ee = activate_rio_env(aws=aws)
        assert ee.get('AWS_REGION') == 'TT'

    with mock.patch('datacube.utils.aws.ec2_current_region',
                    return_value=None):
        ee = activate_rio_env(aws=aws)
        assert 'AWS_REGION' not in ee

        with pytest.raises(ValueError):
            activate_rio_env(aws=dict(region_name='auto'))

    deactivate_rio_env()
    assert get_rio_env() == {}
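
# Region resolution exercised above: when the AWS config file supplies no region
# (botocore_default_region() is None), activate_rio_env falls back to the EC2 instance
# region; if that is also unavailable, the environment is activated without AWS_REGION,
# and explicitly requesting region_name='auto' raises ValueError.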
def check_with_existing_lineage(clirunner, index):
    """
      A -> B
      |    |
      |    v
      +--> C -> D
      |
      +--> E

    Add nodes B, C and E (D is pulled in through C's lineage) with auto-matching,
    then add node A with the product search restricted to product A only.
    """

    ds = SimpleDocNav(gen_dataset_test_dag(33, force_tree=True))

    child_docs = [ds.sources[x].doc for x in ('ab', 'ac', 'ae')]

    prefix = write_files({
        'lineage.yml': yaml.safe_dump_all(child_docs),
        'main.yml': yaml.safe_dump(ds.doc),
    })

    clirunner(['dataset', 'add', str(prefix / 'lineage.yml')])
    assert index.datasets.get(ds.sources['ae'].id) is not None
    assert index.datasets.get(ds.sources['ab'].id) is not None
    assert index.datasets.get(ds.sources['ac'].id) is not None

    clirunner([
        'dataset', 'add', '--no-auto-add-lineage', '--product', 'A',
        str(prefix / 'main.yml')
    ])

    assert index.datasets.get(ds.id) is not None
def check_no_product_match(clirunner, index):
    ds = SimpleDocNav(gen_dataset_test_dag(22, force_tree=True))

    prefix = write_files({'agdc-metadata.yml': yaml.safe_dump(ds.doc)})

    r = clirunner(['dataset', 'add',
                   '--product', 'A',
                   str(prefix)])
    assert 'ERROR Dataset metadata did not match product signature' in r.output

    r = clirunner(['dataset', 'add',
                   '--product', 'A',
                   '--product', 'B',
                   str(prefix)])
    assert 'ERROR No matching Product found for dataset' in r.output

    ds_ = index.datasets.get(ds.id, include_sources=True)
    assert ds_ is None

    # Ignore lineage but fail to match main dataset
    r = clirunner(['dataset', 'add',
                   '--product', 'B',
                   '--confirm-ignore-lineage',
                   str(prefix)])

    assert 'ERROR Dataset metadata did not match product signature' in r.output
    assert index.datasets.has(ds.id) is False
def test_dataset_add_ambgious_products(dataset_add_configs, index_empty, clirunner):
    p = dataset_add_configs
    index = index_empty

    dss = [SimpleDocNav(dataset_maker(i)(
        'A',
        product_type='eo',
        flag_a='a',
        flag_b='b')) for i in [1, 2]]

    prefix = write_files({
        'products.yml': '''
name: A
description: test product A
metadata_type: minimal
metadata:
    product_type: eo
    flag_a: a

---
name: B
description: test product B
metadata_type: minimal
metadata:
    product_type: eo
    flag_b: b
    ''',
        'dataset1.yml': yaml.safe_dump(dss[0].doc),
        'dataset2.yml': yaml.safe_dump(dss[1].doc),
    })

    clirunner(['metadata', 'add', p.metadata])
    clirunner(['product', 'add', str(prefix / 'products.yml')])

    pp = list(index.products.get_all())
    assert len(pp) == 2

    for ds, i in zip(dss, (1, 2)):
        r = clirunner(['dataset', 'add', str(prefix / ('dataset%d.yml' % i))])
        assert 'ERROR Auto match failed' in r.output
        assert 'matches several products' in r.output
        assert index.datasets.has(ds.id) is False

    # check that forcing product works
    ds, fname = dss[0], 'dataset1.yml'
    r = clirunner(['dataset', 'add',
                   '--product', 'A',
                   str(prefix / fname)])

    assert index.datasets.has(ds.id) is True

    # check that forcing via exclude works
    ds, fname = dss[1], 'dataset2.yml'
    r = clirunner(['dataset', 'add',
                   '--exclude-product', 'B',
                   str(prefix / fname)])

    assert index.datasets.has(ds.id) is True
def test_testutils_write_files():
    from datacube.testutils import write_files, assert_file_structure

    files = {'a.txt': 'string', 'aa.txt': ('line1\n', 'line2\n')}

    pp = write_files(files)
    assert pp.exists()
    assert_file_structure(pp, files)

    # test that we detect missing files
    (pp / 'a.txt').unlink()

    with pytest.raises(AssertionError):
        assert_file_structure(pp, files)

    with pytest.raises(AssertionError):
        assert_file_structure(pp, {'aa.txt': 3})

    with pytest.raises(ValueError):
        write_files({'tt': 3})
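
# write_files turns a nested dict into real files under a fresh temporary directory and
# returns that directory as a pathlib.Path. A rough illustrative sketch of the behaviour
# asserted above (not the real datacube.testutils implementation):
import tempfile
from pathlib import Path

def _write_files_sketch(file_dict, base=None):
    base = Path(base) if base is not None else Path(tempfile.mkdtemp())
    for name, content in file_dict.items():
        path = base / name
        if isinstance(content, dict):              # nested dict -> sub-directory
            path.mkdir(parents=True, exist_ok=True)
            _write_files_sketch(content, path)
        elif isinstance(content, str):             # str -> file contents
            path.write_text(content)
        elif isinstance(content, (list, tuple)):   # sequence of lines -> joined
            path.write_text(''.join(content))
        else:
            raise ValueError('Unexpected content for %r: %r' % (name, content))
    return base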
def check_skip_lineage_test(clirunner, index):
    ds = SimpleDocNav(gen_dataset_test_dag(11, force_tree=True))

    prefix = write_files({'agdc-metadata.yml': yaml.safe_dump(ds.doc)})

    clirunner(['dataset', 'add', '--confirm-ignore-lineage', '--product', 'A', str(prefix)])

    ds_ = index.datasets.get(ds.id, include_sources=True)
    assert ds_ is not None
    assert str(ds_.id) == ds.id
    assert ds_.sources == {}

    assert index.datasets.get(ds.sources['ab'].id) is None
    assert index.datasets.get(ds.sources['ac'].id) is None
    assert index.datasets.get(ds.sources['ae'].id) is None
    assert index.datasets.get(ds.sources['ac'].sources['cd'].id) is None
def test_ui_path_doc_stream(httpserver):
    filename = 'dataset_metadata.yaml'
    file_content = ''
    out_dir = write_files({filename: file_content})

    httpserver.expect_request(filename).respond_with_data(file_content)

    input_paths = [
        Path(out_dir) / 'dataset_metadata.yaml',
        httpserver.url_for(filename)
    ]

    for input_path, (doc,
                     resolved_path) in zip(input_paths,
                                           ui_path_doc_stream(input_paths)):
        assert doc == {}
        assert input_path == resolved_path
def test_dataset_add_ambgious_products(dataset_add_configs, index_empty,
                                       clirunner):
    p = dataset_add_configs
    index = index_empty
    mk = dataset_maker(0)

    ds = SimpleDocNav(mk('A', product_type='eo', flag_a='a', flag_b='b'))

    prefix = write_files({
        'products.yml': '''
name: A
description: test product A
metadata_type: minimal
metadata:
    product_type: eo
    flag_a: a

---
name: B
description: test product B
metadata_type: minimal
metadata:
    product_type: eo
    flag_b: b
    ''',
        'dataset.yml': yaml.safe_dump(ds.doc),
    })

    clirunner(['metadata_type', 'add', p.metadata])
    clirunner(['product', 'add', str(prefix / 'products.yml')])

    pp = list(index.products.get_all())
    assert len(pp) == 2

    r = clirunner(['dataset', 'add', str(prefix / 'dataset.yml')])
    assert 'ERROR Auto match failed' in r.output
    assert 'matches several products' in r.output
    assert index.datasets.has(ds.id) is False

    # check that forcing product works
    r = clirunner(
        ['dataset', 'add', '--product', 'A',
         str(prefix / 'dataset.yml')])

    assert index.datasets.has(ds.id) is True
def check_inconsistent_lineage(clirunner, index):
    """
      A -> B
      |    |
      |    v
      +--> C -> D
      |
      +--> E

    Add node E, then try adding A with a modified copy of E in its lineage;
    adding A should fail, and B, C and D should not be added either.
    """
    ds = SimpleDocNav(gen_dataset_test_dag(1313, force_tree=True))

    child_docs = [ds.sources[x].doc for x in ('ae', )]
    modified_doc = toolz.assoc_in(
        ds.doc, 'lineage.source_datasets.ae.label'.split('.'), 'modified')

    prefix = write_files({
        'lineage.yml': yaml.safe_dump_all(child_docs),
        'main.yml': yaml.safe_dump(modified_doc),
    })

    clirunner(['dataset', 'add', str(prefix / 'lineage.yml')])
    assert index.datasets.get(ds.sources['ae'].id) is not None

    r = clirunner(['dataset', 'add', str(prefix / 'main.yml')])

    assert 'ERROR Inconsistent lineage dataset' in r.output

    assert index.datasets.has(ds.id) is False
    assert index.datasets.has(ds.sources['ab'].id) is False
    assert index.datasets.has(ds.sources['ac'].id) is False
    assert index.datasets.has(ds.sources['ac'].sources['cd'].id) is False

    # now again, but skipping the lineage verification check
    r = clirunner(
        ['dataset', 'add', '--no-verify-lineage',
         str(prefix / 'main.yml')])

    assert index.datasets.has(ds.id)
    assert index.datasets.has(ds.sources['ab'].id)
    assert index.datasets.has(ds.sources['ac'].id)
    assert index.datasets.has(ds.sources['ac'].sources['cd'].id)
def test_get_metadata_path():
    test_file_structure = {
        'directory_dataset': {
            'file1.txt': '',
            'file2.txt': '',
            'agdc-metadata.yaml.gz': ''
        },
        'file_dataset.tif': '',
        'file_dataset.tif.agdc-md.yaml': '',
        'dataset_metadata.yaml': '',
        'no_metadata.tif': '',
    }

    out_dir = write_files(test_file_structure)

    assert_file_structure(out_dir, test_file_structure)

    # A metadata file can be specified directly.
    path = get_metadata_path(out_dir.joinpath('dataset_metadata.yaml'))
    assert Path(path).absolute() == out_dir.joinpath(
        'dataset_metadata.yaml').absolute()

    # A dataset directory will have an internal 'agdc-metadata' file.
    path = get_metadata_path(out_dir.joinpath('directory_dataset'))
    assert Path(path).absolute() == out_dir.joinpath(
        'directory_dataset', 'agdc-metadata.yaml.gz').absolute()

    # Other files can have a sibling metadata file ending in 'agdc-md.yaml'
    path = get_metadata_path(out_dir.joinpath('file_dataset.tif'))
    assert Path(path).absolute() == out_dir.joinpath(
        'file_dataset.tif.agdc-md.yaml').absolute()

    # URLs are always themselves
    example_url = 'http://localhost/dataset.yaml'
    url = get_metadata_path(example_url)
    assert url == example_url

    # Lack of metadata raises an error.
    with pytest.raises(ValueError):
        get_metadata_path(out_dir.joinpath('no_metadata.tif'))

    # Nonexistent dataset raises a ValueError.
    with pytest.raises(ValueError):
        get_metadata_path(out_dir.joinpath('missing-dataset.tif'))
def test_find_metadata_path():
    FILES = {
        'directory_dataset': {
            'file1.txt': '',
            'file2.txt': '',
            'agdc-metadata.yaml.gz': ''
        },
        'file_dataset.tif': '',
        'file_dataset.tif.agdc-md.yaml': '',
        'dataset_metadata.yaml': '',
        'no_metadata.tif': '',
    }

    out_dir = write_files(FILES)

    assert_file_structure(out_dir, FILES)

    # A metadata file can be specified directly.
    path = get_metadata_path(out_dir.joinpath('dataset_metadata.yaml'))
    assert path.absolute() == out_dir.joinpath(
        'dataset_metadata.yaml').absolute()

    # A dataset directory will have an internal 'agdc-metadata' file.
    path = get_metadata_path(out_dir.joinpath('directory_dataset'))
    assert path.absolute() == out_dir.joinpath(
        'directory_dataset', 'agdc-metadata.yaml.gz').absolute()

    # Other files can have a sibling metadata file ending in 'agdc-md.yaml'
    path = get_metadata_path(out_dir.joinpath('file_dataset.tif'))
    assert path.absolute() == out_dir.joinpath(
        'file_dataset.tif.agdc-md.yaml').absolute()

    # Lack of metadata raises an error.
    with pytest.raises(ValueError):
        get_metadata_path(out_dir.joinpath('no_metadata.tif'))

    # Nonexistent dataset raises a ValueError.
    with pytest.raises(ValueError):
        get_metadata_path(out_dir.joinpath('missing-dataset.tif'))
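
# A rough sketch of the resolution order the two tests above exercise (illustrative
# only; the real get_metadata_path lives in datacube.ui.common). It reuses the
# _find_any_metadata_suffix sketch shown earlier:
def _get_metadata_path_sketch(dataset_path):
    if str(dataset_path).startswith(('http://', 'https://')):
        return dataset_path                                 # URLs pass through unchanged
    dataset_path = Path(dataset_path)
    if dataset_path.is_file() and dataset_path.name.lower().endswith(_METADATA_SUFFIXES):
        return dataset_path                                 # a metadata doc given directly
    sibling = dataset_path.parent / (dataset_path.name + '.agdc-md.yaml')
    if sibling.exists():
        return sibling                                      # sibling '<name>.agdc-md.yaml'
    if dataset_path.is_dir():
        inside = _find_any_metadata_suffix_sketch(dataset_path / 'agdc-metadata')
        if inside is not None:
            return inside                                   # 'agdc-metadata.*' inside the dir
    raise ValueError('No metadata found for %s' % dataset_path)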
def test_dataset_add_with_nans(dataset_add_configs, index_empty, clirunner):
    p = dataset_add_configs
    index = index_empty

    clirunner(['metadata', 'add', p.metadata])
    clirunner(['product', 'add', p.products])

    mk = dataset_maker(0)

    c = mk('C',
           product_type='C',
           val_is_nan=math.nan,
           val_is_inf=math.inf,
           val_is_neginf=-math.inf)

    b = mk('B', sources={'bc': c}, product_type='B')
    a = mk('A', sources={'ac': c}, product_type='A')

    prefix = write_files({
        'dataset.yml': yaml.safe_dump_all([a, b]),
    })

    r = clirunner([
        'dataset', 'add', '--auto-add-lineage', '--verify-lineage',
        str(prefix / 'dataset.yml')
    ])

    assert "ERROR" not in r.output

    a, b, c = [SimpleDocNav(v) for v in (a, b, c)]

    assert index.datasets.bulk_has([a.id, b.id, c.id]) == [True, True, True]

    c_doc = index.datasets.get(c.id).metadata_doc

    assert c_doc['val_is_nan'] == 'NaN'
    assert c_doc['val_is_inf'] == 'Infinity'
    assert c_doc['val_is_neginf'] == '-Infinity'
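
# The _json helper patched into datacube.utils.aws._fetch_text in the next test is not
# defined anywhere in this excerpt; a plausible stand-in (an assumption) that builds the
# JSON instance-metadata document the mocked fetch is expected to return:
import json

def _json(**kw):
    return json.dumps(kw)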
def test_get_aws_settings(monkeypatch, without_aws_env):

    pp = write_files({
        "config":
        """
[default]
region = us-west-2

[profile east]
region = us-east-1
[profile no_region]
""",
        "credentials":
        """
[default]
aws_access_key_id = AKIAWYXYXYXYXYXYXYXY
aws_secret_access_key = fake-fake-fake
[east]
aws_access_key_id = AKIAEYXYXYXYXYXYXYXY
aws_secret_access_key = fake-fake-fake
"""
    })

    assert (pp / "credentials").exists()
    assert (pp / "config").exists()

    monkeypatch.setenv("AWS_CONFIG_FILE", str(pp / "config"))
    monkeypatch.setenv("AWS_SHARED_CREDENTIALS_FILE", str(pp / "credentials"))

    aws, creds = get_aws_settings()
    assert aws['region_name'] == 'us-west-2'
    assert aws['aws_access_key_id'] == 'AKIAWYXYXYXYXYXYXYXY'
    assert aws['aws_secret_access_key'] == 'fake-fake-fake'

    sess = mk_boto_session(profile="no_region",
                           creds=creds.get_frozen_credentials(),
                           region_name="mordor")

    assert sess.get_credentials().get_frozen_credentials(
    ) == creds.get_frozen_credentials()

    aws, creds = get_aws_settings(profile='east')
    assert aws['region_name'] == 'us-east-1'
    assert aws['aws_access_key_id'] == 'AKIAEYXYXYXYXYXYXYXY'
    assert aws['aws_secret_access_key'] == 'fake-fake-fake'

    aws, creds = get_aws_settings(aws_unsigned=True)
    assert creds is None
    assert aws['region_name'] == 'us-west-2'
    assert aws['aws_unsigned'] is True

    aws, creds = get_aws_settings(profile="no_region",
                                  region_name="us-west-1",
                                  aws_unsigned=True)

    assert creds is None
    assert aws['region_name'] == 'us-west-1'
    assert aws['aws_unsigned'] is True

    with mock.patch('datacube.utils.aws._fetch_text',
                    return_value=_json(region="mordor")):
        aws, creds = get_aws_settings(profile="no_region", aws_unsigned=True)

        assert aws['region_name'] == 'mordor'
        assert aws['aws_unsigned'] is True
def test_dataset_add_inconsistent_measurements(dataset_add_configs,
                                               index_empty, clirunner):
    p = dataset_add_configs
    index = index_empty
    mk = dataset_maker(0)

    # measurements: not set, empty, a subset, the full set, a super-set
    ds1 = SimpleDocNav(mk(
        'A',
        product_type='eo',
    ))
    ds2 = SimpleDocNav(mk('B', product_type='eo', measurements={}))
    ds3 = SimpleDocNav(mk('C', product_type='eo', measurements={'red': {}}))
    ds4 = SimpleDocNav(
        mk('D', product_type='eo', measurements={
            'red': {},
            'green': {},
        }))
    ds5 = SimpleDocNav(
        mk('E',
           product_type='eo',
           measurements={
               'red': {},
               'green': {},
               'extra': {},
           }))

    dss = (ds1, ds2, ds3, ds4, ds5)
    docs = [ds.doc for ds in dss]

    prefix = write_files({
        'products.yml': '''
name: eo
description: test product
metadata_type: with_measurements
metadata:
    product_type: eo

measurements:
    - name: red
      dtype: int16
      nodata: -999
      units: '1'

    - name: green
      dtype: int16
      nodata: -999
      units: '1'
    ''',
        'dataset.yml': yaml.safe_dump_all(docs),
    })

    clirunner(['metadata', 'add', p.metadata])
    r = clirunner(['product', 'add', str(prefix / 'products.yml')])

    pp = list(index.products.get_all())
    assert len(pp) == 1

    r = clirunner(['dataset', 'add', str(prefix / 'dataset.yml')])
    print(r.output)

    r = clirunner(['dataset', 'search', '-f', 'csv'])
    assert ds1.id not in r.output
    assert ds2.id not in r.output
    assert ds3.id not in r.output
    assert ds4.id in r.output
    assert ds5.id in r.output
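
# Only datasets whose measurement set covers the product's declared measurements
# (red, green) get indexed: ds4 (exact set) and ds5 (super-set) appear in the search
# output above, while the unset/empty/subset cases are rejected.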
def check_bad_yaml(clirunner, index):
    prefix = write_files({'broken.yml': '"'})
    r = clirunner(['dataset', 'add', str(prefix / 'broken.yml')])
    assert 'ERROR Failed reading documents from ' in r.output
def check_missing_metadata_doc(clirunner):
    prefix = write_files({'im.tiff': ''})
    r = clirunner(['dataset', 'add', str(prefix / 'im.tiff')])
    assert "ERROR No supported metadata docs found for dataset" in r.output