Example #1
def check_open_with_dc(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    data_array = dc.load(product='ls5_nbar_albers',
                         variables=['blue'],
                         stack='variable')
    assert data_array.shape

    data_array = dc.load(product='ls5_nbar_albers',
                         latitude=(-34, -35),
                         longitude=(149, 150),
                         stack='variable')
    assert data_array.shape

    dataset = dc.load(product='ls5_nbar_albers', variables=['blue'])
    assert dataset['blue'].size

    dataset = dc.load(product='ls5_nbar_albers',
                      latitude=(-35.2, -35.3),
                      longitude=(149.1, 149.2))
    assert dataset['blue'].size

    data_array = dc.load(product='ls5_nbar_albers',
                         latitude=(-34, -35),
                         longitude=(149, 150),
                         variables=['blue'],
                         group_by='solar_day')

    products_df = dc.list_products()
    assert len(products_df)
    assert len(products_df[products_df['name'].isin(['ls5_nbar_albers'])])
    assert len(products_df[products_df['name'].isin(['ls5_pq_albers'])])

    assert len(dc.list_measurements())
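Example #1 selects bands with a variables keyword; current Open Data Cube releases express the same selection with measurements. A minimal sketch of the usual dc.load call, assuming a configured index that already contains the ls5_nbar_albers product:

from datacube import Datacube

# Sketch only: assumes a configured datacube index with 'ls5_nbar_albers' indexed.
dc = Datacube(app="load-example")

ds = dc.load(
    product="ls5_nbar_albers",
    measurements=["blue"],              # select a single band
    latitude=(-35.3, -35.2),            # spatial query in degrees
    longitude=(149.1, 149.2),
    time=("1992-01-01", "1992-12-31"),  # temporal query
)
print(ds.blue.shape)                    # dimensions are (time, y, x)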
Example #2
def check_legacy_open(index):
    from datacube.api.core import Datacube
    from datacube.api.query import query_group_by  # used by group_datasets below
    import rasterio
    dc = Datacube(index=index)

    data_array = dc.load(product='ls5_nbar_albers',
                         measurements=['blue'],
                         time='1992-03-23T23:14:25.500000',
                         use_threads=True)
    assert data_array['blue'].shape[0] == 1
    assert (data_array.blue != -999).any()

    # Force a fusing load by duplicating the dataset: grouping the same dataset
    # twice makes load_data merge (fuse) two sources into each time slice.
    dss = dc.find_datasets(product='ls5_nbar_albers',
                           time='1992-03-23T23:14:25.500000')

    assert len(dss) == 1

    dss = dss*2
    sources = dc.group_datasets(dss, query_group_by('time'))

    gbox = data_array.geobox
    mm = [dss[0].type.measurements['blue']]
    xx = dc.load_data(sources, gbox, mm)
    assert (xx == data_array).all()

    with rasterio.Env():
        xx_lazy = dc.load_data(sources, gbox, mm, dask_chunks={'time': 1})
        assert xx_lazy['blue'].data.dask
        assert xx_lazy.blue[0, :, :].equals(xx.blue[0, :, :])
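The lazy load above returns dask-backed arrays, so nothing is read from disk until the data is used. A minimal sketch of materialising the whole result, assuming dc, sources, gbox and mm as defined in the example:

# Sketch: materialise the dask-backed result from the lazy load above.
lazy = dc.load_data(sources, gbox, mm, dask_chunks={'time': 1})
eager = lazy.compute()                      # triggers the actual reads
assert eager.blue.shape == lazy.blue.shape  # same dimensions, data now in memory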
Example #3
def test_end_to_end_multitime(clirunner, index, product_def, original_data):
    """Test simple indexing but for multiple measurements and wavelengths."""
    dc = Datacube(index=index)

    # Add the GEDI Dataset Types
    clirunner(["-v", "product", "add", str(GEDI_PRODUCT.dataset_types)])

    for idx, measurement in enumerate(product_def.measurements):
        for product_id in GEDI_PRODUCT_IDS:
            index_yaml = str(product_def.index_yaml).format(
                product_id=product_id.pid,
                measurement=measurement,
            )
            # Index the Datasets
            clirunner(["-v", "dataset", "add", str(index_yaml)])

        if idx == 0:  # Full check for the first measurement only
            # Check data for all product IDs
            check_open_with_dc_contents(dc, product_def, GEDI_PRODUCT_IDS,
                                        measurement, original_data)
            # check_open_with_grid_workflow(index)
            # Only test first product ID with dss
            check_load_via_dss(dc, product_def, GEDI_PRODUCT_IDS[:1],
                               measurement, original_data)
        else:
            check_open_with_dc_simple(dc, product_def, GEDI_PRODUCT_IDS,
                                      measurement)
Example #4
def test_indexing(clirunner, index, product_def):
    """Test indexing features for 2D and 3D products.

    A few no-op indexing commands are tested, along with a simple load that
    only checks the output shape.
    """
    product_id = GEDI_PRODUCT_IDS[0]
    measurement = product_def.measurements[0]
    index_yaml = str(product_def.index_yaml).format(
        product_id=product_id.pid,
        measurement=measurement,
    )

    # Add the GEDI Dataset Types
    clirunner(["-v", "product", "add", str(GEDI_PRODUCT.dataset_types)])

    # Index the Datasets
    #  - do test run first to increase test coverage
    clirunner(["-v", "dataset", "add", "--dry-run", str(index_yaml)])

    #  - do actual indexing
    clirunner(["-v", "dataset", "add", str(index_yaml)])

    #  - this will be no-op but with ignore lineage
    clirunner([
        "-v",
        "dataset",
        "add",
        "--confirm-ignore-lineage",
        str(index_yaml),
    ])

    # Test no-op update
    for policy in ["archive", "forget", "keep"]:
        clirunner([
            "-v",
            "dataset",
            "update",
            "--dry-run",
            "--location-policy",
            policy,
            str(index_yaml),
        ])

        # Test no changes needed update
        clirunner([
            "-v",
            "dataset",
            "update",
            "--location-policy",
            policy,
            str(index_yaml),
        ])

    dc = Datacube(index=index)
    check_open_with_dc_simple(dc, product_def, [product_id], measurement)
Example #5
def ingest_cmd(index, config, dry_run, executor):
    _, config = next(read_documents(Path(config)))
    source_type = index.datasets.types.get_by_name(config['source_type'])
    if not source_type:
        _LOG.error("Source DatasetType %s does not exist",
                   config['source_type'])
        return  # nothing to ingest without a valid source type

    output_type = morph_dataset_type(source_type, config)
    _LOG.info('Created DatasetType %s', output_type.name)
    output_type = index.datasets.types.add(output_type)

    datacube = Datacube(index=index)

    grid_spec = output_type.grid_spec
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    file_path_template = str(
        Path(config['location'], config['file_path_template']))

    bbox = BoundingBox(**config['ingestion_bounds'])
    tasks = find_diff(source_type, output_type, bbox, datacube)

    def ingest_work(tile_index, sources):
        geobox = GeoBox.from_grid_spec(grid_spec, tile_index)
        data = Datacube.product_data(sources, geobox, measurements)

        nudata = data.rename(namemap)

        file_path = file_path_template.format(
            tile_index=tile_index,
            start_time=to_datetime(
                sources.time.values[0]).strftime('%Y%m%d%H%M%S%f'),
            end_time=to_datetime(
                sources.time.values[-1]).strftime('%Y%m%d%H%M%S%f'))
        # TODO: algorithm params
        print("Writing product")
        nudatasets = write_product(nudata, sources, output_type,
                                   config['global_attributes'],
                                   variable_params, Path(file_path))
        return nudatasets

    do_work(tasks, ingest_work, index, executor)
    location = str(Path(config['location']))
    files_path = location + "/cache"
    # Create the archive directory if it is missing (replaces shelling out to mkdir)
    os.makedirs(location + "/archive", exist_ok=True)
    print("Compressing files")
    compress(files_path)
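The output file name built inside ingest_work comes from the file_path_template entry of the ingest configuration, formatted with the tile index and time range. A small sketch of that formatting step; the template string below is an assumption for illustration, not taken from a real config:

# Sketch: the template below is a made-up example of an ingest file_path_template.
file_path_template = "LS5_TM_NBAR/{tile_index[0]}_{tile_index[1]}/LS5_TM_NBAR_{tile_index[0]}_{tile_index[1]}_{start_time}.nc"
path = file_path_template.format(tile_index=(15, -40),
                                 start_time="19920323231425500000",
                                 end_time="19920323231426000000")
print(path)  # LS5_TM_NBAR/15_-40/LS5_TM_NBAR_15_-40_19920323231425500000.nc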
Example #6
def check_open_with_api(index):
    from datacube.api.core import Datacube
    datacube = Datacube(index=index)

    input_type_name = 'ls5_nbar_albers'
    input_type = datacube.index.datasets.types.get_by_name(input_type_name)

    geobox = GeoBox(200, 200, Affine(25, 0.0, 1500000, 0.0, -25, -3900000), CRS('EPSG:3577'))
    observations = datacube.product_observations(product='ls5_nbar_albers', geopolygon=geobox.extent)
    sources = datacube.product_sources(observations, lambda ds: ds.center_time, 'time',
                                       'seconds since 1970-01-01 00:00:00')
    data = datacube.product_data(sources, geobox, input_type.measurements.values())
    assert data.blue.shape == (1, 200, 200)
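The Affine(25, 0.0, 1500000, 0.0, -25, -3900000) transform above encodes 25 m pixels, north-up, with the tile origin at (1500000, -3900000) in EPSG:3577. A short sketch of how the affine terms map pixel indices to map coordinates:

from affine import Affine

# Affine(a, b, c, d, e, f) maps pixel (col, row) -> (x, y):
#   x = a*col + b*row + c   (a = 25  -> 25 m pixel width)
#   y = d*col + e*row + f   (e = -25 -> 25 m pixel height, rows increase southwards)
transform = Affine(25, 0.0, 1500000, 0.0, -25, -3900000)
print(transform * (0, 0))      # (1500000.0, -3900000.0): top-left corner
print(transform * (200, 200))  # (1505000.0, -3905000.0): far corner of the 200x200 GeoBox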
Example #7
def test_indexing_with_spectral_map(clirunner, index, dataset_types):
    """Test indexing features with spectral map."""
    product_id = GEDI_PRODUCT_IDS[0]
    product_def = GEDI_PRODUCTS["3D"]
    measurement = product_def.measurements[0]
    index_yaml = str(product_def.index_yaml).format(
        product_id=product_id.pid,
        measurement=measurement,
    )

    # Add the GEDI Dataset Types
    clirunner(["-v", "product", "add", str(dataset_types)])

    # Index the Dataset
    clirunner(["-v", "dataset", "add", str(index_yaml)])
    dc = Datacube(index=index)
    check_open_with_dc_simple(dc, product_def, [product_id], measurement)
Example #8
def check_load_via_dss(index):
    dc = Datacube(index=index)

    dss = dc.find_datasets(product='ls5_nbar_albers')
    assert len(dss) > 0

    xx1 = dc.load(product='ls5_nbar_albers', measurements=['blue'])
    xx2 = dc.load(datasets=dss, measurements=['blue'])
    assert xx1.blue.shape
    assert (xx1.blue != -999).any()
    assert (xx1.blue == xx2.blue).all()

    xx2 = dc.load(datasets=iter(dss), measurements=['blue'])
    assert xx1.blue.shape
    assert (xx1.blue != -999).any()
    assert (xx1.blue == xx2.blue).all()

    with pytest.raises(ValueError):
        dc.load(measurements=['blue'])
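Loading from an explicit dataset list also makes it easy to pre-filter with plain Python before the load. A small sketch, assuming dc is connected as in the function above:

# Sketch: filter the found datasets by year before loading (dc assumed as above).
dss = dc.find_datasets(product='ls5_nbar_albers')
subset = [ds for ds in dss if ds.center_time.year == 1992]
xx = dc.load(datasets=subset, measurements=['blue'])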
Example #9
def check_open_with_grid_workflow(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    type_name = 'ls5_nbar_albers'
    dt = dc.index.datasets.types.get_by_name(type_name)

    from datacube.api.grid_workflow import GridWorkflow
    gw = GridWorkflow(dc, dt.grid_spec)

    cells = gw.list_cells(product=type_name)
    assert LBG_CELL in cells

    tiles = gw.list_tiles(product=type_name)
    assert tiles
    assert tiles[LBG_CELL]

    ts, tile = tiles[LBG_CELL].popitem()
    dataset_cell = gw.load(LBG_CELL, tile, measurements=['blue'])
    assert dataset_cell['blue'].size

    dataset_cell = gw.load(LBG_CELL, tile)
    assert all(m in dataset_cell
               for m in ['blue', 'green', 'red', 'nir', 'swir1', 'swir2'])

    tiles = gw.list_tile_stacks(product=type_name)
    assert tiles
    assert tiles[LBG_CELL]

    tile = tiles[LBG_CELL]
    dataset_cell = gw.load(LBG_CELL, tile, measurements=['blue'])
    assert dataset_cell['blue'].size

    dataset_cell = gw.load(LBG_CELL, tile)
    assert all(m in dataset_cell
               for m in ['blue', 'green', 'red', 'nir', 'swir1', 'swir2'])
Example #10
def with_datacube(index, *args, **kwargs):
    return f(Datacube(index=index), *args, **kwargs)
Example #11
def check_open_with_dc(index):
    from datacube.api.core import Datacube
    import rasterio
    dc = Datacube(index=index)

    data_array = dc.load(product='ls5_nbar_albers', measurements=['blue'], stack='variable')
    assert data_array.shape
    assert (data_array != -999).any()

    data_array = dc.load(product='ls5_nbar_albers', measurements=['blue'], time='1992-03-23T23:14:25.500000')
    assert data_array['blue'].shape[0] == 1
    assert (data_array.blue != -999).any()

    data_array = dc.load(product='ls5_nbar_albers', measurements=['blue'], latitude=-35.3, longitude=149.1)
    assert data_array['blue'].shape[1:] == (1, 1)
    assert (data_array.blue != -999).any()

    data_array = dc.load(product='ls5_nbar_albers', latitude=(-35, -36), longitude=(149, 150), stack='variable')
    assert data_array.ndim == 4
    assert 'variable' in data_array.dims
    assert (data_array != -999).any()

    with rasterio.Env():
        lazy_data_array = dc.load(product='ls5_nbar_albers', latitude=(-35, -36), longitude=(149, 150),
                                  stack='variable', dask_chunks={'time': 1, 'x': 1000, 'y': 1000})
        assert lazy_data_array.data.dask
        assert lazy_data_array.ndim == data_array.ndim
        assert 'variable' in lazy_data_array.dims
        assert lazy_data_array[1, :2, 950:1050, 950:1050].equals(data_array[1, :2, 950:1050, 950:1050])

    dataset = dc.load(product='ls5_nbar_albers', measurements=['blue'])
    assert dataset['blue'].size

    dataset = dc.load(product='ls5_nbar_albers', latitude=(-35.2, -35.3), longitude=(149.1, 149.2))
    assert dataset['blue'].size

    with rasterio.Env():
        lazy_dataset = dc.load(product='ls5_nbar_albers', latitude=(-35.2, -35.3), longitude=(149.1, 149.2),
                               dask_chunks={'time': 1})
        assert lazy_dataset['blue'].data.dask
        assert lazy_dataset.blue[:2, :100, :100].equals(dataset.blue[:2, :100, :100])
        assert lazy_dataset.isel(time=slice(0, 2), x=slice(950, 1050), y=slice(950, 1050)).equals(
            dataset.isel(time=slice(0, 2), x=slice(950, 1050), y=slice(950, 1050)))

    dataset_like = dc.load(product='ls5_nbar_albers', measurements=['blue'], like=dataset)
    assert (dataset.blue == dataset_like.blue).all()

    data_array = dc.load(product='ls5_nbar_albers',
                         latitude=(-35, -36), longitude=(149, 150),
                         measurements=['blue'], group_by='solar_day')

    dataset = dc.load(product='ls5_nbar_albers', latitude=(-35.2, -35.3), longitude=(149.1, 149.2), align=(5, 20))
    assert dataset.geobox.affine.f % abs(dataset.geobox.affine.e) == 5
    assert dataset.geobox.affine.c % abs(dataset.geobox.affine.a) == 20
    dataset_like = dc.load(product='ls5_nbar_albers', measurements=['blue'], like=dataset)
    assert (dataset.blue == dataset_like.blue).all()

    products_df = dc.list_products()
    assert len(products_df)
    assert len(products_df[products_df['name'].isin(['ls5_nbar_albers'])])
    assert len(products_df[products_df['name'].isin(['ls5_pq_albers'])])

    assert len(dc.list_measurements())

    resamp = ['nearest', 'cubic', 'bilinear', 'cubic_spline', 'lanczos', 'average']
    results = {}

    # Compare resampling methods by the largest step between adjacent pixels
    # along the centre row and column; smoother kernels give smaller steps.
    def calc_max_change(da):
        midline = int(da.shape[0] * 0.5)
        a = int(abs(da[midline, :-1].data - da[midline, 1:].data).max())

        centerline = int(da.shape[1] * 0.5)
        b = int(abs(da[:-1, centerline].data - da[1:, centerline].data).max())
        return a + b

    for resamp_meth in resamp:
        dataset = dc.load(product='ls5_nbar_albers', measurements=['blue'],
                          latitude=(-35.28, -35.285), longitude=(149.15, 149.155),
                          output_crs='EPSG:4326', resolution=(-0.0000125, 0.0000125), resampling=resamp_meth)
        results[resamp_meth] = calc_max_change(dataset.blue.isel(time=0))

    assert results['cubic_spline'] < results['nearest']
    assert results['lanczos'] < results['average']
Example #12
def with_index(driver_manager, *args, **kwargs):
    return f(Datacube(driver_manager=driver_manager), *args, **kwargs)
def test_end_to_end(clirunner, index, testdata_dir, ingest_configs,
                    datacube_env_name):
    """
    Loads two dataset configurations, then ingests a sample Landsat 5 scene.

    One dataset configuration specifies the Australian Albers Equal Area
    projection; the other simply uses latitude/longitude.

    The input dataset should be recorded in the index, and two sets of storage
    units should be created on disk and recorded in the index.
    """

    lbg_nbar = testdata_dir / 'lbg' / LBG_NBAR
    lbg_pq = testdata_dir / 'lbg' / LBG_PQ
    ls5_nbar_albers_ingest_config = testdata_dir / ingest_configs[
        'ls5_nbar_albers']
    ls5_pq_albers_ingest_config = testdata_dir / ingest_configs['ls5_pq_albers']

    # Add the LS5 Dataset Types
    clirunner(['-v', 'product', 'add', str(LS5_DATASET_TYPES)])

    # Index the Datasets
    #  - do test run first to increase test coverage
    clirunner(
        ['-v', 'dataset', 'add', '--dry-run',
         str(lbg_nbar),
         str(lbg_pq)])

    #  - do actual indexing
    clirunner(['-v', 'dataset', 'add', str(lbg_nbar), str(lbg_pq)])

    #  - this will be no-op but with ignore lineage
    clirunner([
        '-v', 'dataset', 'add', '--confirm-ignore-lineage',
        str(lbg_nbar),
        str(lbg_pq)
    ])

    # Test no-op update
    for policy in ['archive', 'forget', 'keep']:
        clirunner([
            '-v', 'dataset', 'update', '--dry-run', '--location-policy',
            policy,
            str(lbg_nbar),
            str(lbg_pq)
        ])

        # Test no changes needed update
        clirunner([
            '-v', 'dataset', 'update', '--location-policy', policy,
            str(lbg_nbar),
            str(lbg_pq)
        ])

    # TODO: test location update
    # 1. Make a copy of a file
    # 2. Call dataset update with archive/forget
    # 3. Check location

    # Ingest NBAR
    clirunner(['-v', 'ingest', '-c', str(ls5_nbar_albers_ingest_config)])

    # Ingest PQ
    clirunner(['-v', 'ingest', '-c', str(ls5_pq_albers_ingest_config)])

    dc = Datacube(index=index)
    assert isinstance(str(dc), str)
    assert isinstance(repr(dc), str)

    with pytest.raises(ValueError):
        dc.find_datasets(time='2019')  # no product supplied, raises exception

    check_open_with_dc(index)
    check_open_with_grid_workflow(index)
    check_load_via_dss(index)
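After ingestion, the derived albers products are ordinary products in the index, so the same Datacube calls used in the checks above apply. A minimal verification sketch, assuming the same index fixture as in the test:

# Sketch: confirm the ingested product is queryable (assumes the test index above).
dc = Datacube(index=index)
products = dc.list_products()
assert 'ls5_nbar_albers' in products['name'].values

ds = dc.load(product='ls5_nbar_albers', measurements=['blue'])
assert ds.blue.size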