Example #1
def check_open_with_dc(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    data_array = dc.load(product='ls5_nbar_albers',
                         variables=['blue'],
                         stack='variable')
    assert data_array.shape

    data_array = dc.load(product='ls5_nbar_albers',
                         latitude=(-34, -35),
                         longitude=(149, 150),
                         stack='variable')
    assert data_array.shape

    dataset = dc.load(product='ls5_nbar_albers', variables=['blue'])
    assert dataset['blue'].size

    dataset = dc.load(product='ls5_nbar_albers',
                      latitude=(-35.2, -35.3),
                      longitude=(149.1, 149.2))
    assert dataset['blue'].size

    data_array = dc.load(product='ls5_nbar_albers',
                         latitude=(-34, -35),
                         longitude=(149, 150),
                         variables=['blue'],
                         group_by='solar_day')

    products_df = dc.list_products()
    assert len(products_df)
    assert len(products_df[products_df['name'].isin(['ls5_nbar_albers'])])
    assert len(products_df[products_df['name'].isin(['ls5_pq_albers'])])

    assert len(dc.list_measurements())
Example #2
def check_open_with_api(index):
    from datacube.api.core import Datacube
    datacube = Datacube(index=index)

    input_type_name = 'ls5_nbar_albers'
    input_type = datacube.index.datasets.types.get_by_name(input_type_name)

    geobox = GeoBox(200, 200, Affine(25, 0.0, 1500000, 0.0, -25, -3900000), CRS('EPSG:3577'))
    observations = datacube.product_observations('ls5_nbar_albers', geobox.extent)
    sources = datacube.product_sources(observations, lambda ds: ds.center_time, 'time',
                                       'seconds since 1970-01-01 00:00:00')
    data = datacube.product_data(sources, geobox, input_type.measurements.values())
    assert data.blue.shape == (1, 200, 200)
Example #3
def check_open_with_api(index):
    from datacube.api.core import Datacube
    datacube = Datacube(index=index)

    input_type_name = 'ls5_nbar_albers'
    input_type = datacube.index.datasets.types.get_by_name(input_type_name)

    geobox = GeoBox(200, 200, Affine(25, 0.0, 1500000, 0.0, -25, -3900000), CRS('EPSG:3577'))
    observations = datacube.product_observations(product='ls5_nbar_albers', geopolygon=geobox.extent)
    sources = datacube.product_sources(observations, lambda ds: ds.center_time, 'time',
                                       'seconds since 1970-01-01 00:00:00')
    data = datacube.product_data(sources, geobox, input_type.measurements.values())
    assert data.blue.shape == (1, 200, 200)
Example #4
def check_open_with_dc(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    data_array = dc.load(product='ls5_nbar_albers', variables=['blue'], stack='variable')
    assert data_array.shape

    data_array = dc.load(product='ls5_nbar_albers', latitude=(-34, -35), longitude=(149, 150), stack='variable')
    assert data_array.shape

    dataset = dc.load(product='ls5_nbar_albers', variables=['blue'])
    assert dataset['blue'].size

    dataset = dc.load(product='ls5_nbar_albers', latitude=(-35.2, -35.3), longitude=(149.1, 149.2))
    assert dataset['blue'].size

    data_array = dc.load(product='ls5_nbar_albers',
                         latitude=(-34, -35), longitude=(149, 150),
                         variables=['blue'], group_by='solar_day')

    products_df = dc.list_products()
    assert len(products_df)
    assert len(products_df[products_df['name'].isin(['ls5_nbar_albers'])])
    assert len(products_df[products_df['name'].isin(['ls5_pq_albers'])])

    assert len(dc.list_measurements())
Example #5
def test_end_to_end_multitime(clirunner, index, product_def, original_data):
    """Test simple indexing but for multiple measurements and wavelengths."""
    dc = Datacube(index=index)

    # Add the GEDI Dataset Types
    clirunner(["-v", "product", "add", str(GEDI_PRODUCT.dataset_types)])

    for idx, measurement in enumerate(product_def.measurements):
        for product_id in GEDI_PRODUCT_IDS:
            index_yaml = str(product_def.index_yaml).format(
                product_id=product_id.pid,
                measurement=measurement,
            )
            # Index the Datasets
            clirunner(["-v", "dataset", "add", str(index_yaml)])

        if idx == 0:  # Full check for the first measurement only
            # Check data for all product IDs
            check_open_with_dc_contents(dc, product_def, GEDI_PRODUCT_IDS,
                                        measurement, original_data)
            # check_open_with_grid_workflow(index)
            # Only test first product ID with dss
            check_load_via_dss(dc, product_def, GEDI_PRODUCT_IDS[:1],
                               measurement, original_data)
        else:
            check_open_with_dc_simple(dc, product_def, GEDI_PRODUCT_IDS,
                                      measurement)
Example #6
def ingest_work(config, source_type, output_type, tile, tile_index):
    # pylint: disable=too-many-locals
    _LOG.info('Starting task %s', tile_index)
    driver = storage_writer_by_name(config['storage']['driver'])

    if driver is None:
        _LOG.error('Failed to load storage driver %s', config['storage']['driver'])
        raise ValueError('Something went wrong: no longer can find driver pointed by storage.driver option')

    namemap = get_namemap(config)
    # TODO: get_measurements possibly changes dtype, not sure load_data would like that
    measurements = get_measurements(source_type, config)
    resampling = get_resampling(config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

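    # Look up the configured fuser by name; only the default 'copy' fuser is
    # supported here (it maps to None, i.e. plain copy-based fusing), and any
    # other value raises KeyError.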
    fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]

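    # Flatten the per-timestamp tuples of source Datasets into one sequence so
    # each dataset can be checked for a missing location.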
    datasets = tile.sources.sum().item()
    for dataset in datasets:
        if not dataset.uris:
            _LOG.error('Locationless dataset found in the database: %r', dataset)

    data = Datacube.load_data(tile.sources, tile.geobox, measurements,
                              resampling=resampling,
                              fuse_func=fuse_func)

    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)
    file_uri = driver.mk_uri(file_path, config['storage'])

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_uri,
                            app_info=get_app_metadata(config['filename']),
                            valid_data=polygon_from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

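    # Storage settings for the per-timestep 'dataset' metadata variable written
    # alongside the measurement data.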
    variable_params['dataset'] = {
        'chunksizes': (1,),
        'zlib': True,
        'complevel': 9,
    }

    driver_data = driver.write_dataset_to_storage(nudata, file_uri,
                                                  global_attributes=global_attributes,
                                                  variable_params=variable_params,
                                                  storage_config=config['storage'])

    if (driver_data is not None) and len(driver_data) > 0:
        datasets.attrs['driver_data'] = driver_data

    _LOG.info('Finished task %s', tile_index)

    return datasets
Example #7
def ingest_work(config, source_type, output_type, tile, tile_index):
    _LOG.info('Starting task %s', tile_index)
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources, tile.geobox, measurements, fuse_func=fuse_func)
    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources, version=config['taskfile_version'])

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_path.absolute().as_uri(),
                            app_info=get_app_metadata(config, config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(nudata, file_path, global_attributes, variable_params)
    _LOG.info('Finished task %s', tile_index)

    return datasets
Example #8
def test_new_xr_load(data_folder):
    base = "file://" + str(data_folder) + "/metadata.yml"

    rdr = mk_rio_driver()
    assert rdr is not None

    _bands = []

    def band_info_collector(bands, ctx):
        for b in bands:
            _bands.append(b)

    tee_new_load_context(rdr, band_info_collector)

    band_a = dict(name='a', path='test.tif')

    band_b = dict(name='b', band=2, path='test.tif')

    ds = mk_sample_dataset([band_a, band_b], base)

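    # Group the single dataset along the time dimension to get the
    # time -> datasets structure that xr_load expects.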
    sources = Datacube.group_datasets([ds], 'time')

    im, meta = rio_slurp(str(data_folder) + '/test.tif')
    measurements = [ds.type.measurements[n] for n in ('a', 'b')]

    xx, _ = xr_load(sources, meta.gbox, measurements, rdr)

    assert len(_bands) == 2

    assert im[0].shape == xx.a.isel(time=0).shape
    assert im[1].shape == xx.b.isel(time=0).shape

    np.testing.assert_array_equal(im[0], xx.a.values[0])
    np.testing.assert_array_equal(im[1], xx.b.values[0])
Example #9
def ingest_work(config, source_type, output_type, index, sources, geobox):
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.product_data(sources, geobox, measurements, fuse_func=fuse_func)
    nudata = data.rename(namemap)
    file_path = get_filename(config, index, sources)

    def _make_dataset(labels, sources):
        sources_union = union_points(*[source.extent.to_crs(geobox.crs).points for source in sources])
        valid_data = intersect_points(geobox.extent.points, sources_union)
        dataset = make_dataset(dataset_type=output_type,
                               sources=sources,
                               extent=geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(config, config['filename']),
                               valid_data=GeoPolygon(valid_data, geobox.crs))
        return dataset
    datasets = xr_apply(sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(nudata, global_attributes, variable_params, file_path)

    return datasets
Example #10
def check_legacy_open(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    data_array = dc.load(product='ls5_nbar_albers',
                         measurements=['blue'],
                         time='1992-03-23T23:14:25.500000',
                         use_threads=True)
    assert data_array['blue'].shape[0] == 1
    assert (data_array.blue != -999).any()

    # force fusing load by duplicating dataset
    dss = dc.find_datasets(product='ls5_nbar_albers',
                           time='1992-03-23T23:14:25.500000')

    assert len(dss) == 1

    dss = dss*2
    sources = dc.group_datasets(dss, query_group_by('time'))

    gbox = data_array.geobox
    mm = [dss[0].type.measurements['blue']]
    xx = dc.load_data(sources, gbox, mm)
    assert (xx == data_array).all()

    with rasterio.Env():
        xx_lazy = dc.load_data(sources, gbox, mm, dask_chunks={'time': 1})
        assert xx_lazy['blue'].data.dask
        assert xx_lazy.blue[0, :, :].equals(xx.blue[0, :, :])
Example #11
def dask_load(sources, geobox, measurements, dask_chunks,
              skip_broken_datasets=False):
    def data_func(measurement):
        return make_dask_array(sources, geobox, measurement,
                               skip_broken_datasets=skip_broken_datasets,
                               dask_chunks=dask_chunks)

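    # create_storage builds an xarray Dataset with the grouping coordinates plus
    # the geobox dimensions, using data_func to supply a lazy dask array for
    # each measurement.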
    return Datacube.create_storage(OrderedDict((dim, sources.coords[dim]) for dim in sources.dims),
                                   geobox, measurements, data_func)
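
For context, a minimal sketch of how this lazy path is reached through the public API, assuming an existing dc = Datacube(index=...) and the 'ls5_nbar_albers' product used throughout these examples: passing dask_chunks to dc.load selects the dask-backed path built here, while omitting it loads eagerly.

lazy = dc.load(product='ls5_nbar_albers', measurements=['blue'], dask_chunks={'time': 1})
eager = dc.load(product='ls5_nbar_albers', measurements=['blue'])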
Example #12
def test_indexing(clirunner, index, product_def):
    """Test indexing features for 2D and 3D products.

    A few no-op indexing commands are tested as well as a simple load with shape
    check only.
    """
    product_id = GEDI_PRODUCT_IDS[0]
    measurement = product_def.measurements[0]
    index_yaml = str(product_def.index_yaml).format(
        product_id=product_id.pid,
        measurement=measurement,
    )

    # Add the GEDI Dataset Types
    clirunner(["-v", "product", "add", str(GEDI_PRODUCT.dataset_types)])

    # Index the Datasets
    #  - do test run first to increase test coverage
    clirunner(["-v", "dataset", "add", "--dry-run", str(index_yaml)])

    #  - do actual indexing
    clirunner(["-v", "dataset", "add", str(index_yaml)])

    #  - this will be no-op but with ignore lineage
    clirunner([
        "-v",
        "dataset",
        "add",
        "--confirm-ignore-lineage",
        str(index_yaml),
    ])

    # Test no-op update
    for policy in ["archive", "forget", "keep"]:
        clirunner([
            "-v",
            "dataset",
            "update",
            "--dry-run",
            "--location-policy",
            policy,
            str(index_yaml),
        ])

        # Test no changes needed update
        clirunner([
            "-v",
            "dataset",
            "update",
            "--location-policy",
            policy,
            str(index_yaml),
        ])

    dc = Datacube(index=index)
    check_open_with_dc_simple(dc, product_def, [product_id], measurement)
Example #13
def ingest_work(config, source_type, output_type, tile, tile_index):
    # pylint: disable=too-many-locals
    _LOG.info('Starting task %s', tile_index)
    driver = storage_writer_by_name(config['storage']['driver'])

    if driver is None:
        _LOG.error('Failed to load storage driver %s', config['storage']['driver'])
        raise ValueError('Something went wrong: no longer can find driver pointed by storage.driver option')

    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources, tile.geobox, measurements, fuse_func=fuse_func)

    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)

    def mk_uri(file_path):
        if driver.uri_scheme == "file":
            return file_path.absolute().as_uri()
        return '{}://{}'.format(driver.uri_scheme, file_path)

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=mk_uri(file_path),
                            app_info=get_app_metadata(config, config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    variable_params['dataset'] = {
        'chunksizes': (1,),
        'zlib': True,
        'complevel': 9,
    }

    storage_metadata = driver.write_dataset_to_storage(nudata, file_path,
                                                       global_attributes=global_attributes,
                                                       variable_params=variable_params,
                                                       storage_config=config['storage'])

    if (storage_metadata is not None) and len(storage_metadata) > 0:
        datasets.attrs['storage_metadata'] = storage_metadata

    _LOG.info('Finished task %s', tile_index)

    return datasets
Example #14
def ingest_cmd(index, config, dry_run, executor):
    _, config = next(read_documents(Path(config)))
    source_type = index.datasets.types.get_by_name(config['source_type'])
    if not source_type:
        _LOG.error("Source DatasetType %s does not exist",
                   config['source_type'])
#    print (source_type)
#    print ("abcdefghijklmnopqrstuvwxyz")
    output_type = morph_dataset_type(source_type, config)
    #    print (output_type)
    _LOG.info('Created DatasetType %s', output_type.name)
    output_type = index.datasets.types.add(output_type)

    datacube = Datacube(index=index)

    grid_spec = output_type.grid_spec
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    file_path_template = str(
        Path(config['location'], config['file_path_template']))

    bbox = BoundingBox(**config['ingestion_bounds'])
    tasks = find_diff(source_type, output_type, bbox, datacube)

    def ingest_work(tile_index, sources):
        geobox = GeoBox.from_grid_spec(grid_spec, tile_index)
        #        print ("in ingest.py in ingest_word")
        data = Datacube.product_data(sources, geobox, measurements)

        nudata = data.rename(namemap)

        file_path = file_path_template.format(
            tile_index=tile_index,
            start_time=to_datetime(
                sources.time.values[0]).strftime('%Y%m%d%H%M%S%f'),
            end_time=to_datetime(
                sources.time.values[-1]).strftime('%Y%m%d%H%M%S%f'))
        # TODO: algorithm params
        print("Writing product")
        nudatasets = write_product(nudata, sources, output_type,
                                   config['global_attributes'],
                                   variable_params, Path(file_path))
        return nudatasets

    do_work(tasks, ingest_work, index, executor)
    temp = str(Path(config['location']))
    files_path = temp + "/cache"
    if not os.path.isdir(temp + "/archive"):
        os.makedirs(temp + "/archive")
    print("Compressing files")
    compress(files_path)
Example #15
def dask_load(sources,
              geobox,
              measurements,
              dask_chunks,
              skip_broken_datasets=False):
    def data_func(measurement):
        return make_dask_array(sources,
                               geobox,
                               measurement,
                               skip_broken_datasets=skip_broken_datasets,
                               dask_chunks=dask_chunks)

    return Datacube.create_storage(sources.coords, geobox, measurements,
                                   data_func)
Example #16
def check_load_via_dss(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    dss = dc.find_datasets(product='ls5_nbar_albers')
    assert len(dss) > 0

    xx1 = dc.load(product='ls5_nbar_albers', measurements=['blue'])
    xx2 = dc.load(datasets=dss, measurements=['blue'])
    assert xx1.blue.shape
    assert (xx1.blue != -999).any()
    assert (xx1.blue == xx2.blue).all()

    xx2 = dc.load(datasets=iter(dss), measurements=['blue'])
    assert xx1.blue.shape
    assert (xx1.blue != -999).any()
    assert (xx1.blue == xx2.blue).all()

    with pytest.raises(ValueError):
        dc.load(measurements=['blue'])

    with pytest.raises(DeprecationWarning):
        dc.load(product='ls5_nbar_albers', stack=True)
Example #17
    def ingest_work(tile_index, sources):
        geobox = GeoBox.from_grid_spec(grid_spec, tile_index)
#        print ("in ingest.py in ingest_word")
        data = Datacube.product_data(sources, geobox, measurements)

        nudata = data.rename(namemap)

        file_path = file_path_template.format(tile_index=tile_index,
                                              start_time=to_datetime(sources.time.values[0]).strftime('%Y%m%d%H%M%S%f'),
                                              end_time=to_datetime(sources.time.values[-1]).strftime('%Y%m%d%H%M%S%f'))
        # TODO: algorithm params
        print ("Writing product")
        nudatasets = write_product(nudata, sources, output_type,
                                   config['global_attributes'], variable_params, Path(file_path))
        return nudatasets
Example #18
def ingest_work(driver_manager, config, source_type, output_type, tile,
                tile_index):
    _LOG.info('Starting task %s', tile_index)

    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources,
                                  tile.geobox,
                                  measurements,
                                  fuse_func=fuse_func,
                                  driver_manager=driver_manager)
    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_path.absolute().as_uri(),
                            app_info=get_app_metadata(config,
                                                      config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(
                                sources, tile.geobox))

    datasets = xr_apply(
        tile.sources, _make_dataset,
        dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    # Until ingest becomes a class and DriverManager an instance
    # variable, we call the constructor each time. DriverManager being
    # a singleton, there is little overhead, though.
    datasets.attrs['storage_output'] = driver_manager.write_dataset_to_storage(
        nudata, file_path, global_attributes, variable_params)
    _LOG.info('Finished task %s', tile_index)

    # When using multiproc executor, Driver Manager is a clone.
    if driver_manager.is_clone:
        driver_manager.close()

    return datasets
Example #19
def test_indexing_with_spectral_map(clirunner, index, dataset_types):
    """Test indexing features with spectral map."""
    product_id = GEDI_PRODUCT_IDS[0]
    product_def = GEDI_PRODUCTS["3D"]
    measurement = product_def.measurements[0]
    index_yaml = str(product_def.index_yaml).format(
        product_id=product_id.pid,
        measurement=measurement,
    )

    # Add the GEDI Dataset Types
    clirunner(["-v", "product", "add", str(dataset_types)])

    # Index the Dataset
    clirunner(["-v", "dataset", "add", str(index_yaml)])
    dc = Datacube(index=index)
    check_open_with_dc_simple(dc, product_def, [product_id], measurement)
Example #20
def xr_load(sources, geobox, measurements,
            skip_broken_datasets=False,
            use_threads=False):
    mk_new = get_loader(sources)

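    # Pre-allocate the output Dataset; each time slice is then filled in place
    # by fuse_measurement below.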
    data = Datacube.create_storage(OrderedDict((dim, sources.coords[dim]) for dim in sources.dims),
                                   geobox, measurements)

    # TODO: re-add use_threads
    for index, datasets in np.ndenumerate(sources.values):
        for m in measurements:
            t_slice = data[m.name].values[index]

            fuse_measurement(t_slice, datasets, geobox, m,
                             mk_new=mk_new,
                             skip_broken_datasets=skip_broken_datasets)

    return data
Example #21
    def ingest_work(tile_index, sources):
        geobox = GeoBox.from_grid_spec(grid_spec, tile_index)
        #        print ("in ingest.py in ingest_word")
        data = Datacube.product_data(sources, geobox, measurements)

        nudata = data.rename(namemap)

        file_path = file_path_template.format(
            tile_index=tile_index,
            start_time=to_datetime(
                sources.time.values[0]).strftime('%Y%m%d%H%M%S%f'),
            end_time=to_datetime(
                sources.time.values[-1]).strftime('%Y%m%d%H%M%S%f'))
        # TODO: algorithm params
        print("Writing product")
        nudatasets = write_product(nudata, sources, output_type,
                                   config['global_attributes'],
                                   variable_params, Path(file_path))
        return nudatasets
Example #22
def xr_load(sources,
            geobox,
            measurements,
            skip_broken_datasets=False,
            use_threads=False):
    mk_new = get_loader(sources)

    data = Datacube.create_storage(sources.coords, geobox, measurements)

    if use_threads:

        def work_load_data(index, datasets, m):
            t_slice = data[m.name].values[index]
            fuse_measurement(t_slice,
                             datasets,
                             geobox,
                             m,
                             mk_new=mk_new,
                             skip_broken_datasets=skip_broken_datasets)

        futures = []
        pool = ThreadPoolExecutor(cpu_count() * 2)
        for index, datasets in np.ndenumerate(sources.values):
            for m in measurements:
                futures.append(pool.submit(work_load_data, index, datasets, m))

        wait(futures)
    else:
        for index, datasets in np.ndenumerate(sources.values):
            for m in measurements:
                t_slice = data[m.name].values[index]

                fuse_measurement(t_slice,
                                 datasets,
                                 geobox,
                                 m,
                                 mk_new=mk_new,
                                 skip_broken_datasets=skip_broken_datasets)

    return data
Example #23
def ingest_work(config, source_type, output_type, index, sources, geobox):
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.product_data(sources,
                                     geobox,
                                     measurements,
                                     fuse_func=fuse_func)
    nudata = data.rename(namemap)
    file_path = get_filename(config, index, sources)

    def _make_dataset(labels, sources):
        sources_union = union_points(
            *[source.extent.to_crs(geobox.crs).points for source in sources])
        valid_data = intersect_points(geobox.extent.points, sources_union)
        dataset = make_dataset(dataset_type=output_type,
                               sources=sources,
                               extent=geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(
                                   config, config['filename']),
                               valid_data=GeoPolygon(valid_data, geobox.crs))
        return dataset

    datasets = xr_apply(
        sources, _make_dataset,
        dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(nudata, global_attributes, variable_params,
                            file_path)

    return datasets
Example #24
def check_open_with_grid_workflow(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    type_name = 'ls5_nbar_albers'
    dt = dc.index.datasets.types.get_by_name(type_name)

    from datacube.api.grid_workflow import GridWorkflow
    gw = GridWorkflow(dc, dt.grid_spec)

    cells = gw.list_cells(product=type_name)
    assert LBG_CELL in cells

    tiles = gw.list_tiles(product=type_name)
    assert tiles
    assert tiles[LBG_CELL]

    ts, tile = tiles[LBG_CELL].popitem()
    dataset_cell = gw.load(LBG_CELL, tile, measurements=['blue'])
    assert dataset_cell['blue'].size

    dataset_cell = gw.load(LBG_CELL, tile)
    assert all(m in dataset_cell
               for m in ['blue', 'green', 'red', 'nir', 'swir1', 'swir2'])

    tiles = gw.list_tile_stacks(product=type_name)
    assert tiles
    assert tiles[LBG_CELL]

    tile = tiles[LBG_CELL]
    dataset_cell = gw.load(LBG_CELL, tile, measurements=['blue'])
    assert dataset_cell['blue'].size

    dataset_cell = gw.load(LBG_CELL, tile)
    assert all(m in dataset_cell
               for m in ['blue', 'green', 'red', 'nir', 'swir1', 'swir2'])
Example #25
 def with_datacube(index, *args, **kwargs):
     return f(Datacube(index=index), *args, **kwargs)
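
This fragment is the inner wrapper of a decorator that injects a Datacube built from the given index. A minimal sketch of what the enclosing decorator plausibly looks like (the name pass_datacube and the functools.wraps usage are assumptions, not taken from the source):

import functools

from datacube import Datacube


def pass_datacube(f):
    """Hypothetical decorator: call f with a Datacube constructed from the index argument."""
    @functools.wraps(f)
    def with_datacube(index, *args, **kwargs):
        return f(Datacube(index=index), *args, **kwargs)
    return with_datacube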
Example #26
def test_end_to_end(clirunner, index, testdata_dir, ingest_configs,
                    datacube_env_name):
    """
    Loads two dataset configurations, then ingests a sample Landsat 5 scene

    One dataset configuration specifies Australian Albers Equal Area Projection,
    the other is simply latitude/longitude.

    The input dataset should be recorded in the index, and two sets of storage units
    should be created on disk and recorded in the index.
    """

    lbg_nbar = testdata_dir / 'lbg' / LBG_NBAR
    lbg_pq = testdata_dir / 'lbg' / LBG_PQ
    ls5_nbar_albers_ingest_config = testdata_dir / ingest_configs[
        'ls5_nbar_albers']
    ls5_pq_albers_ingest_config = testdata_dir / ingest_configs['ls5_pq_albers']

    # Add the LS5 Dataset Types
    clirunner(['-v', 'product', 'add', str(LS5_DATASET_TYPES)])

    # Index the Datasets
    #  - do test run first to increase test coverage
    clirunner(
        ['-v', 'dataset', 'add', '--dry-run',
         str(lbg_nbar),
         str(lbg_pq)])

    #  - do actual indexing
    clirunner(['-v', 'dataset', 'add', str(lbg_nbar), str(lbg_pq)])

    #  - this will be no-op but with ignore lineage
    clirunner([
        '-v', 'dataset', 'add', '--confirm-ignore-lineage',
        str(lbg_nbar),
        str(lbg_pq)
    ])

    # Test no-op update
    for policy in ['archive', 'forget', 'keep']:
        clirunner([
            '-v', 'dataset', 'update', '--dry-run', '--location-policy',
            policy,
            str(lbg_nbar),
            str(lbg_pq)
        ])

        # Test no changes needed update
        clirunner([
            '-v', 'dataset', 'update', '--location-policy', policy,
            str(lbg_nbar),
            str(lbg_pq)
        ])

    # TODO: test location update
    # 1. Make a copy of a file
    # 2. Call dataset update with archive/forget
    # 3. Check location

    # Ingest NBAR
    clirunner(['-v', 'ingest', '-c', str(ls5_nbar_albers_ingest_config)])

    # Ingest PQ
    clirunner(['-v', 'ingest', '-c', str(ls5_pq_albers_ingest_config)])

    dc = Datacube(index=index)
    assert isinstance(str(dc), str)
    assert isinstance(repr(dc), str)

    with pytest.raises(ValueError):
        dc.find_datasets(time='2019')  # no product supplied, raises exception

    check_open_with_dc(index)
    check_open_with_grid_workflow(index)
    check_load_via_dss(index)
Example #27
 def with_index(driver_manager, *args, **kwargs):
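     # Inner wrapper of a decorator: `f` is the wrapped function from the
     # enclosing scope, and a Datacube backed by the given DriverManager is
     # injected as its first argument.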
     return f(Datacube(driver_manager=driver_manager), *args, **kwargs)
Example #28
def check_open_with_dc(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    data_array = dc.load(product='ls5_nbar_albers', measurements=['blue'], stack='variable')
    assert data_array.shape
    assert (data_array != -999).any()

    data_array = dc.load(product='ls5_nbar_albers', measurements=['blue'], time='1992-03-23T23:14:25.500000')
    assert data_array['blue'].shape[0] == 1
    assert (data_array.blue != -999).any()

    data_array = dc.load(product='ls5_nbar_albers', measurements=['blue'], latitude=-35.3, longitude=149.1)
    assert data_array['blue'].shape[1:] == (1, 1)
    assert (data_array.blue != -999).any()

    data_array = dc.load(product='ls5_nbar_albers', latitude=(-35, -36), longitude=(149, 150), stack='variable')
    assert data_array.ndim == 4
    assert 'variable' in data_array.dims
    assert (data_array != -999).any()

    with rasterio.Env():
        lazy_data_array = dc.load(product='ls5_nbar_albers', latitude=(-35, -36), longitude=(149, 150),
                                  stack='variable', dask_chunks={'time': 1, 'x': 1000, 'y': 1000})
        assert lazy_data_array.data.dask
        assert lazy_data_array.ndim == data_array.ndim
        assert 'variable' in lazy_data_array.dims
        assert lazy_data_array[1, :2, 950:1050, 950:1050].equals(data_array[1, :2, 950:1050, 950:1050])

    dataset = dc.load(product='ls5_nbar_albers', measurements=['blue'])
    assert dataset['blue'].size

    dataset = dc.load(product='ls5_nbar_albers', latitude=(-35.2, -35.3), longitude=(149.1, 149.2))
    assert dataset['blue'].size

    with rasterio.Env():
        lazy_dataset = dc.load(product='ls5_nbar_albers', latitude=(-35.2, -35.3), longitude=(149.1, 149.2),
                               dask_chunks={'time': 1})
        assert lazy_dataset['blue'].data.dask
        assert lazy_dataset.blue[:2, :100, :100].equals(dataset.blue[:2, :100, :100])
        assert lazy_dataset.isel(time=slice(0, 2), x=slice(950, 1050), y=slice(950, 1050)).equals(
            dataset.isel(time=slice(0, 2), x=slice(950, 1050), y=slice(950, 1050)))

    dataset_like = dc.load(product='ls5_nbar_albers', measurements=['blue'], like=dataset)
    assert (dataset.blue == dataset_like.blue).all()

    data_array = dc.load(product='ls5_nbar_albers',
                         latitude=(-35, -36), longitude=(149, 150),
                         measurements=['blue'], group_by='solar_day')

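    # align=(5, 20) anchors the pixel grid so that the y origin is offset by 5
    # and the x origin by 20 (in output CRS units), as the assertions below check.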
    dataset = dc.load(product='ls5_nbar_albers', latitude=(-35.2, -35.3), longitude=(149.1, 149.2), align=(5, 20))
    assert dataset.geobox.affine.f % abs(dataset.geobox.affine.e) == 5
    assert dataset.geobox.affine.c % abs(dataset.geobox.affine.a) == 20
    dataset_like = dc.load(product='ls5_nbar_albers', measurements=['blue'], like=dataset)
    assert (dataset.blue == dataset_like.blue).all()

    products_df = dc.list_products()
    assert len(products_df)
    assert len(products_df[products_df['name'].isin(['ls5_nbar_albers'])])
    assert len(products_df[products_df['name'].isin(['ls5_pq_albers'])])

    assert len(dc.list_measurements())

    resamp = ['nearest', 'cubic', 'bilinear', 'cubic_spline', 'lanczos', 'average']
    results = {}

    # WTF
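    # calc_max_change sums the largest adjacent-pixel jumps along the middle row
    # and middle column; smoother resampling methods should yield smaller values,
    # which the assertions at the end rely on.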
    def calc_max_change(da):
        midline = int(da.shape[0] * 0.5)
        a = int(abs(da[midline, :-1].data - da[midline, 1:].data).max())

        centerline = int(da.shape[1] * 0.5)
        b = int(abs(da[:-1, centerline].data - da[1:, centerline].data).max())
        return a + b

    for resamp_meth in resamp:
        dataset = dc.load(product='ls5_nbar_albers', measurements=['blue'],
                          latitude=(-35.28, -35.285), longitude=(149.15, 149.155),
                          output_crs='EPSG:4326', resolution=(-0.0000125, 0.0000125), resampling=resamp_meth)
        results[resamp_meth] = calc_max_change(dataset.blue.isel(time=0))

    assert results['cubic_spline'] < results['nearest']
    assert results['lanczos'] < results['average']