Example #1
def check_legacy_open(index):
    import rasterio
    from datacube.api.core import Datacube
    from datacube.api.query import query_group_by

    dc = Datacube(index=index)

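    # Eagerly load one time slice of the 'blue' band; use_threads exercises the threaded read path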
    data_array = dc.load(product='ls5_nbar_albers',
                         measurements=['blue'],
                         time='1992-03-23T23:14:25.500000',
                         use_threads=True)
    assert data_array['blue'].shape[0] == 1
    assert (data_array.blue != -999).any()

    # Force a fused load by duplicating the dataset, so two sources cover the same time slice
    dss = dc.find_datasets(product='ls5_nbar_albers',
                           time='1992-03-23T23:14:25.500000')

    assert len(dss) == 1

    dss = dss * 2
    sources = dc.group_datasets(dss, query_group_by('time'))

    gbox = data_array.geobox
    mm = [dss[0].type.measurements['blue']]
    xx = dc.load_data(sources, gbox, mm)
    assert (xx == data_array).all()

    with rasterio.Env():
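        # A lazy, dask-backed load of the same sources should match the eager result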
        xx_lazy = dc.load_data(sources, gbox, mm, dask_chunks={'time': 1})
        assert xx_lazy['blue'].data.dask
        assert xx_lazy.blue[0, :, :].equals(xx.blue[0, :, :])
Example #2
def ingest_work(config, source_type, output_type, tile, tile_index):
    _LOG.info('Starting task %s', tile_index)
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
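        # 'copy' selects the default fuser (fuse_func=None); reprojection runs single-threaded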
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources, tile.geobox, measurements, fuse_func=fuse_func)
    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources, version=config['taskfile_version'])

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_path.absolute().as_uri(),
                            app_info=get_app_metadata(config, config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in a DataArray to associate time -> dataset
    nudata['dataset'] = datasets_to_doc(datasets)

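    # Write the renamed bands, per-time dataset metadata and global attributes out as NetCDF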
    write_dataset_to_netcdf(nudata, file_path, global_attributes, variable_params)
    _LOG.info('Finished task %s', tile_index)

    return datasets
Example #3
def ingest_work(config, source_type, output_type, tile, tile_index):
    # pylint: disable=too-many-locals
    _LOG.info('Starting task %s', tile_index)
    driver = storage_writer_by_name(config['storage']['driver'])

    if driver is None:
        _LOG.error('Failed to load storage driver %s', config['storage']['driver'])
        raise ValueError('Unable to find the storage driver named by the storage.driver option')

    namemap = get_namemap(config)
    # TODO: get_measurements possibly changes dtype, not sure load_data would like that
    measurements = get_measurements(source_type, config)
    resampling = get_resampling(config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]

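    # Flatten the time-grouped sources into one list and report any dataset with no file location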
    datasets = tile.sources.sum().item()
    for dataset in datasets:
        if not dataset.uris:
            _LOG.error('Locationless dataset found in the database: %r', dataset)

    data = Datacube.load_data(tile.sources, tile.geobox, measurements,
                              resampling=resampling,
                              fuse_func=fuse_func)

    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)
    file_uri = driver.mk_uri(file_path, config['storage'])

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_uri,
                            app_info=get_app_metadata(config['filename']),
                            valid_data=polygon_from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in a DataArray to associate time -> dataset
    nudata['dataset'] = datasets_to_doc(datasets)

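    # Compress the serialised dataset-metadata variable (one record per time slice)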
    variable_params['dataset'] = {
        'chunksizes': (1,),
        'zlib': True,
        'complevel': 9,
    }

    driver_data = driver.write_dataset_to_storage(nudata, file_uri,
                                                  global_attributes=global_attributes,
                                                  variable_params=variable_params,
                                                  storage_config=config['storage'])

    if (driver_data is not None) and len(driver_data) > 0:
        datasets.attrs['driver_data'] = driver_data

    _LOG.info('Finished task %s', tile_index)

    return datasets
Example #4
def ingest_work(config, source_type, output_type, tile, tile_index):
    # pylint: disable=too-many-locals
    _LOG.info('Starting task %s', tile_index)
    driver = storage_writer_by_name(config['storage']['driver'])

    if driver is None:
        _LOG.error('Failed to load storage driver %s', config['storage']['driver'])
        raise ValueError('Unable to find the storage driver named by the storage.driver option')

    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources, tile.geobox, measurements, fuse_func=fuse_func)

    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)

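    # Map the output path to a URI matching the driver's scheme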
    def mk_uri(file_path):
        if driver.uri_scheme == "file":
            return file_path.absolute().as_uri()
        return '{}://{}'.format(driver.uri_scheme, file_path)

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=mk_uri(file_path),
                            app_info=get_app_metadata(config, config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in a DataArray to associate time -> dataset
    nudata['dataset'] = datasets_to_doc(datasets)

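    # Chunk the serialised dataset metadata one record per time slice and compress with zlib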
    variable_params['dataset'] = {
        'chunksizes': (1,),
        'zlib': True,
        'complevel': 9,
    }

    storage_metadata = driver.write_dataset_to_storage(nudata, file_path,
                                                       global_attributes=global_attributes,
                                                       variable_params=variable_params,
                                                       storage_config=config['storage'])

    if (storage_metadata is not None) and len(storage_metadata) > 0:
        datasets.attrs['storage_metadata'] = storage_metadata

    _LOG.info('Finished task %s', tile_index)

    return datasets
Example #5
def ingest_work(driver_manager, config, source_type, output_type, tile,
                tile_index):
    _LOG.info('Starting task %s', tile_index)

    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
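        # 'copy' selects the default fuser (fuse_func=None); data is loaded via the supplied driver manager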
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources,
                                  tile.geobox,
                                  measurements,
                                  fuse_func=fuse_func,
                                  driver_manager=driver_manager)
    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_path.absolute().as_uri(),
                            app_info=get_app_metadata(config,
                                                      config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(
                                sources, tile.geobox))

    datasets = xr_apply(
        tile.sources, _make_dataset,
        dtype='O')  # Store in a DataArray to associate time -> dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    # Until ingest becomes a class and DriverManager an instance
    # variable, we call the constructor each time. Since DriverManager
    # is a singleton, there is little overhead.
    datasets.attrs['storage_output'] = driver_manager.write_dataset_to_storage(
        nudata, file_path, global_attributes, variable_params)
    _LOG.info('Finished task %s', tile_index)

    # When using the multiproc executor, the driver manager is a clone; close it to release its resources.
    if driver_manager.is_clone:
        driver_manager.close()

    return datasets