def get_driver_from_config(config):
    """Load the storage-writer driver named in the ingest config, or exit."""
    driver_name = config['storage']['driver']
    driver = storage_writer_by_name(driver_name)
    if driver is None:
        click.echo('Failed to load requested storage driver: ' + driver_name)
        sys.exit(2)
    return driver
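# Example usage (a minimal sketch, not part of this module). The config shape
# matches what get_driver_from_config reads; the driver name 'netcdf' is
# illustrative only, since valid names depend on which storage-writer drivers
# are registered in this installation.
#
#     config = {'storage': {'driver': 'netcdf'}}
#     driver = get_driver_from_config(config)  # calls sys.exit(2) on failure
#     print(driver.format)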
def ingest_work(config, source_type, output_type, tile, tile_index):
    # pylint: disable=too-many-locals
    _LOG.info('Starting task %s', tile_index)
    driver = storage_writer_by_name(config['storage']['driver'])

    if driver is None:
        _LOG.error('Failed to load storage driver %s', config['storage']['driver'])
        raise ValueError('Failed to load storage driver {!r} '
                         '(check the storage.driver option)'.format(config['storage']['driver']))

    namemap = get_namemap(config)
    # TODO: get_measurements possibly changes dtype, not sure load_data would like that
    measurements = get_measurements(source_type, config)
    resampling = get_resampling(config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
    datasets = tile.sources.sum().item()
    for dataset in datasets:
        if not dataset.uris:
            _LOG.error('Locationless dataset found in the database: %r', dataset)

    data = Datacube.load_data(tile.sources, tile.geobox, measurements,
                              resampling=resampling,
                              fuse_func=fuse_func)

    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)
    file_uri = driver.mk_uri(file_path, config['storage'])

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_uri,
                            app_info=get_app_metadata(config['filename']),
                            valid_data=polygon_from_sources_extents(sources, tile.geobox))

    # Store in a DataArray to associate time -> Dataset
    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')
    nudata['dataset'] = datasets_to_doc(datasets)

    variable_params['dataset'] = {
        'chunksizes': (1,),
        'zlib': True,
        'complevel': 9,
    }

    driver_data = driver.write_dataset_to_storage(nudata, file_uri,
                                                  global_attributes=global_attributes,
                                                  variable_params=variable_params,
                                                  storage_config=config['storage'])
    if (driver_data is not None) and len(driver_data) > 0:
        datasets.attrs['driver_data'] = driver_data

    _LOG.info('Finished task %s', tile_index)
    return datasets
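# A minimal sketch of how ingest_work is typically driven. It assumes each
# task is a dict with 'tile' and 'tile_index' keys, as produced by the
# ingester's task-generation step; the loop itself is illustrative, not this
# module's API.
#
#     for task in tasks:
#         datasets = ingest_work(config, source_type, output_type,
#                                task['tile'], task['tile_index'])
#         # each entry of the returned DataArray is a Dataset ready for indexing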
def init_dea(index: Index, with_permissions: bool, log_header=print_header, log=print_):
    """
    Create or update a DEA-configured ODC instance.
    """
    log_header(f"ODC init of {index.url}")
    was_created = index.init_db(with_default_types=False,
                                with_permissions=with_permissions)
    if was_created:
        log('Created.')
    else:
        log('Updated.')

    log('Checking indexes/views.')
    index.metadata_types.check_field_indexes(
        allow_table_lock=True,
        rebuild_indexes=False,
        rebuild_views=True,
    )

    log_header('Checking DEA metadata types')
    # Add DEA metadata types, products.
    for _, md_type_def in read_documents(DEA_MD_TYPES):
        md = index.metadata_types.add(index.metadata_types.from_doc(md_type_def))
        log(f"{md.name}")

    log_header('Checking DEA products')
    for _, product_def in read_documents(*DEA_PRODUCTS_DIR.glob('*.yaml')):
        product = index.products.add_document(product_def)
        log(f"{product.name}")

    log_header('Checking DEA ingested definitions')
    for path in DEA_INGESTION_DIR.glob('*.yaml'):
        ingest_config = ingest.load_config_from_file(path)

        driver_name = ingest_config['storage']['driver']
        driver = storage_writer_by_name(driver_name)
        if driver is None:
            raise ValueError("No driver found for {}".format(driver_name))

        source_type, output_type = ingest.ensure_output_type(
            index, ingest_config, driver.format, allow_product_changes=True)

        log(f"{output_type.name:<20}\t\t← {source_type.name}")
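# Minimal sketch of calling init_dea from a script. It assumes an Index is
# obtained through the stable datacube.Datacube API; with_permissions=False
# skips database role/permission setup.
#
#     import datacube
#
#     dc = datacube.Datacube(app='dea-init')
#     init_dea(dc.index, with_permissions=False)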