def run_it(dataset_paths):
    # `ds_resolve`, `index`, `auto_add_lineage` and `dry_run` are free
    # variables captured from the enclosing CLI command's scope.
    doc_stream = ui_path_doc_stream(dataset_paths, logger=_LOG, uri=True)
    dss = dataset_stream(doc_stream, ds_resolve)
    index_datasets(dss, index,
                   auto_add_lineage=auto_add_lineage,
                   dry_run=dry_run)
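# A minimal sketch (not part of the original source) of binding run_it's
# captured variables explicitly instead of relying on an enclosing scope.
# It assumes datacube-core's Doc2Dataset (datacube.index.hl) and the same
# helpers used above; `run_it_explicit` and `product_names` are hypothetical.
def run_it_explicit(dataset_paths, index, product_names=None,
                    auto_add_lineage=True, dry_run=False):
    ds_resolve = Doc2Dataset(index, products=product_names)
    doc_stream = ui_path_doc_stream(dataset_paths, logger=_LOG, uri=True)
    dss = dataset_stream(doc_stream, ds_resolve)
    index_datasets(dss, index,
                   auto_add_lineage=auto_add_lineage,
                   dry_run=dry_run)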
def item2dataset_cli(stac_collection, dc_product, url, outdir, max_items,
                     engine_file, datacube_config, verbose, access_token,
                     advanced_filter):
    _filter = {"collections": [stac_collection]}
    if advanced_filter:
        _filter = {**_filter, **prepare_advanced_filter(advanced_filter)}

    stac_service = stac.STAC(url, False, access_token=access_token)
    dc_index = datacube_index(datacube_config)

    features = create_feature_collection_from_stac_elements(
        stac_service, int(max_items), _filter)
    odc_datasets = stac2odc.item.item2dataset(engine_file, dc_product,
                                              features, dc_index,
                                              verbose=verbose)
    odc_datasets_definition_files = write_odc_element_in_yaml_file(
        odc_datasets, outdir)

    # add the dataset definitions to the datacube index
    # code adapted from: https://github.com/opendatacube/datacube-core/blob/develop/datacube/scripts/dataset.py
    ds_resolve = Doc2Dataset(dc_index, [dc_product])
    doc_stream = remap_uri_from_doc(
        ui_path_doc_stream(odc_datasets_definition_files, uri=True))
    datasets_on_stream = dataset_stream(doc_stream, ds_resolve)

    logger_message("Adding datasets", logger.info, True)
    for dataset in datasets_on_stream:
        try:
            dc_index.datasets.add(dataset, with_lineage=True)
        except (ValueError, MissingRecordError):
            logger_message(f"Error adding dataset ({dataset.local_uri})",
                           logger.warning, True)
def load_datasets(path, ds_resolve):
    for uri, ds in ui_path_doc_stream(path):
        dataset, err = ds_resolve(ds, uri)

        if dataset is None:
            _LOG.error('dataset is empty', error=str(err))
            continue

        is_consistent, reason = check_dataset_consistent(dataset)
        if not is_consistent:
            _LOG.error("dataset inconsistency",
                       dataset=dataset.id,
                       reason=str(reason))
            continue

        yield dataset
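# A hypothetical driver for load_datasets (not from the original code),
# assuming datacube-core is installed and a default datacube configuration
# exists; Doc2Dataset resolves raw documents into Dataset objects, and
# `_example_load_and_add` is an illustrative name.
def _example_load_and_add(paths):
    from datacube import Datacube
    from datacube.index.hl import Doc2Dataset

    dc = Datacube()
    ds_resolve = Doc2Dataset(dc.index)
    for dataset in load_datasets(paths, ds_resolve):
        dc.index.datasets.add(dataset, with_lineage=True)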
def test_ui_path_doc_stream(httpserver):
    filename = 'dataset_metadata.yaml'
    file_content = ''
    out_dir = write_files({filename: file_content})

    httpserver.expect_request(filename).respond_with_data(file_content)

    input_paths = [Path(out_dir) / 'dataset_metadata.yaml',
                   httpserver.url_for(filename)]

    for input_path, (doc, resolved_path) in zip(input_paths,
                                                ui_path_doc_stream(input_paths)):
        assert doc == {}
        assert input_path == resolved_path
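# Note (assumption): `httpserver` appears to be the pytest-httpserver fixture
# and `write_files` a test helper that writes the {filename: content} mapping
# into a fresh temporary directory and returns its path, so the same empty
# YAML document is streamed once from disk and once over HTTP.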
def update_cmd(index, keys_that_can_change, dry_run, location_policy, dataset_paths):
    def loc_action(action, new_ds, existing_ds, action_name):
        if len(existing_ds.uris) == 0:
            return None

        if len(existing_ds.uris) > 1:
            _LOG.warning("Refusing to %s old location, there are several", action_name)
            return None

        new_uri, = new_ds.uris
        old_uri, = existing_ds.uris

        if new_uri == old_uri:
            return None

        if dry_run:
            echo('Will {} old location {}, and add new one {}'.format(action_name, old_uri, new_uri))
            return True

        return action(existing_ds.id, old_uri)

    def loc_archive(new_ds, existing_ds):
        return loc_action(index.datasets.archive_location, new_ds, existing_ds, 'archive')

    def loc_forget(new_ds, existing_ds):
        return loc_action(index.datasets.remove_location, new_ds, existing_ds, 'forget')

    def loc_keep(new_ds, existing_ds):
        return None

    update_loc = dict(archive=loc_archive,
                      forget=loc_forget,
                      keep=loc_keep)[location_policy]

    updates_allowed = parse_update_rules(keys_that_can_change)

    success, fail = 0, 0
    for dataset, existing_ds in load_datasets_for_update(
            ui_path_doc_stream(dataset_paths, logger=_LOG, uri=True), index):
        _LOG.info('Matched %s', dataset)

        if location_policy != 'keep':
            if len(existing_ds.uris) > 1:
                # TODO:
                pass

        if not dry_run:
            try:
                index.datasets.update(dataset, updates_allowed=updates_allowed)
                update_loc(dataset, existing_ds)
                success += 1
                echo('Updated %s' % dataset.id)
            except ValueError as e:
                fail += 1
                echo('Failed to update %s: %s' % (dataset.id, e))
        else:
            if update_dry_run(index, updates_allowed, dataset):
                update_loc(dataset, existing_ds)
                success += 1
            else:
                fail += 1

    echo('%d successful, %d failed' % (success, fail))
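# Sketch of the helper update_cmd relies on but which is not shown in this
# excerpt. In datacube-core, parse_update_rules maps dotted document offsets
# (e.g. 'metadata.description') to the permissive allow_any change policy
# accepted by index.datasets.update(); treat this as an assumption-laden
# reconstruction rather than the exact source.
from datacube.utils.changes import allow_any

def parse_update_rules(keys_that_can_change):
    updates_allowed = {}
    for key in keys_that_can_change:
        updates_allowed[tuple(key.split('.'))] = allow_any
    return updates_allowed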