def test_stac():
    product = gm_product(location='/tmp/')
    reader = TaskReader(str(TEST_DIR / 'test_tiles.db'), product)
    task = reader.load_task(reader.all_tiles[0])
    stac_meta = task.render_metadata()
    odc_meta = stac_transform(stac_meta)
def bytes2ds_doc(data):
    if isinstance(data, bytes):
        data = data.decode("utf-8")
    stac_doc = json.loads(data)
    eo3_doc = stac_transform(stac_doc)
    ds_doc = prep_eo3(eo3_doc)
    return ds_doc
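# Minimal usage sketch for bytes2ds_doc, assuming `message` is a hypothetical
# SQS-style object whose .body attribute carries a complete STAC Item as
# UTF-8 bytes; any bytes or str JSON payload works the same way.
ds_doc = bytes2ds_doc(message.body)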
def transform_items(
    doc2ds: Doc2Dataset, items: Iterable[Tuple[Dict[str, Any], str, bool]]
) -> Generator[Tuple[dict, str], None, None]:
    for metadata, uri, relative in items:
        try:
            if relative:
                metadata = stac_transform(metadata)
            else:
                metadata = stac_transform_absolute(metadata)
        except KeyError as e:
            logging.error(
                f"Failed to handle item with KeyError: '{e}'\n The URI was {uri}"
            )
            yield None, uri
            continue
        try:
            ds, err = doc2ds(metadata, uri)
        except ValueError as e:
            logging.error(
                f"Exception thrown when trying to create dataset: '{e}'\n The URI was {uri}"
            )
            # `ds` would be unbound here, so report the failure and move on
            yield None, uri
            continue
        if ds is not None:
            yield ds, uri
        else:
            logging.error(
                f"Failed to create dataset with error {err}\n The URI was {uri}"
            )
            yield None, uri
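# Usage sketch for transform_items, assuming a live Datacube index and a
# hypothetical `stac_items` iterable of (metadata, uri, relative) tuples.
from datacube import Datacube
from datacube.index.hl import Doc2Dataset

dc = Datacube()
doc2ds = Doc2Dataset(dc.index)
for ds, uri in transform_items(doc2ds, stac_items):
    if ds is not None:
        dc.index.datasets.add(ds)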
def create_stac(raster, product, platform, band_name, date_string, path):
    transform = None
    shape = None
    crs = None
    with rasterio.open(raster) as dataset:
        transform = dataset.transform
        shape = dataset.shape
        crs = dataset.crs.to_epsg()
        bounds = dataset.bounds

    geometry, bbox = get_geometry(bounds, crs)

    stac_dict = {
        "id": raster.stem.replace(" ", "_"),
        "type": "Feature",
        "stac_version": "1.0.0-beta.2",
        "stac_extensions": ["proj"],
        "properties": {
            "odc:product": product,
            "platform": platform,
            "datetime": date_string,
            "proj:epsg": crs,
        },
        "bbox": bbox,
        "geometry": geometry,
        "links": [
            {
                "rel": "self",
                "href": pathlib.Path(path)
                .joinpath(raster.with_suffix(".json"))
                .as_posix(),
            }
        ],
        "assets": {
            band_name: {
                "title": f"Data file for {band_name}",
                "type": "image/tiff; application=geotiff; profile=cloud-optimized",
                "roles": ["data"],
                "href": raster.stem + raster.suffix,
                "proj:shape": shape,
                "proj:transform": transform,
            }
        },
    }

    with open(raster.with_suffix(".json"), "w") as f:
        json.dump(stac_dict, f, indent=2)

    with open(raster.with_suffix(".odc-dataset.json"), "w") as f:
        json.dump(stac_transform(stac_dict), f, indent=2)
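# Hypothetical invocation of create_stac: writes <name>.json and
# <name>.odc-dataset.json next to the raster. The file name, product,
# platform and path below are placeholders, not values from the source.
create_stac(
    raster=pathlib.Path("red_band.tif"),
    product="example_product",
    platform="sentinel-2a",
    band_name="red",
    date_string="2020-01-01T00:00:00Z",
    path="https://example.com/data/",
)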
def test_stac(test_db_path):
    from odc.stats._gm import StatsGMS2

    product = StatsGMS2().product(location="/tmp/")
    reader = TaskReader(test_db_path, product)
    task = reader.load_task(reader.all_tiles[0])
    stac_meta = task.render_metadata()
    odc_meta = stac_transform(stac_meta)

    # TODO: actually test content of odc_meta?
    assert isinstance(odc_meta, dict)

    stac_item = pystac.Item.from_dict(stac_meta)
    stac_item.validate()
def item_to_dataset(
    dc_index: index.Index, product_name: str, item: dict
) -> Optional[model.Dataset]:
    doc2ds = Doc2Dataset(index=dc_index, products=[product_name])
    uri, relative = guess_location(item)
    if relative:
        metadata = stac_transform(item)
    else:
        metadata = stac_transform_absolute(item)
    ds, err = doc2ds(metadata, uri)
    if ds is None:
        # Make the failure path explicit instead of dropping `err` silently
        logging.error(f"Failed to create dataset with error {err}")
        return None
    return ds
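# Sketch of wiring item_to_dataset into an indexing step, assuming a
# hypothetical `stac_item` dict fetched from a STAC API and an existing
# product named "example_product".
dc = Datacube()
ds = item_to_dataset(dc.index, "example_product", stac_item)
if ds is not None:
    dc.index.datasets.add(ds)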
def collect_datasets(data_folder: str):
    # NOTE: relies on a module-level Datacube instance named `dc`
    files = fetch_stac_json_files(data_folder)
    for fld, data_files in files:
        for fn in data_files:
            if not fn.endswith(".json"):
                continue
            print(f"processing {fn}")
            full_fn = Path(fld).joinpath(fn)
            with open(full_fn) as fhin:
                rendered = json.load(fhin)
            stac_doc = stac_transform(rendered)
            # exact match on "self"; `item["rel"] in "self"` was a substring check
            metapath = list(
                filter(lambda item: item["rel"] == "self", rendered["links"])
            )[0]["href"]
            # TODO: if s3 access available use s3-to-dc directly
            yield list(from_metadata_stream([(metapath, stac_doc)], dc.index))[0][0]
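# Sketch: drain the collect_datasets generator and index each dataset,
# assuming `dc` is the same module-level Datacube the generator uses;
# the folder path is a placeholder.
for dataset in collect_datasets("/path/to/stac/json"):
    dc.index.datasets.add(dataset)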
def test_usgs_landsat_stac_transform(usgs_landsat_stac):
    # Smoke test: the transform should run without raising
    transformed = stac_transform(usgs_landsat_stac)
    assert transformed is not None
def test_sentinel_stac_transform(sentinel_stac, sentinel_odc):
    actual_doc = stac_transform(sentinel_stac)
    do_diff(actual_doc, sentinel_odc)
def test_landsat_stac_transform(landsat_stac, landsat_odc):
    actual_doc = stac_transform(landsat_stac)
    do_diff(actual_doc, landsat_odc)
def test_lidar_stac_transform(lidar_stac):
    # Smoke test: the transform should run without raising
    transformed = stac_transform(lidar_stac)
    assert transformed is not None
def cli(
    skip_lineage,
    fail_on_missing_lineage,
    verify_lineage,
    stac,
    odc_metadata_link,
    limit,
    update,
    update_if_exists,
    archive,
    allow_unsafe,
    record_path,
    region_code_list_uri,
    absolute,
    queue_name,
    product,
):
    """Iterate through messages on an SQS queue and add them to datacube."""
    transform = None
    if stac:
        transform = lambda stac_doc: stac_transform(stac_doc, relative=not absolute)

    candidate_products = product.split()

    sqs = boto3.resource("sqs")
    queue = sqs.get_queue_by_name(QueueName=queue_name)

    # Do the thing
    dc = Datacube()
    success, failed = queue_to_odc(
        queue,
        dc,
        candidate_products,
        skip_lineage=skip_lineage,
        fail_on_missing_lineage=fail_on_missing_lineage,
        verify_lineage=verify_lineage,
        transform=transform,
        limit=limit,
        update=update,
        update_if_exists=update_if_exists,
        archive=archive,
        allow_unsafe=allow_unsafe,
        record_path=record_path,
        odc_metadata_link=odc_metadata_link,
        region_code_list_uri=region_code_list_uri,
    )

    result_msg = ""
    if update:
        result_msg += f"Updated {success} Dataset(s), "
    elif archive:
        result_msg += f"Archived {success} Dataset(s), "
    else:
        result_msg += f"Added {success} Dataset(s), "
    result_msg += f"Failed {failed} Dataset(s)"
    print(result_msg)

    if failed > 0:
        sys.exit(failed)