def add_stac(self, tile):
    # Build a STAC Item for one EPT (Entwine Point Tile) resource.
    # Returns None when the tile has no boundary polygon to describe.
    if not tile.poly:
        return None
    item = pystac.Item(
        tile.name, mapping(tile.poly), list(tile.poly.bounds),
        # NOTE(review): naive local datetime — presumably should be UTC; confirm
        datetime.datetime.now(),
        {'description': 'A USGS Lidar pointcloud in Entwine/EPT format'})
    item.ext.enable(pystac.Extensions.POINTCLOUD)
    # icky — copy the EPT schema dimensions into pointcloud-extension schemas
    s = tile.ept['schema']
    p = []
    for d in s:
        p.append(pystac.extensions.pointcloud.PointcloudSchema(d))
    # NOTE(review): epsg is passed as the string 'EPSG:3857'; the extension
    # may expect a bare integer code — confirm against the pystac version used.
    item.ext.pointcloud.apply(tile.num_points, 'lidar', 'ept', p,
                              epsg='EPSG:3857')
    # Single asset: the ept.json entry point for the point cloud data.
    asset = pystac.Asset(tile.url, 'entwine',
                         'The ept.json for accessing data')
    item.add_asset('ept.json', asset)
    # self + parent links rooted at the configured STAC base URL.
    item_link = pystac.Link('self',
                            f'{self.args.stac_base_url}{tile.name}.json')
    item_parent = pystac.Link('parent',
                              f'{self.args.stac_base_url}catalog.json')
    item.add_links([item_link, item_parent])
    return item
def add_source(
    self,
    source_item: pystac.Item,
    title: Optional[str] = None,
    assets: Optional[List[str]] = None,
) -> None:
    """Adds a link to a source item.

    Args:
        source_item : Source imagery that the LabelItem applies to.
        title : Optional title for the link.
        assets : Optional list of assets that determine what
            assets in the source item this label item data applies to.
    """
    # Only attach "label:assets" metadata when an asset list was given.
    extra_fields = {"label:assets": assets} if assets is not None else None
    self.obj.add_link(
        pystac.Link(
            "source",
            source_item,
            title=title,
            media_type=pystac.MediaType.JSON,
            extra_fields=extra_fields,
        )
    )
def collect_items(sar_catalog, usfimr_collection):
    """Pair USFIMR flood-extent labels with their SAR imagery items.

    Args:
        sar_catalog: Catalog of SAR items, organised in children keyed by
            flood id.
        usfimr_collection: Collection of USFIMR flood items, each with a
            "geojson" asset holding the flood-extent labels.

    Returns:
        Tuple of (list of cloned SAR items, each linked to its label item;
        the labels collection containing the cloned label items).
    """
    images = []
    # FIX: the original first built a throwaway "labels" collection and
    # immediately rebound the name — only this collection was ever used.
    labels_collection = pystac.Collection(
        "usfimr_sar_labels", "usfimr_sar_labels", usfimr_collection.extent
    )
    for flood_id in ["1", "2", "3", "15", "16"]:
        usfimr_item = usfimr_collection.get_item(flood_id)
        usfimr_geojson_asset = usfimr_item.assets["geojson"]
        usfimr_geojson_asset.set_owner(usfimr_item)
        usfimr_item_clone = usfimr_item.clone()
        # Reduce item assets to just the geojson as labels
        usfimr_item_clone.assets = {"labels": usfimr_item.assets["geojson"]}
        labels_collection.add_item(usfimr_item_clone)

        # Clone every SAR item for this flood and link it back to the labels.
        for sar_item in sar_catalog.get_child(flood_id).get_items():
            sar_item_clone = sar_item.clone()
            sar_item_clone.links.append(
                pystac.Link(
                    "labels",
                    target=usfimr_item_clone,
                    media_type="application/geo+json",
                    link_type=pystac.LinkType.RELATIVE,
                ).set_owner(sar_item_clone)
            )
            images.append(sar_item_clone)

    return images, labels_collection
def test_minimal(self) -> None:
    rel = "my rel"
    target = "https://example.com/a/b"
    link = pystac.Link(rel, target)

    expected_repr = f"<Link rel={rel} target={target}>"
    expected_dict = {"rel": rel, "href": target}

    # Fresh, unresolved link.
    self.assertEqual(target, link.get_href())
    self.assertEqual(target, link.get_absolute_href())
    self.assertEqual(expected_repr, repr(link))
    self.assertFalse(link.is_resolved())
    self.assertEqual(expected_dict, link.to_dict())

    # The clone is a distinct object that serialises identically.
    clone = link.clone()
    self.assertNotEqual(link, clone)
    self.assertEqual(target, clone.get_href())
    self.assertEqual(target, clone.get_absolute_href())
    self.assertEqual(expected_repr, repr(clone))
    self.assertEqual(expected_dict, clone.to_dict())

    # Owner can be cleared and set.
    self.assertIsNone(link.owner)
    link.set_owner(None)
    self.assertIsNone(link.owner)
    link.set_owner(self.item)
    self.assertEqual(self.item, link.owner)
def create_and_upload_stac(cog_file: Path, s3_dst: str, year) -> Item:
    """Create a STAC Item for a GMW COG, upload the COG and the STAC
    document to S3, and return the Item.

    Args:
        cog_file: Local path to the Cloud Optimised GeoTIFF to publish.
        s3_dst: Destination S3 prefix (e.g. ``s3://bucket/gmw``).
        year: Year of the data; drives naming and the datetime range.

    Returns:
        The created (and validated) :class:`pystac.Item`.
    """
    out_path = URL(f"{s3_dst}/{year}/")

    log.info("Item base creation")
    item = create_stac_item(
        str(cog_file),
        id=str(odc_uuid("gmw", "2.0", [cog_file.name.replace("tif", "")])),
        with_proj=True,
        input_datetime=datetime(int(year), 12, 31),
        properties={
            "odc:product": "gmw",
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )

    log.info("links creation")
    item.set_self_href(str(out_path / f"gmw_{year}_stac-item.json"))
    item.add_links([
        pystac.Link(
            target=str(SOURCE_URL_PATH / FILE_NAME.format(year=year)),
            title="Source file",
            rel=pystac.RelType.DERIVED_FROM,
            media_type="application/zip",
        )
    ])

    out_data = out_path / cog_file.name
    # Remove asset created by create_stac_item and add our own
    del item.assets["asset"]
    item.assets["mangrove"] = pystac.Asset(
        href=str(out_data),
        title="gmw-v1.0",
        media_type=pystac.MediaType.COG,
        roles=["data"],
    )
    log.info(f"Item created {item.to_dict()}")
    log.info(f"Item validated {item.validate()}")

    log.info(f"Dump the data to S3 {str(cog_file)}")
    # FIX: read via pathlib so the file handle is closed; the original
    # used open(...).read() and leaked the handle.
    s3_dump(
        data=cog_file.read_bytes(),
        url=str(out_data),
        ACL="bucket-owner-full-control",
        ContentType="image/tiff",
    )
    log.info(f"File written to {out_data}")

    log.info("Write STAC to S3")
    s3_dump(
        data=json.dumps(item.to_dict(), indent=2),
        url=item.self_href,
        ACL="bucket-owner-full-control",
        ContentType="application/json",
    )
    log.info(f"STAC written to {item.self_href}")

    return item
def info(args):
    """Enumerate bucket resources, compute tile boundaries in parallel,
    write one STAC Item per successful tile plus a root catalog, and dump
    any per-task errors to ``errors.json``.

    Args:
        args: Parsed CLI arguments; uses ``bucket``, ``resolution``,
            ``limit``, ``stac_directory`` and ``stac_base_url``.
    """
    keys = list(get_resources(args.bucket, delimiter='/'))
    logger.debug('Querying boundaries for %d keys' % (len(keys)))
    queue = Process()
    catalog = pystac.Catalog(
        '3dep',
        'A catalog of USGS 3DEP Lidar hosted on AWS s3.',
        href=f'{args.stac_base_url}catalog.json',
        stac_extensions=['POINTCLOUD'])
    base = Path(args.stac_directory)
    base.mkdir(exist_ok=True, parents=True)

    # Queue up one boundary task per key; a limit of 0 means "no limit".
    count = 0
    for k in keys:
        if count == args.limit and count != 0:
            break
        queue.put(Task(args.bucket, k, args.resolution))
        count += 1
    queue.do(count=20)

    layer = Layer(args)
    for r in queue.results:
        if not r.error:
            layer.add(r)
            with open(base / f"{r.name}.json", 'w') as f:
                json.dump(layer.add_stac(r).to_dict(), f)
            link = pystac.Link('item', f'{args.stac_base_url}{r.name}.json')
            catalog.add_link(link)

    # Collect failures; use a context manager so the handle always closes
    # (the original leaked it if the write raised).
    errors = [r.error for r in queue.results if r.error]
    with open('errors.json', 'wb') as f:
        f.write(json.dumps(errors).encode('utf-8'))

    with open(base / "catalog.json", 'w') as f:
        json.dump(catalog.to_dict(), f)
def doi(self, v: Optional[str]) -> None:
    """Set the DOI, replacing any existing cite-as link for the old value."""
    if DOI_PROP in self.properties:
        existing = self.properties[DOI_PROP]
        # Same DOI already recorded: nothing to do.
        if v == existing:
            return
        remove_link(self.obj.links, existing)
    if v is None:
        return
    # Record the new DOI and add its cite-as link.
    self.properties[DOI_PROP] = v
    self.obj.add_link(pystac.Link(ScientificRelType.CITE_AS, doi_to_url(v)))
def test_auto_title_not_found(self) -> None:
    # A collection without a title provides no auto-title for the link.
    collection = pystac.Collection(
        id="my_collection",
        description="Test Collection",
        extent=pystac.Extent.from_items([self.item]),
    )
    self.assertEqual(None, pystac.Link("my rel", target=collection).title)
def test_serialize_link(self) -> None:
    href = "https://some-domain/path/to/item.json"
    title = "A Test Link"
    serialized = pystac.Link(
        pystac.RelType.SELF, href, pystac.MediaType.JSON, title
    ).to_dict()
    # rel/type serialise to their plain string values.
    self.assertEqual("self", str(serialized["rel"]))
    self.assertEqual("application/json", str(serialized["type"]))
    self.assertEqual(title, serialized["title"])
    self.assertEqual(href, serialized["href"])
def test_auto_title_is_serialized(self) -> None:
    collection = pystac.Collection(
        id="my_collection",
        description="Test Collection",
        extent=pystac.Extent.from_items([self.item]),
        title="Collection Title",
    )
    # The title borrowed from the target appears in the serialised dict.
    link = pystac.Link("my rel", target=collection)
    assert link.to_dict().get("title") == collection.title
def test_auto_title_when_resolved(self) -> None:
    collection = pystac.Collection(
        id="my_collection",
        description="Test Collection",
        extent=pystac.Extent.from_items([self.item]),
        title="Collection Title",
    )
    # A resolved target lends its title to the link.
    link = pystac.Link("my rel", target=collection)
    self.assertEqual(collection.title, link.title)
def test_target_getter_setter(self) -> None:
    link = pystac.Link("my rel", target="./foo/bar.json")
    # String targets round-trip unchanged.
    self.assertEqual("./foo/bar.json", link.target)
    self.assertEqual("./foo/bar.json", link.get_target_str())

    # A STAC-object target reports its self HREF as the string form.
    link.target = self.item
    self.assertEqual(self.item, link.target)
    self.assertEqual(self.item.get_self_href(), link.get_target_str())

    # And can be swapped back to a plain string.
    link.target = "./bar/foo.json"
    self.assertEqual("./bar/foo.json", link.target)
def test_title_as_init_argument(self) -> None:
    link_title = "Link title"
    collection = pystac.Collection(
        id="my_collection",
        description="Test Collection",
        extent=pystac.Extent.from_items([self.item]),
        title="Collection Title",
    )
    # An explicit title wins over the target's auto-title.
    link = pystac.Link("my rel", title=link_title, target=collection)
    assert link.title == link_title
    assert link.to_dict().get("title") == link_title
def test_minimal(self):
    rel = 'my rel'
    target = 'https://example.com/a/b'
    link = pystac.Link(rel, target)

    expected_repr = f'<Link rel={rel} target={target}>'
    expected_dict = {'rel': rel, 'href': target}

    # Fresh, unresolved link.
    self.assertEqual(target, link.get_href())
    self.assertEqual(target, link.get_absolute_href())
    self.assertEqual(expected_repr, repr(link))
    self.assertFalse(link.is_resolved())
    self.assertEqual(expected_dict, link.to_dict())

    # The clone serialises identically but is a distinct object.
    clone = link.clone()
    self.assertNotEqual(link, clone)
    self.assertEqual(target, clone.get_href())
    self.assertEqual(target, clone.get_absolute_href())
    self.assertEqual(expected_repr, repr(clone))
    self.assertEqual(expected_dict, clone.to_dict())

    # Owner accepts arbitrary values and can be cleared.
    self.assertIsNone(link.owner)
    link.set_owner(1)  # A junk value.
    self.assertEqual(1, link.owner)
    link.set_owner(None)
    self.assertIsNone(link.owner)

    # Absolute stays absolute; making it relative does not change the
    # resolved HREFs of an absolute target.
    self.assertEqual(pystac.LinkType.ABSOLUTE, link.link_type)
    link.make_absolute()
    self.assertEqual(pystac.LinkType.ABSOLUTE, link.link_type)
    self.assertEqual(target, link.get_href())
    self.assertEqual(target, link.get_absolute_href())
    link.make_relative()
    self.assertEqual(pystac.LinkType.RELATIVE, link.link_type)
    self.assertEqual(target, link.get_href())
    self.assertEqual(target, link.get_absolute_href())

    link.set_owner(self.item)
    self.assertEqual(self.item, link.owner)
def test_relative(self) -> None:
    # Extra fields are flattened into the serialised dict.
    link = pystac.Link(
        "my rel",
        "../elsewhere",
        "example/stac_thing",
        "a title",
        extra_fields={"a": "b"},
    )
    self.assertEqual(
        {
            "rel": "my rel",
            "href": "../elsewhere",
            "type": "example/stac_thing",
            "title": "a title",
            "a": "b",
        },
        link.to_dict(),
    )
def test_relative(self):
    # Extra properties are flattened into the serialised dict.
    link = pystac.Link('my rel',
                       '../elsewhere',
                       'example/stac_thing',
                       'a title',
                       properties={'a': 'b'},
                       link_type=pystac.LinkType.RELATIVE)
    expected = {
        'rel': 'my rel',
        'href': '../elsewhere',
        'type': 'example/stac_thing',
        'title': 'a title',
        'a': 'b'
    }
    self.assertEqual(expected, link.to_dict())
    self.assertEqual(pystac.LinkType.RELATIVE, link.link_type)
def test_no_auto_title_if_not_resolved(self) -> None:
    # An unresolved string target provides no title to borrow.
    link = pystac.Link(
        "my rel", target="https://www.some-domain.com/path/to/thing.txt"
    )
    assert link.title is None
def add_link(self, target: pystac.STACObject) -> None:
    """Add a TEST_LINK_REL link to the owning object, if there is one."""
    if self.obj is None:
        raise pystac.ExtensionAlreadyExistsError(
            f"{self} does not support links")
    self.obj.add_link(pystac.Link(TEST_LINK_REL, target))
def test_get_target_str_no_href(self) -> None:
    # With the self link removed, the item target has no HREF string.
    self.item.remove_links("self")
    self_link = pystac.Link("self", target=self.item)
    self.item.add_link(self_link)
    self.assertIsNone(self_link.get_target_str())
def download_cci_lc(year: str, s3_dst: str, workdir: str, overwrite: bool = False):
    """Download one year of ESA CCI land cover from the Copernicus CDS,
    clip it to the Africa bounding box, write it as a COG to S3 and
    publish a STAC Item next to it.

    Args:
        year: Year of data to process.
        s3_dst: Destination S3 prefix.
        workdir: Local scratch directory for downloads.
        overwrite: Re-process even if outputs already exist on S3.
    """
    log = setup_logging()
    assets = {}
    cci_lc_version = get_version_from_year(year)
    name = f"{PRODUCT_NAME}_{year}_{cci_lc_version}"

    out_cog = URL(s3_dst) / year / f"{name}.tif"
    out_stac = URL(s3_dst) / year / f"{name}.stac-item.json"

    # Idempotency: the STAC document is written last, so its presence means
    # the whole year was already processed.
    if s3_head_object(str(out_stac)) is not None and not overwrite:
        log.info(f"{out_stac} exists, skipping")
        return

    workdir = Path(workdir)
    if not workdir.exists():
        workdir.mkdir(parents=True, exist_ok=True)

    # Create a temporary directory to work with
    # (trailing slash makes the tempdir a child of workdir).
    # NOTE(review): tmpdir is never removed — presumably intentional for
    # re-runs, since the downloaded zip is reused; confirm.
    tmpdir = mkdtemp(prefix=str(f"{workdir}/"))
    log.info(f"Working on {year} in the path {tmpdir}")

    if s3_head_object(str(out_cog)) is None or overwrite:
        log.info(f"Downloading {year}")
        try:
            local_file = Path(tmpdir) / f"{name}.zip"
            if not local_file.exists():
                # Download the file
                c = cdsapi.Client()
                # We could also retrieve the object metadata from the CDS.
                # e.g. f = c.retrieve("series",{params}) | f.location = URL to download
                c.retrieve(
                    "satellite-land-cover",
                    {
                        "format": "zip",
                        "variable": "all",
                        "version": cci_lc_version,
                        "year": str(year),
                    },
                    local_file,
                )
                log.info(f"Downloaded file to {local_file}")
            else:
                log.info(
                    f"File {local_file} exists, continuing without downloading"
                )

            # Unzip the file
            log.info(f"Unzipping {local_file}")
            unzipped = None
            with zipfile.ZipFile(local_file, "r") as zip_ref:
                # The archive is expected to contain a single dataset file.
                unzipped = local_file.parent / zip_ref.namelist()[0]
                zip_ref.extractall(tmpdir)

            # Process data
            ds = xr.open_dataset(unzipped)
            # Subset to Africa
            ulx, uly, lrx, lry = AFRICA_BBOX
            # Note: lats are upside down!
            ds_small = ds.sel(lat=slice(uly, lry), lon=slice(ulx, lrx))
            ds_small = assign_crs(ds_small, crs="epsg:4326")

            # Create cog (in memory - :mem: returns bytes object)
            mem_dst = write_cog(
                ds_small.lccs_class,
                ":mem:",
                nodata=0,
                overview_resampling="nearest",
            )

            # Write to s3
            s3_dump(mem_dst, str(out_cog), ACL="bucket-owner-full-control")
            log.info(f"File written to {out_cog}")
        except Exception:
            log.exception(f"Failed to process {name}")
            exit(1)
    else:
        log.info(f"{out_cog} exists, skipping")

    assets["classification"] = pystac.Asset(
        href=str(out_cog), roles=["data"], media_type=pystac.MediaType.COG)

    # Write STAC document
    source_doc = (
        "https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover"
    )
    item = create_stac_item(
        str(out_cog),
        id=str(
            odc_uuid("Copernicus Land Cover", cci_lc_version,
                     [source_doc, name])),
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": PRODUCT_NAME,
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )
    item.add_links([
        pystac.Link(
            target=source_doc,
            title="Source",
            rel=pystac.RelType.DERIVED_FROM,
            media_type="text/html",
        )
    ])

    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        str(out_stac),
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {out_stac}")
def download_and_cog_chirps(
    year: str,
    month: str,
    s3_dst: str,
    day: str = None,
    overwrite: bool = False,
    slack_url: str = None,
):
    """Download a CHIRPS rainfall GeoTIFF (daily when ``day`` is given,
    monthly otherwise), convert it to a COG and upload the COG plus a
    STAC Item to S3.

    Args:
        year: Year of the data, e.g. "2021".
        month: Zero-padded month, e.g. "04".
        day: Optional zero-padded day; selects the daily product.
        s3_dst: Destination S3 prefix.
        overwrite: Re-process even if the STAC document already exists.
        slack_url: Optional webhook to notify on failure.
    """
    # Cleaning and sanity checks
    s3_dst = s3_dst.rstrip("/")

    # Set up file strings
    if day is not None:
        # Set up a daily process
        in_file = f"chirps-v2.0.{year}.{month}.{day}.tif.gz"
        in_href = DAILY_URL_TEMPLATE.format(year=year, in_file=in_file)
        in_data = f"/vsigzip//vsicurl/{in_href}"
        if not check_for_url_existence(in_href):
            # Recent data is published un-gzipped; fall back to the raw .tif.
            log.warning("Couldn't find the gzipped file, trying the .tif")
            in_file = f"chirps-v2.0.{year}.{month}.{day}.tif"
            in_href = DAILY_URL_TEMPLATE.format(year=year, in_file=in_file)
            in_data = f"/vsicurl/{in_href}"

            if not check_for_url_existence(in_href):
                log.error("Couldn't find the .tif file either, aborting")
                sys.exit(1)

        file_base = f"{s3_dst}/{year}/{month}/chirps-v2.0_{year}.{month}.{day}"
        out_data = f"{file_base}.tif"
        out_stac = f"{file_base}.stac-item.json"

        start_datetime = f"{year}-{month}-{day}T00:00:00Z"
        end_datetime = f"{year}-{month}-{day}T23:59:59Z"
        product_name = "rainfall_chirps_daily"
    else:
        # Set up a monthly process
        in_file = f"chirps-v2.0.{year}.{month}.tif.gz"
        in_href = MONTHLY_URL_TEMPLATE.format(in_file=in_file)
        in_data = f"/vsigzip//vsicurl/{in_href}"
        if not check_for_url_existence(in_href):
            log.warning("Couldn't find the gzipped file, trying the .tif")
            in_file = f"chirps-v2.0.{year}.{month}.tif"
            in_href = MONTHLY_URL_TEMPLATE.format(in_file=in_file)
            in_data = f"/vsicurl/{in_href}"

            if not check_for_url_existence(in_href):
                log.error("Couldn't find the .tif file either, aborting")
                sys.exit(1)

        file_base = f"{s3_dst}/chirps-v2.0_{year}.{month}"
        out_data = f"{file_base}.tif"
        out_stac = f"{file_base}.stac-item.json"

        # Month span: first day through the month's last day.
        _, end = calendar.monthrange(int(year), int(month))
        start_datetime = f"{year}-{month}-01T00:00:00Z"
        end_datetime = f"{year}-{month}-{end}T23:59:59Z"
        product_name = "rainfall_chirps_monthly"

        # Set to 15 for the STAC metadata
        day = 15

    try:
        # Check if file already exists
        log.info(f"Working on {in_file}")
        if not overwrite and s3_head_object(out_stac) is not None:
            log.warning(f"File {out_stac} already exists. Skipping.")
            return

        # COG and STAC
        with MemoryFile() as mem_dst:
            # Creating the COG, with a memory cache and no download. Shiny.
            cog_translate(
                in_data,
                mem_dst.name,
                cog_profiles.get("deflate"),
                in_memory=True,
                nodata=-9999,
            )
            # Creating the STAC document with appropriate date range
            _, end = calendar.monthrange(int(year), int(month))
            item = create_stac_item(
                mem_dst,
                id=str(odc_uuid("chirps", "2.0", [in_file])),
                with_proj=True,
                input_datetime=datetime(int(year), int(month), int(day)),
                properties={
                    "odc:processing_datetime": datetime_to_str(datetime.now()),
                    "odc:product": product_name,
                    "start_datetime": start_datetime,
                    "end_datetime": end_datetime,
                },
            )
            item.set_self_href(out_stac)
            # Manually redo the asset
            del item.assets["asset"]
            item.assets["rainfall"] = pystac.Asset(
                href=out_data,
                title="CHIRPS-v2.0",
                media_type=pystac.MediaType.COG,
                roles=["data"],
            )
            # Let's add a link to the source
            item.add_links([
                pystac.Link(
                    target=in_href,
                    title="Source file",
                    rel=pystac.RelType.DERIVED_FROM,
                    media_type="application/gzip",
                )
            ])

            # Dump the data to S3
            mem_dst.seek(0)
            log.info(f"Writing DATA to: {out_data}")
            s3_dump(mem_dst, out_data, ACL="bucket-owner-full-control")
            # Write STAC to S3
            log.info(f"Writing STAC to: {out_stac}")
            s3_dump(
                json.dumps(item.to_dict(), indent=2),
                out_stac,
                ContentType="application/json",
                ACL="bucket-owner-full-control",
            )
            # All done!
            log.info(f"Completed work on {in_file}")

    except Exception as e:
        message = f"Failed to handle {in_file} with error {e}"

        if slack_url is not None:
            send_slack_notification(slack_url, "Chirps Rainfall Monthly",
                                    message)

        log.exception(message)
        exit(1)
def test_path_like(self) -> None:
    # Links implement the os.PathLike protocol.
    target = os.path.abspath("../elsewhere")
    link = pystac.Link("some-rel", target)
    self.assertEqual(target, os.fspath(link))
def get_link(self) -> Optional[pystac.Link]:
    """Gets a :class:`~pystac.Link` for the DOI for this publication.

    If :attr:`Publication.doi` is ``None``, this method will also return
    ``None``."""
    if self.doi is not None:
        return pystac.Link(ScientificRelType.CITE_AS, doi_to_url(self.doi))
    return None
def test_extend_invalid_object(self) -> None:
    # A Link cannot host the Storage extension.
    link = pystac.Link("child", "https://some-domain.com/some/path/to.json")
    with self.assertRaises(pystac.ExtensionTypeError):
        StorageExtension.ext(link)  # type: ignore
def render_metadata(
        self,
        ext: str = EXT_TIFF,
        processing_dt: Optional[datetime] = None) -> Dict[str, Any]:
    """
    Put together STAC metadata document for the output of this task.

    :param ext: File extension used when computing per-band asset paths.
    :param processing_dt: Processing timestamp; defaults to "now" (UTC).
    :return: The STAC Item serialised to a plain dictionary.
    """
    if processing_dt is None:
        processing_dt = datetime.utcnow()

    product = self.product
    geobox = self.geobox
    region_code = product.region_code(self.tile_index)
    inputs = list(map(str, self._lineage()))

    # Start from the product's static properties and add the task-specific
    # date range / provenance fields.
    properties: Dict[str, Any] = deepcopy(product.properties)
    properties["dtr:start_datetime"] = format_datetime(
        self.time_range.start)
    properties["dtr:end_datetime"] = format_datetime(self.time_range.end)
    properties["odc:processing_datetime"] = format_datetime(
        processing_dt, timespec="seconds")
    properties["odc:region_code"] = region_code
    properties["odc:product"] = product.name
    properties["odc:dataset_version"] = product.version

    # Footprint in lat/lon for the Item geometry/bbox.
    geobox_wgs84 = geobox.extent.to_crs("epsg:4326",
                                        resolution=math.inf,
                                        wrapdateline=True)
    bbox = geobox_wgs84.boundingbox

    item = pystac.Item(
        id=str(self.uuid),
        geometry=geobox_wgs84.json,
        bbox=[bbox.left, bbox.bottom, bbox.right, bbox.top],
        datetime=self.time_range.start.replace(tzinfo=timezone.utc),
        properties=properties,
        stac_extensions=["projection"],
    )
    item.ext.projection.epsg = geobox.crs.epsg

    # Lineage last
    item.properties["odc:lineage"] = dict(inputs=inputs)

    # Add all the assets
    for band, path in self.paths(ext=ext).items():
        asset = pystac.Asset(
            href=path,
            media_type="image/tiff; application=geotiff",
            roles=["data"],
            title=band,
        )
        item.add_asset(band, asset)
        # Per-asset projection info (all bands share the task geobox).
        item.ext.projection.set_transform(geobox.transform, asset=asset)
        item.ext.projection.set_shape(geobox.shape, asset=asset)

    # Add links
    item.links.append(
        pystac.Link(
            rel="product_overview",
            media_type="application/json",
            target=product.href,
        ))
    item.links.append(
        pystac.Link(
            rel="self",
            media_type="application/json",
            target=self.metadata_path("absolute", ext="json"),
        ))

    return item.to_dict()
def render_metadata(
    product: OutputProduct,
    geobox: GeoBox,
    tile_index: TileIdx_xy,
    time_range: DateTimeRange,
    uuid: UUID,
    paths: Dict[str, str],
    metadata_path: str,
    processing_dt: Optional[datetime] = None,
) -> Dict[str, Any]:
    """
    Put together STAC metadata document for the output from the task info.

    :param product: Output product description (properties, name, href...).
    :param geobox: Spatial footprint/grid of the output tile.
    :param tile_index: Tile index used to derive the region code.
    :param time_range: Temporal range of the output.
    :param uuid: Deterministic dataset id to use as the Item id.
    :param paths: Mapping of band name -> asset HREF.
    :param metadata_path: HREF to use for the "self" link.
    :param processing_dt: Processing timestamp; defaults to "now" (UTC).
    :return: The STAC Item serialised to a plain dictionary.
    """
    if processing_dt is None:
        processing_dt = datetime.utcnow()

    region_code = product.region_code(tile_index)
    # Lineage is intentionally empty here; see the task-based variant.
    inputs: List[str] = []

    properties: Dict[str, Any] = deepcopy(product.properties)

    properties["dtr:start_datetime"] = format_datetime(time_range.start)
    properties["dtr:end_datetime"] = format_datetime(time_range.end)
    properties["odc:processing_datetime"] = format_datetime(
        processing_dt, timespec="seconds")
    properties["odc:region_code"] = region_code
    properties["odc:lineage"] = dict(inputs=inputs)
    properties["odc:product"] = product.name

    # Footprint in lat/lon for the Item geometry/bbox.
    geobox_wgs84 = geobox.extent.to_crs("epsg:4326",
                                        resolution=math.inf,
                                        wrapdateline=True)
    bbox = geobox_wgs84.boundingbox

    item = pystac.Item(
        id=str(uuid),
        geometry=geobox_wgs84.json,
        bbox=[bbox.left, bbox.bottom, bbox.right, bbox.top],
        datetime=time_range.start.replace(tzinfo=timezone.utc),
        properties=properties,
    )

    # Enable the Projection extension
    item.ext.enable("projection")
    item.ext.projection.epsg = geobox.crs.epsg

    # Add all the assets
    for band, path in paths.items():
        asset = pystac.Asset(
            href=path,
            media_type="image/tiff; application=geotiff",
            roles=["data"],
            title=band,
        )
        item.add_asset(band, asset)
        # Per-asset projection info (all bands share the tile geobox).
        item.ext.projection.set_transform(geobox.transform, asset=asset)
        item.ext.projection.set_shape(geobox.shape, asset=asset)

    # Add links
    item.links.append(
        pystac.Link(
            rel="product_overview",
            media_type="application/json",
            target=product.href,
        ))
    item.links.append(
        pystac.Link(
            rel="self",
            media_type="application/json",
            target=metadata_path,
        ))

    return item.to_dict()
def download_gls(year: str, s3_dst: str, workdir: Path, overwrite: bool = False):
    """Download Copernicus Global Land Service land-cover layers for
    ``year``, convert each to a COG on S3, and publish a STAC Item.

    Args:
        year: Year key into ``YEARS`` (Zenodo record id / year key).
        s3_dst: Destination S3 prefix.
        workdir: Local scratch directory for downloads.
        overwrite: Re-process files even if they already exist on S3.
    """
    log = setup_logging()
    assets = {}
    out_stac = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}.stac-item.json"

    # The STAC document is written last, so its presence means the whole
    # year was already processed.
    if s3_head_object(str(out_stac)) is not None and not overwrite:
        log.info(f"{out_stac} exists, skipping")
        return

    # Make sure the scratch directory exists before creating tempdirs in it.
    workdir.mkdir(parents=True, exist_ok=True)

    # Download the files
    for name, file in FILES.items():
        # Create a temporary directory to work with.
        # FIX: TemporaryDirectory requires a *string* prefix — the original
        # passed the Path object directly. The trailing slash places the
        # tempdir inside workdir (same pattern as download_cci_lc).
        with TemporaryDirectory(prefix=f"{workdir}/") as tmpdir:
            log.info(f"Working on {file}")
            url = URL(
                BASE_URL.format(
                    record_id=YEARS[year][1], year_key=YEARS[year][0], file=file
                )
            )

            dest_url = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}_{name}.tif"

            if s3_head_object(str(dest_url)) is None or overwrite:
                log.info(f"Downloading {url}")

                try:
                    local_file = Path(tmpdir) / str(url.name)
                    # Download the file
                    download_file(url, local_file)
                    log.info(f"Downloaded file to {local_file}")
                    local_file_small = translate_file_deafrica_extent(local_file)
                    log.info(f"Clipped Africa out and saved to {local_file_small}")
                    resampling = "nearest" if name in DO_NEAREST else "bilinear"

                    # Create a COG in memory and upload to S3
                    with MemoryFile() as mem_dst:
                        # Creating the COG, with a memory cache and no download. Shiny.
                        cog_translate(
                            local_file_small,
                            mem_dst.name,
                            cog_profiles.get("deflate"),
                            in_memory=True,
                            nodata=255,
                            overview_resampling=resampling,
                        )
                        mem_dst.seek(0)
                        s3_dump(mem_dst, str(dest_url), ACL="bucket-owner-full-control")
                        log.info(f"File written to {dest_url}")
                except Exception:
                    log.exception(f"Failed to process {url}")
                    exit(1)
            else:
                log.info(f"{dest_url} exists, skipping")

            assets[name] = pystac.Asset(
                href=str(dest_url), roles=["data"], media_type=pystac.MediaType.COG
            )

    # Write STAC document from the last-written file
    source_doc = f"https://zenodo.org/record/{YEARS[year][1]}"
    item = create_stac_item(
        str(dest_url),
        id=str(odc_uuid("Copernicus Global Land Cover", "3.0.1", [source_doc])),
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": PRODUCT_NAME,
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )
    item.add_links(
        [
            pystac.Link(
                target=source_doc,
                title="Source",
                rel=pystac.RelType.DERIVED_FROM,
                media_type="text/html",
            )
        ]
    )

    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        str(out_stac),
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {out_stac}")
def main():
    """
    Pull Copernicus EU Rapid Mapping Activations data from the GeoRSS feed
    """
    # SentinelHub credentials are required for the imagery search below.
    sentinel_oauth_id = os.environ.get("SENTINELHUB_OAUTH_ID")
    sentinel_oauth_secret = os.environ.get("SENTINELHUB_OAUTH_SECRET")
    if sentinel_oauth_id is None:
        raise ValueError("Must set SENTINELHUB_OAUTH_ID")
    if sentinel_oauth_secret is None:
        raise ValueError("Must set SENTINELHUB_OAUTH_SECRET")

    # Cache the activations feed locally; only download it once.
    events_xml_url = "https://emergency.copernicus.eu/mapping/activations-rapid/feed"
    events_xml_file = Path("./data/copernicus-rapid-mapping-activations.xml")
    if not events_xml_file.is_file():
        logger.info("Pulling {}...".format(events_xml_url))
        urlretrieve(events_xml_url, str(events_xml_file))
    event_xml_dir = Path("./data/event-xml")
    os.makedirs(event_xml_dir, exist_ok=True)

    # Generate a list of all unique CEMS products (combination of event, aoi,
    # monitoring type, revision and version) for all flood events in 2019 and 2020
    products = []
    events_root = ET.parse(events_xml_file).getroot()
    for event in events_root.iter("item"):
        category = event.find("category").text.strip().lower()
        if category != "flood":
            continue
        event_id = event.find("guid").text
        title = event.find("title").text
        rss_url = event.find("{http://www.iwg-sem.org/}activationRSS").text
        logger.info(title)
        description = event.find("description").text
        # The event timestamp only appears inside the free-text description.
        event_dts = re.findall(
            r"Date\/Time of Event \(UTC\):[</b>\s]*?(\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{2}:\d{2})",
            description,
            flags=re.MULTILINE,
        )
        # Exactly one timestamp is expected; anything else means the feed
        # format changed and parsing needs attention.
        if len(event_dts) != 1:
            logger.warning("{}: Available event date times {}".format(
                title, event_dts))
            raise AssertionError()
        event_datetime = datetime.strptime(
            event_dts[0], "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
        # Only events from 2019 onward are of interest.
        if event_datetime < datetime(2019, 1, 1, 0, 0, 0, tzinfo=timezone.utc):
            continue
        event_country = event.find(
            "{http://www.iwg-sem.org/}activationAffectedCountries").text

        # Cache each per-event GeoRSS document locally.
        event_xml_file = Path(event_xml_dir, event_id).with_suffix(".xml")
        if not event_xml_file.is_file():
            logger.info("\tPulling {} GeoRSS: {}...".format(
                event_id, event_xml_file))
            urlretrieve(rss_url, event_xml_file)
        event_root = ET.parse(event_xml_file).getroot()
        for item in event_root.iter("item"):
            # Items may lack the CEMS type tags entirely.
            try:
                data_type = item.find("{http://www.gdacs.org/}cemsctype").text
            except AttributeError:
                data_type = ""
            try:
                product_type = item.find(
                    "{http://www.gdacs.org/}cemsptype").text
            except AttributeError:
                product_type = ""

            # Only care about downloading VECTOR data for Delineation product
            # More info at https://emergency.copernicus.eu/mapping/ems/rapid-mapping-portfolio
            if not (data_type == "VECTOR" and
                    (product_type == "DEL" or product_type == "GRA")):
                continue

            item_url = urlparse(item.find("link").text)
            _, _, product_id, version_id = item_url.path.lstrip("/").split("/")
            (
                product_event_id,
                aoi_id,
                product_type_id,
                monitoring_type,
                revision_id,
                data_type_id,
            ) = product_id.split("_")
            # Some sanity checks to ensure we've parsed our product id string correctly
            assert event_id == product_event_id
            assert product_type_id == product_type
            assert data_type_id == "VECTORS"

            georss_polygon = item.find(
                "{http://www.georss.org/georss}polygon").text
            # Split string, group number pairs, convert to float and swap pairs to lon first
            polygon = Polygon(
                map(
                    lambda x: (float(x[1]), float(x[0])),
                    grouper(georss_polygon.split(" "), 2),
                ))
            event_product = EventProduct(
                # Rebuild product_id from scratch because we need to include version
                "_".join([
                    event_id,
                    aoi_id,
                    product_type_id,
                    monitoring_type,
                    revision_id,
                    version_id,
                    data_type_id,
                ]),
                event_id,
                event_country,
                aoi_id,
                event_datetime.timestamp(),
                polygon,
                data_type_id,
                product_type_id,
                monitoring_type,
                revision_id,
                version_id,
                urlunparse(item_url),
            )
            products.append(event_product)

    # Persist the scraped product list as GeoJSON for later inspection.
    df = gpd.GeoDataFrame(products)
    geojson_file = "./data/cems-rapid-mapping-flood-products-2019-2020.geojson"
    logger.info(
        "Writing GeoJSON of flood event products to {}".format(geojson_file))
    df.to_file(geojson_file, driver="GeoJSON")

    sentinel_session = get_session(sentinel_oauth_id, sentinel_oauth_secret)
    catalog = pystac.Catalog(
        "copernicus-rapid-mapping-floods-2019-2020",
        "Copernicus Rapid Mapping provisions geospatial information within hours or days from the activation in support of emergency management activities immediately following a disaster. Standardised mapping products are provided: e.g. to ascertain the situation before the event (reference product), to roughly identify and assess the most affected locations (first estimate product), assess the geographical extent of the event (delineation product) or to evaluate the intensity and scope of the damage resulting from the event (grading product). This catalog contains a subset of products for flood events from 2019-2020 that intersect with Sentinel 2 L2A Chips.",
        title="Copernicus Rapid Mapping Floods 2019-2020",
    )
    s2_collection = pystac.Collection(
        "Sentinel-2-L2A",
        "Sentinel 2 L2A images corresponding to CEMS rapid mapping floods",
        pystac.Extent(
            pystac.SpatialExtent([None, None, None, None]),
            pystac.TemporalExtent([(
                # TODO: Make this more specific by looping actual dts
                # after ingest
                datetime(2019, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
                datetime(2020, 12, 31, 23, 59, 59, tzinfo=timezone.utc),
            )]),
        ),
    )
    catalog.add_child(s2_collection)

    # Loop Products grouped by event id, lookup Sentinel 2 matches for each
    # Product, and create STAC Items in catalog for any matches
    sorted_products = sorted(products, key=lambda x: x.event_id)
    for event_id, event_products in groupby(sorted_products,
                                            key=lambda x: x.event_id):
        for p in event_products:
            event_datetime = datetime.fromtimestamp(p.event_time,
                                                    tz=timezone.utc)
            # Check for sentinel 2 results before anything else, so we
            # don't do unnecessary work. We'll use these results later
            # after we've created our STAC Item
            response = stac_search(
                p.geometry.bounds,
                "sentinel-2-l2a",
                event_datetime - timedelta(hours=12),
                event_datetime + timedelta(hours=12),
                sentinel_session,
            ).json()
            if len(response["features"]) < 1:
                logger.debug("No Sentinel 2 results for {}".format(
                    p.product_id))
                continue

            # Lazily create one collection per event.
            event_collection = catalog.get_child(event_id)
            if event_collection is None:
                event_collection = pystac.Collection(
                    event_id,
                    "",
                    pystac.Extent(
                        pystac.SpatialExtent([None, None, None, None]),
                        pystac.TemporalExtent([(event_datetime, None)]),
                    ),
                )
                catalog.add_child(event_collection)

            pystac_item = pystac.Item(
                p.product_id,
                mapping(p.geometry),
                p.geometry.bounds,
                event_datetime,
                properties={
                    "aoi_id": p.aoi_id,
                    "country": p.event_country,
                    "event_id": p.event_id,
                    "product_type": p.product_type,
                    "data_type": p.data_type,
                    "monitoring_type": p.monitoring_type,
                    "revision": p.revision,
                    "version": p.version,
                },
            )
            event_collection.add_item(pystac_item)
            url_link = pystac.Link("alternate",
                                   p.product_link,
                                   media_type="text/html")
            pystac_item.add_link(url_link)

            # Get or create Item in S2 collection for each match from
            # SentinelHub and add as links to our Product Item
            for feature in response["features"]:
                s2_item = s2_collection.get_item(feature["id"])
                if s2_item is None:
                    s2_item = pystac.Item.from_dict(feature)
                    s2_collection.add_item(s2_item)
                s2_link = pystac.Link(
                    "data", s2_item,
                    link_type=pystac.LinkType.RELATIVE).set_owner(pystac_item)
                pystac_item.add_link(s2_link)
            logger.info("Created STAC Item {} with {} Sentinel 2 links".format(
                p.product_id, len(response["features"])))

    # Set spatial extents
    for collection in catalog.get_children():
        if not isinstance(collection, pystac.Collection):
            continue
        bounds = GeometryCollection(
            [shape(s.geometry) for s in collection.get_all_items()]).bounds
        collection.extent.spatial = pystac.SpatialExtent(bounds)

    catalog_root = "./data/catalog"
    logger.info("Writing STAC Catalog to {}...".format(catalog_root))
    catalog.normalize_and_save(catalog_root, pystac.CatalogType.SELF_CONTAINED)
def test_resolve_stac_object_no_root_and_target_is_item(self) -> None:
    # Resolving must succeed even when no root catalog is set.
    pystac.Link("my rel", target=self.item).resolve_stac_object()
def create_stac_item(
        mtl_xml_href: str,
        read_href_modifier: Optional[ReadHrefModifier] = None) -> pystac.Item:
    """Creates a Landsat 8 C2 L2 STAC Item.

    Reads data from a single scene of
    Landsat Collection 2 Level-2 Surface Reflectance Product data.

    Uses the MTL XML HREF as the bases for other files; assumes that all
    files are co-located in a directory or blob prefix.

    :param mtl_xml_href: HREF to the scene's ``*_MTL.xml`` metadata file.
    :param read_href_modifier: Optional callable to rewrite HREFs before
        reading (e.g. to sign URLs).
    :return: The populated :class:`pystac.Item`.
    """
    # Everything before the final "_MTL..." segment is the shared scene base.
    base_href = '_'.join(mtl_xml_href.split('_')[:-1])  # Remove the _MTL.txt

    mtl_metadata = MtlMetadata.from_file(mtl_xml_href, read_href_modifier)

    ang_href = ANG_ASSET_DEF.get_href(base_href)
    ang_metadata = AngMetadata.from_file(ang_href, read_href_modifier)

    scene_datetime = mtl_metadata.scene_datetime

    item = pystac.Item(id=mtl_metadata.scene_id,
                       bbox=mtl_metadata.bbox,
                       geometry=ang_metadata.get_scene_geometry(
                           mtl_metadata.bbox),
                       datetime=scene_datetime,
                       properties={})

    item.common_metadata.platform = L8_PLATFORM
    item.common_metadata.instruments = L8_INSTRUMENTS
    item.common_metadata.description = L8_ITEM_DESCRIPTION

    # eo
    item.ext.enable('eo')
    item.ext.eo.cloud_cover = mtl_metadata.cloud_cover

    # view
    item.ext.enable('view')
    item.ext.view.off_nadir = mtl_metadata.off_nadir
    item.ext.view.sun_elevation = mtl_metadata.sun_elevation

    # Sun Azimuth in landsat metadata is -180 to 180 from north, west being negative.
    # In STAC, it's 0 to 360 clockwise from north.
    sun_azimuth = mtl_metadata.sun_azimuth
    if sun_azimuth < 0.0:
        sun_azimuth = 360 + sun_azimuth
    item.ext.view.sun_azimuth = sun_azimuth

    # projection
    item.ext.enable('projection')
    item.ext.projection.epsg = mtl_metadata.epsg
    item.ext.projection.bbox = mtl_metadata.proj_bbox

    # landsat8
    item.stac_extensions.append(L8_EXTENSION_SCHEMA)
    item.properties.update(**mtl_metadata.additional_metadata)
    item.properties['landsat8:scene_id'] = ang_metadata.scene_id

    # -- Add assets

    # Add common assets
    for asset_definition in COMMON_ASSET_DEFS:
        asset_definition.add_asset(item, mtl_metadata, base_href)

    # Add SR assets
    for asset_definition in SR_ASSET_DEFS:
        asset_definition.add_asset(item, mtl_metadata, base_href)

    # Add thermal assets, if this is a L2SP product
    if mtl_metadata.processing_level == 'L2SP':
        for asset_definition in THERMAL_ASSET_DEFS:
            asset_definition.add_asset(item, mtl_metadata, base_href)

    # -- Add links

    usgs_item_page = (
        f"https://landsatlook.usgs.gov/stac-browser/collection02/level-2/standard/oli-tirs"
        f"/{scene_datetime.year}"
        f"/{mtl_metadata.wrs_path}/{mtl_metadata.wrs_row}"
        f"/{mtl_metadata.scene_id}")

    item.add_link(
        pystac.Link(rel="alternate",
                    target=usgs_item_page,
                    title="USGS stac-browser page",
                    media_type="text/html"))

    return item