def get_stac_item_template(self, URLs):
    '''read first geotiff to get STAC Item template (returns pystac.Item)'''
    template = bandsDict[self.bands[0]]['template']
    first_url = URLs[0].replace(template, self.bands[0])
    productType = bandsDict[self.bands[0]]['name']
    index1 = productTypeDict[productType]['index1']
    index2 = productTypeDict[productType]['index2']
    date, _ = self.datesFromGrimpName(
        os.path.basename(first_url), index1=index1, index2=index2
    )
    # collection = first_url.split('/')[-3]
    item = rio_stac.create_stac_item(
        first_url,
        input_datetime=date,
        asset_media_type=str(pystac.MediaType.COG),
        with_proj=True,
        with_raster=True,
    )
    self.dtype = \
        item.assets['asset'].extra_fields['raster:bands'][0]['data_type']
    # Could remove:
    # ['links']
    # ['assets']['asset']['roles']
    # Remove statistics and histogram, b/c only applies to first
    item.assets['asset'].extra_fields['raster:bands'][0].pop('statistics')
    item.assets['asset'].extra_fields['raster:bands'][0].pop('histogram')
    return item
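# A minimal sketch of the create_stac_item call that the template reader above
# relies on, using a hypothetical local COG ("example.tif"): with_raster=True
# adds the "raster:bands" metadata, and the per-file statistics/histogram
# entries are the ones stripped from the template because they only describe
# that first file.
import pystac
import rio_stac

template_item = rio_stac.create_stac_item(
    "example.tif",                               # hypothetical input COG
    asset_media_type=str(pystac.MediaType.COG),
    with_proj=True,                              # projection extension fields
    with_raster=True,                            # raster:bands metadata
)
band0 = template_item.assets["asset"].extra_fields["raster:bands"][0]
print(band0["data_type"])                        # e.g. "float32"
band0.pop("statistics", None)                    # drop file-specific entries
band0.pop("histogram", None)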
def create_and_upload_stac(cog_file: Path, s3_dst: str, year) -> Item:
    out_path = URL(f"{s3_dst}/{year}/")

    log.info("Item base creation")
    item = create_stac_item(
        str(cog_file),
        id=str(odc_uuid("gmw", "2.0", [cog_file.name.replace("tif", "")])),
        with_proj=True,
        input_datetime=datetime(int(year), 12, 31),
        properties={
            "odc:product": "gmw",
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )

    log.info("links creation")
    item.set_self_href(str(out_path / f"gmw_{year}_stac-item.json"))
    item.add_links([
        pystac.Link(
            target=str(SOURCE_URL_PATH / FILE_NAME.format(year=year)),
            title="Source file",
            rel=pystac.RelType.DERIVED_FROM,
            media_type="application/zip",
        )
    ])

    out_data = out_path / cog_file.name
    # Remove asset created by create_stac_item and add our own
    del item.assets["asset"]
    item.assets["mangrove"] = pystac.Asset(
        href=str(out_data),
        title="gmw-v1.0",
        media_type=pystac.MediaType.COG,
        roles=["data"],
    )
    log.info(f"Item created {item.to_dict()}")
    log.info(f"Item validated {item.validate()}")

    log.info(f"Dump the data to S3 {str(cog_file)}")
    s3_dump(
        data=open(str(cog_file), "rb").read(),
        url=str(out_data),
        ACL="bucket-owner-full-control",
        ContentType="image/tiff",
    )
    log.info(f"File written to {out_data}")

    log.info("Write STAC to S3")
    s3_dump(
        data=json.dumps(item.to_dict(), indent=2),
        url=item.self_href,
        ACL="bucket-owner-full-control",
        ContentType="application/json",
    )
    log.info(f"STAC written to {item.self_href}")

    return item
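# Hedged sketch of the "replace the default asset" pattern used above, with
# plain pystac only. "item" can be any Item produced by create_stac_item with
# its default asset_name of "asset"; the name, href and title arguments here
# are placeholders.
import pystac

def rename_default_asset(item: pystac.Item, name: str, href: str, title: str) -> pystac.Item:
    # Drop the asset rio_stac registered under the default "asset" key ...
    original = item.assets.pop("asset")
    # ... and re-attach it under a product-specific key pointing at the final href.
    item.assets[name] = pystac.Asset(
        href=href,
        title=title,
        media_type=original.media_type,
        roles=["data"],
    )
    return item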
def stac(
    input,
    input_datetime,
    extension,
    collection,
    property,
    id,
    asset_name,
    asset_href,
    asset_mediatype,
    output,
):
    """Rasterio stac cli."""
    property = property or {}

    if not input_datetime:
        input_datetime = datetime.datetime.utcnow()
    else:
        if "/" in input_datetime:
            start_datetime, end_datetime = input_datetime.split("/")
            property["start_datetime"] = datetime_to_str(str_to_datetime(start_datetime))
            property["end_datetime"] = datetime_to_str(str_to_datetime(end_datetime))
            input_datetime = None
        else:
            input_datetime = str_to_datetime(input_datetime)

    if asset_mediatype and asset_mediatype != "auto":
        asset_mediatype = MediaType[asset_mediatype]

    extensions = [e for e in extension if e]

    item = create_stac_item(
        input,
        input_datetime=input_datetime,
        extensions=extensions,
        collection=collection,
        properties=property,
        id=id,
        asset_name=asset_name,
        asset_href=asset_href,
        asset_media_type=asset_mediatype,
    )

    if output:
        with open(output, "w") as f:
            f.write(json.dumps(item.to_dict(), separators=(",", ":")))
    else:
        click.echo(json.dumps(item.to_dict(), separators=(",", ":")))
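# Small sketch of the datetime handling in the CLI above: a "start/end" range
# becomes start_datetime/end_datetime properties with no single item datetime,
# while a single value is parsed as the item datetime. Uses the same
# pystac.utils helpers as the CLI; the input strings are just examples.
from pystac.utils import datetime_to_str, str_to_datetime

def parse_cli_datetime(value):
    properties = {}
    if "/" in value:
        start, end = value.split("/")
        properties["start_datetime"] = datetime_to_str(str_to_datetime(start))
        properties["end_datetime"] = datetime_to_str(str_to_datetime(end))
        return None, properties
    return str_to_datetime(value), properties

print(parse_cli_datetime("2021-01-01/2021-12-31"))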
def process_uri_tile(
    uri_tile: Tuple[str, str],
    product: str,
    dc: Datacube,
    doc2ds: Doc2Dataset,
    update_if_exists: bool = True,
) -> bool:
    product_name = f"dem_{product}"
    uri, tile = uri_tile
    properties = {
        "odc:product": product_name,
        "odc:region_code": tile,
        "start_datetime": "1900-01-01",
        "end_datetime": "2100-01-01",
    }

    with rasterio.Env(aws_unsigned=True, GDAL_DISABLE_READDIR_ON_OPEN="EMPTY_DIR"):
        item = create_stac_item(
            uri,
            collection=product_name,
            with_proj=True,
            properties=properties,
            asset_media_type=pystac.MediaType.COG,
            asset_name="elevation",
        )

    index_update_dataset(
        stac_transform(item.to_dict()),
        uri,
        dc,
        doc2ds,
        update_if_exists=update_if_exists,
        allow_unsafe=True,
    )

    return True
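# Hedged sketch of the GDAL environment used above: aws_unsigned=True lets
# rasterio read public buckets without credentials, and disabling directory
# listing avoids a slow LIST request per object. The S3 URI is a placeholder.
import rasterio

with rasterio.Env(aws_unsigned=True, GDAL_DISABLE_READDIR_ON_OPEN="EMPTY_DIR"):
    with rasterio.open("s3://example-bucket/dem/tile.tif") as src:
        print(src.profile)   # driver, dtype, CRS, transform, etc.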
def create_mosaic(
    dc: Datacube,
    product: str,
    out_product: str,
    time: Tuple[str, str],
    time_str: str,
    bands: Tuple[str],
    s3_output_root: str,
    split_bands: bool = False,
    resolution: int = 120,
    overwrite: bool = False,
):
    log = setup_logging()
    log.info(f"Creating mosaic for {product} over {time}")

    client = start_local_dask()

    assets = {}
    data = dc.load(
        product=product,
        time=time,
        resolution=(-resolution, resolution),
        dask_chunks={"x": 2048, "y": 2048},
        measurements=bands,
    )

    # This is a bad idea, we run out of memory
    # data.persist()

    if not split_bands:
        log.info("Creating a single tif file")
        out_file = _get_path(s3_output_root, out_product, time_str, "tif")
        exists = s3_head_object(out_file) is not None
        skip_writing = not (not exists or overwrite)
        try:
            asset, _ = _save_opinionated_cog(
                data,
                out_file,
                skip_writing=skip_writing,
            )
        except ValueError:
            log.exception(
                "Failed to create COG, please check that you only have one timestep in the period."
            )
            exit(1)
        assets[bands[0]] = asset
        if skip_writing:
            log.info(f"File exists, and overwrite is False. Not writing {out_file}")
        else:
            log.info(f"Finished writing: {asset.href}")
    else:
        log.info("Creating multiple tif files")
        for band in bands:
            out_file = _get_path(
                s3_output_root, out_product, time_str, "tif", band=band
            )
            exists = s3_head_object(out_file) is not None
            skip_writing = not (not exists or overwrite)
            try:
                asset, band = _save_opinionated_cog(
                    data=data,
                    out_file=out_file,
                    band=band,
                    skip_writing=skip_writing,
                )
            except ValueError:
                log.exception(
                    "Failed to create COG, please check that you only have one timestep in the period."
                )
                exit(1)
            assets[band] = asset
            if skip_writing:
                log.info(f"File exists, and overwrite is False. Not writing {out_file}")
            else:
                log.info(f"Finished writing: {asset.href}")
            # Aggressively heavy handed, but we get memory leaks otherwise
            client.restart()

    out_stac_file = _get_path(s3_output_root, out_product, time_str, "stac-item.json")
    item = create_stac_item(
        assets[bands[0]].href,
        id=f"{product}_{time_str}",
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": out_product,
            "start_datetime": f"{time[0]}T00:00:00Z",
            "end_datetime": f"{time[1]}T23:59:59Z",
        },
    )
    item.set_self_href(out_stac_file)

    log.info(f"Writing STAC: {out_stac_file}")
    client = s3_client(aws_unsigned=False)
    s3_dump(
        data=json.dumps(item.to_dict(), indent=2),
        url=item.self_href,
        ACL="bucket-owner-full-control",
        ContentType="application/json",
        s3=client,
    )
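# Hedged sketch of the multi-asset pattern used above: each band has its own
# COG, create_stac_item reads the footprint and projection from one of them,
# and the full mapping is attached via the assets argument. The hrefs and the
# item id are placeholders.
import pystac
from rio_stac import create_stac_item

band_assets = {
    "red": pystac.Asset(href="s3://bucket/mosaic_2021_red.tif",
                        media_type=pystac.MediaType.COG, roles=["data"]),
    "nir": pystac.Asset(href="s3://bucket/mosaic_2021_nir.tif",
                        media_type=pystac.MediaType.COG, roles=["data"]),
}
mosaic_item = create_stac_item(
    band_assets["red"].href,      # geometry/proj info read from this file
    id="mosaic_2021",
    assets=band_assets,           # all bands attached as assets
    with_proj=True,
    properties={"start_datetime": "2021-01-01T00:00:00Z",
                "end_datetime": "2021-12-31T23:59:59Z"},
)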
def download_gls(year: str, s3_dst: str, workdir: Path, overwrite: bool = False):
    log = setup_logging()
    assets = {}
    out_stac = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}.stac-item.json"

    if s3_head_object(str(out_stac)) is not None and not overwrite:
        log.info(f"{out_stac} exists, skipping")
        return

    # Download the files
    for name, file in FILES.items():
        # Create a temporary directory to work with
        with TemporaryDirectory(prefix=workdir) as tmpdir:
            log.info(f"Working on {file}")
            url = URL(
                BASE_URL.format(
                    record_id=YEARS[year][1], year_key=YEARS[year][0], file=file
                )
            )

            dest_url = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}_{name}.tif"

            if s3_head_object(str(dest_url)) is None or overwrite:
                log.info(f"Downloading {url}")
                try:
                    local_file = Path(tmpdir) / str(url.name)
                    # Download the file
                    download_file(url, local_file)
                    log.info(f"Downloaded file to {local_file}")
                    local_file_small = translate_file_deafrica_extent(local_file)
                    log.info(f"Clipped Africa out and saved to {local_file_small}")
                    resampling = "nearest" if name in DO_NEAREST else "bilinear"

                    # Create a COG in memory and upload to S3
                    with MemoryFile() as mem_dst:
                        # Creating the COG, with a memory cache and no download. Shiny.
                        cog_translate(
                            local_file_small,
                            mem_dst.name,
                            cog_profiles.get("deflate"),
                            in_memory=True,
                            nodata=255,
                            overview_resampling=resampling,
                        )
                        mem_dst.seek(0)
                        s3_dump(mem_dst, str(dest_url), ACL="bucket-owner-full-control")
                        log.info(f"File written to {dest_url}")
                except Exception:
                    log.exception(f"Failed to process {url}")
                    exit(1)
            else:
                log.info(f"{dest_url} exists, skipping")

            assets[name] = pystac.Asset(
                href=str(dest_url), roles=["data"], media_type=pystac.MediaType.COG
            )

    # Write STAC document from the last-written file
    source_doc = f"https://zenodo.org/record/{YEARS[year][1]}"
    item = create_stac_item(
        str(dest_url),
        id=str(odc_uuid("Copernicus Global Land Cover", "3.0.1", [source_doc])),
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": PRODUCT_NAME,
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )
    item.add_links(
        [
            pystac.Link(
                target=source_doc,
                title="Source",
                rel=pystac.RelType.DERIVED_FROM,
                media_type="text/html",
            )
        ]
    )

    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        str(out_stac),
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {out_stac}")
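# Hedged sketch of the in-memory COG step used above: cog_translate writes into
# a rasterio MemoryFile, which is then streamed straight to S3 without touching
# local disk. It assumes the same s3_dump helper the function uses (imported
# here from odc.aws, which is an assumption); the source path and destination
# URL are placeholders.
from odc.aws import s3_dump
from rasterio.io import MemoryFile
from rio_cogeo.cogeo import cog_translate
from rio_cogeo.profiles import cog_profiles

def cog_to_s3(src_path: str, dest_url: str, nodata: int = 255, resampling: str = "nearest"):
    with MemoryFile() as mem:
        cog_translate(
            src_path,
            mem.name,                       # write into the in-memory dataset
            cog_profiles.get("deflate"),    # standard deflate COG profile
            in_memory=True,
            nodata=nodata,
            overview_resampling=resampling,
        )
        mem.seek(0)
        s3_dump(mem, dest_url, ACL="bucket-owner-full-control")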
def download_cci_lc(year: str, s3_dst: str, workdir: str, overwrite: bool = False):
    log = setup_logging()
    assets = {}

    cci_lc_version = get_version_from_year(year)
    name = f"{PRODUCT_NAME}_{year}_{cci_lc_version}"

    out_cog = URL(s3_dst) / year / f"{name}.tif"
    out_stac = URL(s3_dst) / year / f"{name}.stac-item.json"

    if s3_head_object(str(out_stac)) is not None and not overwrite:
        log.info(f"{out_stac} exists, skipping")
        return

    workdir = Path(workdir)
    if not workdir.exists():
        workdir.mkdir(parents=True, exist_ok=True)

    # Create a temporary directory to work with
    tmpdir = mkdtemp(prefix=str(f"{workdir}/"))
    log.info(f"Working on {year} in the path {tmpdir}")

    if s3_head_object(str(out_cog)) is None or overwrite:
        log.info(f"Downloading {year}")
        try:
            local_file = Path(tmpdir) / f"{name}.zip"
            if not local_file.exists():
                # Download the file
                c = cdsapi.Client()
                # We could also retrieve the object metadata from the CDS.
                # e.g. f = c.retrieve("series", {params}) | f.location = URL to download
                c.retrieve(
                    "satellite-land-cover",
                    {
                        "format": "zip",
                        "variable": "all",
                        "version": cci_lc_version,
                        "year": str(year),
                    },
                    local_file,
                )
                log.info(f"Downloaded file to {local_file}")
            else:
                log.info(
                    f"File {local_file} exists, continuing without downloading"
                )

            # Unzip the file
            log.info(f"Unzipping {local_file}")
            unzipped = None
            with zipfile.ZipFile(local_file, "r") as zip_ref:
                unzipped = local_file.parent / zip_ref.namelist()[0]
                zip_ref.extractall(tmpdir)

            # Process data
            ds = xr.open_dataset(unzipped)
            # Subset to Africa
            ulx, uly, lrx, lry = AFRICA_BBOX
            # Note: lats are upside down!
            ds_small = ds.sel(lat=slice(uly, lry), lon=slice(ulx, lrx))
            ds_small = assign_crs(ds_small, crs="epsg:4326")

            # Create cog (in memory - :mem: returns bytes object)
            mem_dst = write_cog(
                ds_small.lccs_class,
                ":mem:",
                nodata=0,
                overview_resampling="nearest",
            )

            # Write to s3
            s3_dump(mem_dst, str(out_cog), ACL="bucket-owner-full-control")
            log.info(f"File written to {out_cog}")
        except Exception:
            log.exception(f"Failed to process {name}")
            exit(1)
    else:
        log.info(f"{out_cog} exists, skipping")

    assets["classification"] = pystac.Asset(
        href=str(out_cog), roles=["data"], media_type=pystac.MediaType.COG
    )

    # Write STAC document
    source_doc = (
        "https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover"
    )
    item = create_stac_item(
        str(out_cog),
        id=str(
            odc_uuid("Copernicus Land Cover", cci_lc_version, [source_doc, name])
        ),
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": PRODUCT_NAME,
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )
    item.add_links([
        pystac.Link(
            target=source_doc,
            title="Source",
            rel=pystac.RelType.DERIVED_FROM,
            media_type="text/html",
        )
    ])

    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        str(out_stac),
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {out_stac}")
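# Hedged sketch of the xarray-to-COG step above, assuming datacube's write_cog
# and assign_crs utilities (the import paths shown are my guess at where the
# helpers used by the function live). fname=":mem:" makes write_cog return the
# encoded bytes, which s3_dump can upload directly; the input file is a
# placeholder.
import xarray as xr
from datacube.utils.cog import write_cog
from datacube.utils.geometry import assign_crs

ds = xr.open_dataset("landcover.nc")                 # hypothetical NetCDF source
da = assign_crs(ds["lccs_class"], crs="epsg:4326")   # attach a CRS before writing
cog_bytes = write_cog(da, ":mem:", nodata=0, overview_resampling="nearest")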
def download_and_cog_chirps(
    year: str,
    month: str,
    s3_dst: str,
    day: str = None,
    overwrite: bool = False,
    slack_url: str = None,
):
    # Cleaning and sanity checks
    s3_dst = s3_dst.rstrip("/")

    # Set up file strings
    if day is not None:
        # Set up a daily process
        in_file = f"chirps-v2.0.{year}.{month}.{day}.tif.gz"
        in_href = DAILY_URL_TEMPLATE.format(year=year, in_file=in_file)
        in_data = f"/vsigzip//vsicurl/{in_href}"
        if not check_for_url_existence(in_href):
            log.warning("Couldn't find the gzipped file, trying the .tif")
            in_file = f"chirps-v2.0.{year}.{month}.{day}.tif"
            in_href = DAILY_URL_TEMPLATE.format(year=year, in_file=in_file)
            in_data = f"/vsicurl/{in_href}"

            if not check_for_url_existence(in_href):
                log.error("Couldn't find the .tif file either, aborting")
                sys.exit(1)

        file_base = f"{s3_dst}/{year}/{month}/chirps-v2.0_{year}.{month}.{day}"
        out_data = f"{file_base}.tif"
        out_stac = f"{file_base}.stac-item.json"

        start_datetime = f"{year}-{month}-{day}T00:00:00Z"
        end_datetime = f"{year}-{month}-{day}T23:59:59Z"
        product_name = "rainfall_chirps_daily"
    else:
        # Set up a monthly process
        in_file = f"chirps-v2.0.{year}.{month}.tif.gz"
        in_href = MONTHLY_URL_TEMPLATE.format(in_file=in_file)
        in_data = f"/vsigzip//vsicurl/{in_href}"
        if not check_for_url_existence(in_href):
            log.warning("Couldn't find the gzipped file, trying the .tif")
            in_file = f"chirps-v2.0.{year}.{month}.tif"
            in_href = MONTHLY_URL_TEMPLATE.format(in_file=in_file)
            in_data = f"/vsicurl/{in_href}"

            if not check_for_url_existence(in_href):
                log.error("Couldn't find the .tif file either, aborting")
                sys.exit(1)

        file_base = f"{s3_dst}/chirps-v2.0_{year}.{month}"
        out_data = f"{file_base}.tif"
        out_stac = f"{file_base}.stac-item.json"

        _, end = calendar.monthrange(int(year), int(month))
        start_datetime = f"{year}-{month}-01T00:00:00Z"
        end_datetime = f"{year}-{month}-{end}T23:59:59Z"
        product_name = "rainfall_chirps_monthly"

        # Set to 15 for the STAC metadata
        day = 15

    try:
        # Check if file already exists
        log.info(f"Working on {in_file}")
        if not overwrite and s3_head_object(out_stac) is not None:
            log.warning(f"File {out_stac} already exists. Skipping.")
            return

        # COG and STAC
        with MemoryFile() as mem_dst:
            # Creating the COG, with a memory cache and no download. Shiny.
            cog_translate(
                in_data,
                mem_dst.name,
                cog_profiles.get("deflate"),
                in_memory=True,
                nodata=-9999,
            )
            # Creating the STAC document with appropriate date range
            _, end = calendar.monthrange(int(year), int(month))
            item = create_stac_item(
                mem_dst,
                id=str(odc_uuid("chirps", "2.0", [in_file])),
                with_proj=True,
                input_datetime=datetime(int(year), int(month), int(day)),
                properties={
                    "odc:processing_datetime": datetime_to_str(datetime.now()),
                    "odc:product": product_name,
                    "start_datetime": start_datetime,
                    "end_datetime": end_datetime,
                },
            )
            item.set_self_href(out_stac)
            # Manually redo the asset
            del item.assets["asset"]
            item.assets["rainfall"] = pystac.Asset(
                href=out_data,
                title="CHIRPS-v2.0",
                media_type=pystac.MediaType.COG,
                roles=["data"],
            )
            # Let's add a link to the source
            item.add_links([
                pystac.Link(
                    target=in_href,
                    title="Source file",
                    rel=pystac.RelType.DERIVED_FROM,
                    media_type="application/gzip",
                )
            ])

            # Dump the data to S3
            mem_dst.seek(0)
            log.info(f"Writing DATA to: {out_data}")
            s3_dump(mem_dst, out_data, ACL="bucket-owner-full-control")
            # Write STAC to S3
            log.info(f"Writing STAC to: {out_stac}")
            s3_dump(
                json.dumps(item.to_dict(), indent=2),
                out_stac,
                ContentType="application/json",
                ACL="bucket-owner-full-control",
            )
            # All done!
            log.info(f"Completed work on {in_file}")

    except Exception as e:
        message = f"Failed to handle {in_file} with error {e}"

        if slack_url is not None:
            send_slack_notification(slack_url, "Chirps Rainfall Monthly", message)

        log.exception(message)
        exit(1)
def write_stac(
    s3_destination: str, file_path: str, file_key: str, year: str, log: Logger
) -> str:
    region_code = file_key.split("_")[0]

    stac_href = f"s3://{s3_destination}/{file_key}.stac-item.json"
    log.info(f"Creating STAC file in memory, targeting here: {stac_href}")

    if int(year) > 2010:
        hhpath = f"{file_key}_sl_HH_F02DAR.tif"
        hvpath = f"{file_key}_sl_HV_F02DAR.tif"
        lincipath = f"{file_key}_sl_linci_F02DAR.tif"
        maskpath = f"{file_key}_sl_mask_F02DAR.tif"
        datepath = f"{file_key}_sl_date_F02DAR.tif"
        launch_date = "2014-05-24"
        shortname = "alos"
    else:
        hhpath = f"{file_key}_sl_HH.tif"
        hvpath = f"{file_key}_sl_HV.tif"
        lincipath = f"{file_key}_sl_linci.tif"
        maskpath = f"{file_key}_sl_mask.tif"
        datepath = f"{file_key}_sl_date.tif"
        if int(year) > 2000:
            launch_date = "2006-01-24"
            shortname = "alos"
        else:
            launch_date = "1992-02-11"
            shortname = "jers"

    if shortname == "alos":
        product_name = "alos_palsar_mosaic"
        platform = "ALOS/ALOS-2"
        instrument = "PALSAR/PALSAR-2"
        cf = "83.0 dB"
        bandpaths = {
            "hh": hhpath,
            "hv": hvpath,
            "linci": lincipath,
            "mask": maskpath,
            "date": datepath,
        }
    else:
        product_name = "jers_sar_mosaic"
        platform = "JERS-1"
        instrument = "SAR"
        cf = "84.66 dB"
        bandpaths = {
            "hh": hhpath,
            "linci": lincipath,
            "mask": maskpath,
            "date": datepath,
        }

    properties = {
        "odc:product": product_name,
        "odc:region_code": region_code,
        "platform": platform,
        "instruments": [instrument],
        "cf": cf,
        "launchdate": launch_date,
        "start_datetime": f"{year}-01-01T00:00:00Z",
        "end_datetime": f"{year}-12-31T23:59:59Z",
    }

    assets = {}
    for name, path in bandpaths.items():
        href = f"s3://{s3_destination}/{path}"
        assets[name] = pystac.Asset(
            href=href, media_type=pystac.MediaType.COG, roles=["data"]
        )

    item = create_stac_item(
        file_path,
        id=str(
            odc_uuid(shortname, "1", [], year=year, tile=file_key.split("_")[0])
        ),
        properties=properties,
        assets=assets,
        with_proj=True,
    )
    item.set_self_href(stac_href)

    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        item.self_href,
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {item.self_href}")
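# Hedged sketch: validating an item against the STAC schemas before uploading,
# the same item.validate() call used in create_and_upload_stac above. Requires
# pystac's validation extra (jsonschema) to be installed; the helper name is a
# placeholder.
import json
import pystac

def item_to_json(item: pystac.Item) -> str:
    item.validate()                     # raises STACValidationError if invalid
    return json.dumps(item.to_dict(), indent=2)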