Exemple #1
0
    def add_stac(self, tile):
        """Build a pystac.Item for *tile*, or return None if it has no geometry."""
        if not tile.poly:
            return None

        item = pystac.Item(
            tile.name,
            mapping(tile.poly),
            list(tile.poly.bounds),
            datetime.datetime.now(),
            {'description': 'A USGS Lidar pointcloud in Entwine/EPT format'})

        item.ext.enable(pystac.Extensions.POINTCLOUD)

        # Wrap each EPT schema dimension in the pointcloud extension type.
        schemas = [
            pystac.extensions.pointcloud.PointcloudSchema(dim)
            for dim in tile.ept['schema']
        ]

        item.ext.pointcloud.apply(tile.num_points,
                                  'lidar',
                                  'ept',
                                  schemas,
                                  epsg='EPSG:3857')

        asset = pystac.Asset(tile.url, 'entwine',
                             'The ept.json for accessing data')
        item.add_asset('ept.json', asset)

        self_link = pystac.Link(
            'self', f'{self.args.stac_base_url}{tile.name}.json')
        parent_link = pystac.Link(
            'parent', f'{self.args.stac_base_url}catalog.json')
        item.add_links([self_link, parent_link])
        return item
Exemple #2
0
    def add_source(
        self,
        source_item: pystac.Item,
        title: Optional[str] = None,
        assets: Optional[List[str]] = None,
    ) -> None:
        """Adds a link to a source item.

        Args:
            source_item : Source imagery that the LabelItem applies to.
            title : Optional title for the link.
            assets : Optional list of assets that determine what
                assets in the source item this label item data applies to.
        """
        # Only attach "label:assets" when an explicit asset list was given.
        extra = {"label:assets": assets} if assets is not None else None
        self.obj.add_link(
            pystac.Link(
                "source",
                source_item,
                title=title,
                media_type=pystac.MediaType.JSON,
                extra_fields=extra,
            )
        )
Exemple #3
0
def collect_items(sar_catalog, usfimr_collection):
    """Pair SAR items with USFIMR flood-label items.

    For each known flood id, clone the USFIMR item down to its geojson asset
    (as the label), add it to a labels collection, and clone every SAR item
    under that flood with a "labels" link pointing at the label item.

    Returns:
        (images, labels_collection): the linked SAR item clones and the
        collection holding the label items.
    """
    images = []
    # Bug fix: the original built a throwaway Collection("labels", ...) and
    # immediately overwrote it with this one — the dead assignment is removed.
    labels_collection = pystac.Collection(
        "usfimr_sar_labels", "usfimr_sar_labels", usfimr_collection.extent
    )

    for flood_id in ["1", "2", "3", "15", "16"]:
        usfimr_item = usfimr_collection.get_item(flood_id)
        usfimr_geojson_asset = usfimr_item.assets["geojson"]
        usfimr_geojson_asset.set_owner(usfimr_item)
        usfimr_item_clone = usfimr_item.clone()
        # Reduce item assets to just the geojson as labels
        usfimr_item_clone.assets = {"labels": usfimr_item.assets["geojson"]}
        labels_collection.add_item(usfimr_item_clone)

        for sar_item in sar_catalog.get_child(flood_id).get_items():
            sar_item_clone = sar_item.clone()
            sar_item_clone.links.append(
                pystac.Link(
                    "labels",
                    target=usfimr_item_clone,
                    media_type="application/geo+json",
                    link_type=pystac.LinkType.RELATIVE,
                ).set_owner(sar_item_clone)
            )
            images.append(sar_item_clone)

    return images, labels_collection
Exemple #4
0
    def test_minimal(self) -> None:
        # A link built from just (rel, target) keeps the raw href for both
        # relative and absolute lookups.
        rel = "my rel"
        target = "https://example.com/a/b"
        link = pystac.Link(rel, target)
        self.assertEqual(target, link.get_href())
        self.assertEqual(target, link.get_absolute_href())

        expected_repr = f"<Link rel={rel} target={target}>"
        self.assertEqual(expected_repr, link.__repr__())

        self.assertFalse(link.is_resolved())

        expected_dict = {"rel": rel, "href": target}
        self.assertEqual(expected_dict, link.to_dict())

        # Run the same tests on the clone.
        clone = link.clone()
        # NOTE(review): assertNotEqual distinguishes link and clone — this
        # presumably relies on identity comparison; confirm Link defines no __eq__.
        self.assertNotEqual(link, clone)

        self.assertEqual(target, clone.get_href())
        self.assertEqual(target, clone.get_absolute_href())

        self.assertEqual(expected_repr, clone.__repr__())

        self.assertEqual(expected_dict, clone.to_dict())

        # Try the modification methods.
        self.assertIsNone(link.owner)
        link.set_owner(None)
        self.assertIsNone(link.owner)

        link.set_owner(self.item)
        self.assertEqual(self.item, link.owner)
Exemple #5
0
def create_and_upload_stac(cog_file: Path, s3_dst: str, year) -> Item:
    """Create a STAC item for a GMW COG and upload both COG and STAC doc to S3.

    Args:
        cog_file: Local path of the Cloud-Optimized GeoTIFF to publish.
        s3_dst: Destination S3 prefix.
        year: Year the data covers (used in the id, dates and output paths).

    Returns:
        The created (and validated) pystac Item.
    """
    out_path = URL(f"{s3_dst}/{year}/")

    log.info("Item base creation")
    item = create_stac_item(
        str(cog_file),
        id=str(odc_uuid("gmw", "2.0", [cog_file.name.replace("tif", "")])),
        with_proj=True,
        input_datetime=datetime(int(year), 12, 31),
        properties={
            "odc:product": "gmw",
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )

    log.info("links creation")
    item.set_self_href(str(out_path / f"gmw_{year}_stac-item.json"))
    item.add_links([
        pystac.Link(
            target=str(SOURCE_URL_PATH / FILE_NAME.format(year=year)),
            title="Source file",
            rel=pystac.RelType.DERIVED_FROM,
            media_type="application/zip",
        )
    ])

    out_data = out_path / cog_file.name
    # Remove asset created by create_stac_item and add our own
    del item.assets["asset"]
    item.assets["mangrove"] = pystac.Asset(
        href=str(out_data),
        title="gmw-v1.0",
        media_type=pystac.MediaType.COG,
        roles=["data"],
    )

    log.info(f"Item created {item.to_dict()}")
    log.info(f"Item validated {item.validate()}")

    log.info(f"Dump the data to S3 {str(cog_file)}")
    # Bug fix: the file handle was previously leaked via open(...).read();
    # Path.read_bytes() opens and closes the file for us.
    s3_dump(
        data=cog_file.read_bytes(),
        url=str(out_data),
        ACL="bucket-owner-full-control",
        ContentType="image/tiff",
    )
    log.info(f"File written to {out_data}")

    log.info("Write STAC to S3")
    s3_dump(
        data=json.dumps(item.to_dict(), indent=2),
        url=item.self_href,
        ACL="bucket-owner-full-control",
        ContentType="application/json",
    )
    log.info(f"STAC written to {item.self_href}")

    return item
Exemple #6
0
def info(args):
    """Build per-resource STAC items plus a catalog for 3DEP data on S3.

    Queries resource keys from the bucket, processes up to ``args.limit`` of
    them (0 means no limit) through a worker queue, writes one STAC item JSON
    per successful result plus a catalog.json, and dumps task errors to
    errors.json.
    """
    keys = list(get_resources(args.bucket, delimiter='/'))
    logger.debug('Querying boundaries for %d keys' % (len(keys)))

    queue = Process()

    catalog = pystac.Catalog('3dep',
                             'A catalog of USGS 3DEP Lidar hosted on AWS s3.',
                             href=f'{args.stac_base_url}catalog.json',
                             stac_extensions=['POINTCLOUD'])

    base = Path(args.stac_directory)
    base.mkdir(exist_ok=True, parents=True)

    # Enqueue up to args.limit tasks; a limit of 0 means "process everything".
    count = 0
    for k in keys:
        if count == args.limit and count != 0:
            break
        queue.put(Task(args.bucket, k, args.resolution))
        count += 1

    queue.do(count=20)

    layer = Layer(args)
    for r in queue.results:
        if not r.error:
            layer.add(r)

            # One STAC item document per successfully processed resource.
            with open(base / f"{r.name}.json", 'w') as f:
                json.dump(layer.add_stac(r).to_dict(), f)

            catalog.add_link(
                pystac.Link('item', f'{args.stac_base_url}{r.name}.json'))

    # Collect and persist any per-task errors (context manager instead of
    # the previous manual open/close; json.dump instead of manual encoding).
    errors = [r.error for r in queue.results if r.error]
    with open('errors.json', 'w') as f:
        json.dump(errors, f)

    with open(base / "catalog.json", 'w') as f:
        json.dump(catalog.to_dict(), f)
Exemple #7
0
    def doi(self, v: Optional[str]) -> None:
        """Set the DOI, keeping the matching cite-as link in sync.

        Setting the same value is a no-op; a different value replaces the old
        link; ``None`` clears the stored DOI.
        """
        if DOI_PROP in self.properties:
            if v == self.properties[DOI_PROP]:
                return
            # NOTE(review): the raw DOI is passed here; if remove_link matches
            # on the link href, this may need doi_to_url(...) — confirm against
            # remove_link's contract.
            remove_link(self.obj.links, self.properties[DOI_PROP])

        if v is not None:
            self.properties[DOI_PROP] = v
            url = doi_to_url(v)
            self.obj.add_link(pystac.Link(ScientificRelType.CITE_AS, url))
        else:
            # Bug fix: previously a stale DOI stayed in self.properties when
            # the setter was called with None.
            self.properties.pop(DOI_PROP, None)
Exemple #8
0
    def test_auto_title_not_found(self) -> None:
        # The target collection has no title, so the link cannot derive one.
        extent = pystac.Extent.from_items([self.item])
        collection = pystac.Collection(
            id="my_collection",
            description="Test Collection",
            extent=extent,
        )
        link = pystac.Link("my rel", target=collection)

        self.assertEqual(None, link.title)
Exemple #9
0
    def test_serialize_link(self) -> None:
        # RelType/MediaType enum members must serialize to plain string values.
        href = "https://some-domain/path/to/item.json"
        title = "A Test Link"
        link = pystac.Link(pystac.RelType.SELF, href, pystac.MediaType.JSON,
                           title)
        link_dict = link.to_dict()

        self.assertEqual(str(link_dict["rel"]), "self")
        self.assertEqual(str(link_dict["type"]), "application/json")
        self.assertEqual(link_dict["title"], title)
        self.assertEqual(link_dict["href"], href)
Exemple #10
0
    def test_auto_title_is_serialized(self) -> None:
        # A link to a titled collection picks up that title in to_dict().
        extent = pystac.Extent.from_items([self.item])
        collection = pystac.Collection(
            id="my_collection",
            description="Test Collection",
            extent=extent,
            title="Collection Title",
        )
        link = pystac.Link("my rel", target=collection)

        assert link.to_dict().get("title") == collection.title
Exemple #11
0
    def test_auto_title_when_resolved(self) -> None:
        # A resolved (in-memory) target exposes its title through link.title.
        extent = pystac.Extent.from_items([self.item])
        collection = pystac.Collection(
            id="my_collection",
            description="Test Collection",
            extent=extent,
            title="Collection Title",
        )
        link = pystac.Link("my rel", target=collection)

        self.assertEqual(collection.title, link.title)
Exemple #12
0
    def test_target_getter_setter(self) -> None:
        # target accepts both plain href strings and STAC objects.
        link = pystac.Link("my rel", target="./foo/bar.json")
        self.assertEqual(link.target, "./foo/bar.json")
        self.assertEqual(link.get_target_str(), "./foo/bar.json")

        # With an object target, get_target_str resolves to its self href.
        link.target = self.item
        self.assertEqual(link.target, self.item)
        self.assertEqual(link.get_target_str(), self.item.get_self_href())

        link.target = "./bar/foo.json"
        self.assertEqual(link.target, "./bar/foo.json")
Exemple #13
0
    def test_title_as_init_argument(self) -> None:
        # An explicit link title must win over the target collection's title.
        link_title = "Link title"
        extent = pystac.Extent.from_items([self.item])
        collection = pystac.Collection(
            id="my_collection",
            description="Test Collection",
            extent=extent,
            title="Collection Title",
        )
        link = pystac.Link("my rel", title=link_title, target=collection)

        assert link.title == link_title
        assert link.to_dict().get("title") == link_title
Exemple #14
0
    def test_minimal(self):
        # Older pystac API variant: also exercises set_owner with arbitrary
        # values and the link_type / make_absolute / make_relative behavior.
        rel = 'my rel'
        target = 'https://example.com/a/b'
        link = pystac.Link(rel, target)
        self.assertEqual(target, link.get_href())
        self.assertEqual(target, link.get_absolute_href())

        expected_repr = f'<Link rel={rel} target={target}>'
        self.assertEqual(expected_repr, link.__repr__())

        self.assertFalse(link.is_resolved())

        expected_dict = {'rel': rel, 'href': target}
        self.assertEqual(expected_dict, link.to_dict())

        # Run the same tests on the clone.
        clone = link.clone()
        self.assertNotEqual(link, clone)

        self.assertEqual(target, clone.get_href())
        self.assertEqual(target, clone.get_absolute_href())

        self.assertEqual(expected_repr, clone.__repr__())

        self.assertEqual(expected_dict, clone.to_dict())

        # Try the modification methods.
        self.assertIsNone(link.owner)
        link.set_owner(1)  # A junk value.
        self.assertEqual(1, link.owner)
        link.set_owner(None)
        self.assertIsNone(link.owner)

        self.assertEqual(pystac.LinkType.ABSOLUTE, link.link_type)

        # For an absolute string target, make_absolute/make_relative only
        # toggle link_type; the hrefs stay unchanged.
        link.make_absolute()
        self.assertEqual(pystac.LinkType.ABSOLUTE, link.link_type)
        self.assertEqual(target, link.get_href())
        self.assertEqual(target, link.get_absolute_href())

        link.make_relative()
        self.assertEqual(pystac.LinkType.RELATIVE, link.link_type)
        self.assertEqual(target, link.get_href())
        self.assertEqual(target, link.get_absolute_href())

        link.set_owner(self.item)
        self.assertEqual(self.item, link.owner)
Exemple #15
0
 def test_relative(self) -> None:
     # extra_fields entries are flattened into the serialized dict.
     rel = "my rel"
     target = "../elsewhere"
     mime_type = "example/stac_thing"
     link = pystac.Link(rel,
                        target,
                        mime_type,
                        "a title",
                        extra_fields={"a": "b"})
     expected_dict = {
         "rel": rel,
         "href": target,
         "type": "example/stac_thing",
         "title": "a title",
         "a": "b",
     }
     self.assertEqual(expected_dict, link.to_dict())
Exemple #16
0
    def test_relative(self):
        # Older pystac API: "properties" entries are flattened into the dict
        # and the explicit link_type must round-trip as RELATIVE.
        rel = 'my rel'
        target = '../elsewhere'
        mime_type = 'example/stac_thing'
        link = pystac.Link(rel,
                           target,
                           mime_type,
                           'a title',
                           properties={'a': 'b'},
                           link_type=pystac.LinkType.RELATIVE)
        expected_dict = {
            'rel': rel,
            'href': target,
            'type': 'example/stac_thing',
            'title': 'a title',
            'a': 'b'
        }
        self.assertEqual(expected_dict, link.to_dict())

        self.assertEqual(pystac.LinkType.RELATIVE, link.link_type)
Exemple #17
0
    def test_no_auto_title_if_not_resolved(self) -> None:
        # An unresolved (plain string) target yields no automatic title.
        link = pystac.Link(
            "my rel", target="https://www.some-domain.com/path/to/thing.txt")

        assert link.title is None
Exemple #18
0
 def add_link(self, target: pystac.STACObject) -> None:
     """Attach a TEST_LINK_REL link pointing at *target* to the wrapped object.

     Raises:
         pystac.ExtensionAlreadyExistsError: if no object is attached.
     """
     # Guard clause: bail out early when there is nothing to link from.
     if self.obj is None:
         raise pystac.ExtensionAlreadyExistsError(
             f"{self} does not support links")
     self.obj.add_link(pystac.Link(TEST_LINK_REL, target))
Exemple #19
0
 def test_get_target_str_no_href(self) -> None:
     # With the item's self link removed there is no href to resolve to.
     self.item.remove_links("self")
     link = pystac.Link("self", target=self.item)
     self.item.add_link(link)
     self.assertIsNone(link.get_target_str())
Exemple #20
0
def download_cci_lc(year: str,
                    s3_dst: str,
                    workdir: str,
                    overwrite: bool = False):
    """Download one year of ESA CCI land cover, COG it, and publish to S3.

    Produces ``<s3_dst>/<year>/<name>.tif`` plus a STAC item document.

    Args:
        year: Year to process (used in names, dates and the CDS query).
        s3_dst: Destination S3 prefix.
        workdir: Local scratch directory root.
        overwrite: Re-process even when the outputs already exist on S3.
    """
    log = setup_logging()
    assets = {}

    cci_lc_version = get_version_from_year(year)
    name = f"{PRODUCT_NAME}_{year}_{cci_lc_version}"

    out_cog = URL(s3_dst) / year / f"{name}.tif"
    out_stac = URL(s3_dst) / year / f"{name}.stac-item.json"

    # Skip all work if the STAC document is already published.
    if s3_head_object(str(out_stac)) is not None and not overwrite:
        log.info(f"{out_stac} exists, skipping")
        return

    # mkdir(exist_ok=True) is already a no-op for an existing directory, so
    # the previous `if not workdir.exists()` guard was redundant.
    workdir = Path(workdir)
    workdir.mkdir(parents=True, exist_ok=True)

    # Create a temporary directory to work with.
    # NOTE(review): this directory is never removed — confirm cleanup is
    # handled elsewhere (e.g. ephemeral workers).
    tmpdir = mkdtemp(prefix=str(f"{workdir}/"))
    log.info(f"Working on {year} in the path {tmpdir}")

    if s3_head_object(str(out_cog)) is None or overwrite:
        log.info(f"Downloading {year}")
        try:
            local_file = Path(tmpdir) / f"{name}.zip"
            if not local_file.exists():
                # Download the file
                c = cdsapi.Client()

                # We could also retrieve the object metadata from the CDS.
                # e.g. f = c.retrieve("series",{params}) | f.location = URL to download
                c.retrieve(
                    "satellite-land-cover",
                    {
                        "format": "zip",
                        "variable": "all",
                        "version": cci_lc_version,
                        "year": str(year),
                    },
                    local_file,
                )

                log.info(f"Downloaded file to {local_file}")
            else:
                log.info(
                    f"File {local_file} exists, continuing without downloading"
                )

            # Unzip the file
            log.info(f"Unzipping {local_file}")
            unzipped = None
            with zipfile.ZipFile(local_file, "r") as zip_ref:
                unzipped = local_file.parent / zip_ref.namelist()[0]
                zip_ref.extractall(tmpdir)

            # Process data
            ds = xr.open_dataset(unzipped)
            # Subset to Africa
            ulx, uly, lrx, lry = AFRICA_BBOX
            # Note: lats are upside down!
            ds_small = ds.sel(lat=slice(uly, lry), lon=slice(ulx, lrx))
            ds_small = assign_crs(ds_small, crs="epsg:4326")

            # Create cog (in memory - :mem: returns bytes object)
            mem_dst = write_cog(
                ds_small.lccs_class,
                ":mem:",
                nodata=0,
                overview_resampling="nearest",
            )

            # Write to s3
            s3_dump(mem_dst, str(out_cog), ACL="bucket-owner-full-control")
            log.info(f"File written to {out_cog}")

        except Exception:
            log.exception(f"Failed to process {name}")
            exit(1)
    else:
        log.info(f"{out_cog} exists, skipping")

    assets["classification"] = pystac.Asset(href=str(out_cog),
                                            roles=["data"],
                                            media_type=pystac.MediaType.COG)

    # Write STAC document
    source_doc = (
        "https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover"
    )
    item = create_stac_item(
        str(out_cog),
        id=str(
            odc_uuid("Copernicus Land Cover", cci_lc_version,
                     [source_doc, name])),
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": PRODUCT_NAME,
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )
    item.add_links([
        pystac.Link(
            target=source_doc,
            title="Source",
            rel=pystac.RelType.DERIVED_FROM,
            media_type="text/html",
        )
    ])
    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        str(out_stac),
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {out_stac}")
Exemple #21
0
def download_and_cog_chirps(
    year: str,
    month: str,
    s3_dst: str,
    day: "str | None" = None,
    overwrite: bool = False,
    slack_url: "str | None" = None,
):
    """Fetch a CHIRPS rainfall GeoTIFF, COG it, and publish COG + STAC to S3.

    Runs in daily mode when ``day`` is given, otherwise in monthly mode.
    On failure, optionally notifies Slack and exits the process with status 1.
    """
    # Cleaning and sanity checks
    s3_dst = s3_dst.rstrip("/")

    # Set up file strings
    if day is not None:
        # Set up a daily process
        in_file = f"chirps-v2.0.{year}.{month}.{day}.tif.gz"
        in_href = DAILY_URL_TEMPLATE.format(year=year, in_file=in_file)
        in_data = f"/vsigzip//vsicurl/{in_href}"
        if not check_for_url_existence(in_href):
            log.warning("Couldn't find the gzipped file, trying the .tif")
            in_file = f"chirps-v2.0.{year}.{month}.{day}.tif"
            in_href = DAILY_URL_TEMPLATE.format(year=year, in_file=in_file)
            in_data = f"/vsicurl/{in_href}"

            if not check_for_url_existence(in_href):
                log.error("Couldn't find the .tif file either, aborting")
                sys.exit(1)

        file_base = f"{s3_dst}/{year}/{month}/chirps-v2.0_{year}.{month}.{day}"
        out_data = f"{file_base}.tif"
        out_stac = f"{file_base}.stac-item.json"

        start_datetime = f"{year}-{month}-{day}T00:00:00Z"
        end_datetime = f"{year}-{month}-{day}T23:59:59Z"
        product_name = "rainfall_chirps_daily"
    else:
        # Set up a monthly process
        in_file = f"chirps-v2.0.{year}.{month}.tif.gz"
        in_href = MONTHLY_URL_TEMPLATE.format(in_file=in_file)
        in_data = f"/vsigzip//vsicurl/{in_href}"
        if not check_for_url_existence(in_href):
            log.warning("Couldn't find the gzipped file, trying the .tif")
            in_file = f"chirps-v2.0.{year}.{month}.tif"
            in_href = MONTHLY_URL_TEMPLATE.format(in_file=in_file)
            in_data = f"/vsicurl/{in_href}"

            if not check_for_url_existence(in_href):
                log.error("Couldn't find the .tif file either, aborting")
                sys.exit(1)

        file_base = f"{s3_dst}/chirps-v2.0_{year}.{month}"
        out_data = f"{file_base}.tif"
        out_stac = f"{file_base}.stac-item.json"

        # Month span used for the STAC start/end datetimes.
        _, end = calendar.monthrange(int(year), int(month))
        start_datetime = f"{year}-{month}-01T00:00:00Z"
        end_datetime = f"{year}-{month}-{end}T23:59:59Z"
        product_name = "rainfall_chirps_monthly"

        # Set to 15 for the STAC metadata
        day = 15

    try:
        # Check if file already exists
        log.info(f"Working on {in_file}")
        if not overwrite and s3_head_object(out_stac) is not None:
            log.warning(f"File {out_stac} already exists. Skipping.")
            return

        # COG and STAC
        with MemoryFile() as mem_dst:
            # Creating the COG, with a memory cache and no download. Shiny.
            cog_translate(
                in_data,
                mem_dst.name,
                cog_profiles.get("deflate"),
                in_memory=True,
                nodata=-9999,
            )
            # Creating the STAC document with appropriate date range
            _, end = calendar.monthrange(int(year), int(month))
            item = create_stac_item(
                mem_dst,
                id=str(odc_uuid("chirps", "2.0", [in_file])),
                with_proj=True,
                input_datetime=datetime(int(year), int(month), int(day)),
                properties={
                    "odc:processing_datetime": datetime_to_str(datetime.now()),
                    "odc:product": product_name,
                    "start_datetime": start_datetime,
                    "end_datetime": end_datetime,
                },
            )
            item.set_self_href(out_stac)
            # Manually redo the asset
            del item.assets["asset"]
            item.assets["rainfall"] = pystac.Asset(
                href=out_data,
                title="CHIRPS-v2.0",
                media_type=pystac.MediaType.COG,
                roles=["data"],
            )
            # Let's add a link to the source
            item.add_links([
                pystac.Link(
                    target=in_href,
                    title="Source file",
                    rel=pystac.RelType.DERIVED_FROM,
                    media_type="application/gzip",
                )
            ])

            # Dump the data to S3
            mem_dst.seek(0)
            log.info(f"Writing DATA to: {out_data}")
            s3_dump(mem_dst, out_data, ACL="bucket-owner-full-control")
            # Write STAC to S3
            log.info(f"Writing STAC to: {out_stac}")
            s3_dump(
                json.dumps(item.to_dict(), indent=2),
                out_stac,
                ContentType="application/json",
                ACL="bucket-owner-full-control",
            )
            # All done!
            log.info(f"Completed work on {in_file}")

    except Exception as e:
        message = f"Failed to handle {in_file} with error {e}"

        if slack_url is not None:
            send_slack_notification(slack_url, "Chirps Rainfall Monthly",
                                    message)
        log.exception(message)

        exit(1)
Exemple #22
0
    def test_path_like(self) -> None:
        # Links are os.PathLike: os.fspath() returns the target href.
        rel = "some-rel"
        target = os.path.abspath("../elsewhere")
        link = pystac.Link(rel, target)

        self.assertEqual(os.fspath(link), target)
Exemple #23
0
 def get_link(self) -> Optional[pystac.Link]:
     """Gets a :class:`~pystac.Link` for the DOI for this publication. If
     :attr:`Publication.doi` is ``None``, this method will also return ``None``."""
     doi = self.doi
     if doi is None:
         return None
     return pystac.Link(ScientificRelType.CITE_AS, doi_to_url(doi))
Exemple #24
0
    def test_extend_invalid_object(self) -> None:
        # Links are not STAC objects, so the extension must refuse them.
        link = pystac.Link("child",
                           "https://some-domain.com/some/path/to.json")

        with self.assertRaises(pystac.ExtensionTypeError):
            StorageExtension.ext(link)  # type: ignore
Exemple #25
0
    def render_metadata(
            self,
            ext: str = EXT_TIFF,
            processing_dt: Optional[datetime] = None) -> Dict[str, Any]:
        """
        Put together STAC metadata document for the output of this task.

        Args:
            ext: File extension used when resolving band paths.
            processing_dt: Processing timestamp; defaults to ``utcnow()``.

        Returns:
            The STAC item serialized to a plain dict.
        """
        if processing_dt is None:
            processing_dt = datetime.utcnow()

        product = self.product
        geobox = self.geobox
        region_code = product.region_code(self.tile_index)
        inputs = list(map(str, self._lineage()))

        # Start from the product's static properties, then fill in the
        # per-dataset temporal/region/version fields.
        properties: Dict[str, Any] = deepcopy(product.properties)

        properties["dtr:start_datetime"] = format_datetime(
            self.time_range.start)
        properties["dtr:end_datetime"] = format_datetime(self.time_range.end)
        properties["odc:processing_datetime"] = format_datetime(
            processing_dt, timespec="seconds")
        properties["odc:region_code"] = region_code
        properties["odc:product"] = product.name
        properties["odc:dataset_version"] = product.version

        # Reproject the footprint to WGS84 for the STAC geometry/bbox.
        # NOTE(review): resolution=math.inf presumably skips boundary
        # densification — confirm against the geometry library's to_crs docs.
        geobox_wgs84 = geobox.extent.to_crs("epsg:4326",
                                            resolution=math.inf,
                                            wrapdateline=True)
        bbox = geobox_wgs84.boundingbox

        item = pystac.Item(
            id=str(self.uuid),
            geometry=geobox_wgs84.json,
            bbox=[bbox.left, bbox.bottom, bbox.right, bbox.top],
            datetime=self.time_range.start.replace(tzinfo=timezone.utc),
            properties=properties,
            stac_extensions=["projection"],
        )

        item.ext.projection.epsg = geobox.crs.epsg
        # Lineage last
        item.properties["odc:lineage"] = dict(inputs=inputs)

        # Add all the assets
        for band, path in self.paths(ext=ext).items():
            asset = pystac.Asset(
                href=path,
                media_type="image/tiff; application=geotiff",
                roles=["data"],
                title=band,
            )
            item.add_asset(band, asset)

            # Record per-asset projection info (geotransform + raster shape).
            item.ext.projection.set_transform(geobox.transform, asset=asset)
            item.ext.projection.set_shape(geobox.shape, asset=asset)

        # Add links
        item.links.append(
            pystac.Link(
                rel="product_overview",
                media_type="application/json",
                target=product.href,
            ))
        item.links.append(
            pystac.Link(
                rel="self",
                media_type="application/json",
                target=self.metadata_path("absolute", ext="json"),
            ))

        return item.to_dict()
Exemple #26
0
    def render_metadata(
        product: OutputProduct,
        geobox: GeoBox,
        tile_index: TileIdx_xy,
        time_range: DateTimeRange,
        uuid: UUID,
        paths: Dict[str, str],
        metadata_path: str,
        processing_dt: Optional[datetime] = None,
    ) -> Dict[str, Any]:
        """
        Put together STAC metadata document for the output from the task info.

        Args:
            product: Output product definition (properties, name, href).
            geobox: Footprint/grid of the output tile.
            tile_index: Tile index used to derive the region code.
            time_range: Temporal extent of the output.
            uuid: Dataset id for the STAC item.
            paths: Mapping of band name -> asset path.
            metadata_path: Path used as the item's "self" link target.
            processing_dt: Processing timestamp; defaults to ``utcnow()``.

        Returns:
            The STAC item serialized to a plain dict.
        """
        if processing_dt is None:
            processing_dt = datetime.utcnow()

        region_code = product.region_code(tile_index)
        # Lineage is intentionally empty in this variant.
        inputs: List[str] = []

        properties: Dict[str, Any] = deepcopy(product.properties)
        properties["dtr:start_datetime"] = format_datetime(time_range.start)
        properties["dtr:end_datetime"] = format_datetime(time_range.end)
        properties["odc:processing_datetime"] = format_datetime(
            processing_dt, timespec="seconds")
        properties["odc:region_code"] = region_code
        properties["odc:lineage"] = dict(inputs=inputs)
        properties["odc:product"] = product.name

        # Reproject the footprint to WGS84 for the STAC geometry/bbox.
        # NOTE(review): resolution=math.inf presumably skips boundary
        # densification — confirm against the geometry library's to_crs docs.
        geobox_wgs84 = geobox.extent.to_crs("epsg:4326",
                                            resolution=math.inf,
                                            wrapdateline=True)
        bbox = geobox_wgs84.boundingbox

        item = pystac.Item(
            id=str(uuid),
            geometry=geobox_wgs84.json,
            bbox=[bbox.left, bbox.bottom, bbox.right, bbox.top],
            datetime=time_range.start.replace(tzinfo=timezone.utc),
            properties=properties,
        )

        # Enable the Projection extension
        item.ext.enable("projection")
        item.ext.projection.epsg = geobox.crs.epsg

        # Add all the assets
        for band, path in paths.items():
            asset = pystac.Asset(
                href=path,
                media_type="image/tiff; application=geotiff",
                roles=["data"],
                title=band,
            )
            item.add_asset(band, asset)

            # Record per-asset projection info (geotransform + raster shape).
            item.ext.projection.set_transform(geobox.transform, asset=asset)
            item.ext.projection.set_shape(geobox.shape, asset=asset)

        # Add links
        item.links.append(
            pystac.Link(
                rel="product_overview",
                media_type="application/json",
                target=product.href,
            ))
        item.links.append(
            pystac.Link(
                rel="self",
                media_type="application/json",
                target=metadata_path,
            ))

        return item.to_dict()
Exemple #27
0
def download_gls(year: str, s3_dst: str, workdir: Path, overwrite: bool = False):
    """Download Copernicus Global Land Cover layers, COG them, publish to S3.

    Downloads each layer in FILES for *year*, clips to the Africa extent,
    converts to COG, uploads to S3, then writes a single STAC item covering
    all layers.

    Args:
        year: Year to process (drives Zenodo record lookup and naming).
        s3_dst: Destination S3 prefix.
        workdir: Local scratch directory root.
        overwrite: Re-process even when the outputs already exist on S3.
    """
    log = setup_logging()
    assets = {}
    out_stac = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}.stac-item.json"

    if s3_head_object(str(out_stac)) is not None and not overwrite:
        log.info(f"{out_stac} exists, skipping")
        return

    # Download the files
    for name, file in FILES.items():
        # Create a temporary directory to work with.
        # Bug fix: TemporaryDirectory's prefix must be a string; passing the
        # Path object raised a TypeError when the temp name was assembled.
        with TemporaryDirectory(prefix=str(workdir)) as tmpdir:
            log.info(f"Working on {file}")
            url = URL(
                BASE_URL.format(
                    record_id=YEARS[year][1], year_key=YEARS[year][0], file=file
                )
            )

            dest_url = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}_{name}.tif"

            if s3_head_object(str(dest_url)) is None or overwrite:
                log.info(f"Downloading {url}")

                try:
                    local_file = Path(tmpdir) / str(url.name)
                    # Download the file
                    download_file(url, local_file)

                    log.info(f"Downloaded file to {local_file}")
                    local_file_small = translate_file_deafrica_extent(local_file)
                    log.info(f"Clipped Africa out and saved to {local_file_small}")
                    resampling = "nearest" if name in DO_NEAREST else "bilinear"

                    # Create a COG in memory and upload to S3
                    with MemoryFile() as mem_dst:
                        # Creating the COG, with a memory cache and no download. Shiny.
                        cog_translate(
                            local_file_small,
                            mem_dst.name,
                            cog_profiles.get("deflate"),
                            in_memory=True,
                            nodata=255,
                            overview_resampling=resampling,
                        )
                        mem_dst.seek(0)
                        s3_dump(mem_dst, str(dest_url), ACL="bucket-owner-full-control")
                        log.info(f"File written to {dest_url}")
                except Exception:
                    log.exception(f"Failed to process {url}")
                    exit(1)
            else:
                log.info(f"{dest_url} exists, skipping")

            assets[name] = pystac.Asset(
                href=str(dest_url), roles=["data"], media_type=pystac.MediaType.COG
            )

    # Write STAC document from the last-written file
    # NOTE(review): dest_url deliberately carries the value from the final
    # loop iteration; create_stac_item reads that file's metadata.
    source_doc = f"https://zenodo.org/record/{YEARS[year][1]}"
    item = create_stac_item(
        str(dest_url),
        id=str(odc_uuid("Copernicus Global Land Cover", "3.0.1", [source_doc])),
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": PRODUCT_NAME,
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )
    item.add_links(
        [
            pystac.Link(
                target=source_doc,
                title="Source",
                rel=pystac.RelType.DERIVED_FROM,
                media_type="text/html",
            )
        ]
    )
    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        str(out_stac),
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {out_stac}")
Exemple #28
0
def main():
    """Build a STAC catalog of Copernicus EMS rapid-mapping flood products.

    Pulls the Copernicus EU Rapid Mapping Activations GeoRSS feed, filters
    it down to flood events from 2019-01-01 onward, collects each event's
    VECTOR delineation (DEL) / grading (GRA) products, writes them out as
    GeoJSON, then searches SentinelHub for Sentinel 2 L2A scenes within
    +/- 12 hours of each event and links the matches into a STAC catalog
    written under ./data/catalog.

    Raises:
        ValueError: if SENTINELHUB_OAUTH_ID or SENTINELHUB_OAUTH_SECRET is
            not set in the environment.
        AssertionError: if an event description does not contain exactly one
            parseable event date/time, or if a product id string does not
            agree with the event metadata it came from.
    """
    sentinel_oauth_id = os.environ.get("SENTINELHUB_OAUTH_ID")
    sentinel_oauth_secret = os.environ.get("SENTINELHUB_OAUTH_SECRET")
    if sentinel_oauth_id is None:
        raise ValueError("Must set SENTINELHUB_OAUTH_ID")
    if sentinel_oauth_secret is None:
        raise ValueError("Must set SENTINELHUB_OAUTH_SECRET")

    # The activations feed is cached locally; delete the file to force a
    # fresh download.
    events_xml_url = "https://emergency.copernicus.eu/mapping/activations-rapid/feed"
    events_xml_file = Path("./data/copernicus-rapid-mapping-activations.xml")
    if not events_xml_file.is_file():
        logger.info("Pulling {}...".format(events_xml_url))
        urlretrieve(events_xml_url, str(events_xml_file))

    event_xml_dir = Path("./data/event-xml")
    os.makedirs(event_xml_dir, exist_ok=True)

    # Generate a list of all unique CEMS products (combination of event, aoi,
    # monitoring type, revision and version) for all flood events in 2019 and 2020
    products = []
    events_root = ET.parse(events_xml_file).getroot()
    for event in events_root.iter("item"):
        # Only flood activations are of interest; other categories are skipped.
        category = event.find("category").text.strip().lower()
        if category != "flood":
            continue

        event_id = event.find("guid").text
        title = event.find("title").text
        rss_url = event.find("{http://www.iwg-sem.org/}activationRSS").text
        logger.info(title)

        # The event's UTC timestamp only appears embedded in the HTML-ish
        # description text, so it is scraped out with a regex. Exactly one
        # match is expected; anything else aborts the run.
        description = event.find("description").text
        event_dts = re.findall(
            r"Date\/Time of Event \(UTC\):[</b>\s]*?(\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{2}:\d{2})",
            description,
            flags=re.MULTILINE,
        )
        if len(event_dts) != 1:
            logger.warning("{}: Available event date times {}".format(
                title, event_dts))
            raise AssertionError()
        event_datetime = datetime.strptime(
            event_dts[0], "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
        # Restrict to events from 2019 onwards.
        if event_datetime < datetime(2019, 1, 1, 0, 0, 0, tzinfo=timezone.utc):
            continue

        event_country = event.find(
            "{http://www.iwg-sem.org/}activationAffectedCountries").text

        # Each event has its own per-event GeoRSS feed, cached like the
        # top-level feed above.
        event_xml_file = Path(event_xml_dir, event_id).with_suffix(".xml")
        if not event_xml_file.is_file():
            logger.info("\tPulling {} GeoRSS: {}...".format(
                event_id, event_xml_file))
            urlretrieve(rss_url, event_xml_file)

        event_root = ET.parse(event_xml_file).getroot()

        for item in event_root.iter("item"):
            # Missing gdacs elements surface as AttributeError on .text;
            # treat those as empty so the filter below rejects them.
            try:
                data_type = item.find("{http://www.gdacs.org/}cemsctype").text
            except AttributeError:
                data_type = ""
            try:
                product_type = item.find(
                    "{http://www.gdacs.org/}cemsptype").text
            except AttributeError:
                product_type = ""

            # Only care about downloading VECTOR data for Delineation product
            # More info at https://emergency.copernicus.eu/mapping/ems/rapid-mapping-portfolio
            if not (data_type == "VECTOR" and
                    (product_type == "DEL" or product_type == "GRA")):
                continue

            # Product metadata is encoded in the link's URL path, e.g.
            # /<...>/<...>/<product_id>/<version_id>, with product_id itself
            # an underscore-separated composite.
            item_url = urlparse(item.find("link").text)
            _, _, product_id, version_id = item_url.path.lstrip("/").split("/")
            (
                product_event_id,
                aoi_id,
                product_type_id,
                monitoring_type,
                revision_id,
                data_type_id,
            ) = product_id.split("_")

            # Some sanity checks to ensure we've parsed our product id string correctly
            assert event_id == product_event_id
            assert product_type_id == product_type
            assert data_type_id == "VECTORS"

            georss_polygon = item.find(
                "{http://www.georss.org/georss}polygon").text
            # Split string, group number pairs, convert to float and swap pairs to lon first
            polygon = Polygon(
                map(
                    lambda x: (float(x[1]), float(x[0])),
                    grouper(georss_polygon.split(" "), 2),
                ))

            event_product = EventProduct(
                # Rebuild product_id from scratch because we need to include version
                "_".join([
                    event_id,
                    aoi_id,
                    product_type_id,
                    monitoring_type,
                    revision_id,
                    version_id,
                    data_type_id,
                ]),
                event_id,
                event_country,
                aoi_id,
                event_datetime.timestamp(),
                polygon,
                data_type_id,
                product_type_id,
                monitoring_type,
                revision_id,
                version_id,
                urlunparse(item_url),
            )
            products.append(event_product)

    # Persist the full product list as GeoJSON for inspection / reuse.
    df = gpd.GeoDataFrame(products)
    geojson_file = "./data/cems-rapid-mapping-flood-products-2019-2020.geojson"
    logger.info(
        "Writing GeoJSON of flood event products to {}".format(geojson_file))
    df.to_file(geojson_file, driver="GeoJSON")

    sentinel_session = get_session(sentinel_oauth_id, sentinel_oauth_secret)

    catalog = pystac.Catalog(
        "copernicus-rapid-mapping-floods-2019-2020",
        "Copernicus Rapid Mapping provisions geospatial information within hours or days from the activation in support of emergency management activities immediately following a disaster. Standardised mapping products are provided: e.g. to ascertain the situation before the event (reference product), to roughly identify and assess the most affected locations (first estimate product), assess the geographical extent of the event (delineation product) or to evaluate the intensity and scope of the damage resulting from the event (grading product). This catalog contains a subset of products for flood events from 2019-2020 that intersect with Sentinel 2 L2A Chips.",
        title="Copernicus Rapid Mapping Floods 2019-2020",
    )
    # Spatial extent is left as placeholder Nones here; it is recomputed
    # from the actual item geometries at the end of this function.
    s2_collection = pystac.Collection(
        "Sentinel-2-L2A",
        "Sentinel 2 L2A images corresponding to CEMS rapid mapping floods",
        pystac.Extent(
            pystac.SpatialExtent([None, None, None, None]),
            pystac.TemporalExtent([(
                # TODO: Make this more specific by looping actual dts
                #       after ingest
                datetime(2019, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
                datetime(2020, 12, 31, 23, 59, 59, tzinfo=timezone.utc),
            )]),
        ),
    )
    catalog.add_child(s2_collection)

    # Loop Products grouped by event id, lookup Sentinel 2 matches for each
    # Product, and create STAC Items in catalog for any matches
    # (groupby requires the input sorted by the same key).
    sorted_products = sorted(products, key=lambda x: x.event_id)
    for event_id, event_products in groupby(sorted_products,
                                            key=lambda x: x.event_id):
        for p in event_products:
            event_datetime = datetime.fromtimestamp(p.event_time,
                                                    tz=timezone.utc)

            # Check for sentinel 2 results before anything else, so we
            # don't do unnecessary work. We'll use these results later
            # after we've created our STAC Item
            response = stac_search(
                p.geometry.bounds,
                "sentinel-2-l2a",
                event_datetime - timedelta(hours=12),
                event_datetime + timedelta(hours=12),
                sentinel_session,
            ).json()

            if len(response["features"]) < 1:
                logger.debug("No Sentinel 2 results for {}".format(
                    p.product_id))
                continue

            # Lazily create one sub-collection per event, keyed by event id.
            event_collection = catalog.get_child(event_id)
            if event_collection is None:
                event_collection = pystac.Collection(
                    event_id,
                    "",
                    pystac.Extent(
                        pystac.SpatialExtent([None, None, None, None]),
                        pystac.TemporalExtent([(event_datetime, None)]),
                    ),
                )
                catalog.add_child(event_collection)

            pystac_item = pystac.Item(
                p.product_id,
                mapping(p.geometry),
                p.geometry.bounds,
                event_datetime,
                properties={
                    "aoi_id": p.aoi_id,
                    "country": p.event_country,
                    "event_id": p.event_id,
                    "product_type": p.product_type,
                    "data_type": p.data_type,
                    "monitoring_type": p.monitoring_type,
                    "revision": p.revision,
                    "version": p.version,
                },
            )
            event_collection.add_item(pystac_item)
            # Keep a pointer back to the human-readable CEMS product page.
            url_link = pystac.Link("alternate",
                                   p.product_link,
                                   media_type="text/html")
            pystac_item.add_link(url_link)

            # Get or create Item in S2 collection for each match from
            # SentinelHub and add as links to our Product Item
            for feature in response["features"]:
                s2_item = s2_collection.get_item(feature["id"])
                if s2_item is None:
                    s2_item = pystac.Item.from_dict(feature)
                    s2_collection.add_item(s2_item)

                # set_owner anchors the relative link to the product item
                # so hrefs resolve correctly when the catalog is saved.
                s2_link = pystac.Link(
                    "data", s2_item,
                    link_type=pystac.LinkType.RELATIVE).set_owner(pystac_item)
                pystac_item.add_link(s2_link)

            logger.info("Created STAC Item {} with {} Sentinel 2 links".format(
                p.product_id, len(response["features"])))

    # Set spatial extents
    # NOTE(review): `bounds` here is a single (minx, miny, maxx, maxy) tuple,
    # not a list of bboxes — pystac.SpatialExtent appears to tolerate a bare
    # bbox, but confirm against the pystac version in use.
    for collection in catalog.get_children():
        if not isinstance(collection, pystac.Collection):
            continue
        bounds = GeometryCollection(
            [shape(s.geometry) for s in collection.get_all_items()]).bounds
        collection.extent.spatial = pystac.SpatialExtent(bounds)

    catalog_root = "./data/catalog"
    logger.info("Writing STAC Catalog to {}...".format(catalog_root))
    catalog.normalize_and_save(catalog_root, pystac.CatalogType.SELF_CONTAINED)
Exemple #29
0
 def test_resolve_stac_object_no_root_and_target_is_item(self) -> None:
     """A root-less Link whose target is already an Item resolves without error."""
     rootless_link = pystac.Link("my rel", target=self.item)
     rootless_link.resolve_stac_object()
Exemple #30
0
def create_stac_item(
        mtl_xml_href: str,
        read_href_modifier: Optional[ReadHrefModifier] = None) -> pystac.Item:
    """Create a Landsat 8 Collection 2 Level-2 STAC Item.

    Reads metadata for a single scene of the Landsat Collection 2 Level-2
    Surface Reflectance Product. The MTL XML HREF serves as the base for
    locating the scene's other files, which are assumed to be co-located
    in the same directory or blob prefix.

    Args:
        mtl_xml_href: HREF of the scene's MTL XML metadata file.
        read_href_modifier: Optional callable applied to HREFs before reading.

    Returns:
        A populated pystac.Item for the scene.
    """
    # Every sibling file shares the href minus the trailing "_MTL.<ext>" part.
    scene_base_href = '_'.join(mtl_xml_href.split('_')[:-1])

    mtl = MtlMetadata.from_file(mtl_xml_href, read_href_modifier)
    ang = AngMetadata.from_file(ANG_ASSET_DEF.get_href(scene_base_href),
                                read_href_modifier)

    acquired = mtl.scene_datetime

    item = pystac.Item(id=mtl.scene_id,
                       bbox=mtl.bbox,
                       geometry=ang.get_scene_geometry(mtl.bbox),
                       datetime=acquired,
                       properties={})

    item.common_metadata.platform = L8_PLATFORM
    item.common_metadata.instruments = L8_INSTRUMENTS
    item.common_metadata.description = L8_ITEM_DESCRIPTION

    # eo extension: cloud cover from the MTL metadata.
    item.ext.enable('eo')
    item.ext.eo.cloud_cover = mtl.cloud_cover

    # view extension: viewing and illumination geometry.
    item.ext.enable('view')
    item.ext.view.off_nadir = mtl.off_nadir
    item.ext.view.sun_elevation = mtl.sun_elevation
    # Sun Azimuth in landsat metadata is -180 to 180 from north, west being negative.
    # In STAC, it's 0 to 360 clockwise from north, so shift negatives up.
    azimuth = mtl.sun_azimuth
    if azimuth < 0.0:
        azimuth += 360
    item.ext.view.sun_azimuth = azimuth

    # projection extension: CRS and native-projection bbox.
    item.ext.enable('projection')
    item.ext.projection.epsg = mtl.epsg
    item.ext.projection.bbox = mtl.proj_bbox

    # Custom landsat8 extension fields.
    item.stac_extensions.append(L8_EXTENSION_SCHEMA)
    item.properties.update(**mtl.additional_metadata)
    item.properties['landsat8:scene_id'] = ang.scene_id

    # -- Assets --

    # Assets common to every product level.
    for definition in COMMON_ASSET_DEFS:
        definition.add_asset(item, mtl, scene_base_href)

    # Surface reflectance assets.
    for definition in SR_ASSET_DEFS:
        definition.add_asset(item, mtl, scene_base_href)

    # Thermal assets exist only for the L2SP product level.
    if mtl.processing_level == 'L2SP':
        for definition in THERMAL_ASSET_DEFS:
            definition.add_asset(item, mtl, scene_base_href)

    # -- Links --

    usgs_item_page = (
        f"https://landsatlook.usgs.gov/stac-browser/collection02/level-2/standard/oli-tirs"
        f"/{acquired.year}"
        f"/{mtl.wrs_path}/{mtl.wrs_row}"
        f"/{mtl.scene_id}")

    item.add_link(
        pystac.Link(rel="alternate",
                    target=usgs_item_page,
                    title="USGS stac-browser page",
                    media_type="text/html"))

    return item