Beispiel #1
0
    def test_validates_geojson_with_tuple_coordinates(self) -> None:
        """Validate an Item whose geometry coordinates are nested tuples.

        Regression guard: a geometry dict may carry tuples instead of lists
        for its coordinate sequences (shapely can produce these), and such a
        geometry must still pass validation.
        """
        ring = (
            (-115.305, 36.126),
            (-115.305, 36.128),
            (-115.307, 36.128),
            (-115.307, 36.126),
            (-115.305, 36.126),
        )
        # The trailing comma in ``(ring,)`` is what makes the outer level a
        # tuple rather than a parenthesised expression.
        geom: Dict[str, Any] = {"type": "Polygon", "coordinates": (ring,)}

        item = pystac.Item(
            id="test-item",
            geometry=geom,
            bbox=[-115.308, 36.126, -115.305, 36.129],
            datetime=datetime.utcnow(),
            properties={},
        )

        # Should not raise.
        item.validate()
Beispiel #2
0
    def tostac(self):
        """Create a STAC item structure with whatever info we have.

        The boilerplate template is selected from ``self.source`` and
        ``self.region``; the resulting item is stored on ``self.stac``, gets
        the region recorded in its properties, and receives one ``data``
        asset whose media type depends on ``self.format``.
        """
        if self.source == 'NIC':
            if self.region == 'arctic':
                bplate = stac_templates.NIC_ARCTIC_STAC
            else:
                bplate = stac_templates.NIC_ANTARCTIC_STAC
        else:  # if not NIC then CIS
            bplate = CIS_STAC[self.region]

        # Hand the item its own copy of the template properties: pystac.Item
        # keeps the dict it is given, and 'region' is written into it below.
        # Without the copy every call would modify the shared template.
        self.stac = pystac.Item(id=self.name,
                                geometry=bplate['geometry'],
                                bbox=bplate['bbox'],
                                datetime=self.epoch,
                                properties=dict(bplate['properties']),
                                stac_extensions=bplate['stac_extensions'])
        self.stac.properties['region'] = self.region

        # Only the media type differs between the supported formats.
        if self.format == FMT_SHP:
            media_type = 'x-gis/x-shapefile'
        elif self.format == FMT_E00:
            media_type = 'application/x-ogc-avce00'
        else:
            media_type = 'text/plain'

        self.stac.add_asset(key='data',
                            asset=pystac.Asset(href=self.href,
                                               media_type=media_type))
Beispiel #3
0
    def add_stac(self, tile):
        """Build a STAC item for *tile*; return None if it has no polygon.

        The item gets the pointcloud extension, an ``ept.json`` asset pointing
        at ``tile.url``, and ``self`` / ``parent`` links rooted at
        ``self.args.stac_base_url``.
        """
        if not tile.poly:
            return None

        properties = {
            'description': 'A USGS Lidar pointcloud in Entwine/EPT format'
        }
        item = pystac.Item(tile.name, mapping(tile.poly),
                           list(tile.poly.bounds), datetime.datetime.now(),
                           properties)

        item.ext.enable(pystac.Extensions.POINTCLOUD)

        # Wrap every dimension of the EPT schema for the extension.
        schemas = [
            pystac.extensions.pointcloud.PointcloudSchema(dimension)
            for dimension in tile.ept['schema']
        ]
        item.ext.pointcloud.apply(tile.num_points,
                                  'lidar',
                                  'ept',
                                  schemas,
                                  epsg='EPSG:3857')

        item.add_asset(
            'ept.json',
            pystac.Asset(tile.url, 'entwine',
                         'The ept.json for accessing data'))

        self_link = pystac.Link('self',
                                f'{self.args.stac_base_url}{tile.name}.json')
        parent_link = pystac.Link('parent',
                                  f'{self.args.stac_base_url}catalog.json')
        item.add_links([self_link, parent_link])
        return item
Beispiel #4
0
def cmr_to_item(cmrxml, endpoint, version):
    """Convert a CMR granule XML file into a validated STAC item dict.

    Args:
        cmrxml: Path of the CMR XML file; the band-1 file name is derived
            from it by dropping two extensions and appending ``.B01.tif``.
        endpoint: Passed through to ``add_assets``.
        version: Passed through to ``add_assets``.

    Returns:
        The item serialised with ``to_dict()`` after ``validate()``.
    """
    stem = os.path.splitext(os.path.splitext(cmrxml)[0])[0]
    band1_file = f"{stem}.B01.tif"

    granule = untangle.parse(cmrxml).Granule
    granule_ur = granule.GranuleUR.cdata
    begin = granule.Temporal.RangeDateTime.BeginningDateTime.cdata
    acquired = datetime.datetime.strptime(begin, "%Y-%m-%dT%H:%M:%S.%fZ")

    geometry = get_geometry(granule)
    bounds = list(shape(geometry).bounds)

    item = pystac.Item(
        id=granule_ur,
        datetime=acquired,
        geometry=geometry,
        bbox=bounds,
        properties={},
    )

    # Each step enriches the item in place from the granule metadata.
    process_common_metadata(item, granule)
    process_eo(item, granule)
    add_assets(item, granule, endpoint, version)
    process_projection(item, granule, band1_file)
    process_view_geometry(item, granule)
    process_scientific(item, granule)

    item.validate()
    return item.to_dict()
Beispiel #5
0
    def test_templates_item_start_datetime(self) -> None:
        """Date template values are filled for an item with no ``datetime``.

        The item only carries ``start_datetime`` / ``end_datetime``
        properties; the expected values match the start.
        """
        start = datetime(2020, 11, 3, 18, 30)
        end = start + timedelta(days=1)

        template = LayoutTemplate("${year}/${month}/${day}/${date}/item.json")

        item = pystac.Item(
            "test",
            geometry=ARBITRARY_GEOM,
            bbox=ARBITRARY_BBOX,
            datetime=None,
            properties={
                "start_datetime": start.isoformat(),
                "end_datetime": end.isoformat(),
            },
        )

        values = template.get_template_values(item)

        self.assertEqual(set(values), {"year", "month", "day", "date"})

        self.assertEqual(values["year"], 2020)
        self.assertEqual(values["month"], 11)
        self.assertEqual(values["day"], 3)
        self.assertEqual(values["date"], "2020-11-03")

        self.assertEqual(template.substitute(item),
                         "2020/11/3/2020-11-03/item.json")
Beispiel #6
0
    def test_nested_properties(self) -> None:
        """Dotted template variables resolve to nested dictionary values.

        ``test.prop`` is supplied via ``properties`` and
        ``ext:extra.test.prop`` via ``extra_fields``.
        """
        template = LayoutTemplate(
            "${test.prop}/${ext:extra.test.prop}/item.json")

        nested_properties = {"test": {"prop": 4326}}
        nested_extras = {"ext:extra": {"test": {"prop": 3857}}}
        item = pystac.Item(
            "test",
            geometry=ARBITRARY_GEOM,
            bbox=ARBITRARY_BBOX,
            datetime=datetime(2020, 11, 3, 18, 30),
            properties=nested_properties,
            extra_fields=nested_extras,
        )

        values = template.get_template_values(item)

        self.assertEqual(set(values), {"test.prop", "ext:extra.test.prop"})
        self.assertEqual(values["test.prop"], 4326)
        self.assertEqual(values["ext:extra.test.prop"], 3857)

        self.assertEqual(template.substitute(item), "4326/3857/item.json")
Beispiel #7
0
    def _create_item(self, product, product_id, output_name, ref_image):
        """Create the EO STAC item for *product*.

        Args:
            product: Source product; ``acqdate``, ``sensor`` and the ``mtl``
                metadata (sensor, sun angles, cloud cover) are read from it.
            product_id: Identifier of the new item.
            output_name: Item href (normalised with ``os.path.normpath``).
            ref_image: Image used to compute the bbox and footprint.

        Returns:
            The item with the EO extension enabled, ``self.s2_bands`` applied
            and the descriptive properties filled in.
        """
        # If file is not found, it may have been generated with an old version
        # where the image format was not correctly managed.
        if not os.path.exists(ref_image) and ref_image.endswith('.jp2'):
            ref_image = f"{ref_image[:-4]}.TIF"

        bbox, footprint = get_bbox_and_footprint(ref_image)

        # Create item
        eo_item = pystac.Item(id=product_id,
                              geometry=footprint,
                              bbox=bbox,
                              datetime=product.acqdate,
                              properties={},
                              href=os.path.normpath(output_name))

        eo_item.ext.enable(pystac.Extensions.EO)
        eo_item.ext.eo.apply(bands=self.s2_bands)

        mtl = product.mtl
        eo_item.properties["Platform"] = product.sensor
        eo_item.properties["Instrument"] = mtl.sensor
        eo_item.properties["Sun azimuth"] = f"{float(mtl.sun_azimuth_angle):.3f}\u00b0"
        # NOTE(review): "Sun elevation" is filled from the *zenith* angle;
        # confirm this is intended (elevation is normally 90 - zenith).
        eo_item.properties["Sun elevation"] = f"{float(mtl.sun_zenith_angle):.3f}\u00b0"
        # NOTE(review): ref_image is used as a filesystem path above, so this
        # takes everything before the first '_' of the whole path - confirm
        # callers never pass a directory containing an underscore.
        eo_item.properties["Processing level"] = ref_image.split('_')[0]

        # Use fixed-point '.2f': a bare '.2' means two *significant* digits
        # and switches to exponent notation (45.678 -> '4.6e+01').
        cloud_cover = mtl.cloud_cover
        eo_item.properties["Cloud cover"] = (
            f"{float(cloud_cover):.2f}%" if cloud_cover is not None else None)
        return eo_item
Beispiel #8
0
 def setUp(self) -> None:
     """Store a minimal item without geometry or bbox on ``self.item``."""
     item_kwargs = dict(
         id="test-item",
         geometry=None,
         bbox=None,
         datetime=TEST_DATETIME,
         properties={},
     )
     self.item = pystac.Item(**item_kwargs)
Beispiel #9
0
def make_item() -> pystac.Item:
    """Return a geometry-less test item with ``SatExtension`` added to it."""
    item = pystac.Item(
        id="an/asset",
        geometry=None,
        bbox=None,
        datetime=datetime.datetime(2018, 1, 2),
        properties={},
    )
    SatExtension.add_to(item)
    return item
Beispiel #10
0
def make_item() -> pystac.Item:
    """Return a geometry-less test item with ``SarExtension`` added to it."""
    item = pystac.Item(
        id="my/items/2011",
        geometry=None,
        bbox=None,
        datetime=datetime.datetime(2020, 11, 7),
        properties={},
    )
    SarExtension.add_to(item)
    return item
Beispiel #11
0
def make_item() -> pystac.Item:
    """Return a geometry-less test item with the SAR extension enabled."""
    item = pystac.Item(
        id='my/items/2011',
        geometry=None,
        bbox=None,
        datetime=datetime.datetime(2020, 11, 7),
        properties={},
    )
    item.ext.enable(pystac.Extensions.SAR)
    return item
Beispiel #12
0
def make_item() -> pystac.Item:
    """Return a geometry-less 2011 test item with ``ScientificExtension``.

    The self href is ``URL_TEMPLATE`` formatted with 2011.
    """
    item = pystac.Item(
        id="USGS/GAP/CONUS/2011",
        geometry=None,
        bbox=None,
        datetime=datetime.datetime(2011, 1, 2),
        properties={},
    )
    item.set_self_href(URL_TEMPLATE % 2011)
    ScientificExtension.add_to(item)
    return item
Beispiel #13
0
 def setUp(self) -> None:
     """Create the collection and the minimal item used as fixtures."""
     # None lifts unittest's limit on the length of assertion diffs.
     self.maxDiff = None
     self.collection = pystac.Collection(
         "collection id", "desc", extent=ARBITRARY_EXTENT)
     item_kwargs = dict(
         id="test-item",
         geometry=None,
         bbox=None,
         datetime=TEST_DATETIME,
         properties={},
     )
     self.item = pystac.Item(**item_kwargs)
Beispiel #14
0
 def parse(cls, id: str, product_name_without_version: str,
           cmr_item: Dict) -> pystac.Item:
     """Build a ``pystac.Item`` from a CMR item.

     Geometry, bbox, start date and properties are each derived from
     *cmr_item* by the corresponding ``cls`` helper; the collection id is
     formed from the lower-cased *product_name_without_version*.
     """
     geometry = cls.parse_polygons(cmr_item)
     bounds = cls.geojson_polygon_to_bbox(geometry['coordinates'])
     start_date = cls.parse_start_date(cmr_item)
     item_properties = cls.parse_properties(cmr_item)
     collection_id = cls.form_stac_collection_id(
         product_name_without_version.lower())
     return pystac.Item(id,
                        geometry,
                        bounds,
                        start_date,
                        item_properties,
                        collection=collection_id)
Beispiel #15
0
def make_item(year: int) -> pystac.Item:
    """Return a geometry-less test item for *year*, VERSION extension enabled.

    The id, the datetime (January 2nd of *year*) and the self href all
    depend on *year*, so items for different years differ only slightly.
    """
    item = pystac.Item(
        id=f'USGS/GAP/CONUS/{year}',
        geometry=None,
        bbox=None,
        datetime=datetime.datetime(year, 1, 2),
        properties={},
    )
    item.set_self_href(URL_TEMPLATE % year)
    item.ext.enable(pystac.Extensions.VERSION)
    return item
Beispiel #16
0
    def test_substitute_with_colon_properties(self) -> None:
        """A template variable containing a colon is substituted."""
        template = LayoutTemplate("${ext:prop}/item.json")

        item = pystac.Item(
            "test",
            geometry=ARBITRARY_GEOM,
            bbox=ARBITRARY_BBOX,
            datetime=datetime(2020, 11, 3, 18, 30),
            properties={"ext:prop": 1},
        )

        self.assertEqual(template.substitute(item), "1/item.json")
Beispiel #17
0
def generate_stac_item(filename_tiff, cog_collection, planet_id, s3_uri):
    """Build a COG STAC item for a TIFF from its ``gdalinfo`` metadata.

    ``gdalinfo -proj4 -json`` is run on *filename_tiff* and its output is
    written next to it (``.tiff`` replaced by ``.json``). Datetime, footprint,
    bbox and EPSG code of the item are derived from that JSON.

    Args:
        filename_tiff: Path of the local ``.tiff`` file.
        cog_collection: Collection supplying the ``eo:bands`` and
            ``hsi:wavelength_*`` properties and the collection id.
        planet_id: Id of the new item.
        s3_uri: Href of the ``tiff_0`` data asset.

    Returns:
        The new ``pystac.Item`` with one COG asset attached.

    Raises:
        ValueError: If *filename_tiff* does not contain ``.tiff`` (the JSON
            output would otherwise overwrite the input file).
        subprocess.CalledProcessError: If ``gdalinfo`` exits with an error.
    """
    import subprocess  # local import; only this function needs it

    logger.info('Using gdalinfo to get metadata')
    filename_json = filename_tiff.replace('.tiff', '.json')
    if filename_json == filename_tiff:
        raise ValueError(
            f"Expected a '.tiff' file name, got {filename_tiff!r}")

    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed through verbatim and cannot inject commands.
    with open(filename_json, 'w') as out:
        subprocess.run(['gdalinfo', '-proj4', '-json', filename_tiff],
                       stdout=out,
                       check=True)
    with open(filename_json, 'r') as f:
        data = json.load(f)

    logger.info('Organizing metadata')
    tifftag_datetime = data.get('metadata').get('').get('TIFFTAG_DATETIME')
    # Only the 'YYYY:MM:DD' part before the space is used.
    year, month, day = [
        int(n) for n in tifftag_datetime.split(' ')[0].split(':')
    ]
    dt = datetime(year, month, day, tzinfo=timezone.utc)
    polygon = data.get('wgs84Extent')
    coords = polygon.get('coordinates')
    crs = CRS.from_string(data.get('coordinateSystem').get('proj4'))
    # Strip single-element nesting until the list of points is reached.
    while len(coords) == 1:
        coords = coords[0]
    # presumably points are (lon, lat) pairs as in GeoJSON - TODO confirm
    firsts = [first for (first, _second) in coords]
    seconds = [second for (_first, second) in coords]
    bbox = [min(firsts), min(seconds), max(firsts), max(seconds)]
    props = {
        'eo:bands': cog_collection.properties['eo:bands'],
        'hsi:wavelength_min': cog_collection.properties['hsi:wavelength_min'],
        # This key was previously a second 'hsi:wavelength_min', so the
        # maximum was never recorded.
        'hsi:wavelength_max': cog_collection.properties['hsi:wavelength_max'],
        'proj:epsg': crs.to_authority()[-1],
    }

    logger.info('Creating new cog item')
    cog_item = pystac.Item(planet_id,
                           polygon,
                           bbox,
                           dt,
                           props,
                           stac_extensions=COG_ITEM_EXTENSIONS,
                           collection=cog_collection.id)
    cog_item.add_asset(
        'tiff_0',
        pystac.Asset(s3_uri, media_type=pystac.MediaType.COG, roles=['data']))

    return cog_item
Beispiel #18
0
    def test_validates_geojson_with_tuple_coordinates(self):
        """Validate an Item whose geometry coordinates are nested tuples.

        Regression guard: a geometry dict may carry tuples instead of lists
        for its coordinate sequences (shapely can produce these), and such a
        geometry must still pass validation.
        """
        ring = (
            (-115.305, 36.126),
            (-115.305, 36.128),
            (-115.307, 36.128),
            (-115.307, 36.126),
            (-115.305, 36.126),
        )
        # The trailing comma in ``(ring,)`` is what makes the outer level a
        # tuple rather than a parenthesised expression.
        geom = {'type': 'Polygon', 'coordinates': (ring,)}

        item = pystac.Item(
            id='test-item',
            geometry=geom,
            bbox=[-115.308, 36.126, -115.305, 36.129],
            datetime=datetime.utcnow(),
            properties={},
        )

        outcome = item.validate()
        self.assertIsNone(outcome)
Beispiel #19
0
    def create_stac(self) -> pystac.Item:
        """Build the STAC item and attach every asset that needs uploading.

        Each such asset gets a relative href of the form
        ``./<collection title>/<item id><file ext>`` and is keyed by its
        content type, or by its file extension when it has none.

        Raises:
            Exception: If two assets resolve to the same href.
        """
        item = pystac.Item(
            id=self.id,
            geometry=None,
            bbox=None,
            datetime=datetime.now(),
            properties=self.properties,
            stac_extensions=list(self.stac_extensions),
        )
        seen_hrefs = set()
        for asset in self.assets:
            if asset.needs_upload:
                asset.href = f"./{self.collection.title}/{self.id}{asset.file_ext()}"
                if asset.href in seen_hrefs:
                    raise Exception(f"{asset.href} already exists.")

                if asset.get_content_type():
                    asset_key = asset.get_content_type()
                else:
                    asset_key = asset.file_ext()
                item.add_asset(key=asset_key, asset=asset.create_stac())
                seen_hrefs.add(asset.href)
        return item
Beispiel #20
0
def create_item(metadata_href):
    """Creates a STAC Item from CORINE data.

    This function will read the metadata file for information to place in
    the STAC item: item id (image name without extension), bounding box,
    EPSG code, publication date and title.

    Args:
        metadata_href (str): The href to the metadata for this tif.

    Returns:
        pystac.Item: A STAC Item representing this CORINE Land Cover.
    """

    metadata_root = ET.parse(metadata_href).getroot()

    # Item id
    image_name_node = 'Esri/DataProperties/itemProps/itemName'
    image_name = metadata_root.find(image_name_node).text
    item_id = os.path.splitext(image_name)[0]

    # Bounding box
    bounding_box_node = 'dataIdInfo/dataExt/geoEle/GeoBndBox/{}'
    west_long = float(
        metadata_root.find(bounding_box_node.format('westBL')).text)
    east_long = float(
        metadata_root.find(bounding_box_node.format('eastBL')).text)
    south_lat = float(
        metadata_root.find(bounding_box_node.format('southBL')).text)
    north_lat = float(
        metadata_root.find(bounding_box_node.format('northBL')).text)

    geom = mapping(box(west_long, south_lat, east_long, north_lat))
    # shapely returns bounds as a tuple; a STAC bbox is a JSON array, so
    # hand pystac a list.
    bounds = list(shape(geom).bounds)

    # EPSG
    epsg_element = 'refSysInfo/RefSystem/refSysID/identCode'
    epsg = int(
        metadata_root.find(epsg_element).attrib['code'].replace('EPSG:', ''))

    # Item date
    id_dt_node = 'dataIdInfo/idCitation/date/pubDate'
    id_dt_text = metadata_root.find(id_dt_node).text
    id_dt = str_to_datetime(id_dt_text)

    # Title
    title_node = 'dataIdInfo/idCitation/resTitle'
    title_text = metadata_root.find(title_node).text

    item = pystac.Item(id=item_id,
                       geometry=geom,
                       bbox=bounds,
                       datetime=id_dt,
                       properties={'corine:title': title_text})

    # Common metadata
    item.common_metadata.providers = [COPERNICUS_PROVIDER]

    # proj
    item.ext.enable('projection')
    item.ext.projection.epsg = epsg

    # Tif
    item.add_asset(
        ITEM_TIF_IMAGE_NAME,
        pystac.Asset(href=image_name,
                     media_type=pystac.MediaType.TIFF,
                     roles=['data'],
                     title="tif image"))

    # Metadata
    item.add_asset(
        ITEM_METADATA_NAME,
        pystac.Asset(href=metadata_href,
                     media_type=pystac.MediaType.TEXT,
                     roles=['metadata'],
                     title='FGDC Metdata'))

    return item
Beispiel #21
0
def main():
    """Convert one AVIRIS scene to COG(s) and register it in the STAC API.

    Steps, in order:
      1. Parse CLI arguments / environment into a ``CliConfig``.
      2. Fetch the source STAC item and pick its download asset.
      3. Build the COG STAC item (and create the COG collection if missing).
      4. Unless ``--force``: stop early when the COG item already exists.
      5. Download and extract the archive into a temp dir, warp every matching
         HDR data file to a TIFF with GDAL and upload it (plus the
         ``.aux.xml`` side-car, if GDAL wrote one) to S3.
      6. POST the COG item to the STAC API.

    Returns:
        ``None`` on success or early exit, ``-1`` when the POST response
        carries no ``id``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pipeline-uri",
                        type=str,
                        help="A URI to JSON with instructions")
    parser.add_argument("--pipeline", type=str, help="JSON with instructions")
    parser.add_argument(
        "--aviris-stac-id",
        type=str,
        help="STAC Item ID to process from the STAC collection")
    parser.add_argument(
        "--aviris-collection-id",
        type=str,
        default=AVIRIS_ARCHIVE_COLLECTION_ID,
    )
    parser.add_argument(
        "--stac-api-uri",
        type=str,
        default=os.environ.get("STAC_API_URI", "http://franklin:9090"),
    )
    parser.add_argument("--s3-bucket",
                        type=str,
                        default=os.environ.get("S3_BUCKET", "aviris-data"))
    parser.add_argument(
        "--s3-prefix",
        type=str,
        default=os.environ.get("S3_PREFIX"),
    )
    parser.add_argument("--temp-dir",
                        type=str,
                        default=os.environ.get("TEMP_DIR", None))
    parser.add_argument("--output-format",
                        type=str,
                        default=os.environ.get("GDAL_OUTPUT_FORMAT", "COG"))
    parser.add_argument(
        "--keep-temp-dir",
        action="store_true",
        help=
        "If provided, script does not delete temporary directory before script exits. Useful for debugging.",
    )
    parser.add_argument(
        "--skip-large",
        action="store_true",
        help=
        "If provided, script will not process any COG > 200 MB to keep processing times reasonable. Useful for debugging.",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help=
        "If provided, force reingest StacItem even though this it is already present in the catalog.",
    )
    parser.add_argument(
        "--l2",
        action="store_true",
        help="If provided, use L2 imagery instead of L1.",
    )

    # int(None) raises TypeError when the variable is unset.
    # NOTE(review): a set but non-numeric value raises ValueError, which is
    # not caught here.
    try:
        warpMemoryLimit = int(os.environ.get("GDAL_WARP_MEMORY_LIMIT", None))
    except TypeError:
        warpMemoryLimit = None

    # TODO: replace it with parser.parse_args() later
    cli_args, cli_unknown = parser.parse_known_args()

    # parse all cli arguments
    # NOTE(review): args.level and args.output_asset_name used below are not
    # argparse options; presumably CliConfig derives them - confirm.
    args = CliConfig(cli_args, cli_unknown)

    s3 = boto3.client("s3")
    stac_client = STACClient(args.stac_api_uri)

    cog_collection = get_aviris_cog_collection(args.level)

    # GET STAC Item from AVIRIS Catalog
    item = stac_client.get_collection_item(args.aviris_collection_id,
                                           args.aviris_stac_id)

    asset_key = 'https_refl' if args.l2 else 'https'
    asset = item.assets.get(asset_key, None)
    if asset is None:
        raise ValueError(
            f'STAC Item {args.aviris_stac_id} from {args.stac_api_uri} has no asset "{asset_key}"!'
        )
    scene_name = item.properties.get("Name")

    # Create new COG STAC Item
    cog_item_id = "{}_{}_{}".format(
        cog_collection.id,
        item.properties.get("Name"),
        item.properties.get("Scene"),
    )

    # The source item's properties dict is modified in place and then handed
    # to the COG item below, so both items refer to the same dict.
    item.properties['eo:bands'] = cog_collection.properties['eo:bands']
    item.properties['hsi:wavelength_min'] = cog_collection.properties[
        'hsi:wavelength_min']
    item.properties['hsi:wavelength_max'] = cog_collection.properties[
        'hsi:wavelength_max']
    item.properties.pop('layer:ids', None)

    cog_item = pystac.Item(
        cog_item_id,
        item.geometry,
        item.bbox,
        item.datetime,
        item.properties,
        stac_extensions=COG_ITEM_EXTENSIONS,
        collection=cog_collection.id,
    )

    # Create COG Collection if it doesn't exist
    if not stac_client.has_collection(cog_collection.id):
        stac_client.post_collection(cog_collection)

    if not args.force:
        # Exit early if COG STAC Item already exists
        # (an HTTPError from the lookup is taken to mean "not there yet").
        try:
            stac_client.get_collection_item(cog_collection.id, cog_item_id)
            print(cog_collection.id)
            print(cog_item_id)
            logger.info(f'STAC Item {cog_item_id} already exists. Exiting.')
            activation_output(cog_item_id, cog_collection.id)
            return
        except requests.exceptions.HTTPError:
            pass

    # Create tmpdir
    temp_dir = Path(args.temp_dir if args.temp_dir is not None else mkdtemp())
    temp_dir.mkdir(parents=True, exist_ok=True)
    try:
        # Retrieve AVIRIS GZIP for matching scene name
        local_archive = Path(temp_dir, Path(asset.href).name)
        if local_archive.exists():
            logger.info(f'Using existing archive: {local_archive}')
        else:
            logger.info(f'Downloading {asset.href} archive {local_archive}...')
            gzip_https_url = asset.href
            with DownloadProgressBar(unit='B',
                                     unit_scale=True,
                                     miniters=1,
                                     desc=gzip_https_url.split('/')[-1]) as t:
                urllib.request.urlretrieve(gzip_https_url,
                                           filename=local_archive,
                                           reporthook=t.update_to)

        # Retrieve file names from archive and extract if not already extracted to temp_dir
        extract_path = Path(temp_dir, f'{scene_name}_{args.level}')
        with tarfile.open(local_archive, mode="r") as tar_gz_fp:
            logger.info(f'Retrieving filenames from {local_archive}')
            with timing("Query archive"):
                tar_files = tar_gz_fp.getnames()
            logger.info(f"Files: {tar_files}")

            if extract_path.exists():
                logger.info(f'Skipping extract, exists at {extract_path}')
            else:
                logger.info(f"Extracting {local_archive} to {extract_path}")
                with timing("Extract"):
                    # NOTE(review): extractall() is applied to a downloaded
                    # archive without filtering member paths - confirm the
                    # source is trusted.
                    tar_gz_fp.extractall(extract_path)

        # Find HDR data files in unzipped package
        hdr_ext = '.hdr' if args.l2 else 'ort_img.hdr'
        hdr_files = [tf for tf in tar_files if tf.endswith(hdr_ext)]
        logger.info("HDR Files: {}".format(hdr_files))
        for idx, hdr_file_w_ext in enumerate(hdr_files):
            hdr_file_w_ext_path = Path(hdr_file_w_ext)
            # hdr_path is the header's path with its suffix removed, i.e. the
            # file that is size-checked and warped below.
            hdr_path = Path(extract_path, hdr_file_w_ext_path.with_suffix(""))
            cog_path = Path(
                f'{hdr_path.with_suffix("")}_{args.output_asset_name}.tiff')

            if args.skip_large and os.path.getsize(hdr_path) > 0.2 * GB:
                file_mb = floor(os.path.getsize(hdr_path) / 1024 / 1024)
                logger.info(
                    "--skip-large provided. Skipping {} with size {}mb".format(
                        hdr_path, file_mb))
                continue

            # Convert HDR data to pixel interleaved COG with GDAL
            # NUM_THREADS only speeds up compression and overview generation
            # gdal.Warp is used to fix rasters rotation
            # NOTE:
            # We can't directly write TIFFs on S3 as the result of the gdal.Warp operation
            # see: https://github.com/OSGeo/gdal/issues/1189
            warp_opts = gdal.WarpOptions(callback=warp_callback,
                                         warpOptions=["NUM_THREADS=ALL_CPUS"],
                                         creationOptions=[
                                             "NUM_THREADS=ALL_CPUS",
                                             "COMPRESS=DEFLATE", "BIGTIFF=YES",
                                             "TILED=YES"
                                         ],
                                         multithread=True,
                                         warpMemoryLimit=warpMemoryLimit,
                                         format=args.output_format)
            logger.info(f"Converting {hdr_path} to {cog_path}...")
            with timing("GDAL Warp"):
                gdal.Warp(str(cog_path), str(hdr_path), options=warp_opts)

            # read metadata from the transformed TIFF
            cog_ds = gdal.Open(str(cog_path))
            cog_proj = osr.SpatialReference(wkt=cog_ds.GetProjection())
            cog_proj.AutoIdentifyEPSG()

            # set projection
            # (written on every iteration, so the last converted file wins)
            cog_item.properties['proj:epsg'] = int(
                cog_proj.GetAttrValue('AUTHORITY', 1))

            # Upload  COG and metadata, if written, to S3 bucket + key
            key = Path(
                args.s3_prefix,
                str(item.properties.get("Year")),
                str(item.properties.get("Name")),
                cog_path.name,
            )
            s3_uri = f's3://{args.s3_bucket}/{key}'
            logger.info(f"Uploading {cog_path} to {s3_uri}")
            s3.upload_file(
                str(cog_path),
                args.s3_bucket,
                str(key),
                Callback=ProgressPercentage(str(cog_path)),
                Config=TransferConfig(multipart_threshold=1 * GB),
            )
            cog_metadata_path = cog_path.with_suffix(".tiff.aux.xml")
            if cog_metadata_path.exists():
                # NOTE(review): unlike the COG key above, this key has no
                # Year/Name sub-path - confirm that is intended.
                metadata_key = Path(args.s3_prefix, cog_metadata_path.name)
                metadata_s3_uri = f's3://{args.s3_bucket}/{metadata_key}'
                logger.info(
                    f'Uploading {cog_metadata_path} to {metadata_s3_uri}')
                s3.upload_file(str(cog_metadata_path), args.s3_bucket,
                               str(metadata_key))

            # Add assets to COG STAC Item
            cog_item.add_asset(
                f'{args.output_asset_name}_{idx}',
                pystac.Asset(s3_uri,
                             media_type=pystac.MediaType.COG,
                             roles=["data"]),
            )
            # metadata_s3_uri is only bound when the same exists() check
            # succeeded a few lines above.
            if cog_metadata_path.exists():
                cog_item.add_asset(
                    f'metadata_{idx}',
                    pystac.Asset(
                        metadata_s3_uri,
                        media_type=pystac.MediaType.XML,
                        roles=["metadata"],
                    ),
                )
    finally:
        if not args.keep_temp_dir:
            logger.info(f"Removing temp dir: {temp_dir}")
            shutil.rmtree(temp_dir, ignore_errors=True)

    # Add COG Item to AVIRIS L2 STAC Collection
    logger.info(f"POST Item {cog_item.id} to {args.stac_api_uri}")
    item_data = stac_client.post_collection_item(cog_collection.id, cog_item)
    if item_data.get('id', None):
        logger.info(f"Success: {item_data['id']}")
        activation_output(item_data['id'], cog_collection.id)
    else:
        logger.error(f"Failure: {item_data}")
        # NOTE(review): the only non-None return value of this function.
        return -1
Beispiel #22
0
    def render_metadata(
        product: OutputProduct,
        geobox: GeoBox,
        tile_index: TileIdx_xy,
        time_range: DateTimeRange,
        uuid: UUID,
        paths: Dict[str, str],
        metadata_path: str,
        processing_dt: Optional[datetime] = None,
    ) -> Dict[str, Any]:
        """
        Assemble the STAC metadata document for one output tile.

        The item id is ``str(uuid)``, its geometry and bbox come from the
        geobox extent converted to EPSG:4326, and its datetime is the start
        of ``time_range`` with the timezone set to UTC. Every entry of
        ``paths`` becomes a GeoTIFF data asset carrying the geobox transform
        and shape. ``processing_dt`` defaults to the current UTC time.

        Returns the item as a dictionary.
        """
        if processing_dt is None:
            processing_dt = datetime.utcnow()

        region_code = product.region_code(tile_index)
        lineage_inputs: List[str] = []

        properties: Dict[str, Any] = deepcopy(product.properties)
        properties.update({
            "dtr:start_datetime": format_datetime(time_range.start),
            "dtr:end_datetime": format_datetime(time_range.end),
            "odc:processing_datetime": format_datetime(processing_dt,
                                                       timespec="seconds"),
            "odc:region_code": region_code,
            "odc:lineage": {"inputs": lineage_inputs},
            "odc:product": product.name,
        })

        extent_wgs84 = geobox.extent.to_crs("epsg:4326",
                                            resolution=math.inf,
                                            wrapdateline=True)
        bounds = extent_wgs84.boundingbox

        item = pystac.Item(
            id=str(uuid),
            geometry=extent_wgs84.json,
            bbox=[bounds.left, bounds.bottom, bounds.right, bounds.top],
            datetime=time_range.start.replace(tzinfo=timezone.utc),
            properties=properties,
        )

        # Enable the Projection extension
        item.ext.enable("projection")
        projection = item.ext.projection
        projection.epsg = geobox.crs.epsg

        # One GeoTIFF data asset per band
        for band, path in paths.items():
            band_asset = pystac.Asset(
                href=path,
                media_type="image/tiff; application=geotiff",
                roles=["data"],
                title=band,
            )
            item.add_asset(band, band_asset)

            item.ext.projection.set_transform(geobox.transform,
                                              asset=band_asset)
            item.ext.projection.set_shape(geobox.shape, asset=band_asset)

        # Links: product overview first, then self
        overview_link = pystac.Link(
            rel="product_overview",
            media_type="application/json",
            target=product.href,
        )
        item.links.append(overview_link)
        self_link = pystac.Link(
            rel="self",
            media_type="application/json",
            target=metadata_path,
        )
        item.links.append(self_link)

        return item.to_dict()
Beispiel #23
0
    def to_representation(self, instance: models.RasterMeta) -> dict:
        """Serialize a ``RasterMeta`` record into a STAC Item dictionary.

        The item enables the ``projection`` and ``eo`` extensions, adds one
        ``data`` asset per image of the parent raster (with its EO bands) and
        one ``metadata`` asset per ancillary file.

        Args:
            instance: The raster metadata record to serialize.

        Returns:
            The dictionary produced by ``pystac.Item.to_dict()``.

        Raises:
            ValueError: If an image file is not a URL-backed file.
        """
        item = pystac.Item(
            id=instance.pk,
            geometry=json.loads(instance.footprint.json),
            bbox=instance.extent,
            # Fall back to the record timestamps when no acquisition date
            # is set.
            datetime=(instance.acquisition_date or instance.modified
                      or instance.created),
            properties=dict(
                datetime=str(instance.acquisition_date),
                platform=instance.instrumentation,
            ),
        )
        # 'proj' extension
        item.ext.enable('projection')
        item.ext.projection.apply(
            epsg=CRS.from_proj4(instance.crs).to_epsg(),
            transform=instance.transform,
        )
        # 'eo' extension
        item.ext.enable('eo')
        item.ext.eo.apply(cloud_cover=instance.cloud_cover, bands=[])
        # Add assets
        for image in instance.parent_raster.image_set.images.all():
            if image.file.type != FileSourceType.URL:
                # TODO: we need fix this
                raise ValueError(
                    'Files must point to valid URL resources, not internal storage.'
                )
            bands = []
            for bandmeta in image.bandmeta_set.filter(
                    band_range__contained_by=(None, None)):
                band = pystac.extensions.eo.Band.create(
                    name=f'B{bandmeta.band_number}',
                    description=bandmeta.description,
                )
                # A band's wavelength is described either by a common_name
                # or by center_wavelength plus full_width_half_max. Use the
                # common name when the range matches a known one; otherwise
                # derive the centre and width from the range bounds.
                wavelength_range = (
                    bandmeta.band_range.lower,
                    bandmeta.band_range.upper,
                )
                ranges_to_names = BAND_RANGE_BY_COMMON_NAMES.inverse
                if wavelength_range in ranges_to_names:
                    band.common_name = ranges_to_names[wavelength_range]
                else:
                    lower, upper = wavelength_range
                    with decimal.localcontext(decimal.BasicContext):
                        band.center_wavelength = float((lower + upper) / 2)
                        band.full_width_half_max = float(upper - lower)

                bands.append(band)

            asset = pystac.Asset(
                href=image.file.get_url(),
                title=image.file.name,
                roles=[
                    'data',
                ],
            )
            item.add_asset(f'image-{image.pk}', asset)

            if not bands:
                # The filter matched no band metadata: describe the image
                # with a single placeholder band. ``first()`` returns None
                # when the image has no band metadata at all, so guard the
                # attribute access instead of raising AttributeError.
                first_bandmeta = image.bandmeta_set.first()
                bands = [
                    pystac.extensions.eo.Band.create(
                        name=image.file.name,
                        description=(first_bandmeta.description
                                     if first_bandmeta is not None else None),
                    )
                ]
            item.ext.eo.set_bands(bands=bands, asset=asset)

        for ancillary_file in instance.parent_raster.ancillary_files.all():
            asset = pystac.Asset(
                href=ancillary_file.get_url(),
                title=ancillary_file.name,
                roles=[
                    'metadata',
                ],
            )
            item.add_asset(f'ancillary-{ancillary_file.pk}', asset)

        return item.to_dict()
Beispiel #24
0
    def render_metadata(
            self,
            ext: str = EXT_TIFF,
            processing_dt: Optional[datetime] = None) -> Dict[str, Any]:
        """
        Build the STAC Item document describing the output of this task.

        :param ext: File extension forwarded to ``self.paths`` when listing
            the per-band output files.
        :param processing_dt: Processing timestamp to record; when omitted,
            ``datetime.utcnow()`` is used.
        :returns: The STAC Item as a dictionary (``pystac.Item.to_dict()``).
        """
        if processing_dt is None:
            processing_dt = datetime.utcnow()

        product, geobox = self.product, self.geobox
        region_code = product.region_code(self.tile_index)
        lineage_inputs = [str(source) for source in self._lineage()]

        # Start from a copy so the product's own properties stay untouched.
        properties: Dict[str, Any] = deepcopy(product.properties)
        properties.update({
            "dtr:start_datetime": format_datetime(self.time_range.start),
            "dtr:end_datetime": format_datetime(self.time_range.end),
            "odc:processing_datetime": format_datetime(processing_dt,
                                                       timespec="seconds"),
            "odc:region_code": region_code,
            "odc:product": product.name,
            "odc:dataset_version": product.version,
        })

        # Footprint and bounding box are expressed in EPSG:4326.
        footprint = geobox.extent.to_crs("epsg:4326",
                                         resolution=math.inf,
                                         wrapdateline=True)
        bounds = footprint.boundingbox

        item = pystac.Item(
            id=str(self.uuid),
            geometry=footprint.json,
            bbox=[bounds.left, bounds.bottom, bounds.right, bounds.top],
            datetime=self.time_range.start.replace(tzinfo=timezone.utc),
            properties=properties,
            stac_extensions=["projection"],
        )
        projection = item.ext.projection
        projection.epsg = geobox.crs.epsg

        # Lineage is added last.
        item.properties["odc:lineage"] = {"inputs": lineage_inputs}

        # One data asset per output band, each carrying the grid definition.
        for band_name, band_path in self.paths(ext=ext).items():
            band_asset = pystac.Asset(
                href=band_path,
                media_type="image/tiff; application=geotiff",
                roles=["data"],
                title=band_name,
            )
            item.add_asset(band_name, band_asset)
            item.ext.projection.set_transform(geobox.transform,
                                              asset=band_asset)
            item.ext.projection.set_shape(geobox.shape, asset=band_asset)

        # Links: the product description and this document itself.
        link_targets = (
            ("product_overview", product.href),
            ("self", self.metadata_path("absolute", ext="json")),
        )
        for rel, target in link_targets:
            item.links.append(
                pystac.Link(rel=rel,
                            media_type="application/json",
                            target=target))

        return item.to_dict()
Beispiel #25
0
def create_item(tif_href, additional_providers=None):
    """Creates a STAC Item from Copernicus Global Land Cover Layers data.

    The GeoTIFF's dataset tags and first-band tags are read and used to
    populate the item.

    Args:
        tif_href (str): The href to the tif; it is opened to read its tags
            and bounds, and is also used as the href of the item's asset.
        additional_providers (list, optional): Providers to record on the
            item after the Copernicus provider.

    Returns:
        pystac.Item: A STAC Item representing this Copernicus Global Land
            Cover Layers data.
    """

    with rio.open(tif_href) as f:
        tags = f.tags()
        band_tags = f.tags(1)
        raster_bounds = f.bounds

    # Item id
    item_id = os.path.basename(tif_href).replace('.tif', '')

    # Bounds
    geom = mapping(
        box(raster_bounds.left, raster_bounds.bottom, raster_bounds.right,
            raster_bounds.top))
    bbox = shape(geom).bounds

    start_dt = str_to_datetime(tags.pop('time_coverage_start'))
    end_dt = str_to_datetime(tags.pop('time_coverage_end'))
    file_creation_dt = str_to_datetime(tags.pop('file_creation'))

    # The item has no single datetime; it is described by its start/end range.
    item = pystac.Item(id=item_id,
                       geometry=geom,
                       bbox=bbox,
                       datetime=None,
                       properties={
                           'start_datetime':
                           start_dt,
                           'end_datetime':
                           end_dt,
                           'discrete_classification_class_names':
                           DISCRETE_CLASSIFICATION_CLASS_NAMES,
                           'discrete_classification_class_palette':
                           DISCRETE_CLASSIFICATION_CLASS_PALETTE
                       })

    # Common metadata
    copernicus_provider = pystac.Provider(name=PROVIDER_NAME,
                                          url=(tags.pop('doi')),
                                          roles=['producer', 'licensor'])

    # Assemble the complete provider list before assigning it: extending the
    # list returned by ``item.common_metadata.providers`` is not guaranteed
    # to be written back to the item, which would drop the extra providers.
    providers = [copernicus_provider]
    if additional_providers is not None:
        providers.extend(additional_providers)
    item.common_metadata.providers = providers

    item.common_metadata.start_datetime = start_dt
    item.common_metadata.end_datetime = end_dt
    item.common_metadata.created = file_creation_dt

    item.common_metadata.description = tags.pop('Info')
    item.common_metadata.platform = tags.pop('platform')
    item.common_metadata.title = tags.pop('title')

    # proj: the tag looks like "WGS84 (EPSG:<code>)"; keep only the code.
    item.ext.enable('projection')
    item.ext.projection.epsg = int(
        tags.pop('delivered_product_crs').replace('WGS84 (EPSG:',
                                                  '').replace(')', ''))

    # Extra fields: every tag not consumed above is copied verbatim.
    for k, v in tags.items():
        item.extra_fields[k] = v

    # Bands
    long_name = band_tags.pop('long_name')
    band = pystac.extensions.eo.Band.create(
        name=long_name,
        common_name=band_tags.pop('short_name'),
        description=long_name)

    item.ext.enable('eo')
    item.ext.eo.bands = [band]

    # Tif
    item.add_asset(
        ITEM_TIF_IMAGE_NAME,
        pystac.Asset(href=tif_href,
                     media_type=pystac.MediaType.TIFF,
                     roles=['data'],
                     title="tif image"))
    return item
Beispiel #26
0
    def construct_metadata(self, meta, platform):
        """Constructs a STAC item that is harmonized across the different satellite image sources.

        :param meta: Source metadata (GeoJSON-like mapping)
        :param platform: Image platform (<enum 'Platform'>).
        :returns: PySTAC item
        :raises NotImplementedError: if the source is a STAC datahub
            (``Datahub.STAC_local`` or ``Datahub.STAC_API``).
        """
        if self.src == Datahub.STAC_local or self.src == Datahub.STAC_API:
            raise NotImplementedError(
                f"construct_metadata not supported for {self.src}.")

        elif self.src == Datahub.EarthExplorer:
            item = pystac.Item(
                id=meta["display_id"],
                datetime=meta["start_time"],
                geometry=meta["spatial_coverage"].__geo_interface__,
                bbox=meta["spatial_bounds"],
                properties={
                    "producttype":
                    "L1TP",
                    "srcuuid":
                    meta["entity_id"],
                    "start_datetime":
                    meta["start_time"].astimezone(
                        tz=datetime.timezone.utc).isoformat(),
                    "end_datetime":
                    meta["stop_time"].astimezone(
                        tz=datetime.timezone.utc).isoformat(),
                },
                stac_extensions=[pystac.Extensions.EO, pystac.Extensions.SAT],
            )

            # The membership test and the lookup previously used different
            # keys ("cloudCover" vs "cloud_cover"), so the value was either
            # never set or the lookup raised KeyError. Prefer the snake_case
            # key used by every other field read here and fall back to the
            # camelCase spelling.
            cloud_cover = meta.get("cloud_cover", meta.get("cloudCover"))
            if cloud_cover is not None:
                item.ext.eo.cloud_cover = round(float(cloud_cover), 2)

            item.common_metadata.platform = platform.value

            # The relative orbit is the WRS path and row digits concatenated.
            relative_orbit = int(f"{meta['wrs_path']}{meta['wrs_row']}")
            item.ext.sat.apply(orbit_state=sat.OrbitState.DESCENDING,
                               relative_orbit=relative_orbit)

        else:  # Scihub
            props = meta["properties"]
            # Parse the sensing start once; it is used for both the item
            # datetime and the start_datetime property.
            begin = parse(props["beginposition"])
            item = pystac.Item(
                id=props["identifier"],
                datetime=begin,
                geometry=meta["geometry"],
                bbox=_get_bbox_from_geometry_string(meta["geometry"]),
                properties={
                    "producttype":
                    props["producttype"],
                    "size":
                    props["size"],
                    "srcurl":
                    props["link"],
                    "srcuuid":
                    props["uuid"],
                    "start_datetime":
                    begin.astimezone(tz=datetime.timezone.utc).isoformat(),
                    "end_datetime":
                    parse(props["endposition"]).astimezone(
                        tz=datetime.timezone.utc).isoformat(),
                },
                stac_extensions=[pystac.Extensions.EO, pystac.Extensions.SAT],
            )

            if "cloudcoverpercentage" in props:
                item.ext.eo.cloud_cover = round(
                    float(props["cloudcoverpercentage"]), 2)

            item.common_metadata.platform = platform.value

            item.ext.sat.apply(
                # Upper-case the value so it matches the enum member name.
                orbit_state=sat.OrbitState[props["orbitdirection"].upper()],
                relative_orbit=int(props["orbitnumber"]),
            )

        return item
Beispiel #27
0
def main():
    """Pull Copernicus EU Rapid Mapping Activations data from the GeoRSS feed.

    Steps:
      1. Download (if not already on disk) the activations feed and the
         per-event GeoRSS documents under ``./data``.
      2. Collect the VECTOR delineation/grading products of flood events
         dated 2019-01-01 or later and write them to a GeoJSON file.
      3. For every product, search for Sentinel 2 L2A scenes within 12 hours
         either side of the event and, when any are found, add a STAC Item
         for the product (linked to the scenes) to a STAC catalog.
      4. Save the catalog as a self-contained catalog under
         ``./data/catalog``.

    Raises:
        ValueError: If the SentinelHub OAuth environment variables are unset.
        AssertionError: If an event description does not contain exactly one
            event date/time, or a product id fails the sanity checks.
    """
    sentinel_oauth_id = os.environ.get("SENTINELHUB_OAUTH_ID")
    sentinel_oauth_secret = os.environ.get("SENTINELHUB_OAUTH_SECRET")
    if sentinel_oauth_id is None:
        raise ValueError("Must set SENTINELHUB_OAUTH_ID")
    if sentinel_oauth_secret is None:
        raise ValueError("Must set SENTINELHUB_OAUTH_SECRET")

    # Create the download directories before anything is written: this also
    # creates ./data, which the feed download below needs to exist.
    event_xml_dir = Path("./data/event-xml")
    os.makedirs(event_xml_dir, exist_ok=True)

    events_xml_url = "https://emergency.copernicus.eu/mapping/activations-rapid/feed"
    events_xml_file = Path("./data/copernicus-rapid-mapping-activations.xml")
    if not events_xml_file.is_file():
        logger.info("Pulling {}...".format(events_xml_url))
        urlretrieve(events_xml_url, str(events_xml_file))

    # Generate a list of all unique CEMS products (combination of event, aoi,
    # monitoring type, revision and version) for all flood events dated
    # 2019-01-01 or later (no upper bound is applied here)
    products = []
    events_root = ET.parse(events_xml_file).getroot()
    for event in events_root.iter("item"):
        category = event.find("category").text.strip().lower()
        if category != "flood":
            continue

        event_id = event.find("guid").text
        title = event.find("title").text
        rss_url = event.find("{http://www.iwg-sem.org/}activationRSS").text
        logger.info(title)

        description = event.find("description").text
        event_dts = re.findall(
            r"Date\/Time of Event \(UTC\):[</b>\s]*?(\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{2}:\d{2})",
            description,
            flags=re.MULTILINE,
        )
        if len(event_dts) != 1:
            message = "{}: Available event date times {}".format(
                title, event_dts)
            logger.warning(message)
            # Carry the details in the exception too, not only in the log.
            raise AssertionError(message)
        event_datetime = datetime.strptime(
            event_dts[0], "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
        if event_datetime < datetime(2019, 1, 1, 0, 0, 0, tzinfo=timezone.utc):
            continue

        event_country = event.find(
            "{http://www.iwg-sem.org/}activationAffectedCountries").text

        event_xml_file = Path(event_xml_dir, event_id).with_suffix(".xml")
        if not event_xml_file.is_file():
            logger.info("\tPulling {} GeoRSS: {}...".format(
                event_id, event_xml_file))
            urlretrieve(rss_url, event_xml_file)

        event_root = ET.parse(event_xml_file).getroot()

        for item in event_root.iter("item"):
            # Items without these elements are treated as having an empty
            # type and are filtered out below.
            try:
                data_type = item.find("{http://www.gdacs.org/}cemsctype").text
            except AttributeError:
                data_type = ""
            try:
                product_type = item.find(
                    "{http://www.gdacs.org/}cemsptype").text
            except AttributeError:
                product_type = ""

            # Only care about VECTOR data for Delineation (DEL) and
            # Grading (GRA) products
            # More info at https://emergency.copernicus.eu/mapping/ems/rapid-mapping-portfolio
            if not (data_type == "VECTOR" and
                    (product_type == "DEL" or product_type == "GRA")):
                continue

            item_url = urlparse(item.find("link").text)
            _, _, product_id, version_id = item_url.path.lstrip("/").split("/")
            (
                product_event_id,
                aoi_id,
                product_type_id,
                monitoring_type,
                revision_id,
                data_type_id,
            ) = product_id.split("_")

            # Some sanity checks to ensure we've parsed our product id string correctly
            assert event_id == product_event_id
            assert product_type_id == product_type
            assert data_type_id == "VECTORS"

            georss_polygon = item.find(
                "{http://www.georss.org/georss}polygon").text
            # Split string, group number pairs, convert to float and swap pairs to lon first
            polygon = Polygon(
                map(
                    lambda x: (float(x[1]), float(x[0])),
                    grouper(georss_polygon.split(" "), 2),
                ))

            event_product = EventProduct(
                # Rebuild product_id from scratch because we need to include version
                "_".join([
                    event_id,
                    aoi_id,
                    product_type_id,
                    monitoring_type,
                    revision_id,
                    version_id,
                    data_type_id,
                ]),
                event_id,
                event_country,
                aoi_id,
                event_datetime.timestamp(),
                polygon,
                data_type_id,
                product_type_id,
                monitoring_type,
                revision_id,
                version_id,
                urlunparse(item_url),
            )
            products.append(event_product)

    df = gpd.GeoDataFrame(products)
    geojson_file = "./data/cems-rapid-mapping-flood-products-2019-2020.geojson"
    logger.info(
        "Writing GeoJSON of flood event products to {}".format(geojson_file))
    df.to_file(geojson_file, driver="GeoJSON")

    sentinel_session = get_session(sentinel_oauth_id, sentinel_oauth_secret)

    catalog = pystac.Catalog(
        "copernicus-rapid-mapping-floods-2019-2020",
        "Copernicus Rapid Mapping provisions geospatial information within hours or days from the activation in support of emergency management activities immediately following a disaster. Standardised mapping products are provided: e.g. to ascertain the situation before the event (reference product), to roughly identify and assess the most affected locations (first estimate product), assess the geographical extent of the event (delineation product) or to evaluate the intensity and scope of the damage resulting from the event (grading product). This catalog contains a subset of products for flood events from 2019-2020 that intersect with Sentinel 2 L2A Chips.",
        title="Copernicus Rapid Mapping Floods 2019-2020",
    )
    s2_collection = pystac.Collection(
        "Sentinel-2-L2A",
        "Sentinel 2 L2A images corresponding to CEMS rapid mapping floods",
        pystac.Extent(
            # Placeholder; the real spatial extent is set after all items
            # have been added (see "Set spatial extents" below).
            pystac.SpatialExtent([None, None, None, None]),
            pystac.TemporalExtent([(
                # TODO: Make this more specific by looping actual dts
                #       after ingest
                datetime(2019, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
                datetime(2020, 12, 31, 23, 59, 59, tzinfo=timezone.utc),
            )]),
        ),
    )
    catalog.add_child(s2_collection)

    # Loop Products grouped by event id, lookup Sentinel 2 matches for each
    # Product, and create STAC Items in catalog for any matches.
    # groupby only groups consecutive entries, hence the sort on the same key.
    sorted_products = sorted(products, key=lambda x: x.event_id)
    for event_id, event_products in groupby(sorted_products,
                                            key=lambda x: x.event_id):
        for p in event_products:
            event_datetime = datetime.fromtimestamp(p.event_time,
                                                    tz=timezone.utc)

            # Check for sentinel 2 results before anything else, so we
            # don't do unnecessary work. We'll use these results later
            # after we've created our STAC Item
            response = stac_search(
                p.geometry.bounds,
                "sentinel-2-l2a",
                event_datetime - timedelta(hours=12),
                event_datetime + timedelta(hours=12),
                sentinel_session,
            ).json()

            if len(response["features"]) < 1:
                logger.debug("No Sentinel 2 results for {}".format(
                    p.product_id))
                continue

            # Get or create the collection holding this event's products.
            event_collection = catalog.get_child(event_id)
            if event_collection is None:
                event_collection = pystac.Collection(
                    event_id,
                    "",
                    pystac.Extent(
                        pystac.SpatialExtent([None, None, None, None]),
                        pystac.TemporalExtent([(event_datetime, None)]),
                    ),
                )
                catalog.add_child(event_collection)

            pystac_item = pystac.Item(
                p.product_id,
                mapping(p.geometry),
                p.geometry.bounds,
                event_datetime,
                properties={
                    "aoi_id": p.aoi_id,
                    "country": p.event_country,
                    "event_id": p.event_id,
                    "product_type": p.product_type,
                    "data_type": p.data_type,
                    "monitoring_type": p.monitoring_type,
                    "revision": p.revision,
                    "version": p.version,
                },
            )
            event_collection.add_item(pystac_item)
            url_link = pystac.Link("alternate",
                                   p.product_link,
                                   media_type="text/html")
            pystac_item.add_link(url_link)

            # Get or create Item in S2 collection for each match from
            # SentinelHub and add as links to our Product Item
            for feature in response["features"]:
                s2_item = s2_collection.get_item(feature["id"])
                if s2_item is None:
                    s2_item = pystac.Item.from_dict(feature)
                    s2_collection.add_item(s2_item)

                s2_link = pystac.Link(
                    "data", s2_item,
                    link_type=pystac.LinkType.RELATIVE).set_owner(pystac_item)
                pystac_item.add_link(s2_link)

            logger.info("Created STAC Item {} with {} Sentinel 2 links".format(
                p.product_id, len(response["features"])))

    # Set spatial extents from the union of each collection's item geometries
    for collection in catalog.get_children():
        if not isinstance(collection, pystac.Collection):
            continue
        bounds = GeometryCollection(
            [shape(s.geometry) for s in collection.get_all_items()]).bounds
        collection.extent.spatial = pystac.SpatialExtent(bounds)

    catalog_root = "./data/catalog"
    logger.info("Writing STAC Catalog to {}...".format(catalog_root))
    catalog.normalize_and_save(catalog_root, pystac.CatalogType.SELF_CONTAINED)
Beispiel #28
0
def create_item(
        granule_href: str,
        additional_providers: Optional[List[pystac.Provider]] = None,
        read_href_modifier: Optional[ReadHrefModifier] = None) -> pystac.Item:
    """Create a STAC Item from a Sentinel 2 granule.

    Arguments:
        granule_href: The HREF to the granule. This is expected to be a path
            to a SAFE archive, e.g. : https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/01/C/CV/2016/03/27/S2A_MSIL2A_20160327T204522_N0212_R128_T01CCV_20210214T042702.SAFE
        additional_providers: Optional list of additional providers to set into the Item
        read_href_modifier: A function that takes an HREF and returns a modified HREF.
            This can be used to modify a HREF to make it readable, e.g. appending
            an Azure SAS token or creating a signed URL.

    Returns:
        pystac.Item: An item representing the Sentinel 2 scene

    Raises:
        ValueError: If no EPSG code can be determined from the granule
            metadata.
    """  # noqa

    safe_manifest = SafeManifest(granule_href, read_href_modifier)

    product_metadata = ProductMetadata(safe_manifest.product_metadata_href,
                                       read_href_modifier)
    granule_metadata = GranuleMetadata(safe_manifest.granule_metadata_href,
                                       read_href_modifier)

    item = pystac.Item(id=product_metadata.product_id,
                       geometry=product_metadata.geometry,
                       bbox=product_metadata.bbox,
                       datetime=product_metadata.datetime,
                       properties={})

    # --Common metadata--

    # Assemble the complete provider list before assigning it: extending the
    # list returned by ``item.common_metadata.providers`` is not guaranteed
    # to be written back to the item, which would drop the extra providers.
    providers = [SENTINEL_PROVIDER]
    if additional_providers is not None:
        providers.extend(additional_providers)
    item.common_metadata.providers = providers

    item.common_metadata.platform = product_metadata.platform
    item.common_metadata.constellation = SENTINEL_CONSTELLATION
    item.common_metadata.instruments = SENTINEL_INSTRUMENTS

    # --Extensions--

    # eo

    item.ext.enable('eo')
    item.ext.eo.cloud_cover = granule_metadata.cloudiness_percentage

    # sat

    item.ext.enable('sat')
    item.ext.sat.orbit_state = OrbitState(product_metadata.orbit_state.lower())
    item.ext.sat.relative_orbit = product_metadata.relative_orbit

    # proj
    item.ext.enable('projection')
    item.ext.projection.epsg = granule_metadata.epsg
    if item.ext.projection.epsg is None:
        raise ValueError(
            f'Could not determine EPSG code for {granule_href}; which is required.'
        )

    # s2 properties
    item.properties.update({
        **product_metadata.metadata_dict,
        **granule_metadata.metadata_dict
    })

    # --Assets--

    # Metadata

    item.add_asset(*safe_manifest.create_asset())
    item.add_asset(*product_metadata.create_asset())
    item.add_asset(*granule_metadata.create_asset())
    item.add_asset(
        INSPIRE_METADATA_ASSET_KEY,
        pystac.Asset(href=safe_manifest.inspire_metadata_href,
                     media_type=pystac.MediaType.XML,
                     roles=['metadata']))
    item.add_asset(
        DATASTRIP_METADATA_ASSET_KEY,
        pystac.Asset(href=safe_manifest.datastrip_metadata_href,
                     media_type=pystac.MediaType.XML,
                     roles=['metadata']))

    # Image assets
    proj_bbox = granule_metadata.proj_bbox

    # image_asset_from_href yields (key, asset) pairs; collecting them in a
    # dict collapses duplicate keys before anything is added to the item.
    image_assets = dict(
        image_asset_from_href(os.path.join(granule_href, image_path), item,
                              granule_metadata.resolution_to_shape, proj_bbox,
                              product_metadata.image_media_type)
        for image_path in product_metadata.image_paths)

    for key, asset in image_assets.items():
        # Image keys must not collide with the metadata assets added above.
        assert key not in item.assets
        item.add_asset(key, asset)

    # Thumbnail

    if safe_manifest.thumbnail_href is not None:
        item.add_asset(
            "preview",
            pystac.Asset(href=safe_manifest.thumbnail_href,
                         media_type=pystac.MediaType.COG,
                         roles=['thumbnail']))

    # --Links--

    item.links.append(SENTINEL_LICENSE)

    return item
Beispiel #29
0
def main():
    """Convert one Sentinel-2 STAC Item into a COG and register it.

    The source item is fetched from the Sentinel STAC API, its band assets
    are handed to ``gather_sentinel`` to produce a single output file on S3,
    and a new STAC Item describing that file is posted to the target STAC
    API (creating the COG collection first if it does not exist).

    Returns:
        ``-1`` if the POST response for the new item carries no ``id``;
        otherwise ``None`` (including the early exit taken when the item
        already exists and ``--force`` was not given).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pipeline-uri",
                        type=str,
                        help="A URI to JSON with instructions")
    parser.add_argument("--pipeline", type=str, help="JSON with instructions")
    parser.add_argument(
        "--sentinel-stac-id",
        type=str,
        help="STAC Item ID to process from the STAC collection")
    parser.add_argument(
        "--sentinel-collection-id",
        type=str,
        default=SENTINEL_ARCHIVE_COLLECTION_ID,
    )
    parser.add_argument(
        "--stac-api-uri",
        type=str,
        default=os.environ.get("STAC_API_URI", "http://franklin:9090"),
    )
    parser.add_argument(
        "--stac-api-uri-sentinel",
        type=str,
        default=os.environ.get("STAC_API_URI_SENTINEL",
                               "https://earth-search.aws.element84.com/v0"),
    )

    parser.add_argument("--s3-bucket",
                        type=str,
                        default=os.environ.get("S3_BUCKET",
                                               "sentinel-s2-data"))
    parser.add_argument(
        "--s3-prefix",
        type=str,
        default=os.environ.get("S3_PREFIX", "aviris-scene-cogs-l2"),
    )
    parser.add_argument("--temp-dir",
                        type=str,
                        default=os.environ.get("TEMP_DIR", None))
    parser.add_argument("--output-format",
                        type=str,
                        default=os.environ.get("GDAL_OUTPUT_FORMAT", "COG"))
    parser.add_argument(
        "--keep-temp-dir",
        action="store_true",
        help=
        "If provided, script does not delete temporary directory before script exits. Useful for debugging.",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help=
        "If provided, force reingest StacItem even though this it is already present in the catalog.",
    )

    # TODO: replace it with parser.parse_args() later
    cli_args, cli_unknown = parser.parse_known_args()

    # parse all cli arguments
    args = CliConfig(cli_args, cli_unknown)

    s3 = boto3.client("s3")
    stac_client_sentinel = STACClient(args.stac_api_uri_sentinel)
    stac_client = STACClient(args.stac_api_uri)

    collection = stac_client_sentinel.get_collection(
        args.sentinel_collection_id)

    # The COG collection reuses the extent of the source Sentinel collection.
    SENTINEL_COG_COLLECTION = pystac.Collection(
        SENTINEL_COG_COLLECTION_ID,
        "Sentinel-2a and Sentinel-2b imagery, processed to Level 2A (Surface Reflectance) and converted to Cloud-Optimized GeoTIFFs",
        collection.extent,
        stac_extensions=COG_COLLECTION_EXTENSIONS)
    SENTINEL_COG_COLLECTION.links = []
    SENTINEL_COG_COLLECTION.properties = {}
    SENTINEL_COG_COLLECTION.properties['eo:bands'] = SENTINEL_BANDS

    SENTINEL_COG_COLLECTION.properties[
        'hsi:wavelength_min'] = SENTINEL_WAVELENGTH_MIN
    SENTINEL_COG_COLLECTION.properties[
        'hsi:wavelength_max'] = SENTINEL_WAVELENGTH_MAX

    # GET STAC Item from SENTINEL Catalog
    item = stac_client_sentinel.get_collection_item(
        args.sentinel_collection_id, args.sentinel_stac_id)
    assets = item.assets
    bands_map = {
        'B01': vsis3(strip_scheme(assets['B01'].href)),
        'B02': vsis3(strip_scheme(assets['B02'].href)),
        'B03': vsis3(strip_scheme(assets['B03'].href)),
        'B04': vsis3(strip_scheme(assets['B04'].href)),
        'B05': vsis3(strip_scheme(assets['B05'].href)),
        'B06': vsis3(strip_scheme(assets['B06'].href)),
        'B07': vsis3(strip_scheme(assets['B07'].href)),
        'B08': vsis3(strip_scheme(assets['B08'].href)),
        'B8A': vsis3(strip_scheme(assets['B8A'].href)),
        'B09': vsis3(strip_scheme(assets['B09'].href)),
        'B11': vsis3(strip_scheme(assets['B11'].href)),
        'B12': vsis3(strip_scheme(assets['B12'].href)),
        'AOT': vsis3(strip_scheme(assets['AOT'].href)),
        # 'WVP': vsis3(strip_scheme(assets['WVP'].href)),
        # 'SCL': vsis3(strip_scheme(assets['SCL'].href))
    }

    # we don't need assets here, since the gather scripts knows what and how to download by the sentinel path
    properties = item.properties
    # Named ``acquired`` rather than ``datetime`` so the local does not
    # shadow the standard-library name.
    acquired = dateutil.parser.isoparse(properties['datetime'])

    # here "href": "s3://sentinel-s2-l2a/tiles/31/V/CE/2021/8/19/0/R60m/B01.jp2"
    # path is tiles/31/V/CE/2021/8/19/0
    sentintel_path = 'tiles/{}/{}/{}/{}/{}/{}/{}'.format(
        properties['sentinel:utm_zone'], properties['sentinel:latitude_band'],
        properties['sentinel:grid_square'], str(acquired.year),
        str(acquired.month), str(acquired.day),
        properties['sentinel:sequence'])

    # Create new COG STAC Item
    cog_item_id = "{}_{}".format(SENTINEL_COG_COLLECTION.id, item.id)

    cog_item = pystac.Item(
        cog_item_id,
        item.geometry,
        item.bbox,
        item.datetime,
        # Copy so the additions below do not also modify the source item's
        # properties dictionary.
        dict(item.properties),
        stac_extensions=COG_ITEM_EXTENSIONS,
        collection=SENTINEL_COG_COLLECTION.id,
    )

    cog_item.properties['eo:bands'] = SENTINEL_COG_COLLECTION.properties[
        'eo:bands']
    cog_item.properties[
        'hsi:wavelength_min'] = SENTINEL_COG_COLLECTION.properties[
            'hsi:wavelength_min']
    cog_item.properties[
        'hsi:wavelength_max'] = SENTINEL_COG_COLLECTION.properties[
            'hsi:wavelength_max']
    # The projection extension defines proj:epsg as an integer, not a string.
    cog_item.properties['proj:epsg'] = 4326

    # Create COG Collection if it doesn't exist
    if not stac_client.has_collection(SENTINEL_COG_COLLECTION.id):
        stac_client.post_collection(SENTINEL_COG_COLLECTION)

    if not args.force:
        # Exit early if COG STAC Item already exists; an HTTP error from the
        # lookup is taken to mean the item is not there yet.
        try:
            stac_client.get_collection_item(SENTINEL_COG_COLLECTION.id,
                                            cog_item_id)
            logger.info(f'STAC Item {cog_item_id} already exists. Exiting.')
            activation_output(cog_item_id)
            return
        except requests.exceptions.HTTPError:
            pass

    _, s3_uri = gather_sentinel(
        f'{cog_item_id}.tiff',
        f's3://{args.s3_bucket}/{args.s3_prefix}/{sentintel_path}/', bands_map)

    # Add assets to COG STAC Item
    idx = 0
    cog_item.add_asset(
        f'{args.output_asset_name}_{idx}',
        pystac.Asset(s3_uri, media_type=pystac.MediaType.COG, roles=["data"]),
    )

    # Add COG Item to the Sentinel COG STAC Collection
    logger.info(f"POST Item {cog_item.id} to {args.stac_api_uri}")
    item_data = stac_client.post_collection_item(SENTINEL_COG_COLLECTION.id,
                                                 cog_item)
    if item_data.get('id', None):
        logger.info(f"Success: {item_data['id']}")
        activation_output(item_data['id'])
    else:
        logger.error(f"Failure: {item_data}")
        return -1
Beispiel #30
0
        footprint = Polygon([[bounds.left, bounds.bottom],
                             [bounds.left, bounds.top],
                             [bounds.right, bounds.top],
                             [bounds.right, bounds.bottom]])

        return (bbox, mapping(footprint))


# Compute the bounding box and footprint geometry of each input file.
bbox1, footprint1 = get_bbox_and_footprint(path1)
bbox2, footprint2 = get_bbox_and_footprint(path2)

from datetime import datetime

# One STAC Item per input file, each with a fixed acquisition date and no
# extra properties.
item1 = stac.Item(id='canopy-height',
                  geometry=footprint1,
                  bbox=bbox1,
                  datetime=datetime(2018, 7, 5),
                  properties={})

item2 = stac.Item(id='landcover',
                  geometry=footprint2,
                  bbox=bbox2,
                  datetime=datetime(2019, 7, 5),
                  properties={})

# Attach the first item's files: the file at path1 as a COG asset and the
# file at metapath1 as an XML asset.
item1.add_asset(key='data',
                asset=stac.Asset(href=path1, media_type=stac.MediaType.COG))
item1.add_asset(key='metadata',
                asset=stac.Asset(href=metapath1,
                                 media_type=stac.MediaType.XML))
item1.add_asset(key='thumbnail',