Example 1
def create_asset(self, href: str) -> pystac.Asset:
    """Creates a new :class:`~pystac.Asset` instance using the fields from this
    ``AssetDefinition`` and the given ``href``."""
    return pystac.Asset(
        href=href,
        title=self.title,
        description=self.description,
        media_type=self.media_type,
        roles=self.roles,
        extra_fields={
            k: v
            for k, v in self.properties.items() if k not in {
                ASSET_TITLE_PROP,
                ASSET_DESC_PROP,
                ASSET_TYPE_PROP,
                ASSET_ROLES_PROP,
            }
        },
    )
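A minimal usage sketch, assuming the ``AssetDefinition`` above is pystac's item-assets extension class; the property keys and the href below are illustrative placeholders:

from pystac.extensions.item_assets import AssetDefinition

# Define the asset template once, then stamp out concrete assets per href.
definition = AssetDefinition({
    "title": "Band 1 (coastal)",
    "type": "image/tiff; application=geotiff; profile=cloud-optimized",
    "roles": ["data"],
})
asset = definition.create_asset("https://example.com/B01.tif")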
Example 2
    def add_stac(self, tile):

        if not tile.poly:
            return None

        item = pystac.Item(
            tile.name, mapping(tile.poly), list(tile.poly.bounds),
            datetime.datetime.now(),
            {'description': 'A USGS Lidar pointcloud in Entwine/EPT format'})

        #item.ext.enable(pystac.Extensions.POINTCLOUD)

        # Build the point cloud schema list from the EPT metadata
        s = tile.ept['schema']
        p = []
        for d in s:
            p.append(Schema(d))

        PointcloudExtension.add_to(item)
        PointcloudExtension.ext(item).apply(
            tile.num_points,
            PhenomenologyType.LIDAR,
            "ept",
            p,
        )

        ProjectionExtension.add_to(item)
        ProjectionExtension.ext(item).apply(3857, projjson=PROJJSON)

        #        item.ext.pointcloud.apply(tile.num_points, 'lidar', 'ept', p, epsg='EPSG:3857')

        asset = pystac.Asset(tile.url, 'entwine',
                             'The ept.json for accessing data')
        item.add_asset('ept.json', asset)

        item_link = pystac.Link('self',
                                f'{self.args.stac_base_url}{tile.name}.json')
        item_parent = pystac.Link('parent',
                                  f'{self.args.stac_base_url}catalog.json')
        item.add_links([item_link, item_parent])
        return item
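A hypothetical sketch of how the returned items might be collected into a catalog; the catalog id and description, the ``builder`` object, and the ``tiles`` iterable are assumptions, not part of the original code:

# Gather per-tile items into a catalog, skipping tiles without geometry.
catalog = pystac.Catalog('usgs-lidar', 'USGS Lidar point clouds in Entwine/EPT format')
for tile in tiles:
    item = builder.add_stac(tile)
    if item is not None:
        catalog.add_item(item)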
Example 3
def generate_stac_item(filename_tiff, cog_collection, planet_id, s3_uri):
    logger.info(f'Using gdalinfo to get metadata')
    filename_json = filename_tiff.replace('.tiff', '.json')
    os.system(f'gdalinfo -proj4 -json {filename_tiff} > {filename_json}')
    with open(filename_json, 'r') as f:
        data = json.load(f)

    logger.info(f'Organizing metadata')
    tifftag_datetime = data.get('metadata').get('').get('TIFFTAG_DATETIME')
    year, month, day = [
        int(n) for n in tifftag_datetime.split(' ')[0].split(':')
    ]
    dt = datetime(year, month, day, tzinfo=timezone.utc)
    polygon = data.get('wgs84Extent')
    coords = polygon.get('coordinates')
    crs = CRS.from_string(data.get('coordinateSystem').get('proj4'))
    while len(coords) == 1:
        coords = coords[0]
    xs = [x for (x, y) in coords]
    ys = [y for (x, y) in coords]
    bbox = [min(xs), min(ys), max(xs), max(ys)]
    props = {
        'eo:bands': cog_collection.properties['eo:bands'],
        'hsi:wavelength_min': cog_collection.properties['hsi:wavelength_min'],
        'hsi:wavelength_max': cog_collection.properties['hsi:wavelength_max'],
        'proj:epsg': crs.to_authority()[-1],
    }

    logger.info(f'Creating new cog item')
    cog_item = pystac.Item(planet_id,
                           polygon,
                           bbox,
                           dt,
                           props,
                           stac_extensions=COG_ITEM_EXTENSIONS,
                           collection=cog_collection.id)
    cog_item.add_asset(
        'tiff_0',
        pystac.Asset(s3_uri, media_type=pystac.MediaType.COG, roles=['data']))

    return cog_item
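A sketch of one possible call, assuming ``cog_collection`` already carries the ``eo:bands`` and ``hsi:*`` properties the function copies; the file names and IDs are placeholders:

cog_item = generate_stac_item('scene_0001.tiff', cog_collection,
                              'planet_scene_0001',
                              's3://example-bucket/cogs/scene_0001.tiff')
# Persist the item alongside the COG for later cataloguing.
with open('scene_0001_stac.json', 'w') as f:
    json.dump(cog_item.to_dict(), f, indent=2)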
Example 4
    def test_asset_bands(self):
        eo_item = pystac.read_file(self.LANDSAT_EXAMPLE_URI)

        # Get

        b1_asset = eo_item.assets['B1']
        asset_bands = eo_item.ext.eo.get_bands(b1_asset)
        self.assertIsNot(None, asset_bands)
        self.assertEqual(len(asset_bands), 1)
        self.assertEqual(asset_bands[0].name, 'B1')

        index_asset = eo_item.assets['index']
        asset_bands = eo_item.ext.eo.get_bands(index_asset)
        self.assertIs(None, asset_bands)

        # Set
        b2_asset = eo_item.assets['B2']
        self.assertEqual(eo_item.ext.eo.get_bands(b2_asset)[0].name, "B2")
        eo_item.ext.eo.set_bands(eo_item.ext.eo.get_bands(b1_asset), b2_asset)

        new_b2_asset_bands = eo_item.ext.eo.get_bands(eo_item.assets['B2'])

        self.assertEqual(new_b2_asset_bands[0].name, 'B1')

        eo_item.validate()

        # Check adding a new asset
        new_bands = [
            Band.create(name="red", description=Band.band_description("red")),
            Band.create(name="green",
                        description=Band.band_description("green")),
            Band.create(name="blue",
                        description=Band.band_description("blue")),
        ]
        asset = pystac.Asset(href="some/path.tif",
                             media_type=pystac.MediaType.GEOTIFF)
        eo_item.ext.eo.set_bands(new_bands, asset)
        eo_item.add_asset("test", asset)

        self.assertEqual(len(eo_item.assets["test"].properties["eo:bands"]), 3)
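The test above exercises the pre-1.0 ``item.ext.eo`` accessor. A rough sketch of the assumed pystac >= 1.0 equivalent for the get/set portion uses ``EOExtension`` directly:

from pystac.extensions.eo import EOExtension

# Read bands from one asset and copy them onto another.
b1_bands = EOExtension.ext(eo_item.assets['B1']).bands
EOExtension.ext(eo_item.assets['B2']).bands = b1_bands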
Example 5
    def add_asset(self, item: pystac.Item, mtl_metadata: MtlMetadata,
                  base_href: str) -> None:
        asset = pystac.Asset(href=self.get_href(base_href),
                             media_type=self.media_type)
        if self.title:
            asset.title = self.title
        if self.description:
            asset.description = self.description

        # common_metadata

        if self.gsd is not None:
            item.common_metadata.set_gsd(self.gsd, asset)
        else:
            if self.is_sr or self.is_qa:
                sr_gsd = mtl_metadata.sr_gsd
                if item.common_metadata.gsd != sr_gsd:
                    item.common_metadata.set_gsd(sr_gsd, asset)
            if self.is_thermal:
                thermal_gsd = mtl_metadata.thermal_gsd
                if item.common_metadata.gsd != thermal_gsd:
                    item.common_metadata.set_gsd(thermal_gsd, asset)

        # eo

        if self.bands:
            asset.properties["eo:bands"] = [b.to_dict() for b in self.bands]

        # projection
        if self.is_sr or self.is_qa:
            item.ext.projection.set_shape(mtl_metadata.sr_shape, asset)
            item.ext.projection.set_transform(mtl_metadata.sr_transform, asset)
        if self.is_thermal:
            item.ext.projection.set_shape(mtl_metadata.thermal_shape, asset)
            item.ext.projection.set_transform(mtl_metadata.thermal_transform,
                                              asset)

        item.add_asset(self.key, asset)
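A hypothetical driver loop showing how such band/asset definitions might be applied while building an item; ``ASSET_DEFINITIONS``, ``item``, ``mtl_metadata``, and ``base_href`` are assumptions here:

# Each definition adds its own asset (and per-asset metadata) to the item.
for definition in ASSET_DEFINITIONS:
    definition.add_asset(item, mtl_metadata, base_href)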
Example 6
def add_assets(item, base_url):
    # add non-band assets
    item.add_asset(
        'thumbnail',
        pystac.Asset(title='Thumbnail',
                     href=base_url + '_thumb_large.jpg',
                     media_type=pystac.MediaType.JPEG,
                     roles=['thumbnail']))
    item.add_asset(
        'index',
        pystac.Asset(title='HTML Page',
                     href=os.path.dirname(base_url) + '/index.html',
                     media_type='text/html'))
    item.add_asset(
        'ANG',
        pystac.Asset(title='ANG Metadata',
                     href=base_url + '_ANG.txt',
                     media_type='text/plain',
                     roles=['metadata']))
    item.add_asset(
        'MTL',
        pystac.Asset(title='MTL Metadata',
                     href=base_url + '_MTL.txt',
                     media_type='text/plain',
                     roles=['metadata']))
    item.add_asset(
        'BQA',
        pystac.Asset(title='Quality Band',
                     href=base_url + '_BQA.TIF',
                     media_type=pystac.MediaType.GEOTIFF,
                     roles=['quality']))

    # Add bands
    for band_id, info in band_info.items():
        band_url = f"{base_url}_{band_id}.TIF"
        asset = pystac.Asset(href=band_url, media_type=pystac.MediaType.COG)
        bands = [info['band']]
        item.ext.eo.set_bands(bands, asset)
        item.add_asset(band_id, asset)

        # If this asset has a different GSD than the item, set it on the asset
        if info['gsd'] != item.common_metadata.gsd:
            item.common_metadata.set_gsd(info['gsd'], asset)
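``band_info`` is not shown above; a sketch of the shape this code assumes for it (the band ids, names, and GSDs are illustrative only):

# Each entry pairs an eo Band with the ground sample distance of that file.
band_info = {
    'B4': {
        'band': Band.create(name='B4', common_name='red'),
        'gsd': 30.0,
    },
    'B8': {
        'band': Band.create(name='B8', common_name='pan'),
        'gsd': 15.0,
    },
}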
Example 7
from datetime import datetime

import pystac as stac  # ``stac`` below is assumed to be pystac under an alias

item1 = stac.Item(id='canopy-height',
                  geometry=footprint1,
                  bbox=bbox1,
                  datetime=datetime(2018, 7, 5),
                  properties={})

item2 = stac.Item(id='landcover',
                  geometry=footprint2,
                  bbox=bbox2,
                  datetime=datetime(2019, 7, 5),
                  properties={})

item1.add_asset(key='data',
                asset=stac.Asset(href=path1, media_type=stac.MediaType.COG))
item1.add_asset(key='metadata',
                asset=stac.Asset(href=metapath1,
                                 media_type=stac.MediaType.XML))
item1.add_asset(key='thumbnail',
                asset=stac.Asset(href=thumbpath1,
                                 media_type=stac.MediaType.PNG))
item2.add_asset(key='data',
                asset=stac.Asset(href=path2, media_type=stac.MediaType.COG))
item2.add_asset(key='metadata',
                asset=stac.Asset(href=metapath2,
                                 media_type=stac.MediaType.XML))
item2.add_asset(key='thumbnail',
                asset=stac.Asset(href=thumbpath2,
                                 media_type=stac.MediaType.PNG))
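A possible continuation that collects the two items into a catalog and writes it out; the catalog id, description, output directory, and layout are assumptions:

catalog = stac.Catalog(id='demo-catalog',
                       description='Canopy height and landcover rasters')
catalog.add_items([item1, item2])
# Lay out item/catalog hrefs under ./stac and save a self-contained catalog.
catalog.normalize_hrefs('./stac')
catalog.save(catalog_type=stac.CatalogType.SELF_CONTAINED)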
Example 8
def aviris_series_to_item(s2_scenes_map, series):
    """ Convert AVIRIS CSV series to another Series compatible with stacframes

    s2_scenes_map is an object where the key is Flight Scene and the value is an ftp url to
    that Flight Scene's s2 atmo corrected data file

    This method is currently valid for both AVIRIS Classic and AVIRIS NG.

    """
    year = int(series["Year"])
    hour = min(max(int(series.get("UTC Hour", 0)), 0), 23)
    minute = min(max(int(series.get("UTC Minute", 0)), 0), 59)
    try:
        flight_dt = datetime(int(year),
                             int(series["Month"]),
                             int(series["Day"]),
                             tzinfo=timezone.utc) + timedelta(hours=hour,
                                                              minutes=minute)
    except (ValueError, OverflowError):
        [month, day, year] = series["Date"].split("/")
        flight_dt = datetime(
            int(year), int(month), int(day), tzinfo=timezone.utc) + timedelta(
                hours=hour, minutes=minute)

    item_id = "aviris_{}".format(series["Flight Scene"])

    lons = [float(series["Lon{}".format(n)]) for n in range(1, 5)]
    lats = [float(series["Lat{}".format(n)]) for n in range(1, 5)]
    bbox = [min(lons), min(lats), max(lons), max(lats)]
    try:
        geometry = kml_poly_to_geom(series["kml_poly"])
    except IndexError:
        geometry = box(*bbox)

    properties = {
        k: series[k]
        for k in (
            "Year",
            "Site Name",
            "NASA Log",
            "Investigator",
            "Comments",
            "Name",
            "Flight Scene",
            "RDN Ver",
            "Scene",
            "GEO Ver",
            "YY",
            "Tape",
            "Flight ID",
            "Flight",
            "Run",
            "Pixel Size",
            "Rotation",
            "Number of Lines",
            "Number of Samples",
            "Solar Elevation",
            "Solar Azimuth",
            "Mean Scene Elevation",
            "Min Scene Elevation",
            "Max Scene Elevation",
            "File Size (Bytes)",
            "Gzip File Size (Bytes)",
        )
    }
    # Add any layer ids of interest
    properties["layer:ids"] = [series["collection"]]

    assets = {
        "ftp":
        pystac.Asset(
            series["link_ftp"],
            title="ftp",
            description=
            "AVIRIS data archive. The file size is described by the 'Gzip File Size' property.",
            media_type="application/gzip",
        ).to_dict(),
        "kml_overlay":
        pystac.Asset(
            series["link_kml_overlay"],
            title="kml_overlay",
            description="KML file describing the bounding box of the flight",
            media_type="application/vnd.google-earth.kml+xml",
        ).to_dict(),
        "kml_outline":
        pystac.Asset(
            series["link_kml_outline"],
            title="kml_outline",
            description="KML file describing the flight outline",
            media_type="application/vnd.google-earth.kml+xml",
        ).to_dict(),
        "rgb":
        pystac.Asset(
            series["link_rgb"],
            title="rgb",
            description="Full resolution RGB image captured by the flight",
            media_type="image/jpeg",
        ).to_dict(),
        "rgb_small":
        pystac.Asset(
            series["link_rgb_small"],
            title="rgb_small",
            description=
            "A lower resolution thumbnail of the same image as the 'rgb' asset.",
            media_type="image/jpeg",
        ).to_dict(),
        "flight_log":
        pystac.Asset(
            series["link_log"],
            title="flight_log",
            description=
            "HTML page with table listing the runs for this flight.",
            media_type="text/html",
        ).to_dict(),
    }
    if series["Name"] in s2_scenes_map:
        # Include in properties so we can STAC API query for Items with this asset
        properties["has_refl"] = True
        assets["ftp_refl"] = pystac.Asset(
            s2_scenes_map[series["Name"]],
            title="ftp_refl",
            description=
            "AVIRIS data archive of atmospheric corrected imagery for this scene.",
            media_type="application/gzip",
        ).to_dict()

    return pd.Series({
        "id": item_id,
        "datetime": flight_dt,
        "geometry": geometry,
        "bbox": bbox,
        "properties": properties,
        "assets": assets,
        "links": [],
    })
Example 9
def create_stac(self) -> pystac.Asset:
    stac = pystac.Asset(href=self.href,
                        properties=self.properties,
                        media_type=self.get_content_type())
    return stac
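Note that the ``properties`` keyword above is the pre-1.0 pystac spelling; a sketch of the assumed pystac >= 1.0 equivalent uses ``extra_fields`` instead:

def create_stac(self) -> pystac.Asset:
    # extra_fields replaces the old ``properties`` argument on Asset
    return pystac.Asset(href=self.href,
                        extra_fields=self.properties,
                        media_type=self.get_content_type())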
Example 10
def image_asset_from_href(
        asset_href: str,
        item: pystac.Item,
        resolution_to_shape: Dict[int, Tuple[int, int]],
        proj_bbox: List[float],
        media_type: Optional[str] = None) -> Tuple[str, pystac.Asset]:
    logger.debug(f'Creating asset for image {asset_href}')

    _, ext = os.path.splitext(asset_href)
    if media_type is not None:
        asset_media_type = media_type
    else:
        if ext.lower() == '.jp2':
            asset_media_type = pystac.MediaType.JPEG2000
        elif ext.lower() in ['.tiff', '.tif']:
            asset_media_type = pystac.MediaType.GEOTIFF
        else:
            raise Exception(
                f'Must supply a media type for asset : {asset_href}')

    # Handle preview image

    if '_PVI' in asset_href:
        asset = pystac.Asset(href=asset_href,
                             media_type=asset_media_type,
                             title='True color preview',
                             roles=['data'])
        item.ext.eo.set_bands([
            SENTINEL_BANDS['B04'], SENTINEL_BANDS['B03'], SENTINEL_BANDS['B02']
        ], asset)
        return ('preview', asset)

    # Extract gsd and proj info
    gsd = extract_gsd(asset_href)
    shape = list(resolution_to_shape[int(gsd)])
    transform = transform_from_bbox(proj_bbox, shape)

    def set_asset_properties(asset):
        item.common_metadata.set_gsd(gsd, asset)
        item.ext.projection.set_shape(shape, asset)
        item.ext.projection.set_bbox(proj_bbox, asset)
        item.ext.projection.set_transform(transform, asset)

    # Handle band image

    band_id_search = re.search(r'_(B\w{2})_', asset_href)
    if band_id_search is not None:
        band_id = band_id_search.group(1)
        band = SENTINEL_BANDS[band_id]
        asset = pystac.Asset(href=asset_href,
                             media_type=asset_media_type,
                             title=band.description,
                             roles=['data'])
        item.ext.eo.set_bands([SENTINEL_BANDS[band_id]], asset)
        set_asset_properties(asset)
        return (band_id, asset)

    # Handle auxiliary images

    if '_TCI_' in asset_href:
        # True color
        asset = pystac.Asset(href=asset_href,
                             media_type=asset_media_type,
                             title='True color image',
                             roles=['data'])
        item.ext.eo.set_bands([
            SENTINEL_BANDS['B04'], SENTINEL_BANDS['B03'], SENTINEL_BANDS['B02']
        ], asset)
        set_asset_properties(asset)
        return (f'visual-{asset_href[-7:-4]}', asset)

    if '_AOT_' in asset_href:
        # Aerosol
        asset = pystac.Asset(href=asset_href,
                             media_type=asset_media_type,
                             title='Aerosol optical thickness (AOT)',
                             roles=['data'])
        set_asset_properties(asset)
        return (f'AOT-{asset_href[-7:-4]}', asset)

    if '_WVP_' in asset_href:
        # Water vapor
        asset = pystac.Asset(href=asset_href,
                             media_type=asset_media_type,
                             title='Water vapour (WVP)',
                             roles=['data'])
        set_asset_properties(asset)
        return (f'WVP-{asset_href[-7:-4]}', asset)

    if '_SCL_' in asset_href:
        # Classification map
        asset = pystac.Asset(href=asset_href,
                             media_type=asset_media_type,
                             title='Scene classification map (SCL)',
                             roles=['data'])
        set_asset_properties(asset)
        return (f'SCL-{asset_href[-7:-4]}', asset)

    raise ValueError(f'Unexpected asset: {asset_href}')
Example 11
def create_item(
        granule_href: str,
        additional_providers: Optional[List[pystac.Provider]] = None,
        read_href_modifier: Optional[ReadHrefModifier] = None) -> pystac.Item:
    """Create a STAC Item from a Sentinel-2 granule.

    Arguments:
        granule_href: The HREF to the granule. This is expected to be a path
            to a SAFE archive, e.g. : https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/01/C/CV/2016/03/27/S2A_MSIL2A_20160327T204522_N0212_R128_T01CCV_20210214T042702.SAFE
        additional_providers: Optional list of additional providers to set into the Item
        read_href_modifier: A function that takes an HREF and returns a modified HREF.
            This can be used to modify a HREF to make it readable, e.g. appending
            an Azure SAS token or creating a signed URL.

    Returns:
        pystac.Item: An item representing the Sentinel 2 scene
    """ # noqa

    safe_manifest = SafeManifest(granule_href, read_href_modifier)

    product_metadata = ProductMetadata(safe_manifest.product_metadata_href,
                                       read_href_modifier)
    granule_metadata = GranuleMetadata(safe_manifest.granule_metadata_href,
                                       read_href_modifier)

    item = pystac.Item(id=product_metadata.product_id,
                       geometry=product_metadata.geometry,
                       bbox=product_metadata.bbox,
                       datetime=product_metadata.datetime,
                       properties={})

    # --Common metadata--

    item.common_metadata.providers = [SENTINEL_PROVIDER]

    if additional_providers is not None:
        item.common_metadata.providers.extend(additional_providers)

    item.common_metadata.platform = product_metadata.platform
    item.common_metadata.constellation = SENTINEL_CONSTELLATION
    item.common_metadata.instruments = SENTINEL_INSTRUMENTS

    # --Extensions--

    # eo

    item.ext.enable('eo')
    item.ext.eo.cloud_cover = granule_metadata.cloudiness_percentage

    # sat

    item.ext.enable('sat')
    item.ext.sat.orbit_state = OrbitState(product_metadata.orbit_state.lower())
    item.ext.sat.relative_orbit = product_metadata.relative_orbit

    # proj
    item.ext.enable('projection')
    item.ext.projection.epsg = granule_metadata.epsg
    if item.ext.projection.epsg is None:
        raise ValueError(
            f'Could not determine EPSG code for {granule_href}, which is required.'
        )

    # s2 properties
    item.properties.update({
        **product_metadata.metadata_dict,
        **granule_metadata.metadata_dict
    })

    # --Assets--

    # Metadata

    item.add_asset(*safe_manifest.create_asset())
    item.add_asset(*product_metadata.create_asset())
    item.add_asset(*granule_metadata.create_asset())
    item.add_asset(
        INSPIRE_METADATA_ASSET_KEY,
        pystac.Asset(href=safe_manifest.inspire_metadata_href,
                     media_type=pystac.MediaType.XML,
                     roles=['metadata']))
    item.add_asset(
        DATASTRIP_METADATA_ASSET_KEY,
        pystac.Asset(href=safe_manifest.datastrip_metadata_href,
                     media_type=pystac.MediaType.XML,
                     roles=['metadata']))

    # Image assets
    proj_bbox = granule_metadata.proj_bbox

    image_assets = dict([
        image_asset_from_href(os.path.join(granule_href, image_path), item,
                              granule_metadata.resolution_to_shape, proj_bbox,
                              product_metadata.image_media_type)
        for image_path in product_metadata.image_paths
    ])

    for key, asset in image_assets.items():
        assert key not in item.assets
        item.add_asset(key, asset)

    # Thumbnail

    if safe_manifest.thumbnail_href is not None:
        item.add_asset(
            "preview",
            pystac.Asset(href=safe_manifest.thumbnail_href,
                         media_type=pystac.MediaType.COG,
                         roles=['thumbnail']))

    # --Links--

    item.links.append(SENTINEL_LICENSE)

    return item
Example 12
    def test_asset_bands(self) -> None:
        item = pystac.Item.from_file(self.PLANET_EXAMPLE_URI)
        item2 = pystac.Item.from_file(self.SENTINEL2_EXAMPLE_URI)

        # Get
        data_asset = item.assets["data"]
        asset_bands = RasterExtension.ext(data_asset).bands
        assert asset_bands is not None
        self.assertEqual(len(asset_bands), 4)
        self.assertEqual(asset_bands[0].nodata, 0)
        self.assertEqual(asset_bands[0].sampling, Sampling.AREA)
        self.assertEqual(asset_bands[0].unit, "W⋅sr−1⋅m−2⋅nm−1")
        self.assertEqual(asset_bands[0].data_type, DataType.UINT16)
        self.assertEqual(asset_bands[0].scale, 0.01)
        self.assertEqual(asset_bands[0].offset, 0)
        self.assertEqual(asset_bands[0].spatial_resolution, 3)

        band0_stats = asset_bands[0].statistics
        assert band0_stats is not None
        self.assertEqual(band0_stats.minimum, 1962)
        self.assertEqual(band0_stats.maximum, 32925)
        self.assertEqual(band0_stats.mean, 8498.9400644319)
        self.assertEqual(band0_stats.stddev, 5056.1292002722)
        self.assertEqual(band0_stats.valid_percent, 61.09)

        band0_hist = asset_bands[0].histogram
        assert band0_hist is not None
        self.assertEqual(band0_hist.count, 256)
        self.assertEqual(band0_hist.min, 1901.288235294118)
        self.assertEqual(band0_hist.max, 32985.71176470588)
        self.assertEqual(len(band0_hist.buckets), band0_hist.count)

        index_asset = item.assets["metadata"]
        asset_bands = RasterExtension.ext(index_asset).bands
        self.assertIs(None, asset_bands)

        b09_asset = item2.assets["B09"]
        b09_bands = RasterExtension.ext(b09_asset).bands
        assert b09_bands is not None
        self.assertEqual(b09_bands[0].nodata, "nan")

        # Set
        b2_asset = item2.assets["B02"]
        self.assertEqual(
            get_opt(
                get_opt(RasterExtension.ext(b2_asset).bands)
                [0].statistics).maximum,
            19264,
        )
        b1_asset = item2.assets["B01"]
        RasterExtension.ext(b2_asset).bands = RasterExtension.ext(
            b1_asset).bands

        new_b2_asset_bands = RasterExtension.ext(item2.assets["B02"]).bands

        self.assertEqual(
            get_opt(get_opt(new_b2_asset_bands)[0].statistics).maximum, 20567)

        new_b2_asset_band0 = get_opt(new_b2_asset_bands)[0]
        new_b2_asset_band0.nodata = NoDataStrings.INF

        item2.validate()

        # Check adding a new asset
        new_stats = [
            Statistics.create(minimum=0,
                              maximum=10000,
                              mean=5000,
                              stddev=10,
                              valid_percent=88),
            Statistics.create(minimum=-1,
                              maximum=1,
                              mean=0,
                              stddev=1,
                              valid_percent=100),
            Statistics.create(minimum=1,
                              maximum=255,
                              mean=200,
                              stddev=3,
                              valid_percent=100),
        ]
        # new_histograms = []
        with open(self.GDALINFO_EXAMPLE_URI) as gdaljson_file:
            gdaljson_data = json.load(gdaljson_file)
            new_histograms = list(
                map(
                    lambda band: Histogram.from_dict(band["histogram"]),
                    gdaljson_data["bands"],
                ))
        new_bands = [
            RasterBand.create(
                nodata=1,
                unit="test1",
                statistics=new_stats[0],
                histogram=new_histograms[0],
            ),
            RasterBand.create(
                nodata=2,
                unit="test2",
                statistics=new_stats[1],
                histogram=new_histograms[1],
            ),
            RasterBand.create(
                nodata=NoDataStrings.NINF,
                unit="test3",
                statistics=new_stats[2],
                histogram=new_histograms[2],
            ),
        ]
        asset = pystac.Asset(href="some/path.tif",
                             media_type=pystac.MediaType.GEOTIFF)
        RasterExtension.ext(asset).bands = new_bands
        item.add_asset("test", asset)

        self.assertEqual(len(item.assets["test"].extra_fields["raster:bands"]),
                         3)
        self.assertEqual(
            item.assets["test"].extra_fields["raster:bands"][1]["statistics"]
            ["minimum"],
            -1,
        )
        self.assertEqual(
            item.assets["test"].extra_fields["raster:bands"][1]["histogram"]
            ["min"],
            3848.354901960784,
        )
        self.assertEqual(
            item.assets["test"].extra_fields["raster:bands"][2]["nodata"],
            "-inf")

        for s in new_stats:
            s.minimum = None
            s.maximum = None
            s.mean = None
            s.stddev = None
            s.valid_percent = None
            self.assertEqual(len(s.properties), 0)

        for b in new_bands:
            b.bits_per_sample = None
            b.data_type = None
            b.histogram = None
            b.nodata = None
            b.sampling = None
            b.scale = None
            b.spatial_resolution = None
            b.statistics = None
            b.unit = None
            b.offset = None
            self.assertEqual(len(b.properties), 0)

        new_stats[2].apply(minimum=0,
                           maximum=10000,
                           mean=5000,
                           stddev=10,
                           valid_percent=88)
        new_stats[1].apply(minimum=-1,
                           maximum=1,
                           mean=0,
                           stddev=1,
                           valid_percent=100)
        new_stats[0].apply(minimum=1,
                           maximum=255,
                           mean=200,
                           stddev=3,
                           valid_percent=100)
        new_bands[2].apply(
            nodata=1,
            unit="test1",
            statistics=new_stats[2],
            histogram=new_histograms[0],
        )
        new_bands[1].apply(
            nodata=2,
            unit="test2",
            statistics=new_stats[1],
            histogram=new_histograms[1],
        )
        new_bands[0].apply(
            nodata=NoDataStrings.NAN,
            unit="test3",
            statistics=new_stats[0],
            histogram=new_histograms[2],
        )
        RasterExtension.ext(item.assets["test"]).apply(new_bands)
        self.assertEqual(
            item.assets["test"].extra_fields["raster:bands"][0]["statistics"]
            ["minimum"],
            1,
        )
        self.assertEqual(
            item.assets["test"].extra_fields["raster:bands"][0]["nodata"],
            "nan")
Example 13
def download_gls(year: str, s3_dst: str, workdir: Path, overwrite: bool = False):
    log = setup_logging()
    assets = {}
    out_stac = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}.stac-item.json"

    if s3_head_object(str(out_stac)) is not None and not overwrite:
        log.info(f"{out_stac} exists, skipping")
        return

    # Download the files
    for name, file in FILES.items():
        # Create a temporary directory to work with
        with TemporaryDirectory(prefix=str(workdir)) as tmpdir:  # prefix must be a str, not a Path
            log.info(f"Working on {file}")
            url = URL(
                BASE_URL.format(
                    record_id=YEARS[year][1], year_key=YEARS[year][0], file=file
                )
            )

            dest_url = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}_{name}.tif"

            if s3_head_object(str(dest_url)) is None or overwrite:
                log.info(f"Downloading {url}")

                try:
                    local_file = Path(tmpdir) / str(url.name)
                    # Download the file
                    download_file(url, local_file)

                    log.info(f"Downloaded file to {local_file}")
                    local_file_small = translate_file_deafrica_extent(local_file)
                    log.info(f"Clipped Africa out and saved to {local_file_small}")
                    resampling = "nearest" if name in DO_NEAREST else "bilinear"

                    # Create a COG in memory and upload to S3
                    with MemoryFile() as mem_dst:
                        # Creating the COG, with a memory cache and no download. Shiny.
                        cog_translate(
                            local_file_small,
                            mem_dst.name,
                            cog_profiles.get("deflate"),
                            in_memory=True,
                            nodata=255,
                            overview_resampling=resampling,
                        )
                        mem_dst.seek(0)
                        s3_dump(mem_dst, str(dest_url), ACL="bucket-owner-full-control")
                        log.info(f"File written to {dest_url}")
                except Exception:
                    log.exception(f"Failed to process {url}")
                    exit(1)
            else:
                log.info(f"{dest_url} exists, skipping")

            assets[name] = pystac.Asset(
                href=str(dest_url), roles=["data"], media_type=pystac.MediaType.COG
            )

    # Write STAC document from the last-written file
    source_doc = f"https://zenodo.org/record/{YEARS[year][1]}"
    item = create_stac_item(
        str(dest_url),
        id=str(odc_uuid("Copernicus Global Land Cover", "3.0.1", [source_doc])),
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": PRODUCT_NAME,
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )
    item.add_links(
        [
            pystac.Link(
                target=source_doc,
                title="Source",
                rel=pystac.RelType.DERIVED_FROM,
                media_type="text/html",
            )
        ]
    )
    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        str(out_stac),
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {out_stac}")
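A sketch of one invocation; the year, destination bucket, and work directory are placeholders:

download_gls("2019", "s3://example-bucket/gls", Path("/tmp/gls-work"), overwrite=False)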
Example 14
def create_asset(self):
    asset = pystac.Asset(href=self.href,
                         media_type=pystac.MediaType.XML,
                         roles=['metadata'])
    return (GRANULE_METADATA_ASSET_KEY, asset)
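The ``(key, asset)`` tuple is meant to be splatted straight into ``Item.add_asset``, as Example 11 does; a one-line usage sketch (the ``item`` and ``granule_metadata`` objects are assumed):

item.add_asset(*granule_metadata.create_asset())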
Example 15
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--pipeline-uri",
                        type=str,
                        help="A URI to JSON with instructions")
    parser.add_argument("--pipeline", type=str, help="JSON with instructions")
    parser.add_argument(
        "--aviris-stac-id",
        type=str,
        help="STAC Item ID to process from the STAC collection")
    parser.add_argument(
        "--aviris-collection-id",
        type=str,
        default=AVIRIS_ARCHIVE_COLLECTION_ID,
    )
    parser.add_argument(
        "--stac-api-uri",
        type=str,
        default=os.environ.get("STAC_API_URI", "http://franklin:9090"),
    )
    parser.add_argument("--s3-bucket",
                        type=str,
                        default=os.environ.get("S3_BUCKET", "aviris-data"))
    parser.add_argument(
        "--s3-prefix",
        type=str,
        default=os.environ.get("S3_PREFIX"),
    )
    parser.add_argument("--temp-dir",
                        type=str,
                        default=os.environ.get("TEMP_DIR", None))
    parser.add_argument("--output-format",
                        type=str,
                        default=os.environ.get("GDAL_OUTPUT_FORMAT", "COG"))
    parser.add_argument(
        "--keep-temp-dir",
        action="store_true",
        help=
        "If provided, the script does not delete the temporary directory before exiting. Useful for debugging.",
    )
    parser.add_argument(
        "--skip-large",
        action="store_true",
        help=
        "If provided, the script will not process any COG larger than 200 MB, to keep processing times reasonable. Useful for debugging.",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help=
        "If provided, force reingest of the STAC Item even if it is already present in the catalog.",
    )
    parser.add_argument(
        "--l2",
        action="store_true",
        help="If provided, use L2 imagery instead of L1.",
    )

    try:
        warpMemoryLimit = int(os.environ.get("GDAL_WARP_MEMORY_LIMIT", None))
    except TypeError:
        warpMemoryLimit = None

    # TODO: replace it with parser.parse_args() later
    cli_args, cli_unknown = parser.parse_known_args()

    # parse all cli arguments
    args = CliConfig(cli_args, cli_unknown)

    s3 = boto3.client("s3")
    stac_client = STACClient(args.stac_api_uri)

    cog_collection = get_aviris_cog_collection(args.level)

    # GET STAC Item from AVIRIS Catalog
    item = stac_client.get_collection_item(args.aviris_collection_id,
                                           args.aviris_stac_id)

    asset_key = 'https_refl' if args.l2 else 'https'
    asset = item.assets.get(asset_key, None)
    if asset is None:
        raise ValueError(
            f'STAC Item {args.aviris_stac_id} from {args.stac_api_uri} has no asset "{asset_key}"!'
        )
    scene_name = item.properties.get("Name")

    # Create new COG STAC Item
    cog_item_id = "{}_{}_{}".format(
        cog_collection.id,
        item.properties.get("Name"),
        item.properties.get("Scene"),
    )

    item.properties['eo:bands'] = cog_collection.properties['eo:bands']
    item.properties['hsi:wavelength_min'] = cog_collection.properties[
        'hsi:wavelength_min']
    item.properties['hsi:wavelength_max'] = cog_collection.properties[
        'hsi:wavelength_max']
    item.properties.pop('layer:ids', None)

    cog_item = pystac.Item(
        cog_item_id,
        item.geometry,
        item.bbox,
        item.datetime,
        item.properties,
        stac_extensions=COG_ITEM_EXTENSIONS,
        collection=cog_collection.id,
    )

    # Create COG Collection if it doesn't exist
    if not stac_client.has_collection(cog_collection.id):
        stac_client.post_collection(cog_collection)

    if not args.force:
        # Exit early if COG STAC Item already exists
        try:
            stac_client.get_collection_item(cog_collection.id, cog_item_id)
            print(cog_collection.id)
            print(cog_item_id)
            logger.info(f'STAC Item {cog_item_id} already exists. Exiting.')
            activation_output(cog_item_id, cog_collection.id)
            return
        except requests.exceptions.HTTPError:
            pass

    # Create tmpdir
    temp_dir = Path(args.temp_dir if args.temp_dir is not None else mkdtemp())
    temp_dir.mkdir(parents=True, exist_ok=True)
    try:
        # Retrieve AVIRIS GZIP for matching scene name
        local_archive = Path(temp_dir, Path(asset.href).name)
        if local_archive.exists():
            logger.info(f'Using existing archive: {local_archive}')
        else:
            logger.info(f'Downloading {asset.href} archive {local_archive}...')
            gzip_https_url = asset.href
            with DownloadProgressBar(unit='B',
                                     unit_scale=True,
                                     miniters=1,
                                     desc=gzip_https_url.split('/')[-1]) as t:
                urllib.request.urlretrieve(gzip_https_url,
                                           filename=local_archive,
                                           reporthook=t.update_to)

        # Retrieve file names from archive and extract if not already extracted to temp_dir
        extract_path = Path(temp_dir, f'{scene_name}_{args.level}')
        with tarfile.open(local_archive, mode="r") as tar_gz_fp:
            logger.info(f'Retrieving filenames from {local_archive}')
            with timing("Query archive"):
                tar_files = tar_gz_fp.getnames()
            logger.info(f"Files: {tar_files}")

            if extract_path.exists():
                logger.info(f'Skipping extract, exists at {extract_path}')
            else:
                logger.info(f"Extracting {local_archive} to {extract_path}")
                with timing("Extract"):
                    tar_gz_fp.extractall(extract_path)

        # Find HDR data files in unzipped package
        hdr_ext = '.hdr' if args.l2 else 'ort_img.hdr'
        hdr_files = [tf for tf in tar_files if tf.endswith(hdr_ext)]
        logger.info("HDR Files: {}".format(hdr_files))
        for idx, hdr_file_w_ext in enumerate(hdr_files):
            hdr_file_w_ext_path = Path(hdr_file_w_ext)
            hdr_path = Path(extract_path, hdr_file_w_ext_path.with_suffix(""))
            cog_path = Path(
                f'{hdr_path.with_suffix("")}_{args.output_asset_name}.tiff')

            if args.skip_large and os.path.getsize(hdr_path) > 0.2 * GB:
                file_mb = floor(os.path.getsize(hdr_path) / 1024 / 1024)
                logger.info(
                    "--skip-large provided. Skipping {} with size {}mb".format(
                        hdr_path, file_mb))
                continue

            # Convert HDR data to pixel interleaved COG with GDAL
            # NUM_THREADS only speeds up compression and overview generation
            # gdal.Warp is used to fix rasters rotation
            # NOTE:
            # We can't directly write TIFFs on S3 as the result of the gdal.Warp operation
            # see: https://github.com/OSGeo/gdal/issues/1189
            warp_opts = gdal.WarpOptions(callback=warp_callback,
                                         warpOptions=["NUM_THREADS=ALL_CPUS"],
                                         creationOptions=[
                                             "NUM_THREADS=ALL_CPUS",
                                             "COMPRESS=DEFLATE", "BIGTIFF=YES",
                                             "TILED=YES"
                                         ],
                                         multithread=True,
                                         warpMemoryLimit=warpMemoryLimit,
                                         format=args.output_format)
            logger.info(f"Converting {hdr_path} to {cog_path}...")
            with timing("GDAL Warp"):
                gdal.Warp(str(cog_path), str(hdr_path), options=warp_opts)

            # read metadata from the transformed TIFF
            cog_ds = gdal.Open(str(cog_path))
            cog_proj = osr.SpatialReference(wkt=cog_ds.GetProjection())
            cog_proj.AutoIdentifyEPSG()

            # set projection
            cog_item.properties['proj:epsg'] = int(
                cog_proj.GetAttrValue('AUTHORITY', 1))

            # Upload  COG and metadata, if written, to S3 bucket + key
            key = Path(
                args.s3_prefix,
                str(item.properties.get("Year")),
                str(item.properties.get("Name")),
                cog_path.name,
            )
            s3_uri = f's3://{args.s3_bucket}/{key}'
            logger.info(f"Uploading {cog_path} to {s3_uri}")
            s3.upload_file(
                str(cog_path),
                args.s3_bucket,
                str(key),
                Callback=ProgressPercentage(str(cog_path)),
                Config=TransferConfig(multipart_threshold=1 * GB),
            )
            cog_metadata_path = cog_path.with_suffix(".tiff.aux.xml")
            if cog_metadata_path.exists():
                metadata_key = Path(args.s3_prefix, cog_metadata_path.name)
                metadata_s3_uri = f's3://{args.s3_bucket}/{metadata_key}'
                logger.info(
                    f'Uploading {cog_metadata_path} to {metadata_s3_uri}')
                s3.upload_file(str(cog_metadata_path), args.s3_bucket,
                               str(metadata_key))

            # Add assets to COG STAC Item
            cog_item.add_asset(
                f'{args.output_asset_name}_{idx}',
                pystac.Asset(s3_uri,
                             media_type=pystac.MediaType.COG,
                             roles=["data"]),
            )
            if cog_metadata_path.exists():
                cog_item.add_asset(
                    f'metadata_{idx}',
                    pystac.Asset(
                        metadata_s3_uri,
                        media_type=pystac.MediaType.XML,
                        roles=["metadata"],
                    ),
                )
    finally:
        if not args.keep_temp_dir:
            logger.info(f"Removing temp dir: {temp_dir}")
            shutil.rmtree(temp_dir, ignore_errors=True)

    # Add COG Item to AVIRIS L2 STAC Collection
    logger.info(f"POST Item {cog_item.id} to {args.stac_api_uri}")
    item_data = stac_client.post_collection_item(cog_collection.id, cog_item)
    if item_data.get('id', None):
        logger.info(f"Success: {item_data['id']}")
        activation_output(item_data['id'], cog_collection.id)
    else:
        logger.error(f"Failure: {item_data}")
        return -1
Example 16
    def to_stac(self):
        props = deepcopy(self.item_metadata['properties'])

        # Core Item properties
        item_id = self.item_metadata['id']
        geom = self.item_metadata['geometry']
        bbox = list(shape(geom).bounds)
        datetime = str_to_datetime(props.pop('acquired'))

        item = pystac.Item(id=item_id,
                           geometry=geom,
                           bbox=bbox,
                           datetime=datetime,
                           properties={})

        # Common metadata
        item.common_metadata.providers = [PLANET_PROVIDER]
        item.common_metadata.gsd = props.pop('gsd')
        item.common_metadata.created = str_to_datetime(props.pop('published'))
        item.common_metadata.updated = str_to_datetime(props.pop('updated'))
        item.common_metadata.constellation = props.pop('provider')
        item.common_metadata.platform = props.pop('satellite_id')
        # Some do not have instrument (e.g. REOrthoTile)
        instrument = props.pop('instrument', None)
        if instrument is not None:
            item.common_metadata.instruments = [instrument]

        # eo
        item.ext.enable('eo')
        # STAC uses 0-100, planet 0-1
        item.ext.eo.cloud_cover = props.pop('cloud_cover') * 100

        # view
        item.ext.enable('view')
        item.ext.view.off_nadir = props.pop('view_angle')
        if 'satellite_azimuth' in props:
            item.ext.view.azimuth = props.pop('satellite_azimuth')
        item.ext.view.sun_azimuth = props.pop('sun_azimuth')
        item.ext.view.sun_elevation = props.pop('sun_elevation')

        # Add all additional properties with Planet extension designation.
        whitelisted_props = [
            'anomalous_pixels', 'ground_control', 'item_type',
            'pixel_resolution', 'quality_category', 'strip_id',
            'publishing_stage', 'clear_percent'
        ]
        for name in whitelisted_props:
            if name in props:
                item.properties['{}:{}'.format(PLANET_EXTENSION_PREFIX,
                                               name)] = props[name]

        item_type = props.pop('item_type')
        planet_url = f'https://api.planet.com/data/v1/item-types/{item_type}/items/{item_id}'
        via_link = Link('via', planet_url)
        item.add_link(via_link)

        geotransform = None
        for planet_asset in self.item_assets:
            href = make_absolute_href(planet_asset['path'],
                                      start_href=self.base_dir,
                                      start_is_dir=True)

            media_type = planet_asset['media_type']

            asset_type = planet_asset['annotations']['planet/asset_type']
            bundle_type = planet_asset['annotations']['planet/bundle_type']

            # Planet data is delivered as COGs
            if media_type == 'image/tiff' and asset_type not in [
                    "udm", "udm2"
            ]:
                media_type = pystac.MediaType.COG
                roles = ['visual']
                thumbnail_path = f"{os.path.splitext(href)[0]}.thumbnail.png"
                with rasterio.open(href) as dataset:
                    height, width = dataset.shape
                    geotransform = dataset.transform
                    if width > height:
                        width, height = 256, int(height / width * 256)
                    else:
                        width, height = int(width / height * 256), 256

                    profile = dataset.profile
                    profile.update(driver='PNG')
                    profile.update(width=width)
                    profile.update(height=height)

                    if "analytic" in asset_type:
                        data = dataset.read(indexes=[3, 2, 1],
                                            out_shape=(3, height, width),
                                            resampling=Resampling.cubic)
                        profile.update(count=3)
                    else:
                        data = dataset.read(out_shape=(int(dataset.count),
                                                       height, width),
                                            resampling=Resampling.cubic)

                    with rasterio.open(thumbnail_path, 'w', **profile) as dst:
                        dst.write(data)

                item.add_asset(
                    'thumbnail',
                    pystac.Asset(href=thumbnail_path,
                                 media_type=pystac.MediaType.PNG,
                                 roles=['thumbnail']))
            else:
                roles = ['metadata']

            # Use the asset type as the key if it's the same as the bundle
            # type, as this appears to be the 'main' asset of the bundle type.
            # If not, use a key that combines the bundle type and asset type.
            key = asset_type
            if asset_type != bundle_type:
                key = '{}:{}'.format(bundle_type, asset_type)

            # Build the asset once, keeping its roles; it is added under ``key`` below.
            asset = pystac.Asset(href=href,
                                 media_type=media_type,
                                 roles=roles)

            if media_type == pystac.MediaType.COG:
                # add bands to asset
                if item_type.startswith('SkySat'):
                    if "panchro" in asset_type:
                        bands = [SKYSAT_BANDS['PAN']]
                    elif "analytic" in asset_type:
                        bands = [
                            SKYSAT_BANDS['BLUE'], SKYSAT_BANDS['GREEN'],
                            SKYSAT_BANDS['RED'], SKYSAT_BANDS['NIR']
                        ]
                    else:
                        bands = [
                            SKYSAT_BANDS['RED'], SKYSAT_BANDS['GREEN'],
                            SKYSAT_BANDS['BLUE']
                        ]
                    item.ext.eo.set_bands(bands, asset)

            item.add_asset(key, asset)

        # proj
        if 'epsg_code' in props:
            item.ext.enable('projection')
            item.ext.projection.epsg = props.pop('epsg_code')
            if geotransform is not None:
                item.ext.projection.transform = geotransform
                item.ext.projection.shape = [height, width]

        if self.metadata_href:
            item.add_asset(
                'metadata',
                pystac.Asset(href=self.metadata_href,
                             media_type=pystac.MediaType.JSON,
                             roles=['metadata']))

        return item
Example 17
def download_and_cog_chirps(
    year: str,
    month: str,
    s3_dst: str,
    day: str = None,
    overwrite: bool = False,
    slack_url: str = None,
):
    # Cleaning and sanity checks
    s3_dst = s3_dst.rstrip("/")

    # Set up file strings
    if day is not None:
        # Set up a daily process
        in_file = f"chirps-v2.0.{year}.{month}.{day}.tif.gz"
        in_href = DAILY_URL_TEMPLATE.format(year=year, in_file=in_file)
        in_data = f"/vsigzip//vsicurl/{in_href}"
        if not check_for_url_existence(in_href):
            log.warning("Couldn't find the gzipped file, trying the .tif")
            in_file = f"chirps-v2.0.{year}.{month}.{day}.tif"
            in_href = DAILY_URL_TEMPLATE.format(year=year, in_file=in_file)
            in_data = f"/vsicurl/{in_href}"

            if not check_for_url_existence(in_href):
                log.error("Couldn't find the .tif file either, aborting")
                sys.exit(1)

        file_base = f"{s3_dst}/{year}/{month}/chirps-v2.0_{year}.{month}.{day}"
        out_data = f"{file_base}.tif"
        out_stac = f"{file_base}.stac-item.json"

        start_datetime = f"{year}-{month}-{day}T00:00:00Z"
        end_datetime = f"{year}-{month}-{day}T23:59:59Z"
        product_name = "rainfall_chirps_daily"
    else:
        # Set up a monthly process
        in_file = f"chirps-v2.0.{year}.{month}.tif.gz"
        in_href = MONTHLY_URL_TEMPLATE.format(in_file=in_file)
        in_data = f"/vsigzip//vsicurl/{in_href}"
        if not check_for_url_existence(in_href):
            log.warning("Couldn't find the gzipped file, trying the .tif")
            in_file = f"chirps-v2.0.{year}.{month}.tif"
            in_href = MONTHLY_URL_TEMPLATE.format(in_file=in_file)
            in_data = f"/vsicurl/{in_href}"

            if not check_for_url_existence(in_href):
                log.error("Couldn't find the .tif file either, aborting")
                sys.exit(1)

        file_base = f"{s3_dst}/chirps-v2.0_{year}.{month}"
        out_data = f"{file_base}.tif"
        out_stac = f"{file_base}.stac-item.json"

        _, end = calendar.monthrange(int(year), int(month))
        start_datetime = f"{year}-{month}-01T00:00:00Z"
        end_datetime = f"{year}-{month}-{end}T23:59:59Z"
        product_name = "rainfall_chirps_monthly"

        # Set to 15 for the STAC metadata
        day = 15

    try:
        # Check if file already exists
        log.info(f"Working on {in_file}")
        if not overwrite and s3_head_object(out_stac) is not None:
            log.warning(f"File {out_stac} already exists. Skipping.")
            return

        # COG and STAC
        with MemoryFile() as mem_dst:
            # Creating the COG, with a memory cache and no download. Shiny.
            cog_translate(
                in_data,
                mem_dst.name,
                cog_profiles.get("deflate"),
                in_memory=True,
                nodata=-9999,
            )
            # Creating the STAC document with appropriate date range
            _, end = calendar.monthrange(int(year), int(month))
            item = create_stac_item(
                mem_dst,
                id=str(odc_uuid("chirps", "2.0", [in_file])),
                with_proj=True,
                input_datetime=datetime(int(year), int(month), int(day)),
                properties={
                    "odc:processing_datetime": datetime_to_str(datetime.now()),
                    "odc:product": product_name,
                    "start_datetime": start_datetime,
                    "end_datetime": end_datetime,
                },
            )
            item.set_self_href(out_stac)
            # Manually redo the asset
            del item.assets["asset"]
            item.assets["rainfall"] = pystac.Asset(
                href=out_data,
                title="CHIRPS-v2.0",
                media_type=pystac.MediaType.COG,
                roles=["data"],
            )
            # Let's add a link to the source
            item.add_links([
                pystac.Link(
                    target=in_href,
                    title="Source file",
                    rel=pystac.RelType.DERIVED_FROM,
                    media_type="application/gzip",
                )
            ])

            # Dump the data to S3
            mem_dst.seek(0)
            log.info(f"Writing DATA to: {out_data}")
            s3_dump(mem_dst, out_data, ACL="bucket-owner-full-control")
            # Write STAC to S3
            log.info(f"Writing STAC to: {out_stac}")
            s3_dump(
                json.dumps(item.to_dict(), indent=2),
                out_stac,
                ContentType="application/json",
                ACL="bucket-owner-full-control",
            )
            # All done!
            log.info(f"Completed work on {in_file}")

    except Exception as e:
        message = f"Failed to handle {in_file} with error {e}"

        if slack_url is not None:
            send_slack_notification(slack_url, "Chirps Rainfall Monthly",
                                    message)
        log.exception(message)

        exit(1)
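Sketches of a monthly and a daily invocation; the destination bucket paths are placeholders:

download_and_cog_chirps("2020", "06", "s3://example-bucket/chirps_monthly")
download_and_cog_chirps("2020", "06", "s3://example-bucket/chirps_daily", day="15")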
Example 18
def create_stac_item(
    source: Union[str, DatasetReader, DatasetWriter, WarpedVRT, MemoryFile],
    input_datetime: Optional[datetime.datetime] = None,
    extensions: Optional[List[str]] = None,
    collection: Optional[str] = None,
    properties: Optional[Dict] = None,
    id: Optional[str] = None,
    assets: Optional[Dict[str, pystac.Asset]] = None,
    asset_name: str = "asset",
    asset_roles: Optional[List[str]] = None,
    asset_media_type: Optional[Union[str, pystac.MediaType]] = None,
    asset_href: Optional[str] = None,
) -> pystac.Item:
    """Create a STAC Item.

    Args:
        source (str or opened rasterio dataset): input path or rasterio dataset.
        input_datetime (datetime.datetime, optional): datetime associated with the item.
        extensions (list of str): list of extensions to use in the item.
        collection (str, optional): name of the collection the item belongs to.
        properties (dict, optional): additional properties to add to the item.
        id (str, optional): id to assign to the item (defaults to the source basename).
        assets (dict, optional): Assets to set in the item. If set, we won't create one from the source.
        asset_name (str, optional): asset name in the Assets object.
        asset_roles (list of str, optional): list of the asset's roles.
        asset_media_type (str or pystac.MediaType, optional): asset's media type.
        asset_href (str, optional): asset's URI (defaults to the input path).

    Returns:
        pystac.Item: valid STAC Item.

    """
    with ExitStack() as ctx:
        if isinstance(source, (DatasetReader, DatasetWriter, WarpedVRT)):
            src_dst = source
        else:
            src_dst = ctx.enter_context(rasterio.open(source))

        meta = get_metadata(src_dst)

        media_type = (
            get_media_type(src_dst) if asset_media_type == "auto" else asset_media_type
        )

    properties = properties or {}

    extensions = extensions or []

    if "proj" in extensions:
        properties.update(
            {
                f"proj:{name}": value
                for name, value in meta["proj"].items()
                if value is not None
            }
        )

    # item
    item = pystac.Item(
        id=id or os.path.basename(meta["name"]),
        geometry=meta["footprint"],
        bbox=meta["bbox"],
        collection=collection,
        stac_extensions=extensions,
        datetime=input_datetime,
        properties=properties,
    )

    # item.assets
    if assets:
        for key, asset in assets.items():
            item.add_asset(
                key=key, asset=asset,
            )

    else:
        item.add_asset(
            key=asset_name,
            asset=pystac.Asset(href=asset_href or meta["name"], media_type=media_type),
        )

    return item
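# A brief usage sketch for create_stac_item as defined above; the GeoTIFF path
# is hypothetical, and any raster that rasterio can open should work.
import datetime

import pystac

item = create_stac_item(
    "example.tif",
    input_datetime=datetime.datetime(2021, 1, 1),
    extensions=["proj"],
    asset_name="data",
    asset_media_type=pystac.MediaType.COG,
)
print(item.assets["data"].href)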
Ejemplo n.º 19
0
def create_item(state,
                year,
                cog_href,
                fgdc_metadata_href: Optional[str],
                thumbnail_href=None,
                additional_providers=None):
    """Creates a STAC Item from NAIP data.

    Args:
        state (str): The 2-letter state code for the state this item belongs to.
        year (str): The NAIP year.
        cog_href (str): The href to the image as a COG. This needs
            to be an HREF that rasterio is able to open.
        fgdc_metadata_href (str): The href to the FGDC metadata
            for this NAIP scene. Optional, as some NAIP scenes do not have this
            (e.g. 2010).
        thumbnail_href (str): Optional href for a thumbnail for this scene.
        additional_providers (List[pystac.Provider]): Optional list of additional
            providers, besides the USDA, that will be included on this Item.

    This function will read the metadata file for information to place in
    the STAC item.

    Returns:
        pystac.Item: A STAC Item representing this NAIP scene.
    """

    with rio.open(cog_href) as ds:
        gsd = ds.res[0]
        epsg = int(ds.crs.to_authority()[1])
        image_shape = list(ds.shape)
        original_bbox = list(ds.bounds)
        transform = list(ds.transform)
        geom = reproject_geom(ds.crs,
                              'epsg:4326',
                              mapping(box(*ds.bounds)),
                              precision=6)

    if fgdc_metadata_href is not None:
        fgdc_metadata_text = pystac.STAC_IO.read_text(fgdc_metadata_href)
        fgdc = parse_fgdc_metadata(fgdc_metadata_text)
    else:
        fgdc = {}

    if 'Distribution_Information' in fgdc:
        resource_desc = fgdc['Distribution_Information'][
            'Resource_Description']
    else:
        resource_desc = os.path.basename(cog_href)
    item_id = naip_item_id(state, resource_desc)

    bounds = list(shape(geom).bounds)

    if any(fgdc):
        dt = str_to_datetime(
            fgdc['Identification_Information']['Time_Period_of_Content']
            ['Time_Period_Information']['Single_Date/Time']['Calendar_Date'])
    else:
        fname = os.path.splitext(os.path.basename(cog_href))[0]
        fname_date = fname.split('_')[5]
        dt = dateutil.parser.isoparse(fname_date)

    properties = {'naip:state': state, 'naip:year': year}

    item = pystac.Item(id=item_id,
                       geometry=geom,
                       bbox=bounds,
                       datetime=dt,
                       properties=properties)

    # Common metadata
    item.common_metadata.providers = [constants.USDA_PROVIDER]
    if additional_providers is not None:
        item.common_metadata.providers.extend(additional_providers)
    item.common_metadata.gsd = gsd

    # eo, for asset bands
    item.ext.enable('eo')

    # proj
    item.ext.enable('projection')
    item.ext.projection.epsg = epsg
    item.ext.projection.shape = image_shape
    item.ext.projection.bbox = original_bbox
    item.ext.projection.transform = transform

    # COG
    item.add_asset(
        'image',
        pystac.Asset(href=cog_href,
                     media_type=pystac.MediaType.COG,
                     roles=['data'],
                     title="RGBIR COG tile"))

    # Metadata
    if any(fgdc):
        item.add_asset(
            'metadata',
            pystac.Asset(href=fgdc_metadata_href,
                         media_type=pystac.MediaType.TEXT,
                         roles=['metadata'],
                         title='FGDC Metadata'))

    if thumbnail_href is not None:
        media_type = pystac.MediaType.JPEG
        if thumbnail_href.lower().endswith('png'):
            media_type = pystac.MediaType.PNG
        item.add_asset(
            'thumbnail',
            pystac.Asset(href=thumbnail_href,
                         media_type=media_type,
                         roles=['thumbnail'],
                         title='Thumbnail'))

    item.ext.eo.set_bands(constants.NAIP_BANDS, item.assets['image'])

    return item
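# Hedged usage sketch for the NAIP helper above; the COG href is hypothetical
# and must be readable by rasterio. With no FGDC metadata the acquisition date
# is parsed from the file name, as in the 2010 case noted in the docstring.
item = create_item(
    state="va",
    year="2018",
    cog_href="https://example.com/naip/m_3807708_ne_18_060_20180815.tif",
    fgdc_metadata_href=None,
)
print(item.id, item.common_metadata.gsd)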
Ejemplo n.º 20
0
def create_item(xml_href: str,
                vnir_cog_href: Optional[str],
                swir_cog_href: Optional[str],
                tir_cog_href: Optional[str],
                hdf_href: Optional[str] = None,
                vnir_browse_href: Optional[str] = None,
                tir_browse_href: Optional[str] = None,
                qa_browse_href: Optional[str] = None,
                qa_txt_href: Optional[str] = None,
                additional_providers=None,
                read_href_modifier: Optional[ReadHrefModifier] = None):
    """Creates and item from ASTER Assets."""

    if vnir_cog_href is None and \
        swir_cog_href is None and \
            tir_cog_href is None and \
            hdf_href is None:
        raise ValueError('Need to supply at least one data asset.')

    file_name = os.path.basename(xml_href)
    scene_id = AsterSceneId.from_path(file_name)

    xml_metadata = XmlMetadata.from_file(xml_href, read_href_modifier)

    geom, bounds = xml_metadata.geometries
    datetime = xml_metadata.item_datetime

    item = pystac.Item(
        id=scene_id.item_id,
        geometry=geom,
        bbox=bounds,
        datetime=datetime,
        properties={'aster:processing_number': scene_id.processing_number})

    # Common metadata
    item.common_metadata.providers = [ASTER_PROVIDER]
    if additional_providers is not None:
        item.common_metadata.providers.extend(additional_providers)
    item.common_metadata.created = xml_metadata.created
    item.common_metadata.platform = ASTER_PLATFORM
    item.common_metadata.instruments = [ASTER_INSTRUMENT]

    # eo
    item.ext.enable('eo')
    item.ext.eo.cloud_cover = xml_metadata.cloud_cover

    # sat
    item.ext.enable('sat')
    item.ext.sat.orbit_state = xml_metadata.orbit_state

    # view
    item.ext.enable('view')
    item.ext.view.sun_azimuth = xml_metadata.sun_azimuth
    sun_elevation = xml_metadata.sun_elevation
    # Sun elevation can be negative; if so, will break validation; leave out.
    # See https://github.com/radiantearth/stac-spec/issues/853
    # This is fixed in 1.0.0-RC1; store as an aster property
    #  to be updated once upgrade to 1.0.0-RC1 happens.
    if sun_elevation >= 0.0:
        item.ext.view.sun_elevation = sun_elevation
    else:
        item.ext.view.sun_elevation = 0.0
        item.properties['aster:sun_elevation'] = str(sun_elevation)

    # proj
    item.ext.enable('projection')
    item.ext.projection.epsg = xml_metadata.epsg

    # ASTER-specific properties
    item.properties.update(xml_metadata.aster_properties)

    # -- ASSETS

    # Create XML asset
    item.add_asset(
        XML_ASSET_KEY,
        pystac.Asset(href=xml_href,
                     media_type=pystac.MediaType.XML,
                     roles=['metadata'],
                     title='XML metadata'))

    # Create Assets for each of VIR, SWIR, and TIR
    _add_cog_assets(item=item,
                    xml_metadata=xml_metadata,
                    vnir_cog_href=vnir_cog_href,
                    swir_cog_href=swir_cog_href,
                    tir_cog_href=tir_cog_href,
                    read_href_modifier=read_href_modifier)

    # Create HDF EOS asset, if available
    if hdf_href is not None:
        hdf_asset = pystac.Asset(href=hdf_href,
                                 media_type=pystac.MediaType.HDF,
                                 roles=['data'],
                                 title="ASTER L1T 003 HDF-EOS")

        item.ext.eo.set_bands(ASTER_BANDS, hdf_asset)

        item.add_asset(HDF_ASSET_KEY, hdf_asset)

    # Create assets for browse files, if available
    if vnir_browse_href is not None:
        item.add_asset(
            VNIR_BROWSE_ASSET_KEY,
            pystac.Asset(href=vnir_browse_href,
                         media_type=pystac.MediaType.JPEG,
                         roles=['thumbnail'],
                         title="VNIR browse file",
                         description='Standalone reduced resolution VNIR'))

    if tir_browse_href is not None:
        item.add_asset(
            TIR_BROWSE_ASSET_KEY,
            pystac.Asset(href=tir_browse_href,
                         media_type=pystac.MediaType.JPEG,
                         roles=['thumbnail'],
                         title='Standalone reduced resolution TIR'))

    if qa_browse_href is not None:
        item.add_asset(
            QA_BROWSE_ASSET_KEY,
            pystac.Asset(
                href=qa_browse_href,
                media_type=pystac.MediaType.JPEG,
                roles=['thumbnail'],
                title='QA browse file',
                description=(
                    "Single-band black and white reduced resolution browse "
                    "overlaid with red, green, and blue (RGB) markers for GCPs "
                    "used during the geometric verification quality check.")))

    # Create an asset for the QA text report, if available
    if qa_txt_href:
        item.add_asset(
            QA_TXT_ASSET_KEY,
            pystac.Asset(href=qa_txt_href,
                         media_type=pystac.MediaType.TEXT,
                         roles=['metadata'],
                         title='QA text report',
                         description="Geometric quality assessment report."))

    return item
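# A hedged call sketch for the ASTER helper above; all hrefs are hypothetical,
# and at least one of the VNIR/SWIR/TIR COGs or the HDF must be supplied.
item = create_item(
    xml_href="AST_L1T_00305032000040446_20150409135350_78838.hdf.xml",
    vnir_cog_href="AST_L1T_00305032000040446_20150409135350_78838-VNIR.tif",
    swir_cog_href=None,
    tir_cog_href=None,
)
print(item.id)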
Ejemplo n.º 21
0
    def create_asset(self):
        asset = pystac.Asset(href=self.href,
                             media_type=pystac.MediaType.XML,
                             roles=['metadata'])
        return (SAFE_MANIFEST_ASSET_KEY, asset)
Ejemplo n.º 22
0
def download_cci_lc(year: str,
                    s3_dst: str,
                    workdir: str,
                    overwrite: bool = False):
    log = setup_logging()
    assets = {}

    cci_lc_version = get_version_from_year(year)
    name = f"{PRODUCT_NAME}_{year}_{cci_lc_version}"

    out_cog = URL(s3_dst) / year / f"{name}.tif"
    out_stac = URL(s3_dst) / year / f"{name}.stac-item.json"

    if s3_head_object(str(out_stac)) is not None and not overwrite:
        log.info(f"{out_stac} exists, skipping")
        return

    workdir = Path(workdir)
    if not workdir.exists():
        workdir.mkdir(parents=True, exist_ok=True)

    # Create a temporary directory to work with
    tmpdir = mkdtemp(prefix=str(f"{workdir}/"))
    log.info(f"Working on {year} in the path {tmpdir}")

    if s3_head_object(str(out_cog)) is None or overwrite:
        log.info(f"Downloading {year}")
        try:
            local_file = Path(tmpdir) / f"{name}.zip"
            if not local_file.exists():
                # Download the file
                c = cdsapi.Client()

                # We could also retrieve the object metadata from the CDS.
                # e.g. f = c.retrieve("series",{params}) | f.location = URL to download
                c.retrieve(
                    "satellite-land-cover",
                    {
                        "format": "zip",
                        "variable": "all",
                        "version": cci_lc_version,
                        "year": str(year),
                    },
                    local_file,
                )

                log.info(f"Downloaded file to {local_file}")
            else:
                log.info(
                    f"File {local_file} exists, continuing without downloading"
                )

            # Unzip the file
            log.info(f"Unzipping {local_file}")
            unzipped = None
            with zipfile.ZipFile(local_file, "r") as zip_ref:
                unzipped = local_file.parent / zip_ref.namelist()[0]
                zip_ref.extractall(tmpdir)

            # Process data
            ds = xr.open_dataset(unzipped)
            # Subset to Africa
            ulx, uly, lrx, lry = AFRICA_BBOX
            # Note: lats are upside down!
            ds_small = ds.sel(lat=slice(uly, lry), lon=slice(ulx, lrx))
            ds_small = assign_crs(ds_small, crs="epsg:4326")

            # Create cog (in memory - :mem: returns bytes object)
            mem_dst = write_cog(
                ds_small.lccs_class,
                ":mem:",
                nodata=0,
                overview_resampling="nearest",
            )

            # Write to s3
            s3_dump(mem_dst, str(out_cog), ACL="bucket-owner-full-control")
            log.info(f"File written to {out_cog}")

        except Exception:
            log.exception(f"Failed to process {name}")
            exit(1)
    else:
        log.info(f"{out_cog} exists, skipping")

    assets["classification"] = pystac.Asset(href=str(out_cog),
                                            roles=["data"],
                                            media_type=pystac.MediaType.COG)

    # Write STAC document
    source_doc = (
        "https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover"
    )
    item = create_stac_item(
        str(out_cog),
        id=str(
            odc_uuid("Copernicus Land Cover", cci_lc_version,
                     [source_doc, name])),
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": PRODUCT_NAME,
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )
    item.add_links([
        pystac.Link(
            target=source_doc,
            title="Source",
            rel=pystac.RelType.DERIVED_FROM,
            media_type="text/html",
        )
    ])
    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        str(out_stac),
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {out_stac}")
Ejemplo n.º 23
0
    def to_representation(self, instance: models.RasterMeta) -> dict:
        item = pystac.Item(
            id=instance.pk,
            geometry=json.loads(instance.footprint.json),
            bbox=instance.extent,
            datetime=(instance.acquisition_date or instance.modified
                      or instance.created),
            properties=dict(
                datetime=str(instance.acquisition_date),
                platform=instance.instrumentation,
            ),
        )
        # 'proj' extension
        item.ext.enable('projection')
        item.ext.projection.apply(
            epsg=CRS.from_proj4(instance.crs).to_epsg(),
            transform=instance.transform,
        )
        # 'eo' extension
        item.ext.enable('eo')
        item.ext.eo.apply(cloud_cover=instance.cloud_cover, bands=[])
        # Add assets
        for image in instance.parent_raster.image_set.images.all():
            if image.file.type != FileSourceType.URL:
                # TODO: we need to fix this
                raise ValueError(
                    'Files must point to valid URL resources, not internal storage.'
                )
            bands = []
            for bandmeta in image.bandmeta_set.filter(
                    band_range__contained_by=(None, None)):
                band = pystac.extensions.eo.Band.create(
                    name=f'B{bandmeta.band_number}',
                    description=bandmeta.description,
                )
                # The wavelength statistics are described either by the
                # common_name or via center_wavelength and full_width_half_max.
                # We can derive center_wavelength and full_width_half_max
                # from bandmeta.band_range.lower and bandmeta.band_range.upper.
                if (
                        bandmeta.band_range.lower,
                        bandmeta.band_range.upper,
                ) in BAND_RANGE_BY_COMMON_NAMES.inverse:
                    band.common_name = BAND_RANGE_BY_COMMON_NAMES.inverse[(
                        bandmeta.band_range.lower, bandmeta.band_range.upper)]
                else:
                    with decimal.localcontext(decimal.BasicContext):
                        band.center_wavelength = float(
                            (bandmeta.band_range.lower +
                             bandmeta.band_range.upper) / 2)
                        band.full_width_half_max = float(
                            bandmeta.band_range.upper -
                            bandmeta.band_range.lower)

                bands.append(band)
            asset = pystac.Asset(
                href=image.file.get_url(),
                title=image.file.name,
                roles=[
                    'data',
                ],
            )
            item.add_asset(f'image-{image.pk}', asset)
            item.ext.eo.set_bands(
                bands=bands or [
                    pystac.extensions.eo.Band.create(
                        name=image.file.name,
                        description=image.bandmeta_set.first().description,
                    )
                ],
                asset=asset,
            )

        for ancillary_file in instance.parent_raster.ancillary_files.all():
            asset = pystac.Asset(
                href=ancillary_file.get_url(),
                title=ancillary_file.name,
                roles=[
                    'metadata',
                ],
            )
            item.add_asset(f'ancillary-{ancillary_file.pk}', asset)

        return item.to_dict()
Ejemplo n.º 24
0
def create_item(metadata_href):
    """Creates a STAC Item from CORINE data.
    Args:
        metadata_href (str): The href to the metadata for this tif.
    This function will read the metadata file for information to place in
    the STAC item.
    Returns:
        pystac.Item: A STAC Item representing this CORINE Land Cover.
    """

    metadata_root = ET.parse(metadata_href).getroot()

    # Item id
    image_name_node = 'Esri/DataProperties/itemProps/itemName'
    image_name = metadata_root.find(image_name_node).text
    item_id = os.path.splitext(image_name)[0]

    # Bounding box
    bounding_box_node = 'dataIdInfo/dataExt/geoEle/GeoBndBox/{}'
    west_long = float(
        metadata_root.find(bounding_box_node.format('westBL')).text)
    east_long = float(
        metadata_root.find(bounding_box_node.format('eastBL')).text)
    south_lat = float(
        metadata_root.find(bounding_box_node.format('southBL')).text)
    north_lat = float(
        metadata_root.find(bounding_box_node.format('northBL')).text)

    geom = mapping(box(west_long, south_lat, east_long, north_lat))
    bounds = shape(geom).bounds

    # EPSG
    epsg_element = 'refSysInfo/RefSystem/refSysID/identCode'
    epsg = int(
        metadata_root.find(epsg_element).attrib['code'].replace('EPSG:', ''))

    # Item date
    id_dt_node = 'dataIdInfo/idCitation/date/pubDate'
    id_dt_text = metadata_root.find(id_dt_node).text
    id_dt = str_to_datetime(id_dt_text)

    # Title
    title_node = 'dataIdInfo/idCitation/resTitle'
    title_text = metadata_root.find(title_node).text

    item = pystac.Item(id=item_id,
                       geometry=geom,
                       bbox=bounds,
                       datetime=id_dt,
                       properties={'corine:title': title_text})

    # Common metadata
    item.common_metadata.providers = [COPERNICUS_PROVIDER]

    # proj
    item.ext.enable('projection')
    item.ext.projection.epsg = epsg

    # Tif
    item.add_asset(
        ITEM_TIF_IMAGE_NAME,
        pystac.Asset(href=image_name,
                     media_type=pystac.MediaType.TIFF,
                     roles=['data'],
                     title="tif image"))

    # Metadata
    item.add_asset(
        ITEM_METADATA_NAME,
        pystac.Asset(href=metadata_href,
                     media_type=pystac.MediaType.TEXT,
                     roles=['metadata'],
                     title='CORINE metadata'))

    return item
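# Hedged usage sketch for the CORINE helper above; the metadata href is a
# hypothetical Esri-style XML document of the kind the function parses.
item = create_item("U2018_CLC2018_V2020_20u1.xml")
print(item.id, item.ext.projection.epsg)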
Ejemplo n.º 25
0
    def render_metadata(
        product: OutputProduct,
        geobox: GeoBox,
        tile_index: TileIdx_xy,
        time_range: DateTimeRange,
        uuid: UUID,
        paths: Dict[str, str],
        metadata_path: str,
        processing_dt: Optional[datetime] = None,
    ) -> Dict[str, Any]:
        """
        Put together STAC metadata document for the output from the task info.
        """
        if processing_dt is None:
            processing_dt = datetime.utcnow()

        region_code = product.region_code(tile_index)
        inputs: List[str] = []

        properties: Dict[str, Any] = deepcopy(product.properties)
        properties["dtr:start_datetime"] = format_datetime(time_range.start)
        properties["dtr:end_datetime"] = format_datetime(time_range.end)
        properties["odc:processing_datetime"] = format_datetime(
            processing_dt, timespec="seconds")
        properties["odc:region_code"] = region_code
        properties["odc:lineage"] = dict(inputs=inputs)
        properties["odc:product"] = product.name

        geobox_wgs84 = geobox.extent.to_crs("epsg:4326",
                                            resolution=math.inf,
                                            wrapdateline=True)
        bbox = geobox_wgs84.boundingbox

        item = pystac.Item(
            id=str(uuid),
            geometry=geobox_wgs84.json,
            bbox=[bbox.left, bbox.bottom, bbox.right, bbox.top],
            datetime=time_range.start.replace(tzinfo=timezone.utc),
            properties=properties,
        )

        # Enable the Projection extension
        item.ext.enable("projection")
        item.ext.projection.epsg = geobox.crs.epsg

        # Add all the assets
        for band, path in paths.items():
            asset = pystac.Asset(
                href=path,
                media_type="image/tiff; application=geotiff",
                roles=["data"],
                title=band,
            )
            item.add_asset(band, asset)

            item.ext.projection.set_transform(geobox.transform, asset=asset)
            item.ext.projection.set_shape(geobox.shape, asset=asset)

        # Add links
        item.links.append(
            pystac.Link(
                rel="product_overview",
                media_type="application/json",
                target=product.href,
            ))
        item.links.append(
            pystac.Link(
                rel="self",
                media_type="application/json",
                target=metadata_path,
            ))

        return item.to_dict()
Ejemplo n.º 26
0
def write_stac(s3_destination: str, file_path: str, file_key: str, year: str,
               log: Logger) -> str:
    region_code = file_key.split("_")[0]
    stac_href = f"s3://{s3_destination}/{file_key}.stac-item.json"
    log.info(f"Creating STAC file in memory, targeting here: {stac_href}")

    if int(year) > 2010:
        hhpath = f"{file_key}_sl_HH_F02DAR.tif"
        hvpath = f"{file_key}_sl_HV_F02DAR.tif"
        lincipath = f"{file_key}_sl_linci_F02DAR.tif"
        maskpath = f"{file_key}_sl_mask_F02DAR.tif"
        datepath = f"{file_key}_sl_date_F02DAR.tif"
        launch_date = "2014-05-24"
        shortname = "alos"
    else:
        hhpath = f"{file_key}_sl_HH.tif"
        hvpath = f"{file_key}_sl_HV.tif"
        lincipath = f"{file_key}_sl_linci.tif"
        maskpath = f"{file_key}_sl_mask.tif"
        datepath = f"{file_key}_sl_date.tif"
        if int(year) > 2000:
            launch_date = "2006-01-24"
            shortname = "alos"
        else:
            launch_date = "1992-02-11"
            shortname = "jers"
    if shortname == "alos":
        product_name = "alos_palsar_mosaic"
        platform = "ALOS/ALOS-2"
        instrument = "PALSAR/PALSAR-2"
        cf = "83.0 dB"
        bandpaths = {
            "hh": hhpath,
            "hv": hvpath,
            "linci": lincipath,
            "mask": maskpath,
            "date": datepath,
        }
    else:
        product_name = "jers_sar_mosaic"
        platform = "JERS-1"
        instrument = "SAR"
        cf = "84.66 dB"
        bandpaths = {
            "hh": hhpath,
            "linci": lincipath,
            "mask": maskpath,
            "date": datepath,
        }

    properties = {
        "odc:product": product_name,
        "odc:region_code": region_code,
        "platform": platform,
        "instruments": [instrument],
        "cf": cf,
        "launchdate": launch_date,
        "start_datetime": f"{year}-01-01T00:00:00Z",
        "end_datetime": f"{year}-12-31T23:59:59Z",
    }

    assets = {}
    for name, path in bandpaths.items():
        href = f"s3://{s3_destination}/{path}"
        assets[name] = pystac.Asset(href=href,
                                    media_type=pystac.MediaType.COG,
                                    roles=["data"])

    item = create_stac_item(
        file_path,
        id=str(
            odc_uuid(shortname,
                     "1", [],
                     year=year,
                     tile=file_key.split("_")[0])),
        properties=properties,
        assets=assets,
        with_proj=True,
    )
    item.set_self_href(stac_href)

    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        item.self_href,
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {item.self_href}")
Ejemplo n.º 27
0
def create_cogs(item, cog_directory=None):
    """Create COGs from the HDF asset contained in the passed in STAC item.

    Args:
        item (pystac.Item): ASTER L1T 003 Item that contains an asset
            with key equal to stactools.aster.constants.HDF_ASSET_KEY,
            which will be converted to COGs.
        cog_directory (str): A URI of a directory to store COGs. This will be used
            in conjunction with the file names based on the COG asset to store
            the COG data. If not supplied, the directory of the Item's self HREF
            will be used.

    Returns:
        pystac.Item: The same item, mutated to include assets for the
            new COGs.
    """
    if cog_directory is None:
        cog_directory = os.path.dirname(item.get_self_href())

    hdf_asset = item.assets.get(HDF_ASSET_KEY)
    if hdf_asset is None:
        raise ValueError(
            'Item does not have an asset with key {}.'.format(HDF_ASSET_KEY))

    hdf_href = hdf_asset.href
    with rio.open(hdf_href) as ds:
        subdatasets = ds.subdatasets

    # Gather the subdatasets by sensor, sorted by band number
    sensor_to_subdatasets = defaultdict(list)
    for sd in subdatasets:
        m = re.search(r':?([\w]+)_Swath:ImageData([\d]+)', sd)
        if m is None:
            raise ValueError(
                'Unexpected subdataset {} - is this a non-standard ASTER L1T 003 HDF-EOS file?'
                .format(sd))
        sensor_to_subdatasets[m.group(1)].append((sd, m.group(2)))

    for k in sensor_to_subdatasets:
        sensor_to_subdatasets[k] = [
            x[0] for x in sorted(sensor_to_subdatasets[k], key=lambda x: x[1])
        ]

    sensor_to_bands = defaultdict(list)

    # Gather the bands for each sensor, sorted by band number
    for band in ASTER_BANDS:
        sensor_to_bands[band.description.split('_')[0]].append(band)
    for sensor in sensor_to_bands:
        sensor_to_bands[sensor] = sorted(
            sensor_to_bands[sensor],
            key=lambda b: re.search(r'(\d+)', b.description).group(1))

    # Use subdataset keys, as data might be missing some sensors.
    for sensor in sensor_to_subdatasets:
        href = os.path.join(cog_directory, '{}-cog.tif'.format(sensor))
        _create_cog(item, href, sensor_to_subdatasets[sensor],
                    sensor_to_bands[sensor])

        asset = pystac.Asset(href=href,
                             media_type=pystac.MediaType.COG,
                             roles=['data'],
                             title='{} Swath data'.format(sensor))

        item.ext.eo.set_bands(sensor_to_bands[sensor], asset)

        item.assets[sensor] = asset
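# Illustrative check of the subdataset regex used above against a typical GDAL
# subdataset name for an ASTER L1T HDF-EOS file (the file name is hypothetical).
import re

sd = 'HDF4_EOS:EOS_SWATH:"AST_L1T_00305032000040446.hdf":VNIR_Swath:ImageData1'
m = re.search(r':?([\w]+)_Swath:ImageData([\d]+)', sd)
print(m.group(1), m.group(2))  # -> VNIR 1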
Ejemplo n.º 28
0
    def render_metadata(
            self,
            ext: str = EXT_TIFF,
            processing_dt: Optional[datetime] = None) -> Dict[str, Any]:
        """
        Put together STAC metadata document for the output of this task.
        """
        if processing_dt is None:
            processing_dt = datetime.utcnow()

        product = self.product
        geobox = self.geobox
        region_code = product.region_code(self.tile_index)
        inputs = list(map(str, self._lineage()))

        properties: Dict[str, Any] = deepcopy(product.properties)

        properties["dtr:start_datetime"] = format_datetime(
            self.time_range.start)
        properties["dtr:end_datetime"] = format_datetime(self.time_range.end)
        properties["odc:processing_datetime"] = format_datetime(
            processing_dt, timespec="seconds")
        properties["odc:region_code"] = region_code
        properties["odc:product"] = product.name
        properties["odc:dataset_version"] = product.version

        geobox_wgs84 = geobox.extent.to_crs("epsg:4326",
                                            resolution=math.inf,
                                            wrapdateline=True)
        bbox = geobox_wgs84.boundingbox

        item = pystac.Item(
            id=str(self.uuid),
            geometry=geobox_wgs84.json,
            bbox=[bbox.left, bbox.bottom, bbox.right, bbox.top],
            datetime=self.time_range.start.replace(tzinfo=timezone.utc),
            properties=properties,
            stac_extensions=["projection"],
        )

        item.ext.projection.epsg = geobox.crs.epsg
        # Lineage last
        item.properties["odc:lineage"] = dict(inputs=inputs)

        # Add all the assets
        for band, path in self.paths(ext=ext).items():
            asset = pystac.Asset(
                href=path,
                media_type="image/tiff; application=geotiff",
                roles=["data"],
                title=band,
            )
            item.add_asset(band, asset)

            item.ext.projection.set_transform(geobox.transform, asset=asset)
            item.ext.projection.set_shape(geobox.shape, asset=asset)

        # Add links
        item.links.append(
            pystac.Link(
                rel="product_overview",
                media_type="application/json",
                target=product.href,
            ))
        item.links.append(
            pystac.Link(
                rel="self",
                media_type="application/json",
                target=self.metadata_path("absolute", ext="json"),
            ))

        return item.to_dict()
Ejemplo n.º 29
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--pipeline-uri",
                        type=str,
                        help="A URI to JSON with instructions")
    parser.add_argument("--pipeline", type=str, help="JSON with instructions")
    parser.add_argument(
        "--sentinel-stac-id",
        type=str,
        help="STAC Item ID to process from the STAC collection")
    parser.add_argument(
        "--sentinel-collection-id",
        type=str,
        default=SENTINEL_ARCHIVE_COLLECTION_ID,
    )
    parser.add_argument(
        "--stac-api-uri",
        type=str,
        default=os.environ.get("STAC_API_URI", "http://franklin:9090"),
    )
    parser.add_argument(
        "--stac-api-uri-sentinel",
        type=str,
        default=os.environ.get("STAC_API_URI_SENTINEL",
                               "https://earth-search.aws.element84.com/v0"),
    )

    parser.add_argument("--s3-bucket",
                        type=str,
                        default=os.environ.get("S3_BUCKET",
                                               "sentinel-s2-data"))
    parser.add_argument(
        "--s3-prefix",
        type=str,
        default=os.environ.get("S3_PREFIX", "aviris-scene-cogs-l2"),
    )
    parser.add_argument("--temp-dir",
                        type=str,
                        default=os.environ.get("TEMP_DIR", None))
    parser.add_argument("--output-format",
                        type=str,
                        default=os.environ.get("GDAL_OUTPUT_FORMAT", "COG"))
    parser.add_argument(
        "--keep-temp-dir",
        action="store_true",
        help=
        "If provided, script does not delete temporary directory before script exits. Useful for debugging.",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help=
        "If provided, force reingest StacItem even though this it is already present in the catalog.",
    )

    try:
        warpMemoryLimit = int(os.environ.get("GDAL_WARP_MEMORY_LIMIT", None))
    except TypeError:
        warpMemoryLimit = None

    # TODO: replace it with parser.parse_args() later
    cli_args, cli_unknown = parser.parse_known_args()

    # parse all cli arguments
    args = CliConfig(cli_args, cli_unknown)

    s3 = boto3.client("s3")
    stac_client_sentinel = STACClient(args.stac_api_uri_sentinel)
    stac_client = STACClient(args.stac_api_uri)

    collection = stac_client_sentinel.get_collection(
        args.sentinel_collection_id)

    SENTINEL_COG_COLLECTION = pystac.Collection(
        SENTINEL_COG_COLLECTION_ID,
        "Sentinel-2a and Sentinel-2b imagery, processed to Level 2A (Surface Reflectance) and converted to Cloud-Optimized GeoTIFFs",
        collection.extent,
        stac_extensions=COG_COLLECTION_EXTENSIONS)
    SENTINEL_COG_COLLECTION.links = []
    SENTINEL_COG_COLLECTION.properties = {}
    SENTINEL_COG_COLLECTION.properties['eo:bands'] = SENTINEL_BANDS

    SENTINEL_COG_COLLECTION.properties[
        'hsi:wavelength_min'] = SENTINEL_WAVELENGTH_MIN
    SENTINEL_COG_COLLECTION.properties[
        'hsi:wavelength_max'] = SENTINEL_WAVELENGTH_MAX

    # GET STAC Item from SENTINEL Catalog
    item = stac_client_sentinel.get_collection_item(
        args.sentinel_collection_id, args.sentinel_stac_id)
    assets = item.assets
    bands_map = {
        'B01': vsis3(strip_scheme(assets['B01'].href)),
        'B02': vsis3(strip_scheme(assets['B02'].href)),
        'B03': vsis3(strip_scheme(assets['B03'].href)),
        'B04': vsis3(strip_scheme(assets['B04'].href)),
        'B05': vsis3(strip_scheme(assets['B05'].href)),
        'B06': vsis3(strip_scheme(assets['B06'].href)),
        'B07': vsis3(strip_scheme(assets['B07'].href)),
        'B08': vsis3(strip_scheme(assets['B08'].href)),
        'B8A': vsis3(strip_scheme(assets['B8A'].href)),
        'B09': vsis3(strip_scheme(assets['B09'].href)),
        'B11': vsis3(strip_scheme(assets['B11'].href)),
        'B12': vsis3(strip_scheme(assets['B12'].href)),
        'AOT': vsis3(strip_scheme(assets['AOT'].href)),
        # 'WVP': vsis3(strip_scheme(assets['WVP'].href)),
        # 'SCL': vsis3(strip_scheme(assets['SCL'].href))
    }

    # We don't need assets here, since the gather script knows what to download, and how, from the sentinel path
    properties = item.properties
    datetime = dateutil.parser.isoparse(properties['datetime'])

    # here "href": "s3://sentinel-s2-l2a/tiles/31/V/CE/2021/8/19/0/R60m/B01.jp2"
    # path is tiles/31/V/CE/2021/8/19/0
    sentinel_path = 'tiles/{}/{}/{}/{}/{}/{}/{}'.format(
        properties['sentinel:utm_zone'], properties['sentinel:latitude_band'],
        properties['sentinel:grid_square'], str(datetime.year),
        str(datetime.month), str(datetime.day),
        properties['sentinel:sequence'])

    # Create new COG STAC Item
    cog_item_id = "{}_{}".format(SENTINEL_COG_COLLECTION.id, item.id)

    cog_item = pystac.Item(
        cog_item_id,
        item.geometry,
        item.bbox,
        item.datetime,
        item.properties,
        stac_extensions=COG_ITEM_EXTENSIONS,
        collection=SENTINEL_COG_COLLECTION.id,
    )

    cog_item.properties['eo:bands'] = SENTINEL_COG_COLLECTION.properties[
        'eo:bands']
    cog_item.properties[
        'hsi:wavelength_min'] = SENTINEL_COG_COLLECTION.properties[
            'hsi:wavelength_min']
    cog_item.properties[
        'hsi:wavelength_max'] = SENTINEL_COG_COLLECTION.properties[
            'hsi:wavelength_max']
    cog_item.properties['proj:epsg'] = '4326'

    # Create COG Collection if it doesn't exist
    if not stac_client.has_collection(SENTINEL_COG_COLLECTION.id):
        stac_client.post_collection(SENTINEL_COG_COLLECTION)

    if not args.force:
        # Exit early if COG STAC Item already exists
        try:
            stac_client.get_collection_item(SENTINEL_COG_COLLECTION.id,
                                            cog_item_id)
            logger.info(f'STAC Item {cog_item_id} already exists. Exiting.')
            activation_output(cog_item_id)
            return
        except requests.exceptions.HTTPError:
            pass

    _, s3_uri = gather_sentinel(
        f'{cog_item_id}.tiff',
        f's3://{args.s3_bucket}/{args.s3_prefix}/{sentinel_path}/', bands_map)

    # Add assets to COG STAC Item
    idx = 0
    cog_item.add_asset(
        f'{args.output_asset_name}_{idx}',
        pystac.Asset(s3_uri, media_type=pystac.MediaType.COG, roles=["data"]),
    )

    # Add COG Item to the Sentinel COG STAC Collection
    logger.info(f"POST Item {cog_item.id} to {args.stac_api_uri}")
    item_data = stac_client.post_collection_item(SENTINEL_COG_COLLECTION.id,
                                                 cog_item)
    if item_data.get('id', None):
        logger.info(f"Success: {item_data['id']}")
        activation_output(item_data['id'])
    else:
        logger.error(f"Failure: {item_data}")
        return -1
Ejemplo n.º 30
0
def create_item(tif_href, additional_providers=None):
    """Creates a STAC Item from Copernicus Global Land Cover Layers data.
    Args:
        tif_href (str): The href to the tif for this dataset.
        additional_providers (List[pystac.Provider], optional): Additional
            providers to include on this Item.
    This function will read the tif's embedded tags for information to place in
    the STAC item.
    Returns:
        pystac.Item: A STAC Item representing this Copernicus Global Land Cover Layers data.
    """

    with rio.open(tif_href) as f:
        tags = f.tags()
        band_tags = f.tags(1)
        bounds = f.bounds

    # Item id
    item_id = os.path.basename(tif_href).replace('.tif', '')

    # Bounds
    geom = mapping(box(bounds.left, bounds.bottom, bounds.right, bounds.top))
    bounds = shape(geom).bounds

    start_dt = str_to_datetime(tags.pop('time_coverage_start'))
    end_dt = str_to_datetime(tags.pop('time_coverage_end'))
    file_creation_dt = str_to_datetime(tags.pop('file_creation'))

    item = pystac.Item(id=item_id,
                       geometry=geom,
                       bbox=bounds,
                       datetime=None,
                       properties={
                           'start_datetime':
                           start_dt,
                           'end_datetime':
                           end_dt,
                           'discrete_classification_class_names':
                           DISCRETE_CLASSIFICATION_CLASS_NAMES,
                           'discrete_classification_class_palette':
                           DISCRETE_CLASSIFICATION_CLASS_PALETTE
                       })

    # Common metadata
    copernicus_provider = pystac.Provider(name=PROVIDER_NAME,
                                          url=(tags.pop('doi')),
                                          roles=['producer', 'licensor'])

    item.common_metadata.providers = [copernicus_provider]
    if additional_providers is not None:
        item.common_metadata.providers.extend(additional_providers)

    item.common_metadata.start_datetime = start_dt
    item.common_metadata.end_datetime = end_dt
    item.common_metadata.created = file_creation_dt

    item.common_metadata.description = tags.pop('Info')
    item.common_metadata.platform = tags.pop('platform')
    item.common_metadata.title = tags.pop('title')

    # proj
    item.ext.enable('projection')
    item.ext.projection.epsg = int(
        tags.pop('delivered_product_crs').replace('WGS84 (EPSG:',
                                                  '').replace(')', ''))

    # Extra fields
    for k, v in tags.items():
        item.extra_fields[k] = v

    # Bands
    long_name = band_tags.pop('long_name')
    band = pystac.extensions.eo.Band.create(
        name=long_name,
        common_name=band_tags.pop('short_name'),
        description=long_name)

    item.ext.enable('eo')
    item.ext.eo.bands = [band]

    # Tif
    item.add_asset(
        ITEM_TIF_IMAGE_NAME,
        pystac.Asset(href=tif_href,
                     media_type=pystac.MediaType.TIFF,
                     roles=['data'],
                     title="tif image"))
    return item
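# Hedged usage sketch; the tif href is hypothetical and must carry the
# time_coverage_*, file_creation, and band tags this function reads.
item = create_item(
    "PROBAV_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif")
print(item.common_metadata.title, item.ext.projection.epsg)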