Example #1
    def test_multiple_extents(self):
        cat1 = TestCases.test_case_1()
        col1 = cat1.get_child('country-1').get_child('area-1-1')
        col1.validate()
        self.assertIsInstance(col1, Collection)
        validate_dict(col1.to_dict(), STACObjectType.COLLECTION)

        multi_ext_uri = TestCases.get_path(
            'data-files/collections/multi-extent.json')
        with open(multi_ext_uri) as f:
            multi_ext_dict = json.load(f)
        validate_dict(multi_ext_dict, STACObjectType.COLLECTION)
        self.assertIsInstance(Collection.from_dict(multi_ext_dict), Collection)

        multi_ext_col = Collection.from_file(multi_ext_uri)
        multi_ext_col.validate()
        ext = multi_ext_col.extent
        extent_dict = multi_ext_dict['extent']
        self.assertIsInstance(ext, Extent)
        self.assertIsInstance(ext.spatial.bboxes[0], list)
        self.assertEqual(len(ext.spatial.bboxes), 2)
        self.assertDictEqual(ext.to_dict(), extent_dict)

        cloned_ext = ext.clone()
        self.assertDictEqual(cloned_ext.to_dict(), multi_ext_dict['extent'])
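For reference, a minimal sketch of how an extent with multiple bboxes, like the one in multi-extent.json, can be built directly. Per the STAC spec the first bbox should enclose all the others; the values below are illustrative:

from pystac import Extent, SpatialExtent, TemporalExtent

# First bbox is the overall extent; the second is a sub-extent inside it.
spatial = SpatialExtent(bboxes=[
    [-180.0, -90.0, 180.0, 90.0],
    [-10.0, 40.0, 10.0, 60.0],
])
temporal = TemporalExtent(intervals=[[None, None]])  # open interval
extent = Extent(spatial=spatial, temporal=temporal)
assert len(extent.spatial.bboxes) == 2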
Example #2
    def test_eo_items_are_heritable(self):
        item1 = Item(id='test-item-1',
                     geometry=RANDOM_GEOM,
                     bbox=RANDOM_BBOX,
                     datetime=datetime.utcnow(),
                     properties={'key': 'one'},
                     stac_extensions=['eo', 'commons'])

        item2 = Item(id='test-item-2',
                     geometry=RANDOM_GEOM,
                     bbox=RANDOM_BBOX,
                     datetime=datetime.utcnow(),
                     properties={'key': 'two'},
                     stac_extensions=['eo', 'commons'])

        wv3_bands = [
            Band.create(name='Coastal', description='Coastal: 400 - 450 nm', common_name='coastal'),
            Band.create(name='Blue', description='Blue: 450 - 510 nm', common_name='blue'),
            Band.create(name='Green', description='Green: 510 - 580 nm', common_name='green'),
            Band.create(name='Yellow', description='Yellow: 585 - 625 nm', common_name='yellow'),
            Band.create(name='Red', description='Red: 630 - 690 nm', common_name='red'),
            Band.create(name='Red Edge',
                        description='Red Edge: 705 - 745 nm',
                        common_name='rededge'),
            Band.create(name='Near-IR1', description='Near-IR1: 770 - 895 nm', common_name='nir08'),
            Band.create(name='Near-IR2', description='Near-IR2: 860 - 1040 nm', common_name='nir09')
        ]

        spatial_extent = SpatialExtent(bboxes=[RANDOM_BBOX])
        temporal_extent = TemporalExtent(intervals=[[item1.datetime, None]])

        collection_extent = Extent(spatial=spatial_extent, temporal=temporal_extent)

        common_properties = {
            'eo:bands': [b.to_dict() for b in wv3_bands],
            'gsd': 0.3,
            'eo:platform': 'Maxar',
            'eo:instrument': 'WorldView3'
        }

        collection = Collection(id='test',
                                description='test',
                                extent=collection_extent,
                                properties=common_properties,
                                stac_extensions=['commons'],
                                license='CC-BY-SA-4.0')

        collection.add_items([item1, item2])

        with TemporaryDirectory() as tmp_dir:
            collection.normalize_hrefs(tmp_dir)
            collection.save(catalog_type=CatalogType.SELF_CONTAINED)

            read_col = Collection.from_file('{}/collection.json'.format(tmp_dir))
            items = list(read_col.get_all_items())

            self.assertEqual(len(items), 2)
            self.assertTrue(items[0].ext.implements('eo'))
            self.assertTrue(items[1].ext.implements('eo'))
Example #3
    def test_from_dict_preserves_dict(self) -> None:
        path = TestCases.get_path("data-files/collections/with-assets.json")
        with open(path) as f:
            collection_dict = json.load(f)
        param_dict = deepcopy(collection_dict)

        # test that the parameter is preserved
        _ = Collection.from_dict(param_dict)
        self.assertEqual(param_dict, collection_dict)

        # assert that the parameter is not preserved with
        # non-default parameter
        _ = Collection.from_dict(param_dict, preserve_dict=False)
        self.assertNotEqual(param_dict, collection_dict)
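In practice, preserve_dict=False skips pystac's internal deep copy, which can matter for large collection documents. A minimal usage sketch (the file path is illustrative), safe only because the dict is not reused afterwards:

import json
from pystac import Collection

with open("collection.json") as f:
    d = json.load(f)
# from_dict may mutate `d` when preserve_dict=False, so don't reuse it.
collection = Collection.from_dict(d, preserve_dict=False)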
Example #4
    def test_label_classes_summary(self) -> None:
        label_classes = [
            LabelClasses({
                "name": "road_type",
                "classes": ["1", "2", "3", "4", "5", "6"]
            }),
            LabelClasses({
                "name": "lane_number",
                "classes": ["1", "2", "3", "4", "5"]
            }),
            LabelClasses({
                "name": "paved",
                "classes": ["0", "1"]
            }),
        ]
        collection = Collection.from_file(self.EXAMPLE_COLLECTION)
        label_ext_summaries = LabelExtension.summaries(collection, True)

        label_ext_summaries.label_classes = label_classes

        summaries = collection.summaries
        assert summaries is not None
        label_classes_summary = summaries.get_list("label:classes")
        assert label_classes_summary is not None
        self.assertListEqual([lc.to_dict() for lc in label_classes],
                             label_classes_summary)

        label_classes_summary_ext = label_ext_summaries.label_classes
        assert label_classes_summary_ext is not None
        self.assertListEqual(label_classes, label_classes_summary_ext)
Example #5
    def add_item(self, path_pro: Optional[str], path_tiff: Optional[str],
                 path_img: Optional[str]) -> None:
        assert path_pro is not None and path_pro.endswith(".pro")
        # rstrip() strips a character set, not a suffix, and would mangle names
        # ending in 'p', 'r' or 'o'; slice the suffix off instead.
        file_name = path_pro.split("\\")[-1][:-len(".pro")]
        print(file_name)
        b0, data = parse(path_pro)
        item: Item = stac.create_item(i_id=file_name, metadata=b0)
        assets: List[Asset] = [
            Asset(href=path_pro, media_type="pro")
        ]
        if path_tiff is not None:
            assets.append(Asset(href=path_tiff, media_type="geotiff"))
        if path_img is not None:
            assets.append(Asset(href=path_img, media_type="img"))
        stac.add_assets(item, assets)

        catalog = self.root_catalog.get_child(str(b0["b0_common"]["satId"][0]))
        if catalog is None:
            extent = Extent(spatial=SpatialExtent([[-180, -90, 180, 90]]),  # TODO: compute the real extent
                            temporal=TemporalExtent([[
                                datetime.strptime("2009-01-01T00:00:00.000000", "%Y-%m-%dT%H:%M:%S.%f"),
                                None]]))
            catalog = Collection(id=str(b0["b0_common"]["satId"][0]),
                                 title=b0["b0_common"]["satName"][0].decode("utf-8"),
                                 description=f"Catalog for satellite "
                                             f"{b0['b0_common']['satName'][0].decode('utf-8')}",
                                 extent=extent)
            self.root_catalog.add_child(catalog, catalog.title)

        # update_collection_extent(item, catalog)

        catalog.add_item(item)
Example #6
    def test_from_dict_set_root(self) -> None:
        path = TestCases.get_path("data-files/examples/hand-0.8.1/collection.json")
        with open(path) as f:
            collection_dict = json.load(f)
        catalog = pystac.Catalog(id="test", description="test desc")
        collection = Collection.from_dict(collection_dict, root=catalog)
        self.assertIs(collection.get_root(), catalog)
Example #7
    def test_supplying_href_in_init_does_not_fail(self) -> None:
        test_href = "http://example.com/collection.json"
        spatial_extent = SpatialExtent(bboxes=[ARBITRARY_BBOX])
        temporal_extent = TemporalExtent(intervals=[[TEST_DATETIME, None]])

        collection_extent = Extent(spatial=spatial_extent, temporal=temporal_extent)
        collection = Collection(
            id="test", description="test desc", extent=collection_extent, href=test_href
        )

        self.assertEqual(collection.get_self_href(), test_href)
Example #8
    def get_collections(self, collection_id=None, headers=None, **kwargs):
        """Get all collections, or a single collection by ID.

        :param collection_id: ID of the collection (optional)
        :param headers: request headers (optional)
        :param kwargs: search parameters (optional)
        :returns: list of pystac.Collection objects
        """
        url = urljoin(
            self.url,
            f"collections/{collection_id}" if collection_id else "collections")
        res = self._handle_query(url=url, headers=headers, **kwargs)
        if isinstance(res, dict):
            res = res.get("collections", [res])
        return [Collection.from_dict(c) for c in res]
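A hypothetical call against a STAC API, assuming a client class that exposes this method with self.url set to the API root (the class name, URL, and collection ID below are illustrative):

client = StacApiClient(url="https://example.com/stac/")
all_collections = client.get_collections()
sentinel = client.get_collections(collection_id="sentinel-2-l2a")[0]
print(sentinel.id)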
Example #9
    def create_catalog_command(destination, source, id, quiet):
        """Creates a relative published 3DEP catalog in DESTINATION.

        If SOURCE is not provided, will use the metadata in AWS. SOURCE is
        expected to be a directory tree mirroring the structure on USGS, so
        it is best created using `stac threedep download-metadata`.
        """
        base_ids = id  # not sure how to rename arguments in click
        collections = {}
        items = {}
        for product in PRODUCTS:
            items[product] = []
            if base_ids:
                ids = base_ids
            else:
                ids = utils.fetch_ids(product)
            for id in ids:
                item = stac.create_item_from_product_and_id(
                    product, id, source)
                items[product].append(item)
                if not quiet:
                    print(item.id)
            extent = Extent.from_items(items[product])
            if product == "1":
                title = "1 arc-second"
                description = "USGS 3DEP 1 arc-second DEMs"
            elif product == "13":
                title = "1/3 arc-second"
                description = "USGS 3DEP 1/3 arc-second DEMs"
            else:
                raise NotImplementedError
            collection = Collection(
                id=f"{USGS_3DEP_ID}-{product}",
                title=title,
                keywords=["USGS", "3DEP", "NED", "DEM", "elevation"],
                providers=[USGS_PROVIDER],
                description=description,
                extent=extent,
                license="PDDL-1.0")
            collections[product] = collection
        catalog = Catalog(id=USGS_3DEP_ID,
                          description=DESCRIPTION,
                          title="USGS 3DEP DEMs",
                          catalog_type=CatalogType.RELATIVE_PUBLISHED)
        for product, collection in collections.items():
            catalog.add_child(collection)
            collection.add_items(items[product])
        catalog.generate_subcatalogs("${threedep:region}")
        catalog.normalize_hrefs(destination)
        catalog.save()
        catalog.validate()
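A minimal direct call, assuming the click decorators simply pass these parameters through (the paths are illustrative, and `id` expects the tuple click builds for a multi-value argument):

create_catalog_command(
    destination="./3dep-catalog",
    source="./3dep-metadata",  # tree from `stac threedep download-metadata`
    id=(),                     # empty: fetch the IDs for every product
    quiet=False,
)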
Example #10
    def test_supplying_href_in_init_does_not_fail(self):
        test_href = "http://example.com/collection.json"
        spatial_extent = SpatialExtent(bboxes=[RANDOM_BBOX])
        temporal_extent = TemporalExtent(intervals=[[TEST_DATETIME, None]])

        collection_extent = Extent(spatial=spatial_extent,
                                   temporal=temporal_extent)
        collection = Collection(id='test',
                                description='test desc',
                                extent=collection_extent,
                                properties={},
                                href=test_href)

        self.assertEqual(collection.get_self_href(), test_href)
Example #11
def lambda_handler(event, context=None):  # avoid a mutable default argument
    logger.debug('Event: %s' % json.dumps(event))

    # check if collection and if so, add to Cirrus
    if 'extent' in event:
        stac.add_collections([Collection.from_dict(event)])

    # check if URL to catalog - ingest all collections
    if 'catalog_url' in event:
        collections = []
        cat = Catalog.from_file(event['catalog_url'])
        for child in cat.get_children():
            if isinstance(child, Collection):
                collections.append(child)
        stac.add_collections(collections)
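Two sketches of events this handler accepts, matching the two branches above (field values are illustrative):

# A collection payload is ingested directly; the handler keys off "extent".
event_collection = {
    "type": "Collection",
    "id": "demo-collection",
    "extent": {"spatial": {"bbox": [[-180, -90, 180, 90]]},
               "temporal": {"interval": [[None, None]]}},
}

# A catalog URL causes every child Collection to be ingested.
event_catalog = {"catalog_url": "https://example.com/catalog.json"}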
Example #12
    def test_spatial_allows_single_bbox(self) -> None:
        temporal_extent = TemporalExtent(intervals=[[TEST_DATETIME, None]])

        # Pass in a single BBOX
        spatial_extent = SpatialExtent(bboxes=ARBITRARY_BBOX)

        collection_extent = Extent(spatial=spatial_extent, temporal=temporal_extent)

        collection = Collection(
            id="test", description="test desc", extent=collection_extent
        )

        # HREF required by validation
        collection.set_self_href("https://example.com/collection.json")

        collection.validate()
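The behavior under test: SpatialExtent normalizes a bare bbox into a list of bboxes, so both forms below should serialize identically (a minimal sketch):

from pystac import SpatialExtent

single = SpatialExtent(bboxes=[0.0, 0.0, 1.0, 1.0])     # one bare bbox
wrapped = SpatialExtent(bboxes=[[0.0, 0.0, 1.0, 1.0]])  # list of bboxes
assert single.to_dict() == wrapped.to_dict()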
Example #13
    def test_label_properties_summary(self) -> None:
        label_properties = ["road_type", "lane_number", "paved"]
        collection = Collection.from_file(self.EXAMPLE_COLLECTION)
        label_ext_summaries = LabelExtension.summaries(collection, True)

        label_ext_summaries.label_properties = label_properties

        summaries = collection.summaries
        assert summaries is not None
        label_properties_summary = summaries.get_list("label:properties")
        assert label_properties_summary is not None
        self.assertListEqual(label_properties, label_properties_summary)

        label_properties_summary_ext = label_ext_summaries.label_properties
        assert label_properties_summary_ext is not None
        self.assertListEqual(label_properties, label_properties_summary_ext)
Example #14
    def test_label_task_summary(self) -> None:
        label_tasks: List[Union[LabelTask, str]] = [LabelTask.REGRESSION]
        collection = Collection.from_file(self.EXAMPLE_COLLECTION)
        label_ext_summaries = LabelExtension.summaries(collection, True)

        label_ext_summaries.label_tasks = label_tasks

        summaries = collection.summaries
        assert summaries is not None
        label_tasks_summary = summaries.get_list("label:tasks")
        assert label_tasks_summary is not None
        self.assertListEqual(label_tasks, label_tasks_summary)

        label_tasks_summary_ext = label_ext_summaries.label_tasks
        assert label_tasks_summary_ext is not None
        self.assertListEqual(label_tasks, label_tasks_summary_ext)
Example #15
    def test_label_methods_summary(self) -> None:
        label_methods: List[Union[LabelMethod, str]] = [LabelMethod.AUTOMATED]
        collection = Collection.from_file(self.EXAMPLE_COLLECTION)
        label_ext_summaries = LabelExtension.summaries(collection, True)

        label_ext_summaries.label_methods = label_methods

        summaries = collection.summaries
        assert summaries is not None
        label_methods_summary = summaries.get_list("label:methods")
        assert label_methods_summary is not None
        self.assertListEqual(label_methods, label_methods_summary)

        label_methods_summary_ext = label_ext_summaries.label_methods
        assert label_methods_summary_ext is not None
        self.assertListEqual(label_methods, label_methods_summary_ext)
Example #16
    def test_label_type_summary(self) -> None:
        label_types = [LabelType.VECTOR]
        collection = Collection.from_file(self.EXAMPLE_COLLECTION)
        label_ext_summaries = LabelExtension.summaries(collection, True)

        label_ext_summaries.label_type = label_types

        summaries = collection.summaries
        assert summaries is not None
        label_type_summary = summaries.get_list("label:type")
        assert label_type_summary is not None
        self.assertListEqual(label_types, label_type_summary)

        label_type_summary_ext = label_ext_summaries.label_type
        assert label_type_summary_ext is not None
        self.assertListEqual(label_types, label_type_summary_ext)
Example #17
    def test_spatial_allows_single_bbox(self):
        temporal_extent = TemporalExtent(intervals=[[TEST_DATETIME, None]])

        # Pass in a single BBOX
        spatial_extent = SpatialExtent(bboxes=RANDOM_BBOX)

        collection_extent = Extent(spatial=spatial_extent,
                                   temporal=temporal_extent)

        collection = Collection(id='test',
                                description='test desc',
                                extent=collection_extent)

        # HREF required by validation
        collection.set_self_href('https://example.com/collection.json')

        collection.validate()
Example #18
def stac_object_from_dict(d, href=None, root=None):
    """Determines how to deserialize a dictionary into a STAC object.

    Args:
        d (dict): The dict to parse.
        href (str): Optional href that is the file location of the object being
            parsed.
        root (Catalog or Collection): Optional root of the catalog for this object.
            If provided, the root's resolved object cache can be used to search for
            previously resolved instances of the STAC object.

    Note: This is used internally in STAC_IO to deserialize STAC Objects.
    It is in the top level __init__ in order to avoid circular dependencies.
    """
    if identify_stac_object_type(d) == STACObjectType.ITEM:
        collection_cache = None
        if root is not None:
            collection_cache = root._resolved_objects.as_collection_cache()

        merge_common_properties(d, json_href=href, collection_cache=collection_cache)

    info = identify_stac_object(d)

    d = migrate_to_latest(d, info)

    if info.object_type == STACObjectType.CATALOG:
        return Catalog.from_dict(d, href=href, root=root)

    if info.object_type == STACObjectType.COLLECTION:
        return Collection.from_dict(d, href=href, root=root)

    if info.object_type == STACObjectType.ITEMCOLLECTION:
        if Extension.SINGLE_FILE_STAC in info.common_extensions:
            return SingleFileSTAC.from_dict(d, href=href, root=root)

        return ItemCollection.from_dict(d, href=href, root=root)

    if info.object_type == STACObjectType.ITEM:
        if Extension.EO in info.common_extensions:
            return EOItem.from_dict(d, href=href, root=root)

        if Extension.LABEL in info.common_extensions:
            return LabelItem.from_dict(d, href=href, root=root)

        return Item.from_dict(d, href=href, root=root)
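A minimal usage sketch: load any STAC JSON and let the dispatcher above pick the right class (the file path is illustrative):

import json

with open("collection.json") as f:
    obj = stac_object_from_dict(json.load(f), href="collection.json")
print(type(obj).__name__)  # e.g. Collection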
Example #19
    def test_clone_preserves_assets(self) -> None:
        path = TestCases.get_path("data-files/collections/with-assets.json")
        original_collection = Collection.from_file(path)
        assert len(original_collection.assets) > 0
        assert all(
            asset.owner is original_collection
            for asset in original_collection.assets.values()
        )

        cloned_collection = original_collection.clone()

        for key in original_collection.assets:
            with self.subTest(f"Preserves {key} asset"):
                self.assertIn(key, cloned_collection.assets)
            cloned_asset = cloned_collection.assets.get(key)
            if cloned_asset is not None:
                with self.subTest(f"Sets owner for {key}"):
                    self.assertIs(cloned_asset.owner, cloned_collection)
Example #20
    # STAC bboxes are [west, south, east, north]; longitudes come first
    spatial_extent = SpatialExtent([[
        -92.72807246278022,
        29.038948834106055,
        -88.02592402528022,
        42.55475543734189,
    ]])

    # The JRC water dataset covers imagery from March 1984 to December 2019
    start_dt = datetime.combine(date(1984, 3, 1), time.min)
    end_dt = datetime.combine(date(2019, 12, 1), time.min)
    collection_temporal_extent = TemporalExtent(intervals=[[start_dt, end_dt]])

    collection = Collection(
        id="jrc-monthly-water-mississippi-river",
        description=collection_description,
        extent=Extent(spatial_extent, collection_temporal_extent),
        title=collection_title,
    )

    s3 = boto3.resource("s3")
    bucket = s3.Bucket(bucket)
    prefix = parsed_s3_path.path.lstrip("/")
    filtered_objects = bucket.objects.filter(Prefix=prefix)

    for obj_summary in filtered_objects:
        extension = obj_summary.key.split(".")[-1]
        if extension == "tif":
            item_id = obj_summary.key.split("/")[-1].split(".")[0]

            year = int(item_id.split("_")[-2])
            month = int(item_id.split("_")[-1])
Example #21
    def test_case_8() -> Collection:
        """Planet disaster data example catalog, 1.0.0-beta.2"""
        return Collection.from_file(
            TestCases.get_path("data-files/catalogs/"
                               "planet-example-v1.0.0-beta.2/collection.json"))
Example #22
    TemporalExtent(
        [
            [
                datetime.strptime("2005-01-01", "%Y-%m-%d"),
                datetime.strptime("2010-01-01", "%Y-%m-%d"),
            ]
        ]
    ),
)

OrthoCollection = Collection(
    id="canada_spot_orthoimages",
    description="Orthoimages of Canada 2005-2010",
    extent=SpotExtents,
    title=None,
    stac_extensions=None,
    license="Proprietery",
    keywords="SPOT, Geobase, orthoimages",
    version="0.0.1",
    providers=SpotProviders,
)

GeobaseLicense = Link(
    "license",
    "https://open.canada.ca/en/open-government-licence-canada",
    "text",
    "Open Government Licence Canada",
)


def build_catalog():
Example #23
        "The bucket that should contain the output of the SentinelHub Batch Ingests",
    )
    return parser


if __name__ == "__main__":
    parser = make_parser()
    args = parser.parse_args()

    # Register methods for IO to/from S3
    register_s3_io()

    session = get_sentinel_hub_session(args.oauth_id, args.oauth_secret)

    # Read STAC from S3
    usfimr_collection = Collection.from_file(
        "s3://usfimr-data/collection.json")
    usfimr_floods = usfimr_collection.get_items()

    # Iterate through GLOFIMR flood events
    flood_with_results = []
    for flood in usfimr_floods:
        # geom bounds
        flood_bounds = flood.bbox

        # temporal bounds
        date_min, date_max = get_flood_temporal_bounds(flood)
        search_results = search_sentinelhub_s1(date_min, date_max,
                                               flood_bounds, session)
        search_results = search_results.json()

        result_count = search_results["context"]["returned"]
Example #24
def add_collection(collection):
    cat = get_root_catalog()
    col = Collection(**collection)
    cat.add_child(col)
    cat.normalize_and_save(ROOT_URL, catalog_type=CatalogType.ABSOLUTE_PUBLISHED)
    return cat
Example #25
                media_type="text/csv",
            )
            huc_item.add_asset(key="hydrogeo", asset=hydrogeo_asset)

            items.append(huc_item)

    overall_extent = Extent(
        SpatialExtent(
            [running_extent[0], running_extent[1], running_extent[2], running_extent[3]]
        ),
        TemporalExtent([[version_dt, None]]),
    )

    # Root Collection
    root_collection = Collection(
        id="hand_021",
        description="The continental flood inundation mapping (CFIM) framework is a high-performance computing (HPC)-based computational framework for the Height Above Nearest Drainage (HAND)-based inundation mapping methodology. Using the 10m Digital Elevation Model (DEM) data produced by U.S. Geological Survey (USGS) 3DEP (the 3-D Elevation Program) and the NHDPlus hydrography dataset produced by USGS and the U.S. Environmental Protection Agency (EPA), a hydrological terrain raster called HAND is computed for HUC6 units in the conterminous U.S. (CONUS). The value of each raster cell in HAND is an approximation of the relative elevation between the cell and its nearest water stream. Derived from HAND, a hydraulic property table is established to calculate river geometry properties for each of the 2.7 million river reaches covered by NHDPlus (5.5 million kilometers in total length). This table is a lookup table for water depth given an input stream flow value. Such lookup is available between water depth 0m and 25m at 1-foot interval. The flood inundation map can then be computed by using HAND and this lookup table based on the near real-time water forecast from the National Water Model (NWM) at the National Oceanic and Atmospheric Administration (NOAA).",
        title="HAND and the Hydraulic Property Table version 0.2.1",
        extent=overall_extent,
        license="CC-BY-4.0",
    )
    for item in items:
        root_collection.add_item(item)

    # Save Complete Catalog
    root_path = "./data/catalog"
    root_collection.normalize_and_save(
        root_path, catalog_type=CatalogType.SELF_CONTAINED
    )
    print("Saved STAC Catalog {} to {}...".format(root_collection.id, root_path))
Example #26

def main():
    parser = make_parser()
    args = parser.parse_args()

    valid_set = set()
    with open(args.valid_csv) as csvfile:
        for row in csv.reader(csvfile):
            name = row[0].split("/")[1]
            name = "_".join(name.split("_")[0:-1])
            valid_set.add(name)

    test_sets = []
    any_test_set = set()
    for test_csv in args.test_csvs:
        test_set = set()
        with open(test_csv) as csvfile:
            for row in csv.reader(csvfile):
                name = row[0].split("/")[1]
                name = "_".join(name.split("_")[0:-1])
                test_set.add(name)
                any_test_set.add(name)
        test_sets.append(test_set)

    def yes_validation(item):
        id = item.id
        id = "_".join(id.split("_")[0:-1])
        return id in valid_set and "Bolivia" not in item.id

    def yes_test_i(i, item):
        id = item.id
        id = "_".join(id.split("_")[0:-1])
        return id in test_sets[i]

    def yes_any_test(item):
        id = item.id
        id = "_".join(id.split("_")[0:-1])
        return id in any_test_set

    def yes_training(item):
        return (not yes_any_test(item) and not yes_validation(item)
                and "Bolivia" not in item.id)

    catalog = Catalog.from_file("./data/catalog/catalog.json")

    experiment = args.experiment
    label_collection_id = EXPERIMENT[experiment]
    label_collection = catalog.get_child(label_collection_id)
    test_label_collection_id = EXPERIMENT["hand"]
    test_label_collection = catalog.get_child(test_label_collection_id)

    # Top-Level
    mldata_catalog = Catalog(
        "{}_mldata".format(experiment),
        "Training/Validation/Test split for {} experiment in sen1floods11".
        format(experiment),
    )

    # Training Imagery and Labels
    training_imagery_collection = Collection("training_imagery",
                                             "training items for experiment",
                                             label_collection.extent)
    training_labels_collection = Collection(
        "training_labels",
        "labels for scenes in the training collection",
        label_collection.extent,
    )
    training_label_items = [
        i.clone() for i in label_collection.get_items() if yes_training(i)
    ]
    mldata_catalog.add_child(training_labels_collection)
    training_labels_collection.add_items(
        [i.clone() for i in label_collection.get_items() if yes_training(i)])
    mldata_catalog.add_child(training_imagery_collection)
    training_imagery_items = np.array(list(map(
        mapper, training_label_items))).flatten()
    training_imagery_collection.add_items(training_imagery_items)
    print("Added {} items to training catalog".format(
        len(training_label_items)))

    # Validation Imagery and Labels
    validation_imagery_collection = Collection(
        "validation_imagery",
        "validation items for experiment",
        test_label_collection.extent,
    )
    validation_labels_collection = Collection(
        "validation_labels",
        "labels for scenes in the validation collection",
        test_label_collection.extent,
    )
    validation_label_items = [
        i.clone() for i in test_label_collection.get_items()
        if yes_validation(i)
    ]
    mldata_catalog.add_child(validation_labels_collection)
    validation_labels_collection.add_items(
        [i.clone() for i in label_collection.get_items() if yes_validation(i)])
    mldata_catalog.add_child(validation_imagery_collection)
    validation_imagery_items = np.array(
        list(map(mapper, validation_label_items))).flatten()
    validation_imagery_collection.add_items(validation_imagery_items)
    print("Added {} items to validation catalog".format(
        len(validation_label_items)))

    # Test Imagery and Labels
    for i in range(len(test_sets)):
        test_imagery_collection = Collection(
            "test_imagery_{}".format(i),
            "test items for experiment",
            test_label_collection.extent,
        )
        test_labels_collection = Collection(
            "test_labels_{}".format(i),
            "labels for scenes in the test collection",
            test_label_collection.extent,
        )
        test_label_items = [
            j.clone() for j in test_label_collection.get_items()
            if yes_test_i(i, j)
        ]
        mldata_catalog.add_child(test_labels_collection)
        test_labels_collection.add_items([
            j.clone() for j in label_collection.get_items()
            if yes_test_i(i, j)
        ])
        mldata_catalog.add_child(test_imagery_collection)
        test_imagery_items = np.array(list(map(mapper,
                                               test_label_items))).flatten()
        test_imagery_collection.add_items(test_imagery_items)
        print("Added {} items to test catalog {}".format(
            len(test_label_items), i))

    print("Saving catalog...")
    mldata_catalog.normalize_hrefs("./data/mldata_{}".format(experiment))
    mldata_catalog.save(CatalogType.SELF_CONTAINED)
Example #27

def main():
    """

# The Data

446 qc'ed chips containing flood events, hand-labeled flood classifications
4385 non-qc'ed chips containing water exported only with sentinel 1 and 2 flood classifications

# The Catalog Outline

** We want to generate a root catalog that is all, or only training, or only validation items **
^^^ Script should support this

- Root Catalog
    - Collection: Sentinel 1 data chips
        - Item: The Item
    - Collection: Sentinel 2 data chips
        - Item: The Item
    - Collection: Sentinel 1 weak labels
        - Item: The Item
    - Collection: Sentinel 2 weak labels
        - Item: The Item
    - Collection: Hand labels
        - Item: The Item
    - Collection: Permanent water labels
        - Item: The Item
    - Collection: Traditional otsu algo labels
        - Item: The Item

## Alternate catalog structure

This structure was considered but rejected in the interest of facilitating collections for each
of the label datasets.

- Root Catalog
    - Collection: Sentinel 1
        - Catalog: Country
            - Catalog: Event ID
                (Note: Catalog will always have the first item. Then it will either have the second
                       item or all the others depending on which dir the first item came from)
                - Item: (dir: S1 + S1_NoQC) Sentinel 1 data chip
                - Item: (dir: S1Flood_NoQC) Labels from "weak" classification algorithm applied to S1
                - Item: (dir: QC_v2) Labels from hand classification (ORed with item below)
                - Item: (dir: S1Flood) Labels from traditional Otsu algorithm
                - Item: (dir: Perm) Labels from perm water dataset (this is a Byte tiff, only 1 or 0
                        for yes or no perm water)
    - Collection: Sentinel 2
        - Catalog: Country
            - Catalog: Event ID
                - Item: (dir: S2 + S2_NoQC) Sentinel 2 data chip
                - Item: (dir: S2Flood) Labels from traditional Otsu algorithm applied to S2
    - Collection: PermJRC
        - Catalog: Lat 10
            - Catalog: Lon 10
                - Item: (dir: PermJRC)
    """
    parser = argparse.ArgumentParser(
        description="Build STAC Catalog for sen1floods11")
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    debug = args.debug

    storage = S3Storage("sen1floods11-data")

    catalog_description = "Bonafilia, D., Tellman, B., Anderson, T., Issenberg, E. 2020. Sen1Floods11: a georeferenced dataset to train and test deep learning flood algorithms for Sentinel-1. The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, 2020, pp. 210-211. Available Open access at: http://openaccess.thecvf.com/content_CVPRW_2020/html/w11/Bonafilia_Sen1Floods11_A_Georeferenced_Dataset_to_Train_and_Test_Deep_Learning_CVPRW_2020_paper.html"  # noqa: E501
    catalog_title = "A georeferenced dataset to train and test deep learning flood algorithms for Sentinel-1"  # noqa: E501

    catalog = Catalog("sen1floods11", catalog_description, title=catalog_title)
    print("Created Catalog {}".format(catalog.id))

    # Build Sentinel 1 Collection
    sentinel1 = Collection(
        "S1",
        "Sentinel-1 GRD Chips overlapping labeled data. IW mode, GRD product. See https://developers.google.com/earth-engine/sentinel1 for information on preprocessing",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
    )
    collection_add_sentinel_chips(sentinel1,
                                  storage.ls("S1/"),
                                  "s1",
                                  debug=debug)
    collection_add_sentinel_chips(sentinel1,
                                  storage.ls("S1_NoQC/"),
                                  "s1",
                                  debug=debug)
    collection_update_extents(sentinel1)
    catalog.add_child(sentinel1)

    # Build Sentinel 2 Collection
    sentinel2 = Collection(
        "S2",
        "Sentinel-2 MSI L1C chips overlapping labeled data. Contains all spectral bands (1 - 12). Does not contain QA mask.",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
    )
    collection_add_sentinel_chips(sentinel2,
                                  storage.ls("S2/"),
                                  "s2",
                                  debug=debug)
    collection_add_sentinel_chips(sentinel2,
                                  storage.ls("S2_NoQC/"),
                                  "s2",
                                  debug=debug)
    collection_update_extents(sentinel2)
    catalog.add_child(sentinel2)

    # Build S1 Weak Labels Collection
    s1weak_labels = Collection(
        "S1Flood_NoQC",
        "Chips of water/nowater labels derived from standard OTSU thresholding of Sentinel-1 VH band overlapping weakly-labeled data.",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
        stac_extensions=[Extensions.LABEL],
    )
    label_collection_add_items(
        s1weak_labels,
        catalog,
        storage.ls("S1Flood_NoQC/"),
        sentinel1_links_func,
        "0: Not Water. 1: Water.",
        LabelType.RASTER,
        label_classes=[LabelClasses([0, 1])],
        label_tasks=["classification"],
        debug=debug,
    )
    collection_update_extents(s1weak_labels)
    catalog.add_child(s1weak_labels)

    # Build S2 Weak Labels Collection
    s2weak_labels = Collection(
        "NoQC",
        "Weakly-labeled chips derived from traditional Sentinel-2 Classification",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
        stac_extensions=[Extensions.LABEL],
    )
    label_collection_add_items(
        s2weak_labels,
        catalog,
        storage.ls("NoQC/"),
        sentinel2_links_func,
        "-1: No Data / Not Valid. 0: Not Water. 1: Water.",  # noqa: E501
        LabelType.RASTER,
        label_classes=[LabelClasses([-1, 0, 1])],
        label_tasks=["classification"],
        debug=debug,
    )
    collection_update_extents(s2weak_labels)
    catalog.add_child(s2weak_labels)

    # Build Hand Labels Collection
    hand_labels = Collection(
        "QC_v2",
        "446 hand labeled chips of surface water from selected flood events",
        extent=Extent(SpatialExtent([None, None, None, None]), None),
        stac_extensions=[Extensions.LABEL],
    )
    label_collection_add_items(
        hand_labels,
        catalog,
        storage.ls("QC_v2/"),
        sentinel1_sentinel2_links_func,
        "Hand labeled chips containing ground truth. -1: No Data / Not Valid. 0: Not Water. 1: Water.",  # noqa: E501
        LabelType.RASTER,
        label_classes=[LabelClasses([-1, 0, 1])],
        label_tasks=["classification"],
        debug=debug,
    )
    collection_update_extents(hand_labels)
    catalog.add_child(hand_labels)

    # Build Permanent Labels collection
    permanent_labels = Collection(
        "Perm",
        "Permanent water chips generated from the 'transition' layer of the JRC (European Commission Joint Research Centre) dataset",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
        stac_extensions=[Extensions.LABEL],
    )
    label_collection_add_items(
        permanent_labels,
        catalog,
        storage.ls("Perm/"),
        lambda *_: [],  # No easy way to map JRC source files to the label chips...
        "0: Not Water. 1: Water.",
        LabelType.RASTER,
        label_classes=[LabelClasses([0, 1])],
        label_tasks=["classification"],
        debug=debug,
    )
    collection_update_extents(permanent_labels)
    catalog.add_child(permanent_labels)

    # Build Otsu algorithm Labels collection
    otsu_labels = Collection(
        "S1Flood",
        "Chips of water/nowater derived from standard OTSU thresholding of Sentinel-1 VH band overlapping labeled data",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
        stac_extensions=[Extensions.LABEL],
    )
    label_collection_add_items(
        otsu_labels,
        catalog,
        storage.ls("S1Flood/"),
        sentinel1_links_func,
        "0: Not Water. 1: Water.",
        LabelType.RASTER,
        label_classes=[LabelClasses([0, 1])],
        label_tasks=["classification"],
        debug=debug,
    )
    collection_update_extents(otsu_labels)
    catalog.add_child(otsu_labels)

    # Save Complete Catalog
    root_path = "./catalog"
    catalog.normalize_and_save(root_path,
                               catalog_type=CatalogType.SELF_CONTAINED)
    print("Saved STAC Catalog {} to {}...".format(catalog.id, root_path))
Example #28
                datetime=start_time,
                properties={},
            )
            for asset in assets:
                image_item.add_asset(
                    asset.href.split("/")[-1].split(".")[0], asset)

            stac_items.append(image_item)
        # STAC bboxes are [west, south, east, north], i.e. [left, bottom, right, top]
        aggregate_spatial_extent = SpatialExtent([[
            aggregate_bounds.left,
            aggregate_bounds.bottom,
            aggregate_bounds.right,
            aggregate_bounds.top,
        ]])
        aggregate_extent = Extent(aggregate_spatial_extent, temporal_extent)
        collection = Collection(
            flood_id,
            "Imagery coextensive with GLOFIMR flood {}".format(flood_id),
            extent=aggregate_extent,
        )
        for stac_item in stac_items:
            collection.add_item(stac_item)

        catalog.add_child(collection)

    # Save Complete Catalog
    root_path = "./data/catalog"
    catalog.normalize_and_save(root_path,
                               catalog_type=CatalogType.SELF_CONTAINED)
    print("Saved STAC Catalog {} to {}...".format(catalog.id, root_path))
Example #29
    def setUp(self) -> None:
        self.maxDiff = None
        self.collection = Collection.from_file(
            TestCases.get_path("data-files/item-assets/example-landsat8.json"))
            with open("{}/{}/{}-usfimr.wkb".format(root_path, fid, fid),
                      "wb") as wkb_file:
                wkb_file.write(shapely_geom.wkb)

            with open("{}/{}/{}-usfimr.geojson".format(root_path, fid, fid),
                      "w") as geojson_file:
                geojson_file.write(json.dumps(geom))

    overall_extent = Extent(
        SpatialExtent(running_spatial_extent),
        TemporalExtent([[running_start_dt, running_end_dt]]),
    )

    root_collection = Collection(
        id="USFIMR",
        description=
        "GloFIMR is an extension of the USFIMR project that commenced in August 2016 with funding from NOAA. The project’s main goal is to provide high-resolution inundation extent maps of flood events to be used by scientists and practitioners for model calibration and flood susceptibility evaluation. The maps are based on analysis of Remote Sensing imagery from a number of Satellite sensors (e.g. Landsat, Sentinel-1, Sentinel-2). The maps are accessible via the online map repository below. The repository is under development and new maps are added upon request.",
        title="U.S. Flood Inundation Mapping Repository",
        extent=overall_extent,
    )

    for item in items:
        root_collection.add_item(item)

    # Save Complete Catalog
    root_collection.normalize_and_save(root_path,
                                       catalog_type=CatalogType.SELF_CONTAINED)
    print("Saved STAC Catalog {} to {}...".format(root_collection.id,
                                                  root_path))