def main():
    """Assemble an MLData STAC catalog with rasterized labels and save it.

    Loads the catalog named by ``--mldata-catalog``, re-creates its ``train``,
    ``test`` and ``validation`` children as new collections, builds one label
    item per image item with ``construct_label_item`` and writes everything to
    ``./data/catalog`` as a self-contained catalog.
    """
    register_s3_io()

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--mldata-catalog",
        type=str,
        default="s3://usfimr-s1-mldata/usfimr-s1-mldata-catalog_seed42/catalog.json",
    )
    cli.add_argument(
        "--chip-label-dir",
        type=str,
        default="s3://jrc-fimr-rasterized-labels/version2",
    )
    opts = cli.parse_args()

    source_catalog = pystac.Catalog.from_file(opts.mldata_catalog)
    # Normalize the label directory so it ends with exactly one slash.
    label_dir = opts.chip_label_dir.rstrip("/") + "/"

    # (child id in the source catalog, description of the new collection)
    splits = (
        ("train", "Training collection"),
        ("test", "Test collection"),
        ("validation", "Validation collection"),
    )
    split_sources = [
        source_catalog.get_child(split_id) for split_id, _ in splits
    ]

    output_catalog = pystac.Catalog(
        "usfimr_jrc-s1-mldata-rasterized",
        "MLData STAC Catalog for usfimr+jrc labels of flood and permanent water with S1 imagery",
    )

    label_items = []
    split_collections = []
    for (split_id, split_desc), split_source in zip(splits, split_sources):
        split_collection = pystac.Collection(split_id, split_desc,
                                             split_source.extent)
        for image_item in split_source.get_items():
            split_collection.add_item(image_item)
            label_items.append(construct_label_item(image_item, label_dir))
        split_collections.append(split_collection)

    label_catalog = pystac.Catalog("usfimr_sar_labels_tif",
                                   "USFIMR + JRC labels for flood detection")
    for label_item in label_items:
        label_catalog.add_item(label_item)

    # Labels first, then train / test / validation.
    for child in [label_catalog, *split_collections]:
        output_catalog.add_child(child)

    output_catalog.normalize_and_save(
        "./data/catalog",
        catalog_type=pystac.CatalogType.SELF_CONTAINED,
    )
Ejemplo n.º 2
0
def save_catalog(dbname=DBNAME, catalog_type='SELF_CONTAINED', root_href=''):
    """Make a STAC catalog for all the data in the database.

    Collections are organized by source, region, and year. The database is
    always closed before returning, including when an error is raised while
    the catalog is being built or saved.

    Args:
        dbname: Name passed to ``StackDB`` to open the database.
        catalog_type: Forwarded to every ``pystac.Catalog`` constructor and
            to ``normalize_and_save``.
        root_href: Root location for the saved catalog. An empty string means
            the current working directory.

    Returns:
        None. Prints ``No data to save`` and returns early when the database
        summary reports fewer than one item.
    """
    db = StackDB(dbname)
    try:
        summary = db.summary()

        if summary['Total Items'] < 1:
            print('No data to save')
            return

        # if we don't get a root reference, assume we will use the current
        # directory
        if root_href == '':
            root_href = os.getcwd()

        # the master catalog
        catalog = pystac.Catalog('icecharts',
                                 'Weekly Ice Charts from NIC and CIS',
                                 catalog_type=catalog_type)
        for source in summary['Sources']:
            print(source)
            sroot_href = '/'.join([root_href, source])
            srccat = pystac.Catalog(source + '-icecharts',
                                    'Weekly icecharts from ' + source,
                                    catalog_type=catalog_type)
            for region in summary[source + ' Regions']:
                print(region)
                rsroot_href = '/'.join([sroot_href, region])
                rgncat = pystac.Catalog('-'.join([source, region,
                                                  'icecharts']).strip(),
                                        'Weekly icecharts for ' + region,
                                        catalog_type=catalog_type)
                # Look the date range up once; both ends are inclusive.
                date_range = summary['{0} {1} Date Range'.format(
                    source, region)]
                for yr in range(date_range[0], date_range[1] + 1):
                    print(yr)
                    yrsroot_href = '/'.join([rsroot_href, str(yr)])
                    collid = ''.join([source, region,
                                      str(yr), 'icecharts']).strip()
                    coll = make_collection(db, source, region, str(yr),
                                           yrsroot_href, collid,
                                           'Icecharts from ' + source)
                    # make_collection may return a falsy value; skip those.
                    if coll:
                        rgncat.add_child(coll, title=collid)
                srccat.add_child(rgncat)
            catalog.add_child(srccat)

        catalog.normalize_and_save(root_href, catalog_type=catalog_type)
    finally:
        # Release the database handle even if building or saving failed.
        db.close()
Ejemplo n.º 3
0
 def test_from_dict_set_root(self) -> None:
     """``Collection.from_dict`` attaches the catalog passed as ``root``."""
     root_catalog = pystac.Catalog(id="test", description="test desc")
     example_path = TestCases.get_path(
         "data-files/examples/hand-0.8.1/collection.json")
     with open(example_path) as src:
         raw_collection = json.load(src)
     loaded = Collection.from_dict(raw_collection, root=root_catalog)
     self.assertIs(loaded.get_root(), root_catalog)
Ejemplo n.º 4
0
 def test_produces_layout_for_catalog_with_filename(self) -> None:
     """A catalog template that ends in a file name is used verbatim."""
     layout = TemplateLayoutStrategy(
         catalog_template="cat/${id}/${description}/${id}.json")
     catalog = pystac.Catalog(id="test", description="test-desc")
     expected_href = "http://example.com/cat/test/test-desc/test.json"
     actual_href = layout.get_href(catalog, parent_dir="http://example.com")
     self.assertEqual(actual_href, expected_href)
Ejemplo n.º 5
0
 def test_produces_layout_for_catalog(self) -> None:
     """The catalog template yields the expected ``catalog.json`` href."""
     layout = TemplateLayoutStrategy(
         catalog_template=self.TEST_CATALOG_TEMPLATE)
     catalog = pystac.Catalog(id="test", description="test-desc")
     expected_href = "http://example.com/cat/test/test-desc/catalog.json"
     actual_href = layout.get_href(catalog, parent_dir="http://example.com")
     self.assertEqual(actual_href, expected_href)
Ejemplo n.º 6
0
async def test_fetches_valid_item(app_client, load_test_data: Callable,
                                  load_test_collection):
    """Create an item, fetch it back and validate the response with pystac.

    The item from ``test_item.json`` is POSTed to the items endpoint of the
    loaded test collection, compared with the response (ignoring ``links``),
    then fetched by id and validated.
    """
    coll = load_test_collection

    in_json = load_test_data("test_item.json")
    in_item = Item.parse_obj(in_json)
    # The URL must be an f-string: without the prefix the literal text
    # "{coll.id}" was sent as the collection id instead of the real id.
    resp = await app_client.post(
        f"/collections/{coll.id}/items",
        json=in_json,
    )
    assert resp.status_code == 200

    post_item = Item.parse_obj(resp.json())
    assert in_item.dict(exclude={"links"}) == post_item.dict(exclude={"links"})

    resp = await app_client.get(f"/collections/{coll.id}/items/{post_item.id}")

    assert resp.status_code == 200
    item_dict = resp.json()
    # Mock root to allow validation
    mock_root = pystac.Catalog(id="test",
                               description="test desc",
                               href="https://example.com")
    item = pystac.Item.from_dict(item_dict,
                                 preserve_dict=False,
                                 root=mock_root)
    item.validate()
Ejemplo n.º 7
0
 def test_produces_layout_for_catalog(self) -> None:
     """The custom catalog function decides the href of a root catalog."""
     catalog = pystac.Catalog(id="test", description="test desc")
     layout = CustomLayoutStrategy(
         catalog_func=self.get_custom_catalog_func())
     actual_href = layout.get_href(
         catalog,
         parent_dir="http://example.com",
         is_root=True,
     )
     self.assertEqual(actual_href, "http://example.com/cat/True/test.json")
Ejemplo n.º 8
0
    def test_link_does_not_fail_if_href_is_none(self) -> None:
        """``get_href`` on an item link returns None rather than raising."""
        parent = pystac.Catalog(id="test", description="test desc")
        parent.add_item(self.item)
        parent.set_self_href("/some/href")

        item_link = parent.get_single_link("item")
        assert item_link is not None
        resolved_href = item_link.get_href()
        self.assertIsNone(resolved_href)
Ejemplo n.º 9
0
    def test_link_does_not_fail_if_href_is_none(self):
        """``get_href`` on an item link returns None rather than raising."""
        parent = pystac.Catalog(id='test', description='test desc')
        parent.add_item(self.item)
        parent.set_self_href('/some/href')
        parent.make_all_links_relative()

        item_link = parent.get_single_link('item')
        resolved_href = item_link.get_href()
        self.assertIsNone(resolved_href)
Ejemplo n.º 10
0
def info(args):
    """Process bucket keys and write STAC item files plus a catalog.

    Lists keys with ``get_resources``, queues one ``Task`` per key (up to
    ``args.limit``), runs the queue, then for every result without an error
    writes ``<name>.json`` under ``args.stac_directory`` and links it from
    the catalog. Errors are written to ``errors.json`` in the current
    directory and the catalog to ``catalog.json`` in the STAC directory.

    Args:
        args: Parsed options. This function reads ``bucket``,
            ``stac_base_url``, ``stac_directory``, ``limit`` and
            ``resolution``; the whole object is also passed to ``Layer``.
    """
    keys = list(get_resources(args.bucket, delimiter='/'))
    # Pass the argument lazily so formatting only happens at DEBUG level.
    logger.debug('Querying boundaries for %d keys', len(keys))

    queue = Process()

    catalog = pystac.Catalog('3dep',
                             'A catalog of USGS 3DEP Lidar hosted on AWS s3.',
                             href=f'{args.stac_base_url}catalog.json',
                             stac_extensions=['POINTCLOUD'])

    base = Path(args.stac_directory)
    base.mkdir(exist_ok=True, parents=True)

    count = 0
    for k in keys:
        # The ``count != 0`` guard means a limit of 0 never stops the loop.
        if count == args.limit and count != 0:
            break

        queue.put(Task(args.bucket, k, args.resolution))
        count += 1

    queue.do(count=20)

    layer = Layer(args)
    for r in queue.results:
        if r.error:
            continue
        layer.add(r)

        # Build the dict before opening the file so a failed conversion
        # does not leave an empty JSON file behind.
        item_dict = layer.add_stac(r).to_dict()
        with open(base / f"{r.name}.json", 'w') as f:
            json.dump(item_dict, f)

        link = pystac.Link('item', f'{args.stac_base_url}{r.name}.json')
        catalog.add_link(link)

    errors = [r.error for r in queue.results if r.error]

    # Context manager guarantees the file is closed even if the write fails.
    with open('errors.json', 'wb') as f:
        f.write(json.dumps(errors).encode('utf-8'))

    with open(base / "catalog.json", 'w') as f:
        json.dump(catalog.to_dict(), f)
Ejemplo n.º 11
0
def create_catalog(suffix: Any, include_href: bool = True) -> pystac.Catalog:
    """Build a test catalog whose id and description embed ``suffix``.

    When ``include_href`` is false the catalog is created with ``href=None``.
    """
    catalog_id = "test {}".format(suffix)
    catalog_description = "test desc {}".format(suffix)
    catalog_href = None
    if include_href:
        catalog_href = "http://example.com/catalog_{}.json".format(suffix)
    return pystac.Catalog(
        id=catalog_id,
        description=catalog_description,
        href=catalog_href,
    )
Ejemplo n.º 12
0
 def test_resolved_self_href(self) -> None:
     """The self link of a saved and reloaded catalog is its file path."""
     original = pystac.Catalog(id="test", description="test desc")
     with TemporaryDirectory() as tmp_dir:
         original.normalize_and_save(tmp_dir)
         saved_path = os.path.join(tmp_dir, "catalog.json")
         reloaded = pystac.Catalog.from_file(saved_path)
         self_link = reloaded.get_single_link(pystac.RelType.SELF)
         assert self_link
         self_link.resolve_stac_object()
         absolute_href = self_link.get_absolute_href()
         self.assertEqual(absolute_href, saved_path)
Ejemplo n.º 13
0
 def test_produces_fallback_layout_for_catalog(self) -> None:
     """With no catalog function, the fallback strategy picks the href."""
     parent_dir = "http://example.com"
     fallback_layout = BestPracticesLayoutStrategy()
     layout = CustomLayoutStrategy(
         collection_func=self.get_custom_collection_func(),
         item_func=self.get_custom_item_func(),
         fallback_strategy=fallback_layout,
     )
     catalog = pystac.Catalog(id="test", description="test desc")
     actual_href = layout.get_href(catalog, parent_dir=parent_dir)
     fallback_href = fallback_layout.get_href(catalog, parent_dir=parent_dir)
     self.assertEqual(actual_href, fallback_href)
Ejemplo n.º 14
0
 def test_produces_fallback_layout_for_catalog(self) -> None:
     """With no catalog template, the fallback strategy picks the href."""
     parent_dir = "http://example.com"
     fallback_layout = BestPracticesLayoutStrategy()
     layout = TemplateLayoutStrategy(
         collection_template=self.TEST_COLLECTION_TEMPLATE,
         item_template=self.TEST_ITEM_TEMPLATE,
         fallback_strategy=fallback_layout,
     )
     catalog = pystac.Catalog(id="test", description="test desc")
     actual_href = layout.get_href(catalog, parent_dir=parent_dir)
     fallback_href = fallback_layout.get_href(catalog, parent_dir=parent_dir)
     self.assertEqual(actual_href, fallback_href)
Ejemplo n.º 15
0
async def test_returns_valid_links_in_collections(app_client, load_test_data):
    """Test links from listing collections

    Creates a collection, fetches it by id and through the collection
    listing, validates both with pystac and checks that every link of the
    single-collection response also appears in the listed collection.
    """
    in_json = load_test_data("test_collection.json")
    resp = await app_client.post(
        "/collections",
        json=in_json,
    )
    assert resp.status_code == 200

    # Get collection by ID
    resp = await app_client.get(f"/collections/{in_json['id']}")
    assert resp.status_code == 200
    resp_json = resp.json()

    # Mock root to allow validation
    mock_root = pystac.Catalog(
        id="test", description="test desc", href="https://example.com"
    )
    collection = pystac.Collection.from_dict(
        resp_json, root=mock_root, preserve_dict=False
    )
    assert collection.validate()

    # List collections
    resp = await app_client.get("/collections")
    assert resp.status_code == 200
    resp_json = resp.json()
    collections = resp_json["collections"]
    # Find collection in list by ID. A default is supplied so that a missing
    # collection fails the assertion below instead of raising StopIteration
    # inside the coroutine.
    single_coll = next(
        (coll for coll in collections if coll["id"] == in_json["id"]), None
    )
    assert single_coll is not None

    single_coll_mocked_link = pystac.Collection.from_dict(
        single_coll, root=mock_root, preserve_dict=False
    )
    is_coll_from_list_valid = single_coll_mocked_link.validate()
    assert is_coll_from_list_valid

    # Check links from the collection GET and list
    listed_links = single_coll_mocked_link.to_dict()["links"]
    assert [
        i
        for i in collection.to_dict()["links"]
        if i not in listed_links
    ] == []
Ejemplo n.º 16
0
def main():
    """Split image items into train/test/validation and save an MLData catalog.

    Reads the USFIMR collection and the SAR catalog given on the command
    line, gathers image items and a labels collection with ``collect_items``,
    splits the image items with ``train_test_val_split`` (seeded by
    ``--random-seed``) and saves a self-contained catalog under
    ``./data/mldata-catalog_seed<seed>``.
    """
    register_s3_io()

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--usfimr-collection", default="s3://usfimr-data/collection.json"
    )
    cli.add_argument("--sar-catalog", required=True, type=str)
    cli.add_argument("--random-seed", default=42, type=int)
    opts = cli.parse_args()

    usfimr_collection = pystac.Collection.from_file(opts.usfimr_collection)
    sar_catalog = pystac.Catalog.from_file(opts.sar_catalog)

    output_catalog = pystac.Catalog(
        "usfimr-s1-mldata", "MLData STAC Catalog for usfimr-s1 dataset"
    )

    image_items, labels_collection = collect_items(sar_catalog, usfimr_collection)

    # NOTE(review): the two 0.2 values are presumably the test and validation
    # fractions - confirm against train_test_val_split.
    training, testing, validation = train_test_val_split(
        image_items, 0.2, 0.2, random_state=opts.random_seed
    )

    # Each split collection uses its name as both id and description and
    # reuses the extent of the USFIMR collection.
    split_collections = []
    for split_name, split_items in (
        ("train", training),
        ("test", testing),
        ("validation", validation),
    ):
        split_collection = pystac.Collection(
            split_name, split_name, usfimr_collection.extent
        )
        for image_item in split_items:
            split_collection.add_item(image_item)
        split_collections.append(split_collection)

    # Labels first, then train / test / validation.
    for child in [labels_collection, *split_collections]:
        output_catalog.add_child(child)

    output_dir = "./data/mldata-catalog_seed{}".format(opts.random_seed)
    output_catalog.normalize_and_save(
        output_dir, catalog_type=pystac.CatalogType.SELF_CONTAINED
    )
Ejemplo n.º 17
0
def test_returns_valid_collection(app_client, load_test_data):
    """Validate a collection fetched from the API with pystac."""
    payload = load_test_data("test_collection.json")
    put_resp = app_client.put("/collections", json=payload)
    assert put_resp.status_code == 200

    get_resp = app_client.get(f"/collections/{payload['id']}")
    assert get_resp.status_code == 200
    fetched = get_resp.json()

    # A root catalog with an href is supplied so the collection can validate.
    root_catalog = pystac.Catalog(
        id="test",
        description="test desc",
        href="https://example.com",
    )
    pystac.Collection.from_dict(
        fetched,
        root=root_catalog,
        preserve_dict=False,
    ).validate()
Ejemplo n.º 18
0
    def generate_subcatalogs(self, template, defaults=None, **kwargs):
        """Walks through the catalog and generates subcatalogs
        for items based on the template string. See :class:`~pystac.layout.LayoutTemplate`
        for details on the construction of template strings. This template string
        will be applied to the items, and subcatalogs will be created that separate
        and organize the items based on template values.

        Child catalogs are processed first (recursively), then the items
        directly under this catalog are moved into the generated subcatalogs.

        Args:
            template (str):   A template string that
                can be consumed by a :class:`~pystac.layout.LayoutTemplate`
            defaults (dict):  Default values for the template variables
                that will be used if the property cannot be found on
                the item.
            **kwargs: Accepted for compatibility; not used by this method.

        Returns:
            [catalog]: List of new catalogs created
        """
        result = []
        for child in self.get_children():
            result.extend(
                child.generate_subcatalogs(template, defaults=defaults))

        layout_template = LayoutTemplate(template, defaults=defaults)
        # Keyed by the full chain of subcatalog ids below this catalog, not
        # by the bare id: the same value can occur under different parents
        # (e.g. month "1" in two different years) and each needs its own
        # subcatalog.
        subcat_path_to_cat = {}
        # Snapshot the items because they are removed from self in the loop.
        items = list(self.get_items())
        for item in items:
            item_parts = layout_template.get_template_values(item)
            curr_parent = self
            subcat_path = ()
            for k, v in item_parts.items():
                subcat_id = '{}'.format(v)
                subcat_path += (subcat_id,)
                subcat = subcat_path_to_cat.get(subcat_path)
                if subcat is None:
                    subcat_desc = 'Catalog of items from {} with {} of {}'.format(
                        curr_parent.id, k, v)
                    subcat = pystac.Catalog(id=subcat_id,
                                            description=subcat_desc)
                    curr_parent.add_child(subcat)
                    subcat_path_to_cat[subcat_path] = subcat
                    result.append(subcat)
                curr_parent = subcat
            # Move the item from this catalog to the deepest subcatalog.
            self.remove_item(item.id)
            curr_parent.add_item(item)

        return result
Ejemplo n.º 19
0
def test_returns_valid_item(app_client, load_test_data):
    """Validate an item fetched from the API with pystac."""
    payload = load_test_data("test_item.json")
    post_resp = app_client.post(
        f"/collections/{payload['collection']}/items",
        json=payload,
    )
    assert post_resp.status_code == 200

    get_resp = app_client.get(
        f"/collections/{payload['collection']}/items/{payload['id']}")
    assert get_resp.status_code == 200
    fetched = get_resp.json()

    # A root catalog with an href is supplied so the item can validate.
    root_catalog = pystac.Catalog(
        id="test",
        description="test desc",
        href="https://example.com",
    )
    pystac.Item.from_dict(
        fetched,
        preserve_dict=False,
        root=root_catalog,
    ).validate()
Ejemplo n.º 20
0
    def catalog(self):
        """Return the cached catalog, creating it first when needed.

        Creation only happens when a catalog path is set and nothing is
        cached yet. Any file already at that path is deleted, then either a
        ``pystac.Collection`` (when ``with_bbox`` is truthy) or a plain
        ``pystac.Catalog`` is built and cached.
        """
        if self.catalog_path is None or self._catalog is not None:
            return self._catalog

        if os.path.isfile(self.catalog_path):
            os.remove(self.catalog_path)

        # Fall back to the default id / title when none was configured.
        common_args = dict(
            id="Sen2Like_catalog" if self.sid is None else self.sid,
            title="Sen2Like Catalog" if self.title is None else self.title,
            href=self.catalog_path,
            description="Catalog containing Sen2Like generated products",
        )
        if self.with_bbox:
            # NOTE(review): west and east are both 180, which looks like a
            # degenerate bbox (perhaps -180 was intended) - confirm.
            default_extent = pystac.Extent(
                pystac.SpatialExtent([180, -56, 180, 83]),
                pystac.TemporalExtent([None, None]),
            )
            self._catalog = pystac.Collection(extent=default_extent,
                                              **common_args)
        else:
            self._catalog = pystac.Catalog(**common_args)

        return self._catalog
Ejemplo n.º 21
0
async def test_returns_valid_collection(app_client, load_test_data):
    """Create a collection, fetch it by id and validate it with pystac."""
    payload = load_test_data("test_collection.json")
    post_resp = await app_client.post("/collections", json=payload)
    assert post_resp.status_code == 200

    get_resp = await app_client.get(f"/collections/{payload['id']}")
    assert get_resp.status_code == 200
    fetched = get_resp.json()

    # A root catalog with an href is supplied so the collection can validate.
    root_catalog = pystac.Catalog(
        id="test",
        description="test desc",
        href="https://example.com",
    )
    pystac.Collection.from_dict(
        fetched,
        root=root_catalog,
        preserve_dict=False,
    ).validate()
Ejemplo n.º 22
0
def main():
    """Build, validate and save a catalog of the two AVIRIS collections.

    One collection is created per flight-line CSV (AVIRIS Classic and
    AVIRIS-NG) and filled from its data frame with ``stacframes.df_to``.
    Both are added to an ``aviris`` catalog that is normalized, validated and
    saved self-contained under ``./data/catalog``.
    """

    def open_ended_extent():
        # Placeholder extent: unknown bbox, open-ended range from 1970-01-01.
        return pystac.Extent(
            spatial=pystac.SpatialExtent([[None, None, None, None]]),
            temporal=pystac.TemporalExtent(
                [[datetime(1970, 1, 1, tzinfo=timezone.utc), None]]),
        )

    flight_line_sources = (
        (AvirisClassic, "aviris-flight-lines.csv"),
        (AvirisNg, "aviris-ng-flight-lines.csv"),
    )
    collections = []
    for source_cls, csv_name in flight_line_sources:
        frame = source_cls.as_df(csv_name)
        source_collection = pystac.Collection(
            source_cls.COLLECTION_NAME,
            AVIRIS_DESCRIPTION,
            open_ended_extent(),
        )
        stacframes.df_to(source_collection, frame)
        collections.append(source_collection)

    # Normalize before validation to set all the required object links
    catalog = pystac.Catalog("aviris", AVIRIS_DESCRIPTION)
    for source_collection in collections:
        catalog.add_child(source_collection)
    catalog_path = "./data/catalog"
    catalog.normalize_hrefs(catalog_path)

    logger.info("Validating catalog...")
    catalog.validate_all()

    logger.info("Saving catalog to {}...".format(catalog_path))
    catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)

    logger.info("Done!")
Ejemplo n.º 23
0
 def test_from_dict_sets_root(self) -> None:
     """Every item of ``ItemCollection.from_dict`` gets the given root."""
     root_catalog = pystac.Catalog(id="test", description="test desc")
     # Work on a copy so the shared fixture dict is not handed to from_dict.
     source_dict = deepcopy(self.item_collection_dict)
     loaded = ItemCollection.from_dict(source_dict, root=root_catalog)
     for loaded_item in loaded.items:
         self.assertEqual(loaded_item.get_root(), root_catalog)
Ejemplo n.º 24
0
 def test_produces_layout_for_child_catalog(self) -> None:
     """A non-root catalog is placed in a directory named after its id."""
     child_catalog = pystac.Catalog(id="test", description="test desc")
     expected_href = "http://example.com/test/catalog.json"
     actual_href = self.strategy.get_href(
         child_catalog, parent_dir="http://example.com")
     self.assertEqual(actual_href, expected_href)
Ejemplo n.º 25
0
def create_catalog(suffix, include_href=True):
    """Build a test catalog whose id and description embed ``suffix``.

    When ``include_href`` is false the catalog is created with ``href=None``.
    """
    catalog_id = 'test {}'.format(suffix)
    catalog_description = 'test desc {}'.format(suffix)
    catalog_href = None
    if include_href:
        catalog_href = 'http://example.com/catalog_{}.json'.format(suffix)
    return pystac.Catalog(id=catalog_id,
                          description=catalog_description,
                          href=catalog_href)
Ejemplo n.º 26
0
    def generate_subcatalogs(
        self,
        template: str,
        defaults: Optional[Dict[str, Any]] = None,
        parent_ids: Optional[List[str]] = None,
    ) -> List["Catalog"]:
        """Walks through the catalog and generates subcatalogs
        for items based on the template string.

        See :class:`~pystac.layout.LayoutTemplate`
        for details on the construction of template strings. This template string
        will be applied to the items, and subcatalogs will be created that separate
        and organize the items based on template values.

        Child catalogs are processed first (recursively), then the item links
        of this catalog are resolved and each item is either kept in place or
        added to a subcatalog.

        Args:
            template :   A template string that
                can be consumed by a :class:`~pystac.layout.LayoutTemplate`
            defaults :  Default values for the template variables
                that will be used if the property cannot be found on
                the item.
            parent_ids : Optional list of the parent catalogs'
                identifiers. If the bottom-most subcatalogs already match the
                template, no subcatalog is added. The list passed in is not
                modified.

        Returns:
            [catalog]: List of new catalogs created
        """
        result: List[Catalog] = []
        # Build a new list instead of appending in place so a list supplied
        # by the caller is never mutated.
        parent_ids = [*(parent_ids or []), self.id]
        for child in self.get_children():
            result.extend(
                child.generate_subcatalogs(
                    template, defaults=defaults, parent_ids=parent_ids
                )
            )

        layout_template = LayoutTemplate(template, defaults=defaults)

        keep_item_links: List[Link] = []
        item_links = [lk for lk in self.links if lk.rel == pystac.RelType.ITEM]
        for link in item_links:
            link.resolve_stac_object(root=self.get_root())
            item = cast(pystac.Item, link.target)
            subcat_ids = layout_template.substitute(item).split("/")
            id_iter = reversed(parent_ids)
            if all(
                [
                    "{}".format(subcat_id) == next(id_iter, None)
                    for subcat_id in reversed(subcat_ids)
                ]
            ):
                # Skip items for which the sub-catalog structure already
                # matches the template. The list of parent IDs can include more
                # elements on the root side, so compare the reversed sequences.
                keep_item_links.append(link)
                continue
            curr_parent = self
            for subcat_id in subcat_ids:
                # Reuse an existing child with this id; otherwise create it.
                subcat = curr_parent.get_child(subcat_id)
                if subcat is None:
                    subcat_desc = "Catalog of items from {} with id {}".format(
                        curr_parent.id, subcat_id
                    )
                    subcat = pystac.Catalog(id=subcat_id, description=subcat_desc)
                    curr_parent.add_child(subcat)
                    result.append(subcat)
                curr_parent = subcat

            # resolve collection link so when added back points to correct location
            col_link = item.get_single_link(pystac.RelType.COLLECTION)
            if col_link is not None:
                col_link.resolve_stac_object()

            curr_parent.add_item(item)

        # keep only non-item links and item links that have not been moved elsewhere
        self.links = [
            lk for lk in self.links if lk.rel != pystac.RelType.ITEM
        ] + keep_item_links

        return result
Ejemplo n.º 27
0
def main():
    """Build a STAC catalog of Copernicus EMS Rapid Mapping flood products.

    The script works in four stages:

    1. Download the Rapid Mapping activations GeoRSS feed, and the GeoRSS
       feed of each flood event, into ``./data`` (files already on disk are
       reused rather than downloaded again).
    2. Collect one ``EventProduct`` per VECTOR delineation (DEL) or grading
       (GRA) product of every flood event dated 2019-01-01 UTC or later, and
       write them all to a GeoJSON file.
    3. For each product, search for Sentinel-2 L2A scenes over the product's
       bounding box within +/- 12 hours of the event time. Products with at
       least one match become STAC Items in a per-event Collection, each
       linked to the matching Sentinel-2 Items.
    4. Set every collection's spatial extent from its items and save the
       catalog, self-contained, under ``./data/catalog``.

    Raises:
        ValueError: If ``SENTINELHUB_OAUTH_ID`` or
            ``SENTINELHUB_OAUTH_SECRET`` is not set in the environment.
        AssertionError: If an event description does not contain exactly one
            event date/time, or a parsed product id is inconsistent with the
            feed it came from.
    """
    sentinel_oauth_id = os.environ.get("SENTINELHUB_OAUTH_ID")
    sentinel_oauth_secret = os.environ.get("SENTINELHUB_OAUTH_SECRET")
    if sentinel_oauth_id is None:
        raise ValueError("Must set SENTINELHUB_OAUTH_ID")
    if sentinel_oauth_secret is None:
        raise ValueError("Must set SENTINELHUB_OAUTH_SECRET")

    # Create the download directories before the first download: this also
    # creates ./data itself, which the activations feed is written into.
    event_xml_dir = Path("./data/event-xml")
    os.makedirs(event_xml_dir, exist_ok=True)

    events_xml_url = "https://emergency.copernicus.eu/mapping/activations-rapid/feed"
    events_xml_file = Path("./data/copernicus-rapid-mapping-activations.xml")
    if not events_xml_file.is_file():
        logger.info("Pulling {}...".format(events_xml_url))
        urlretrieve(events_xml_url, str(events_xml_file))

    # Loop invariants: the event date/time pattern and the earliest event
    # date we are interested in.
    event_dt_pattern = re.compile(
        r"Date\/Time of Event \(UTC\):[</b>\s]*?(\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{2}:\d{2})",
        flags=re.MULTILINE,
    )
    # NOTE: only a lower bound is applied; events after 2020 are not
    # filtered out here.
    earliest_event_datetime = datetime(2019, 1, 1, 0, 0, 0,
                                       tzinfo=timezone.utc)

    # Generate a list of all unique CEMS products (combination of event, aoi,
    # monitoring type, revision and version) for all flood events from 2019 on
    products = []
    events_root = ET.parse(events_xml_file).getroot()
    for event in events_root.iter("item"):
        # Items without a <category> cannot be flood events; skip them
        # instead of crashing on a missing element.
        category = event.findtext("category", default="").strip().lower()
        if category != "flood":
            continue

        event_id = event.find("guid").text
        title = event.find("title").text
        rss_url = event.find("{http://www.iwg-sem.org/}activationRSS").text
        logger.info(title)

        description = event.find("description").text
        event_dts = event_dt_pattern.findall(description)
        if len(event_dts) != 1:
            logger.warning("{}: Available event date times {}".format(
                title, event_dts))
            raise AssertionError(
                "Expected exactly one event date/time for {!r}, found {}".
                format(title, len(event_dts)))
        event_datetime = datetime.strptime(
            event_dts[0], "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
        if event_datetime < earliest_event_datetime:
            continue

        event_country = event.find(
            "{http://www.iwg-sem.org/}activationAffectedCountries").text

        event_xml_file = Path(event_xml_dir, event_id).with_suffix(".xml")
        if not event_xml_file.is_file():
            logger.info("\tPulling {} GeoRSS: {}...".format(
                event_id, event_xml_file))
            urlretrieve(rss_url, event_xml_file)

        event_root = ET.parse(event_xml_file).getroot()

        for item in event_root.iter("item"):
            # A missing (or empty) element yields "" and is filtered out by
            # the check below.
            data_type = item.findtext("{http://www.gdacs.org/}cemsctype",
                                      default="")
            product_type = item.findtext("{http://www.gdacs.org/}cemsptype",
                                         default="")

            # Only care about VECTOR data for Delineation (DEL) and Grading
            # (GRA) products.
            # More info at https://emergency.copernicus.eu/mapping/ems/rapid-mapping-portfolio
            if data_type != "VECTOR" or product_type not in ("DEL", "GRA"):
                continue

            item_url = urlparse(item.find("link").text)
            _, _, product_id, version_id = item_url.path.lstrip("/").split("/")
            (
                product_event_id,
                aoi_id,
                product_type_id,
                monitoring_type,
                revision_id,
                data_type_id,
            ) = product_id.split("_")

            # Some sanity checks to ensure we've parsed our product id string
            # correctly. Raised explicitly (rather than via ``assert``) so
            # they still run under ``python -O``.
            if event_id != product_event_id:
                raise AssertionError(
                    "Product {!r} does not belong to event {!r}".format(
                        product_id, event_id))
            if product_type_id != product_type:
                raise AssertionError(
                    "Product {!r} has product type {!r}, feed says {!r}".
                    format(product_id, product_type_id, product_type))
            if data_type_id != "VECTORS":
                raise AssertionError(
                    "Product {!r} has unexpected data type {!r}".format(
                        product_id, data_type_id))

            georss_polygon = item.find(
                "{http://www.georss.org/georss}polygon").text
            # Split string, group number pairs, convert to float and swap pairs to lon first
            polygon = Polygon(
                map(
                    lambda x: (float(x[1]), float(x[0])),
                    grouper(georss_polygon.split(" "), 2),
                ))

            event_product = EventProduct(
                # Rebuild product_id from scratch because we need to include version
                "_".join([
                    event_id,
                    aoi_id,
                    product_type_id,
                    monitoring_type,
                    revision_id,
                    version_id,
                    data_type_id,
                ]),
                event_id,
                event_country,
                aoi_id,
                event_datetime.timestamp(),
                polygon,
                data_type_id,
                product_type_id,
                monitoring_type,
                revision_id,
                version_id,
                urlunparse(item_url),
            )
            products.append(event_product)

    df = gpd.GeoDataFrame(products)
    geojson_file = "./data/cems-rapid-mapping-flood-products-2019-2020.geojson"
    logger.info(
        "Writing GeoJSON of flood event products to {}".format(geojson_file))
    df.to_file(geojson_file, driver="GeoJSON")

    sentinel_session = get_session(sentinel_oauth_id, sentinel_oauth_secret)

    catalog = pystac.Catalog(
        "copernicus-rapid-mapping-floods-2019-2020",
        "Copernicus Rapid Mapping provisions geospatial information within hours or days from the activation in support of emergency management activities immediately following a disaster. Standardised mapping products are provided: e.g. to ascertain the situation before the event (reference product), to roughly identify and assess the most affected locations (first estimate product), assess the geographical extent of the event (delineation product) or to evaluate the intensity and scope of the damage resulting from the event (grading product). This catalog contains a subset of products for flood events from 2019-2020 that intersect with Sentinel 2 L2A Chips.",
        title="Copernicus Rapid Mapping Floods 2019-2020",
    )
    s2_collection = pystac.Collection(
        "Sentinel-2-L2A",
        "Sentinel 2 L2A images corresponding to CEMS rapid mapping floods",
        pystac.Extent(
            # Placeholder; replaced once all items have been added (below)
            pystac.SpatialExtent([None, None, None, None]),
            pystac.TemporalExtent([(
                # TODO: Make this more specific by looping actual dts
                #       after ingest
                datetime(2019, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
                datetime(2020, 12, 31, 23, 59, 59, tzinfo=timezone.utc),
            )]),
        ),
    )
    catalog.add_child(s2_collection)

    # Loop Products grouped by event id, lookup Sentinel 2 matches for each
    # Product, and create STAC Items in catalog for any matches
    sorted_products = sorted(products, key=lambda x: x.event_id)
    for event_id, event_products in groupby(sorted_products,
                                            key=lambda x: x.event_id):
        for p in event_products:
            event_datetime = datetime.fromtimestamp(p.event_time,
                                                    tz=timezone.utc)

            # Check for sentinel 2 results before anything else, so we
            # don't do unnecessary work. We'll use these results later
            # after we've created our STAC Item
            response = stac_search(
                p.geometry.bounds,
                "sentinel-2-l2a",
                event_datetime - timedelta(hours=12),
                event_datetime + timedelta(hours=12),
                sentinel_session,
            ).json()

            if len(response["features"]) < 1:
                logger.debug("No Sentinel 2 results for {}".format(
                    p.product_id))
                continue

            # Get or create the Collection holding this event's products
            event_collection = catalog.get_child(event_id)
            if event_collection is None:
                event_collection = pystac.Collection(
                    event_id,
                    "",
                    pystac.Extent(
                        # Placeholder; replaced once all items are added
                        pystac.SpatialExtent([None, None, None, None]),
                        pystac.TemporalExtent([(event_datetime, None)]),
                    ),
                )
                catalog.add_child(event_collection)

            pystac_item = pystac.Item(
                p.product_id,
                mapping(p.geometry),
                p.geometry.bounds,
                event_datetime,
                properties={
                    "aoi_id": p.aoi_id,
                    "country": p.event_country,
                    "event_id": p.event_id,
                    "product_type": p.product_type,
                    "data_type": p.data_type,
                    "monitoring_type": p.monitoring_type,
                    "revision": p.revision,
                    "version": p.version,
                },
            )
            event_collection.add_item(pystac_item)
            url_link = pystac.Link("alternate",
                                   p.product_link,
                                   media_type="text/html")
            pystac_item.add_link(url_link)

            # Get or create Item in S2 collection for each match from
            # SentinelHub and add as links to our Product Item
            for feature in response["features"]:
                s2_item = s2_collection.get_item(feature["id"])
                if s2_item is None:
                    s2_item = pystac.Item.from_dict(feature)
                    s2_collection.add_item(s2_item)

                s2_link = pystac.Link(
                    "data", s2_item,
                    link_type=pystac.LinkType.RELATIVE).set_owner(pystac_item)
                pystac_item.add_link(s2_link)

            logger.info("Created STAC Item {} with {} Sentinel 2 links".format(
                p.product_id, len(response["features"])))

    # Set spatial extents
    for collection in catalog.get_children():
        if not isinstance(collection, pystac.Collection):
            continue
        bounds = GeometryCollection(
            [shape(s.geometry) for s in collection.get_all_items()]).bounds
        # SpatialExtent takes a list of bboxes, so wrap the single overall
        # bounding box instead of passing the flat (minx, miny, maxx, maxy).
        collection.extent.spatial = pystac.SpatialExtent([list(bounds)])

    catalog_root = "./data/catalog"
    logger.info("Writing STAC Catalog to {}...".format(catalog_root))
    catalog.normalize_and_save(catalog_root, pystac.CatalogType.SELF_CONTAINED)
Ejemplo n.º 28
0
 def test_from_dict_set_root(self) -> None:
     """An Item built with ``Item.from_dict(..., root=catalog)`` reports that catalog as its root."""
     root_catalog = pystac.Catalog(id="test", description="test desc")
     example_dict = self.get_example_item_dict()
     parsed_item = Item.from_dict(example_dict, root=root_catalog)
     # Identity, not equality: the very same catalog object must be the root.
     self.assertIs(parsed_item.get_root(), root_catalog)