def test_determine_type_for_unknown(self):
    """A serialized catalog with no self link or type hints yields no CatalogType."""
    parent = Catalog(id='test', description='test desc')
    parent.add_child(Catalog(id='subcat', description='subcat desc'))
    parent.normalize_hrefs('http://example.com')
    serialized = parent.to_dict(include_self_link=False)
    self.assertIsNone(CatalogType.determine_type(serialized))
def test_map_items_multiple_2(self):
    """map_items() applied twice: retitle every item, then pair each item
    with a generated label item (2 items in -> 4 items out)."""
    catalog = Catalog(id='test-1', description='Test1')
    item1 = Item(id='item1',
                 geometry=RANDOM_GEOM,
                 bbox=RANDOM_BBOX,
                 datetime=datetime.utcnow(),
                 properties={})
    item1.add_asset('ortho', Asset(href='/some/ortho.tif'))
    catalog.add_item(item1)
    kitten = Catalog(id='test-kitten',
                     description='A cuter version of catalog')
    catalog.add_child(kitten)
    item2 = Item(id='item2',
                 geometry=RANDOM_GEOM,
                 bbox=RANDOM_BBOX,
                 datetime=datetime.utcnow(),
                 properties={})
    item2.add_asset('ortho', Asset(href='/some/other/ortho.tif'))
    kitten.add_item(item2)

    def modify_item_title(item):
        # First mapping pass: mutate the title in place.
        item.title = 'Some new title'
        return item

    def create_label_item(item):
        # Second mapping pass: return the original item plus a label item.
        # Assumes the GEOJSON labels are in the
        # same location as the image
        img_href = item.assets['ortho'].href
        label_href = '{}.geojson'.format(os.path.splitext(img_href)[0])
        label_item = Item(id='Labels',
                          geometry=item.geometry,
                          bbox=item.bbox,
                          datetime=datetime.utcnow(),
                          properties={})
        label_item.ext.enable(Extensions.LABEL)
        label_ext = label_item.ext.label
        label_ext.apply(label_description='labels',
                        label_type='vector',
                        label_properties=['label'],
                        label_classes=[
                            LabelClasses.create(classes=['one', 'two'],
                                                name='label')
                        ],
                        label_tasks=['classification'])
        label_ext.add_source(item, assets=['ortho'])
        label_ext.add_geojson_labels(label_href)
        return [item, label_item]

    c = catalog.map_items(modify_item_title)
    c = c.map_items(create_label_item)
    new_catalog = c
    items = new_catalog.get_all_items()
    # 2 original items, each now paired with a generated label item.
    self.assertTrue(len(list(items)) == 4)
def __init__(self, old_catalog_path: "str | None" = None,
             new_path: "str | None" = None) -> None:
    """Wrap a new or existing root STAC catalog.

    Args:
        old_catalog_path: path to an existing STAC catalog to load; when
            None, a fresh GisSpot root catalog is created instead.
        new_path: output path for the catalog. When None it falls back to
            "" (fresh catalog) or the loaded catalog's self href.

    Raises:
        TypeError: if old_catalog_path does not resolve to a Catalog.
    """
    # FIX: parameters defaulting to None were annotated as plain `str`;
    # FIX: removed stray debug print of the normalized path.
    if old_catalog_path is None:
        if new_path is None:
            new_path = ""
        self.path: str = new_path
        self.root_catalog: Catalog = Catalog(
            id="GisSpot-root-catalog",
            title="GisSpot-root-catalog",
            description="Root catalog on GisSpot server")
    else:
        old_catalog_path = normalize_stac_path(old_catalog_path)
        stac_obj = read_file(old_catalog_path)
        # Strict type check kept on purpose: a Collection is a Catalog
        # subclass but is rejected here — TODO confirm this is intended.
        if type(stac_obj) is Catalog:
            self.root_catalog: Catalog = stac_obj
        else:
            raise TypeError("old_catalog_path must be path to STAC catalog")
        if new_path is None:
            self.path: str = self.root_catalog.get_self_href()
        else:
            self.path: str = new_path
def main(ctx, ndvi_threshold, ndwi_threshold, pre_event, post_event):
    """Compute a burned-area item from pre/post event catalogs and save it
    into a self-contained result catalog under the current directory."""
    dump(ctx)
    # Point PROJ/GDAL at the conda env so they can find their data files.
    os.environ["PREFIX"] = "/opt/anaconda/envs/env_burned_area"
    os.environ["PROJ_LIB"] = os.path.join(os.environ["PREFIX"], "share/proj")
    os.environ["GDAL_DATA"] = os.path.join(os.environ["PREFIX"], "share/gdal")
    burned_area_item = burned(
        pre_item=get_item(os.path.join(pre_event, "catalog.json")),
        post_item=get_item(os.path.join(post_event, "catalog.json")),
        ndvi_threshold=ndvi_threshold,
        ndwi_threshold=ndwi_threshold,
    )
    logging.info("Output catalog")
    catalog = Catalog(id="catalog", description="Results")
    # Defensive reset before adding the single result item.
    catalog.clear_items()
    catalog.clear_children()
    catalog.add_items([burned_area_item])
    catalog.describe()
    catalog.normalize_and_save(root_href="./",
                               catalog_type=CatalogType.SELF_CONTAINED)
def test_full_copy_2(self):
    """full_copy() preserves a tree holding an image item plus a label item
    that references the image item as its source."""
    with TemporaryDirectory() as tmp_dir:
        cat = Catalog(id='test', description='test catalog')
        image_item = Item(id='Imagery',
                          geometry=RANDOM_GEOM,
                          bbox=RANDOM_BBOX,
                          datetime=datetime.utcnow(),
                          properties={})
        for key in ['ortho', 'dsm']:
            image_item.add_asset(
                key,
                Asset(href='some/{}.tif'.format(key),
                      media_type=MediaType.GEOTIFF))
        label_item = LabelItem(
            id='Labels',
            geometry=RANDOM_GEOM,
            bbox=RANDOM_BBOX,
            datetime=datetime.utcnow(),
            properties={},
            label_description='labels',
            label_type='vector',
            label_properties='label',
            label_classes=[LabelClasses(classes=['one', 'two'], name='label')],
            label_tasks=['classification'])
        label_item.add_source(image_item, assets=['ortho'])
        cat.add_items([image_item, label_item])
        cat.normalize_hrefs(os.path.join(tmp_dir, 'catalog-full-copy-2-source'))
        cat.save(catalog_type=CatalogType.ABSOLUTE_PUBLISHED)
        cat2 = cat.full_copy()
        cat2.normalize_hrefs(os.path.join(tmp_dir, 'catalog-full-copy-2-dest'))
        cat2.save(catalog_type=CatalogType.ABSOLUTE_PUBLISHED)
        # check_catalog validates both trees (defined elsewhere in the suite).
        self.check_catalog(cat, 'source')
        self.check_catalog(cat2, 'dest')
def handle_root(dataset_prefix: str) -> None:
    """Handle writing a new dataset to the root catalog.

    Loads (or creates) the root catalog in the storage bucket, attaches the
    dataset catalog under it, and rewrites all hrefs with the Geostore layout.
    """
    results = S3_CLIENT.list_objects(
        Bucket=ResourceName.STORAGE_BUCKET_NAME.value, Prefix=CATALOG_KEY)

    # create root catalog if it doesn't exist
    if CONTENTS_KEY in results:
        root_catalog = Catalog.from_file(
            f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}/{CATALOG_KEY}"
        )
    else:
        root_catalog = Catalog(
            id=ROOT_CATALOG_ID,
            title=ROOT_CATALOG_TITLE,
            description=ROOT_CATALOG_DESCRIPTION,
            catalog_type=CatalogType.SELF_CONTAINED,
        )
        root_catalog.set_self_href(
            f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}/{CATALOG_KEY}"
        )

    dataset_path = f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}/{dataset_prefix}"
    dataset_catalog = Catalog.from_file(f"{dataset_path}/{CATALOG_KEY}")

    # Attach the dataset and normalize every href via the custom strategy.
    root_catalog.add_child(dataset_catalog, strategy=GeostoreSTACLayoutStrategy())
    root_catalog.normalize_hrefs(
        f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}",
        strategy=GeostoreSTACLayoutStrategy(),
    )
    root_catalog.save(catalog_type=CatalogType.SELF_CONTAINED)
def main(data_dir, input_references, store_username, store_apikey):
    """Stage all referenced STAC items into one catalog saved under data_dir.

    Each reference may be an Item or a Catalog; catalogs are flattened into
    their items.
    """
    if store_username is not None:
        # Credentials read by my_read_method when fetching references.
        os.environ['STAGEIN_USERNAME'] = store_username
        os.environ['STAGEIN_PASSWORD'] = store_apikey
    STAC_IO.read_text_method = my_read_method
    items = []
    for input_reference in input_references:
        thing = pystac.read_file(input_reference)
        if isinstance(thing, pystac.item.Item):
            items.append(thing)
        elif isinstance(thing, pystac.catalog.Catalog):
            for item in thing.get_items():
                items.append(item)

    # create catalog
    catalog = Catalog(id='catalog', description='staged STAC catalog')
    catalog.add_items(items)
    catalog.normalize_and_save(root_href=data_dir,
                               catalog_type=CatalogType.RELATIVE_PUBLISHED)
    catalog.describe()
def test_case_3():
    """Build a test catalog with an imagery item and a LabelItem that
    references it as a source; returns the root catalog."""
    root_cat = Catalog(id='test3',
                       description='test case 3 catalog',
                       title='test case 3 title')
    image_item = Item(id='imagery-item',
                      geometry=RANDOM_GEOM,
                      bbox=RANDOM_BBOX,
                      datetime=datetime.utcnow(),
                      properties={})
    image_item.add_asset(
        'ortho',
        Asset(href='some/geotiff.tiff', media_type=MediaType.GEOTIFF))
    # Count summary for the 'label' property of the label item.
    overviews = [
        LabelOverview('label',
                      counts=[LabelCount('one', 1),
                              LabelCount('two', 2)])
    ]
    label_item = LabelItem(
        id='label-items',
        geometry=RANDOM_GEOM,
        bbox=RANDOM_BBOX,
        datetime=datetime.utcnow(),
        properties={},
        label_description='ML Labels',
        label_type='vector',
        label_properties=['label'],
        label_classes=[LabelClasses(classes=['one', 'two'], name='label')],
        label_tasks=['classification'],
        label_methods=['manual'],
        label_overviews=overviews)
    label_item.add_source(image_item, assets=['ortho'])
    root_cat.add_item(image_item)
    root_cat.add_item(label_item)
    return root_cat
def main(ctx, input_path):
    """Calibrate the SAR item found under input_path and save the result
    into a self-contained catalog in the current directory."""
    # dump the CWL and params (if requested)
    dump(ctx)
    if 'TMPDIR' in os.environ:
        os.chdir(os.environ['TMPDIR'])
    logging.info(os.path.join(input_path, 'catalog.json'))
    item = get_item(os.path.join(input_path, 'catalog.json'))
    # NOTE(review): output_dir is computed but unused — the mkdir below is
    # commented out.
    output_dir = f'{item.id}'
    calibrator = Calibrator()
    item_out = calibrator.calibrate(item)
    logging.info('STAC')
    cat = Catalog(id='catalog', description="Calibrated sar product")
    cat.add_items([item_out])
    cat.normalize_and_save(root_href='./',
                           catalog_type=CatalogType.SELF_CONTAINED)
    logging.info('Done!')
    #os.mkdir(output_dir)
    sys.exit(0)
def setUp(self):
    """Create temp work/input dirs, save a minimal catalog, load the config."""
    self.workdir = mkdtemp()
    self.inputdir = mkdtemp()
    catalog = Catalog('test-id', 'test catalog')
    catalog.normalize_and_save(self.inputdir, CatalogType.SELF_CONTAINED)
    self.catalog = catalog
    self.config = config_fixture()
    print(self.config)
def test_items_with_no_input_source_raise_exceptions(self):
    """Invoking over an item that matches no message source raises RuntimeError."""
    catalog = Catalog('0', 'Catalog 0')
    orphan = Item('1', None, [0, 0, 1, 1], '2020-01-01T00:00:00.000Z', {})
    catalog.add_item(orphan)
    adapter = AdapterTester(Message(full_message), catalog, config=self.config)
    with self.assertRaises(RuntimeError):
        adapter.invoke()
def test_getattribute_overload(self):
    """ext.__getattr__ raises for unknown/inapplicable extensions and
    resolves ones that have been enabled."""
    catalog = Catalog(id='test', description='test')
    self.assertEqual(ExtensionIndex.__name__, 'ExtensionIndex')
    # 'foo' is not an extension; 'eo' cannot be accessed on this catalog.
    self.assertRaises(ExtensionError, catalog.ext.__getattr__, 'foo')
    self.assertRaises(ExtensionError, catalog.ext.__getattr__, 'eo')
    catalog.ext.enable('single-file-stac')
    # NOTE(review): assertTrue's second argument is a failure *message*, not
    # a comparison — this probably intends assertIsInstance/assertEqual.
    self.assertTrue(
        catalog.ext.__getattr__('single-file-stac'),
        pystac.extensions.single_file_stac.SingleFileSTACCatalogExt)
def test_clear_children_sets_parent_and_root_to_None(self):
    """clear_children() must detach parent and root links from former children."""
    catalog = Catalog(id='test', description='test')
    subcats = [Catalog(id='subcat', description='test'),
               Catalog(id='subcat2', description='test2')]
    catalog.add_children(subcats)
    for sub in subcats:
        self.assertIsNotNone(sub.get_parent())
    for sub in subcats:
        self.assertIsNotNone(sub.get_root())
    self.assertEqual(len(list(catalog.get_children())), 2)
    catalog.clear_children()
    for sub in subcats:
        self.assertIsNone(sub.get_parent())
    for sub in subcats:
        self.assertIsNone(sub.get_root())
def test_case_3() -> Catalog:
    """Build a test catalog with an imagery item and a label item populated
    through the LabelExtension API; returns the root catalog."""
    root_cat = Catalog(id="test3",
                       description="test case 3 catalog",
                       title="test case 3 title")
    image_item = Item(
        id="imagery-item",
        geometry=ARBITRARY_GEOM,
        bbox=ARBITRARY_BBOX,
        datetime=datetime.utcnow(),
        properties={},
    )
    image_item.add_asset(
        "ortho", Asset(href="some/geotiff.tiff", media_type=MediaType.GEOTIFF))
    # Count summary for the 'label' property.
    overviews = [
        LabelOverview.create(
            "label",
            counts=[LabelCount.create("one", 1),
                    LabelCount.create("two", 2)],
        )
    ]
    label_item = Item(
        id="label-items",
        geometry=ARBITRARY_GEOM,
        bbox=ARBITRARY_BBOX,
        datetime=datetime.utcnow(),
        properties={},
    )
    # Enable the label extension on the item, then fill in its fields.
    LabelExtension.add_to(label_item)
    label_ext = LabelExtension.ext(label_item)
    label_ext.apply(
        label_description="ML Labels",
        label_type=LabelType.VECTOR,
        label_properties=["label"],
        label_classes=[
            LabelClasses.create(classes=["one", "two"], name="label")
        ],
        label_tasks=["classification"],
        label_methods=["manual"],
        label_overviews=overviews,
    )
    label_ext.add_source(image_item, assets=["ortho"])
    root_cat.add_item(image_item)
    root_cat.add_item(label_item)
    return root_cat
def get_root_catalog() -> Catalog:
    """Get Cirrus root catalog from s3.

    Returns:
        Catalog: the root catalog loaded from s3 if present, otherwise a new
        in-memory catalog (the old ``-> Dict`` annotation was incorrect).
    """
    if s3().exists(ROOT_URL):
        cat = Catalog.from_file(ROOT_URL)
    else:
        # Derive the catalog id from the data bucket name.
        catid = DATA_BUCKET.split('-data-')[0]
        cat = Catalog(id=catid, description=DESCRIPTION)
    # BUG FIX: Catalog.describe() prints to stdout and returns None, so the
    # old f-string always logged "Fetched None"; log the catalog id instead.
    logger.debug(f"Fetched {cat.id}")
    return cat
def create_catalog_command(destination, source, id, quiet):
    """Creates a relative published 3DEP catalog in DESTINATION.

    If SOURCE is not provided, will use the metadata in AWS. SOURCE is
    expected to be a directory tree mirroring the structure on USGS, so it
    is best created using `stac threedep download-metadata`.
    """
    # NOTE(review): the `id` parameter (and loop variable) shadows the
    # builtin; kept because the click option is named `id`.
    base_ids = id  # not sure how to rename arguments in click
    collections = {}
    items = {}
    for product in PRODUCTS:
        items[product] = []
        # Use explicitly requested ids when given, otherwise fetch them all.
        if base_ids:
            ids = base_ids
        else:
            ids = utils.fetch_ids(product)
        for id in ids:
            item = stac.create_item_from_product_and_id(product, id, source)
            items[product].append(item)
            if not quiet:
                print(item.id)
        # Spatio-temporal extent covering the items just created.
        extent = Extent.from_items(items[product])
        if product == "1":
            title = "1 arc-second"
            description = "USGS 3DEP 1 arc-second DEMs"
        elif product == "13":
            title = "1/3 arc-second"
            description = "USGS 3DEP 1/3 arc-second DEMs"
        else:
            raise NotImplementedError
        collection = Collection(id=f"{USGS_3DEP_ID}-{product}",
                                title=title,
                                keywords=["USGS", "3DEP", "NED", "DEM", "elevation"],
                                providers=[USGS_PROVIDER],
                                description=description,
                                extent=extent,
                                license="PDDL-1.0")
        collections[product] = collection

    catalog = Catalog(id=USGS_3DEP_ID,
                      description=DESCRIPTION,
                      title="USGS 3DEP DEMs",
                      catalog_type=CatalogType.RELATIVE_PUBLISHED)
    for product, collection in collections.items():
        catalog.add_child(collection)
        collection.add_items(items[product])
    # Group items into per-region sub-catalogs, then write and validate.
    catalog.generate_subcatalogs("${threedep:region}")
    catalog.normalize_hrefs(destination)
    catalog.save()
    catalog.validate()
def test_clear_items_removes_from_cache(self):
    """Re-adding an item under a reused id must not serve stale cached copies."""
    catalog = Catalog(id='test', description='test')
    subcat = Catalog(id='subcat', description='test')
    catalog.add_child(subcat)

    def build_item(marker):
        # Fresh item reusing the same id, distinguished by a property value.
        return Item(id='test-item',
                    geometry=RANDOM_GEOM,
                    bbox=RANDOM_BBOX,
                    datetime=datetime.utcnow(),
                    properties={'key': marker})

    def assert_single_item_with(marker):
        found = list(catalog.get_all_items())
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].properties['key'], marker)

    subcat.add_item(build_item('one'))
    assert_single_item_with('one')

    # After clearing, a new item with the same id must be the one returned.
    subcat.clear_items()
    subcat.add_item(build_item('two'))
    assert_single_item_with('two')

    # Removing by id must behave the same way.
    subcat.remove_item('test-item')
    subcat.add_item(build_item('three'))
    assert_single_item_with('three')
def test_invocation_recurses_subcatalogs(self):
    """invoke() walks nested catalogs depth-wise and pairs each item with
    the source link in effect for its subtree."""
    catalog = Catalog('0', 'Catalog 0')
    catalog.add_link(
        Link('harmony_source', 'http://example.com/C0001-EXAMPLE'))
    catalog.add_child(Catalog('1a', 'Catalog 1a'))
    subcatalog = Catalog('1b', 'Catalog 1b')
    catalog.add_child(subcatalog)
    subsubcatalog_a = Catalog('2a', 'Catalog 2a')
    subsubcatalog_b = Catalog('2b', 'Catalog 2b')
    # 2b carries its own source link; the assertions below show its items
    # are paired with message.sources[1] rather than the root's sources[0].
    subsubcatalog_b.add_link(
        Link('harmony_source', 'http://example.com/C0002-EXAMPLE'))
    subcatalog.add_children([subsubcatalog_a, subsubcatalog_b])
    message = Message(full_message)
    items_a = [
        Item('3', None, [0, 0, 1, 3], '2020-01-01T00:00:00.000Z', {}),
        Item('4', None, [0, 0, 1, 4], '2020-01-01T00:00:00.000Z', {})
    ]
    items_b = [
        Item('5', None, [0, 0, 1, 5], '2020-01-01T00:00:00.000Z', {}),
        Item('6', None, [0, 0, 1, 6], '2020-01-01T00:00:00.000Z', {})
    ]
    subsubcatalog_a.add_items(items_a)
    subsubcatalog_b.add_items(items_b)
    adapter = AdapterTester(message, catalog, config=self.config)
    adapter.invoke()
    # process() received items in traversal order with the right sources.
    self.assertEqual(AdapterTester.process_args[0][0].bbox, items_a[0].bbox)
    self.assertEqual(AdapterTester.process_args[1][0].bbox, items_a[1].bbox)
    self.assertEqual(AdapterTester.process_args[2][0].bbox, items_b[0].bbox)
    self.assertEqual(AdapterTester.process_args[3][0].bbox, items_b[1].bbox)
    self.assertEqual(AdapterTester.process_args[0][1], message.sources[0])
    self.assertEqual(AdapterTester.process_args[1][1], message.sources[0])
    self.assertEqual(AdapterTester.process_args[2][1], message.sources[1])
    self.assertEqual(AdapterTester.process_args[3][1], message.sources[1])
def test_clear_children_removes_from_cache(self):
    """Re-adding a child under a reused id must not serve stale cached copies."""
    catalog = Catalog(id='test', description='test')

    def assert_single_child_with(description):
        found = list(catalog.get_children())
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].description, description)

    catalog.add_child(Catalog(id='subcat', description='test'))
    assert_single_child_with('test')

    # After clearing, a new child with the same id must be the one returned.
    catalog.clear_children()
    catalog.add_child(Catalog(id='subcat', description='test2'))
    assert_single_child_with('test2')

    # Removing by id must behave the same way.
    catalog.remove_child('subcat')
    catalog.add_child(Catalog(id='subcat', description='test3'))
    assert_single_child_with('test3')
def get_root_catalog():
    """Get Cirrus root catalog from s3.

    Returns:
        Catalog: the root catalog; when missing it is created and saved to
        s3 as an absolute-published catalog.
    """
    caturl = f"{ROOT_URL}/catalog.json"
    if s3().exists(caturl):
        cat = Catalog.from_file(caturl)
    else:
        # Derive the catalog id from the data bucket name.
        catid = DATA_BUCKET.split('-data-')[0]
        cat = Catalog(id=catid, description=DESCRIPTION)
        cat.normalize_and_save(ROOT_URL, CatalogType.ABSOLUTE_PUBLISHED)
    # BUG FIX: Catalog.describe() prints to stdout and returns None, so the
    # old f-string always logged "Fetched None"; log the catalog id instead.
    logger.debug(f"Fetched {cat.id}")
    return cat
def test_altered_ids_are_retained(self):
    """An item id the adapter rewrote during processing survives into the output."""
    catalog = Catalog('0', 'Catalog 0')
    catalog.add_link(
        Link('harmony_source', 'http://example.com/C0001-EXAMPLE'))
    catalog.add_items([
        Item('mutate-me', None, [0, 0, 1, 1], '2020-01-01T00:00:00.000Z', {}),
        Item('2', None, [0, 0, 1, 1], '2020-01-01T00:00:00.000Z', {}),
    ])
    adapter = AdapterTester(Message(full_message), catalog, config=self.config)
    _, out_catalog = adapter.invoke()
    out_items = list(out_catalog.get_items())
    self.assertEqual(out_items[0].id, 'i-mutated-you')
def test_invocation_processes_items_with_sources(self):
    """invoke() hands each item to process() together with its message source."""
    catalog = Catalog('0', 'Catalog 0')
    catalog.add_link(
        Link('harmony_source', 'http://example.com/C0001-EXAMPLE'))
    message = Message(full_message)
    items = [
        Item('1', None, [0, 0, 1, 1], '2020-01-01T00:00:00.000Z', {}),
        Item('2', None, [0, 0, 1, 2], '2020-01-01T00:00:00.000Z', {}),
    ]
    catalog.add_items(items)
    AdapterTester(message, catalog, config=self.config).invoke()
    # Every item was processed in order, all against the first source.
    for position, item in enumerate(items):
        self.assertEqual(AdapterTester.process_args[position][0].bbox,
                         item.bbox)
        self.assertEqual(AdapterTester.process_args[position][1],
                         message.sources[0])
def test_unaltered_ids_are_assigned_new_uuids(self):
    """Items the adapter leaves untouched get fresh ids in the output catalog."""
    catalog = Catalog('0', 'Catalog 0')
    catalog.add_link(
        Link('harmony_source', 'http://example.com/C0001-EXAMPLE'))
    message = Message(full_message)
    items = [
        Item('1', None, [0, 0, 1, 1], '2020-01-01T00:00:00.000Z', {}),
        Item('2', None, [0, 0, 1, 1], '2020-01-01T00:00:00.000Z', {}),
    ]
    catalog.add_items(items)
    adapter = AdapterTester(message, catalog, config=self.config)
    message, out_catalog = adapter.invoke()
    self.assertNotEqual(out_catalog.id, catalog.id)
    for original, result in zip(items, out_catalog.get_items()):
        self.assertNotEqual(result.id, original.id)
def test_full_copy_1(self):
    """full_copy() of a one-item catalog yields an equivalent, valid tree."""
    with TemporaryDirectory() as tmp_dir:
        source = Catalog(id='test', description='test catalog')
        source.add_item(
            Item(id='test_item',
                 geometry=RANDOM_GEOM,
                 bbox=RANDOM_BBOX,
                 datetime=datetime.utcnow(),
                 properties={}))
        source.normalize_hrefs(
            os.path.join(tmp_dir, 'catalog-full-copy-1-source'))
        duplicate = source.full_copy()
        duplicate.normalize_hrefs(
            os.path.join(tmp_dir, 'catalog-full-copy-1-dest'))
        self.check_catalog(source, 'source')
        self.check_catalog(duplicate, 'dest')
def stage(input_references):
    """Stage each input reference into its own relative-published STAC catalog.

    Args:
        input_references: iterable of hrefs, each resolving to a STAC Item
            or Catalog.

    Returns:
        list: directories containing the staged catalogs, one per reference.
    """
    STAC_IO.read_text_method = my_read_method

    catalogs = []

    # FIX: the index from enumerate() was never used; iterate directly.
    for input_reference in input_references:
        items = []
        thing = read_file(input_reference)
        if isinstance(thing, Item):
            items.append(thing)
        elif isinstance(thing, Catalog):
            # Flatten a referenced catalog into its items.
            for item in thing.get_items():
                items.append(item)

        # create catalog named after the first staged item
        catalog = Catalog(id=items[0].id,
                          description='staged STAC catalog with {}'.format(items[0].id))
        catalog.add_items(items)
        catalog.normalize_and_save(root_href=items[0].id,
                                   catalog_type=CatalogType.RELATIVE_PUBLISHED)
        catalog.describe()

        catalogs.append(os.path.dirname(catalog.get_self_href()))

    return catalogs
def create_dataset(body: JsonObject) -> JsonObject:
    """POST: Create Dataset.

    Validates the request body, rejects duplicate titles, persists the
    dataset record, writes its STAC catalog to s3, and queues a message so
    the root catalog picks up the new dataset asynchronously.
    """
    # JSON Schema for the request body; title and description are required.
    body_schema = {
        "type": "object",
        "properties": {
            TITLE_KEY: {
                "type": "string",
                "pattern": TITLE_PATTERN
            },
            DESCRIPTION_KEY: {
                "type": "string"
            },
        },
        "required": [TITLE_KEY, DESCRIPTION_KEY],
    }

    # request body validation
    try:
        validate(body, body_schema)
    except ValidationError as err:
        return error_response(HTTPStatus.BAD_REQUEST, err.message)

    # check for duplicate type/title datasets
    datasets_model_class = datasets_model_with_meta()
    dataset_title = body[TITLE_KEY]
    if datasets_model_class.datasets_title_idx.count(hash_key=dataset_title):
        return error_response(HTTPStatus.CONFLICT,
                              f"dataset '{dataset_title}' already exists")

    # create dataset
    dataset = datasets_model_class(title=dataset_title)
    dataset.save()
    # Re-read so generated attributes (e.g. the prefix) are populated.
    dataset.refresh(consistent_read=True)

    # create dataset catalog
    dataset_catalog = Catalog(
        **{
            STAC_ID_KEY: dataset.dataset_prefix,
            STAC_DESCRIPTION_KEY: body[DESCRIPTION_KEY],
            STAC_TITLE_KEY: dataset_title,
        },
        catalog_type=CatalogType.SELF_CONTAINED,
    )
    dataset_catalog.normalize_hrefs(
        f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}/{dataset.dataset_prefix}"
    )
    dataset_catalog.save()

    # add reference to root catalog
    SQS_RESOURCE.get_queue_by_name(QueueName=get_param(
        ParameterName.UPDATE_CATALOG_MESSAGE_QUEUE_NAME)).send_message(
            MessageBody=dataset.dataset_prefix,
            MessageAttributes={
                MESSAGE_ATTRIBUTE_TYPE_KEY:
                    MessageAttributeValueTypeDef(
                        DataType=DATA_TYPE_STRING,
                        StringValue=MESSAGE_ATTRIBUTE_TYPE_ROOT)
            },
        )

    # return response
    resp_body = dataset.as_dict()
    return success_response(HTTPStatus.CREATED, resp_body)
# Script body: build a STAC catalog for GLOFIMR SAR imagery stored on S3.
args = parser.parse_args()
parsed_s3_path = urlparse(args.imagery_root_s3)
bucket = parsed_s3_path.netloc
s3 = boto3.resource("s3")
# NOTE(review): `bucket` is rebound from the bucket *name* string to the
# boto3 Bucket resource — two distinct names would be clearer.
bucket = s3.Bucket(bucket)
prefix = parsed_s3_path.path.lstrip("/")
filtered_objects = bucket.objects.filter(Prefix=prefix)
catalog_description = (
    "Sentinel-1 imagery corresponding to flood events catalogued within GLOFIMR"
)
catalog_title = "GLOFIMR SAR Imagery"
catalog = Catalog("glofimr-sar", catalog_description, title=catalog_title)
# We know the IDs used here, they are derived from the incrementing ID from the GLOFIMR shapefile
# ftp://guest:[email protected]/USFIMR/USFIMR_all.zip
flood_data = {"1": [], "2": [], "3": [], "15": [], "16": []}
flood_ids = set(flood_data.keys())
# Bucket keys are laid out as "<flood_id>/..."; collect objects per flood.
for filtered_obj in filtered_objects:
    flood_id, *_ = filtered_obj.key.split("/")
    if flood_id in flood_ids:
        flood_data[flood_id].append(filtered_obj.Object())
subcollections = []
for flood_id, objects in flood_data.items():
    aggregate_bounds = None
    # these objects are ultimately assets that we'd like to group by tile ID, we do that here
def main():
    """Build and save the sen1floods11 STAC catalog.

    # The Data
    446 qc'ed chips containing flood events, hand-labeled flood
    classifications. 4385 non-qc'ed chips containing water exported only
    with sentinel 1 and 2 flood classifications.

    # The Catalog Outline
    ** We want to generate a root catalog that is all, or only training,
    or only validation items ** (script should support this)
    - Root Catalog
        - Collection: Sentinel 1 data chips (dirs: S1 + S1_NoQC)
        - Collection: Sentinel 2 data chips (dirs: S2 + S2_NoQC)
        - Collection: Sentinel 1 weak labels (dir: S1Flood_NoQC)
        - Collection: Sentinel 2 weak labels (dir: NoQC)
        - Collection: Hand labels (dir: QC_v2)
        - Collection: Permanent water labels (dir: Perm)
        - Collection: Traditional otsu algo labels (dir: S1Flood)
    each with one Item per chip.

    ## Alternate catalog structure
    A per-sensor / per-country / per-event-ID hierarchy (with a PermJRC
    lat/lon hierarchy) was considered but rejected in the interest of
    facilitating collections for each of the label datasets.
    """
    parser = argparse.ArgumentParser(
        description="Build STAC Catalog for sen1floods11")
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    debug = args.debug
    storage = S3Storage("sen1floods11-data")

    catalog_description = "Bonafilia, D., Tellman, B., Anderson, T., Issenberg, E. 2020. Sen1Floods11: a georeferenced dataset to train and test deep learning flood algorithms for Sentinel-1. The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, 2020, pp. 210-211. Available Open access at: http://openaccess.thecvf.com/content_CVPRW_2020/html/w11/Bonafilia_Sen1Floods11_A_Georeferenced_Dataset_to_Train_and_Test_Deep_Learning_CVPRW_2020_paper.html"  # noqa: E501
    catalog_title = "A georeferenced dataset to train and test deep learning flood algorithms for Sentinel-1"  # noqa: E501

    catalog = Catalog("sen1floods11", catalog_description, title=catalog_title)
    print("Created Catalog {}".format(catalog.id))

    # Build Sentinel 1 Collection
    sentinel1 = Collection(
        "S1",
        "Sentinel-1 GRD Chips overlapping labeled data. IW mode, GRD product. See https://developers.google.com/earth-engine/sentinel1 for information on preprocessing",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
    )
    collection_add_sentinel_chips(sentinel1, storage.ls("S1/"), "s1", debug=debug)
    collection_add_sentinel_chips(sentinel1, storage.ls("S1_NoQC/"), "s1", debug=debug)
    collection_update_extents(sentinel1)
    catalog.add_child(sentinel1)

    # Build Sentinel 2 Collection
    sentinel2 = Collection(
        "S2",
        "Sentinel-2 MSI L1C chips overlapping labeled data. Contains all spectral bands (1 - 12). Does not contain QA mask.",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
    )
    collection_add_sentinel_chips(sentinel2, storage.ls("S2/"), "s2", debug=debug)
    collection_add_sentinel_chips(sentinel2, storage.ls("S2_NoQC/"), "s2", debug=debug)
    collection_update_extents(sentinel2)
    catalog.add_child(sentinel2)

    # Build S1 Weak Labels Collection
    s1weak_labels = Collection(
        "S1Flood_NoQC",
        "Chips of water/nowater labels derived from standard OTSU thresholding of Sentinel-1 VH band overlapping weakly-labeled data.",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
        stac_extensions=[Extensions.LABEL],
    )
    label_collection_add_items(
        s1weak_labels,
        catalog,
        storage.ls("S1Flood_NoQC/"),
        sentinel1_links_func,
        "0: Not Water. 1: Water.",
        LabelType.RASTER,
        label_classes=[LabelClasses([0, 1])],
        label_tasks=["classification"],
        debug=debug,
    )
    collection_update_extents(s1weak_labels)
    catalog.add_child(s1weak_labels)

    # Build S2 Weak Labels Collection
    s2weak_labels = Collection(
        "NoQC",
        "Weakly-labeled chips derived from traditional Sentinel-2 Classification",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
        stac_extensions=[Extensions.LABEL],
    )
    label_collection_add_items(
        s2weak_labels,
        catalog,
        storage.ls("NoQC/"),
        sentinel2_links_func,
        "-1: No Data / Not Valid. 0: Not Water. 1: Water.",  # noqa: E501
        LabelType.RASTER,
        label_classes=[LabelClasses([-1, 0, 1])],
        label_tasks=["classification"],
        debug=debug,
    )
    collection_update_extents(s2weak_labels)
    catalog.add_child(s2weak_labels)

    # Build Hand Labels Collection
    hand_labels = Collection(
        "QC_v2",
        "446 hand labeled chips of surface water from selected flood events",
        extent=Extent(SpatialExtent([None, None, None, None]), None),
        stac_extensions=[Extensions.LABEL],
    )
    label_collection_add_items(
        hand_labels,
        catalog,
        storage.ls("QC_v2/"),
        sentinel1_sentinel2_links_func,
        "Hand labeled chips containing ground truth. -1: No Data / Not Valid. 0: Not Water. 1: Water.",  # noqa: E501
        LabelType.RASTER,
        label_classes=[LabelClasses([-1, 0, 1])],
        label_tasks=["classification"],
        debug=debug,
    )
    collection_update_extents(hand_labels)
    catalog.add_child(hand_labels)

    # Build Permanent Labels collection
    permanent_labels = Collection(
        "Perm",
        "Permanent water chips generated from the 'transition' layer of the JRC (European Commission Joint Research Centre) dataset",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
        stac_extensions=[Extensions.LABEL],
    )
    label_collection_add_items(
        permanent_labels,
        catalog,
        storage.ls("Perm/"),
        lambda *_: [],  # No easy way to map JRC source files to the label chips...
        "0: Not Water. 1: Water.",
        LabelType.RASTER,
        label_classes=[LabelClasses([0, 1])],
        label_tasks=["classification"],
        debug=debug,
    )
    collection_update_extents(permanent_labels)
    catalog.add_child(permanent_labels)

    # Build Otsu algorithm Labels collection
    otsu_labels = Collection(
        "S1Flood",
        "Chips of water/nowater derived from standard OTSU thresholding of Sentinel-1 VH band overlapping labeled data",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
        stac_extensions=[Extensions.LABEL],
    )
    label_collection_add_items(
        otsu_labels,
        catalog,
        storage.ls("S1Flood/"),
        sentinel1_links_func,
        "0: Not Water. 1: Water.",
        LabelType.RASTER,
        label_classes=[LabelClasses([0, 1])],
        label_tasks=["classification"],
        debug=debug,
    )
    collection_update_extents(otsu_labels)
    catalog.add_child(otsu_labels)

    # Save Complete Catalog
    root_path = "./catalog"
    catalog.normalize_and_save(root_path, catalog_type=CatalogType.SELF_CONTAINED)
    print("Saved STAC Catalog {} to {}...".format(catalog.id, root_path))
def main(ctx, input_reference, s_expression, cbn):
    """Apply s-expression band math to the referenced item and save a
    self-contained result catalog.

    cbn is the common band name used for the output asset/file name.
    """
    dump(ctx)
    item = get_item(os.path.join(input_reference, "catalog.json"))
    logging.info(f"Processing {item.id}")
    try:
        os.mkdir(item.id)
    except FileExistsError:
        pass
    # Sanitize the band name for use in the output filename.
    cbn = cbn.replace(' ', '-')
    result = os.path.join(item.id, f"{cbn}.tif")
    logging.info(f"Apply {s_expression} to {item.id}")
    apply_s_expression(item=item, s_expression=s_expression, out_tif=result)
    logging.info("STAC")
    # Output item mirrors the input item's metadata.
    item_out = Item(
        id=item.id,
        geometry=item.geometry,
        bbox=item.bbox,
        datetime=item.datetime,
        properties=item.properties,
        stac_extensions=item.stac_extensions,
    )
    eo_item = extensions.eo.EOItemExt(item_out)
    asset_properties = dict()
    # Record the expression that produced the asset.
    asset_properties["s-expression"] = s_expression
    asset = Asset(
        href=os.path.basename(result),
        media_type=MediaType.COG,
        roles=["data"],
        properties=asset_properties,
    )
    eo_bands = [
        extensions.eo.Band.create(
            name=cbn.lower(),
            common_name=cbn.lower(),
            description=f"{cbn.lower()} ({s_expression})",
        )
    ]
    eo_item.set_bands(eo_bands, asset=asset)
    item_out.add_asset(key=cbn.lower(), asset=asset)
    logging.info("STAC")
    cat = Catalog(id="catalog", description="s-expression")
    cat.add_items([item_out])
    cat.normalize_and_save(root_href="./",
                           catalog_type=CatalogType.SELF_CONTAINED)
    logging.info("Done!")
# pystac primitives used to assemble the Geobase catalog.
from pystac import (
    STAC_IO,
    Catalog,
    Collection,
    Extent,
    Link,
    Provider,
    SpatialExtent,
    TemporalExtent,
)

# Maps Geobase sensor codes to satellite names.
SPOT_SENSOR = {"S4": "SPOT 4", "S5": "SPOT 5"}

# Root catalog that the SPOT collections are attached to.
GeobaseCatalog = Catalog(
    id="Geobase", description="STAC Catalog for Geobase", title=None, stac_extensions=None
)

# Providers recorded on the SPOT collections/items.
SpotProviders = [
    Provider(
        "Government of Canada",
        "Natural Resources Canada Centre for Topographic Information",
        ["licensor", "processor"],
        "www.geobase.ca",
    ),
    Provider("Sparkgeo", "*****@*****.**", ["processor", "host"], "www.sparkgeo.com"),
    Provider(
        "PCI Geomatics", "*****@*****.**", ["processor", "host"], "www.pcigeomatics.com"
    ),
]