def handle_root(dataset_prefix: str) -> None:
    """Handle writing a new dataset to the root catalog"""
    bucket = ResourceName.STORAGE_BUCKET_NAME.value
    root_url = f"{S3_URL_PREFIX}{bucket}/{CATALOG_KEY}"

    # Load the stored root catalog if one exists; otherwise create a fresh
    # self-contained root catalog whose self href points at the same key.
    listing = S3_CLIENT.list_objects(Bucket=bucket, Prefix=CATALOG_KEY)
    if CONTENTS_KEY in listing:
        root_catalog = Catalog.from_file(root_url)
    else:
        root_catalog = Catalog(
            id=ROOT_CATALOG_ID,
            title=ROOT_CATALOG_TITLE,
            description=ROOT_CATALOG_DESCRIPTION,
            catalog_type=CatalogType.SELF_CONTAINED,
        )
        root_catalog.set_self_href(root_url)

    # Attach the new dataset's catalog as a child, then rewrite all HREFs
    # with the project layout strategy and persist the result.
    dataset_path = f"{S3_URL_PREFIX}{bucket}/{dataset_prefix}"
    dataset_catalog = Catalog.from_file(f"{dataset_path}/{CATALOG_KEY}")
    root_catalog.add_child(dataset_catalog, strategy=GeostoreSTACLayoutStrategy())
    root_catalog.normalize_hrefs(
        f"{S3_URL_PREFIX}{bucket}",
        strategy=GeostoreSTACLayoutStrategy(),
    )
    root_catalog.save(catalog_type=CatalogType.SELF_CONTAINED)
def test_full_copy_2(self):
    """Round-trip a catalog of imagery + label items through full_copy."""
    with TemporaryDirectory() as tmp_dir:
        source_cat = Catalog(id='test', description='test catalog')

        image_item = Item(id='Imagery',
                          geometry=RANDOM_GEOM,
                          bbox=RANDOM_BBOX,
                          datetime=datetime.utcnow(),
                          properties={})
        # One GeoTIFF asset per product key.
        for asset_key in ['ortho', 'dsm']:
            asset = Asset(href='some/{}.tif'.format(asset_key),
                          media_type=MediaType.GEOTIFF)
            image_item.add_asset(asset_key, asset)

        label_item = LabelItem(
            id='Labels',
            geometry=RANDOM_GEOM,
            bbox=RANDOM_BBOX,
            datetime=datetime.utcnow(),
            properties={},
            label_description='labels',
            label_type='vector',
            label_properties='label',
            label_classes=[LabelClasses(classes=['one', 'two'], name='label')],
            label_tasks=['classification'])
        label_item.add_source(image_item, assets=['ortho'])

        source_cat.add_items([image_item, label_item])
        source_cat.normalize_hrefs(
            os.path.join(tmp_dir, 'catalog-full-copy-2-source'))
        source_cat.save(catalog_type=CatalogType.ABSOLUTE_PUBLISHED)

        copied_cat = source_cat.full_copy()
        copied_cat.normalize_hrefs(
            os.path.join(tmp_dir, 'catalog-full-copy-2-dest'))
        copied_cat.save(catalog_type=CatalogType.ABSOLUTE_PUBLISHED)

        self.check_catalog(source_cat, 'source')
        self.check_catalog(copied_cat, 'dest')
def test_determine_type_for_unknown(self):
    """A serialized catalog dict with no self link has no determinable type."""
    parent = Catalog(id='test', description='test desc')
    child = Catalog(id='subcat', description='subcat desc')
    parent.add_child(child)
    parent.normalize_hrefs('http://example.com')
    serialized = parent.to_dict(include_self_link=False)
    self.assertIsNone(CatalogType.determine_type(serialized))
def create_catalog_command(destination, source, id, quiet):
    """Creates a relative published 3DEP catalog in DESTINATION.

    If SOURCE is not provided, will use the metadata in AWS. SOURCE is
    expected to be a directory tree mirroring the structure on USGS, so it
    is best created using `stac threedep download-metadata`.
    """
    # Click supplies the parameter as `id`; rebind it immediately so the
    # builtin-shadowing name is not used anywhere else in the function.
    base_ids = id
    collections = {}
    items = {}
    for product in PRODUCTS:
        items[product] = []
        # Explicit ids from the command line take precedence; otherwise
        # query USGS for every id available for this product.
        ids = base_ids if base_ids else utils.fetch_ids(product)
        for product_id in ids:
            item = stac.create_item_from_product_and_id(
                product, product_id, source)
            items[product].append(item)
            if not quiet:
                print(item.id)
        extent = Extent.from_items(items[product])
        if product == "1":
            title = "1 arc-second"
            description = "USGS 3DEP 1 arc-second DEMs"
        elif product == "13":
            title = "1/3 arc-second"
            description = "USGS 3DEP 1/3 arc-second DEMs"
        else:
            # Only the 1 and 1/3 arc-second products are supported.
            raise NotImplementedError(
                f"no title/description defined for product {product}")
        collection = Collection(
            id=f"{USGS_3DEP_ID}-{product}",
            title=title,
            keywords=["USGS", "3DEP", "NED", "DEM", "elevation"],
            providers=[USGS_PROVIDER],
            description=description,
            extent=extent,
            license="PDDL-1.0")
        collections[product] = collection

    catalog = Catalog(id=USGS_3DEP_ID,
                      description=DESCRIPTION,
                      title="USGS 3DEP DEMs",
                      catalog_type=CatalogType.RELATIVE_PUBLISHED)
    for product, collection in collections.items():
        catalog.add_child(collection)
        collection.add_items(items[product])

    catalog.generate_subcatalogs("${threedep:region}")
    catalog.normalize_hrefs(destination)
    catalog.save()
    catalog.validate()
def test_full_copy_1(self):
    """full_copy of a single-item catalog survives re-normalization."""
    with TemporaryDirectory() as tmp_dir:
        original = Catalog(id='test', description='test catalog')
        original.add_item(
            Item(id='test_item',
                 geometry=RANDOM_GEOM,
                 bbox=RANDOM_BBOX,
                 datetime=datetime.utcnow(),
                 properties={}))
        original.normalize_hrefs(
            os.path.join(tmp_dir, 'catalog-full-copy-1-source'))

        duplicate = original.full_copy()
        duplicate.normalize_hrefs(
            os.path.join(tmp_dir, 'catalog-full-copy-1-dest'))

        self.check_catalog(original, 'source')
        self.check_catalog(duplicate, 'dest')
def do_test(
    self, catalog: pystac.Catalog, catalog_type: pystac.CatalogType
) -> None:
    """Save *catalog* as *catalog_type* and validate every file written."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        catalog.normalize_hrefs(tmp_dir)
        self.validate_catalog(catalog)
        catalog.save(catalog_type=catalog_type)

        self.validate_link_types(catalog.self_href, catalog_type)

        for parent, _, items in catalog.walk():
            # Collections are catalogs too, so test for the subclass first.
            object_type = (pystac.STACObjectType.COLLECTION
                           if issubclass(type(parent), Collection)
                           else pystac.STACObjectType.CATALOG)
            self.validate_file(parent.self_href, object_type)
            for item in items:
                self.validate_file(item.self_href, pystac.STACObjectType.ITEM)
def create_dataset(body: JsonObject) -> JsonObject:
    """POST: Create Dataset."""
    body_schema = {
        "type": "object",
        "properties": {
            TITLE_KEY: {
                "type": "string",
                "pattern": TITLE_PATTERN
            },
            DESCRIPTION_KEY: {
                "type": "string"
            },
        },
        "required": [TITLE_KEY, DESCRIPTION_KEY],
    }

    # Validate the request body before touching any storage.
    try:
        validate(body, body_schema)
    except ValidationError as err:
        return error_response(HTTPStatus.BAD_REQUEST, err.message)

    # Reject a title that is already in use.
    datasets_model_class = datasets_model_with_meta()
    dataset_title = body[TITLE_KEY]
    if datasets_model_class.datasets_title_idx.count(hash_key=dataset_title):
        return error_response(
            HTTPStatus.CONFLICT,
            f"dataset '{dataset_title}' already exists")

    # Persist the dataset record, then re-read it for a consistent view.
    dataset = datasets_model_class(title=dataset_title)
    dataset.save()
    dataset.refresh(consistent_read=True)

    # Write the dataset's STAC catalog into the storage bucket under its
    # generated prefix.
    dataset_catalog = Catalog(
        **{
            STAC_ID_KEY: dataset.dataset_prefix,
            STAC_DESCRIPTION_KEY: body[DESCRIPTION_KEY],
            STAC_TITLE_KEY: dataset_title,
        },
        catalog_type=CatalogType.SELF_CONTAINED,
    )
    dataset_catalog.normalize_hrefs(
        f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}/{dataset.dataset_prefix}"
    )
    dataset_catalog.save()

    # Notify the catalog-update queue so the root catalog gains a child
    # reference to the new dataset.
    update_queue = SQS_RESOURCE.get_queue_by_name(
        QueueName=get_param(ParameterName.UPDATE_CATALOG_MESSAGE_QUEUE_NAME))
    update_queue.send_message(
        MessageBody=dataset.dataset_prefix,
        MessageAttributes={
            MESSAGE_ATTRIBUTE_TYPE_KEY: MessageAttributeValueTypeDef(
                DataType=DATA_TYPE_STRING,
                StringValue=MESSAGE_ATTRIBUTE_TYPE_ROOT)
        },
    )

    return success_response(HTTPStatus.CREATED, dataset.as_dict())
def main():
    """Build train/validation/test split catalogs for a sen1floods11 experiment."""
    parser = make_parser()
    args = parser.parse_args()

    def scene_name(raw):
        """Drop the trailing band/label suffix from a scene identifier."""
        return "_".join(raw.split("_")[0:-1])

    # Scene names designated for validation.
    valid_set = set()
    with open(args.valid_csv) as csvfile:
        for row in csv.reader(csvfile):
            valid_set.add(scene_name(row[0].split("/")[1]))

    # One scene-name set per test CSV, plus the union of all of them.
    test_sets = []
    any_test_set = set()
    for test_csv in args.test_csvs:
        test_set = set()
        with open(test_csv) as csvfile:
            for row in csv.reader(csvfile):
                name = scene_name(row[0].split("/")[1])
                test_set.add(name)
                any_test_set.add(name)
        test_sets.append(test_set)

    # Split predicates.  "Bolivia" scenes are excluded from both training
    # and validation.
    def yes_validation(item):
        return scene_name(item.id) in valid_set and "Bolivia" not in item.id

    def yes_test_i(i, item):
        return scene_name(item.id) in test_sets[i]

    def yes_any_test(item):
        return scene_name(item.id) in any_test_set

    def yes_training(item):
        return (not yes_any_test(item) and not yes_validation(item)
                and "Bolivia" not in item.id)

    catalog = Catalog.from_file("./data/catalog/catalog.json")
    experiment = args.experiment
    label_collection = catalog.get_child(EXPERIMENT[experiment])
    # Validation/test labels always come from the hand-labeled collection.
    test_label_collection = catalog.get_child(EXPERIMENT["hand"])

    # Top-Level
    mldata_catalog = Catalog(
        "{}_mldata".format(experiment),
        "Training/Validation/Test split for {} experiment in sen1floods11".
        format(experiment),
    )

    # Training Imagery and Labels
    training_imagery_collection = Collection("training_imagery",
                                             "training items for experiment",
                                             label_collection.extent)
    training_labels_collection = Collection(
        "training_labels",
        "labels for scenes in the training collection",
        label_collection.extent,
    )
    training_label_items = [
        i.clone() for i in label_collection.get_items() if yes_training(i)
    ]
    mldata_catalog.add_child(training_labels_collection)
    training_labels_collection.add_items(
        [i.clone() for i in label_collection.get_items() if yes_training(i)])
    mldata_catalog.add_child(training_imagery_collection)
    training_imagery_items = np.array(list(map(
        mapper, training_label_items))).flatten()
    training_imagery_collection.add_items(training_imagery_items)
    print("Added {} items to training catalog".format(
        len(training_label_items)))

    # Validation Imagery and Labels
    validation_imagery_collection = Collection(
        "validation_imagery",
        "validation items for experiment",
        test_label_collection.extent,
    )
    validation_labels_collection = Collection(
        "validation_labels",
        "labels for scenes in the validation collection",
        test_label_collection.extent,
    )
    validation_label_items = [
        i.clone() for i in test_label_collection.get_items()
        if yes_validation(i)
    ]
    mldata_catalog.add_child(validation_labels_collection)
    # BUGFIX: labels were previously cloned from label_collection, which is
    # inconsistent with the imagery below (derived from
    # test_label_collection) whenever experiment != "hand".
    validation_labels_collection.add_items([
        i.clone() for i in test_label_collection.get_items()
        if yes_validation(i)
    ])
    mldata_catalog.add_child(validation_imagery_collection)
    validation_imagery_items = np.array(
        list(map(mapper, validation_label_items))).flatten()
    validation_imagery_collection.add_items(validation_imagery_items)
    print("Added {} items to validation catalog".format(
        len(validation_label_items)))

    # Test Imagery and Labels, one collection pair per test CSV.
    for i in range(len(test_sets)):
        test_imagery_collection = Collection(
            "test_imagery_{}".format(i),
            "test items for experiment",
            test_label_collection.extent,
        )
        test_labels_collection = Collection(
            "test_labels_{}".format(i),
            "labels for scenes in the test collection",
            test_label_collection.extent,
        )
        test_label_items = [
            j.clone() for j in test_label_collection.get_items()
            if yes_test_i(i, j)
        ]
        mldata_catalog.add_child(test_labels_collection)
        # BUGFIX: same inconsistency as validation — clone the hand-labeled
        # (test) collection, not the experiment's label collection.
        test_labels_collection.add_items([
            j.clone() for j in test_label_collection.get_items()
            if yes_test_i(i, j)
        ])
        mldata_catalog.add_child(test_imagery_collection)
        test_imagery_items = np.array(list(map(mapper,
                                               test_label_items))).flatten()
        test_imagery_collection.add_items(test_imagery_items)
        print("Added {} items to test catalog {}".format(
            len(test_label_items), i))

    print("Saving catalog...")
    mldata_catalog.normalize_hrefs("./data/mldata_{}".format(experiment))
    mldata_catalog.save(CatalogType.SELF_CONTAINED)