Example #1
def handle_root(dataset_prefix: str) -> None:
    """Handle writing a new dataset to the root catalog"""
    results = S3_CLIENT.list_objects(
        Bucket=ResourceName.STORAGE_BUCKET_NAME.value, Prefix=CATALOG_KEY)

    # load the root catalog if it already exists, otherwise create it
    if CONTENTS_KEY in results:
        root_catalog = Catalog.from_file(
            f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}/{CATALOG_KEY}"
        )

    else:
        root_catalog = Catalog(
            id=ROOT_CATALOG_ID,
            title=ROOT_CATALOG_TITLE,
            description=ROOT_CATALOG_DESCRIPTION,
            catalog_type=CatalogType.SELF_CONTAINED,
        )
        root_catalog.set_self_href(
            f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}/{CATALOG_KEY}"
        )

    dataset_path = f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}/{dataset_prefix}"
    dataset_catalog = Catalog.from_file(f"{dataset_path}/{CATALOG_KEY}")

    root_catalog.add_child(dataset_catalog,
                           strategy=GeostoreSTACLayoutStrategy())
    root_catalog.normalize_hrefs(
        f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}",
        strategy=GeostoreSTACLayoutStrategy(),
    )

    root_catalog.save(catalog_type=CatalogType.SELF_CONTAINED)
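
A minimal local sketch of the same load-or-create flow, using a filesystem path in place of the S3 href (the bucket constants and GeostoreSTACLayoutStrategy above are project-specific; the path below is hypothetical):

import os

from pystac import Catalog, CatalogType

ROOT_HREF = "/tmp/root/catalog.json"  # hypothetical stand-in for the S3 href

# load the root catalog if it already exists, otherwise create it
if os.path.exists(ROOT_HREF):
    root_catalog = Catalog.from_file(ROOT_HREF)
else:
    root_catalog = Catalog(id="root", description="root catalog",
                           catalog_type=CatalogType.SELF_CONTAINED)
    root_catalog.set_self_href(ROOT_HREF)

root_catalog.add_child(Catalog(id="dataset", description="dataset catalog"))
root_catalog.normalize_hrefs("/tmp/root")
root_catalog.save(catalog_type=CatalogType.SELF_CONTAINED)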
Example #2
    def test_full_copy_2(self):
        with TemporaryDirectory() as tmp_dir:
            cat = Catalog(id='test', description='test catalog')
            image_item = Item(id='Imagery',
                              geometry=RANDOM_GEOM,
                              bbox=RANDOM_BBOX,
                              datetime=datetime.utcnow(),
                              properties={})
            for key in ['ortho', 'dsm']:
                image_item.add_asset(
                    key, Asset(href='some/{}.tif'.format(key), media_type=MediaType.GEOTIFF))

            label_item = LabelItem(
                id='Labels',
                geometry=RANDOM_GEOM,
                bbox=RANDOM_BBOX,
                datetime=datetime.utcnow(),
                properties={},
                label_description='labels',
                label_type='vector',
                label_properties='label',
                label_classes=[LabelClasses(classes=['one', 'two'], name='label')],
                label_tasks=['classification'])
            label_item.add_source(image_item, assets=['ortho'])

            cat.add_items([image_item, label_item])

            cat.normalize_hrefs(os.path.join(tmp_dir, 'catalog-full-copy-2-source'))
            cat.save(catalog_type=CatalogType.ABSOLUTE_PUBLISHED)
            cat2 = cat.full_copy()
            cat2.normalize_hrefs(os.path.join(tmp_dir, 'catalog-full-copy-2-dest'))
            cat2.save(catalog_type=CatalogType.ABSOLUTE_PUBLISHED)

            self.check_catalog(cat, 'source')
            self.check_catalog(cat2, 'dest')
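
LabelItem and LabelClasses come from pystac's pre-1.0 label extension, and RANDOM_GEOM/RANDOM_BBOX are test fixtures; the full-copy round trip itself needs only the core API. A minimal sketch:

from datetime import datetime
from tempfile import TemporaryDirectory

from pystac import Catalog, CatalogType, Item

with TemporaryDirectory() as tmp_dir:
    cat = Catalog(id="test", description="test catalog")
    cat.add_item(Item(id="item",
                      geometry={"type": "Point", "coordinates": [0.0, 0.0]},
                      bbox=[0.0, 0.0, 0.0, 0.0],
                      datetime=datetime.utcnow(),
                      properties={}))
    cat.normalize_hrefs(tmp_dir + "/source")
    cat.save(catalog_type=CatalogType.ABSOLUTE_PUBLISHED)

    cat2 = cat.full_copy()  # deep copy: clones every object and rewires links
    cat2.normalize_hrefs(tmp_dir + "/dest")
    cat2.save(catalog_type=CatalogType.ABSOLUTE_PUBLISHED)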
Example #3
    def create_catalog_command(destination, source, id, quiet):
        """Creates a relative published 3DEP catalog in DESTINATION.

        If SOURCE is not provided, will use the metadata in AWS. SOURCE is
        expected to be a directory tree mirroring the structure on USGS, so
        it is best created using `stac threedep download-metadata`.
        """
        base_ids = id  # click binds the ID argument to the name "id"; rebind before it is shadowed below
        collections = {}
        items = {}
        for product in PRODUCTS:
            items[product] = []
            if base_ids:
                ids = base_ids
            else:
                ids = utils.fetch_ids(product)
            for id in ids:
                item = stac.create_item_from_product_and_id(
                    product, id, source)
                items[product].append(item)
                if not quiet:
                    print(item.id)
            extent = Extent.from_items(items[product])
            if product == "1":
                title = "1 arc-second"
                description = "USGS 3DEP 1 arc-second DEMs"
            elif product == "13":
                title = "1/3 arc-second"
                description = "USGS 3DEP 1/3 arc-second DEMs"
            else:
                raise NotImplementedError(f"unknown product: {product}")
            collection = Collection(
                id=f"{USGS_3DEP_ID}-{product}",
                title=title,
                keywords=["USGS", "3DEP", "NED", "DEM", "elevation"],
                providers=[USGS_PROVIDER],
                description=description,
                extent=extent,
                license="PDDL-1.0")
            collections[product] = collection
        catalog = Catalog(id=USGS_3DEP_ID,
                          description=DESCRIPTION,
                          title="USGS 3DEP DEMs",
                          catalog_type=CatalogType.RELATIVE_PUBLISHED)
        for product, collection in collections.items():
            catalog.add_child(collection)
            collection.add_items(items[product])
        catalog.generate_subcatalogs("${threedep:region}")
        catalog.normalize_hrefs(destination)
        catalog.save()
        catalog.validate()
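
Extent.from_items and generate_subcatalogs are standard pystac calls; a stripped-down version of the collection-building step, with the 3DEP-specific constants and metadata omitted, might look like this:

from datetime import datetime

from pystac import Catalog, CatalogType, Collection, Extent, Item

items = [
    Item(id=f"dem-{i}",
         geometry={"type": "Point", "coordinates": [float(i), 0.0]},
         bbox=[float(i), 0.0, float(i), 0.0],
         datetime=datetime.utcnow(),
         properties={})
    for i in range(3)
]
# derive the collection's spatial/temporal extent from its items
collection = Collection(id="dems",
                        description="example DEM collection",
                        extent=Extent.from_items(items),
                        license="PDDL-1.0")
collection.add_items(items)

catalog = Catalog(id="root", description="example root",
                  catalog_type=CatalogType.RELATIVE_PUBLISHED)
catalog.add_child(collection)
catalog.normalize_hrefs("./out")
catalog.save()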
Example #4
    def do_test(
        self, catalog: pystac.Catalog, catalog_type: pystac.CatalogType
    ) -> None:
        with tempfile.TemporaryDirectory() as tmp_dir:
            catalog.normalize_hrefs(tmp_dir)
            self.validate_catalog(catalog)

            catalog.save(catalog_type=catalog_type)

            root_href = catalog.self_href
            self.validate_link_types(root_href, catalog_type)

            for parent, _, items in catalog.walk():
                if isinstance(parent, Collection):
                    stac_object_type = pystac.STACObjectType.COLLECTION
                else:
                    stac_object_type = pystac.STACObjectType.CATALOG
                self.validate_file(parent.self_href, stac_object_type)

                for item in items:
                    self.validate_file(item.self_href, pystac.STACObjectType.ITEM)
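
Catalog.walk yields (parent, subcatalogs, items) tuples depth-first, so a pass over every object in a saved catalog is a simple loop; a minimal sketch (the path below is hypothetical):

import pystac
from pystac import Catalog, Collection

catalog = Catalog.from_file("catalog.json")  # hypothetical saved catalog
for parent, _children, items in catalog.walk():
    if isinstance(parent, Collection):
        kind = pystac.STACObjectType.COLLECTION
    else:
        kind = pystac.STACObjectType.CATALOG
    print(parent.self_href, kind)
    for item in items:
        print(item.self_href, pystac.STACObjectType.ITEM)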
Example #5
def create_dataset(body: JsonObject) -> JsonObject:
    """POST: Create Dataset."""

    body_schema = {
        "type": "object",
        "properties": {
            TITLE_KEY: {
                "type": "string",
                "pattern": TITLE_PATTERN
            },
            DESCRIPTION_KEY: {
                "type": "string"
            },
        },
        "required": [TITLE_KEY, DESCRIPTION_KEY],
    }

    # request body validation
    try:
        validate(body, body_schema)
    except ValidationError as err:
        return error_response(HTTPStatus.BAD_REQUEST, err.message)

    # check for duplicate type/title
    datasets_model_class = datasets_model_with_meta()
    dataset_title = body[TITLE_KEY]
    if datasets_model_class.datasets_title_idx.count(hash_key=dataset_title):
        return error_response(HTTPStatus.CONFLICT,
                              f"dataset '{dataset_title}' already exists")

    # create dataset
    dataset = datasets_model_class(title=dataset_title)
    dataset.save()
    dataset.refresh(consistent_read=True)

    # create dataset catalog
    dataset_catalog = Catalog(
        **{
            STAC_ID_KEY: dataset.dataset_prefix,
            STAC_DESCRIPTION_KEY: body[DESCRIPTION_KEY],
            STAC_TITLE_KEY: dataset_title,
        },
        catalog_type=CatalogType.SELF_CONTAINED,
    )
    dataset_catalog.normalize_hrefs(
        f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}/{dataset.dataset_prefix}"
    )
    dataset_catalog.save()

    # add reference to root catalog
    SQS_RESOURCE.get_queue_by_name(QueueName=get_param(
        ParameterName.UPDATE_CATALOG_MESSAGE_QUEUE_NAME)).send_message(
            MessageBody=dataset.dataset_prefix,
            MessageAttributes={
                MESSAGE_ATTRIBUTE_TYPE_KEY:
                MessageAttributeValueTypeDef(
                    DataType=DATA_TYPE_STRING,
                    StringValue=MESSAGE_ATTRIBUTE_TYPE_ROOT)
            },
        )

    # return response
    resp_body = dataset.as_dict()

    return success_response(HTTPStatus.CREATED, resp_body)
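
The request-body check relies on jsonschema.validate, which raises ValidationError on the first failing keyword; the validation step in isolation looks like this (the key names above are project constants, plain strings here):

from http import HTTPStatus

from jsonschema import ValidationError, validate

schema = {
    "type": "object",
    "properties": {
        "title": {"type": "string"},
        "description": {"type": "string"},
    },
    "required": ["title", "description"],
}

try:
    validate({"title": "roads"}, schema)  # no "description" -> ValidationError
except ValidationError as err:
    # err.message reads "'description' is a required property"
    print(HTTPStatus.BAD_REQUEST, err.message)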
Example #6
def main():
    parser = make_parser()
    args = parser.parse_args()

    valid_set = set()
    with open(args.valid_csv) as csvfile:
        for row in csv.reader(csvfile):
            name = row[0].split("/")[1]
            name = "_".join(name.split("_")[0:-1])
            valid_set.add(name)

    test_sets = []
    any_test_set = set()
    for test_csv in args.test_csvs:
        test_set = set()
        with open(test_csv) as csvfile:
            for row in csv.reader(csvfile):
                name = row[0].split("/")[1]
                name = "_".join(name.split("_")[0:-1])
                test_set.add(name)
                any_test_set.add(name)
        test_sets.append(test_set)

    def scene_name(item):
        # strip the trailing chip index from the item id
        return "_".join(item.id.split("_")[:-1])

    def yes_validation(item):
        return scene_name(item) in valid_set and "Bolivia" not in item.id

    def yes_test_i(i, item):
        return scene_name(item) in test_sets[i]

    def yes_any_test(item):
        return scene_name(item) in any_test_set

    def yes_training(item):
        return (not yes_any_test(item) and not yes_validation(item)
                and "Bolivia" not in item.id)

    catalog = Catalog.from_file("./data/catalog/catalog.json")

    experiment = args.experiment
    label_collection_id = EXPERIMENT[experiment]
    label_collection = catalog.get_child(label_collection_id)
    test_label_collection_id = EXPERIMENT["hand"]
    test_label_collection = catalog.get_child(test_label_collection_id)

    # Top-Level
    mldata_catalog = Catalog(
        "{}_mldata".format(experiment),
        "Training/Validation/Test split for {} experiment in sen1floods11".
        format(experiment),
    )

    # Training Imagery and Labels
    training_imagery_collection = Collection("training_imagery",
                                             "training items for experiment",
                                             label_collection.extent)
    training_labels_collection = Collection(
        "training_labels",
        "labels for scenes in the training collection",
        label_collection.extent,
    )
    training_label_items = [
        i.clone() for i in label_collection.get_items() if yes_training(i)
    ]
    mldata_catalog.add_child(training_labels_collection)
    training_labels_collection.add_items(
        [i.clone() for i in label_collection.get_items() if yes_training(i)])
    mldata_catalog.add_child(training_imagery_collection)
    # mapper (defined elsewhere in this script) presumably expands each label
    # item into its corresponding imagery item(s)
    training_imagery_items = np.array(list(map(
        mapper, training_label_items))).flatten()
    training_imagery_collection.add_items(training_imagery_items)
    print("Added {} items to training catalog".format(
        len(training_label_items)))

    # Validation Imagery and Labels
    validation_imagery_collection = Collection(
        "validation_imagery",
        "validation items for experiment",
        test_label_collection.extent,
    )
    validation_labels_collection = Collection(
        "validation_labels",
        "labels for scenes in the validation collection",
        test_label_collection.extent,
    )
    validation_label_items = [
        i.clone() for i in test_label_collection.get_items()
        if yes_validation(i)
    ]
    mldata_catalog.add_child(validation_labels_collection)
    validation_labels_collection.add_items(
        [i.clone() for i in label_collection.get_items() if yes_validation(i)])
    mldata_catalog.add_child(validation_imagery_collection)
    validation_imagery_items = np.array(
        list(map(mapper, validation_label_items))).flatten()
    validation_imagery_collection.add_items(validation_imagery_items)
    print("Added {} items to validation catalog".format(
        len(validation_label_items)))

    # Test Imagery and Labels
    for i in range(len(test_sets)):
        test_imagery_collection = Collection(
            "test_imagery_{}".format(i),
            "test items for experiment",
            test_label_collection.extent,
        )
        test_labels_collection = Collection(
            "test_labels_{}".format(i),
            "labels for scenes in the test collection",
            test_label_collection.extent,
        )
        test_label_items = [
            j.clone() for j in test_label_collection.get_items()
            if yes_test_i(i, j)
        ]
        mldata_catalog.add_child(test_labels_collection)
        test_labels_collection.add_items([
            j.clone() for j in label_collection.get_items()
            if yes_test_i(i, j)
        ])
        mldata_catalog.add_child(test_imagery_collection)
        test_imagery_items = np.array(list(map(mapper,
                                               test_label_items))).flatten()
        test_imagery_collection.add_items(test_imagery_items)
        print("Added {} items to test catalog {}".format(
            len(test_label_items), i))

    print("Saving catalog...")
    mldata_catalog.normalize_hrefs("./data/mldata_{}".format(experiment))
    mldata_catalog.save(CatalogType.SELF_CONTAINED)
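
The split above boils down to cloning items out of one collection into another keyed by scene name; stripped of the sen1floods11 specifics, the core move is just the following (the child id and membership set are hypothetical):

from pystac import Catalog, Collection

catalog = Catalog.from_file("./data/catalog/catalog.json")
labels = catalog.get_child("label-collection-id")  # hypothetical child id
assert isinstance(labels, Collection)

keep = {"scene_a", "scene_b"}  # hypothetical split membership

def scene_name(item):
    # drop the trailing chip index, as the yes_* predicates above do
    return "_".join(item.id.split("_")[:-1])

split = Collection(id="train_labels",
                   description="training split",
                   extent=labels.extent)
split.add_items(i.clone() for i in labels.get_items()
                if scene_name(i) in keep)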