Example #1
def handle_root(dataset_prefix: str) -> None:
    """Handle writing a new dataset to the root catalog"""
    results = S3_CLIENT.list_objects(
        Bucket=ResourceName.STORAGE_BUCKET_NAME.value, Prefix=CATALOG_KEY)

    # Load the root catalog if it already exists; otherwise create it
    if CONTENTS_KEY in results:
        root_catalog = Catalog.from_file(
            f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}/{CATALOG_KEY}"
        )

    else:
        root_catalog = Catalog(
            id=ROOT_CATALOG_ID,
            title=ROOT_CATALOG_TITLE,
            description=ROOT_CATALOG_DESCRIPTION,
            catalog_type=CatalogType.SELF_CONTAINED,
        )
        root_catalog.set_self_href(
            f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}/{CATALOG_KEY}"
        )

    dataset_path = f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}/{dataset_prefix}"
    dataset_catalog = Catalog.from_file(f"{dataset_path}/{CATALOG_KEY}")

    root_catalog.add_child(dataset_catalog,
                           strategy=GeostoreSTACLayoutStrategy())
    root_catalog.normalize_hrefs(
        f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}",
        strategy=GeostoreSTACLayoutStrategy(),
    )

    root_catalog.save(catalog_type=CatalogType.SELF_CONTAINED)
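For context: `handle_root` relies on module-level constants and an S3 client that the snippet does not show. A minimal sketch of what those definitions might look like; every value below is an assumption (the real module defines them elsewhere), except that `CONTENTS_KEY = "Contents"` follows from how boto3's `list_objects` response is checked:

from enum import Enum

import boto3

S3_CLIENT = boto3.client("s3")
S3_URL_PREFIX = "s3://"
CATALOG_KEY = "catalog.json"  # assumed object key for the root catalog
CONTENTS_KEY = "Contents"  # present in a list_objects response only when the prefix matched
ROOT_CATALOG_ID = "root"  # hypothetical
ROOT_CATALOG_TITLE = "Root catalog"  # hypothetical
ROOT_CATALOG_DESCRIPTION = "Root catalog for all datasets"  # hypothetical

class ResourceName(Enum):
    STORAGE_BUCKET_NAME = "my-storage-bucket"  # hypothetical bucket name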
Example #2
    def test_determine_type_for_unknown(self):
        catalog = Catalog(id='test', description='test desc')
        subcat = Catalog(id='subcat', description='subcat desc')
        catalog.add_child(subcat)
        catalog.normalize_hrefs('http://example.com')
        d = catalog.to_dict(include_self_link=False)

        self.assertIsNone(CatalogType.determine_type(d))
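`determine_type` keys on the serialized links: `normalize_hrefs` made every href absolute, and `include_self_link=False` dropped the self link, so nothing remains to distinguish the catalog types and `None` comes back. A sketch of the contrasting case, assuming pystac's rule that a self link plus absolute hrefs identifies ABSOLUTE_PUBLISHED:

from pystac import Catalog, CatalogType

catalog = Catalog(id='test', description='test desc')
catalog.add_child(Catalog(id='subcat', description='subcat desc'))
catalog.normalize_hrefs('http://example.com')

# Keeping the self link gives determine_type enough to identify the save style.
d = catalog.to_dict(include_self_link=True)
assert CatalogType.determine_type(d) == CatalogType.ABSOLUTE_PUBLISHED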
Example #3
    def test_map_items_multiple_2(self):
        catalog = Catalog(id='test-1', description='Test1')
        item1 = Item(id='item1',
                     geometry=RANDOM_GEOM,
                     bbox=RANDOM_BBOX,
                     datetime=datetime.utcnow(),
                     properties={})
        item1.add_asset('ortho', Asset(href='/some/ortho.tif'))
        catalog.add_item(item1)
        kitten = Catalog(id='test-kitten',
                         description='A cuter version of catalog')
        catalog.add_child(kitten)
        item2 = Item(id='item2',
                     geometry=RANDOM_GEOM,
                     bbox=RANDOM_BBOX,
                     datetime=datetime.utcnow(),
                     properties={})
        item2.add_asset('ortho', Asset(href='/some/other/ortho.tif'))
        kitten.add_item(item2)

        def modify_item_title(item):
            item.title = 'Some new title'
            return item

        def create_label_item(item):
            # Assumes the GEOJSON labels are in the
            # same location as the image
            img_href = item.assets['ortho'].href
            label_href = '{}.geojson'.format(os.path.splitext(img_href)[0])
            label_item = Item(id='Labels',
                              geometry=item.geometry,
                              bbox=item.bbox,
                              datetime=datetime.utcnow(),
                              properties={})
            label_item.ext.enable(Extensions.LABEL)
            label_ext = label_item.ext.label
            label_ext.apply(label_description='labels',
                            label_type='vector',
                            label_properties=['label'],
                            label_classes=[
                                LabelClasses.create(classes=['one', 'two'],
                                                    name='label')
                            ],
                            label_tasks=['classification'])
            label_ext.add_source(item, assets=['ortho'])
            label_ext.add_geojson_labels(label_href)

            return [item, label_item]

        c = catalog.map_items(modify_item_title)
        c = c.map_items(create_label_item)
        new_catalog = c

        items = new_catalog.get_all_items()
        self.assertEqual(len(list(items)), 4)
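The count of 4 works because `map_items` returns a full copy of the catalog in which each item is replaced by whatever the mapping function returns, a single item or a list of items: two originals, each mapped to `[item, label_item]`, yields four. A minimal sketch of that fan-out without the label extension (geometry and bbox are placeholders):

from datetime import datetime
from pystac import Catalog, Item

GEOM = {'type': 'Point', 'coordinates': [0.0, 0.0]}  # placeholder geometry
BBOX = [0.0, 0.0, 0.0, 0.0]  # placeholder bbox

catalog = Catalog(id='c', description='demo')
catalog.add_item(Item(id='i', geometry=GEOM, bbox=BBOX,
                      datetime=datetime.utcnow(), properties={}))

def duplicate(item):
    twin = item.clone()
    twin.id = item.id + '-copy'
    return [item, twin]  # one item in, two items out

doubled = catalog.map_items(duplicate)
assert len(list(doubled.get_all_items())) == 2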
Example #4
    def create_catalog_command(destination, source, id, quiet):
        """Creates a relative published 3DEP catalog in DESTINATION.

        If SOURCE is not provided, will use the metadata in AWS. SOURCE is
        expected to be a directory tree mirroring the structure on USGS, so
        it is best created using `stac threedep download-metadata`.
        """
        base_ids = id  # not sure how to rename arguments in click
        collections = {}
        items = {}
        for product in PRODUCTS:
            items[product] = []
            if base_ids:
                ids = base_ids
            else:
                ids = utils.fetch_ids(product)
            for id in ids:
                item = stac.create_item_from_product_and_id(
                    product, id, source)
                items[product].append(item)
                if not quiet:
                    print(item.id)
            extent = Extent.from_items(items[product])
            if product == "1":
                title = "1 arc-second"
                description = "USGS 3DEP 1 arc-second DEMs"
            elif product == "13":
                title = "1/3 arc-second"
                description = "USGS 3DEP 1/3 arc-second DEMs"
            else:
                raise NotImplementedError
            collection = Collection(
                id=f"{USGS_3DEP_ID}-{product}",
                title=title,
                keywords=["USGS", "3DEP", "NED", "DEM", "elevation"],
                providers=[USGS_PROVIDER],
                description=description,
                extent=extent,
                license="PDDL-1.0")
            collections[product] = collection
        catalog = Catalog(id=USGS_3DEP_ID,
                          description=DESCRIPTION,
                          title="USGS 3DEP DEMs",
                          catalog_type=CatalogType.RELATIVE_PUBLISHED)
        for product, collection in collections.items():
            catalog.add_child(collection)
            collection.add_items(items[product])
        catalog.generate_subcatalogs("${threedep:region}")
        catalog.normalize_hrefs(destination)
        catalog.save()
        catalog.validate()
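As written this is only the body of a click command; the decorators that bind DESTINATION, SOURCE, `--id`, and `--quiet` are not part of the snippet. A hedged sketch of the wiring (option names inferred from the docstring and parameters, not taken from the actual stactools CLI):

import click

@click.command("create-catalog")
@click.argument("destination")
@click.argument("source", required=False)
@click.option("--id", multiple=True, help="Limit catalog creation to these IDs")
@click.option("--quiet", is_flag=True, default=False)
def create_catalog_command(destination, source, id, quiet):
    ...  # body as above

Incidentally, click can rename a parameter by passing an explicit name, e.g. `@click.option("--id", "base_ids", multiple=True)`, which would avoid the `base_ids = id` workaround and the shadowed builtin.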
Example #5
    def test_clear_items_removes_from_cache(self):
        catalog = Catalog(id='test', description='test')
        subcat = Catalog(id='subcat', description='test')
        catalog.add_child(subcat)
        item = Item(id='test-item',
                    geometry=RANDOM_GEOM,
                    bbox=RANDOM_BBOX,
                    datetime=datetime.utcnow(),
                    properties={'key': 'one'})
        subcat.add_item(item)

        items = list(catalog.get_all_items())
        self.assertEqual(len(items), 1)
        self.assertEqual(items[0].properties['key'], 'one')

        subcat.clear_items()
        item = Item(id='test-item',
                    geometry=RANDOM_GEOM,
                    bbox=RANDOM_BBOX,
                    datetime=datetime.utcnow(),
                    properties={'key': 'two'})
        subcat.add_item(item)

        items = list(catalog.get_all_items())
        self.assertEqual(len(items), 1)
        self.assertEqual(items[0].properties['key'], 'two')

        subcat.remove_item('test-item')
        item = Item(id='test-item',
                    geometry=RANDOM_GEOM,
                    bbox=RANDOM_BBOX,
                    datetime=datetime.utcnow(),
                    properties={'key': 'three'})
        subcat.add_item(item)

        items = list(catalog.get_all_items())
        self.assertEqual(len(items), 1)
        self.assertEqual(items[0].properties['key'], 'three')
Example #6
    def test_invocation_recurses_subcatalogs(self):
        catalog = Catalog('0', 'Catalog 0')
        catalog.add_link(
            Link('harmony_source', 'http://example.com/C0001-EXAMPLE'))
        catalog.add_child(Catalog('1a', 'Catalog 1a'))
        subcatalog = Catalog('1b', 'Catalog 1b')
        catalog.add_child(subcatalog)
        subsubcatalog_a = Catalog('2a', 'Catalog 2a')
        subsubcatalog_b = Catalog('2b', 'Catalog 2b')
        subsubcatalog_b.add_link(
            Link('harmony_source', 'http://example.com/C0002-EXAMPLE'))
        subcatalog.add_children([subsubcatalog_a, subsubcatalog_b])

        message = Message(full_message)
        items_a = [
            Item('3', None, [0, 0, 1, 3], '2020-01-01T00:00:00.000Z', {}),
            Item('4', None, [0, 0, 1, 4], '2020-01-01T00:00:00.000Z', {})
        ]
        items_b = [
            Item('5', None, [0, 0, 1, 5], '2020-01-01T00:00:00.000Z', {}),
            Item('6', None, [0, 0, 1, 6], '2020-01-01T00:00:00.000Z', {})
        ]
        subsubcatalog_a.add_items(items_a)
        subsubcatalog_b.add_items(items_b)
        adapter = AdapterTester(message, catalog, config=self.config)
        adapter.invoke()
        self.assertEqual(AdapterTester.process_args[0][0].bbox,
                         items_a[0].bbox)
        self.assertEqual(AdapterTester.process_args[1][0].bbox,
                         items_a[1].bbox)
        self.assertEqual(AdapterTester.process_args[2][0].bbox,
                         items_b[0].bbox)
        self.assertEqual(AdapterTester.process_args[3][0].bbox,
                         items_b[1].bbox)
        self.assertEqual(AdapterTester.process_args[0][1], message.sources[0])
        self.assertEqual(AdapterTester.process_args[1][1], message.sources[0])
        self.assertEqual(AdapterTester.process_args[2][1], message.sources[1])
        self.assertEqual(AdapterTester.process_args[3][1], message.sources[1])
Example #7
    def test_clear_children_removes_from_cache(self):
        catalog = Catalog(id='test', description='test')
        subcat = Catalog(id='subcat', description='test')
        catalog.add_child(subcat)

        children = list(catalog.get_children())
        self.assertEqual(len(children), 1)
        self.assertEqual(children[0].description, 'test')

        catalog.clear_children()
        subcat = Catalog(id='subcat', description='test2')
        catalog.add_child(subcat)

        children = list(catalog.get_children())
        self.assertEqual(len(children), 1)
        self.assertEqual(children[0].description, 'test2')

        catalog.remove_child('subcat')
        subcat = Catalog(id='subcat', description='test3')
        catalog.add_child(subcat)

        children = list(catalog.get_children())
        self.assertEqual(len(children), 1)
        self.assertEqual(children[0].description, 'test3')
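Both cache tests above pin down the same invariant: `clear_items`/`clear_children` and `remove_item`/`remove_child` must evict pystac's resolved-object cache, so that an object re-added under the same ID is read fresh rather than served stale. Condensed:

from pystac import Catalog

catalog = Catalog(id='root', description='root')
catalog.add_child(Catalog(id='kid', description='first'))
catalog.clear_children()
catalog.add_child(Catalog(id='kid', description='second'))

# A stale cache would still answer 'first' here.
assert next(iter(catalog.get_children())).description == 'second'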
Example #8
def main():
    """

# The Data

446 QC'ed chips containing flood events, with hand-labeled flood classifications
4385 non-QC'ed chips containing water, exported with only Sentinel-1 and Sentinel-2 flood classifications

# The Catalog Outline

** We want to generate a root catalog that contains all items, only training items, or only validation items **
^^^ The script should support this (see the sketch after this example)

- Root Catalog
    - Collection: Sentinel 1 data chips
        - Item: The Item
    - Collection: Sentinel 2 data chips
        - Item: The Item
    - Collection: Sentinel 1 weak labels
        - Item: The Item
    - Collection: Sentinel 2 weak labels
        - Item: The Item
    - Collection: Hand labels
        - Item: The Item
    - Collection: Permanent water labels
        - Item: The Item
    - Collection: Traditional Otsu algorithm labels
        - Item: The Item

## Alternate catalog structure

This structure was considered but rejected in the interest of facilitating collections for each
of the label datasets.

- Root Catalog
    - Collection: Sentinel 1
        - Catalog: Country
            - Catalog: Event ID
                (Note: Catalog will always have the first item. Then it will either have the second
                       item or all the others depending on which dir the first item came from)
                - Item: (dir: S1 + S1_NoQC) Sentinel 1 data chip
                - Item: (dir: S1Flood_NoQC) Labels from "weak" classification algorithm applied to S1
                - Item: (dir: QC_v2) Labels from hand classification (ORed with item below)
                - Item: (dir: S1Flood) Labels from traditional Otsu algorithm
                - Item: (dir: Perm) Labels from perm water dataset (this is a Byte tiff, only 1 or 0
                        for yes or no perm water)
    - Collection: Sentinel 2
        - Catalog: Country
            - Catalog: Event ID
                - Item: (dir: S2 + S2_NoQC) Sentinel 2 data chip
                - Item: (dir: S2Flood) Labels from traditional Otsu algorithm applied to S2
    - Collection: PermJRC
        - Catalog: Lat 10
            - Catalog: Lon 10
                - Item: (dir: PermJRC)
    """
    parser = argparse.ArgumentParser(
        description="Build STAC Catalog for sen1floods11")
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()
    debug = args.debug

    storage = S3Storage("sen1floods11-data")

    catalog_description = "Bonafilia, D., Tellman, B., Anderson, T., Issenberg, E. 2020. Sen1Floods11: a georeferenced dataset to train and test deep learning flood algorithms for Sentinel-1. The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, 2020, pp. 210-211. Available Open access at: http://openaccess.thecvf.com/content_CVPRW_2020/html/w11/Bonafilia_Sen1Floods11_A_Georeferenced_Dataset_to_Train_and_Test_Deep_Learning_CVPRW_2020_paper.html"  # noqa: E501
    catalog_title = "A georeferenced dataset to train and test deep learning flood algorithms for Sentinel-1"  # noqa: E501

    catalog = Catalog("sen1floods11", catalog_description, title=catalog_title)
    print("Created Catalog {}".format(catalog.id))

    # Build Sentinel 1 Collection
    sentinel1 = Collection(
        "S1",
        "Sentinel-1 GRD Chips overlapping labeled data. IW mode, GRD product. See https://developers.google.com/earth-engine/sentinel1 for information on preprocessing",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
    )
    collection_add_sentinel_chips(sentinel1,
                                  storage.ls("S1/"),
                                  "s1",
                                  debug=debug)
    collection_add_sentinel_chips(sentinel1,
                                  storage.ls("S1_NoQC/"),
                                  "s1",
                                  debug=debug)
    collection_update_extents(sentinel1)
    catalog.add_child(sentinel1)

    # Build Sentinel 2 Collection
    sentinel2 = Collection(
        "S2",
        "Sentinel-2 MSI L1C chips overlapping labeled data. Contains all spectral bands (1 - 12). Does not contain QA mask.",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
    )
    collection_add_sentinel_chips(sentinel2,
                                  storage.ls("S2/"),
                                  "s2",
                                  debug=debug)
    collection_add_sentinel_chips(sentinel2,
                                  storage.ls("S2_NoQC/"),
                                  "s2",
                                  debug=debug)
    collection_update_extents(sentinel2)
    catalog.add_child(sentinel2)

    # Build S1 Weak Labels Collection
    s1weak_labels = Collection(
        "S1Flood_NoQC",
        "Chips of water/nowater labels derived from standard OTSU thresholding of Sentinel-1 VH band overlapping weakly-labeled data.",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
        stac_extensions=[Extensions.LABEL],
    )
    label_collection_add_items(
        s1weak_labels,
        catalog,
        storage.ls("S1Flood_NoQC/"),
        sentinel1_links_func,
        "0: Not Water. 1: Water.",
        LabelType.RASTER,
        label_classes=[LabelClasses([0, 1])],
        label_tasks=["classification"],
        debug=debug,
    )
    collection_update_extents(s1weak_labels)
    catalog.add_child(s1weak_labels)

    # Build S2 Weak Labels Collection
    s2weak_labels = Collection(
        "NoQC",
        "Weakly-labeled chips derived from traditional Sentinel-2 Classification",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
        stac_extensions=[Extensions.LABEL],
    )
    label_collection_add_items(
        s2weak_labels,
        catalog,
        storage.ls("NoQC/"),
        sentinel2_links_func,
        "-1: No Data / Not Valid. 0: Not Water. 1: Water.",  # noqa: E501
        LabelType.RASTER,
        label_classes=[LabelClasses([-1, 0, 1])],
        label_tasks=["classification"],
        debug=debug,
    )
    collection_update_extents(s2weak_labels)
    catalog.add_child(s2weak_labels)

    # Build Hand Labels Collection
    hand_labels = Collection(
        "QC_v2",
        "446 hand labeled chips of surface water from selected flood events",
        extent=Extent(SpatialExtent([None, None, None, None]), None),
        stac_extensions=[Extensions.LABEL],
    )
    label_collection_add_items(
        hand_labels,
        catalog,
        storage.ls("QC_v2/"),
        sentinel1_sentinel2_links_func,
        "Hand labeled chips containing ground truth. -1: No Data / Not Valid. 0: Not Water. 1: Water.",  # noqa: E501
        LabelType.RASTER,
        label_classes=[LabelClasses([-1, 0, 1])],
        label_tasks=["classification"],
        debug=debug,
    )
    collection_update_extents(hand_labels)
    catalog.add_child(hand_labels)

    # Build Permanent Labels collection
    permanent_labels = Collection(
        "Perm",
        "Permanent water chips generated from the 'transition' layer of the JRC (European Commission Joint Research Centre) dataset",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
        stac_extensions=[Extensions.LABEL],
    )
    label_collection_add_items(
        permanent_labels,
        catalog,
        storage.ls("Perm/"),
        lambda *_: [],  # No easy way to map JRC source files to the label chips...
        "0: Not Water. 1: Water.",
        LabelType.RASTER,
        label_classes=[LabelClasses([0, 1])],
        label_tasks=["classification"],
        debug=debug,
    )
    collection_update_extents(permanent_labels)
    catalog.add_child(permanent_labels)

    # Build Otsu algorithm Labels collection
    otsu_labels = Collection(
        "S1Flood",
        "Chips of water/nowater derived from standard OTSU thresholding of Sentinel-1 VH band overlapping labeled data",  # noqa: E501
        extent=Extent(SpatialExtent([None, None, None, None]), None),
        stac_extensions=[Extensions.LABEL],
    )
    label_collection_add_items(
        otsu_labels,
        catalog,
        storage.ls("S1Flood/"),
        sentinel1_links_func,
        "0: Not Water. 1: Water.",
        LabelType.RASTER,
        label_classes=[LabelClasses([0, 1])],
        label_tasks=["classification"],
        debug=debug,
    )
    collection_update_extents(otsu_labels)
    catalog.add_child(otsu_labels)

    # Save Complete Catalog
    root_path = "./catalog"
    catalog.normalize_and_save(root_path,
                               catalog_type=CatalogType.SELF_CONTAINED)
    print("Saved STAC Catalog {} to {}...".format(catalog.id, root_path))
Example #9
                datetime=start_time,
                properties={},
            )
            for asset in assets:
                image_item.add_asset(
                    asset.href.split("/")[-1].split(".")[0], asset)

            stac_items.append(image_item)
        # STAC bboxes are [min lon, min lat, max lon, max lat], matching
        # rasterio's (left, bottom, right, top) bounds ordering.
        aggregate_spatial_extent = SpatialExtent([[
            aggregate_bounds.left,
            aggregate_bounds.bottom,
            aggregate_bounds.right,
            aggregate_bounds.top,
        ]])
        aggregate_extent = Extent(aggregate_spatial_extent, temporal_extent)
        collection = Collection(
            flood_id,
            "Imagery coextensive with GLOFIMR flood {}".format(flood_id),
            extent=aggregate_extent,
        )
        for stac_item in stac_items:
            collection.add_item(stac_item)

        catalog.add_child(collection)

    # Save Complete Catalog
    root_path = "./data/catalog"
    catalog.normalize_and_save(root_path,
                               catalog_type=CatalogType.SELF_CONTAINED)
    print("Saved STAC Catalog {} to {}...".format(catalog.id, root_path))
Example #10
def merge_all_items(source_catalog: pystac.Catalog,
                    target_catalog: pystac.Catalog,
                    move_assets: bool = False,
                    ignore_conflicts: bool = False,
                    as_child: bool = False,
                    child_folder: Optional[str] = None) -> pystac.Catalog:
    """Merge all items from source_catalog into target_catalog.

    Calls merge_items on any items that have the same ID between the two catalogs.
    Any items that don't exist in the target_catalog will be added to the target_catalog.
    If the target_catalog is a Collection, it will be set as the collection of any
    new items.

    Args:
        source_catalog (Catalog or Collection): The catalog or collection that items
            will be drawn from to merge into the target catalog.
            This catalog is not mutated in this operation.
        target_catalog (Catalog or Collection): The target catalog that will be merged into.
            This catalog will be mutated in this operation.
        move_assets (bool): If true, move the asset files alongside the target item.
        ignore_conflicts (bool): If True, assets with the same keys will not be merged,
            and asset files that would be moved to overwrite an existing file
            will not be moved. If False, either of these situations will throw an error.
        as_child (bool): If True, a child catalog will be added with the content of the
            source catalog. Otherwise, items will be added directly to the destination
            catalog.
        child_folder (str): name of the subfolder to use in case the as_child option is
            set to True. If None, the id of the catalog will be used as folder name.

    Returns:
        Catalog or Collection: The target_catalog
    """
    source_items = source_catalog.get_all_items()
    ids_to_items = {item.id: item for item in source_items}

    parent_dir = os.path.dirname(target_catalog.self_href)
    if as_child:
        child_dir = os.path.join(parent_dir, child_folder or source_catalog.id)
        copy_catalog(source_catalog, child_dir, source_catalog.catalog_type,
                     move_assets)
        child_catalog_path = os.path.join(
            child_dir, os.path.basename(source_catalog.self_href))
        new_source_catalog = pystac.read_file(child_catalog_path)
        if not isinstance(new_source_catalog, pystac.Catalog):
            raise ValueError(
                f"Child catalog {child_catalog_path} is not a STAC Catalog")
        source_catalog = new_source_catalog
        target_catalog.add_child(source_catalog, source_catalog.title)
    else:
        for item in target_catalog.get_all_items():
            source_item = ids_to_items.get(item.id)
            if source_item is not None:
                merge_items(source_item,
                            item,
                            move_assets=move_assets,
                            ignore_conflicts=ignore_conflicts)
                del ids_to_items[item.id]

        # Process source items that did not match existing target items
        layout_strategy = BestPracticesLayoutStrategy()
        for item in ids_to_items.values():
            item_copy = item.clone()
            item_copy.set_self_href(
                layout_strategy.get_item_href(item_copy, parent_dir))
            target_catalog.add_item(item_copy)

            if isinstance(target_catalog, pystac.Collection):
                item_copy.set_collection(target_catalog)
            else:
                item_copy.set_collection(None)

            if move_assets:
                do_move_assets(item_copy, copy=False)

    if isinstance(target_catalog, pystac.Collection):
        target_catalog.update_extent_from_items()

    return target_catalog
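A hedged usage sketch for `merge_all_items`, with hypothetical paths; both catalogs need resolvable self hrefs (which `from_file` sets), since the target's self href anchors where merged items are laid out:

import pystac

source = pystac.Catalog.from_file("source/catalog.json")  # hypothetical path
target = pystac.Catalog.from_file("target/catalog.json")  # hypothetical path

merged = merge_all_items(source, target,
                         move_assets=False,
                         ignore_conflicts=False)
merged.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)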
Example #11
def main():
    parser = make_parser()
    args = parser.parse_args()

    valid_set = set()
    with open(args.valid_csv) as csvfile:
        for row in csv.reader(csvfile):
            name = row[0].split("/")[1]
            name = "_".join(name.split("_")[0:-1])
            valid_set.add(name)

    test_sets = []
    any_test_set = set()
    for test_csv in args.test_csvs:
        test_set = set()
        with open(test_csv) as csvfile:
            for row in csv.reader(csvfile):
                name = row[0].split("/")[1]
                name = "_".join(name.split("_")[0:-1])
                test_set.add(name)
                any_test_set.add(name)
        test_sets.append(test_set)

    def yes_validation(item):
        id = item.id
        id = "_".join(id.split("_")[0:-1])
        return id in valid_set and "Bolivia" not in item.id

    def yes_test_i(i, item):
        id = item.id
        id = "_".join(id.split("_")[0:-1])
        return id in test_sets[i]

    def yes_any_test(item):
        id = item.id
        id = "_".join(id.split("_")[0:-1])
        return id in any_test_set

    def yes_training(item):
        return (not yes_any_test(item) and not yes_validation(item)
                and "Bolivia" not in item.id)

    catalog = Catalog.from_file("./data/catalog/catalog.json")

    experiment = args.experiment
    label_collection_id = EXPERIMENT[experiment]
    label_collection = catalog.get_child(label_collection_id)
    test_label_collection_id = EXPERIMENT["hand"]
    test_label_collection = catalog.get_child(test_label_collection_id)

    # Top-Level
    mldata_catalog = Catalog(
        "{}_mldata".format(experiment),
        "Training/Validation/Test split for {} experiment in sen1floods11".
        format(experiment),
    )

    # Training Imagery and Labels
    training_imagery_collection = Collection("training_imagery",
                                             "training items for experiment",
                                             label_collection.extent)
    training_labels_collection = Collection(
        "training_labels",
        "labels for scenes in the training collection",
        label_collection.extent,
    )
    training_label_items = [
        i.clone() for i in label_collection.get_items() if yes_training(i)
    ]
    mldata_catalog.add_child(training_labels_collection)
    training_labels_collection.add_items(training_label_items)
    mldata_catalog.add_child(training_imagery_collection)
    training_imagery_items = np.array(list(map(
        mapper, training_label_items))).flatten()
    training_imagery_collection.add_items(training_imagery_items)
    print("Added {} items to training catalog".format(
        len(training_label_items)))

    # Validation Imagery and Labels
    validation_imagery_collection = Collection(
        "validation_imagery",
        "validation items for experiment",
        test_label_collection.extent,
    )
    validation_labels_collection = Collection(
        "validation_labels",
        "labels for scenes in the validation collection",
        test_label_collection.extent,
    )
    validation_label_items = [
        i.clone() for i in test_label_collection.get_items()
        if yes_validation(i)
    ]
    mldata_catalog.add_child(validation_labels_collection)
    validation_labels_collection.add_items(validation_label_items)
    mldata_catalog.add_child(validation_imagery_collection)
    validation_imagery_items = np.array(
        list(map(mapper, validation_label_items))).flatten()
    validation_imagery_collection.add_items(validation_imagery_items)
    print("Added {} items to validation catalog".format(
        len(validation_label_items)))

    # Test Imagery and Labels
    for i in range(len(test_sets)):
        test_imagery_collection = Collection(
            "test_imagery_{}".format(i),
            "test items for experiment",
            test_label_collection.extent,
        )
        test_labels_collection = Collection(
            "test_labels_{}".format(i),
            "labels for scenes in the test collection",
            test_label_collection.extent,
        )
        test_label_items = [
            j.clone() for j in test_label_collection.get_items()
            if yes_test_i(i, j)
        ]
        mldata_catalog.add_child(test_labels_collection)
        test_labels_collection.add_items(test_label_items)
        mldata_catalog.add_child(test_imagery_collection)
        test_imagery_items = np.array(list(map(mapper,
                                               test_label_items))).flatten()
        test_imagery_collection.add_items(test_imagery_items)
        print("Added {} items to test catalog {}".format(
            len(test_label_items), i))

    print("Saving catalog...")
    mldata_catalog.normalize_hrefs("./data/mldata_{}".format(experiment))
    mldata_catalog.save(CatalogType.SELF_CONTAINED)
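This script also leans on helpers that are not shown: `make_parser`, the `EXPERIMENT` mapping, and `mapper` (which expands a label item into its linked imagery items). A sketch of what `make_parser` might look like, inferred purely from how the arguments are used above:

import argparse

def make_parser():
    # Argument names inferred from args.valid_csv, args.test_csvs, args.experiment.
    parser = argparse.ArgumentParser(
        description="Build train/validation/test ML catalogs from sen1floods11")
    parser.add_argument("--valid-csv", dest="valid_csv", required=True)
    parser.add_argument("--test-csvs", dest="test_csvs", nargs="+", default=[])
    parser.add_argument("--experiment", required=True)
    return parser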