def bytes2ds_doc(data):
    """Turn a raw STAC item (JSON as ``bytes`` or ``str``) into a dataset doc.

    Pipeline: decode -> parse JSON -> STAC-to-eo3 transform -> prep_eo3.
    """
    raw = data.decode("utf-8") if isinstance(data, bytes) else data
    return prep_eo3(stac_transform(json.loads(raw)))
def mk_dataset(ds, uri):
    """Build a `Dataset` for a metadata document that is already indexed.

    :param ds: SimpleDocNav wrapping the incoming metadata document
    :param uri: location the document was read from
    :return: ``(dataset, existing_record, error_message)`` — on failure the
             first two are None and the message explains why.
    """
    uuid = ds.id

    if uuid is None:
        # Fixed message wording (was: "document it missing id field").
        return None, None, "Metadata document is missing id field"

    existing = index.datasets.get(uuid)
    if existing is None:
        return None, None, "No such dataset in the database: {}".format(uuid)

    ds = SimpleDocNav(prep_eo3(ds.doc, auto_skip=True))
    # TODO: what about sources=?
    return Dataset(existing.type,
                   ds.doc_without_lineage_sources,
                   uris=[uri]), existing, None
def test_prep_eo3(sample_doc, sample_doc_180, eo3_metadata):
    """prep_eo3 must expose spatial search fields and flattened lineage."""
    reader = eo3_metadata.dataset_reader(prep_eo3(sample_doc))
    assert reader.grid_spatial is not None
    assert reader.lat.end > reader.lat.begin
    assert reader.lon.end > reader.lon.begin
    for source_name in ('src_a', 'src_b1', 'src_b2'):
        assert source_name in reader.sources
    assert 'src_empty' not in reader.sources

    # Document whose footprint straddles the anti-meridian.
    reader = eo3_metadata.dataset_reader(prep_eo3(sample_doc_180))
    assert reader.grid_spatial is not None
    assert reader.sources == {}
    assert reader.lat.end > reader.lat.begin
    assert reader.lon.end > reader.lon.begin
    assert reader.lon.begin < 180 < reader.lon.end

    # Non-eo3 input handling: None passes through, auto_skip returns the
    # doc unchanged, otherwise a ValueError is raised.
    non_eo3_doc = {}
    assert prep_eo3(None) is None
    assert prep_eo3(non_eo3_doc, auto_skip=True) is non_eo3_doc
    with pytest.raises(ValueError):
        prep_eo3(non_eo3_doc)
def eo3_dataset_s2(eo3_metadata):
    """Fixture: a Sentinel-2B MSI L2A eo3 `Dataset` built from inline docs."""
    # eo3 dataset document for one S2B L2A granule (tile 39LVG, 2020-01-01).
    ds_doc = {
        '$schema': 'https://schemas.opendatacube.org/dataset',
        'id': '8b0e2770-5d4e-5238-8995-4aa91691ab85',
        'product': {'name': 's2b_msil2a'},
        'label': 'S2B_MSIL2A_20200101T070219_N0213_R120_T39LVG_20200101T091825',
        'crs': 'epsg:32739',
        # Three resolution grids: 10m (default), 20m and 60m.
        'grids': {'g20m': {'shape': [5490, 5490],
                           'transform': [20, 0, 399960, 0, -20, 8700040, 0, 0, 1]},
                  'g60m': {'shape': [1830, 1830],
                           'transform': [60, 0, 399960, 0, -60, 8700040, 0, 0, 1]},
                  'default': {'shape': [10980, 10980],
                              'transform': [10, 0, 399960, 0, -10, 8700040, 0, 0, 1]}},
        'geometry': {'type': 'Polygon',
                     'coordinates': [[[509759.0000000001, 8590241.0],
                                      [399960.99999999977, 8590241.0],
                                      [399960.99999999977, 8700039.0],
                                      [509758.99999999965, 8700039.0],
                                      [509759.0000000001, 8590241.0]]]},
        'properties': {'eo:gsd': 10,
                       'datetime': '2020-01-01T07:02:54.188Z',
                       'eo:platform': 'sentinel-2b',
                       'eo:instrument': 'msi',
                       'eo:cloud_cover': 0,
                       'odc:file_format': 'GeoTIFF',
                       'odc:region_code': '39LVG',
                       'odc:processing_datetime': '2020-01-01T07:02:54.188Z'},
        # Each measurement references a grid (10m "default" unless stated).
        'measurements': {'red': {'path': 'B04.tif'},
                         'scl': {'grid': 'g20m', 'path': 'SCL.tif'},
                         'blue': {'path': 'B02.tif'},
                         'green': {'path': 'B03.tif'},
                         'nir_1': {'path': 'B08.tif'},
                         'nir_2': {'grid': 'g20m', 'path': 'B8A.tif'},
                         'swir_1': {'grid': 'g20m', 'path': 'B11.tif'},
                         'swir_2': {'grid': 'g20m', 'path': 'B12.tif'},
                         'red_edge_1': {'grid': 'g20m', 'path': 'B05.tif'},
                         'red_edge_2': {'grid': 'g20m', 'path': 'B06.tif'},
                         'red_edge_3': {'grid': 'g20m', 'path': 'B07.tif'},
                         'water_vapour': {'grid': 'g60m', 'path': 'B09.tif'},
                         'coastal_aerosol': {'grid': 'g60m', 'path': 'B01.tif'}},
        'lineage': {}}
    # Matching product definition: per-band dtype/units/nodata/aliases.
    product_doc = {
        'name': 's2b_msil2a',
        'description': 'Sentinel-2B Level 2 COGs',
        'metadata_type': 'eo3',
        'metadata': {'product': {'name': 's2b_msil2a'}},
        'measurements': [{'name': 'coastal_aerosol', 'dtype': 'uint16',
                          'units': '1', 'nodata': 0, 'aliases': ['band_01', 'B01']},
                         {'name': 'blue', 'dtype': 'uint16',
                          'units': '1', 'nodata': 0, 'aliases': ['band_02', 'B02']},
                         {'name': 'green', 'dtype': 'uint16',
                          'units': '1', 'nodata': 0, 'aliases': ['band_03', 'B03']},
                         {'name': 'red', 'dtype': 'uint16',
                          'units': '1', 'nodata': 0, 'aliases': ['band_04', 'B04']},
                         {'name': 'red_edge_1', 'dtype': 'uint16',
                          'units': '1', 'nodata': 0, 'aliases': ['band_05', 'B05']},
                         {'name': 'red_edge_2', 'dtype': 'uint16',
                          'units': '1', 'nodata': 0, 'aliases': ['band_06', 'B06']},
                         {'name': 'red_edge_3', 'dtype': 'uint16',
                          'units': '1', 'nodata': 0, 'aliases': ['band_07', 'B07']},
                         {'name': 'nir_1', 'dtype': 'uint16',
                          'units': '1', 'nodata': 0, 'aliases': ['band_08', 'B08']},
                         {'name': 'nir_2', 'dtype': 'uint16',
                          'units': '1', 'nodata': 0, 'aliases': ['band_8a', 'B8A']},
                         {'name': 'water_vapour', 'dtype': 'uint16',
                          'units': '1', 'nodata': 0, 'aliases': ['band_09', 'B09']},
                         {'name': 'swir_1', 'dtype': 'uint16',
                          'units': '1', 'nodata': 0, 'aliases': ['band_11', 'B11']},
                         {'name': 'swir_2', 'dtype': 'uint16',
                          'units': '1', 'nodata': 0, 'aliases': ['band_12', 'B12']},
                         # Scene-classification band carries flag definitions.
                         {'name': 'scl', 'dtype': 'uint8',
                          'units': '1', 'nodata': 0, 'aliases': ['mask', 'qa'],
                          'flags_definition': {'sca': {'description': 'Sen2Cor Scene Classification',
                                                       'bits': [0, 1, 2, 3, 4, 5, 6, 7],
                                                       'values': {
                                                           '0': 'nodata',
                                                           '1': 'defective',
                                                           '2': 'dark',
                                                           '3': 'shadow',
                                                           '4': 'vegetation',
                                                           '5': 'bare',
                                                           '6': 'water',
                                                           '7': 'unclassified',
                                                           '8': 'cloud medium probability',
                                                           '9': 'cloud high probability',
                                                           '10': 'thin cirrus',
                                                           '11': 'snow or ice'}}}}]
    }
    # prep_eo3 normalises the raw doc before it is wrapped in a Dataset.
    return Dataset(DatasetType(eo3_metadata, product_doc), prep_eo3(ds_doc))
def eo3_dataset_s2(eo3_metadata):
    """Fixture: a Sentinel-2B MSI L2A eo3 `Dataset` built from inline docs."""
    # eo3 dataset document for one S2B L2A granule (tile 39LVG, 2020-01-01).
    ds_doc = {
        "$schema": "https://schemas.opendatacube.org/dataset",
        "id": "8b0e2770-5d4e-5238-8995-4aa91691ab85",
        "product": {
            "name": "s2b_msil2a"
        },
        "label": "S2B_MSIL2A_20200101T070219_N0213_R120_T39LVG_20200101T091825",
        "crs": "epsg:32739",
        # Three resolution grids: 10m (default), 20m and 60m.
        "grids": {
            "g20m": {
                "shape": [5490, 5490],
                "transform": [20, 0, 399960, 0, -20, 8700040, 0, 0, 1],
            },
            "g60m": {
                "shape": [1830, 1830],
                "transform": [60, 0, 399960, 0, -60, 8700040, 0, 0, 1],
            },
            "default": {
                "shape": [10980, 10980],
                "transform": [10, 0, 399960, 0, -10, 8700040, 0, 0, 1],
            },
        },
        "geometry": {
            "type": "Polygon",
            "coordinates": [[
                [509759.0000000001, 8590241.0],
                [399960.99999999977, 8590241.0],
                [399960.99999999977, 8700039.0],
                [509758.99999999965, 8700039.0],
                [509759.0000000001, 8590241.0],
            ]],
        },
        "properties": {
            "eo:gsd": 10,
            "datetime": "2020-01-01T07:02:54.188Z",
            "eo:platform": "sentinel-2b",
            "eo:instrument": "msi",
            "eo:cloud_cover": 0,
            "odc:file_format": "GeoTIFF",
            "odc:region_code": "39LVG",
            "odc:processing_datetime": "2020-01-01T07:02:54.188Z",
        },
        # Each measurement references a grid (10m "default" unless stated).
        "measurements": {
            "red": {
                "path": "B04.tif"
            },
            "scl": {
                "grid": "g20m",
                "path": "SCL.tif"
            },
            "blue": {
                "path": "B02.tif"
            },
            "green": {
                "path": "B03.tif"
            },
            "nir_1": {
                "path": "B08.tif"
            },
            "nir_2": {
                "grid": "g20m",
                "path": "B8A.tif"
            },
            "swir_1": {
                "grid": "g20m",
                "path": "B11.tif"
            },
            "swir_2": {
                "grid": "g20m",
                "path": "B12.tif"
            },
            "red_edge_1": {
                "grid": "g20m",
                "path": "B05.tif"
            },
            "red_edge_2": {
                "grid": "g20m",
                "path": "B06.tif"
            },
            "red_edge_3": {
                "grid": "g20m",
                "path": "B07.tif"
            },
            "water_vapour": {
                "grid": "g60m",
                "path": "B09.tif"
            },
            "coastal_aerosol": {
                "grid": "g60m",
                "path": "B01.tif"
            },
        },
        "lineage": {},
    }
    # Matching product definition: per-band dtype/units/nodata/aliases.
    product_doc = {
        "name": "s2b_msil2a",
        "description": "Sentinel-2B Level 2 COGs",
        "metadata_type": "eo3",
        "metadata": {
            "product": {
                "name": "s2b_msil2a"
            }
        },
        "measurements": [
            {
                "name": "coastal_aerosol",
                "dtype": "uint16",
                "units": "1",
                "nodata": 0,
                "aliases": ["band_01", "B01"],
            },
            {
                "name": "blue",
                "dtype": "uint16",
                "units": "1",
                "nodata": 0,
                "aliases": ["band_02", "B02"],
            },
            {
                "name": "green",
                "dtype": "uint16",
                "units": "1",
                "nodata": 0,
                "aliases": ["band_03", "B03"],
            },
            {
                "name": "red",
                "dtype": "uint16",
                "units": "1",
                "nodata": 0,
                "aliases": ["band_04", "B04"],
            },
            {
                "name": "red_edge_1",
                "dtype": "uint16",
                "units": "1",
                "nodata": 0,
                "aliases": ["band_05", "B05"],
            },
            {
                "name": "red_edge_2",
                "dtype": "uint16",
                "units": "1",
                "nodata": 0,
                "aliases": ["band_06", "B06"],
            },
            {
                "name": "red_edge_3",
                "dtype": "uint16",
                "units": "1",
                "nodata": 0,
                "aliases": ["band_07", "B07"],
            },
            {
                "name": "nir_1",
                "dtype": "uint16",
                "units": "1",
                "nodata": 0,
                "aliases": ["band_08", "B08"],
            },
            {
                "name": "nir_2",
                "dtype": "uint16",
                "units": "1",
                "nodata": 0,
                "aliases": ["band_8a", "B8A"],
            },
            {
                "name": "water_vapour",
                "dtype": "uint16",
                "units": "1",
                "nodata": 0,
                "aliases": ["band_09", "B09"],
            },
            {
                "name": "swir_1",
                "dtype": "uint16",
                "units": "1",
                "nodata": 0,
                "aliases": ["band_11", "B11"],
            },
            {
                "name": "swir_2",
                "dtype": "uint16",
                "units": "1",
                "nodata": 0,
                "aliases": ["band_12", "B12"],
            },
            # Scene-classification band carries flag definitions.
            {
                "name": "scl",
                "dtype": "uint8",
                "units": "1",
                "nodata": 0,
                "aliases": ["mask", "qa"],
                "flags_definition": {
                    "sca": {
                        "description": "Sen2Cor Scene Classification",
                        "bits": [0, 1, 2, 3, 4, 5, 6, 7],
                        "values": {
                            "0": "nodata",
                            "1": "defective",
                            "2": "dark",
                            "3": "shadow",
                            "4": "vegetation",
                            "5": "bare",
                            "6": "water",
                            "7": "unclassified",
                            "8": "cloud medium probability",
                            "9": "cloud high probability",
                            "10": "thin cirrus",
                            "11": "snow or ice",
                        },
                    }
                },
            },
        ],
    }
    # prep_eo3 normalises the raw doc before it is wrapped in a Dataset.
    return Dataset(DatasetType(eo3_metadata, product_doc), prep_eo3(ds_doc))
def validate_dataset(
    doc: Dict,
    product_definition: Optional[Dict] = None,
    metadata_type_definition: Optional[Dict] = None,
    thorough: bool = False,
    readable_location: Optional[Union[str, Path]] = None,
    expect_extra_measurements: bool = False,
    expect_geometry: bool = True,
    nullable_fields: Iterable[str] = ("label", ),
) -> ValidationMessages:
    """
    Validate a dataset document, optionally against the given product.

    By default this will only look at the metadata, run with thorough=True
    to open the data files too.

    :param doc: the dataset metadata document to validate.
    :param product_definition: Optionally check that the dataset matches this product definition.
    :param metadata_type_definition: Optionally check that the metadata type's configured
           fields can be read from the (storage-prepared) document.
    :param thorough: Open the imagery too, to check that data types etc match.
    :param readable_location: Dataset location to use, if not the metadata path.
    :param expect_extra_measurements:
        Allow some dataset measurements to be missing from the product definition.
        This is (deliberately) allowed by ODC, but often a mistake.
        This flag disables the warning.
    :param expect_geometry: Passed through to the geometry validation step.
    :param nullable_fields: Configured fields that are allowed to hold null values.
    """
    schema = doc.get("$schema")
    if schema is None:
        yield _error(
            "no_schema",
            f"No $schema field. "
            f"You probably want an ODC dataset schema {model.ODC_DATASET_SCHEMA_URL!r}",
        )
        return
    if schema != model.ODC_DATASET_SCHEMA_URL:
        yield _error(
            "unknown_doc_type",
            f"Unknown doc schema {schema!r}. "
            f"Only ODC datasets are supported ({model.ODC_DATASET_SCHEMA_URL!r})",
        )
        return

    # Structural (json-schema) validation; abort early on any failure since
    # the doc can't be parsed into a model.
    has_doc_errors = False
    for error in serialise.DATASET_SCHEMA.iter_errors(doc):
        has_doc_errors = True
        displayable_path = ".".join(error.absolute_path)

        hint = None
        if displayable_path == "crs" and "not of type" in error.message:
            hint = "epsg codes should be prefixed with 'epsg:1234'"

        context = f"({displayable_path}) " if displayable_path else ""
        yield _error("structure", f"{context}{error.message} ", hint=hint)

    if has_doc_errors:
        return

    dataset = serialise.from_doc(doc, skip_validation=True)

    if not dataset.product.href:
        # Fix: the message was constructed but never yielded, so it was
        # silently dropped from the generator's output.
        yield _info("product_href", "A url (href) is recommended for products")

    yield from _validate_geo(dataset, expect_geometry=expect_geometry)

    # Note that a dataset may have no measurements (eg. telemetry data).
    # (TODO: a stricter mode for when we know we should have geo and measurement info)
    if dataset.measurements:
        for name, measurement in dataset.measurements.items():
            grid_name = measurement.grid
            if grid_name != "default" or dataset.grids:
                if grid_name not in dataset.grids:
                    yield _error(
                        "invalid_grid_ref",
                        f"Measurement {name!r} refers to unknown grid {grid_name!r}",
                    )

            if is_absolute(measurement.path):
                yield _warning(
                    "absolute_path",
                    f"measurement {name!r} has an absolute path: {measurement.path!r}",
                )

    yield from _validate_stac_properties(dataset)

    required_measurements: Dict[str, ExpectedMeasurement] = {}
    if product_definition is not None:
        required_measurements.update({
            m.name: m
            for m in map(
                ExpectedMeasurement.from_definition,
                product_definition.get("measurements") or (),
            )
        })

        product_name = product_definition.get("name")
        if product_name != dataset.product.name:
            # This is only informational as it's possible products may be indexed with finer-grained
            # categories than the original datasets: eg. a separate "nrt" product, or test product.
            yield _info(
                "product_mismatch",
                f"Dataset product name {dataset.product.name!r} "
                # Fix: closing paren was missing from the message.
                f"does not match the given product ({product_name!r})",
            )

        for name in required_measurements:
            if name not in dataset.measurements.keys():
                yield _error(
                    "missing_measurement",
                    # Fix: stray ')' removed from the message.
                    f"Product {product_name} expects a measurement {name!r}",
                )
        measurements_not_in_product = set(
            dataset.measurements.keys()).difference({
                m["name"]
                for m in product_definition.get("measurements") or ()
            })
        if (not expect_extra_measurements) and measurements_not_in_product:
            things = ", ".join(sorted(measurements_not_in_product))
            yield _warning(
                "extra_measurements",
                f"Dataset has measurements not present in product definition for {product_name!r}: {things}",
                hint="This may be valid, as it's allowed by ODC. Set `expect_extra_measurements` to mute this.",
            )

    if metadata_type_definition:
        # Datacube does certain transforms on an eo3 doc before storage.
        # We need to do the same, as the fields will be read from the storage.
        prepared_doc = prep_eo3(doc)

        for field_name, offsets in _get_field_offsets(
                metadata_type=metadata_type_definition):
            # A field is present if at least one of its offsets resolves.
            if not any(
                    _has_offset(prepared_doc, offset) for offset in offsets):
                readable_offsets = " or ".join("->".join(offset)
                                               for offset in offsets)
                yield _warning(
                    "missing_field",
                    f"Dataset is missing field {field_name!r}",
                    hint=f"Expected at {readable_offsets}",
                )
                continue

            if field_name not in nullable_fields:
                value = None
                for offset in offsets:
                    value = toolz.get_in(offset, prepared_doc)
                # NOTE(review): only the LAST offset's value is inspected here;
                # presumably any non-null offset should suffice — confirm intent.
                if value is None:
                    yield _info(
                        "null_field",
                        f"Value is null for configured field {field_name!r}",
                    )

    dataset_location = dataset.locations[
        0] if dataset.locations else readable_location

    # If we have a location:
    # For each measurement, try to load it.
    # If loadable:
    if thorough:
        for name, measurement in dataset.measurements.items():
            full_path = uri_resolve(dataset_location, measurement.path)
            expected_measurement = required_measurements.get(name)

            band = measurement.band or 1
            with rasterio.open(full_path) as ds:
                ds: DatasetReader
                if band not in ds.indexes:
                    yield _error(
                        "incorrect_band",
                        f"Measurement {name!r} file contains no rio index {band!r}.",
                        hint=f"contains indexes {ds.indexes!r}",
                    )
                    continue

                if not expected_measurement:
                    # The measurement is not in the product definition
                    #
                    # This is only informational because a product doesn't have to define all
                    # measurements that the datasets contain.
                    #
                    # This is historically because dataset documents reflect the measurements that
                    # are stored on disk, which can differ. But products define the set of measurments
                    # that are mandatory in every dataset.
                    #
                    # (datasets differ when, for example, sensors go offline, or when there's on-disk
                    # measurements like panchromatic that GA doesn't want in their product definitions)
                    if required_measurements:
                        yield _info(
                            "unspecified_measurement",
                            f"Measurement {name} is not in the product",
                        )
                else:
                    expected_dtype = expected_measurement.dtype
                    band_dtype = ds.dtypes[band - 1]
                    # TODO: NaN handling
                    if expected_dtype != band_dtype:
                        yield _error(
                            "different_dtype",
                            f"{name} dtype: "
                            f"product {expected_dtype!r} != dataset {band_dtype!r}",
                        )

                    ds_nodata = ds.nodatavals[band - 1]

                    # If the dataset is missing 'nodata', we can allow anything in product 'nodata'.
                    # (In ODC, nodata might be a fill value for loading data.)
                    if ds_nodata is None:
                        continue

                    # Otherwise check that nodata matches.
                    expected_nodata = expected_measurement.nodata
                    if expected_nodata != ds_nodata and not (
                            _is_nan(expected_nodata) and _is_nan(ds_nodata)):
                        yield _error(
                            "different_nodata",
                            f"{name} nodata: "
                            f"product {expected_nodata !r} != dataset {ds_nodata !r}",
                        )
def fuse_ds(ds_1: Dataset,
            ds_2: Dataset,
            product: Optional[DatasetType] = None) -> Dataset:
    """
    This function fuses two datasets. It requires that:
      - the products are fusable
      - grids with the same name are identical
      - labels are in the format 'product_suffix' with identical suffixes
      - CRSs' are identical
      - datetimes are identical
      - $schemas are identical

    Violations of these requirements surface as AssertionError (callers and
    tests rely on that).

    :param ds_1: first source dataset
    :param ds_2: second source dataset
    :param product: fused product; derived via `fuse_products` when omitted
    :return: a new `Dataset` whose document merges both inputs
    """
    doc_1, doc_2 = ds_1.metadata_doc, ds_2.metadata_doc

    if product is None:
        product = fuse_products(ds_1.type, ds_2.type)

    fused_doc = dict()
    # Deterministic id derived from the two source dataset ids.
    fused_doc["id"] = str(
        odc_uuid(product.name, "0.0.0", sources=[doc_1["id"], doc_2["id"]]))
    fused_doc["lineage"] = {"source_datasets": [doc_1["id"], doc_2["id"]]}

    # check that all grids with the same name are identical
    common_grids = set(doc_1["grids"].keys()).intersection(
        doc_2["grids"].keys())
    assert all(doc_1["grids"][g] == doc_2["grids"][g] for g in common_grids)

    # TODO: handle the case that grids have conflicts in a separate function
    fused_doc["grids"] = {**doc_1["grids"], **doc_2["grids"]}

    # Labels must agree once each product's name prefix is stripped off.
    label_suffix = doc_1["label"].replace(doc_1["product"]["name"], "")
    assert label_suffix == doc_2["label"].replace(doc_2["product"]["name"], "")
    fused_doc["label"] = f"{product.name}{label_suffix}"

    # These keys must match exactly and are copied through verbatim.
    equal_keys = ["$schema", "crs"]
    for key in equal_keys:
        assert doc_1[key] == doc_2[key]
        fused_doc[key] = doc_1[key]

    fused_doc["properties"] = dict()
    assert doc_1["properties"]["datetime"] == doc_2["properties"][
        "datetime"]  # datetime is the only mandatory property

    # copy over all identical properties
    for key, val in doc_1["properties"].items():
        if val == doc_2["properties"].get(key, None):
            fused_doc["properties"][key] = val

    fused_doc["measurements"] = {
        **doc_1["measurements"],
        **doc_2["measurements"]
    }
    # Point each fused measurement at its resolved path from its source dataset.
    for key, path in {
            **measurement_paths(ds_1),
            **measurement_paths(ds_2)
    }.items():
        fused_doc["measurements"][key]["path"] = path

    fused_ds = Dataset(product, prep_eo3(fused_doc), uris=[""])
    return fused_ds
def test_fuse_dss(wo_definition, fc_definition):
    """fuse_ds merges two compatible datasets and rejects incompatible ones."""
    standard_metadata_types = {
        d["name"]: metadata_from_doc(d)
        for d in default_metadata_type_docs()
    }
    eo3 = standard_metadata_types["eo3"]

    wo_product = DatasetType(eo3, wo_definition)
    fc_product = DatasetType(eo3, fc_definition)
    fused_product = fuse_products(wo_product, fc_product)

    # Water Observations (wo) dataset: same scene/time as the fc dataset below.
    wo_metadata = {
        'id': 'e9fb6737-b93d-5cd9-bfe6-7e634abc9905',
        'crs': 'epsg:32655',
        'grids': {
            'default': {
                'shape': [7211, 8311],
                'transform': [30.0, 0.0, 423285.0, 0.0, -30.0, -4040385.0, 0.0, 0.0, 1.0]
            }
        },
        'label': 'ga_ls_wo_3_091086_2020-04-04_final',
        '$schema': 'https://schemas.opendatacube.org/dataset',
        'lineage': {
            'source_datasets': {}
        },
        'product': {
            'name': 'ga_ls_wo_3'
        },
        'properties': {
            'title': 'ga_ls_wo_3_091086_2020-04-04_final',
            'eo:gsd': 30.0,
            'created': '2021-03-09T23:22:42.130266Z',
            'datetime': '2020-04-04T23:33:10.644420Z',
            'proj:epsg': 32655,
            'proj:shape': [7211, 8311],
            'eo:platform': 'landsat-7',
            'odc:product': 'ga_ls_wo_3',
            'odc:producer': 'ga.gov.au',
            'eo:instrument': 'ETM',
            'eo:cloud_cover': 44.870310145260326,
            'eo:sun_azimuth': 49.20198554,
            'proj:transform': [30.0, 0.0, 423285.0, 0.0, -30.0, -4040385.0, 0.0, 0.0, 1.0],
            'landsat:wrs_row': 86,
            'odc:file_format': 'GeoTIFF',
            'odc:region_code': '091086',
            'dtr:end_datetime': '2020-04-04T23:33:24.461679Z',
            'eo:sun_elevation': 32.7056476,
            'landsat:wrs_path': 91,
            'dtr:start_datetime': '2020-04-04T23:32:56.662365Z',
            'odc:product_family': 'wo',
            'odc:dataset_version': '1.6.0',
            'dea:dataset_maturity': 'final',
            'odc:collection_number': 3,
            'odc:naming_conventions': 'dea_c3',
            'odc:processing_datetime': '2020-04-04T23:33:10.644420Z',
            'landsat:landsat_scene_id': 'LE70910862020095ASA00',
            'landsat:collection_number': 1,
            'landsat:landsat_product_id': 'LE07_L1TP_091086_20200404_20200501_01_T1',
            'landsat:collection_category': 'T1'
        },
        'measurements': {
            'water': {
                'path': 'ga_ls_wo_3_091086_2020-04-04_final_water.tif'
            }
        }
    }
    # Fractional Cover (fc) dataset: identical grid/CRS/datetime, different bands.
    fc_metadata = {
        'id': '41980746-4f17-5e0c-86a0-92cca8d3c99d',
        'crs': 'epsg:32655',
        'grids': {
            'default': {
                'shape': [7211, 8311],
                'transform': [30.0, 0.0, 423285.0, 0.0, -30.0, -4040385.0, 0.0, 0.0, 1.0]
            }
        },
        'label': 'ga_ls_fc_3_091086_2020-04-04_final',
        '$schema': 'https://schemas.opendatacube.org/dataset',
        'product': {
            'name': 'ga_ls_fc_3'
        },
        'properties': {
            'title': 'ga_ls_fc_3_091086_2020-04-04_final',
            'eo:gsd': 30.0,
            'created': '2021-03-10T04:14:49.645196Z',
            'datetime': '2020-04-04T23:33:10.644420Z',
            'proj:epsg': 32655,
            'proj:shape': [7211, 8311],
            'eo:platform': 'landsat-7',
            'odc:product': 'ga_ls_fc_3',
            'odc:producer': 'ga.gov.au',
            'eo:instrument': 'ETM',
            'eo:cloud_cover': 44.870310145260326,
            'eo:sun_azimuth': 49.20198554,
            'proj:transform': [30.0, 0.0, 423285.0, 0.0, -30.0, -4040385.0, 0.0, 0.0, 1.0],
            'landsat:wrs_row': 86,
            'odc:file_format': 'GeoTIFF',
            'odc:region_code': '091086',
            'dtr:end_datetime': '2020-04-04T23:33:24.461679Z',
            'eo:sun_elevation': 32.7056476,
            'landsat:wrs_path': 91,
            'dtr:start_datetime': '2020-04-04T23:32:56.662365Z',
            'odc:product_family': 'fc',
            'odc:dataset_version': '2.5.0',
            'dea:dataset_maturity': 'final',
            'odc:collection_number': 3,
            'odc:naming_conventions': 'dea_c3',
            'odc:processing_datetime': '2020-04-04T23:33:10.644420Z',
            'landsat:landsat_scene_id': 'LE70910862020095ASA00',
            'landsat:collection_number': 1,
            'landsat:landsat_product_id': 'LE07_L1TP_091086_20200404_20200501_01_T1',
            'landsat:collection_category': 'T1'
        },
        'measurements': {
            'bs': {
                'path': 'ga_ls_fc_3_091086_2020-04-04_final_bs.tif'
            },
            'pv': {
                'path': 'ga_ls_fc_3_091086_2020-04-04_final_pv.tif'
            },
            'ue': {
                'path': 'ga_ls_fc_3_091086_2020-04-04_final_ue.tif'
            },
            'npv': {
                'path': 'ga_ls_fc_3_091086_2020-04-04_final_npv.tif'
            }
        }
    }

    # paths get made absolute here
    # TODO: force paths to stay relative
    uris = [
        "s3://dea-public-data/derivative/ga_ls_wo_3/1-6-0/091/086/2020/04/04/ga_ls_wo_3_091086_2020-04-04_final.stac-item.json"
    ]
    wo_ds = Dataset(wo_product, prep_eo3(wo_metadata), uris=uris)
    uris = [
        "s3://dea-public-data/derivative/ga_ls_fc_3/2-5-0/091/086/2020/04/04/ga_ls_fc_3_091086_2020-04-04_final.stac-item.json"
    ]
    fc_ds = Dataset(fc_product, prep_eo3(fc_metadata), uris=uris)

    # Fused dataset must carry the union of both inputs' measurement paths,
    # with and without an explicitly supplied fused product.
    fused_ds = fuse_ds(wo_ds, fc_ds, fused_product)
    assert _get_msr_paths(fused_ds) == _get_msr_paths(fc_ds).union(
        _get_msr_paths(wo_ds))

    fused_ds = fuse_ds(wo_ds, fc_ds)
    assert _get_msr_paths(fused_ds) == _get_msr_paths(fc_ds).union(
        _get_msr_paths(wo_ds))

    # Mismatched datetime must be rejected.
    bad_metadata = deepcopy(fc_metadata)
    bad_metadata["properties"]["datetime"] = '2020-04-03T23:33:10.644420Z'
    bad_ds = Dataset(fc_product, prep_eo3(bad_metadata), uris=uris)
    with pytest.raises(AssertionError):
        fused_ds = fuse_ds(wo_ds, bad_ds, fused_product)

    # Mismatched CRS must be rejected.
    bad_metadata = deepcopy(fc_metadata)
    bad_metadata["crs"] = "epsg:32656"
    bad_ds = Dataset(fc_product, prep_eo3(bad_metadata), uris=uris)
    with pytest.raises(AssertionError):
        fused_ds = fuse_ds(wo_ds, bad_ds, fused_product)

    # Conflicting grid definitions must be rejected.
    bad_metadata = deepcopy(fc_metadata)
    bad_metadata['grids']['default']['shape'] = [7212, 8311]
    bad_ds = Dataset(fc_product, prep_eo3(bad_metadata), uris=uris)
    with pytest.raises(AssertionError):
        fused_ds = fuse_ds(wo_ds, bad_ds, fused_product)

    # Mismatched label suffix must be rejected.
    bad_metadata = deepcopy(fc_metadata)
    bad_metadata['label'] += 'a'
    bad_ds = Dataset(fc_product, prep_eo3(bad_metadata), uris=uris)
    with pytest.raises(AssertionError):
        fused_ds = fuse_ds(wo_ds, bad_ds, fused_product)