def prepare_and_write(
    ds_path: Path,
    output_yaml_path: Path,
    source_telemetry: Path = None,
    # TODO: Can we infer producer automatically? This is bound to cause mistakes otherwise
    producer="usgs.gov",
) -> Tuple[uuid.UUID, Path]:
    """
    Prepare an eo3 metadata file for a Level 1 dataset.

    The input dataset path can be a folder or a tar file.
    """
    mtl_doc, mtl_filename = get_mtl_content(ds_path)
    if not mtl_doc:
        raise ValueError(f"No MTL file found for {ds_path}")

    usgs_collection_number = mtl_doc["metadata_file_info"].get("collection_number")
    if usgs_collection_number is None:
        raise NotImplementedError(
            "Dataset has no collection number: pre-collection data is not supported."
        )

    data_format = mtl_doc["product_metadata"]["output_format"]
    if data_format.upper() != "GEOTIFF":
        raise NotImplementedError(f"Only GTiff currently supported, got {data_format}")
    file_format = FileFormat.GeoTIFF  # Assumed below.

    projection_params = mtl_doc["projection_parameters"]
    if (
        "grid_cell_size_thermal" in projection_params
        and "grid_cell_size_reflective" in projection_params
        and (
            projection_params["grid_cell_size_reflective"]
            != projection_params["grid_cell_size_thermal"]
        )
    ):
        raise NotImplementedError("reflective and thermal have different cell sizes")
    ground_sample_distance = min(
        value
        for name, value in projection_params.items()
        if name.startswith("grid_cell_size_")
    )

    with DatasetAssembler(
        metadata_path=output_yaml_path,
        dataset_location=ds_path,
        # Deterministic ID based on USGS's product id (which changes when the scene is reprocessed by them)
        dataset_id=uuid.uuid5(
            USGS_UUID_NAMESPACE, mtl_doc["metadata_file_info"]["landsat_product_id"]
        ),
        naming_conventions="dea",
        if_exists=IfExists.Overwrite,
    ) as p:
        if source_telemetry:
            # Only GA's data has source telemetry...
            assert producer == "ga.gov.au"
            p.add_source_path(source_telemetry)

        p.platform = mtl_doc["product_metadata"]["spacecraft_id"]
        p.instrument = mtl_doc["product_metadata"]["sensor_id"]
        p.product_family = "level1"
        p.producer = producer
        p.datetime = "{}T{}".format(
            mtl_doc["product_metadata"]["date_acquired"],
            mtl_doc["product_metadata"]["scene_center_time"],
        )
        p.processed = mtl_doc["metadata_file_info"]["file_date"]
        p.properties["odc:file_format"] = file_format
        p.properties["eo:gsd"] = ground_sample_distance

        cloud_cover = mtl_doc["image_attributes"]["cloud_cover"]
        # Cloud cover is -1 when missing (such as TIRS-only data)
        if cloud_cover != -1:
            p.properties["eo:cloud_cover"] = cloud_cover
        p.properties["eo:sun_azimuth"] = mtl_doc["image_attributes"]["sun_azimuth"]
        p.properties["eo:sun_elevation"] = mtl_doc["image_attributes"]["sun_elevation"]
        p.properties["landsat:collection_number"] = usgs_collection_number
        for section, fields in _COPYABLE_MTL_FIELDS:
            for field in fields:
                value = mtl_doc[section].get(field)
                if value is not None:
                    p.properties[f"landsat:{field}"] = value

        p.region_code = f"{p.properties['landsat:wrs_path']:03d}{p.properties['landsat:wrs_row']:03d}"

        org_collection_number = utils.get_collection_number(
            p.producer, p.properties["landsat:collection_number"]
        )
        p.dataset_version = f"{org_collection_number}.0.{p.processed:%Y%m%d}"

        # NRT product?
        # Category is one of: T1, T2 or RT ('real time')
        if p.properties["landsat:collection_category"] == "RT":
            p.properties["odc:dataset_maturity"] = "nrt"

        band_aliases = get_band_alias_mappings(p.platform, p.instrument)
        for usgs_band_id, file_location in _iter_bands_paths(mtl_doc):
            p.note_measurement(
                band_aliases[usgs_band_id],
                file_location,
                relative_to_dataset_location=True,
            )

        p.add_accessory_file("metadata:landsat_mtl", Path(mtl_filename))

        return p.done()
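# --- Hedged usage sketch (not part of the original module) ---
# The preparer above derives its dataset ID with uuid.uuid5, so IDs are
# deterministic: re-preparing an unchanged USGS product yields the same UUID,
# while a reprocessed product (which gets a new landsat_product_id) yields a
# new one. The namespace below is a made-up stand-in for USGS_UUID_NAMESPACE.
import uuid


def _example_dataset_id(landsat_product_id: str) -> uuid.UUID:
    """
    >>> a = _example_dataset_id("LC08_L1TP_090084_20160121_20170405_01_T1")
    >>> b = _example_dataset_id("LC08_L1TP_090084_20160121_20170405_01_T1")
    >>> a == b
    True
    """
    example_namespace = uuid.UUID("12345678-1234-5678-1234-567812345678")
    # Same namespace + same product id -> same UUID, on every run.
    return uuid.uuid5(example_namespace, landsat_product_id)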
def prepare_and_write(
    ds_path: Path,
    collection_location: Path,
    # TODO: Can we infer producer automatically? This is bound to cause mistakes otherwise
    producer="usgs.gov",
) -> Tuple[uuid.UUID, Path]:
    """
    Prepare an eo3 metadata file for a Level 2 dataset.

    The input dataset path can be a folder or a tar file.
    """
    mtl_doc, mtl_filename = get_mtl_content(
        ds_path, root_element="landsat_metadata_file"
    )
    if not mtl_doc:
        raise ValueError(f"No MTL file found for {ds_path}")

    usgs_collection_number = mtl_doc["product_contents"].get("collection_number")
    if usgs_collection_number is None:
        raise NotImplementedError(
            "Dataset has no collection number: pre-collection data is not supported."
        )

    data_format = mtl_doc["product_contents"]["output_format"]
    if data_format.upper() != "GEOTIFF":
        raise NotImplementedError(f"Only GTiff currently supported, got {data_format}")
    file_format = FileFormat.GeoTIFF  # Assumed below.

    if (
        mtl_doc["projection_attributes"]["grid_cell_size_reflective"]
        != mtl_doc["projection_attributes"]["grid_cell_size_thermal"]
    ):
        raise NotImplementedError("reflective and thermal have different cell sizes")
    ground_sample_distance = min(
        value
        for name, value in mtl_doc["projection_attributes"].items()
        if name.startswith("grid_cell_size_")
    )

    with DatasetAssembler(
        collection_location=collection_location,
        # Deterministic ID based on USGS's product id (which changes when the scene is reprocessed by them)
        dataset_id=uuid.uuid5(
            USGS_UUID_NAMESPACE, mtl_doc["product_contents"]["landsat_product_id"]
        ),
        naming_conventions="dea",
        if_exists=IfExists.Overwrite,
    ) as p:
        p.platform = mtl_doc["image_attributes"]["spacecraft_id"]
        p.instrument = mtl_doc["image_attributes"]["sensor_id"]
        p.product_family = "level2"
        p.producer = producer
        p.datetime = "{}T{}".format(
            mtl_doc["image_attributes"]["date_acquired"],
            mtl_doc["image_attributes"]["scene_center_time"],
        )
        p.processed = mtl_doc["level2_processing_record"]["date_product_generated"]
        p.properties["odc:file_format"] = file_format
        p.properties["eo:gsd"] = ground_sample_distance
        p.properties["eo:cloud_cover"] = mtl_doc["image_attributes"]["cloud_cover"]
        p.properties["eo:sun_azimuth"] = mtl_doc["image_attributes"]["sun_azimuth"]
        p.properties["eo:sun_elevation"] = mtl_doc["image_attributes"]["sun_elevation"]
        p.properties["landsat:collection_number"] = usgs_collection_number
        for section, fields in _COPYABLE_MTL_FIELDS:
            for field in fields:
                value = mtl_doc[section].get(field)
                if value is not None:
                    p.properties[f"landsat:{field}"] = value

        p.region_code = f"{p.properties['landsat:wrs_path']:03d}{p.properties['landsat:wrs_row']:03d}"

        org_collection_number = utils.get_collection_number(
            p.producer, p.properties["landsat:collection_number"]
        )
        p.dataset_version = f"{org_collection_number}.0.{p.processed:%Y%m%d}"

        band_aliases = get_band_alias_mappings(p.platform, p.instrument)
        # Unlike the Level 1 preparer, Level 2 rewrites the imagery into the
        # output package (write_measurement) rather than noting the existing
        # files in place.
        for usgs_band_id, file_location in _iter_bands_paths(mtl_doc):
            p.write_measurement(
                band_aliases[usgs_band_id], os.path.join(ds_path, file_location)
            )

        p.add_accessory_file("metadata:landsat_mtl", Path(mtl_filename))

        return p.done()
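# --- Hedged sketch (illustration only, not part of the original module) ---
# How the ground sample distance above is chosen, in isolation: the smallest
# of the MTL grid_cell_size_* values wins. Level 2 only carries reflective and
# thermal sizes (checked equal above); Level 1 may also carry a finer
# panchromatic size. Sample values are illustrative.
def _example_ground_sample_distance(projection_attributes: dict) -> float:
    """
    >>> _example_ground_sample_distance(
    ...     {"grid_cell_size_reflective": 30.0, "grid_cell_size_thermal": 30.0}
    ... )
    30.0
    """
    return min(
        value
        for name, value in projection_attributes.items()
        if name.startswith("grid_cell_size_")
    )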
def package(
    out_directory: Path,
    granule: Granule,
    included_products: Iterable[str] = DEFAULT_PRODUCTS,
    include_oa: bool = True,
) -> Tuple[UUID, Path]:
    """
    Package an L2 product.

    :param include_oa:
        Whether to include the observation-attributes (OA) products.

    :param out_directory:
        The base directory for output datasets. A DEA-naming-conventions
        folder hierarchy will be created inside this folder.

    :param granule:
        Granule information. You probably want to make one with Granule.from_path()

    :param included_products:
        A list of imagery products to include in the package.
        Defaults to all products.

    :return:
        The dataset UUID and output metadata path
    """
    included_products = tuple(s.lower() for s in included_products)

    with h5py.File(granule.wagl_hdf5, "r") as fid:
        granule_group = fid[granule.name]
        with DatasetAssembler(
            out_directory,
            # WAGL stamps a good, random ID already.
            dataset_id=granule.wagl_metadata.get("id"),
            naming_conventions="dea",
        ) as p:
            level1 = granule.source_level1_metadata
            p.add_source_dataset(level1, auto_inherit_properties=True)

            # It's a GA ARD product.
            p.producer = "ga.gov.au"
            p.product_family = "ard"
            org_collection_number = utils.get_collection_number(
                p.producer, p.properties["landsat:collection_number"]
            )
            # TODO: wagl's algorithm version should determine our dataset version number, right?
            p.dataset_version = f"{org_collection_number}.0.0"
            p.region_code = _extract_reference_code(p, granule.name)

            _read_wagl_metadata(p, granule_group)
            _read_gqa_doc(p, granule.gqa_doc)
            _read_fmask_doc(p, granule.fmask_doc)

            _unpack_products(p, included_products, granule_group)

            if include_oa:
                with do("Starting OA", heading=True):
                    _unpack_observation_attributes(
                        p,
                        included_products,
                        granule_group,
                        infer_datetime_range=level1.platform.startswith("landsat"),
                    )

                if granule.fmask_image:
                    with do(f"Writing fmask from {granule.fmask_image} "):
                        p.write_measurement(
                            "oa:fmask",
                            granule.fmask_image,
                            expand_valid_data=False,
                            overview_resampling=Resampling.mode,
                        )

            with do("Finishing package"):
                return p.done()
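# --- Hedged usage sketch (not part of the original module) ---
# A hypothetical end-to-end call of package() above. The docstring recommends
# building the granule with Granule.from_path(); the exact signature of that
# constructor (a single wagl-HDF5 positional argument) is an assumption here.
def _example_package_granule(wagl_h5: Path, out_dir: Path) -> Tuple[UUID, Path]:
    # Assumed: from_path() can locate the sibling level-1, fmask and gqa
    # documents itself when given only the wagl HDF5 path.
    granule = Granule.from_path(wagl_h5)
    return package(out_dir, granule, include_oa=True)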
def package(
    out_directory: Path,
    granule: Granule,
    *,
    product_maturity: ProductMaturity = ProductMaturity.stable,
    included_products: Iterable[str] = DEFAULT_PRODUCTS,
    include_oa: bool = True,
    oa_resolution: Optional[Tuple[float, float]] = None,
    contiguity_resolution: Optional[Tuple[float, float]] = None,
) -> Tuple[UUID, Path]:
    """
    Package an L2 product.

    :param include_oa:
        Whether to include the observation-attributes (OA) products.

    :param out_directory:
        The base directory for output datasets. A DEA-naming-conventions
        folder hierarchy will be created inside this folder.

    :param granule:
        Granule information. You probably want to make one with Granule.from_path()

    :param included_products:
        A list of imagery products to include in the package.
        Defaults to all products.

    :return:
        The dataset UUID and output metadata path
    """
    included_products = tuple(s.lower() for s in included_products)

    with h5py.File(granule.wagl_hdf5, "r") as fid:
        granule_group = fid[granule.name]
        wagl_doc = _read_wagl_metadata(granule_group)

        with DatasetAssembler(
            out_directory.absolute(),
            # WAGL stamps a good, random ID already.
            dataset_id=granule.wagl_metadata.get("id"),
            naming_conventions="dea_s2"
            if ("sentinel" in wagl_doc["source_datasets"]["platform_id"].lower())
            else "dea",
        ) as p:
            _apply_wagl_metadata(p, wagl_doc)

            # It's a GA ARD product.
            p.producer = "ga.gov.au"
            p.product_family = "ard"
            p.maturity = _determine_maturity(
                acq_date=p.datetime,
                processed=p.processed,
                wagl_doc=wagl_doc,
            )
            # We don't bother including product maturity if it's stable, for
            # consistency with old datasets. Stable is the assumed default.
            if product_maturity is not ProductMaturity.stable:
                p.product_maturity = product_maturity

            if granule.source_level1_metadata is not None:
                # For historical consistency: we want to use the instrument that the
                # source L1 product came from, not the instruments reported from the
                # WAGL doc.
                #
                # Eg. Level 1 will say "OLI_TIRS", while the wagl doc will say "OLI".
                # Our current C3 products say "OLI_TIRS", so we need to stay consistent.
                # (even though WAGL only *used* the OLI bands, it came from an OLI_TIRS
                # observation)
                #
                # So delete our current wagl one, since we're adding a source dataset:
                if p.instrument is not None:
                    del p.properties["eo:instrument"]
                p.add_source_dataset(
                    granule.source_level1_metadata, auto_inherit_properties=True
                )
                # When level 1 is NRT, ARD is always NRT.
                if granule.source_level1_metadata.maturity == "nrt":
                    p.maturity = "nrt"

            org_collection_number = utils.get_collection_number(
                p.platform, p.producer, p.properties.get("landsat:collection_number")
            )
            p.dataset_version = f"{org_collection_number}.2.1"
            p.region_code = _extract_reference_code(p, granule.name)

            _read_gqa_doc(p, granule.gqa_doc)
            _read_fmask_doc(p, granule.fmask_doc)
            if granule.s2cloudless_doc:
                _read_s2cloudless_doc(p, granule.s2cloudless_doc)
            if granule.tesp_doc:
                _take_software_versions(p, granule.tesp_doc)

            _unpack_products(p, included_products, granule_group)

            if include_oa:
                with sub_product("oa", p):
                    with do("Starting OA", heading=True):
                        resolution_groups = {
                            tuple(granule_group[k].attrs["resolution"]): granule_group[k]
                            for k in granule_group.keys()
                            if k.startswith("RES-GROUP-")
                        }
                        # Use the highest resolution as the ground sample distance.
                        if "eo:gsd" in p.properties:
                            del p.properties["eo:gsd"]
                        p.properties["eo:gsd"] = min(min(resolution_groups.keys()))

                        _unpack_observation_attributes(
                            p,
                            get_oa_resolution_group(
                                resolution_groups, p.platform, oa_resolution
                            ),
                        )

                    infer_datetime_range = p.platform.startswith("landsat")
                    with do("Contiguity", timedelta=infer_datetime_range):
                        # For landsat, we want the "common" band resolution, not
                        # panchromatic. Pick the lower resolution.
                        if contiguity_resolution is not None:
                            contiguity_res = contiguity_resolution
                        elif p.platform.startswith("landsat"):
                            contiguity_res = max(resolution_groups.keys())
                        elif p.platform.startswith("sentinel"):
                            contiguity_res = (10.0, 10.0)
                        else:
                            raise NotImplementedError(
                                f"No default contiguity resolution for platform {p.platform!r}"
                            )

                        if contiguity_res not in resolution_groups:
                            raise ValueError(
                                f"No resolution group {contiguity_res} found in {granule.name}. "
                                f"Options: {list(resolution_groups.keys())}"
                            )
                        contiguity_res_grp = resolution_groups[contiguity_res]

                        timedelta_data = (
                            contiguity_res_grp["SATELLITE-SOLAR/TIME-DELTA"]
                            if infer_datetime_range
                            else None
                        )
                        _create_contiguity(
                            p,
                            included_products,
                            resolution_yx=tuple(contiguity_res_grp.attrs["resolution"]),
                            timedelta_data=timedelta_data,
                        )

                    if granule.fmask_image:
                        with do(f"Writing fmask from {granule.fmask_image} "):
                            p.write_measurement(
                                "oa:fmask",
                                granule.fmask_image,
                                expand_valid_data=False,
                                overview_resampling=Resampling.mode,
                                # Because of our strange sub-products and filename
                                # standards, we want the 'oa_' prefix to be included
                                # in the recorded band metadata, but not in its
                                # filename. So we manually calculate a filename
                                # without the extra prefix.
                                path=p.names.measurement_filename("fmask"),
                            )

                    if granule.s2cloudless_prob:
                        with do(
                            f"Writing s2cloudless probability from {granule.s2cloudless_prob} "
                        ):
                            p.write_measurement(
                                "oa:s2cloudless_prob",
                                granule.s2cloudless_prob,
                                expand_valid_data=False,
                                overview_resampling=Resampling.bilinear,
                                path=p.names.measurement_filename("s2cloudless-prob"),
                            )

                    if granule.s2cloudless_mask:
                        with do(
                            f"Writing s2cloudless mask from {granule.s2cloudless_mask} "
                        ):
                            p.write_measurement(
                                "oa:s2cloudless_mask",
                                granule.s2cloudless_mask,
                                expand_valid_data=False,
                                overview_resampling=Resampling.mode,
                                path=p.names.measurement_filename("s2cloudless-mask"),
                            )

            with do("Finishing package"):
                return p.done()
def package_non_standard(outdir, granule):
    """
    Yaml creator for the ard pipeline.
    """
    outdir = Path(outdir) / granule.name
    indir = granule.wagl_hdf5.parent

    if indir.is_file():
        shutil.copy(indir, outdir)
    else:
        shutil.copytree(indir, outdir)

    wagl_h5 = outdir / str(granule.name + ".wagl.h5")
    dataset_doc = outdir / str(granule.name + ".yaml")
    boolean_h5 = Path(str(wagl_h5).replace("wagl.h5", "converted.datasets.h5"))
    fmask_img = outdir / str(granule.name + ".fmask.img")

    # Opened for write: boolean datasets are converted into this file.
    f = h5py.File(boolean_h5, "w")

    with DatasetAssembler(metadata_path=dataset_doc, naming_conventions="dea") as da:
        level1 = granule.source_level1_metadata
        da.add_source_dataset(level1, auto_inherit_properties=True, inherit_geometry=True)
        da.product_family = "ard"
        da.producer = "ga.gov.au"
        da.properties["odc:file_format"] = "HDF5"

        with h5py.File(wagl_h5, "r") as fid:
            img_paths = [ppjoin(fid.name, pth) for pth in find(fid, "IMAGE")]
            granule_group = fid[granule.name]

            try:
                wagl_path, *ancil_paths = [
                    pth for pth in find(granule_group, "SCALAR") if "METADATA" in pth
                ]
            except ValueError:
                raise ValueError("No nbar metadata found in granule")

            [wagl_doc] = loads_yaml(granule_group[wagl_path][()])

            da.processed = get_path(wagl_doc, ("system_information", "time_processed"))

            platform = da.properties["eo:platform"]
            if platform == "sentinel-2a" or platform == "sentinel-2b":
                org_collection_number = 3
            else:
                org_collection_number = utils.get_collection_number(
                    platform, da.producer, da.properties["landsat:collection_number"]
                )

            da.dataset_version = f"{org_collection_number}.1.0"
            da.region_code = eodatasets3.wagl._extract_reference_code(da, granule.name)

            eodatasets3.wagl._read_gqa_doc(da, granule.gqa_doc)
            eodatasets3.wagl._read_fmask_doc(da, granule.fmask_doc)

            with rasterio.open(fmask_img) as ds:
                fmask_layer = "/{}/OA_FMASK/oa_fmask".format(granule.name)
                data = ds.read(1)
                fmask_ds = f.create_dataset(
                    fmask_layer, data=data, compression="lzf", shuffle=True
                )
                fmask_ds.attrs["crs_wkt"] = ds.crs.wkt
                fmask_ds.attrs["geotransform"] = ds.transform.to_gdal()
                fmask_ds.attrs["description"] = (
                    "Converted from ERDAS Imagine format to HDF5 "
                    "to work with the limitations of varied formats within ODC"
                )

                grid_spec = images.GridSpec(
                    shape=ds.shape,
                    transform=ds.transform,
                    crs=CRS.from_wkt(fmask_ds.attrs["crs_wkt"]),
                )

                measurement_name = "oa_fmask"
                # boolean_h5 is already an absolute path under outdir.
                pathname = str(boolean_h5)

                no_data = fmask_ds.attrs.get("no_data_value")
                if no_data is None:
                    no_data = float("nan")

                da._measurements.record_image(
                    measurement_name,
                    grid_spec,
                    pathname,
                    fmask_ds[:],
                    layer="/{}".format(fmask_layer),
                    nodata=no_data,
                    expand_valid_data=False,
                )

            for pathname in img_paths:
                ds = fid[pathname]
                ds_path = Path(ds.name)

                # eodatasets internally uses this grid spec to group image datasets
                grid_spec = images.GridSpec(
                    shape=ds.shape,
                    transform=Affine.from_gdal(*ds.attrs["geotransform"]),
                    crs=CRS.from_wkt(ds.attrs["crs_wkt"]),
                )

                # product group name; lambertian, nbar, nbart, oa
                if "STANDARDISED-PRODUCTS" in str(ds_path):
                    product_group = ds_path.parent.name
                elif "INTERPOLATED-ATMOSPHERIC-COEFFICIENTS" in str(ds_path):
                    product_group = "oa_{}".format(ds_path.parent.name)
                else:
                    product_group = "oa"

                # spatial resolution group:
                # used to separate measurements with the same name
                resolution_group = "rg{}".format(ds_path.parts[2].split("-")[-1])

                measurement_name = (
                    "_".join(
                        [
                            resolution_group,
                            product_group,
                            ds.attrs.get("alias", ds_path.name),
                        ]
                    )
                    .replace("-", "_")  # we don't want hyphens in ODC land
                    .lower()
                )

                # include this band in defining the valid data bounds?
                include = "nbart" in measurement_name

                no_data = ds.attrs.get("no_data_value")
                if no_data is None:
                    no_data = float("nan")

                # if we are of type bool, we'll have to convert just for GDAL
                if ds.dtype.name == "bool":
                    pathname = str(boolean_h5)
                    out_ds = f.create_dataset(
                        measurement_name,
                        data=np.uint8(ds[:]),
                        compression="lzf",
                        shuffle=True,
                        chunks=ds.chunks,
                    )
                    for k, v in ds.attrs.items():
                        out_ds.attrs[k] = v

                    da._measurements.record_image(
                        measurement_name,
                        grid_spec,
                        pathname,
                        out_ds[:],
                        layer="/{}".format(out_ds.name),
                        nodata=no_data,
                        expand_valid_data=include,
                    )
                else:
                    pathname = str(wagl_h5)

                    # workaround, as note_measurement doesn't allow us to specify the gridspec
                    da._measurements.record_image(
                        measurement_name,
                        grid_spec,
                        pathname,
                        ds[:],
                        layer="/{}".format(ds.name),
                        nodata=no_data,
                        expand_valid_data=include,
                    )

        # Flush the converted-datasets file before finalising the package.
        f.close()

        # the longest part here is generating the valid data bounds vector
        # (landsat 7 post SLC-OFF can take a really long time)
        return da.done()
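# --- Hedged sketch (illustration only, not part of the original module) ---
# Why the boolean datasets above are widened before being recorded: GDAL has
# no boolean raster type, so boolean HDF5 arrays are converted to uint8
# (True -> 1, False -> 0) and written to the "converted datasets" file.
import numpy as np


def _example_bool_to_uint8(mask: np.ndarray) -> np.ndarray:
    """
    >>> _example_bool_to_uint8(np.array([[True, False]])).tolist()
    [[1, 0]]
    """
    return np.uint8(mask)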
def prepare_and_write(
    ds_path: Path,
    output_yaml_path: Path,
    source_telemetry: Path = None,
    # TODO: Can we infer producer automatically? This is bound to cause mistakes otherwise
    producer="usgs.gov",
    embed_location: bool = False,
) -> Tuple[uuid.UUID, Path]:
    """
    Prepare an eo3 metadata file for a Level 1 dataset.

    The input dataset path can be a folder or a tar file.
    """
    mtl_doc, root_element, mtl_filename = get_mtl_content(ds_path)
    if not mtl_doc:
        raise ValueError(f"No MTL file found for {ds_path}")

    # Collection 2 renamed the MTL root element.
    collection_key = "C2" if root_element == "landsat_metadata_file" else "C1"
    leveln_key_prefix = "leveln" if collection_key == "C2" else "level1"
    coll_map = LANDSAT_MTL_MAP[collection_key]

    usgs_collection_number = mtl_doc[coll_map["product_contents_cn"]].get(
        "collection_number"
    )
    if usgs_collection_number is None:
        raise NotImplementedError(
            "Dataset has no collection number: pre-collection data is not supported."
        )

    data_format = None
    if isinstance(coll_map["product_contents_of"], list):
        for leveln in coll_map["product_contents_of"]:
            if leveln in mtl_doc:
                data_format = mtl_doc[leveln]["output_format"]
                break
    else:
        data_format = mtl_doc[coll_map["product_contents_of"]]["output_format"]
    if data_format.upper() != "GEOTIFF":
        raise NotImplementedError(f"Only GTiff currently supported, got {data_format}")
    file_format = FileFormat.GeoTIFF  # Assumed below.

    projection_params = None
    if isinstance(coll_map[leveln_key_prefix + "_projection_parameters"], list):
        for leveln in coll_map[leveln_key_prefix + "_projection_parameters"]:
            if leveln in mtl_doc:
                projection_params = mtl_doc[leveln]
                break
    else:
        projection_params = mtl_doc[coll_map[leveln_key_prefix + "_projection_parameters"]]
    if (
        "grid_cell_size_thermal" in projection_params
        and "grid_cell_size_reflective" in projection_params
        and (
            projection_params["grid_cell_size_reflective"]
            != projection_params["grid_cell_size_thermal"]
        )
    ):
        raise NotImplementedError("reflective and thermal have different cell sizes")
    ground_sample_distance = min(
        value
        for name, value in projection_params.items()
        if name.startswith("grid_cell_size_")
    )

    leveln_product_id = None
    leveln_processed = None
    leveln_landsat_data_type = None
    if isinstance(coll_map[leveln_key_prefix + "_processing_record"], list):
        for leveln in coll_map[leveln_key_prefix + "_processing_record"]:
            if leveln in mtl_doc:
                leveln_product_id = mtl_doc[leveln]["landsat_product_id"]
                leveln_processed = mtl_doc[leveln]["date_product_generated"]
                leveln_landsat_data_type = mtl_doc[leveln]["processing_level"]
                break
    else:
        leveln_product_id = mtl_doc[coll_map[leveln_key_prefix + "_processing_record"]][
            "landsat_product_id"
        ]
        leveln_processed = mtl_doc[coll_map[leveln_key_prefix + "_processing_record"]][
            "file_date"  # for C1 only
        ]
        leveln_landsat_data_type = mtl_doc[coll_map["product_contents_of"]]["data_type"]

    with DatasetPrepare(
        metadata_path=output_yaml_path,
        dataset_location=ds_path,
        # Deterministic ID based on USGS's product id (which changes when the scene is reprocessed by them)
        dataset_id=uuid.uuid5(USGS_UUID_NAMESPACE, leveln_product_id),
        naming_conventions="dea",
    ) as p:
        if source_telemetry:
            if producer != "ga.gov.au":
                raise NotImplementedError(
                    "Only GA's L1 data is expected to have telemetry source data?"
                )
            p.add_source_path(source_telemetry)

        p.platform = mtl_doc[coll_map["image_attributes"]]["spacecraft_id"]
        p.instrument = mtl_doc[coll_map["image_attributes"]]["sensor_id"]
        p.product_family = "level" + leveln_landsat_data_type[1]
        p.producer = producer
        p.datetime = "{}T{}".format(
            mtl_doc[coll_map["image_attributes"]]["date_acquired"],
            mtl_doc[coll_map["image_attributes"]]["scene_center_time"],
        )
        p.processed = leveln_processed

        if collection_key == "C2":
            p.properties["landsat:data_type"] = leveln_landsat_data_type

        p.properties["odc:file_format"] = file_format
        p.properties["eo:gsd"] = ground_sample_distance

        cloud_cover = mtl_doc["image_attributes"]["cloud_cover"]
        # Cloud cover is -1 when missing (such as TIRS-only data)
        if cloud_cover != -1:
            p.properties["eo:cloud_cover"] = cloud_cover

        p.properties["eo:sun_azimuth"] = mtl_doc["image_attributes"]["sun_azimuth"]
        p.properties["eo:sun_elevation"] = mtl_doc["image_attributes"]["sun_elevation"]
        p.properties["landsat:collection_number"] = usgs_collection_number
        for section, fields in _COPYABLE_MTL_FIELDS[collection_key]:
            if section in mtl_doc:
                for field in fields:
                    value = mtl_doc[section].get(field)
                    if (
                        value is not None
                        and p.properties.get(f"landsat:{field}") is None
                    ):
                        p.properties[f"landsat:{field}"] = value

        p.region_code = f"{p.properties['landsat:wrs_path']:03d}{p.properties['landsat:wrs_row']:03d}"

        org_collection_number = utils.get_collection_number(
            p.platform, p.producer, p.properties["landsat:collection_number"]
        )
        p.dataset_version = f"{org_collection_number}.0.{p.processed:%Y%m%d}"

        # NRT product?
        # Category is one of: T1, T2 or RT ('real time')
        if p.properties["landsat:collection_category"] == "RT":
            p.properties["dea:dataset_maturity"] = "nrt"

        band_aliases = get_band_alias_mappings(p.platform, p.instrument)
        for usgs_file_type, file_location in _iter_image_paths(
            mtl_doc[coll_map["product_contents_fn"]]
        ):
            if usgs_file_type not in band_aliases:
                all_found = dict(
                    _iter_image_paths(mtl_doc[coll_map["product_contents_fn"]])
                )
                raise ValueError(
                    f"Band name {usgs_file_type!r} is not known among our aliases. "
                    f"(All bands found in the dataset: {all_found!r})"
                )

            p.note_measurement(
                band_aliases[usgs_file_type],
                file_location,
                relative_to_dataset_location=True,
                expand_valid_data=(
                    usgs_file_type.startswith("band_")
                    # The older collection called quality a "band"
                    and ("quality" not in usgs_file_type)
                ),
            )

        p.note_accessory_file("metadata:landsat_mtl", Path(mtl_filename))

        return p.done(embed_location=embed_location)
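# --- Hedged sketch (illustration only, not part of the original module) ---
# The C1/C2 dispatch above in isolation: USGS Collection 2 renamed the MTL
# root element to "landsat_metadata_file" (Collection 1 used
# "l1_metadata_file"), and that name selects the mapping table in
# LANDSAT_MTL_MAP.
def _example_collection_key(root_element: str) -> str:
    """
    >>> _example_collection_key("landsat_metadata_file")
    'C2'
    >>> _example_collection_key("l1_metadata_file")
    'C1'
    """
    return "C2" if root_element == "landsat_metadata_file" else "C1"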
def package(
    out_directory: Path,
    granule: Granule,
    included_products: Iterable[str] = DEFAULT_PRODUCTS,
    include_oa: bool = True,
    oa_resolution: Optional[Tuple[float, float]] = None,
) -> Tuple[UUID, Path]:
    """
    Package an L2 product.

    :param include_oa:
        Whether to include the observation-attributes (OA) products.

    :param out_directory:
        The base directory for output datasets. A DEA-naming-conventions
        folder hierarchy will be created inside this folder.

    :param granule:
        Granule information. You probably want to make one with Granule.from_path()

    :param included_products:
        A list of imagery products to include in the package.
        Defaults to all products.

    :return:
        The dataset UUID and output metadata path
    """
    included_products = tuple(s.lower() for s in included_products)

    with h5py.File(granule.wagl_hdf5, "r") as fid:
        granule_group = fid[granule.name]
        wagl_doc = _read_wagl_metadata(granule_group)

        with DatasetAssembler(
            out_directory.absolute(),
            # WAGL stamps a good, random ID already.
            dataset_id=granule.wagl_metadata.get("id"),
            naming_conventions="dea_s2"
            if ("sentinel" in wagl_doc["source_datasets"]["platform_id"].lower())
            else "dea",
        ) as p:
            _apply_wagl_metadata(p, wagl_doc)

            # It's a GA ARD product.
            p.producer = "ga.gov.au"
            p.product_family = "ard"
            p.maturity = _determine_maturity(
                acq_date=p.datetime,
                processed=p.processed,
                wagl_doc=wagl_doc,
            )

            if granule.source_level1_metadata is not None:
                # For historical consistency: we want to use the instrument that the
                # source L1 product came from, not the instruments reported from the
                # WAGL doc.
                #
                # Eg. Level 1 will say "OLI_TIRS", while the wagl doc will say "OLI".
                # Our current C3 products say "OLI_TIRS", so we need to stay consistent.
                # (even though WAGL only *used* the OLI bands, it came from an OLI_TIRS
                # observation)
                #
                # So delete our current wagl one, since we're adding a source dataset:
                if p.instrument is not None:
                    del p.properties["eo:instrument"]
                p.add_source_dataset(
                    granule.source_level1_metadata, auto_inherit_properties=True
                )
                # When level 1 is NRT, ARD is always NRT.
                if granule.source_level1_metadata.maturity == "nrt":
                    p.maturity = "nrt"

            org_collection_number = utils.get_collection_number(
                p.platform, p.producer, p.properties.get("landsat:collection_number")
            )
            p.dataset_version = f"{org_collection_number}.2.1"
            p.region_code = _extract_reference_code(p, granule.name)

            _read_gqa_doc(p, granule.gqa_doc)
            _read_fmask_doc(p, granule.fmask_doc)
            if granule.tesp_doc:
                _take_software_versions(p, granule.tesp_doc)

            _unpack_products(p, included_products, granule_group)

            if include_oa:
                with do("Starting OA", heading=True):
                    _unpack_observation_attributes(
                        p,
                        included_products,
                        granule_group,
                        infer_datetime_range=p.platform.startswith("landsat"),
                        oa_resolution=oa_resolution,
                    )

                if granule.fmask_image:
                    with do(f"Writing fmask from {granule.fmask_image} "):
                        p.write_measurement(
                            "oa:fmask",
                            granule.fmask_image,
                            expand_valid_data=False,
                            overview_resampling=Resampling.mode,
                        )

            with do("Finishing package"):
                return p.done()
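# --- Hedged sketch (illustration only, not part of the original module) ---
# The WRS region-code formatting shared by the preparers above: path and row
# are each zero-padded to three digits and concatenated.
def _example_region_code(wrs_path: int, wrs_row: int) -> str:
    """
    >>> _example_region_code(90, 84)
    '090084'
    """
    return f"{wrs_path:03d}{wrs_row:03d}"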