Example #1
def prepare_and_write(
    ds_path: Path,
    output_yaml_path: Path,
    source_telemetry: Optional[Path] = None,
    # TODO: Can we infer producer automatically? This is bound to cause mistakes otherwise
    producer: str = "usgs.gov",
) -> Tuple[uuid.UUID, Path]:
    """
    Prepare an eo3 metadata file for a Level1 dataset.

    Input dataset path can be a folder or a tar file.
    """
    mtl_doc, mtl_filename = get_mtl_content(ds_path)
    if not mtl_doc:
        raise ValueError(f"No MTL file found for {ds_path}")

    usgs_collection_number = mtl_doc["metadata_file_info"].get(
        "collection_number")
    if usgs_collection_number is None:
        raise NotImplementedError(
            "Dataset has no collection number: pre-collection data is not supported."
        )

    data_format = mtl_doc["product_metadata"]["output_format"]
    if data_format.upper() != "GEOTIFF":
        raise NotImplementedError(
            f"Only GTiff currently supported, got {data_format}")
    file_format = FileFormat.GeoTIFF

    # Assumed below.
    projection_params = mtl_doc["projection_parameters"]
    if ("grid_cell_size_thermal" in projection_params
            and "grid_cell_size_reflective" in projection_params
            and (projection_params["grid_cell_size_reflective"] !=
                 projection_params["grid_cell_size_thermal"])):
        raise NotImplementedError(
            "reflective and thermal have different cell sizes")
    ground_sample_distance = min(value
                                 for name, value in projection_params.items()
                                 if name.startswith("grid_cell_size_"))

    with DatasetAssembler(
            metadata_path=output_yaml_path,
            dataset_location=ds_path,
            # Deterministic ID based on USGS's product id (which changes when the scene is reprocessed by them)
            dataset_id=uuid.uuid5(
                USGS_UUID_NAMESPACE,
                mtl_doc["metadata_file_info"]["landsat_product_id"]),
            naming_conventions="dea",
            if_exists=IfExists.Overwrite,
    ) as p:
        if source_telemetry:
            # Only GA's data has source telemetry...
            assert producer == "ga.gov.au"
            p.add_source_path(source_telemetry)

        p.platform = mtl_doc["product_metadata"]["spacecraft_id"]
        p.instrument = mtl_doc["product_metadata"]["sensor_id"]
        p.product_family = "level1"
        p.producer = producer
        p.datetime = "{}T{}".format(
            mtl_doc["product_metadata"]["date_acquired"],
            mtl_doc["product_metadata"]["scene_center_time"],
        )
        p.processed = mtl_doc["metadata_file_info"]["file_date"]
        p.properties["odc:file_format"] = file_format
        p.properties["eo:gsd"] = ground_sample_distance
        cloud_cover = mtl_doc["image_attributes"]["cloud_cover"]
        # Cloud cover is -1 when missing (such as TIRS-only data)
        if cloud_cover != -1:
            p.properties["eo:cloud_cover"] = cloud_cover
        p.properties["eo:sun_azimuth"] = mtl_doc["image_attributes"][
            "sun_azimuth"]
        p.properties["eo:sun_elevation"] = mtl_doc["image_attributes"][
            "sun_elevation"]
        p.properties["landsat:collection_number"] = usgs_collection_number
        for section, fields in _COPYABLE_MTL_FIELDS:
            for field in fields:
                value = mtl_doc[section].get(field)
                if value is not None:
                    p.properties[f"landsat:{field}"] = value

        p.region_code = f"{p.properties['landsat:wrs_path']:03d}{p.properties['landsat:wrs_row']:03d}"
        org_collection_number = utils.get_collection_number(
            p.producer, p.properties["landsat:collection_number"])
        p.dataset_version = f"{org_collection_number}.0.{p.processed:%Y%m%d}"

        # NRT product?
        # Category is one of: T1, T2 or RT ('real time')
        if p.properties["landsat:collection_category"] == "RT":
            p.properties["odc:dataset_maturity"] = "nrt"

        band_aliases = get_band_alias_mappings(p.platform, p.instrument)
        for usgs_band_id, file_location in _iter_bands_paths(mtl_doc):
            p.note_measurement(
                band_aliases[usgs_band_id],
                file_location,
                relative_to_dataset_location=True,
            )

        p.add_accessory_file("metadata:landsat_mtl", Path(mtl_filename))

        return p.done()
Example #2
def prepare_and_write(
    ds_path: Path,
    collection_location: Path,
    # TODO: Can we infer producer automatically? This is bound to cause mistakes otherwise
    producer: str = "usgs.gov",
) -> Tuple[uuid.UUID, Path]:
    """
    Prepare an eo3 metadata file for a Level2 dataset.

    Input dataset path can be a folder or a tar file.
    """
    mtl_doc, mtl_filename = get_mtl_content(
        ds_path, root_element="landsat_metadata_file")
    if not mtl_doc:
        raise ValueError(f"No MTL file found for {ds_path}")

    usgs_collection_number = mtl_doc["product_contents"].get(
        "collection_number")
    if usgs_collection_number is None:
        raise NotImplementedError(
            "Dataset has no collection number: pre-collection data is not supported."
        )

    data_format = mtl_doc["product_contents"]["output_format"]
    if data_format.upper() != "GEOTIFF":
        raise NotImplementedError(
            f"Only GTiff currently supported, got {data_format}")
    file_format = FileFormat.GeoTIFF

    # Assumed below.
    if (mtl_doc["projection_attributes"]["grid_cell_size_reflective"] !=
            mtl_doc["projection_attributes"]["grid_cell_size_thermal"]):
        raise NotImplementedError(
            "reflective and thermal have different cell sizes")
    ground_sample_distance = min(
        value for name, value in mtl_doc["projection_attributes"].items()
        if name.startswith("grid_cell_size_"))

    with DatasetAssembler(
            collection_location=collection_location,
            # Deterministic ID based on USGS's product id (which changes when the scene is reprocessed by them)
            dataset_id=uuid.uuid5(
                USGS_UUID_NAMESPACE,
                mtl_doc["product_contents"]["landsat_product_id"]),
            naming_conventions="dea",
            if_exists=IfExists.Overwrite,
    ) as p:
        p.platform = mtl_doc["image_attributes"]["spacecraft_id"]
        p.instrument = mtl_doc["image_attributes"]["sensor_id"]
        p.product_family = "level2"
        p.producer = producer
        p.datetime = "{}T{}".format(
            mtl_doc["image_attributes"]["date_acquired"],
            mtl_doc["image_attributes"]["scene_center_time"],
        )
        p.processed = mtl_doc["level2_processing_record"][
            "date_product_generated"]
        p.properties["odc:file_format"] = file_format
        p.properties["eo:gsd"] = ground_sample_distance
        p.properties["eo:cloud_cover"] = mtl_doc["image_attributes"][
            "cloud_cover"]
        p.properties["eo:sun_azimuth"] = mtl_doc["image_attributes"][
            "sun_azimuth"]
        p.properties["eo:sun_elevation"] = mtl_doc["image_attributes"][
            "sun_elevation"]
        p.properties["landsat:collection_number"] = usgs_collection_number
        for section, fields in _COPYABLE_MTL_FIELDS:
            for field in fields:
                value = mtl_doc[section].get(field)
                if value is not None:
                    p.properties[f"landsat:{field}"] = value

        p.region_code = f"{p.properties['landsat:wrs_path']:03d}{p.properties['landsat:wrs_row']:03d}"
        org_collection_number = utils.get_collection_number(
            p.producer, p.properties["landsat:collection_number"])
        p.dataset_version = f"{org_collection_number}.0.{p.processed:%Y%m%d}"

        band_aliases = get_band_alias_mappings(p.platform, p.instrument)

        bands = list(_iter_bands_paths(mtl_doc))
        # Uncomment to process only the first band while debugging:
        # bands = bands[0:1]
        for usgs_band_id, file_location in bands:
            # Unlike the Level1 variant, measurements are rewritten into the
            # package rather than noted in place.
            path_file = os.path.join(ds_path, file_location)
            p.write_measurement(band_aliases[usgs_band_id], path_file)

        p.add_accessory_file("metadata:landsat_mtl", Path(mtl_filename))

        return p.done()
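Both prepare_and_write variants above derive their dataset ID deterministically from USGS's landsat_product_id via uuid.uuid5, so re-running the preparation yields the same ID, while a USGS reprocessing (which changes the product id) yields a new one. A minimal, self-contained sketch of that idea; the namespace value and product id below are placeholders, not the project's real USGS_UUID_NAMESPACE:

import uuid

# Placeholder namespace for illustration only.
USGS_UUID_NAMESPACE = uuid.UUID("00000000-0000-0000-0000-000000000000")

product_id = "LC08_L1TP_090084_20160121_20170405_01_T1"  # illustrative

# The same namespace + name always hash to the same UUID...
assert uuid.uuid5(USGS_UUID_NAMESPACE, product_id) == uuid.uuid5(
    USGS_UUID_NAMESPACE, product_id)
# ...while a reprocessed product id (new processing date) yields a new one.
assert uuid.uuid5(USGS_UUID_NAMESPACE, product_id) != uuid.uuid5(
    USGS_UUID_NAMESPACE, product_id.replace("20170405", "20180101"))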
Example #3
def package(
    out_directory: Path,
    granule: Granule,
    included_products: Iterable[str] = DEFAULT_PRODUCTS,
    include_oa: bool = True,
) -> Tuple[UUID, Path]:
    """
    Package an L2 product.

    :param include_oa:
        Whether to also unpack and write the observation attributes (OA) measurements.

    :param out_directory:
        The base directory for output datasets. A DEA-naming-conventions folder hierarchy
        will be created inside this folder.

    :param granule:
        Granule information. You probably want to make one with Granule.from_path()

    :param included_products:
        A list of imagery products to include in the package.
        Defaults to all products.

    :return:
        The dataset UUID and output metadata path
    """
    included_products = tuple(s.lower() for s in included_products)

    with h5py.File(granule.wagl_hdf5, "r") as fid:
        granule_group = fid[granule.name]

        with DatasetAssembler(
                out_directory,
                # WAGL stamps a good, random ID already.
                dataset_id=granule.wagl_metadata.get("id"),
                naming_conventions="dea",
        ) as p:
            level1 = granule.source_level1_metadata
            p.add_source_dataset(level1, auto_inherit_properties=True)

            # It's a GA ARD product.
            p.producer = "ga.gov.au"
            p.product_family = "ard"

            org_collection_number = utils.get_collection_number(
                p.producer, p.properties["landsat:collection_number"])
            # TODO: wagl's algorithm version should determine our dataset version number, right?
            p.dataset_version = f"{org_collection_number}.0.0"
            p.region_code = _extract_reference_code(p, granule.name)

            _read_wagl_metadata(p, granule_group)
            _read_gqa_doc(p, granule.gqa_doc)
            _read_fmask_doc(p, granule.fmask_doc)

            _unpack_products(p, included_products, granule_group)

            if include_oa:
                with do(f"Starting OA", heading=True):
                    _unpack_observation_attributes(
                        p,
                        included_products,
                        granule_group,
                        infer_datetime_range=level1.platform.startswith(
                            "landsat"),
                    )
                if granule.fmask_image:
                    with do(f"Writing fmask from {granule.fmask_image} "):
                        p.write_measurement(
                            "oa:fmask",
                            granule.fmask_image,
                            expand_valid_data=False,
                            overview_resampling=Resampling.mode,
                        )

            with do("Finishing package"):
                return p.done()
Example #4
def package(
    out_directory: Path,
    granule: Granule,
    *,
    product_maturity: ProductMaturity = ProductMaturity.stable,
    included_products: Iterable[str] = DEFAULT_PRODUCTS,
    include_oa: bool = True,
    oa_resolution: Optional[Tuple[float, float]] = None,
    contiguity_resolution: Optional[Tuple[float, float]] = None,
) -> Tuple[UUID, Path]:
    """
    Package an L2 product.

    :param include_oa:
        Whether to also unpack and write the observation attributes (OA) measurements.

    :param out_directory:
        The base directory for output datasets. A DEA-naming-conventions folder hierarchy
        will be created inside this folder.

    :param granule:
        Granule information. You probably want to make one with Granule.from_path()

    :param included_products:
        A list of imagery products to include in the package.
        Defaults to all products.

    :return:
        The dataset UUID and output metadata path
    """
    included_products = tuple(s.lower() for s in included_products)

    with h5py.File(granule.wagl_hdf5, "r") as fid:
        granule_group = fid[granule.name]

        wagl_doc = _read_wagl_metadata(granule_group)

        with DatasetAssembler(
                out_directory.absolute(),
                # WAGL stamps a good, random ID already.
                dataset_id=granule.wagl_metadata.get("id"),
                naming_conventions="dea_s2" if
            ("sentinel"
             in wagl_doc["source_datasets"]["platform_id"].lower()) else "dea",
        ) as p:
            _apply_wagl_metadata(p, wagl_doc)

            # It's a GA ARD product.
            p.producer = "ga.gov.au"
            p.product_family = "ard"
            p.maturity = _determine_maturity(
                acq_date=p.datetime,
                processed=p.processed,
                wagl_doc=wagl_doc,
            )

            # We don't bother including product maturity if it's stable, for consistency with old datasets.
            # Stable is the assumed default.
            if product_maturity is not ProductMaturity.stable:
                p.product_maturity = product_maturity

            if granule.source_level1_metadata is not None:
                # For historical consistency: we want to use the instrument that the source L1 product
                # came from, not the instruments reported from the WAGL doc.
                #
                # Eg.
                #     Level 1 will say "OLI_TIRS", while wagl doc will say "OLI".
                #     Our current C3 products say "OLI_TIRS" so we need to stay consistent.
                #     (even though WAGL only *used* the OLI bands, it came from an OLI_TIRS observation)
                #
                # So delete our current wagl one, since we're adding a source dataset:
                if p.instrument is not None:
                    del p.properties["eo:instrument"]

                p.add_source_dataset(granule.source_level1_metadata,
                                     auto_inherit_properties=True)
                # When level 1 is NRT, ARD is always NRT.
                if granule.source_level1_metadata.maturity == "nrt":
                    p.maturity = "nrt"

            org_collection_number = utils.get_collection_number(
                p.platform, p.producer,
                p.properties.get("landsat:collection_number"))

            p.dataset_version = f"{org_collection_number}.2.1"
            p.region_code = _extract_reference_code(p, granule.name)

            _read_gqa_doc(p, granule.gqa_doc)
            _read_fmask_doc(p, granule.fmask_doc)
            if granule.s2cloudless_doc:
                _read_s2cloudless_doc(p, granule.s2cloudless_doc)
            if granule.tesp_doc:
                _take_software_versions(p, granule.tesp_doc)

            _unpack_products(p, included_products, granule_group)

            if include_oa:
                with sub_product("oa", p):
                    with do("Starting OA", heading=True):
                        resolution_groups = {
                            tuple(granule_group[k].attrs["resolution"]):
                            granule_group[k]
                            for k in granule_group.keys()
                            if k.startswith("RES-GROUP-")
                        }

                        # Use the highest resolution as the ground sample distance,
                        # removing any value inherited from the source dataset first.
                        if "eo:gsd" in p.properties:
                            del p.properties["eo:gsd"]
                        p.properties["eo:gsd"] = min(
                            min(resolution_groups.keys()))

                        _unpack_observation_attributes(
                            p,
                            get_oa_resolution_group(resolution_groups,
                                                    p.platform, oa_resolution),
                        )

                    infer_datetime_range = p.platform.startswith("landsat")

                    with do("Contiguity", timedelta=infer_datetime_range):
                        # For landsat, we want the "common" band resolution, not panchromatic. Pick lower res.
                        if contiguity_resolution is not None:
                            contiguity_res = contiguity_resolution
                        elif p.platform.startswith("landsat"):
                            contiguity_res = max(resolution_groups.keys())
                        elif p.platform.startswith("sentinel"):
                            contiguity_res = (10.0, 10.0)

                        if contiguity_res not in resolution_groups:
                            raise ValueError(
                                f"No resolution group {contiguity_res} found in {granule.name}."
                                f"Options: {list(resolution_groups.keys())}")
                        contiguity_res_grp = resolution_groups[contiguity_res]

                        timedelta_data = (
                            contiguity_res_grp["SATELLITE-SOLAR/TIME-DELTA"]
                            if infer_datetime_range else None)
                        _create_contiguity(
                            p,
                            included_products,
                            resolution_yx=tuple(
                                contiguity_res_grp.attrs["resolution"]),
                            timedelta_data=timedelta_data,
                        )

                    if granule.fmask_image:
                        with do(f"Writing fmask from {granule.fmask_image} "):
                            p.write_measurement(
                                "oa:fmask",
                                granule.fmask_image,
                                expand_valid_data=False,
                                overview_resampling=Resampling.mode,
                                # Because of our strange sub-products and filename standards, we want the
                                # 'oa_' prefix to be included in the recorded band metadata,
                                # but not in its filename.
                                # So we manually calculate a filename without the extra prefix.
                                path=p.names.measurement_filename("fmask"),
                            )

                    if granule.s2cloudless_prob:
                        with do(f"Writing s2cloudless probability from {granule.s2cloudless_prob} "
                                ):
                            p.write_measurement(
                                "oa:s2cloudless_prob",
                                granule.s2cloudless_prob,
                                expand_valid_data=False,
                                overview_resampling=Resampling.bilinear,
                                path=p.names.measurement_filename(
                                    "s2cloudless-prob"),
                            )

                    if granule.s2cloudless_mask:
                        with do(f"Writing s2cloudless mask from {granule.s2cloudless_mask} "
                                ):
                            p.write_measurement(
                                "oa:s2cloudless_mask",
                                granule.s2cloudless_mask,
                                expand_valid_data=False,
                                overview_resampling=Resampling.mode,
                                path=p.names.measurement_filename(
                                    "s2cloudless-mask"),
                            )

            with do("Finishing package"):
                return p.done()
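The OA packaging above keys everything off per-resolution groups. Resolution tuples compare lexicographically, so min()/max() select the finest/coarsest grid, and min(min(...)) reduces the finest tuple to a single gsd value. A small sketch with plain dicts; the group names and resolutions are illustrative, since the real code reads each RES-GROUP-*'s "resolution" attribute from the HDF5 file:

# Map each (y, x) resolution to its group, as resolution_groups does above.
resolution_groups = {
    (15.0, 15.0): "RES-GROUP-0",  # e.g. panchromatic
    (30.0, 30.0): "RES-GROUP-1",  # e.g. common landsat bands
}

# The finest cell size becomes the ground sample distance.
gsd = min(min(resolution_groups.keys()))
assert gsd == 15.0

# For landsat contiguity, the coarser "common" resolution wins over pan.
contiguity_res = max(resolution_groups.keys())
assert contiguity_res == (30.0, 30.0)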
Example #5
def package_non_standard(outdir, granule):
    """
    YAML creator for the ARD pipeline.
    """

    outdir = Path(outdir) / granule.name
    indir = granule.wagl_hdf5.parent

    if indir.is_file():
        shutil.copy(indir, outdir)
    else:
        shutil.copytree(indir, outdir)

    wagl_h5 = outdir / str(granule.name + ".wagl.h5")
    dataset_doc = outdir / str(granule.name + ".yaml")
    boolean_h5 = Path(str(wagl_h5).replace("wagl.h5", "converted.datasets.h5"))
    fmask_img = outdir / str(granule.name + ".fmask.img")

    # Datasets converted for GDAL/ODC compatibility are written here. The mode
    # is explicit: old h5py defaulted to "a", h5py >= 3 defaults to read-only.
    f = h5py.File(boolean_h5, "a")

    with DatasetAssembler(metadata_path=dataset_doc,
                          naming_conventions="dea") as da:
        level1 = granule.source_level1_metadata
        da.add_source_dataset(level1,
                              auto_inherit_properties=True,
                              inherit_geometry=True)
        da.product_family = "ard"
        da.producer = "ga.gov.au"
        da.properties["odc:file_format"] = "HDF5"

        with h5py.File(wagl_h5, "r") as fid:
            img_paths = [ppjoin(fid.name, pth) for pth in find(fid, "IMAGE")]
            granule_group = fid[granule.name]

            try:
                wagl_path, *ancil_paths = [
                    pth for pth in find(granule_group, "SCALAR")
                    if "METADATA" in pth
                ]
            except ValueError:
                raise ValueError("No nbar metadata found in granule")

            [wagl_doc] = loads_yaml(granule_group[wagl_path][()])

            da.processed = get_path(wagl_doc,
                                    ("system_information", "time_processed"))

            platform = da.properties["eo:platform"]
            if platform == "sentinel-2a" or platform == "sentinel-2b":
                org_collection_number = 3
            else:
                org_collection_number = utils.get_collection_number(
                    platform, da.producer,
                    da.properties["landsat:collection_number"])

            da.dataset_version = f"{org_collection_number}.1.0"
            da.region_code = eodatasets3.wagl._extract_reference_code(
                da, granule.name)

            eodatasets3.wagl._read_gqa_doc(da, granule.gqa_doc)
            eodatasets3.wagl._read_fmask_doc(da, granule.fmask_doc)

            with rasterio.open(fmask_img) as ds:
                fmask_layer = "/{}/OA_FMASK/oa_fmask".format(granule.name)
                data = ds.read(1)
                fmask_ds = f.create_dataset(fmask_layer,
                                            data=data,
                                            compression="lzf",
                                            shuffle=True)
                fmask_ds.attrs["crs_wkt"] = ds.crs.wkt
                fmask_ds.attrs["geotransform"] = ds.transform.to_gdal()

                fmask_ds.attrs["description"] = (
                    "Converted from ERDAS Imagine format to HDF5 to work "
                    "with the limitations of varied formats within ODC")

                grid_spec = images.GridSpec(
                    shape=ds.shape,
                    transform=ds.transform,
                    crs=CRS.from_wkt(fmask_ds.attrs["crs_wkt"]),
                )

                measurement_name = "oa_fmask"

                pathname = str(outdir.joinpath(boolean_h5))

                no_data = fmask_ds.attrs.get("no_data_value")
                if no_data is None:
                    no_data = float("nan")

                da._measurements.record_image(
                    measurement_name,
                    grid_spec,
                    pathname,
                    fmask_ds[:],
                    layer="/{}".format(fmask_layer),
                    nodata=no_data,
                    expand_valid_data=False,
                )

            for pathname in img_paths:
                ds = fid[pathname]
                ds_path = Path(ds.name)

                # eodatasets internally uses this grid spec to group image datasets
                grid_spec = images.GridSpec(
                    shape=ds.shape,
                    transform=Affine.from_gdal(*ds.attrs["geotransform"]),
                    crs=CRS.from_wkt(ds.attrs["crs_wkt"]),
                )

                # product group name; lambertian, nbar, nbart, oa
                if "STANDARDISED-PRODUCTS" in str(ds_path):
                    product_group = ds_path.parent.name
                elif "INTERPOLATED-ATMOSPHERIC-COEFFICIENTS" in str(ds_path):
                    product_group = "oa_{}".format(ds_path.parent.name)
                else:
                    product_group = "oa"

                # spatial resolution group
                # used to separate measurements with the same name
                resolution_group = "rg{}".format(
                    ds_path.parts[2].split("-")[-1])

                measurement_name = ("_".join([
                    resolution_group,
                    product_group,
                    ds.attrs.get("alias", ds_path.name),
                ]).replace("-",
                           "_").lower())  # we don't wan't hyphens in odc land

                # include this band in defining the valid data bounds?
                include = "nbart" in measurement_name

                no_data = ds.attrs.get("no_data_value")
                if no_data is None:
                    no_data = float("nan")

                # if we are of type bool, we'll have to convert just for GDAL
                if ds.dtype.name == "bool":
                    pathname = str(outdir.joinpath(boolean_h5))
                    out_ds = f.create_dataset(
                        measurement_name,
                        data=np.uint8(ds[:]),
                        compression="lzf",
                        shuffle=True,
                        chunks=ds.chunks,
                    )

                    for k, v in ds.attrs.items():
                        out_ds.attrs[k] = v

                    da._measurements.record_image(
                        measurement_name,
                        grid_spec,
                        pathname,
                        out_ds[:],
                        layer="/{}".format(out_ds.name),
                        nodata=no_data,
                        expand_valid_data=include,
                    )
                else:
                    pathname = str(outdir.joinpath(wagl_h5))

                    # workaround: note_measurement doesn't allow us to specify the gridspec
                    da._measurements.record_image(
                        measurement_name,
                        grid_spec,
                        pathname,
                        ds[:],
                        layer="/{}".format(ds.name),
                        nodata=no_data,
                        expand_valid_data=include,
                    )

        # the longest part here is generating the valid data bounds vector
        # landsat 7 post SLC-OFF can take a really long time
        return da.done()
Example #6
def prepare_and_write(
    ds_path: Path,
    output_yaml_path: Path,
    source_telemetry: Optional[Path] = None,
    # TODO: Can we infer producer automatically? This is bound to cause mistakes otherwise
    producer: str = "usgs.gov",
    embed_location: bool = False,
) -> Tuple[uuid.UUID, Path]:
    """
    Prepare an eo3 metadata file for a Level1 dataset.

    Input dataset path can be a folder or a tar file.
    """
    mtl_doc, root_element, mtl_filename = get_mtl_content(ds_path)
    if not mtl_doc:
        raise ValueError(f"No MTL file found for {ds_path}")
    collection_key = "C2" if root_element == "landsat_metadata_file" else "C1"
    leveln_key_prefix = "leveln" if collection_key == "C2" else "level1"
    coll_map = LANDSAT_MTL_MAP[collection_key]
    usgs_collection_number = mtl_doc[coll_map["product_contents_cn"]].get(
        "collection_number")
    if usgs_collection_number is None:
        raise NotImplementedError(
            "Dataset has no collection number: pre-collection data is not supported."
        )

    data_format = None
    if isinstance(coll_map["product_contents_of"], list):
        for leveln in coll_map["product_contents_of"]:
            if leveln in mtl_doc:
                data_format = mtl_doc[leveln]["output_format"]
                break
    else:
        data_format = mtl_doc[coll_map["product_contents_of"]]["output_format"]
    if data_format.upper() != "GEOTIFF":
        raise NotImplementedError(
            f"Only GTiff currently supported, got {data_format}")
    file_format = FileFormat.GeoTIFF

    # Assumed below.
    projection_params = None
    if isinstance(coll_map[leveln_key_prefix + "_projection_parameters"],
                  list):
        for leveln in coll_map[leveln_key_prefix + "_projection_parameters"]:
            if leveln in mtl_doc:
                projection_params = mtl_doc[leveln]
                break
    else:
        projection_params = mtl_doc[coll_map[leveln_key_prefix +
                                             "_projection_parameters"]]
    if ("grid_cell_size_thermal" in projection_params
            and "grid_cell_size_reflective" in projection_params
            and (projection_params["grid_cell_size_reflective"] !=
                 projection_params["grid_cell_size_thermal"])):
        raise NotImplementedError(
            "reflective and thermal have different cell sizes")
    ground_sample_distance = min(value
                                 for name, value in projection_params.items()
                                 if name.startswith("grid_cell_size_"))

    leveln_product_id = None
    leveln_processed = None
    leveln_landsat_data_type = None
    if isinstance(coll_map[leveln_key_prefix + "_processing_record"], list):
        for leveln in coll_map[leveln_key_prefix + "_processing_record"]:
            if leveln in mtl_doc:
                leveln_product_id = mtl_doc[leveln]["landsat_product_id"]
                leveln_processed = mtl_doc[leveln]["date_product_generated"]
                leveln_landsat_data_type = mtl_doc[leveln]["processing_level"]
                break
    else:
        leveln_product_id = mtl_doc[coll_map[
            leveln_key_prefix + "_processing_record"]]["landsat_product_id"]
        leveln_processed = mtl_doc[coll_map[leveln_key_prefix +
                                            "_processing_record"]][
                                                "file_date"]  # for C1 only
        leveln_landsat_data_type = mtl_doc[
            coll_map["product_contents_of"]]["data_type"]

    with DatasetPrepare(
            metadata_path=output_yaml_path,
            dataset_location=ds_path,
            # Deterministic ID based on USGS's product id (which changes when the scene is reprocessed by them)
            dataset_id=uuid.uuid5(USGS_UUID_NAMESPACE, leveln_product_id),
            naming_conventions="dea",
    ) as p:
        if source_telemetry:
            if producer != "ga.gov.au":
                raise NotImplementedError(
                    "Only GA's L1 data is expected to have telemetry source data"
                )
            p.add_source_path(source_telemetry)

        p.platform = mtl_doc[coll_map["image_attributes"]]["spacecraft_id"]
        p.instrument = mtl_doc[coll_map["image_attributes"]]["sensor_id"]
        p.product_family = "level" + leveln_landsat_data_type[1]
        p.producer = producer
        p.datetime = "{}T{}".format(
            mtl_doc[coll_map["image_attributes"]]["date_acquired"],
            mtl_doc[coll_map["image_attributes"]]["scene_center_time"],
        )
        p.processed = leveln_processed
        if collection_key == "C2":
            p.properties["landsat:data_type"] = leveln_landsat_data_type
        p.properties["odc:file_format"] = file_format
        p.properties["eo:gsd"] = ground_sample_distance
        cloud_cover = mtl_doc["image_attributes"]["cloud_cover"]
        # Cloud cover is -1 when missing (such as TIRS-only data)
        if cloud_cover != -1:
            p.properties["eo:cloud_cover"] = cloud_cover
        p.properties["eo:sun_azimuth"] = mtl_doc["image_attributes"][
            "sun_azimuth"]
        p.properties["eo:sun_elevation"] = mtl_doc["image_attributes"][
            "sun_elevation"]
        p.properties["landsat:collection_number"] = usgs_collection_number
        for section, fields in _COPYABLE_MTL_FIELDS[collection_key]:
            if section in mtl_doc:
                for field in fields:
                    value = mtl_doc[section].get(field)
                    if (value is not None
                            and p.properties.get(f"landsat:{field}") is None):
                        p.properties[f"landsat:{field}"] = value

        p.region_code = f"{p.properties['landsat:wrs_path']:03d}{p.properties['landsat:wrs_row']:03d}"
        org_collection_number = utils.get_collection_number(
            p.platform, p.producer, p.properties["landsat:collection_number"])
        p.dataset_version = f"{org_collection_number}.0.{p.processed:%Y%m%d}"

        # NRT product?
        # Category is one of: T1, T2 or RT ('real time')
        if p.properties["landsat:collection_category"] == "RT":
            p.properties["dea:dataset_maturity"] = "nrt"

        band_aliases = get_band_alias_mappings(p.platform, p.instrument)

        for usgs_file_type, file_location in _iter_image_paths(
                mtl_doc[coll_map["product_contents_fn"]]):
            if usgs_file_type not in band_aliases:
                all_found = dict(
                    _iter_image_paths(
                        mtl_doc[coll_map["product_contents_fn"]]))
                raise ValueError(
                    f"Band name {usgs_file_type!r} is not known among our aliases. "
                    f"(All bands found in the dataset: {all_found!r})")
            p.note_measurement(
                band_aliases[usgs_file_type],
                file_location,
                relative_to_dataset_location=True,
                expand_valid_data=(
                    usgs_file_type.startswith("band_") and (
                        # The older collection called quality a "band"
                        "quality" not in usgs_file_type)),
            )

        p.note_accessory_file("metadata:landsat_mtl", Path(mtl_filename))
        return p.done(embed_location=embed_location)
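The combined C1/C2 variant above repeats one pattern several times: a LANDSAT_MTL_MAP entry may name either a single MTL section or a list of candidate sections, and the first one present in the document wins. A hedged sketch of that pattern as a standalone helper; the helper name and sample document are illustrative, not from the source:

from typing import Dict, List, Optional, Union

def first_present_section(
        mtl_doc: Dict[str, dict],
        keys: Union[str, List[str]]) -> Optional[dict]:
    """Return the first section named by `keys` that exists in the MTL doc."""
    if isinstance(keys, str):
        keys = [keys]
    for key in keys:
        if key in mtl_doc:
            return mtl_doc[key]
    return None

# Collection 2 splits the processing record by level; only one is present.
doc = {
    "level2_processing_record": {
        "landsat_product_id": "LC08_L2SP_090084_20210101_20210110_02_T1"
    }
}
record = first_present_section(
    doc, ["level1_processing_record", "level2_processing_record"])
assert record is not None and "landsat_product_id" in record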
Example #7
def package(
    out_directory: Path,
    granule: Granule,
    included_products: Iterable[str] = DEFAULT_PRODUCTS,
    include_oa: bool = True,
    oa_resolution: Optional[Tuple[float, float]] = None,
) -> Tuple[UUID, Path]:
    """
    Package an L2 product.

    :param include_oa:
        Whether to also unpack and write the observation attributes (OA) measurements.

    :param out_directory:
        The base directory for output datasets. A DEA-naming-conventions folder hierarchy
        will be created inside this folder.

    :param granule:
        Granule information. You probably want to make one with Granule.from_path()

    :param included_products:
        A list of imagery products to include in the package.
        Defaults to all products.

    :return:
        The dataset UUID and output metadata path
    """
    included_products = tuple(s.lower() for s in included_products)

    with h5py.File(granule.wagl_hdf5, "r") as fid:
        granule_group = fid[granule.name]

        wagl_doc = _read_wagl_metadata(granule_group)

        with DatasetAssembler(
                out_directory.absolute(),
                # WAGL stamps a good, random ID already.
                dataset_id=granule.wagl_metadata.get("id"),
                naming_conventions="dea_s2" if
            ("sentinel"
             in wagl_doc["source_datasets"]["platform_id"].lower()) else "dea",
        ) as p:
            _apply_wagl_metadata(p, wagl_doc)

            # It's a GA ARD product.
            p.producer = "ga.gov.au"
            p.product_family = "ard"
            p.maturity = _determine_maturity(
                acq_date=p.datetime,
                processed=p.processed,
                wagl_doc=wagl_doc,
            )
            if granule.source_level1_metadata is not None:
                # For historical consistency: we want to use the instrument that the source L1 product
                # came from, not the instruments reported from the WAGL doc.
                #
                # Eg.
                #     Level 1 will say "OLI_TIRS", while wagl doc will say "OLI".
                #     Our current C3 products say "OLI_TIRS" so we need to stay consistent.
                #     (even though WAGL only *used* the OLI bands, it came from an OLI_TIRS observation)
                #
                # So delete our current wagl one, since we're adding a source dataset:
                if p.instrument is not None:
                    del p.properties["eo:instrument"]

                p.add_source_dataset(granule.source_level1_metadata,
                                     auto_inherit_properties=True)
                # When level 1 is NRT, ARD is always NRT.
                if granule.source_level1_metadata.maturity == "nrt":
                    p.maturity = "nrt"

            org_collection_number = utils.get_collection_number(
                p.platform, p.producer,
                p.properties.get("landsat:collection_number"))

            p.dataset_version = f"{org_collection_number}.2.1"
            p.region_code = _extract_reference_code(p, granule.name)

            _read_gqa_doc(p, granule.gqa_doc)
            _read_fmask_doc(p, granule.fmask_doc)
            if granule.tesp_doc:
                _take_software_versions(p, granule.tesp_doc)

            _unpack_products(p, included_products, granule_group)

            if include_oa:
                with do("Starting OA", heading=True):
                    _unpack_observation_attributes(
                        p,
                        included_products,
                        granule_group,
                        infer_datetime_range=p.platform.startswith("landsat"),
                        oa_resolution=oa_resolution,
                    )
                if granule.fmask_image:
                    with do(f"Writing fmask from {granule.fmask_image} "):
                        p.write_measurement(
                            "oa:fmask",
                            granule.fmask_image,
                            expand_valid_data=False,
                            overview_resampling=Resampling.mode,
                        )

            with do("Finishing package"):
                return p.done()
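package() above selects its naming conventions from the WAGL source platform. A tiny sketch distilling that selection; the helper name is hypothetical, since the real code inlines the expression in the DatasetAssembler call:

def naming_conventions_for(platform_id: str) -> str:
    # Sentinel granules use the "dea_s2" conventions; everything else
    # (i.e. landsat) falls back to plain "dea".
    return "dea_s2" if "sentinel" in platform_id.lower() else "dea"

assert naming_conventions_for("SENTINEL-2A") == "dea_s2"
assert naming_conventions_for("LANDSAT_8") == "dea"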