def wagl_unpack(scene, granule, h5group, outdir):
    """
    Unpack and package the NBAR and NBART products.
    """
    # listing of all datasets of IMAGE CLASS type
    img_paths = find(h5group, 'IMAGE')

    for product in PRODUCTS:
        for pathname in [p for p in img_paths if '/{}/'.format(product) in p]:

            dataset = h5group[pathname]
            if dataset.attrs['band_name'] == 'BAND-9':
                # TODO re-work so that a valid BAND-9 from another sensor isn't skipped
                continue

            acqs = scene.get_acquisitions(group=pathname.split('/')[0],
                                          granule=granule)
            acq = [a for a in acqs if
                   a.band_name == dataset.attrs['band_name']][0]

            # base_dir = pjoin(splitext(basename(acq.pathname))[0], granule)
            base_fname = '{}.TIF'.format(splitext(basename(acq.uri))[0])
            match_dict = PATTERN.match(base_fname).groupdict()
            fname = '{}{}_{}{}'.format(match_dict.get('prefix'), product,
                                       match_dict.get('band_name'),
                                       match_dict.get('extension'))
            out_fname = pjoin(outdir,
                              # base_dir.replace('L1C', 'ARD'),
                              # granule.replace('L1C', 'ARD'),
                              product,
                              fname.replace('L1C', 'ARD'))

            # output
            if not exists(dirname(out_fname)):
                os.makedirs(dirname(out_fname))

            write_img(dataset, out_fname, cogtif=True, levels=LEVELS,
                      nodata=dataset.attrs['no_data_value'],
                      geobox=GriddedGeoBox.from_dataset(dataset),
                      resampling=Resampling.nearest,
                      options={'blockxsize': dataset.chunks[1],
                               'blockysize': dataset.chunks[0],
                               'compress': 'deflate',
                               'zlevel': 4})

    # retrieve metadata
    scalar_paths = find(h5group, 'SCALAR')
    pathname = [pth for pth in scalar_paths if 'NBAR-METADATA' in pth][0]
    tags = yaml.load(h5group[pathname][()], Loader=yaml.FullLoader)
    return tags
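
A minimal calling sketch for wagl_unpack, assuming the module-level imports used by the snippet are in scope; the level-1 package name, granule id and output directory are placeholders, and the acquisitions factory is assumed to be wagl's container loader.

import h5py
from wagl.acquisition import acquisitions   # assumed container factory

scene = acquisitions("S2A_MSIL1C_example.zip")        # hypothetical level-1 package
granule = scene.granules[0]                           # hypothetical granule id
with h5py.File("S2A_MSIL1C_example.wagl.h5", "r") as fid:
    tags = wagl_unpack(scene, granule, fid[granule], outdir="/tmp/ARD")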
Example #2
def table_results(table_group,
                  compression=H5CompressionFilter.LZF,
                  filter_opts=None):
    """
    Combine the residual results of each TABLE Dataset into a
    single TABLE Dataset.
    """
    # potentially could just use visit...
    paths = find(table_group, 'TABLE')

    equivalent = []
    products = []
    name = []

    for pth in paths:
        dset = table_group[pth]
        equivalent.append(dset.attrs['equal'])
        products.append(pbasename(dset.parent.name))
        name.append(pbasename(dset.name))

    df = pandas.DataFrame({
        'product': products,
        'dataset_name': name,
        'equivalent': equivalent
    })

    # output
    write_dataframe(df,
                    'TABLE-EQUIVALENCY',
                    table_group,
                    compression,
                    title='EQUIVALENCY-RESULTS',
                    filter_opts=filter_opts)
Example #3
def scalar_results(scalar_group, compression=H5CompressionFilter.LZF, filter_opts=None):
    """
    Combine the residual results of each SCALAR Dataset into a
    single TABLE Dataset.
    """
    # potentially could just use visit...
    paths = find(scalar_group, "SCALAR")

    equivalent = []
    products = []
    name = []

    for pth in paths:
        dset = scalar_group[pth]
        equivalent.append(dset[()])
        products.append(pbasename(dset.parent.name))
        name.append(pbasename(dset.name))

    df = pandas.DataFrame(
        {"product": products, "dataset_name": name, "equivalent": equivalent}
    )

    # output
    write_dataframe(
        df,
        "SCALAR-EQUIVALENCY",
        scalar_group,
        compression,
        title="EQUIVALENCY-RESULTS",
        filter_opts=filter_opts,
    )
Example #4
def image_results(image_group,
                  compression=H5CompressionFilter.LZF,
                  filter_opts=None):
    """
    Combine the residual results of each IMAGE Dataset into a
    single TABLE Dataset.
    """
    # potentially could just use visit...
    img_paths = find(image_group, 'IMAGE')

    min_ = []
    max_ = []
    percent = []
    pct_90 = []
    pct_99 = []
    resid_paths = []
    hist_paths = []
    chist_paths = []
    products = []
    name = []

    for pth in img_paths:
        hist_pth = pth.replace('RESIDUALS', 'FREQUENCY-DISTRIBUTIONS')
        chist_pth = pth.replace('RESIDUALS', 'CUMULATIVE-DISTRIBUTIONS')
        resid_paths.append(ppjoin(image_group.name, pth))
        hist_paths.append(ppjoin(image_group.name, hist_pth))
        chist_paths.append(ppjoin(image_group.name, chist_pth))

        dset = image_group[pth]
        min_.append(dset.attrs['min_residual'])
        max_.append(dset.attrs['max_residual'])
        percent.append(dset.attrs['percent_difference'])
        products.append(pbasename(dset.parent.name))
        name.append(pbasename(dset.name))

        dset = image_group[chist_pth]
        pct_90.append(dset.attrs['90th_percentile'])
        pct_99.append(dset.attrs['99th_percentile'])

    df = pandas.DataFrame({
        'product': products,
        'dataset_name': name,
        'min_residual': min_,
        'max_residual': max_,
        'percent_difference': percent,
        '90th_percentile': pct_90,
        '99th_percentile': pct_99,
        'residual_image_pathname': resid_paths,
        'residual_histogram_pathname': hist_paths,
        'residual_cumulative_pathname': chist_paths
    })

    # output
    write_dataframe(df,
                    'IMAGE-RESIDUALS',
                    image_group,
                    compression,
                    title='RESIDUALS-TABLE',
                    filter_opts=filter_opts)
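
A hedged driver sketch showing how scalar_results, table_results and image_results might be invoked together; the filename and group layout below are assumptions rather than part of the original snippets.

import h5py

with h5py.File("intercomparison-results.h5", "a") as fid:   # hypothetical results file
    scalar_results(fid["RESULTS/SCALAR"])
    table_results(fid["RESULTS/TABLE"])
    image_results(fid["RESULTS/IMAGE"])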
Example #5
def unpack_products(product_list, container, granule, h5group, outdir):
    """
    Unpack and package the NBAR and NBART products.
    """
    # listing of all datasets of IMAGE CLASS type
    img_paths = find(h5group, 'IMAGE')

    # relative paths of each dataset for ODC metadata doc
    rel_paths = {}

    # TODO pass products through from the scheduler rather than hard code
    for product in product_list:
        for pathname in [p for p in img_paths if '/{}/'.format(product) in p]:

            dataset = h5group[pathname]

            acqs = container.get_acquisitions(group=pathname.split('/')[0],
                                              granule=granule)
            acq = [
                a for a in acqs if a.band_name == dataset.attrs['band_name']
            ][0]

            base_fname = '{}.TIF'.format(splitext(basename(acq.uri))[0])
            match_dict = PATTERN1.match(base_fname).groupdict()
            fname = '{}{}_{}{}'.format(match_dict.get('prefix'), product,
                                       match_dict.get('band_name'),
                                       match_dict.get('extension'))
            rel_path = pjoin(product, re.sub(PATTERN2, ARD, fname))
            out_fname = pjoin(outdir, rel_path)

            _write_cogtif(dataset, out_fname)

            # alias name for ODC metadata doc
            alias = _clean(ALIAS_FMT[product].format(dataset.attrs['alias']))
            rel_paths[alias] = {'path': rel_path, 'layer': 1}

    # retrieve metadata
    scalar_paths = find(h5group, 'SCALAR')
    pathname = [pth for pth in scalar_paths if 'NBAR-METADATA' in pth][0]
    tags = yaml.load(h5group[pathname][()], Loader=yaml.FullLoader)
    return tags, rel_paths
Example #6
def link_interpolated_data(data, out_fname):
    """
    Links the individual interpolated results into a
    single file for easier access.
    """
    for key in data:
        fname = data[key]
        with h5py.File(fname, 'r') as fid:
            dataset_names = find(fid, dataset_class='IMAGE')

        for dname in dataset_names:
            create_external_link(fname, dname, out_fname, dname)
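
For reference, create_external_link presumably boils down to h5py's external-link mechanism, roughly as in this sketch (an assumption, not the library's actual implementation):

import h5py

def _external_link_sketch(src_fname, src_dname, out_fname, out_dname):
    # add an HDF5 external link so that out_fname:/out_dname resolves to the
    # dataset stored at src_fname:/src_dname without copying any data
    with h5py.File(out_fname, "a") as out_fid:
        if out_dname not in out_fid:
            out_fid[out_dname] = h5py.ExternalLink(src_fname, src_dname)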
Example #7
def unpack_products(product_list, container, granule, h5group, outdir):
    """
    Unpack and package the NBAR and NBART products.
    """
    # listing of all datasets of IMAGE CLASS type
    img_paths = find(h5group, "IMAGE")

    # relative paths of each dataset for ODC metadata doc
    rel_paths = {}

    # TODO pass products through from the scheduler rather than hard code
    for product in product_list:
        for pathname in [p for p in img_paths if "/{}/".format(product) in p]:

            dataset = h5group[pathname]

            acqs = container.get_acquisitions(group=pathname.split("/")[0],
                                              granule=granule)
            acq = [
                a for a in acqs if a.band_name == dataset.attrs["band_name"]
            ][0]

            base_fname = "{}.TIF".format(splitext(basename(acq.uri))[0])
            match_dict = PATTERN1.match(base_fname).groupdict()
            fname = "{}{}_{}{}".format(
                match_dict.get("prefix"),
                product,
                match_dict.get("band_name"),
                match_dict.get("extension"),
            )
            rel_path = pjoin(product, re.sub(PATTERN2, ARD, fname))
            out_fname = pjoin(outdir, rel_path)

            _cogtif_args = get_cogtif_options(dataset, overviews=True)
            write_tif_from_dataset(dataset, out_fname, **_cogtif_args)

            # alias name for ODC metadata doc
            alias = _clean(ALIAS_FMT[product].format(dataset.attrs["alias"]))

            # Band Metadata
            rel_paths[alias] = get_img_dataset_info(dataset, rel_path)

    # retrieve metadata
    wagl_metadata = yaml.load(
        h5group[DatasetName.METADATA.value][DatasetName.CURRENT_METADATA.value][()],
        Loader=yaml.FullLoader,
    )

    return wagl_metadata, rel_paths
Example #8
def link_standard_data(input_fnames, out_fname):
    # TODO: incorporate linking for multi-granule and multi-group
    #       datasets
    """
    Links the individual reflectance and surface temperature
    results into a single file for easier access.
    """
    for fname in input_fnames:
        with h5py.File(fname, "r") as fid:
            dataset_names = find(fid, dataset_class="IMAGE")

        for dname in dataset_names:
            create_external_link(fname, dname, out_fname, dname)

        # metadata
        with h5py.File(fname, "r") as fid:
            with h5py.File(out_fname, "a") as out_fid:
                yaml_dname = DatasetName.NBAR_YAML.value
                if yaml_dname in fid and yaml_dname not in out_fid:
                    fid.copy(yaml_dname, out_fid, name=yaml_dname)

                yaml_dname = DatasetName.SBT_YAML.value
                if yaml_dname in fid and yaml_dname not in out_fid:
                    fid.copy(yaml_dname, out_fid, name=yaml_dname)
Example #9
def mndwi(wagl_h5_file, granule, out_fname):
    """
    Computes the MNDWI for a given granule in a wagl h5 file.

    Parameters
    ----------
    wagl_h5_file : str
        wagl-water-atcor generated h5 file

    granule : str
        Group path of the granule within the h5 file

    out_fname : str
        Output filename of the h5 file
    """

    # specify the reflectance products to use in generating mndwi
    products = ["LMBADJ"]

    # specify the resampling approach for the SWIR band
    resample_approach = Resampling.bilinear

    h5_fid = h5py.File(out_fname, "w")

    # find the granule index in the wagl_h5_file
    fid = h5py.File(wagl_h5_file, "r")
    granule_fid = fid[granule]
    paths = find(granule_fid, "IMAGE")

    # get platform name
    md = yaml.load(fid[granule + "/METADATA/CURRENT"][()],
                   Loader=yaml.FullLoader)
    platform_id = md["source_datasets"]["platform_id"]

    # store mndwi-based products into a group
    mndwi_grp = h5_fid.create_group("mndwi")

    for prod in products:

        # search the h5 groups & get paths to the green and swir bands
        green_path, swir_path = get_mndwi_bands(granule, platform_id, prod,
                                                paths)

        green_ds = granule_fid[green_path]
        chunks = green_ds.chunks
        nRows, nCols = green_ds.shape
        geobox = GriddedGeoBox.from_dataset(green_ds)
        nodata = green_ds.attrs["no_data_value"]

        # create output h5 attributes
        desc = "MNDWI derived with {0} and {1} ({2} reflectances)".format(
            psplit(green_path)[-1],
            psplit(swir_path)[-1],
            prod,
        )

        attrs = {
            "crs_wkt": geobox.crs.ExportToWkt(),
            "geotransform": geobox.transform.to_gdal(),
            "no_data_value": nodata,
            "granule": granule,
            "description": desc,
            "platform": platform_id,
            "spatial_resolution": abs(geobox.transform.a),
        }

        if platform_id.startswith("SENTINEL_2"):
            # we need to upscale the swir band
            swir_ds = granule_fid[swir_path]
            swir_im = reproject_array_to_array(
                src_img=swir_ds[:],
                src_geobox=GriddedGeoBox.from_dataset(swir_ds),
                dst_geobox=geobox,
                src_nodata=swir_ds.attrs["no_data_value"],
                dst_nodata=nodata,
                resampling=resample_approach,
            )
            attrs["SWIR_resampling_method"] = resample_approach.name

        else:
            swir_im = granule_fid[swir_path][:]

        # ------------------------- #
        #  Compute mndwi via tiles  #
        #   and save tiles to h5    #
        # ------------------------- #
        tiles = generate_tiles(samples=nCols,
                               lines=nRows,
                               xtile=chunks[1],
                               ytile=chunks[0])

        # create mndwi dataset
        mndwi_ds = mndwi_grp.create_dataset(
            f"mndwi_image_{prod}",
            shape=(nRows, nCols),
            dtype="float32",
            compression="lzf",
            chunks=chunks,
            shuffle=True,
        )

        for tile in tiles:
            green_tile = green_ds[tile]
            swir_tile = swir_im[tile]
            mndwi_tile = compute_mndwi(green_tile, swir_tile)

            # perform masking
            mask = ((green_tile == nodata)
                    | (swir_tile == nodata)
                    | (~np.isfinite(mndwi_tile)))
            mndwi_tile[mask] = nodata

            mndwi_ds[tile] = mndwi_tile

        # add attrs to dataset
        attach_image_attributes(mndwi_ds, attrs)

    fid.close()
    h5_fid.close()
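
For context, compute_mndwi is expected to evaluate the standard Modified NDWI ratio, (green - swir) / (green + swir); a minimal sketch under that assumption is below, though the real helper may handle casting and scaling differently.

import numpy as np

def _compute_mndwi_sketch(green, swir):
    # MNDWI = (green - swir) / (green + swir), computed in float32 so integer
    # reflectances don't truncate; zero-sum pixels come out non-finite and are
    # masked to nodata by the caller above
    green = green.astype("float32")
    swir = swir.astype("float32")
    with np.errstate(divide="ignore", invalid="ignore"):
        return (green - swir) / (green + swir)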
Example #10
def package_non_standard(outdir, granule):
    """
    YAML creator for the ARD pipeline.
    """

    outdir = Path(outdir) / granule.name
    indir = granule.wagl_hdf5.parent

    if indir.is_file():
        shutil.copy(indir, outdir)
    else:
        shutil.copytree(indir, outdir)

    wagl_h5 = outdir / str(granule.name + ".wagl.h5")
    dataset_doc = outdir / str(granule.name + ".yaml")
    boolean_h5 = Path(str(wagl_h5).replace("wagl.h5", "converted.datasets.h5"))
    fmask_img = outdir / str(granule.name + ".fmask.img")

    f = h5py.File(boolean_h5, "a")  # write access needed for create_dataset calls below

    with DatasetAssembler(metadata_path=dataset_doc,
                          naming_conventions="dea") as da:
        level1 = granule.source_level1_metadata
        da.add_source_dataset(level1,
                              auto_inherit_properties=True,
                              inherit_geometry=True)
        da.product_family = "ard"
        da.producer = "ga.gov.au"
        da.properties["odc:file_format"] = "HDF5"

        with h5py.File(wagl_h5, "r") as fid:
            img_paths = [ppjoin(fid.name, pth) for pth in find(fid, "IMAGE")]
            granule_group = fid[granule.name]

            try:
                wagl_path, *ancil_paths = [
                    pth for pth in find(granule_group, "SCALAR")
                    if "METADATA" in pth
                ]
            except ValueError:
                raise ValueError("No nbar metadata found in granule")

            [wagl_doc] = loads_yaml(granule_group[wagl_path][()])

            da.processed = get_path(wagl_doc,
                                    ("system_information", "time_processed"))

            platform = da.properties["eo:platform"]
            if platform in ("sentinel-2a", "sentinel-2b"):
                org_collection_number = 3
            else:
                org_collection_number = utils.get_collection_number(
                    platform, da.producer,
                    da.properties["landsat:collection_number"])

            da.dataset_version = f"{org_collection_number}.1.0"
            da.region_code = eodatasets3.wagl._extract_reference_code(
                da, granule.name)

            eodatasets3.wagl._read_gqa_doc(da, granule.gqa_doc)
            eodatasets3.wagl._read_fmask_doc(da, granule.fmask_doc)

            with rasterio.open(fmask_img) as ds:
                fmask_layer = "/{}/OA_FMASK/oa_fmask".format(granule.name)
                data = ds.read(1)
                fmask_ds = f.create_dataset(fmask_layer,
                                            data=data,
                                            compression="lzf",
                                            shuffle=True)
                fmask_ds.attrs["crs_wkt"] = ds.crs.wkt
                fmask_ds.attrs["geotransform"] = ds.transform.to_gdal()

                fmask_ds.attrs["description"] = (
                    "Converted from ERDAS Imagine format to HDF5 to work with "
                    "the limitations of varied formats within ODC")

                grid_spec = images.GridSpec(
                    shape=ds.shape,
                    transform=ds.transform,
                    crs=CRS.from_wkt(fmask_ds.attrs["crs_wkt"]),
                )

                measurement_name = "oa_fmask"

                pathname = str(outdir.joinpath(boolean_h5))

                no_data = fmask_ds.attrs.get("no_data_value")
                if no_data is None:
                    no_data = float("nan")

                da._measurements.record_image(
                    measurement_name,
                    grid_spec,
                    pathname,
                    fmask_ds[:],
                    layer="/{}".format(fmask_layer),
                    nodata=no_data,
                    expand_valid_data=False,
                )

            for pathname in img_paths:
                ds = fid[pathname]
                ds_path = Path(ds.name)

                # eodatasets internally uses this grid spec to group image datasets
                grid_spec = images.GridSpec(
                    shape=ds.shape,
                    transform=Affine.from_gdal(*ds.attrs["geotransform"]),
                    crs=CRS.from_wkt(ds.attrs["crs_wkt"]),
                )

                # product group name; lambertian, nbar, nbart, oa
                if "STANDARDISED-PRODUCTS" in str(ds_path):
                    product_group = ds_path.parent.name
                elif "INTERPOLATED-ATMOSPHERIC-COEFFICIENTS" in str(ds_path):
                    product_group = "oa_{}".format(ds_path.parent.name)
                else:
                    product_group = "oa"

                # spatial resolution group
                # used to separate measurements with the same name
                resolution_group = "rg{}".format(
                    ds_path.parts[2].split("-")[-1])

                # we don't want hyphens in odc land
                measurement_name = "_".join([
                    resolution_group,
                    product_group,
                    ds.attrs.get("alias", ds_path.name),
                ]).replace("-", "_").lower()

                # include this band in defining the valid data bounds?
                include = "nbart" in measurement_name

                no_data = ds.attrs.get("no_data_value")
                if no_data is None:
                    no_data = float("nan")

                # if we are of type bool, we'll have to convert just for GDAL
                if ds.dtype.name == "bool":
                    pathname = str(outdir.joinpath(boolean_h5))
                    out_ds = f.create_dataset(
                        measurement_name,
                        data=np.uint8(ds[:]),
                        compression="lzf",
                        shuffle=True,
                        chunks=ds.chunks,
                    )

                    for k, v in ds.attrs.items():
                        out_ds.attrs[k] = v

                    da._measurements.record_image(
                        measurement_name,
                        grid_spec,
                        pathname,
                        out_ds[:],
                        layer="/{}".format(out_ds.name),
                        nodata=no_data,
                        expand_valid_data=include,
                    )
                else:
                    pathname = str(outdir.joinpath(wagl_h5))

                    # work around as note_measurement doesn't allow us to specify the gridspec
                    da._measurements.record_image(
                        measurement_name,
                        grid_spec,
                        pathname,
                        ds[:],
                        layer="/{}".format(ds.name),
                        nodata=no_data,
                        expand_valid_data=include,
                    )

        # the longest part here is generating the valid data bounds vector
        # landsat 7 post SLC-OFF can take a really long time
        return da.done()
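
A hedged calling sketch for the packager above; the granule record is a stand-in exposing only the attributes the function reads, every path is a placeholder, and the eodatasets3 serialise helper plus pre-loaded gqa/fmask documents are assumptions.

import yaml
from pathlib import Path
from types import SimpleNamespace

from eodatasets3 import serialise   # assumed helper for loading the level-1 doc

level1_doc = serialise.from_path(Path("/data/level1/LC08_example.odc-metadata.yaml"))
granule = SimpleNamespace(
    name="LC08_example_granule",                                # hypothetical granule id
    wagl_hdf5=Path("/data/work/LC08_example_granule.wagl.h5"),
    source_level1_metadata=level1_doc,
    gqa_doc=yaml.safe_load(Path("/data/work/gqa.yaml").read_text()),      # assumed pre-loaded dict
    fmask_doc=yaml.safe_load(Path("/data/work/fmask.yaml").read_text()),  # assumed pre-loaded dict
)
metadata_path = package_non_standard("/data/packaged", granule)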
Example #11
def image_results(image_group, compression=H5CompressionFilter.LZF, filter_opts=None):
    """
    Combine the residual results of each IMAGE Dataset into a
    single TABLE Dataset.
    """
    # potentially could just use visit...
    img_paths = find(image_group, "IMAGE")

    min_ = []
    max_ = []
    percent = []
    pct_90 = []
    pct_99 = []
    resid_paths = []
    hist_paths = []
    chist_paths = []
    products = []
    name = []

    for pth in img_paths:
        hist_pth = pth.replace("RESIDUALS", "FREQUENCY-DISTRIBUTIONS")
        chist_pth = pth.replace("RESIDUALS", "CUMULATIVE-DISTRIBUTIONS")
        resid_paths.append(ppjoin(image_group.name, pth))
        hist_paths.append(ppjoin(image_group.name, hist_pth))
        chist_paths.append(ppjoin(image_group.name, chist_pth))

        dset = image_group[pth]
        min_.append(dset.attrs["min_residual"])
        max_.append(dset.attrs["max_residual"])
        percent.append(dset.attrs["percent_difference"])
        products.append(pbasename(dset.parent.name))
        name.append(pbasename(dset.name))

        dset = image_group[chist_pth]
        pct_90.append(dset.attrs["90th_percentile"])
        pct_99.append(dset.attrs["99th_percentile"])

    df = pandas.DataFrame(
        {
            "product": products,
            "dataset_name": name,
            "min_residual": min_,
            "max_residual": max_,
            "percent_difference": percent,
            "90th_percentile": pct_90,
            "99th_percentile": pct_99,
            "residual_image_pathname": resid_paths,
            "residual_histogram_pathname": hist_paths,
            "residual_cumulative_pathname": chist_paths,
        }
    )

    # output
    write_dataframe(
        df,
        "IMAGE-RESIDUALS",
        image_group,
        compression,
        title="RESIDUALS-TABLE",
        filter_opts=filter_opts,
    )