Example #1
def test_add_source_dataset(tmp_path: Path, inherit_geom):
    from eodatasets3 import serialise

    p = DatasetAssembler(tmp_path, naming_conventions="dea_c3")
    source_dataset = serialise.from_path(
        Path(__file__).parent /
        "data/LC08_L1TP_089080_20160302_20170328_01_T1.yaml")
    p.add_source_dataset(source_dataset,
                         auto_inherit_properties=True,
                         inherit_geometry=inherit_geom)

    p.maturity = "interim"
    p.collection_number = "3"
    p.dataset_version = "1.6.0"
    p.producer = "ga.gov.au"
    p.processed = "1998-07-30T12:23:23"
    p.product_family = "wofs"
    p.write_measurement(
        "water",
        Path(__file__).parent /
        "data/wofs/ga_ls_wofs_3_099081_2020-07-26_interim_water_clipped.tif",
    )

    id, path = p.done()

    output = serialise.from_path(path)
    if inherit_geom:
        # POLYGON ((609615 -3077085, 378285 -3077085, 378285 -3310515, 609615 -3310515, 609615 -3077085))
        assert output.geometry == source_dataset.geometry
    else:
        # POLYGON ((684285 -3439275, 684285 -3444495, 689925 -3444495, 689925 -3439275, 684285 -3439275))
        # Geometry is not set from the source dataset, but instead from the added wofs measurement
        assert output.geometry != source_dataset.geometry
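
The inherit_geom argument above is a pytest fixture, so the test body runs once per fixture value. The fixture itself isn't shown on this page; a minimal sketch of how it could be parametrized (names and ids are assumptions):

import pytest

# Hypothetical fixture: run the test once with True and once with False.
@pytest.fixture(params=[True, False], ids=["inherit-geom", "no-inherit-geom"])
def inherit_geom(request):
    return request.param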
Example #2
def assert_expected_eo3_path(
        expected_doc: Dict,
        expected_path: Path,
        ignore_fields=(),
):
    """
    Check an output path of an EO3 dataset matches an expected document.

    This is slightly smarter about checking geometry equality etc. within the document.
    """
    __tracebackhide__ = operator.methodcaller("errisinstance", AssertionError)
    assert (expected_path.exists()
            ), f"Expected output EO3 path doesn't exist: {expected_path}"
    assert_same_as_file(
        expected_doc,
        expected_path,
        # We check the geometry below
        ignore_fields=("geometry", ) + tuple(ignore_fields),
    )

    if "geometry" not in ignore_fields:
        # Compare geometry after parsing, rather than comparing the raw dict values.
        produced_dataset = serialise.from_path(expected_path)
        expected_dataset = serialise.from_doc(expected_doc,
                                              skip_validation=True)
        if expected_dataset.geometry is None:
            assert produced_dataset.geometry is None
        else:
            assert_shapes_mostly_equal(produced_dataset.geometry,
                                       expected_dataset.geometry, 0.00000001)
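
assert_shapes_mostly_equal is not defined in this snippet. A plausible sketch, assuming shapely geometries, where "mostly equal" means the area of the symmetric difference stays below the given threshold:

from shapely.geometry.base import BaseGeometry

def assert_shapes_mostly_equal(
        shape1: BaseGeometry, shape2: BaseGeometry, threshold: float):
    # Hypothetical implementation: the area the two shapes don't share
    # must be smaller than the threshold.
    difference = shape1.symmetric_difference(shape2).area
    assert difference < threshold, (
        f"Shapes differ by area {difference} (threshold {threshold})")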
Example #3
def _load_level1_doc(
    wagl_doc: Dict,
    user_specified_l1_path: Optional[Path] = None,
    allow_missing_provenance=False,
):

    if user_specified_l1_path:
        if not user_specified_l1_path.exists():
            raise ValueError(
                f"No level1 metadata found at given path {user_specified_l1_path}"
            )
        level1_path = user_specified_l1_path
    else:
        level1_path = Path(
            get_path(wagl_doc, ("source_datasets", "source_level1")))

    # If a directory, assume "<dirname>.odc-metadata.yaml"
    if level1_path.is_dir():
        metadata_path = level1_path / (level1_path.name + ".odc-metadata.yaml")
    # Otherwise it's a sibling file with ".odc-metadata.yaml" suffix
    else:
        if level1_path.suffix.lower() == ".yaml":
            metadata_path = level1_path
        else:
            metadata_path = level1_path.with_suffix(".odc-metadata.yaml")

    if not metadata_path.exists():
        if not allow_missing_provenance:
            raise ValueError(
                "No level1 found or provided. "
                f"WAGL said it was at path {str(level1_path)!r}, "
                "which has no metadata doc we can find, and you didn't specify an alternative. "
                f"(allow_missing_provenance={allow_missing_provenance})")
        return None
    return serialise.from_path(metadata_path)
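
A hedged usage sketch (the path and document contents here are hypothetical): given a WAGL document with the usual source_datasets entry, the function resolves the sibling metadata doc, or returns None when provenance is allowed to be missing.

# Hypothetical WAGL document pointing at a level1 tar.
wagl_doc = {
    "source_datasets": {
        "source_level1": "/data/LC08_L1TP_090084_20160121_20170330_01_T1.tar"
    }
}
# Looks for /data/LC08_L1TP_090084_20160121_20170330_01_T1.odc-metadata.yaml;
# returns None here if that file doesn't exist, instead of raising.
level1 = _load_level1_doc(wagl_doc, allow_missing_provenance=True)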
Example #4
def run(
    verbose: bool,
    odc_metadata_files: Iterable[Path],
    stac_base_url: str,
    explorer_base_url: str,
    validate: bool,
):
    for input_metadata in odc_metadata_files:
        dataset = serialise.from_path(input_metadata)

        name = input_metadata.stem.replace(".odc-metadata", "")
        output_path = input_metadata.with_name(f"{name}.stac-item.json")

        # Create STAC dict
        item_doc = dc_to_stac(
            dataset,
            input_metadata,
            output_path,
            stac_base_url,
            explorer_base_url,
            do_validate=False,
        )

        if validate:
            eo3stac.validate_item(item_doc,
                                  log=echo if verbose else lambda line: None)

        with output_path.open("w") as f:
            json.dump(item_doc, f, indent=4, default=json_fallback)

        if verbose:
            echo(f'Wrote {style(output_path.as_posix(), "green")}')
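
This run() reads like the body of a click command (it already uses click's echo and style). A hypothetical wiring, with all option names assumed, could look like:

import click
from pathlib import Path

# Hypothetical CLI wiring for run() above; option names are assumptions.
@click.command(help="Convert ODC metadata files to STAC items.")
@click.option("-v", "--verbose", is_flag=True)
@click.option("--stac-base-url", default="", help="Base URL for STAC item links.")
@click.option("--explorer-base-url", default="", help="Datacube Explorer base URL.")
@click.option("--validate/--no-validate", default=False)
@click.argument("odc_metadata_files", nargs=-1,
                type=click.Path(exists=True, path_type=Path))
def cli(verbose, odc_metadata_files, stac_base_url, explorer_base_url, validate):
    run(verbose, odc_metadata_files, stac_base_url, explorer_base_url, validate)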
Example #5
def run(odc_metadata_files: Iterable[Path]):
    for input_metadata in odc_metadata_files:
        dataset = serialise.from_path(input_metadata)

        project = partial(
            pyproj.transform,
            pyproj.Proj(init=dataset.crs),
            pyproj.Proj(init="epsg:4326"),
        )
        wgs84_geometry: BaseGeometry = transform(project, dataset.geometry)
        item_doc = dict(
            id=dataset.id,
            type="Feature",
            bbox=wgs84_geometry.bounds,
            geometry=wgs84_geometry.__geo_interface__,
            properties={
                **dataset.properties, "odc:product": dataset.product.name
            },
            assets={
                # TODO: Currently assuming no name collisions.
                **{
                    name: {
                        "href": m.path
                    }
                    for name, m in dataset.measurements.items()
                },
                **{
                    name: {
                        "href": m.path
                    }
                    for name, m in dataset.accessories.items()
                },
            },
            links=[
                # {
                #     "rel": "self",
                #     "href": '?',
                # },
                {
                    "rel": "odc_product",
                    "href": dataset.product.href
                },
                {
                    "rel": "alternative",
                    "type": "text/html",
                    "href":
                    f"https://explorer.dea.ga.gov.au/dataset/{dataset.id}",
                },
            ],
        )

        name = input_metadata.stem.replace(".odc-metadata", "")
        output_path = input_metadata.with_name(f"{name}.stac-item.json")

        with output_path.open("w") as f:
            json.dump(item_doc, f, indent=4, default=json_fallback)

        echo(output_path)
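
Proj(init=...) and pyproj.transform, as used above, are the pyproj 1.x API and are deprecated in pyproj 2+. A sketch of the equivalent reprojection with the newer Transformer API, reusing dataset from the loop above and assuming dataset.crs is an EPSG-style string:

import pyproj
from shapely.ops import transform

# pyproj 2+ replacement for partial(pyproj.transform, Proj(init=...), ...).
# always_xy=True keeps the (x, y) axis order that the old API produced.
transformer = pyproj.Transformer.from_crs(dataset.crs, "epsg:4326",
                                          always_xy=True)
wgs84_geometry = transform(transformer.transform, dataset.geometry)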
Example #6
def test_tostac_no_grids(odc_dataset_path: Path, expected_stac_doc: Dict):
    """
    Converted EO1 datasets don't have grid information. Make sure it still outputs
    without falling over.
    """

    # Remove grids from the input....
    dataset = serialise.from_path(odc_dataset_path)
    dataset.grids = None
    serialise.to_path(odc_dataset_path, dataset)

    run_tostac(odc_dataset_path)
    expected_output_path = odc_dataset_path.with_name(
        odc_dataset_path.name.replace(".odc-metadata.yaml", ".stac-item.json"))

    # No longer expect proj fields (they come from grids).
    remove_stac_properties(expected_stac_doc,
                           ("proj:shape", "proj:transform", "proj:epsg"))
    # But we do still expect a global CRS.
    expected_stac_doc["properties"]["proj:epsg"] = 32656

    output_doc = json.load(expected_output_path.open())
    assert_same(expected_stac_doc, output_doc)
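
remove_stac_properties is not shown on this page; a minimal sketch, assuming it simply drops the named keys from the item's properties dict:

from typing import Dict, Iterable

def remove_stac_properties(stac_doc: Dict, property_names: Iterable[str]):
    # Hypothetical helper: drop the given keys from the STAC item properties.
    for name in property_names:
        stac_doc["properties"].pop(name, None)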
Example #7
def _write_stac(
    metadata_path: Path,
    task: AlchemistTask,
    dataset_assembler: DatasetAssembler,
):
    out_dataset = serialise.from_path(metadata_path)
    stac_path = Path(str(metadata_path).replace("odc-metadata.yaml", "stac-item.json"))
    # Madness in deferred destination logic
    uri_base = dataset_assembler.names.destination_folder(
        Path(task.settings.output.location)
    )
    uri_base = str(uri_base) + "/"

    stac = dc_to_stac(
        out_dataset,
        metadata_path,
        stac_path,
        uri_base.replace("s3:/", "s3://"),
        task.settings.output.explorer_url,
        False,
    )

    with stac_path.open("w") as f:
        json.dump(stac, f, default=json_fallback)
    dataset_assembler.add_accessory_file("metadata:stac", stac_path)

    # dataset_assembler._checksum.write(dataset_assembler._accessories["checksum:sha1"])
    # Need a new checksummer because EODatasets is insane
    checksummer = PackageChecksum()
    checksum_file = (
        dataset_assembler._dataset_location
        / dataset_assembler._accessories["checksum:sha1"].name
    )
    checksummer.read(checksum_file)
    checksummer.add_file(stac_path)
    checksummer.write(checksum_file)
    return stac
Example #8
def check_prepare_outputs(
    invoke_script,
    run_args,
    expected_doc: Dict,
    expected_metadata_path: Path,
    ignore_fields=(),
):
    __tracebackhide__ = operator.methodcaller("errisinstance", AssertionError)
    run_prepare_cli(invoke_script, *run_args)

    assert expected_metadata_path.exists()
    assert_same_as_file(
        expected_doc,
        expected_metadata_path,
        # We check the geometry below
        ignore_fields=("geometry",) + tuple(ignore_fields),
    )

    # Compare geometry after parsing, rather than comparing the raw dict values.
    produced_dataset = serialise.from_path(expected_metadata_path)
    expected_dataset = serialise.from_doc(expected_doc, skip_validation=True)
    assert_shapes_mostly_equal(
        produced_dataset.geometry, expected_dataset.geometry, 0.00000001
    )
Example #9
    def for_path(
        cls,
        wagl_hdf5: Path,
        granule_names: Optional[Sequence[str]] = None,
        level1_metadata_path: Optional[Path] = None,
        fmask_image_path: Optional[Path] = None,
        fmask_doc_path: Optional[Path] = None,
        gqa_doc_path: Optional[Path] = None,
    ):
        """
        Create granules by scanning the given hdf5 file.

        Optionally specify additional files and level1 path.

        If they are not specified, it looks for them using WAGL's output naming conventions.
        """
        if not wagl_hdf5.exists():
            raise ValueError(f"Input hdf5 doesn't exist {wagl_hdf5}")

        with h5py.File(wagl_hdf5, "r") as fid:
            granule_names = granule_names or fid.keys()

            for granule_name in granule_names:
                if granule_name not in fid:
                    raise ValueError(
                        f"Granule {granule_name!r} not found in file {wagl_hdf5}"
                    )

                wagl_doc_field = get_path(
                    fid, (granule_name, "METADATA", "CURRENT"))
                if not wagl_doc_field:
                    raise ValueError(
                        f"Granule contains no wagl metadata: {granule_name} in {wagl_hdf5}"
                    )

                [wagl_doc] = loads_yaml(wagl_doc_field[()])

                if not level1_metadata_path:
                    level1_tar_path = Path(
                        get_path(wagl_doc,
                                 ("source_datasets", "source_level1")))
                    level1_metadata_path = level1_tar_path.with_suffix(
                        ".odc-metadata.yaml")
                if not level1_metadata_path.exists():
                    raise ValueError(
                        f"No level1 metadata found at {level1_metadata_path}")

                level1 = serialise.from_path(level1_metadata_path)

                fmask_image_path = fmask_image_path or wagl_hdf5.with_name(
                    f"{granule_name}.fmask.img")
                if not fmask_image_path.exists():
                    raise ValueError(
                        f"No fmask image found at {fmask_image_path}")

                fmask_doc_path = fmask_doc_path or fmask_image_path.with_suffix(
                    ".yaml")
                if not fmask_doc_path.exists():
                    raise ValueError(f"No fmask found at {fmask_doc_path}")
                with fmask_doc_path.open("r") as fl:
                    [fmask_doc] = loads_yaml(fl)

                gqa_doc_path = gqa_doc_path or wagl_hdf5.with_name(
                    f"{granule_name}.gqa.yaml")
                if not gqa_doc_path.exists():
                    raise ValueError(f"No gqa found at {gqa_doc_path}")
                with gqa_doc_path.open("r") as fl:
                    [gqa_doc] = loads_yaml(fl)

                yield cls(
                    name=granule_name,
                    wagl_hdf5=wagl_hdf5,
                    wagl_metadata=wagl_doc,
                    source_level1_metadata=level1,
                    fmask_doc=fmask_doc,
                    fmask_image=fmask_image_path,
                    gqa_doc=gqa_doc,
                )
Example #10
    def for_path(
        cls,
        wagl_hdf5: Path,
        granule_names: Optional[Sequence[str]] = None,
        level1_metadata_path: Optional[Path] = None,
        fmask_image_path: Optional[Path] = None,
        fmask_doc_path: Optional[Path] = None,
        gqa_doc_path: Optional[Path] = None,
        tesp_doc_path: Optional[Path] = None,
        allow_missing_provenance: bool = False,
    ):
        """
        Create granules by scanning the given hdf5 file.

        Optionally specify additional files and level1 path.

        If they are not specified, it looks for them using WAGL's output naming conventions.

        :param allow_missing_provenance: if true, continue without level1
            provenance instead of raising an error when none can be found.
        """
        if not wagl_hdf5.exists():
            raise ValueError(f"Input hdf5 doesn't exist {wagl_hdf5}")

        with h5py.File(wagl_hdf5, "r") as fid:
            granule_names = granule_names or fid.keys()

            for granule_name in granule_names:
                if granule_name not in fid:
                    raise ValueError(
                        f"Granule {granule_name!r} not found in file {wagl_hdf5}"
                    )

                wagl_doc_field = get_path(
                    fid, (granule_name, "METADATA", "CURRENT"))
                if not wagl_doc_field:
                    raise ValueError(
                        f"Granule contains no wagl metadata: {granule_name} in {wagl_hdf5}"
                    )

                [wagl_doc] = loads_yaml(wagl_doc_field[()])

                if not level1_metadata_path:
                    level1_metadata_path = _get_level1_metadata_path(wagl_doc)
                if level1_metadata_path and not level1_metadata_path.exists():
                    raise ValueError(
                        f"No level1 metadata found at {level1_metadata_path}")

                level1 = (serialise.from_path(level1_metadata_path)
                          if level1_metadata_path else None)
                if (not level1_metadata_path) and (
                        not allow_missing_provenance):
                    # No path was given and none could be derived from the WAGL doc.
                    raise ValueError(
                        "No level1 metadata found or provided, "
                        "and you didn't specify an alternative. "
                        f"(allow_missing_provenance={allow_missing_provenance})"
                    )

                fmask_image_path = fmask_image_path or wagl_hdf5.with_name(
                    f"{granule_name}.fmask.img")
                if not fmask_image_path.exists():
                    raise ValueError(
                        f"No fmask image found at {fmask_image_path}")

                fmask_doc_path = fmask_doc_path or fmask_image_path.with_suffix(
                    ".yaml")
                if not fmask_doc_path.exists():
                    raise ValueError(f"No fmask found at {fmask_doc_path}")
                with fmask_doc_path.open("r") as fl:
                    [fmask_doc] = loads_yaml(fl)

                gqa_doc_path = gqa_doc_path or wagl_hdf5.with_name(
                    f"{granule_name}.gqa.yaml")
                if not gqa_doc_path.exists():
                    raise ValueError(f"No gqa found at {gqa_doc_path}")
                with gqa_doc_path.open("r") as fl:
                    [gqa_doc] = loads_yaml(fl)

                # Optional doc
                if tesp_doc_path:
                    # But if they gave us a path, we're strict about it existing.
                    if not tesp_doc_path.exists():
                        raise ValueError(
                            f"Supplied tesp doc path doesn't exist: {tesp_doc_path}"
                        )
                else:
                    tesp_doc_path = wagl_hdf5.with_name(
                        f"{granule_name}.tesp.yaml")
                # The tesp doc is optional: default to None when the file is
                # absent, so the yield below can't hit an undefined name.
                tesp_doc = None
                if tesp_doc_path.exists():
                    with tesp_doc_path.open("r") as fl:
                        [tesp_doc] = loads_yaml(fl)

                yield cls(
                    name=granule_name,
                    wagl_hdf5=wagl_hdf5,
                    wagl_metadata=wagl_doc,
                    source_level1_metadata=level1,
                    fmask_doc=fmask_doc,
                    fmask_image=fmask_image_path,
                    gqa_doc=gqa_doc,
                    tesp_doc=tesp_doc,
                )