Example #1
0
def munge_metadata(nci_dataset):
    """
    Rewrite an NCI ARD metadata document in place for S3 publication.

    Drops every measurement band entry and the lineage, stamps a creation
    time and product type, records this tool's software version, and
    replaces the dataset id with a deterministic UUID derived from the
    original id.

    :param nci_dataset: parsed ARD-METADATA document (mutated in place)
    :return: the same, mutated, document
    """
    # Remove all measurement band entries; a KeyError here means the input
    # document is not the expected S2 ARD layout.
    bands = nci_dataset['image']['bands']
    for band in (
            'nbart_blue',
            'nbart_coastal_aerosol',
            'nbart_contiguity',
            'nbart_green',
            'nbart_nir_1',
            'nbart_nir_2',
            'nbart_red',
            'nbart_red_edge_1',
            'nbart_red_edge_2',
            'nbart_red_edge_3',
            'nbart_swir_2',
            'nbart_swir_3',
    ):
        del bands[band]
    del nci_dataset['lineage']
    nci_dataset['creation_dt'] = nci_dataset['extent']['center_dt']  # FIXME: WTF
    nci_dataset['product_type'] = 'S2MSIARD_NBAR'
    # Keep the original id around; 'id' is replaced below.
    nci_dataset['original_id'] = nci_dataset['id']
    nci_dataset['software_versions'].update({
        's2_to_s3_rolling': {  # FIXME: Update
            'repo': 'https://github.com/GeoscienceAustralia/dea-airflow/',
            'version': '1.0.0'}
    })

    # Create a deterministic dataset ID based on these inputs
    nci_dataset['id'] = str(odc_uuid("s2_to_s3_rolling", "1.0.0", [nci_dataset['id']]))
    return nci_dataset
    def _deterministic_uuid(self, task, algorithm_version=None, **other_tags):
        """
        Build a deterministic UUID for *task*, plus the tag values it was
        derived from.

        Falls back to the transform's major.minor version when no
        algorithm_version is supplied, and pulls dataset_version from the
        task's output metadata when the caller did not provide one.

        :return: tuple of (uuid, dict of values used to derive it)
        """
        if algorithm_version is None:
            algorithm_version = self._get_transform_info()["version_major_minor"]

        if "dataset_version" not in other_tags:
            try:
                other_tags["dataset_version"] = (
                    task.settings.output.metadata["dataset_version"])
            except KeyError:
                # Best effort only: the uuid stays deterministic, just
                # without a dataset_version tag folded in.
                _LOG.info(
                    "dataset_version not set and not used to generate deterministic uuid"
                )

        transform = task.settings.specification.transform
        derived_uuid = odc_uuid(
            algorithm=transform,
            algorithm_version=algorithm_version,
            sources=[task.dataset.id],
            **other_tags,
        )

        uuid_values = dict(other_tags)
        uuid_values["algorithm_version"] = algorithm_version
        uuid_values["dataset.id"] = task.dataset.id
        uuid_values["algorithm"] = transform

        return derived_uuid, uuid_values
Example #3
0
    def __post_init__(self):
        """Cache the short time label and lazily assign a deterministic uuid."""
        self.short_time = self.time_range.short

        # A zero-valued uuid acts as the "unset" sentinel: derive a
        # deterministic one from product, lineage, time and tile index.
        if not self.uuid.int:
            self.uuid = odc_uuid(
                self.product.name,
                self.product.version,
                sources=self._lineage(),
                time=self.short_time,
                tile=self.tile_index,
            )
Example #4
0
def replace_metadata(yaml_file, _s3_bucket, s3_metadata_path):
    """
    Replace metadata with additional info and upload the result to S3.

    :param yaml_file: metadata file in NCI
    :param _s3_bucket: name of s3 bucket
    :param s3_metadata_path: path of metadata file in s3
    """
    s3_resource = boto3.resource("s3").Bucket(_s3_bucket)

    with open(yaml_file) as config_file:
        temp_metadata = yaml.load(config_file, Loader=yaml.CSafeLoader)

    # Strip every measurement band entry; a KeyError here means the input
    # is not the expected S2 ARD document layout.
    bands = temp_metadata['image']['bands']
    for band in ('nbart_blue', 'nbart_coastal_aerosol', 'nbart_contiguity',
                 'nbart_green', 'nbart_nir_1', 'nbart_nir_2', 'nbart_red',
                 'nbart_red_edge_1', 'nbart_red_edge_2', 'nbart_red_edge_3',
                 'nbart_swir_2', 'nbart_swir_3'):
        del bands[band]
    del temp_metadata['lineage']
    temp_metadata['creation_dt'] = temp_metadata['extent']['center_dt']
    temp_metadata['product_type'] = 'S2MSIARD_NBAR'
    # Keep the original id around; 'id' is replaced below.
    temp_metadata['original_id'] = temp_metadata['id']
    temp_metadata['software_versions'].update({
        's2_to_s3_rolling': {
            'repo': 'https://github.com/GeoscienceAustralia/dea-airflow/',
            'version': '1.0.0'
        }
    })

    # Create dataset ID based on Kirill's magic
    temp_metadata['id'] = str(
        odc_uuid("s2_to_s3_rolling", "1.0.0", [temp_metadata['id']]))

    # Write to S3 directly
    s3_resource.Object(key=s3_metadata_path).put(Body=yaml.dump(
        temp_metadata, default_flow_style=False, Dumper=yaml.CSafeDumper))

    LOG.info("Finished uploaded metadata %s to %s", yaml_file,
             s3_metadata_path)
Example #5
0
def generate_yaml(path_or_url, cfg):
    """
    Render the dataset yaml template for a single raster.

    :param path_or_url: local path or URL of the raster; the region code is
        parsed from the final path component (``<prefix>-<region>.<ext>``)
    :param cfg: config object providing product, version and period
    :return: rendered yaml text
    """
    path = path_or_url.split("/")[-1]
    region_code = path.split(".")[0].split("-")[1]

    # Use a context manager so the dataset handle is closed once we've
    # pulled out crs/shape/transform (the original leaked the open handle).
    with rasterio.open(path_or_url) as src:
        info = dict(
            uuid=odc_uuid(
                cfg.product,
                cfg.version,
                sources=[],
                period=cfg.period,
                region_code=region_code,
            ),
            epsg=src.meta["crs"].to_epsg(),
            region_code=region_code,
            shape=src.shape,
            transform=src.transform,
            path=path,
        )
    return tpl.render(cfg=cfg, **info)
Example #6
0
def deterministic_uuid(task, algorithm_version=None, **other_tags):
    """
    Derive a deterministic UUID for *task* and report the values it was
    built from.

    Defaults algorithm_version to the transform's major.minor version and
    dataset_version to the task's output metadata, when available.

    :return: tuple of (uuid, dict of tag values that went into the uuid)
    """
    transform = task.settings.specification.transform
    if algorithm_version is None:
        algorithm_version = get_transform_info(transform)['version_major_minor']

    if 'dataset_version' not in other_tags:
        try:
            other_tags['dataset_version'] = (
                task.settings.output.metadata['dataset_version'])
        except KeyError:
            # Best effort only: the uuid stays deterministic without it.
            _LOG.info('dataset_version not set and '
                      'not used to generate deterministic uuid')

    generated = odc_uuid(algorithm=transform,
                         algorithm_version=algorithm_version,
                         sources=[task.dataset.id], **other_tags)

    uuid_values = dict(other_tags)
    uuid_values['algorithm_version'] = algorithm_version
    uuid_values['dataset.id'] = task.dataset.id
    uuid_values['algorithm'] = transform

    return generated, uuid_values
Example #7
0
def replace_metadata(granule, s3_bucket, s3_metadata_path):
    """
    Rewrite a granule's ARD metadata and upload it straight to S3.

    :param granule: granule id used to locate ARD-METADATA.yaml under NCI_DIR
    :param s3_bucket: destination bucket name
    :param s3_metadata_path: destination key for the rewritten metadata
    """
    s3 = boto3.resource("s3").Bucket(s3_bucket)

    yaml_file = "{nci_path}/{granule}/ARD-METADATA.yaml".format(
        nci_path=NCI_DIR,
        granule=granule
    )

    with open(yaml_file) as config_file:
        temp_metadata = yaml.load(config_file, Loader=yaml.CSafeLoader)

    # Strip every measurement band entry; a KeyError here means the input
    # is not the expected S2 ARD document layout.
    bands = temp_metadata['image']['bands']
    for band in ('nbart_blue', 'nbart_coastal_aerosol', 'nbart_contiguity',
                 'nbart_green', 'nbart_nir_1', 'nbart_nir_2', 'nbart_red',
                 'nbart_red_edge_1', 'nbart_red_edge_2', 'nbart_red_edge_3',
                 'nbart_swir_2', 'nbart_swir_3'):
        del bands[band]
    del temp_metadata['lineage']
    temp_metadata['creation_dt'] = temp_metadata['extent']['center_dt']
    temp_metadata['product_type'] = 'S2MSIARD_NBAR'
    temp_metadata['original_id'] = temp_metadata['id']
    # BUGFIX: was .append() — every sibling variant of this routine in this
    # file calls .update() on the same 'software_versions' mapping, and
    # .append() would raise AttributeError on a dict. Made consistent.
    temp_metadata['software_versions'].update({
        's2_to_s3_rolling': {
            'repo': 'https://github.com/GeoscienceAustralia/dea-orchestration/',
            'version': '1.0.0'}
    })

    # Create dataset ID based on Kirill's magic
    temp_metadata['id'] = str(odc_uuid("s2_to_s3_rolling", "1.0.0", [temp_metadata['id']]))

    # Write to S3 directly
    s3.Object(key=s3_metadata_path).put(Body=yaml.dump(
        temp_metadata, default_flow_style=False, Dumper=yaml.CSafeDumper)
    )
Example #8
0
def stac_transform(input_stac: Document, relative: bool = True) -> Document:
    """Takes in a raw STAC 1.0 dictionary and returns an ODC dictionary

    :param input_stac: parsed STAC item document
    :param relative: forwarded to band/grid extraction — presumably controls
        whether asset paths are kept relative; confirm in _get_stac_bands
    :return: an EO3-style ODC dataset document
    """

    product_label, product_name, region_code, default_grid = _stac_product_lookup(
        input_stac
    )

    # Generating UUID for products not having UUID.
    # Checking if provided id is valid UUID.
    # If not valid, creating new deterministic uuid using odc_uuid function based on product_name and product_label.
    # TODO: Verify if this approach to create UUID is valid.
    if _check_valid_uuid(input_stac["id"]):
        deterministic_uuid = input_stac["id"]
    else:
        # s2_l2a keeps a legacy algorithm name so existing ids stay stable.
        if product_name in ["s2_l2a"]:
            deterministic_uuid = str(
                odc_uuid("sentinel-2_stac_process", "1.0.0", [product_label])
            )
        else:
            deterministic_uuid = str(
                odc_uuid(f"{product_name}_stac_process", "1.0.0", [product_label])
            )

    # TODO: handle old STAC that doesn't have grid information here...
    bands, grids = _get_stac_bands(input_stac, default_grid, relative=relative)

    stac_properties, lineage = _get_stac_properties_lineage(input_stac)

    properties = input_stac["properties"]
    epsg = properties["proj:epsg"]
    native_crs = f"epsg:{epsg}"

    # Transform geometry to the native CRS at an appropriate precision
    geometry = Geometry(input_stac["geometry"], "epsg:4326")
    if native_crs != "epsg:4326":
        # Arbitrary precisions, but should be fine
        pixel_size = get_in(["default", "transform", 0], grids)
        precision = 0
        # NOTE(review): transform[0] (pixel width) is normally positive, so
        # this branch looks unreachable — was `pixel_size < 1` intended for
        # sub-metre/sub-degree pixels? Confirm upstream.
        if pixel_size < 0:
            precision = 6

        geometry = _geographic_to_projected(geometry, native_crs, precision)

    # Assemble the EO3 document; lineage defaults to empty and is filled
    # below when the STAC item carried one.
    stac_odc = {
        "$schema": "https://schemas.opendatacube.org/dataset",
        "id": deterministic_uuid,
        "crs": native_crs,
        "grids": grids,
        "product": {"name": product_name.lower()},
        "label": product_label,
        "properties": stac_properties,
        "measurements": bands,
        "lineage": {},
    }

    if region_code:
        stac_odc["properties"]["odc:region_code"] = region_code

    if geometry:
        stac_odc["geometry"] = geometry.json

    if lineage:
        stac_odc["lineage"] = lineage

    return stac_odc
Example #9
0
def fuse_ds(ds_1: Dataset,
            ds_2: Dataset,
            product: Optional[DatasetType] = None) -> Dataset:
    """
    This function fuses two datasets. It requires that:
      - the products are fusable
      - grids with the same name are identical
      - labels are in the format 'product_suffix' with identical suffixes
      - CRSs' are identical
      - datetimes are identical
      - $schemas are identical

    :param ds_1: first source dataset
    :param ds_2: second source dataset
    :param product: pre-fused product; derived via fuse_products() when None
    :return: a new in-memory Dataset (empty uri) whose id is derived
        deterministically from the two source ids
    """

    doc_1, doc_2 = ds_1.metadata_doc, ds_2.metadata_doc

    if product is None:
        product = fuse_products(ds_1.type, ds_2.type)

    fused_doc = dict()

    # Deterministic id from the two source ids; lineage records both sources.
    fused_doc["id"] = str(
        odc_uuid(product.name, "0.0.0", sources=[doc_1["id"], doc_2["id"]]))
    fused_doc["lineage"] = {"source_datasets": [doc_1["id"], doc_2["id"]]}

    # check that all grids with the same name are identical
    # NOTE(review): these asserts are input validation and vanish under
    # `python -O`; consider raising ValueError instead.
    common_grids = set(doc_1["grids"].keys()).intersection(
        doc_2["grids"].keys())
    assert all(doc_1["grids"][g] == doc_2["grids"][g] for g in common_grids)

    # TODO: handle the case that grids have conflicts in a separate function
    fused_doc["grids"] = {**doc_1["grids"], **doc_2["grids"]}

    # Labels must share the same suffix once each product name is stripped.
    label_suffix = doc_1["label"].replace(doc_1["product"]["name"], "")
    assert label_suffix == doc_2["label"].replace(doc_2["product"]["name"], "")
    fused_doc["label"] = f"{product.name}{label_suffix}"

    # These keys must match exactly and are copied through unchanged.
    equal_keys = ["$schema", "crs"]
    for key in equal_keys:
        assert doc_1[key] == doc_2[key]
        fused_doc[key] = doc_1[key]

    fused_doc["properties"] = dict()
    assert doc_1["properties"]["datetime"] == doc_2["properties"][
        "datetime"]  # datetime is the only mandatory property

    # copy over all identical properties
    for key, val in doc_1["properties"].items():
        if val == doc_2["properties"].get(key, None):
            fused_doc["properties"][key] = val

    # Union of measurements; each band's path is re-resolved from whichever
    # source dataset actually provides it (ds_2 wins on conflicts).
    fused_doc["measurements"] = {
        **doc_1["measurements"],
        **doc_2["measurements"]
    }
    for key, path in {
            **measurement_paths(ds_1),
            **measurement_paths(ds_2)
    }.items():
        fused_doc["measurements"][key]["path"] = path

    fused_ds = Dataset(product, prep_eo3(fused_doc), uris=[""])
    return fused_ds