def _validate_stac_properties(dataset: DatasetDoc):
    """
    Check a dataset's properties, yielding a validation message for each problem found.

    Every property that has a known normaliser is passed through it, and the
    stored value is compared with the normalised result. The 'odc:producer' and
    'odc:file_format' properties are then given their own checks.

    :param dataset: the dataset document whose ``properties`` are checked.
    :returns: a generator of the messages built by ``_warning`` and ``_error``.
    """
    properties = dataset.properties
    known_properties = properties.KNOWN_PROPERTIES

    for name, value in properties.items():
        if name not in known_properties:
            yield _warning("unknown_property", f"Unknown stac property {name!r}")
            continue

        normaliser = known_properties.get(name)
        # Without a normaliser or a value there is nothing to compare against.
        if not normaliser or value is None:
            continue

        try:
            normalised_value = normaliser(value)
            # A normaliser can return two values, the latter adding extra extracted fields.
            if isinstance(normalised_value, tuple):
                normalised_value = normalised_value[0]

            # It's okay for datetimes to be strings
            # .. since ODC's own loader does that.
            if isinstance(normalised_value, datetime) and isinstance(value, str):
                value = ciso8601.parse_datetime(value)

            # Special case for dates, as "no timezone" and "utc timezone" are treated identical.
            if isinstance(value, datetime):
                value = default_utc(value)

            if not isinstance(value, type(normalised_value)):
                yield _warning(
                    "property_type",
                    f"Value {value} expected to be "
                    f"{type(normalised_value).__name__!r} (got {type(value).__name__!r})",
                )
            elif normalised_value != value and not (
                _is_nan(normalised_value) and _is_nan(value)
            ):
                # (Two NaNs never compare equal, so that pair is deliberately let through.)
                yield _warning(
                    "property_formatting",
                    f"Property {value!r} expected to be {normalised_value!r}",
                )
        except ValueError as e:
            # A ValueError raised without arguments has an empty args tuple,
            # so fall back to its repr rather than failing on args[0].
            reason = e.args[0] if e.args else repr(e)
            yield _error("invalid_property", f"{name!r}: {reason}")

    if "odc:producer" in properties:
        producer = properties["odc:producer"]
        # We use domain name to avoid arguing about naming conventions ('ga' vs 'geoscience-australia' vs ...)
        # A non-string producer (such as None) cannot be a domain name either:
        # report it instead of letting the containment test raise a TypeError.
        if not isinstance(producer, str) or "." not in producer:
            yield _warning(
                "producer_domain",
                "Property 'odc:producer' should be the organisation's domain name. Eg. 'ga.gov.au'",
            )

    # This field is a little odd, but is expected by the current version of ODC.
    # (from discussion with Kirill)
    if not properties.get("odc:file_format"):
        yield _warning(
            "global_file_format",
            "Property 'odc:file_format' is empty",
            hint="Usually 'GeoTIFF'",
        )
def _normalise_datetime_props(generated_doc): properties = generated_doc.get("properties", {}) for key in properties: if "datetime" in key: # If string value, make it explicitly iso format with timezone. val = properties[key] if isinstance(val, str): properties[key] = default_utc( ciso8601.parse_datetime(val)).isoformat()
def datetime_type(value):
    """
    Normalise a date value, returning the result of ``default_utc`` on it.

    :param value: a ``RuamelTimeStamp``, a date string, or any other value
                  (which is handed to ``default_utc`` unchanged).
    """
    # Ruamel's TimeZone class can become invalid from the .replace(utc) call.
    # (I think it no longer matches the internal ._yaml fields.)
    # Going through its iso text gives us a regular datetime instead.
    as_text = value.isoformat() if isinstance(value, RuamelTimeStamp) else value

    parsed = ciso8601.parse_datetime(as_text) if isinstance(as_text, str) else as_text

    # Store all dates with a timezone.
    # yaml standard says all dates default to UTC.
    # (and ruamel normalises timezones to UTC itself)
    return default_utc(parsed)
def _determine_maturity(acq_date: datetime, processed: datetime, wagl_doc: Dict): """ Determine maturity field of a dataset. Based on the fallback logic in nbar pages of CMI, eg: https://cmi.ga.gov.au/ga_ls5t_nbart_3 """ ancillary_tiers = { key.lower(): o["tier"] for key, o in wagl_doc["ancillary"].items() if "tier" in o } if "water_vapour" not in ancillary_tiers: # Perhaps this should be a warning, but I'm being strict until told otherwise. # (a warning is easy to ignore) raise ValueError( f"No water vapour ancillary tier. Got {list(ancillary_tiers.keys())!r}" ) water_vapour_is_definitive = ancillary_tiers["water_vapour"].lower( ) == "definitive" if (processed - acq_date) < timedelta(hours=48): return "nrt" if not water_vapour_is_definitive: return "interim" # For accurate BRDF, both Aqua and Terra need to be operating. # Aqua launched May 2002, and we add a ~2 month buffer of operation. if acq_date < default_utc(datetime(2002, 7, 1)): return "final" if "brdf" not in ancillary_tiers: # Perhaps this should be a warning, but I'm being strict until told otherwise. # (a warning is easy to ignore) raise ValueError( f"No brdf tier available. Got {list(ancillary_tiers.keys())!r}") brdf_tier = ancillary_tiers["brdf"].lower() if "definitive" in brdf_tier: return "final" elif "fallback" in brdf_tier: return "interim" else: # This value should not occur for production data, only for experiments return "user"