def find_matching_product(rules, doc):
    """
    Pick the single product whose rule metadata is contained in ``doc``.

    Every rule is indexed by ``'metadata'`` (checked against ``doc`` with
    ``changes.contains``) and ``'type'`` (the value handed back on a match).

    :param rules: sequence of rule mappings with ``'metadata'`` and ``'type'``
    :param doc: dataset document; its ``'id'`` is used in error messages
    :raises BadMatch: when no rule matches, or when more than one does
    :rtype: datacube.model.DatasetType
    """
    candidates = []
    for rule in rules:
        if changes.contains(doc, rule['metadata']):
            candidates.append(rule)

    # The only successful outcome is exactly one matching rule.
    if len(candidates) == 1:
        return candidates[0]['type']

    if candidates:
        raise BadMatch('Too many matching Products found for %s. Matched %s.' % (
            doc.get('id', 'unidentified'), candidates))

    raise BadMatch('No matching Product found for %s' % json.dumps(doc, indent=4))
def match(doc):
    """
    Return the one product whose rule signature is contained in ``doc``.

    ``rules`` is not a parameter; it is resolved from the surrounding scope.
    Each rule is read through its ``signature`` and ``product`` attributes.

    :param doc: dataset document; its ``'id'`` is used in error messages
    :raises BadMatch: when no product matches, or when several do
    """
    candidates = []
    for rule in rules:
        if changes.contains(doc, rule.signature):
            candidates.append(rule.product)

    if len(candidates) == 1:
        return candidates[0]

    # Anything other than a unique match is an error; name the dataset in it.
    doc_id = doc.get('id', '<missing id>')
    if not candidates:
        raise BadMatch('No matching Product found for dataset %s' % doc_id)

    product_names = ','.join(p.name for p in candidates)
    raise BadMatch('Auto match failed, dataset %s matches several products:\n %s' % (
        doc_id, product_names))
def load_rules_from_file(filename, index):
    """
    Load matching rules from ``filename`` and resolve their product names.

    The rules are taken from element ``[1]`` of the first item yielded by
    ``read_documents``. For each rule, the name under ``'type'`` is looked up
    with ``index.products.get_by_name`` and replaced in place by the result.

    :param filename: path of the rules document
    :param index: object exposing ``products.get_by_name``
    :return: the list of rules, or ``None`` (after logging an error) when a
        named product is missing or its own ``metadata_doc`` does not satisfy
        the rule. Rules processed before the failing one have already had
        their ``'type'`` entry replaced.
    """
    first_document = next(read_documents(Path(filename)))
    rules = first_document[1]
    # TODO: verify schema

    for rule in rules:
        type_name = rule['type']
        product = index.products.get_by_name(type_name)

        if not product:
            _LOG.error('DatasetType %s does not exists', type_name)
            return None

        # A rule must at least match the product it claims to identify.
        rule_matches_product = changes.contains(product.metadata_doc, rule['metadata'])
        if not rule_matches_product:
            _LOG.error('DatasetType %s can\'t be matched by its own rule', type_name)
            return None

        rule['type'] = product

    return rules
def find_matching_product(rules, doc):
    """
    Pick the single product whose rule metadata is contained in ``doc``.

    Every rule is indexed by ``'metadata'`` (checked against ``doc`` with
    ``changes.contains``) and ``'type'`` (the value handed back on a match).

    :param rules: sequence of rule mappings with ``'metadata'`` and ``'type'``
    :param doc: dataset document (a mapping)
    :raises BadMatch: when there are no rules, when no rule matches, or when
        more than one rule matches
    :rtype: datacube.model.DatasetType
    """
    matched = [rule for rule in rules if changes.contains(doc, rule['metadata'])]

    if len(matched) == 1:
        return matched[0]['type']

    if len(matched) > 1:
        raise BadMatch('Too many matching Products found for %s. Matched %s.' %
                       (doc.get('id', 'unidentified'), matched))

    # Nothing matched: provide the user with information about the failure.
    if not rules:
        raise BadMatch('No rules provided.')

    if len(rules) == 1:
        # With a single candidate we can show a focused comparison: only the
        # top-level dataset fields that the rule actually mentions.
        metadata = rules[0]['metadata']
        relevant_doc = {k: v for k, v in doc.items() if k in metadata}
        # Argument order must follow the labels in the message: the dataset's
        # fields first, then the product rule's metadata.
        raise BadMatch(
            'Dataset metadata did not match product rules.'
            '\nDataset metadata:\n %s\n'
            '\nProduct metadata:\n %s\n'
            % (json.dumps(relevant_doc, indent=4),
               json.dumps(metadata, indent=4)))

    raise BadMatch('No matching Product found for %s' % json.dumps(doc, indent=4))
def matches(doc, rule):
    """Report whether ``rule.signature`` is contained in ``doc`` (per ``changes.contains``)."""
    signature = rule.signature
    return changes.contains(doc, signature)
def test_changes_contains():
    """Check ``contains`` against scalars, ``None`` and nested dicts, in a fixed order."""
    default = {}
    sensitive = {"case_sensitive": True}

    # Each entry: (first argument, second argument, keyword arguments, expected result)
    cases = [
        ("bob", "BOB", default, True),
        ("bob", "BOB", sensitive, False),
        (1, 1, default, True),
        (1, {}, default, False),
        # same as above, but with None interpreted as {}
        (1, None, default, False),
        ({}, 1, default, False),
        (None, 1, default, False),
        ({}, {}, default, True),
        ({}, None, default, True),
        # this one is arguable...
        (None, {}, default, False),
        (None, None, default, True),
        ({"a": 1, "b": 2}, {"a": 1}, default, True),
        ({"a": {"b": "BOB"}}, {"a": {"b": "bob"}}, default, True),
        ({"a": {"b": "BOB"}}, {"a": {"b": "bob"}}, sensitive, False),
        ("bob", "alice", default, False),
        ({"a": 1}, {"a": 1, "b": 2}, default, False),
        ({"a": {"b": 1}}, {"a": {}}, default, True),
        ({"a": {"b": 1}}, {"a": None}, default, True),
    ]

    for left, right, kwargs, expected in cases:
        assert contains(left, right, **kwargs) is expected
def _match_product(
    dataset_doc: Dict, product_definitions: Dict[str, Dict]
) -> Tuple[Optional[Dict], List[ValidationMessage]]:
    """
    Match the given dataset to a product definition.

    Two sources of evidence are combined:

    * the product name the dataset claims for itself (``product.name``, else
      ``properties["odc:product"]``), looked up in ``product_definitions``;
    * every definition whose ``"metadata"`` section is contained in the
      dataset document (per ``changes.contains``).

    :param dataset_doc: the dataset document
    :param product_definitions: product definitions keyed by product name
    :return: ``(product, messages)``. ``product`` is the sole rule-matched
        definition if there is exactly one; otherwise the claimed definition
        if it was supplied; otherwise ``None``. ``messages`` holds the
        validation messages produced along the way.
    """
    product = None

    # EO3 datasets often put the product name directly inside.
    specified_product_name = get_path(dataset_doc, ("product", "name"), default=None)
    specified_product_name = specified_product_name or get_path(
        dataset_doc, ("properties", "odc:product"), default=None
    )

    if specified_product_name and (specified_product_name in product_definitions):
        product = product_definitions[specified_product_name]

    matching_products = {
        name: definition
        for name, definition in product_definitions.items()
        if changes.contains(dataset_doc, definition["metadata"])
    }

    # If we have nothing, give up!
    if (not matching_products) and (not product):
        # Find the product that most closely matches it, to helpfully show the differences!
        closest_product_name = None
        closest_differences = None
        for name, definition in product_definitions.items():
            diffs = tuple(_get_product_mismatch_reasons(dataset_doc, definition))
            # Strict '<' keeps the first definition seen among equally-close ones.
            if (closest_differences is None) or len(diffs) < len(closest_differences):
                closest_product_name = name
                closest_differences = diffs

        difference_hint = _differences_as_hint(closest_differences)
        return None, [
            _error(
                "unknown_product",
                "Dataset does not match the given products",
                hint=f"Closest match is {closest_product_name}, with differences:"
                f"\n{difference_hint}",
            )
        ]

    messages = []

    # Only comment on the dataset's claim when it actually makes one. Without
    # this guard, a dataset that names no product but matches by its fields
    # would be reported as "claims to be product None".
    if specified_product_name and specified_product_name not in matching_products:
        if product:
            difference_hint = _differences_as_hint(
                _get_product_mismatch_reasons(dataset_doc, product)
            )
            messages.append(
                _info(
                    "strange_product_claim",
                    f"Dataset claims to be product {specified_product_name!r}, but doesn't match its fields",
                    hint=f"{difference_hint}",
                )
            )
        else:
            messages.append(
                _info(
                    "unknown_product_claim",
                    f"Dataset claims to be product {specified_product_name!r}, but it wasn't supplied.",
                )
            )

    if len(matching_products) > 1:
        matching_names = ", ".join(matching_products.keys())
        messages.append(
            _error(
                "product_match_clash",
                "Multiple products match the given dataset",
                hint=f"Maybe you need more fields in the 'metadata' section?\n"
                f"Claims to be a {specified_product_name!r}, and matches {matching_names!r}"
                if specified_product_name
                else f"Maybe you need more fields in the 'metadata' section?\n"
                f"Matches {matching_names!r}",
            )
        )
        # (We won't pick one from the bunch here. Maybe they already matched one
        # above to use in continuing validation.)

    # Just like ODC, match rules will rule all. Even if their metadata has a "product_name" field.
    if len(matching_products) == 1:
        [product] = matching_products.values()

    return product, messages