Exemple #1
0
def find_matching_product(rules, doc):
    """Return the ``'type'`` of the single rule whose metadata is contained in ``doc``.

    :param rules: iterable of rule mappings, each read via its ``'metadata'``
        and ``'type'`` keys
    :param doc: dataset metadata document
    :raises BadMatch: if no rule matches, or if more than one rule matches
    :rtype: datacube.model.DatasetType
    """
    candidates = []
    for rule in rules:
        if changes.contains(doc, rule['metadata']):
            candidates.append(rule)

    # Exactly one match is the only successful outcome.
    if len(candidates) == 1:
        return candidates[0]['type']

    if candidates:
        raise BadMatch('Too many matching Products found for %s. Matched %s.' % (
            doc.get('id', 'unidentified'), candidates))

    raise BadMatch('No matching Product found for %s' % json.dumps(doc, indent=4))
Exemple #2
0
    def match(doc):
        """Return the product of the single rule whose signature is contained in ``doc``.

        Rules are taken from ``rules`` in the enclosing scope.

        :param doc: dataset metadata document
        :raises BadMatch: if no rule matches, or if several rules match
        """
        candidates = []
        for rule in rules:
            if changes.contains(doc, rule.signature):
                candidates.append(rule.product)

        if len(candidates) == 1:
            return candidates[0]

        # Everything below is a failure; the dataset id only labels the error.
        doc_id = doc.get('id', '<missing id>')

        if not candidates:
            raise BadMatch('No matching Product found for dataset %s' % doc_id)

        product_names = ','.join(product.name for product in candidates)
        raise BadMatch('Auto match failed, dataset %s matches several products:\n  %s' % (
            doc_id,
            product_names))
Exemple #3
0
def load_rules_from_file(filename, index):
    """Read matching rules from ``filename`` and resolve each rule's product.

    The rules are the second element of the first item yielded by
    ``read_documents``. Each rule's ``'type'`` (a product name) is replaced
    in place by the product object looked up in ``index``.

    :param filename: path of the rules document
    :param index: object exposing ``products.get_by_name``
    :return: the rules with resolved ``'type'`` entries, or ``None`` (after
        logging an error) if a product is unknown or does not satisfy its
        own rule
    """
    first_document = next(read_documents(Path(filename)))
    rules = first_document[1]
    # TODO: verify schema

    for rule in rules:
        product_name = rule['type']
        product = index.products.get_by_name(product_name)

        if not product:
            _LOG.error('DatasetType %s does not exists', product_name)
            return None

        # A rule must at least match the product it points at.
        rule_matches_own_product = changes.contains(product.metadata_doc, rule['metadata'])
        if not rule_matches_own_product:
            _LOG.error('DatasetType %s can\'t be matched by its own rule', product_name)
            return None

        rule['type'] = product

    return rules
Exemple #4
0
def find_matching_product(rules, doc):
    """Return the ``'type'`` of the single rule whose metadata is contained in ``doc``.

    :param rules: sequence of rule mappings, each read via its ``'metadata'``
        and ``'type'`` keys
    :param doc: dataset metadata document (a mapping)
    :raises BadMatch: if no rules were given, if no rule matches, or if more
        than one rule matches
    :rtype: datacube.model.DatasetType
    """
    matched = [
        rule for rule in rules if changes.contains(doc, rule['metadata'])
    ]
    if not matched:
        # provide user with information about the failure
        if len(rules) == 0:
            raise BadMatch('No rules provided.')
        elif len(rules) == 1:
            metadata = rules[0]['metadata']
            # Only show the dataset fields the rule actually looks at.
            relevant_doc = {k: v for k, v in doc.items() if k in metadata}
            # Argument order must follow the labels in the message: the
            # dataset's fields first, then the product rule's metadata.
            raise BadMatch(
                'Dataset metadata did not match product rules.'
                '\nDataset metadata:\n %s\n'
                '\nProduct metadata:\n %s\n' % (
                    json.dumps(relevant_doc, indent=4),
                    json.dumps(metadata, indent=4)))
        else:
            raise BadMatch('No matching Product found for %s' %
                           json.dumps(doc, indent=4))
    if len(matched) > 1:
        raise BadMatch('Too many matching Products found for %s. Matched %s.' %
                       (doc.get('id', 'unidentified'), matched))
    return matched[0]['type']
Exemple #5
0
 def matches(doc, rule):
     """Return the result of ``changes.contains`` for ``doc`` and ``rule.signature``."""
     is_contained = changes.contains(doc, rule.signature)
     return is_contained
Exemple #6
0
def test_changes_contains():
    """Check ``contains`` against a table of ``(args, kwargs, expected)`` cases."""
    case_sensitive = {"case_sensitive": True}
    cases = [
        (("bob", "BOB"), {}, True),
        (("bob", "BOB"), case_sensitive, False),
        ((1, 1), {}, True),
        ((1, {}), {}, False),
        # same as above, but with None interpreted as {}
        ((1, None), {}, False),
        (({}, 1), {}, False),
        ((None, 1), {}, False),
        (({}, {}), {}, True),
        (({}, None), {}, True),
        # this one is arguable...
        ((None, {}), {}, False),
        ((None, None), {}, True),
        (({"a": 1, "b": 2}, {"a": 1}), {}, True),
        (({"a": {"b": "BOB"}}, {"a": {"b": "bob"}}), {}, True),
        (({"a": {"b": "BOB"}}, {"a": {"b": "bob"}}), case_sensitive, False),
        (("bob", "alice"), {}, False),
        (({"a": 1}, {"a": 1, "b": 2}), {}, False),
        (({"a": {"b": 1}}, {"a": {}}), {}, True),
        (({"a": {"b": 1}}, {"a": None}), {}, True),
    ]

    for args, kwargs, expected in cases:
        assert contains(*args, **kwargs) is expected
Exemple #7
0
def _match_product(
    dataset_doc: Dict, product_definitions: Dict[str, Dict]
) -> Tuple[Optional[Dict], List[ValidationMessage]]:
    """Match the given dataset to a product definition.

    Two sources are consulted:

    * the product name the dataset itself states (``product.name``, falling
      back to ``properties["odc:product"]``), when that name is a key of
      ``product_definitions``;
    * the definitions whose ``"metadata"`` section is contained in the
      dataset document.

    A single metadata match always wins over the stated name.

    :param dataset_doc: the dataset metadata document
    :param product_definitions: product definitions keyed by product name
    :return: ``(product, messages)`` where ``product`` is the chosen
        definition (or ``None`` if nothing could be chosen) and ``messages``
        lists the problems and oddities found while matching
    """

    product = None

    # EO3 datasets often put the product name directly inside.
    specified_product_name = get_path(dataset_doc, ("product", "name"),
                                      default=None)
    specified_product_name = specified_product_name or get_path(
        dataset_doc, ("properties", "odc:product"), default=None)

    if specified_product_name and (specified_product_name
                                   in product_definitions):
        product = product_definitions[specified_product_name]

    matching_products = {
        name: definition
        for name, definition in product_definitions.items()
        if changes.contains(dataset_doc, definition["metadata"])
    }

    # If we have nothing, give up!
    if (not matching_products) and (not product):
        # Find the product that most closely matches it, to helpfully show the differences!
        candidates = [
            (name, tuple(_get_product_mismatch_reasons(dataset_doc, definition)))
            for name, definition in product_definitions.items()
        ]

        if not candidates:
            # No definitions at all: there is no "closest" product to describe,
            # and the hint formatter must not be handed ``None``.
            return None, [
                _error(
                    "unknown_product",
                    "Dataset does not match the given products",
                    hint="No product definitions were supplied to match against.",
                )
            ]

        # min() keeps the first of equally-close candidates, like a strict
        # "fewer differences than the best so far" scan would.
        closest_product_name, closest_differences = min(
            candidates, key=lambda candidate: len(candidate[1]))

        difference_hint = _differences_as_hint(closest_differences)
        return None, [
            _error(
                "unknown_product",
                "Dataset does not match the given products",
                hint=
                f"Closest match is {closest_product_name}, with differences:"
                f"\n{difference_hint}",
            )
        ]

    messages = []

    # Only comment on the dataset's product claim when it actually makes one;
    # otherwise we would report that it "claims to be product None".
    if specified_product_name and specified_product_name not in matching_products:
        if product:
            difference_hint = _differences_as_hint(
                _get_product_mismatch_reasons(dataset_doc, product))
            messages.append(
                _info(
                    "strange_product_claim",
                    f"Dataset claims to be product {specified_product_name!r}, but doesn't match its fields",
                    hint=f"{difference_hint}",
                ))
        else:
            messages.append(
                _info(
                    "unknown_product_claim",
                    f"Dataset claims to be product {specified_product_name!r}, but it wasn't supplied.",
                ))

    if len(matching_products) > 1:
        matching_names = ", ".join(matching_products)
        if specified_product_name:
            clash_hint = (
                f"Maybe you need more fields in the 'metadata' section?\n"
                f"Claims to be a {specified_product_name!r}, and matches {matching_names!r}"
            )
        else:
            clash_hint = (
                f"Maybe you need more fields in the 'metadata' section?\n"
                f"Matches {matching_names!r}"
            )
        messages.append(
            _error(
                "product_match_clash",
                "Multiple products match the given dataset",
                hint=clash_hint,
            ))
        # (We won't pick one from the bunch here. Maybe they already matched one above to use in continuing validation.)

    # Just like ODC, match rules will rule all. Even if their metadata has a "product_name" field.
    if len(matching_products) == 1:
        [product] = matching_products.values()

    return product, messages