Beispiel #1
0
    def data_walk(self, data_node, tree_node, result, tile=None):
        my_tiles = []
        # print(data_node)
        # pre-seed as much of the cache as we can during the data-walk
        if "@id" in data_node and "@type" in data_node:
            dataType = data_node["@type"][0] if isinstance(
                data_node["@type"], list) else data_node["@type"]
            self.idcache[data_node["@id"]] = dataType
        for k, v in data_node.items():
            if k in ["@id", "@type"]:
                continue
            # always a list
            for vi in v:
                if "@value" in vi:
                    # We're a literal value
                    value = vi["@value"]
                    clss = vi.get(
                        "@type",
                        "http://www.w3.org/2000/01/rdf-schema#Literal")
                    uri = None
                    is_literal = True
                else:
                    # We're an entity
                    uri = vi.get("@id", "")
                    try:
                        clss = vi["@type"][0]
                    except:
                        # {"@id": "http://something/.../"}
                        # with no @type. This is typically an external concept URI reference to a resource instance
                        # Look for it in the children of current node or in the entire document itself (if it's a resource instance ref)
                        possible_cls = []
                        for tn in tree_node["children"]:
                            if tn.startswith(k):
                                possible_cls.append(tn.replace(k, "")[1:])
                        if len(possible_cls) == 1:
                            clss = possible_cls[0]
                        else:
                            try:
                                # this may be a reference to an entity already defined elsewhere in the json document
                                # this can happen when there are more than 1 reference to the same resource instance
                                clss = self.get_cached_reference(uri)
                                vi["@type"] = clss
                            except:
                                raise ValueError(
                                    f"Multiple possible branches and no @type given: {vi}"
                                )

                    value = None
                    is_literal = False

                # Find precomputed possible branches by prop/class combination
                key = f"{k} {clss}"
                if key in tree_node["datatype"].ignore_keys():
                    # these are handled by the datatype itself
                    continue
                elif not key in tree_node["children"] and is_literal:
                    # grumble grumble
                    # model has xsd:string, default is rdfs:Literal
                    key = f"{k} http://www.w3.org/2001/XMLSchema#string"
                    if not key in tree_node["children"]:
                        raise ValueError(
                            f"property/class combination does not exist in model: {k} {clss}\nWhile processing: {vi}"
                        )
                elif not key in tree_node["children"]:
                    raise ValueError(
                        f"property/class combination does not exist in model: {k} {clss}\nWhile processing: {vi}"
                    )

                options = tree_node["children"][key]
                possible = []
                ignore = []

                for o in options:
                    # print(f"Considering:\n  {vi}\n  {o['name']}")
                    if is_literal and o["datatype"].is_a_literal_in_rdf():
                        if len(o["datatype"].validate_from_rdf(value)) == 0:
                            possible.append([o, value])
                        else:
                            print(
                                f"Could not validate {value} as a {o['datatype']}"
                            )
                    elif not is_literal and not o[
                            "datatype"].is_a_literal_in_rdf():
                        if self.is_concept_node(uri):
                            collid = o["config"]["collection_id"]
                            try:
                                if self.validate_concept_in_collection(
                                        uri, collid):
                                    possible.append([o, uri])
                                else:
                                    print(
                                        f"Concept URI {uri} not in Collection {collid}"
                                    )
                            except:
                                print(
                                    f"Errored testing concept {uri} in collection {collid}"
                                )
                        elif self.is_semantic_node(o):
                            possible.append([o, ""])
                        elif o["datatype"].accepts_rdf_uri(uri):
                            # print(f"datatype for {o['name']} accepts uri")
                            possible.append([o, uri])
                        else:
                            # This is when the current option doesn't match, but could be
                            # non-ambiguous resource-instance vs semantic node
                            continue
                    else:
                        raise ValueError("No possible match?")

                # print(f"Possible is: {[x[0]['name'] for x in possible]}")

                if not possible:
                    # print(f"Tried: {options}")
                    raise ValueError(
                        f"Data does not match any actual node, despite prop/class combination {k} {clss}:\n{vi}"
                    )
                elif len(possible) > 1:
                    # descend into data to check if there are further clarifying features
                    possible2 = []
                    for p in possible:
                        try:
                            # Don't really create data, so pass anonymous result dict
                            self.data_walk(vi, p[0], {}, tile)
                            possible2.append(p)
                        except:
                            # Not an option
                            pass
                    if not possible2:
                        raise ValueError(
                            "Considering branches, data does not match any node, despite a prop/class combination"
                        )
                    elif len(possible2) > 1:
                        raise ValueError(
                            f"Even after considering branches, data still matches more than one node: {[x[0]['name'] for x in possible2]}"
                        )
                    else:
                        branch = possible2[0]
                else:
                    branch = possible[0]

                if not self.is_semantic_node(branch[0]):
                    graph_node = branch[0]
                    node_value = graph_node["datatype"].from_rdf(vi)
                    # node_value might be None if the validation of the datatype fails
                    # XXX Should we check this here, or raise in the datatype?

                    # For resource-instances, the datatype doesn't know the ontology prop config
                    if graph_node["datatype"].references_resource_type():
                        if "graphs" in branch[0]["config"]:
                            gs = branch[0]["config"]["graphs"]
                            if len(gs) == 1:
                                # just select it
                                if "ontologyProperty" in gs[0]:
                                    node_value[0]["ontologyProperty"] = gs[0][
                                        "ontologyProperty"]
                                if "inverseOntologyProperty" in gs[0]:
                                    node_value[0][
                                        "inverseOntologyProperty"] = gs[0][
                                            "inverseOntologyProperty"]
                            else:
                                for g in gs:
                                    # Now test current node's class against graph's class
                                    # This isn't a guarantee, but close enough
                                    if vi["@type"][0] == g["rootclass"]:
                                        if "ontologyProperty" in g:
                                            node_value[0][
                                                "ontologyProperty"] = g[
                                                    "ontologyProperty"]
                                        if "inverseOntologyProperty" in g:
                                            node_value[0][
                                                "inverseOntologyProperty"] = g[
                                                    "inverseOntologyProperty"]
                                        break
                else:
                    # Might get checked in a cardinality n branch that shouldn't be repeated
                    node_value = None

                # We know now that it can go into the branch
                # Determine if we can collapse the data into a -list or not
                bnodeid = branch[0]["node_id"]

                # This is going to be the result passed down if we recurse
                bnode = {
                    "data": [],
                    "nodegroup_id": branch[0]["nodegroup_id"],
                    "cardinality": branch[0]["cardinality"]
                }

                if branch[0]["datatype"].collects_multiple_values(
                ) and tile and str(
                        tile.nodegroup.pk) == branch[0]["nodegroup_id"]:
                    # iterating through a root node *-list type
                    pass
                elif bnodeid == branch[0]["nodegroup_id"]:
                    # Used to pick the previous tile in loop which MIGHT be the parent (but might not)
                    parenttile_id = result[
                        "tile"].tileid if "tile" in result else None
                    tile = Tile(
                        tileid=uuid.uuid4(),
                        resourceinstance_id=self.resource.pk,
                        parenttile_id=parenttile_id,
                        nodegroup_id=branch[0]["nodegroup_id"],
                        data={},
                    )
                    self.resource.tiles.append(tile)
                    my_tiles.append(tile)
                elif "tile" in result and result["tile"]:
                    tile = result["tile"]

                if not hasattr(tile, "_json_ld"):
                    tile._json_ld = vi

                bnode["tile"] = tile
                if bnodeid in result:
                    if branch[0]["datatype"].collects_multiple_values():
                        # append to previous tile
                        if type(node_value) != list:
                            node_value = [node_value]
                        bnode = result[bnodeid][0]
                        bnode["data"].append(branch[1])
                        if not self.is_semantic_node(branch[0]):
                            try:
                                n = bnode["tile"].data[bnodeid]
                            except:
                                n = []
                                bnode["tile"].data[bnodeid] = n
                            if type(n) != list:
                                bnode["tile"].data[bnodeid] = [n]
                            bnode["tile"].data[bnodeid].extend(node_value)
                    elif branch[0]["cardinality"] != "n":
                        bnode = result[bnodeid][0]
                        if node_value == bnode["tile"].data[bnodeid]:
                            # No-op, attempt to readd same value
                            pass
                        else:
                            raise ValueError(
                                f"Attempt to add a value to cardinality 1, non-list node {k} {clss}:\n {vi}"
                            )
                    else:
                        bnode["data"].append(branch[1])
                        if not self.is_semantic_node(branch[0]):
                            # print(f"Adding to existing (n): {node_value}")
                            tile.data[bnodeid] = node_value
                        result[bnodeid].append(bnode)
                else:
                    if not self.is_semantic_node(branch[0]):
                        tile.data[bnodeid] = node_value
                    bnode["data"].append(branch[1])
                    result[bnodeid] = [bnode]

                if not is_literal:
                    # Walk down non-literal branches in the data
                    self.data_walk(vi, branch[0], bnode, tile)

        if self.shouldSortTiles:
            sortfuncs = settings.JSON_LD_SORT_FUNCTIONS
            if my_tiles:
                tile_ng_hash = {}
                for t in my_tiles:
                    try:
                        tile_ng_hash[t.nodegroup_id].append(t)
                    except KeyError:
                        tile_ng_hash[t.nodegroup_id] = [t]
                for (k, v) in tile_ng_hash.items():
                    if len(v) > 1:
                        for func in sortfuncs:
                            v.sort(key=func)
                        for t, i in zip(v, range(len(v))):
                            t.sortorder = i

        # Finally, after processing all of the branches for this node, check required nodes are present
        for path in tree_node["children"].values():
            for kid in path:
                if kid["required"] and not f"{kid['node_id']}" in result:
                    raise ValueError(
                        f"Required field not present: {kid['name']}")