def data_walk(self, data_node, tree_node, result, tile=None):
    """Walk one JSON-LD node against the model tree and load its values.

    Every property of ``data_node`` (other than ``@id`` / ``@type``) is
    expected to hold a list of value dicts.  Each value is matched to exactly
    one child of ``tree_node`` through a ``"<property> <class>"`` key, its
    datatype converts it with ``from_rdf``, and the outcome is written into a
    tile and recorded in ``result``.  Non-literal values are then walked
    recursively with the matched child as the new ``tree_node``.

    Args:
        data_node: dict for the node being processed.  ``@id`` / ``@type``,
            when both present, are recorded in ``self.idcache``.
        tree_node: dict describing the model node.  This method reads its
            ``"children"`` mapping (``"<property> <class>"`` -> list of
            candidate child nodes) and its ``"datatype"`` object.
        result: dict that is filled in place.  For each matched child it
            maps the child's ``node_id`` to a list of dicts with the keys
            ``data``, ``nodegroup_id``, ``cardinality`` and ``tile``.  If it
            contains a truthy ``"tile"`` entry, that tile is used for
            children that do not start a nodegroup of their own.
        tile: tile currently being filled, if any.

    Raises:
        ValueError: when a value has no usable class, does not match the
            model, stays ambiguous between several model nodes, would add a
            second different value to a cardinality-1 node, or when a
            required child node received no value.
    """
    my_tiles = []

    # Pre-seed as much of the id -> class cache as we can during the walk.
    if "@id" in data_node and "@type" in data_node:
        node_type = data_node["@type"]
        self.idcache[data_node["@id"]] = node_type[0] if isinstance(node_type, list) else node_type

    for k, v in data_node.items():
        if k in ("@id", "@type"):
            continue
        # Property values are always a list.
        for vi in v:
            if "@value" in vi:
                # We're a literal value
                value = vi["@value"]
                clss = vi.get("@type", "http://www.w3.org/2000/01/rdf-schema#Literal")
                uri = None
                is_literal = True
            else:
                # We're an entity
                uri = vi.get("@id", "")
                # @type may be a list or a plain string: the cache fallback
                # below stores a string back into vi["@type"], so indexing
                # blindly with [0] would yield a single character.
                raw_type = vi.get("@type")
                if isinstance(raw_type, list):
                    clss = raw_type[0] if raw_type else None
                else:
                    clss = raw_type
                if not clss:
                    # {"@id": "http://something/.../"} with no @type.
                    # This is typically an external concept URI or a reference
                    # to a resource instance.  Look for the class among the
                    # children of the current node, or in the rest of the
                    # document (if it is a resource instance reference).
                    possible_cls = [
                        tn.replace(k, "")[1:] for tn in tree_node["children"] if tn.startswith(k)
                    ]
                    if len(possible_cls) == 1:
                        clss = possible_cls[0]
                    else:
                        try:
                            # This may be a reference to an entity already
                            # defined elsewhere in the JSON document, which
                            # happens when the same resource instance is
                            # referenced more than once.
                            clss = self.get_cached_reference(uri)
                            vi["@type"] = clss
                        except Exception as err:
                            raise ValueError(
                                f"Multiple possible branches and no @type given: {vi}"
                            ) from err
                value = None
                is_literal = False

            # Find precomputed possible branches by prop/class combination
            key = f"{k} {clss}"
            if key in tree_node["datatype"].ignore_keys():
                # these are handled by the datatype itself
                continue
            elif key not in tree_node["children"] and is_literal:
                # The model has xsd:string while the default class for an
                # untyped literal is rdfs:Literal, so retry with xsd:string.
                key = f"{k} http://www.w3.org/2001/XMLSchema#string"
                if key not in tree_node["children"]:
                    raise ValueError(
                        f"property/class combination does not exist in model: {k} {clss}\nWhile processing: {vi}"
                    )
            elif key not in tree_node["children"]:
                raise ValueError(
                    f"property/class combination does not exist in model: {k} {clss}\nWhile processing: {vi}"
                )

            options = tree_node["children"][key]
            # Candidate [model node, value] pairs this data value could fill.
            possible = []

            for o in options:
                if is_literal and o["datatype"].is_a_literal_in_rdf():
                    if len(o["datatype"].validate_from_rdf(value)) == 0:
                        possible.append([o, value])
                    else:
                        print(f"Could not validate {value} as a {o['datatype']}")
                elif not is_literal and not o["datatype"].is_a_literal_in_rdf():
                    if self.is_concept_node(uri):
                        collid = o["config"]["collection_id"]
                        try:
                            if self.validate_concept_in_collection(uri, collid):
                                possible.append([o, uri])
                            else:
                                print(f"Concept URI {uri} not in Collection {collid}")
                        except Exception:
                            # Best effort: a failing lookup only rules out
                            # this option, it does not abort the walk.
                            print(f"Errored testing concept {uri} in collection {collid}")
                    elif self.is_semantic_node(o):
                        possible.append([o, ""])
                    elif o["datatype"].accepts_rdf_uri(uri):
                        possible.append([o, uri])
                    else:
                        # This is when the current option doesn't match, but
                        # could be non-ambiguous resource-instance vs
                        # semantic node
                        continue
                else:
                    raise ValueError("No possible match?")

            if not possible:
                raise ValueError(
                    f"Data does not match any actual node, despite prop/class combination {k} {clss}:\n{vi}"
                )
            elif len(possible) > 1:
                # Descend into the data to check if there are further
                # clarifying features.
                possible2 = []
                for p in possible:
                    try:
                        # Don't really create data, so pass an anonymous
                        # result dict.
                        self.data_walk(vi, p[0], {}, tile)
                    except Exception:
                        # Not an option
                        continue
                    possible2.append(p)
                if not possible2:
                    raise ValueError(
                        "Considering branches, data does not match any node, despite a prop/class combination"
                    )
                elif len(possible2) > 1:
                    raise ValueError(
                        f"Even after considering branches, data still matches more than one node: {[x[0]['name'] for x in possible2]}"
                    )
                else:
                    branch = possible2[0]
            else:
                branch = possible[0]

            if not self.is_semantic_node(branch[0]):
                graph_node = branch[0]
                node_value = graph_node["datatype"].from_rdf(vi)
                # node_value might be None if the validation of the datatype fails
                # XXX Should we check this here, or raise in the datatype?

                # For resource-instances, the datatype doesn't know the
                # ontology prop config
                if graph_node["datatype"].references_resource_type():
                    if "graphs" in branch[0]["config"]:
                        gs = branch[0]["config"]["graphs"]
                        if len(gs) == 1:
                            # just select it
                            if "ontologyProperty" in gs[0]:
                                node_value[0]["ontologyProperty"] = gs[0]["ontologyProperty"]
                            if "inverseOntologyProperty" in gs[0]:
                                node_value[0]["inverseOntologyProperty"] = gs[0]["inverseOntologyProperty"]
                        else:
                            for g in gs:
                                # Test the current node's class against the
                                # graph's class.  This isn't a guarantee, but
                                # close enough.  The resolved class is used
                                # rather than vi["@type"][0] because @type
                                # may be a string or absent at this point.
                                if clss == g["rootclass"]:
                                    if "ontologyProperty" in g:
                                        node_value[0]["ontologyProperty"] = g["ontologyProperty"]
                                    if "inverseOntologyProperty" in g:
                                        node_value[0]["inverseOntologyProperty"] = g["inverseOntologyProperty"]
                                    break
            else:
                # Might get checked in a cardinality n branch that shouldn't
                # be repeated
                node_value = None

            # We know now that it can go into the branch.
            # Determine if we can collapse the data into a -list or not.
            bnodeid = branch[0]["node_id"]

            # This is going to be the result passed down if we recurse
            bnode = {
                "data": [],
                "nodegroup_id": branch[0]["nodegroup_id"],
                "cardinality": branch[0]["cardinality"],
            }

            if (
                branch[0]["datatype"].collects_multiple_values()
                and tile
                and str(tile.nodegroup.pk) == branch[0]["nodegroup_id"]
            ):
                # iterating through a root node *-list type: keep the tile
                pass
            elif bnodeid == branch[0]["nodegroup_id"]:
                # The branch node is the root of its nodegroup: start a tile.
                # Used to pick the previous tile in loop which MIGHT be the
                # parent (but might not)
                parenttile_id = result["tile"].tileid if "tile" in result else None
                tile = Tile(
                    tileid=uuid.uuid4(),
                    resourceinstance_id=self.resource.pk,
                    parenttile_id=parenttile_id,
                    nodegroup_id=branch[0]["nodegroup_id"],
                    data={},
                )
                self.resource.tiles.append(tile)
                my_tiles.append(tile)
            elif "tile" in result and result["tile"]:
                tile = result["tile"]

            # Remember the first JSON-LD value that touched this tile.
            if not hasattr(tile, "_json_ld"):
                tile._json_ld = vi

            bnode["tile"] = tile
            if bnodeid in result:
                if branch[0]["datatype"].collects_multiple_values():
                    # append to previous tile
                    if not isinstance(node_value, list):
                        node_value = [node_value]
                    bnode = result[bnodeid][0]
                    bnode["data"].append(branch[1])
                    if not self.is_semantic_node(branch[0]):
                        try:
                            n = bnode["tile"].data[bnodeid]
                        except KeyError:
                            n = []
                            bnode["tile"].data[bnodeid] = n
                        if not isinstance(n, list):
                            bnode["tile"].data[bnodeid] = [n]
                        bnode["tile"].data[bnodeid].extend(node_value)
                elif branch[0]["cardinality"] != "n":
                    bnode = result[bnodeid][0]
                    if node_value == bnode["tile"].data[bnodeid]:
                        # No-op, attempt to re-add the same value
                        pass
                    else:
                        raise ValueError(
                            f"Attempt to add a value to cardinality 1, non-list node {k} {clss}:\n {vi}"
                        )
                else:
                    bnode["data"].append(branch[1])
                    if not self.is_semantic_node(branch[0]):
                        tile.data[bnodeid] = node_value
                    result[bnodeid].append(bnode)
            else:
                if not self.is_semantic_node(branch[0]):
                    tile.data[bnodeid] = node_value
                bnode["data"].append(branch[1])
                result[bnodeid] = [bnode]

            if not is_literal:
                # Walk down non-literal branches in the data
                self.data_walk(vi, branch[0], bnode, tile)

    if self.shouldSortTiles:
        sortfuncs = settings.JSON_LD_SORT_FUNCTIONS
        if my_tiles:
            # Group the tiles created by this call by nodegroup, then number
            # the tiles inside each group after applying the sort functions.
            tiles_by_nodegroup = {}
            for t in my_tiles:
                tiles_by_nodegroup.setdefault(t.nodegroup_id, []).append(t)
            for siblings in tiles_by_nodegroup.values():
                if len(siblings) > 1:
                    for func in sortfuncs:
                        siblings.sort(key=func)
                    for i, t in enumerate(siblings):
                        t.sortorder = i

    # Finally, after processing all of the branches for this node, check
    # required nodes are present
    for path in tree_node["children"].values():
        for kid in path:
            if kid["required"] and f"{kid['node_id']}" not in result:
                raise ValueError(f"Required field not present: {kid['name']}")