def relationship_edges(schema_graph_nx: nx.MultiDiGraph, class_add_mod: dict, **kwargs) -> nx.MultiDiGraph:
    """Add the relationship edges declared by one class record to the schema graph.

    For every relationship name in ``kwargs`` that is also a key of
    ``class_add_mod``, one edge is added per referenced node. The edge connects
    the referenced node (name extracted from its ``"@id"``) and the record's own
    ``"rdfs:label"``; the edge key is the relationship label. Self-loops are
    never added.

    Notes:
    =====
    # pass the below dictionary as keyword arguments (``**rel_dict``) to relationship_edges().
    # "in" indicates that the relationship has an in-edges behaviour
    #      (edge goes from the referenced node to this record's label).
    # "out" indicates that the relationship has an out-edges behaviour
    #      (edge goes from this record's label to the referenced node).
    rel_dict = {
        "rdfs:subClassOf": {
            "parentOf": "in"
        },
        "schema:domainIncludes": {
            "domainValue": "in"
        },
        "sms:requiresDependency": {
            "requiresDependency": "out"
        },
        "sms:requiresComponent": {
            "requiresComponent": "out"
        },
        "schema:rangeIncludes": {
            "rangeValue": "out"
        }
    }

    Args:
        schema_graph_nx: graph that receives the edges; it is modified in place.
        class_add_mod: class record; must contain ``"rdfs:label"`` whenever an
            edge has to be added. A relationship value may be a list of
            ``{"@id": ...}`` dicts or a single such dict; any other value type
            is ignored.
        **kwargs: mapping of relationship name -> ``{edge_label: "in" | "out"}``.

    Returns:
        The same ``schema_graph_nx`` object, for convenience.

    Raises:
        ValueError: if a direction other than ``"in"`` or ``"out"`` is given for
            a relationship that actually has nodes to connect.
    """
    for rel, rel_lab_node_type in kwargs.items():
        for rel_label, node_type in rel_lab_node_type.items():
            if rel not in class_add_mod:
                continue

            # Normalise the two accepted shapes (single dict / list of dicts)
            # so that a single code path adds the edges.
            targets = class_add_mod[rel]
            if isinstance(targets, dict):
                targets = [targets]
            elif not isinstance(targets, list):
                continue
            if not targets:
                continue

            # An unknown direction previously surfaced as an UnboundLocalError
            # or silently reused the endpoints of the previous edge.
            if node_type not in ("in", "out"):
                raise ValueError(
                    "unknown edge direction %r for relationship %r; expected 'in' or 'out'"
                    % (node_type, rel)
                )

            for target in targets:
                target_name = extract_name_from_uri_or_curie(target["@id"])
                own_label = class_add_mod["rdfs:label"]
                if node_type == "in":
                    n1, n2 = target_name, own_label
                else:
                    n1, n2 = own_label, target_name

                # do not allow self-loops
                if n1 != n2:
                    schema_graph_nx.add_edge(n1, n2, key=rel_label)

    return schema_graph_nx
def validate_property_label(self, label_uri):
    """Check that the first character of a property label is lower case.

    Args:
        label_uri: URI or CURIE of the property; the label is obtained from it
            with ``extract_name_from_uri_or_curie``.

    Raises:
        AssertionError: if the extracted label is empty or does not start with
            a lower-case character.
    """
    label = extract_name_from_uri_or_curie(label_uri)
    # Slicing instead of indexing: an empty label fails the assertion
    # (``"".islower()`` is False) rather than raising IndexError.
    # NOTE: assertions are skipped when Python runs with -O.
    assert label[:1].islower(), (
        "property label %r must start with a lower-case character" % (label,)
    )
def validate_class_label(self, label_uri):
    """Check that the first character of a class label is capitalized.

    Args:
        label_uri: URI or CURIE of the class; the label is obtained from it
            with ``extract_name_from_uri_or_curie``.

    Raises:
        AssertionError: if the extracted label is empty or does not start with
            an upper-case character.
    """
    label = extract_name_from_uri_or_curie(label_uri)
    # Slicing instead of indexing: an empty label fails the assertion
    # (``"".isupper()`` is False) rather than raising IndexError.
    # NOTE: assertions are skipped when Python runs with -O.
    assert label[:1].isupper(), (
        "class label %r must start with an upper-case character" % (label,)
    )
def check_whether_atid_and_label_match(self, record):
    """Assert that the name taken from ``record["@id"]`` equals ``record["rdfs:label"]``.

    Args:
        record: mapping with at least the keys ``"@id"`` and ``"rdfs:label"``.

    Raises:
        AssertionError: if the two values differ; the message contains the
            ``repr`` of the whole record.
    """
    name_from_id = extract_name_from_uri_or_curie(record["@id"])
    declared_label = record["rdfs:label"]
    ids_agree = name_from_id == declared_label
    # The message is only formatted when the assertion actually fails.
    assert ids_agree, "id and label not match: %r" % record
def load_schema_into_networkx(schema):
    """Build a ``nx.MultiDiGraph`` from a JSON-LD style schema dictionary.

    Every record in ``schema["@graph"]`` becomes a node named after its
    ``"rdfs:label"``. The node attributes are the record's items with the key
    prefix removed (``"sms:required"`` -> ``"required"``, ``"@id"`` -> ``"id"``),
    plus ``"uri"``, ``"description"`` and ``"validationRules"``.

    Relationship keys of a record are turned into keyed edges:

    ==========================  ====================  =========================
    record key                  edge key              direction
    ==========================  ====================  =========================
    ``rdfs:subClassOf``         ``parentOf``          referenced node -> record
    ``sms:requiresDependency``  ``requiresDependency``  record -> referenced node
    ``sms:requiresComponent``   ``requiresComponent``   record -> referenced node
    ``schema:rangeIncludes``    ``rangeValue``        record -> referenced node
    ``schema:domainIncludes``   ``domainValue``       referenced node -> record
    ==========================  ====================  =========================

    A relationship value may be a list of ``{"@id": ...}`` dicts or a single
    such dict; other value types are ignored. Self-loops are never added.

    Args:
        schema: mapping with an ``"@graph"`` key holding an iterable of records.
            Each record must provide ``"rdfs:label"``, ``"@id"`` and
            ``"rdfs:comment"``.

    Returns:
        The populated ``nx.MultiDiGraph``.

    Raises:
        KeyError: if a record lacks one of the mandatory keys listed above, or a
            referenced entry lacks ``"@id"``.
    """
    # (record key, edge key, True when the edge points *into* the record's node)
    relationships = (
        ("rdfs:subClassOf", "parentOf", True),
        ("sms:requiresDependency", "requiresDependency", False),
        ("sms:requiresComponent", "requiresComponent", False),
        ("schema:rangeIncludes", "rangeValue", False),
        ("schema:domainIncludes", "domainValue", True),
    )

    G = nx.MultiDiGraph()
    for record in schema["@graph"]:
        label = record["rdfs:label"]

        # creation of nodes: strip the namespace prefix / leading "@" from keys
        node = {}
        for k, value in record.items():
            if ":" in k:
                node[k.split(":")[1]] = value
            elif "@" in k:
                node[k[1:]] = value
            else:
                node[k] = value

        # creation of edges: one shared code path for every relationship, so
        # list and single-dict values are treated the same way everywhere
        for rel, edge_key, incoming in relationships:
            if rel not in record:
                continue
            targets = record[rel]
            if isinstance(targets, dict):
                targets = [targets]
            elif not isinstance(targets, list):
                continue
            for target in targets:
                other = extract_name_from_uri_or_curie(target["@id"])
                # do not allow self-loops
                if other == label:
                    continue
                if incoming:
                    G.add_edge(other, label, key=edge_key)
                else:
                    G.add_edge(label, other, key=edge_key)

        # check schema generator (JSON validation schema gen)
        # NOTE(review): this flag is compared against "sms:True" while
        # "required" below is compared against "sms:true" - confirm the
        # intended capitalisation with the schema authors.
        if "requiresChildAsValue" in node and node["requiresChildAsValue"]["@id"] == "sms:True":
            node["requiresChildAsValue"] = True

        if "required" in node:
            node["required"] = "sms:true" == record["sms:required"]

        # not sure if this is required?
        node["validationRules"] = record.get("sms:validationRules", [])

        node["uri"] = record["@id"]
        node["description"] = record["rdfs:comment"]

        # Nodes implicitly created by add_edge() above receive their
        # attributes here once their own record is processed.
        G.add_node(label, **node)

    return G
def edit_schema_object_nx(self, schema_object: dict) -> None:
    """Replace an existing class in the schema graph and in the JSON-LD schema.

    ``schema_object`` is converted to a graph with ``class_to_node``. For every
    node of that graph which already exists in the graph returned by
    ``self.get_nx_schema()``:

    * ``"comment"`` (together with ``"description"``), ``"required"``,
      ``"displayName"`` and ``"validationRules"`` are copied over when the
      attribute exists on both the old and the new node;
    * for ``"subClassOf"``, ``"requiresDependency"``, ``"requiresComponent"``
      and ``"rangeIncludes"`` the edges described by the old attribute value
      are removed, the edges described by the new value are added, and the
      attribute itself is replaced.

    Edge keys and directions:

    * ``subClassOf``         -> key ``"parentOf"``, parent -> node (in-edge)
    * ``requiresDependency`` -> key ``"requiresDependency"``, node -> target
    * ``requiresComponent``  -> key ``"requiresComponent"``, node -> target
    * ``rangeIncludes``      -> key ``"rangeValue"``, node -> target

    Finally ``self.schema_nx`` is set to the modified graph and the first entry
    of ``self.schema["@graph"]`` whose ``"rdfs:label"`` equals the label of
    ``schema_object`` is replaced by ``schema_object``.

    Args:
        schema_object: the class record that replaces the existing one; must
            contain ``"rdfs:label"``.

    Returns:
        None. ``self.schema_nx`` and ``self.schema`` are updated in place.
    """

    def _referenced_names(value):
        """Return the node names referenced by a relationship attribute value."""
        if isinstance(value, list):
            # list of {"@id": <uri or curie>} entries
            return [extract_name_from_uri_or_curie(entry["@id"]) for entry in value]
        if isinstance(value, dict):
            # single entry: every value of the dict is treated as a uri/curie
            return [extract_name_from_uri_or_curie(curie) for curie in value.values()]
        return []

    # attributes that can be replaced without touching any edges
    direct_fields = ("required", "displayName", "validationRules")
    # (node attribute, edge key, True when edges point *into* the edited node)
    relationships = (
        ("subClassOf", "parentOf", True),
        ("requiresDependency", "requiresDependency", False),
        ("requiresComponent", "requiresComponent", False),
        ("rangeIncludes", "rangeValue", False),
    )

    node_to_replace = class_to_node(class_to_convert=schema_object)

    # get the networkx graph associated with the SchemaExplorer object in its current state
    schema_graph_nx = self.get_nx_schema()

    # Look each replacement node up directly instead of scanning all master
    # nodes: the master graph may gain nodes through add_edge() below, which
    # must not happen while its node view is being iterated.
    for replace_node, replace_data in node_to_replace.nodes(data=True):
        if replace_node not in schema_graph_nx:
            continue
        data = schema_graph_nx.nodes[replace_node]  # live attribute dict

        if "comment" in data and "comment" in replace_data:
            data["comment"] = replace_data["comment"]
            data["description"] = replace_data["description"]

        for field in direct_fields:
            if field in data and field in replace_data:
                data[field] = replace_data[field]

        # General strategy for relationship attributes: remove the edges that
        # existed formerly, then add new edges based on the replacement node.
        for attr, edge_key, incoming in relationships:
            if attr not in replace_data:
                continue

            if attr in data:
                stale_edges = [
                    (old, replace_node, edge_key) if incoming else (replace_node, old, edge_key)
                    for old in _referenced_names(data[attr])
                ]
                # Only edges carrying this relationship's key are removed;
                # remove_edges_from() ignores edges that are not in the graph.
                schema_graph_nx.remove_edges_from(stale_edges)

            for target_node in _referenced_names(replace_data[attr]):
                # do not allow self-loops
                if target_node == replace_node:
                    continue
                if incoming:
                    schema_graph_nx.add_edge(target_node, replace_node, key=edge_key)
                else:
                    schema_graph_nx.add_edge(replace_node, target_node, key=edge_key)

            # once the edges have been updated, change the contents of the node
            data[attr] = replace_data[attr]

    # set the networkx schema graph to the modified networkx schema
    self.schema_nx = schema_graph_nx

    # replace the modified class in the original JSON-LD schema
    # (not in the data/ folder though)
    for i, schema_class in enumerate(self.schema["@graph"]):
        if schema_class["rdfs:label"] == schema_object["rdfs:label"]:
            # TODO: validate that the class to be modified follows the structure
            # for any generic class (node), e.g. validate_class_schema(schema_object)
            self.schema["@graph"][i] = schema_object
            break
def explore_class(self, schema_class):
    """Collect details about one schema class from ``self.schema_nx``.

    Args:
        schema_class: name of a node in ``self.schema_nx``.

    Returns:
        dict with the keys ``properties``, ``description``, ``uri``, ``usage``
        (always ``'NA'``), ``child_classes``, ``subClassOf``, ``range``,
        ``dependencies``, ``validation_rules``, ``required``,
        ``component_dependencies`` and ``parent_classes`` (the same list as
        ``subClassOf``). ``displayName`` is included only when the node has
        that attribute.
    """
    attrs = self.schema_nx.nodes[schema_class]

    def _linked_names(attr_name):
        # A relationship attribute holds either one {"@id": ...} dict or an
        # iterable of them; absent attributes yield an empty list.
        if attr_name not in attrs:
            return []
        raw = attrs[attr_name]
        entries = [raw] if isinstance(raw, dict) else raw
        return [extract_name_from_uri_or_curie(entry["@id"]) for entry in entries]

    parents = _linked_names("subClassOf")
    requires_range = _linked_names("rangeIncludes")
    requires_dependencies = _linked_names("requiresDependency")
    requires_components = _linked_names("requiresComponent")

    required = attrs["required"] if "required" in attrs else False
    validation_rules = attrs["validationRules"] if "validationRules" in attrs else []

    # TODO: make class_info keys here the same as keys in schema graph nodes
    # (e.g. schema_class above); note that downstream code using explore_class
    # would have to be updated as well (e.g. csv_2_schemaorg)
    class_info = {}
    class_info['properties'] = self.find_class_specific_properties(schema_class)
    class_info['description'] = attrs['description']
    class_info['uri'] = curie2uri(attrs["uri"], namespaces)
    # usage lookup (find_class_usages) is disabled; a placeholder is reported
    class_info['usage'] = 'NA'
    class_info['child_classes'] = self.find_adjacent_child_classes(schema_class)
    class_info['subClassOf'] = parents
    class_info['range'] = requires_range
    class_info['dependencies'] = requires_dependencies
    class_info['validation_rules'] = validation_rules
    class_info['required'] = required
    class_info['component_dependencies'] = requires_components
    # parent classes are reported from the "subClassOf" attribute rather than
    # from find_parent_classes()
    class_info['parent_classes'] = parents

    if "displayName" in attrs:
        class_info['displayName'] = attrs['displayName']

    return class_info