Beispiel #1
0
def relationship_edges(schema_graph_nx: nx.MultiDiGraph, class_add_mod: dict,
                       **kwargs) -> nx.MultiDiGraph:
    """Add the relationship edges declared by one schema class to the graph.

    Every keyword argument names a relationship key that may be present in
    ``class_add_mod`` and maps it to a dict of ``{edge_label: direction}``:

    * ``"in"``  - the edge runs from the referenced node to this class.
    * ``"out"`` - the edge runs from this class to the referenced node.

    Typical usage is ``relationship_edges(graph, cls, **rel_dict)`` with::

        rel_dict = {
            "rdfs:subClassOf": {"parentOf": "in"},
            "schema:domainIncludes": {"domainValue": "in"},
            "sms:requiresDependency": {"requiresDependency": "out"},
            "sms:requiresComponent": {"requiresComponent": "out"},
            "schema:rangeIncludes": {"rangeValue": "out"},
        }

    Args:
        schema_graph_nx: graph that receives the edges (mutated in place).
        class_add_mod: class record; must contain ``"rdfs:label"`` whenever
            one of the requested relationships is present.  A relationship
            value is either one ``{"@id": ...}`` dict or a list of them.
        **kwargs: relationship configuration as described above.

    Returns:
        The same graph object that was passed in.

    Raises:
        ValueError: if a direction other than ``"in"`` or ``"out"`` is given
            for a relationship that is present on the class.
    """
    for rel, rel_lab_node_type in kwargs.items():
        if rel not in class_add_mod:
            continue

        # Normalise the two accepted shapes (single dict / list of dicts).
        referenced = class_add_mod[rel]
        if isinstance(referenced, dict):
            referenced = [referenced]
        elif not isinstance(referenced, list):
            continue

        for rel_label, node_type in rel_lab_node_type.items():
            if node_type not in ("in", "out"):
                # Previously this fell through with unbound (or stale)
                # endpoints; fail with an explicit message instead.
                raise ValueError(
                    "unknown edge direction %r for relationship %r "
                    "(expected 'in' or 'out')" % (node_type, rel))

            this_node = class_add_mod["rdfs:label"]
            for ref in referenced:
                other_node = extract_name_from_uri_or_curie(ref["@id"])

                # do not allow self-loops
                if other_node == this_node:
                    continue

                if node_type == "in":
                    schema_graph_nx.add_edge(other_node, this_node, key=rel_label)
                else:
                    schema_graph_nx.add_edge(this_node, other_node, key=rel_label)

    return schema_graph_nx
Beispiel #2
0
 def validate_property_label(self, label_uri):
     """Check that a property label starts with a lower-case character.

     Args:
         label_uri: value handed to ``extract_name_from_uri_or_curie``; the
             name it returns is what gets checked.

     Raises:
         AssertionError: if the extracted name is empty or its first
             character is not lower case.
     """
     label = extract_name_from_uri_or_curie(label_uri)
     # Slice instead of index so an empty label fails the check rather than
     # raising IndexError.
     assert label[:1].islower(), (
         "property label must start with a lower-case character: %r" % (label,))
Beispiel #3
0
 def validate_class_label(self, label_uri):
     """Check that a class label starts with an upper-case character.

     Args:
         label_uri: value handed to ``extract_name_from_uri_or_curie``; the
             name it returns is what gets checked.

     Raises:
         AssertionError: if the extracted name is empty or its first
             character is not upper case.
     """
     label = extract_name_from_uri_or_curie(label_uri)
     # Slice instead of index so an empty label fails the check rather than
     # raising IndexError.
     assert label[:1].isupper(), (
         "class label must start with an upper-case character: %r" % (label,))
Beispiel #4
0
 def check_whether_atid_and_label_match(self, record):
     """Assert that the name in ``record["@id"]`` equals ``record["rdfs:label"]``.

     The name is obtained by passing the ``"@id"`` value through
     ``extract_name_from_uri_or_curie``.

     Raises:
         AssertionError: if the two values differ; the message contains the
             repr of the whole record.
     """
     name_from_id = extract_name_from_uri_or_curie(record["@id"])
     assert name_from_id == record["rdfs:label"], (
         "id and label not match: %r" % record
     )
Beispiel #5
0
# (relationship key in the record, edge key in the graph,
#  True when the edge points *into* the record's own node)
_SCHEMA_EDGE_RULES = (
    ("rdfs:subClassOf", "parentOf", True),
    ("sms:requiresDependency", "requiresDependency", False),
    ("sms:requiresComponent", "requiresComponent", False),
    ("schema:rangeIncludes", "rangeValue", False),
    ("schema:domainIncludes", "domainValue", True),
)


def _add_record_edges(graph, record, relationship, edge_key, incoming):
    """Add one edge per node referenced by ``record[relationship]``.

    The relationship value may be a single ``{"@id": ...}`` dict or a list of
    them; any other shape (or a missing key) adds nothing.
    """
    referenced = record.get(relationship)
    if isinstance(referenced, dict):
        referenced = [referenced]
    elif not isinstance(referenced, list):
        return

    this_node = record["rdfs:label"]
    for ref in referenced:
        other_node = extract_name_from_uri_or_curie(ref["@id"])

        # do not allow self-loops
        if other_node == this_node:
            continue

        if incoming:
            graph.add_edge(other_node, this_node, key=edge_key)
        else:
            graph.add_edge(this_node, other_node, key=edge_key)


def load_schema_into_networkx(schema):
    """Build a ``networkx.MultiDiGraph`` from a JSON-LD style schema dict.

    For every record in ``schema["@graph"]`` a node named after the record's
    ``"rdfs:label"`` is added, and edges are created for the relationships
    listed in ``_SCHEMA_EDGE_RULES``.

    Node attributes are the record's entries with normalised keys: for a key
    containing ``":"`` the part after the first colon is used, for a key
    containing ``"@"`` the first character is dropped, other keys are kept.
    In addition ``required`` is converted to a bool, ``validationRules``
    defaults to ``[]``, and ``uri`` / ``description`` are copied from
    ``"@id"`` / ``"rdfs:comment"``.

    Args:
        schema: dict with an ``"@graph"`` list of record dicts.  Each record
            must provide ``"@id"``, ``"rdfs:label"`` and ``"rdfs:comment"``.

    Returns:
        The populated ``nx.MultiDiGraph``.

    Raises:
        KeyError: if a record lacks one of the mandatory keys.
    """
    G = nx.MultiDiGraph()
    for record in schema["@graph"]:

        # creation of nodes: collect the attributes under normalised keys
        node = {}
        for (k, value) in record.items():
            if ":" in k:
                node[k.split(":")[1]] = value
            elif "@" in k:
                node[k[1:]] = value
            else:
                node[k] = value

        # creation of edges: every relationship accepts both the list and
        # the single-dict form
        for relationship, edge_key, incoming in _SCHEMA_EDGE_RULES:
            _add_record_edges(G, record, relationship, edge_key, incoming)

        # check schema generator (JSON validation schema gen)
        if "requiresChildAsValue" in node and node["requiresChildAsValue"]["@id"] == "sms:True":
            node["requiresChildAsValue"] = True

        if "required" in node:
            # .get(): the normalised "required" key may stem from a prefix
            # other than "sms:", in which case the flag is simply False.
            node["required"] = record.get("sms:required") == "sms:true"

        # not sure if this is required?
        node["validationRules"] = record.get("sms:validationRules", [])

        node['uri'] = record["@id"]
        node['description'] = record["rdfs:comment"]
        G.add_node(record['rdfs:label'], **node)

    return G
Beispiel #6
0
    def edit_schema_object_nx(self, schema_object: dict) -> None:
        """Replace an existing class in the schema graph and in the JSON-LD schema.

        ``schema_object`` is converted to a graph with ``class_to_node``.
        For every node of that graph which already exists in the current
        schema graph (``self.get_nx_schema()``):

        * ``comment`` (plus ``description``), ``required``, ``displayName``
          and ``validationRules`` are overwritten when present on both sides;
        * for ``subClassOf``, ``requiresDependency``, ``requiresComponent``
          and ``rangeIncludes`` the edges derived from the old attribute
          value are removed, edges for the new value are added (self-loops
          are skipped) and the attribute itself is replaced.

        Finally ``self.schema_nx`` is set to the modified graph and the first
        entry of ``self.schema["@graph"]`` whose ``"rdfs:label"`` matches is
        replaced by ``schema_object``.

        Args:
            schema_object: class record; must contain ``"rdfs:label"``.
                Relationship values are a ``{"@id": ...}`` dict or a list of
                such dicts.
        """
        # (node attribute, edge key in the graph,
        #  True when the edge points *into* the edited node)
        edge_rules = (
            ("subClassOf", "parentOf", True),
            ("requiresDependency", "requiresDependency", False),
            ("requiresComponent", "requiresComponent", False),
            ("rangeIncludes", "rangeValue", False),
        )
        # attributes that can be replaced without touching any edge
        plain_fields = ("required", "displayName", "validationRules")

        def referenced_names(value):
            """Return the node names referenced by a relationship attribute."""
            if isinstance(value, dict):
                value = [value]
            elif not isinstance(value, list):
                return []
            return [extract_name_from_uri_or_curie(ref["@id"]) for ref in value]

        def endpoints(node, other, incoming):
            """Order the two nodes as (source, target) of the edge."""
            return (other, node) if incoming else (node, other)

        node_to_replace = class_to_node(class_to_convert=schema_object)

        # get the networkx graph associated with the SchemaExplorer object in its current state
        schema_graph_nx = self.get_nx_schema()

        # Look the edited node up directly: adding edges can create new nodes,
        # which must not happen while the graph's node view is being iterated.
        for replace_node, replace_data in list(node_to_replace.nodes(data=True)):
            if replace_node not in schema_graph_nx:
                continue
            data = schema_graph_nx.nodes[replace_node]

            if "comment" in data and "comment" in replace_data:
                data["comment"] = replace_data["comment"]
                # "description" is updated together with "comment"
                if "description" in replace_data:
                    data["description"] = replace_data["description"]

            for field in plain_fields:
                if field in data and field in replace_data:
                    data[field] = replace_data[field]

            # general strategy for relationship attributes: remove the edges
            # that existed formerly, then add new edges based on the
            # contents of the replacement node
            for field, edge_key, incoming in edge_rules:
                if field not in replace_data:
                    continue

                if field in data:
                    for old_target in referenced_names(data[field]):
                        src, dst = endpoints(replace_node, old_target, incoming)
                        # only remove the edge carrying this relationship's key
                        if schema_graph_nx.has_edge(src, dst, edge_key):
                            schema_graph_nx.remove_edge(src, dst, edge_key)

                for new_target in referenced_names(replace_data[field]):
                    # do not allow self-loops
                    if new_target != replace_node:
                        src, dst = endpoints(replace_node, new_target, incoming)
                        schema_graph_nx.add_edge(src, dst, key=edge_key)

                # once the edges have been added, change the contents of the node
                data[field] = replace_data[field]

        # set the networkx schema graph to the modified networkx schema
        self.schema_nx = schema_graph_nx

        # replace the modified class in the original JSON-LD schema (not in the data/ folder though)
        for i, schema_class in enumerate(self.schema["@graph"]):
            if schema_class["rdfs:label"] == schema_object["rdfs:label"]:
                self.schema["@graph"][i] = schema_object
                break
Beispiel #7
0
    def explore_class(self, schema_class):
        """Collect the details of one schema class into a dictionary.

        Reads the attributes stored on the ``schema_class`` node of
        ``self.schema_nx`` and returns a dict with the keys ``properties``,
        ``description``, ``uri``, ``usage``, ``child_classes``,
        ``subClassOf``, ``range``, ``dependencies``, ``validation_rules``,
        ``required``, ``component_dependencies`` and ``parent_classes``;
        ``displayName`` is included only when the node carries it.
        """
        attrs = self.schema_nx.nodes[schema_class]

        def names_in(field):
            """Names referenced by a node attribute holding one dict or an iterable of dicts."""
            if field not in attrs:
                return []
            entries = attrs[field]
            if isinstance(entries, dict):
                entries = [entries]
            return [extract_name_from_uri_or_curie(entry["@id"]) for entry in entries]

        parents = names_in("subClassOf")
        requires_range = names_in("rangeIncludes")
        requires_dependencies = names_in("requiresDependency")
        requires_components = names_in("requiresComponent")

        required = attrs.get("required", False)
        validation_rules = attrs.get("validationRules", [])

        # TODO: make class_info keys here the same as keys in schema graph nodes(e.g. schema_class above); note that downstream code using explore_class would have to be updated as well (e.g. csv_2_schemaorg)
        class_info = {
            'properties': self.find_class_specific_properties(schema_class),
            'description': attrs['description'],
            'uri': curie2uri(attrs["uri"], namespaces),
            # usage lookup is currently disabled
            'usage': 'NA',
            'child_classes': self.find_adjacent_child_classes(schema_class),
            'subClassOf': parents,
            'range': requires_range,
            'dependencies': requires_dependencies,
            'validation_rules': validation_rules,
            'required': required,
            'component_dependencies': requires_components,
            # same list object as 'subClassOf'
            'parent_classes': parents,
        }

        if "displayName" in attrs:
            class_info['displayName'] = attrs['displayName']

        return class_info