예제 #1
0
    def add_split_results(self, knowledge_graph, result_bindings):
        """
        Populate the message from a raw knowledge graph and raw result bindings.

        Every node and edge dict in ``knowledge_graph`` is copied into a new
        ``Node``/``Edge`` object and appended to
        ``self.message.knowledge_graph``.  ``self.message.results`` is then
        replaced with one ``Result`` per entry of ``result_bindings`` and
        ``self.message.code_description`` is set to "<n> result(s) found".

        :param knowledge_graph: mapping with "nodes" and "edges" entries, each
            an iterable of dicts (originally a native RTX KG dump).  Every
            node dict must carry a "category" key.
        :param result_bindings: sized iterable of dicts, each with "nodes" and
            "edges" entries (originally the native QueryGraphReasoner format)
        :return: an empty tuple (kept for backward compatibility)
        :raises KeyError: if one of the required keys named above is missing
        """

        #### Add the knowledge_graph nodes
        node_attribute_names = ("id", "uri", "name", "description", "symbol")
        for input_node in knowledge_graph["nodes"]:
            node = Node()
            # Only copy the attributes the input actually provides
            for attribute in node_attribute_names:
                if attribute in input_node:
                    setattr(node, attribute, input_node[attribute])
            # The model stores the type as a list; the input has one category
            node.type = [input_node["category"]]
            # TODO: node.node_attributes is not populated yet
            self.message.knowledge_graph.nodes.append(node)

        #### Add the knowledge_graph edges
        # The empty-string placeholder names that used to sit at the end of
        # this list were dropped: they could only ever create an attribute
        # literally named "" on the edge.
        edge_attribute_names = (
            "id", "type", "relation", "source_id", "target_id",
            "is_defined_by", "defined_datetime", "provided_by", "weight",
            "evidence_type", "qualifiers", "negated",
        )
        for input_edge in knowledge_graph["edges"]:
            edge = Edge()
            for attribute in edge_attribute_names:
                if attribute in input_edge:
                    setattr(edge, attribute, input_edge[attribute])
            # The input calls it "probability"; the model calls it "confidence"
            if "probability" in input_edge:
                edge.confidence = input_edge["probability"]
            # TODO: edge.edge_attributes and edge.publications are not populated yet
            self.message.knowledge_graph.edges.append(edge)

        #### Add each result
        self.message.results = []
        for input_result in result_bindings:
            result = Result()
            result.description = "No description available"
            result.essence = "?"
            # essence_type, row_data, score, score_name and score_direction
            # are intentionally left unset
            result.confidence = 1.0
            result.result_type = "individual query answer"
            result.reasoner_id = "RTX"
            result.result_graph = None
            # Bindings are passed through exactly as received
            result.node_bindings = input_result["nodes"]
            result.edge_bindings = input_result["edges"]
            self.message.results.append(result)

        #### Set the code_description
        n_results = len(result_bindings)
        plural = "" if n_results == 1 else "s"
        self.message.code_description = f"{n_results} result{plural} found"

        #### Complete normally
        return ()
예제 #2
0
    def add_subgraph(self,
                     nodes,
                     edges,
                     description,
                     confidence,
                     return_result=False,
                     suppress_bindings=False):
        """
        Add one result, built from a networkx neo4j subgraph, to the message.

        Each subgraph node and edge is converted into a Node / Edge object.
        Nodes and edges not seen before (tracked in self._node_ids and
        self._edge_ids) are appended to self.message.knowledge_graph.  A
        Result is then appended to self._results, self.message.results is
        pointed at that list, and self.message.code_description is refreshed
        from the incremented self._num_results.

        :param nodes: nodes in the subgraph (g.nodes(data=True)).  Each data
            dict is read for 'labels' and for the 'properties' keys 'name',
            'UUID', 'accession', 'uri', 'id' and, optionally, 'description'.
        :param edges: edges in the subgraph (g.edges(data=True)).  Each data
            dict is read for 'type' and for the 'properties' keys
            'provided_by', 'source_node_uuid' and 'target_node_uuid'.
        :param description: stored as the description of the new result
        :param confidence: stored as the confidence of the new result
        :param return_result: when truthy, the new Result is returned
        :param suppress_bindings: when exactly False, the node and edge
            bindings are attached to the result; when exactly True, the
            subgraph is attached as result_graph instead.  Both checks use
            identity ('is'), so any other value attaches neither.
        :return: the new Result when return_result is truthy, otherwise None
        :raises Exception: when the _type_map entry found for an edge's source
            or target is falsy
        """

        # Get the relevant info from the nodes and edges
        # NOTE(review): node_uuids (and, below, edge_source_curie,
        # edge_target_curie and edge_ids) are filled in but never read again
        # in this method.
        node_keys = []
        node_descriptions = dict()
        node_names = dict()
        node_labels = dict()
        node_uuids = dict()
        node_accessions = dict()
        node_iris = dict()
        node_uuids2iri = dict()
        node_curies = dict()
        node_uuids2curie = dict()
        for u, data in nodes:
            node_keys.append(u)
            # A missing description is stored as the string "None", not None
            if 'description' in data['properties']:
                node_descriptions[u] = data['properties']['description']
            else:
                node_descriptions[u] = "None"
            node_names[u] = data['properties']['name']
            # Takes one label other than 'Base'.  With several remaining
            # labels the pick comes from a set, so it is not order-guaranteed;
            # with none remaining this raises IndexError.
            node_labels[u] = list(set(data['labels']).difference({'Base'}))[0]
            node_uuids[u] = data['properties']['UUID']
            node_accessions[u] = data['properties']['accession']
            node_iris[u] = data['properties']['uri']
            node_uuids2iri[data['properties']
                           ['UUID']] = data['properties']['uri']
            curie_id = data['properties']['id']
            # CHEMBL ids are rewritten as "CHEMBL:CHEMBL<local part>"
            if curie_id.split(':')[0].upper() == "CHEMBL":
                curie_id = "CHEMBL:CHEMBL" + curie_id.split(':')[1]
            node_uuids2curie[data['properties']['UUID']] = curie_id
            node_curies[
                u] = curie_id  # These are the actual CURIE IDS eg UBERON:00000941 (uri is the web address)

        # All per-edge dicts are keyed by the (u, v) pair.
        # NOTE(review): if `edges` can hold several edges for the same (u, v),
        # later ones overwrite earlier entries while edge_keys still lists the
        # pair once per edge -- confirm against the caller.
        edge_keys = []
        edge_types = dict()
        edge_source_db = dict()
        edge_source_iri = dict()
        edge_target_iri = dict()
        edge_source_curie = dict()
        edge_target_curie = dict()
        edge_ids = dict()
        for u, v, data in edges:
            edge_keys.append((u, v))
            edge_types[(u, v)] = data['type']
            edge_source_db[(u, v)] = data['properties']['provided_by']
            # Endpoints are resolved through the node UUIDs collected above,
            # so an edge naming a node outside `nodes` raises KeyError here
            edge_source_iri[(
                u, v)] = node_uuids2iri[data['properties']['source_node_uuid']]
            edge_target_iri[(
                u, v)] = node_uuids2iri[data['properties']['target_node_uuid']]
            edge_source_curie[(
                u,
                v)] = node_uuids2curie[data['properties']['source_node_uuid']]
            edge_target_curie[(
                u,
                v)] = node_uuids2curie[data['properties']['target_node_uuid']]
            edge_ids[(u, v)] = data['properties']['provided_by']  # FIXME

        # For each node, populate the relevant information
        node_objects = []
        node_iris_to_node_object = dict()
        for node_key in node_keys:
            node = Node()
            node.id = node_curies[node_key]
            node.type = [node_labels[node_key]]
            node.name = node_names[node_key]
            node.uri = node_iris[node_key]
            node.accession = node_accessions[node_key]
            node.description = node_descriptions[node_key]
            node_objects.append(node)
            node_iris_to_node_object[node_iris[node_key]] = node

            #### Add this node to the master knowledge graph
            # self._node_ids maps node id -> type and doubles as the
            # "already added" check
            if node.id not in self._node_ids:
                self.message.knowledge_graph.nodes.append(node)
                self._node_ids[node.id] = node.type[
                    0]  # Just take the first of potentially several FIXME

        #### Create the bindings lists
        node_bindings = list()
        edge_bindings = list()

        # for each edge, create an edge between them
        edge_objects = []
        for u, v in edge_keys:
            edge = Edge()
            #edge.id is set below when building the bindings
            edge.type = edge_types[(u, v)]
            # source_id / target_id are the ids (CURIEs) of the Node objects
            # built above, looked up via the endpoint URIs
            edge.source_id = node_iris_to_node_object[edge_source_iri[(u,
                                                                       v)]].id
            edge.target_id = node_iris_to_node_object[edge_target_iri[(u,
                                                                       v)]].id
            edge_objects.append(edge)
            # Edge fields not populated here: attribute_list, confidence,
            # evidence_type, publications, qualifiers, relation
            edge.is_defined_by = "RTX"
            edge.provided_by = edge_source_db[(u, v)]

            #### Add this edge to the master knowledge graph
            # Edges are de-duplicated on "source -type- target"; a new edge
            # gets the next counter value as its id, a repeated one reuses the
            # id recorded for the first occurrence.
            edge_str = "%s -%s- %s" % (edge.source_id, edge.type,
                                       edge.target_id)
            if edge_str not in self._edge_ids:
                self.message.knowledge_graph.edges.append(edge)
                edge.id = "%d" % self._edge_counter
                self._edge_ids[edge_str] = edge.id
                self._edge_counter += 1
            else:
                edge.id = self._edge_ids[edge_str]

            #### Try to figure out how the source fits into the query_graph for the bindings
            # The node id itself is tried in _type_map first, then its type.
            # NOTE(review): if self._type_map is a plain dict, a missing
            # source_type raises KeyError on the lookup below, before the
            # descriptive error can be reported -- confirm.
            source_type = self._node_ids[edge.source_id]
            if edge.source_id in self._type_map:
                source_knowledge_map_key = self._type_map[edge.source_id]
            else:
                source_knowledge_map_key = self._type_map[source_type]
            if not source_knowledge_map_key:
                eprint(
                    "Expected to find '%s' in the response._type_map, but did not"
                    % source_type)
                raise Exception(
                    "Expected to find '%s' in the response._type_map, but did not"
                    % source_type)

            # One binding is appended per edge endpoint with no
            # de-duplication, so a node touched by several edges is bound
            # several times (a de-duplicating variant used to sit here,
            # commented out).
            node_bindings.append(
                NodeBinding(qg_id=source_knowledge_map_key,
                            kg_id=edge.source_id))

            #### Try to figure out how the target fits into the query_graph for the knowledge map
            # Same lookup as for the source; the same KeyError caveat applies
            target_type = self._node_ids[edge.target_id]
            if edge.target_id in self._type_map:
                target_knowledge_map_key = self._type_map[edge.target_id]
            else:
                target_knowledge_map_key = self._type_map[target_type]
            if not target_knowledge_map_key:
                eprint(
                    "ERROR: Expected to find '%s' in the response._type_map, but did not"
                    % target_type)
                raise Exception(
                    "Expected to find '%s' in the response._type_map, but did not"
                    % target_type)

            node_bindings.append(
                NodeBinding(qg_id=target_knowledge_map_key,
                            kg_id=edge.target_id))

            #### Try to figure out how the edge fits into the query_graph for the knowledge map
            # Preference order: the edge type, then "e<source>-<target>", then
            # "e<target>-<source>".  If none is in _type_map the problem is
            # only logged and the binding gets the literal key "ERROR".
            source_target_key = "e" + source_knowledge_map_key + "-" + target_knowledge_map_key
            target_source_key = "e" + target_knowledge_map_key + "-" + source_knowledge_map_key
            if edge.type in self._type_map:
                knowledge_map_key = self._type_map[edge.type]
            elif source_target_key in self._type_map:
                knowledge_map_key = source_target_key
            elif target_source_key in self._type_map:
                knowledge_map_key = target_source_key
            else:
                eprint(
                    "ERROR: Expected to find '%s' or '%s' or '%s' in the response._type_map, but did not"
                    % (edge.type, source_target_key, target_source_key))
                knowledge_map_key = "ERROR"

            # Edge bindings are likewise appended once per edge, without
            # de-duplication
            edge_bindings.append(
                EdgeBinding(qg_id=knowledge_map_key, kg_id=edge.id))

        # Create the result (potential answer)
        result1 = Result()
        result1.reasoner_id = "RTX"
        result1.description = description
        result1.confidence = confidence
        if suppress_bindings is False:
            result1.node_bindings = node_bindings
            result1.edge_bindings = edge_bindings

        # Create a KnowledgeGraph object and put the list of nodes and edges into it
        #### This is still legal, then is redundant with the knowledge map, so leave it out maybe
        # The per-result graph is built unconditionally but attached only when
        # bindings are suppressed
        knowledge_graph = KnowledgeGraph()
        knowledge_graph.nodes = node_objects
        knowledge_graph.edges = edge_objects
        if suppress_bindings is True:
            result1.result_graph = knowledge_graph

        # Put the first result (potential answer) into the message
        self._results.append(result1)
        self.message.results = self._results

        # Increment the number of results
        self._num_results += 1
        if self._num_results == 1:
            self.message.code_description = "%s result found" % self._num_results
        else:
            self.message.code_description = "%s results found" % self._num_results

        #### Finish and return the result if requested
        if return_result:
            return result1
        else:
            pass
예제 #3
0
    def answer(self, entity, use_json=False):
        """
        Answer a general question of the type "What is X".

        :param entity: KG neo4j node name (eg "carbetocin")
        :param use_json: when true, wrap the answer in the Translator
            standardized API output format
        :return: with use_json false, the properties of the first matching
            node, or None when the entity is not in the index; with use_json
            true, the message of a FormatResponse holding either a
            "TermNotFound" error or a single-node result
        """

        #### Resolve the entity name to CURIEs through the node index
        eprint("Looking up '%s' in KgNodeIndex" % entity)
        matching_curies = KGNodeIndex().get_curies(entity)

        #### Unknown term: nothing to describe
        if not matching_curies:
            if use_json:
                not_found = FormatOutput.FormatResponse(0)
                not_found.add_error_message(
                    "TermNotFound",
                    "This concept is not in our knowledge graph")
                return not_found.message
            return None

        #### Only the first matching CURIE is described
        first_curie = matching_curies[0]
        eprint("Getting properties for '%s'" % first_curie)
        node_info = RU.get_node_properties(first_curie)
        eprint("Properties are:")
        eprint(node_info)

        #### Plain mode hands back the raw properties
        if not use_json:
            return node_info

        #### Standardized API mode: start from a stub Message
        message = FormatOutput.FormatResponse(0).message
        message.table_column_names = [
            "id", "type", "name", "description", "uri"
        ]
        message.code_description = None

        #### The single knowledge-graph node describing the term
        kg_node = Node()
        kg_node.id = node_info["id"]
        kg_node.uri = node_info["uri"]
        kg_node.type = [node_info["category"]]
        kg_node.name = node_info["name"]
        kg_node.description = node_info["description"]

        #### The one and only result (potential answer)
        term_result = Result()
        term_result.id = "http://arax.ncats.io/api/v1/result/0000"
        term_result.description = (
            "The term %s is in our knowledge graph and is defined as %s" %
            (node_info["name"], node_info["description"]))
        term_result.confidence = 1.0
        term_result.essence = node_info["name"]
        term_result.essence_type = node_info["category"]
        joined_types = ",".join(kg_node.type)
        term_result.row_data = [
            kg_node.id, joined_types, kg_node.name, kg_node.description,
            kg_node.uri
        ]

        #### A one-node, zero-edge graph serves as both the result graph and
        #### the message-level knowledge graph (shortcut for this simple case)
        single_node_graph = KnowledgeGraph()
        single_node_graph.nodes = [kg_node]
        single_node_graph.edges = []
        term_result.result_graph = single_node_graph
        message.results = [term_result]
        message.knowledge_graph = single_node_graph

        #### Manufacture a matching query_graph after the fact
        query_node = QNode()
        query_node.id = "n00"
        query_node.curie = node_info["id"]
        query_node.type = None
        post_hoc_query = QueryGraph()
        post_hoc_query.nodes = [query_node]
        post_hoc_query.edges = []
        message.query_graph = post_hoc_query

        #### Bind the query node to the knowledge-graph node
        term_result.node_bindings = [
            NodeBinding(qg_id="n00", kg_id=node_info["id"])
        ]
        term_result.edge_bindings = []

        return message