def answer(source_node_ID,
               target_node_type,
               association_node_type,
               use_json=False,
               threshold=0.2,
               n=20):
        """
        Answers the question what X are similar to Y based on overlap of common Z nodes.

        X is target_node_type, Y is source_node_ID, Z is association_node_type. The relationships
        are automatically determined in SimilarNodesInCommon by looking for 1 hop relationships
        and popping the FIRST one (you are warned).

        :param source_node_ID: actual name in the KG
        :param target_node_type: kinds of nodes you want returned
        :param association_node_type: kind of node you are computing the Jaccard overlap on
        :param use_json: print the results in standardized format
        :param threshold: only return results where jaccard is >= this threshold
        :param n: maximum number of results to return (default 20)
        :return: None. The results (or the error message) are printed, not returned.
        """

        # Initialize the response class
        response = FormatOutput.FormatResponse(5)
        # add the column names for the row data
        response.message.table_column_names = [
            "source name", "source ID", "target name", "target ID",
            "Jaccard index"
        ]

        # Initialize the similar nodes class
        similar_nodes_in_common = SimilarNodesInCommon.SimilarNodesInCommon()

        # get the description and the label of the source node
        source_node_description = RU.get_node_property(source_node_ID, 'name')
        source_node_label = RU.get_node_property(source_node_ID, 'label')

        # Get the nodes in common
        node_jaccard_tuples_sorted, error_code, error_message = similar_nodes_in_common.get_similar_nodes_in_common_source_target_association(
            source_node_ID, target_node_type, association_node_type, threshold)

        # Report an error BEFORE touching the result list: when the lookup
        # failed, the list is not guaranteed to be usable.
        if error_code is not None or error_message is not None:
            if not use_json:
                print(error_message)
            else:
                response.add_error_message(error_code, error_message)
                response.print()
            return

        # Drop the input node first and only then keep the top n, so that a
        # self-match does not silently consume one of the n result slots.
        node_jaccard_tuples_sorted = [
            pair for pair in node_jaccard_tuples_sorted
            if pair[0] != source_node_ID
        ][:n]

        #### If use_json not specified, then return results as a fairly plain list
        if not use_json:
            lines = [
                "The %s's involving similar %ss as %s are: \n" %
                (target_node_type, association_node_type,
                 source_node_description)
            ]
            for other_disease_ID, jaccard in node_jaccard_tuples_sorted:
                lines.append("%s\t%s\tJaccard %f\n" %
                             (other_disease_ID,
                              RU.get_node_property(other_disease_ID, 'name'),
                              jaccard))
            print("".join(lines))
            return

        #### Else use_json requested, return the results in the Translator standard API JSON format

        #### Create the QueryGraph for this type of question
        query_graph = QueryGraph()
        source_node = QNode()
        source_node.id = "n00"
        source_node.curie = source_node_ID
        source_node.type = source_node_label
        association_node = QNode()
        association_node.id = "n01"
        association_node.type = association_node_type
        association_node.is_set = True
        target_node = QNode()
        target_node.id = "n02"
        target_node.type = target_node_type
        query_graph.nodes = [source_node, association_node, target_node]

        # The edge types are not set here.
        edge1 = QEdge()
        edge1.id = "en00-n01"
        edge1.source_id = "n00"
        edge1.target_id = "n01"

        edge2 = QEdge()
        edge2.id = "en01-n02"
        edge2.source_id = "n01"
        edge2.target_id = "n02"

        query_graph.edges = [edge1, edge2]

        #### DONT Suppress the query_graph because we can now do the knowledge_map with v0.9.1
        response.message.query_graph = query_graph

        #### Create a mapping dict with the source curie and node types and edge types. This dict is used for reverse lookups by type
        #### for mapping to the QueryGraph. There is a potential point of failure here if there are duplicate node or edge types. FIXME
        response._type_map = dict()
        response._type_map[source_node.curie] = source_node.id
        response._type_map[association_node.type] = association_node.id
        response._type_map[target_node.type] = target_node.id
        response._type_map["e" + edge1.source_id + "-" +
                           edge1.target_id] = edge1.id
        response._type_map["e" + edge2.source_id + "-" +
                           edge2.target_id] = edge2.id

        #### Extract the sorted IDs from the list of tuples
        node_jaccard_ID_sorted = [
            node_id for node_id, _ in node_jaccard_tuples_sorted
        ]

        # get the entire subgraph
        g = RU.return_subgraph_through_node_labels(source_node_ID,
                                                   source_node_label,
                                                   node_jaccard_ID_sorted,
                                                   target_node_type,
                                                   [association_node_type],
                                                   with_rel=[],
                                                   directed=False,
                                                   debug=False)

        # Map the KG ids onto the node keys used by the subgraph: the first
        # node carrying the source id, and one node per requested target id.
        source_node_number = None
        target_id2numbers = dict()
        wanted_ids = set(node_jaccard_ID_sorted)
        for node, data in g.nodes(data=True):
            node_id = data['properties']['id']
            if node_id == source_node_ID:
                if source_node_number is None:
                    source_node_number = node
            elif node_id in wanted_ids:
                target_id2numbers[node_id] = node

        for other_disease_ID, jaccard in node_jaccard_tuples_sorted:
            target_node_number = target_id2numbers.get(other_disease_ID)
            # Without both end points in the subgraph there is nothing to
            # attach for this result, so skip it instead of crashing.
            if source_node_number is None or target_node_number is None:
                continue

            target_name = RU.get_node_property(other_disease_ID, 'name')
            to_print = "The %s %s involves similar %ss as %s with similarity value %f" % (
                target_node_type, target_name, association_node_type,
                source_node_description, jaccard)

            # Collect every node lying on a shortest path between source and
            # target. all_shortest_paths is lazy and raises NetworkXNoPath
            # while being iterated when the two nodes are not connected.
            rel_nodes = set()
            try:
                for path in nx.all_shortest_paths(g, source_node_number,
                                                  target_node_number):
                    rel_nodes.update(path)
            except nx.NetworkXNoPath:
                continue
            if not rel_nodes:
                continue

            # extract the relevant subgraph and add it to the response
            sub_g = nx.subgraph(g, rel_nodes)
            res = response.add_subgraph(sub_g.nodes(data=True),
                                        sub_g.edges(data=True),
                                        to_print,
                                        jaccard,
                                        return_result=True)
            res.essence = "%s" % target_name  # populate with essence of question result
            res.essence_type = target_node_type
            res.row_data = [
                "%s" % source_node_description,
                "%s" % source_node_ID,
                "%s" % target_name,
                "%s" % other_disease_ID,
                "%f" % jaccard,
            ]

        response.print()
Example #2
0
    def add_qedge(self, message, input_parameters, describe=False):
        """
        Adds a new QEdge object to the QueryGraph inside the Message object

        :param message: Message whose query_graph receives the new QEdge. A
            QueryGraph (and its edge list) is created on it when missing.
        :param input_parameters: dict that may contain 'id', 'source_id',
            'target_id' and 'type'. 'source_id' and 'target_id' are required
            and must match the id of a QNode already in the QueryGraph. Any
            other key is reported as an error.
        :param describe: when True, only return a dict describing the
            allowable parameters; message and input_parameters are not used.
        :return: Response object with execution information (or the
            description dict when describe is True)
        :rtype: Response
        """

        # #### Internal documentation setup
        allowable_parameters = {
            'id': {
                'Any string that is unique among all QEdge id fields, with recommended format e00, e01, e02, etc.'
            },
            'source_id': {
                'id of the source QNode already present in the QueryGraph (e.g. n01, n02)'
            },
            'target_id': {
                'id of the target QNode already present in the QueryGraph (e.g. n01, n02)'
            },
            'type': {
                'Any valid Translator/BioLink relationship type (e.g. physically_interacts_with, participates_in)'
            },
        }
        if describe:
            allowable_parameters[
                'dsl_command'] = '`add_qedge()`'  # can't get this name at run-time, need to manually put it in per https://www.python.org/dev/peps/pep-3130/
            allowable_parameters[
                'brief_description'] = """The `add_qedge` method adds an additional QEdge to the QueryGraph in the Message object. Currently
                source_id and target_id QNodes must already be present in the QueryGraph. The specified type is not currently checked that it is a
                valid Translator/BioLink relationship type, but it should be."""
            return allowable_parameters

        #### Define a default response
        response = Response()
        self.response = response
        self.message = message

        #### Basic checks on arguments
        if not isinstance(input_parameters, dict):
            response.error("Provided parameters is not a dict",
                           error_code="ParametersNotDict")
            return response

        #### Define a complete set of allowed parameters and their defaults
        parameters = {
            'id': None,
            'source_id': None,
            'target_id': None,
            'type': None,
        }

        #### Loop through the input_parameters and override the defaults and make sure they are allowed
        for key, value in input_parameters.items():
            if key not in parameters:
                response.error(f"Supplied parameter {key} is not permitted",
                               error_code="UnknownParameter")
            else:
                parameters[key] = value
        #### Return if any of the parameters generated an error (showing not just the first one)
        if response.status != 'OK':
            return response

        #### Store these final parameters for convenience
        response.data['parameters'] = parameters
        self.parameters = parameters

        response.info(
            f"Adding a QueryEdge to Message with parameters {parameters}")

        #### Make sure there's a query_graph already here
        if message.query_graph is None:
            message.query_graph = QueryGraph()
            message.query_graph.nodes = []
            message.query_graph.edges = []
        if message.query_graph.edges is None:
            message.query_graph.edges = []

        #### Create a QEdge, using the supplied id or else the next free one
        qedge = QEdge()
        edge_id = parameters['id']
        if edge_id is None:
            edge_id = self.__get_next_free_edge_id()
        qedge.id = edge_id

        #### Get the set of available node ids. A query_graph whose node list
        #### is None is treated like an empty one, so the caller gets the
        #### "no QNode with id" error below instead of a TypeError.
        known_node_ids = {
            qnode.id for qnode in (message.query_graph.nodes or [])
        }

        #### Add the source_id
        source_id = parameters['source_id']
        if source_id is None:
            response.error(
                "While trying to add QEdge, source_id is a required parameter",
                error_code="MissingSourceId")
            return response
        if source_id not in known_node_ids:
            response.error(
                f"While trying to add QEdge, there is no QNode with id {source_id}",
                error_code="UnknownSourceId")
            return response
        qedge.source_id = source_id

        #### Add the target_id
        target_id = parameters['target_id']
        if target_id is None:
            response.error(
                "While trying to add QEdge, target_id is a required parameter",
                error_code="MissingTargetId")
            return response
        if target_id not in known_node_ids:
            response.error(
                f"While trying to add QEdge, there is no QNode with id {target_id}",
                error_code="UnknownTargetId")
            return response
        qedge.target_id = target_id

        #### Add the type if any. Need to verify it's an allowed type. FIXME
        if parameters['type'] is not None:
            qedge.type = parameters['type']

        #### Add it to the query_graph edge list
        message.query_graph.edges.append(qedge)

        #### Return the response
        return response
Example #3
0
    def answer(self,
               source_name,
               target_label,
               relationship_type,
               use_json=False,
               directed=False):
        """
        Answer a question of the type "What proteins does drug X target" but is general:
        what <node X type> does <node Y grounded> <relationship Z> that can be answered in one hop in the KG.
        When no one hop path exists, a two hop path whose first hop is a 'subclass_of' edge is tried instead.

        :param source_name: KG neo4j node name (eg "carbetocin"); it is matched against the 'id' property of the
            nodes in the returned subgraph
        :param target_label: KG node label (eg. "protein")
        :param relationship_type: KG relationship type (eg. "physically_interacts_with")
        :param use_json: If the answer should be in Eric's Json standardized API output format
        :param directed: forwarded unchanged to RU.return_subgraph_paths_of_type
        :return: if use_json is False, a list of dictionaries (keys 'type', 'name', 'desc', 'prob'), one per target
            node; if use_json is True, the populated FormatResponse. When no path is found, or more than one
            intermediate node exists, a FormatResponse carrying the error is returned regardless of use_json.
        """
        # Get label/kind of node the source is
        source_label = RU.get_node_property(source_name, "label")

        # Get the subgraph (all targets along relationship); fall back to
        # going through one 'subclass_of' hop when the direct query is empty
        has_intermediate_node = False
        try:
            g = RU.return_subgraph_paths_of_type(source_name,
                                                 source_label,
                                                 None,
                                                 target_label,
                                                 [relationship_type],
                                                 directed=directed)
        except CustomExceptions.EmptyCypherError:
            try:
                has_intermediate_node = True
                g = RU.return_subgraph_paths_of_type(
                    source_name,
                    source_label,
                    None,
                    target_label, ['subclass_of', relationship_type],
                    directed=directed)
            except CustomExceptions.EmptyCypherError:
                error_message = "No path between %s and %s via relationship %s" % (
                    source_name, target_label, relationship_type)
                error_code = "NoPathsFound"
                response = FormatOutput.FormatResponse(3)
                response.add_error_message(error_code, error_message)
                return response

        # extract the source_node_number
        # NOTE(review): if no node in g carries the source id this name stays
        # unbound and the later uses below raise; behaviour left unchanged.
        for node, data in g.nodes(data=True):
            if data['properties']['id'] == source_name:
                source_node_number = node
                break

        # Get all the target numbers (every node that is not the source)
        target_numbers = [
            node for node, data in g.nodes(data=True)
            if data['properties']['id'] != source_name
        ]

        # if there's an intermediate node, get the name
        if has_intermediate_node:
            neighbors = list(g.neighbors(source_node_number))
            if len(neighbors) > 1:
                error_message = "More than one intermediate node"
                error_code = "AmbiguousPath"
                response = FormatOutput.FormatResponse(3)
                response.add_error_message(error_code, error_message)
                return response
            intermediate_node = neighbors.pop()
            # The intermediate node is only the stepping stone reached via
            # 'subclass_of'; it must not be reported as a target that is
            # "connected by the relationship <relationship_type>".
            target_numbers = [
                number for number in target_numbers
                if number != intermediate_node
            ]

        #### If use_json not specified, then return results as a fairly plain list
        if not use_json:
            results_list = []
            for target_number in target_numbers:
                node_data = g.nodes[target_number]
                node_name = node_data['properties']['name']
                results_list.append({
                    'type': list(set(node_data['labels']) - {'Base'}).pop(),
                    'name': node_name,
                    'desc': node_name,
                    'prob': 1,  # All these are known to be true
                })
            return results_list

        #### Else use_json requested, return the results in the Translator standard API JSON format
        response = FormatOutput.FormatResponse(3)  # it's a Q3 question
        response.message.table_column_names = [
            "source name", "source ID", "target name", "target ID"
        ]
        source_properties = g.nodes[source_node_number]['properties']
        source_description = source_properties['name']

        #### Create the QueryGraph for this type of question
        query_graph = QueryGraph()
        source_node = QNode()
        source_node.id = "n00"
        source_node.curie = source_properties['id']
        source_node.type = source_properties['category']
        target_node = QNode()
        target_node.id = "n01"
        target_node.type = target_label
        query_graph.nodes = [source_node, target_node]
        edge1 = QEdge()
        edge1.id = "e00"
        edge1.source_id = "n00"
        edge1.target_id = "n01"
        edge1.type = relationship_type
        query_graph.edges = [edge1]
        response.message.query_graph = query_graph

        #### Create a mapping dict with the source curie and the target type. This dict is used for reverse lookups by type
        #### for mapping to the QueryGraph.
        response._type_map = dict()
        response._type_map[source_node.curie] = source_node.id
        response._type_map[target_node.type] = target_node.id
        response._type_map[edge1.type] = edge1.id

        #### Loop over all the returned targets and put them into the response structure
        for target_number in target_numbers:
            target_properties = g.nodes[target_number]['properties']
            target_description = target_properties['name']
            if has_intermediate_node:
                path_nodes = [
                    source_node_number, intermediate_node, target_number
                ]
            else:
                path_nodes = [source_node_number, target_number]
            subgraph = g.subgraph(path_nodes)
            res = response.add_subgraph(
                subgraph.nodes(data=True),
                subgraph.edges(data=True),
                "%s and %s are connected by the relationship %s" %
                (source_description, target_description, relationship_type),
                1,
                return_result=True)
            res.essence = "%s" % target_description  # populate with essence of question result
            res.essence_type = target_properties[
                'category']  # populate with the type of the essence of question result
            res.row_data = [
                "%s" % source_description,
                "%s" % source_properties['id'],
                "%s" % target_description,
                "%s" % target_properties['id'],
            ]
        return response