Beispiel #1
0
class TestJointReasoner(unittest.TestCase):
    """Smoke tests that push simple queries through ``ChpJointReasoner``.

    The tests only check that ``run_query`` completes; they make no
    assertions about the returned query.
    """

    def setUp(self):
        """Create a fresh BKB data handler and joint reasoner for each test."""
        handler = BkbDataHandler()
        self.bkb_handler = handler
        self.joint_reasoner = ChpJointReasoner(handler)

    def _run_with_evidence(self, evidence):
        """Run a query for ``EFO:0000714 >= 1000`` under ``evidence``.

        :param evidence: mapping of curie to state used as query evidence.
        :type evidence: dict
        :return: whatever ``ChpJointReasoner.run_query`` returns.
        """
        targets = {"EFO:0000714": {"op": '>=', "value": 1000}}
        request = Query(evidence=evidence, dynamic_targets=targets)
        return self.joint_reasoner.run_query(request)

    def test_joint_reasoner_one_gene(self):
        """Query with a single gene as evidence."""
        self._run_with_evidence({'ENSEMBL:ENSG00000155657': 'True'})

    def test_joint_reasoner_one_gene_one_drug(self):
        """Query with one gene and one drug as evidence."""
        self._run_with_evidence({
            'ENSEMBL:ENSG00000155657': 'True',
            'CHEMBL:CHEMBL83': 'True',
        })
class ChpBrainApiConfig(AppConfig):
    """Application config that builds the shared CHP reasoners.

    Everything below runs in the class body, i.e. once, when this class
    definition is executed. The resulting ``bkb_handler``,
    ``dynamic_reasoner`` and ``joint_reasoner`` are stored as class
    attributes.
    NOTE(review): ``AppConfig`` is presumably Django's; doing this work in the
    class body rather than in ``ready()`` means it runs at import time - confirm
    that this is intended.
    """
    logger.warning('Running CHP Brain API Configuration. May take a minute.')
    name = 'chp_core_brain'

    # Used for distributed reasoning.
    # The hosts-file discovery below is disabled (kept commented out), so the
    # reasoners are always created with no hosts file and 0 processes per host.
    #parent_dir = os.path.dirname(os.path.realpath(__file__))
    #HOSTS_FILENAME = os.path.join(parent_dir, 'hosts')
    #NUM_PROCESSES_PER_HOST = multiprocessing.cpu_count()
    #if not os.path.exists(HOSTS_FILENAME):
    hosts_filename = None
    num_processes_per_host = 0

    # Instantiate the BKB handler for the 'tcga_gbm' disease, BKB version
    # darwin / 2.0. The same handler instance is shared by both reasoners.
    bkb_handler = BkbDataHandler(disease='tcga_gbm',
                                 bkb_major_version='darwin',
                                 bkb_minor_version='2.0')

    logger.info('Instantiating reasoners.')
    # Instantiate the reasoners once so they can be reused via the class.
    dynamic_reasoner = ChpDynamicReasoner(
        bkb_handler=bkb_handler,
        hosts_filename=hosts_filename,
        num_processes_per_host=num_processes_per_host)
    joint_reasoner = ChpJointReasoner(
        bkb_handler=bkb_handler,
        hosts_filename=hosts_filename,
        num_processes_per_host=num_processes_per_host)
    def _setup_handler(self):
        """ Sort the incoming messages and lazily create the reasoners they need.

            Does nothing when no messages were passed. A reasoner is only
            built when its message bucket exists and no reasoner instance
            was supplied already.
        """
        if self.messages is None:
            return

        # Bucket the messages into 'simple' / 'default'.
        self._setup_messages()

        # 'default' messages are answered by the dynamic reasoner.
        needs_dynamic = ('default' in self.message_dict
                         and self.dynamic_reasoner is None)
        if needs_dynamic:
            self.dynamic_reasoner = ChpDynamicReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)

        # 'simple' messages are answered by the joint reasoner.
        needs_joint = ('simple' in self.message_dict
                       and self.joint_reasoner is None)
        if needs_joint:
            self.joint_reasoner = ChpJointReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)
 def setUpClass(cls):
     """Load the one-hop sample queries and build the shared reasoners.

     The JSON fixtures are read relative to the current working directory.
     """
     super(TestOneHopHandler, cls).setUpClass()

     # Class attribute name -> JSON fixture holding the sample query graphs.
     fixtures = (
         ('standard_queries', 'query_samples/onehop/standard_queries.json'),
         ('wildcard_queries', 'query_samples/onehop/wildcard_queries.json'),
     )
     for attribute, path in fixtures:
         with open(path, 'r') as stream:
             setattr(cls, attribute, json.load(stream))

     # One BKB handler shared by both reasoners.
     handler = BkbDataHandler()
     cls.bkb_handler = handler
     cls.dynamic_reasoner = ChpDynamicReasoner(handler)
     cls.joint_reasoner = ChpJointReasoner(handler)
    def _setup_handler(self):
        """ Set the default survival target and prepare queries and reasoners.

            The default target is always stored. The queries are only sorted,
            and missing reasoners only created, when queries were passed.
        """
        self.default_survival_target = {
            "EFO:0000714": {"op": '>=', "value": 970},
        }

        # Nothing else to prepare when no query was passed.
        if self.queries is None:
            return

        # Bucket the queries by one-hop type.
        self._setup_messages()

        # Build whichever reasoner was not supplied already.
        if self.dynamic_reasoner is None:
            self.dynamic_reasoner = ChpDynamicReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)
        if self.joint_reasoner is None:
            self.joint_reasoner = ChpJointReasoner(
                bkb_handler=self.bkb_data_handler,
                hosts_filename=self.hosts_filename,
                num_processes_per_host=self.num_processes_per_host)
class DefaultHandlerMixin:
    def _setup_handler(self):
        # Only do the rest of this if a message is passed
        if self.messages is not None:
            # Setup messages
            self._setup_messages()

            # Instiatate Reasoners
            if 'default' in self.message_dict:
                if self.dynamic_reasoner is None:
                    self.dynamic_reasoner = ChpDynamicReasoner(
                        bkb_handler=self.bkb_data_handler,
                        hosts_filename=self.hosts_filename,
                        num_processes_per_host=self.num_processes_per_host)
            if 'simple' in self.message_dict:
                if self.joint_reasoner is None:
                    self.joint_reasoner = ChpJointReasoner(
                        bkb_handler=self.bkb_data_handler,
                        hosts_filename=self.hosts_filename,
                        num_processes_per_host=self.num_processes_per_host)

    def _setup_messages(self):
        self.message_dict = defaultdict(list)
        for message in self.messages:
            if self._is_simple_message(message):
                self.message_dict['simple'].append(message)
            else:
                self.message_dict['default'].append(message)

    def _is_simple_message(self, message):
        """ Tell whether the message's query graph repeats no tracked category.

            A message is simple when its query graph holds at most one
            phenotypic feature (outcome), one disease, one gene and one drug
            node. Only the first category of each node is inspected.

            :return: False as soon as a second node of any tracked category
                is met, True otherwise.
            :rtype: bool
        """
        tracked_entities = (
            BIOLINK_PHENOTYPIC_FEATURE_ENTITY,
            BIOLINK_DISEASE_ENTITY,
            BIOLINK_GENE_ENTITY,
            BIOLINK_DRUG_ENTITY,
        )
        # One "already met" flag per tracked entity, in the same order.
        already_met = [False] * len(tracked_entities)

        for _node_key, node in message.query_graph.nodes.items():
            for slot, entity in enumerate(tracked_entities):
                if node.categories[0] == entity:
                    # A second node of the same category: not simple.
                    if already_met[slot]:
                        return False
                    already_met[slot] = True
        return True

    def _extract_chp_query(self, message, message_type=None):
        """ Build the CHP query (target plus evidence) for a default message.

            The dynamic target is the phenotypic-feature node of the query
            graph. Its operator/value default to ``>= 970`` and are overridden
            by a ``survival_time`` constraint found on a supported
            disease -> phenotype edge. Gene nodes are added as meta evidence
            and drug nodes as dynamic evidence. No further validation of the
            graph structure is performed.

            :param message: message whose ``query_graph`` is read.
            :param message_type: unused; kept for interface compatibility.
            :return: the populated query, with ``truth_target`` set to
                ``(target_curie, '<operator> <value>')``.
            :raises ValueError: if the query graph has no phenotypic feature
                node to use as target.
        """
        chp_query = ChpQuery(reasoning_type='updating')
        query_graph = message.query_graph

        # Locate the phenotype (outcome) node. It is remembered explicitly so
        # that the target below does not depend on node iteration order.
        target_id = None
        target_node = None
        for node_key, node in query_graph.nodes.items():
            if node.categories[0] == BIOLINK_PHENOTYPIC_FEATURE_ENTITY:
                target_id = node_key
                target_node = node
        if target_node is None:
            raise ValueError(
                'Query graph does not contain a phenotypic feature node to '
                'use as target.')

        # Defaults, used when no survival_time constraint is given.
        survival_value = 970
        survival_operator = '>='

        # Read the survival-time constraint from disease -> phenotype edges.
        for disease_id, node in query_graph.nodes.items():
            if node.categories[0] != BIOLINK_DISEASE_ENTITY:
                continue
            for _edge_key, edge in query_graph.edges.items():
                links_disease_to_target = (
                    self.check_predicate_support(
                        edge.predicates[0], BIOLINK_HAS_PHENOTYPE_ENTITY)
                    and edge.subject == disease_id
                    and edge.object == target_id)
                if not links_disease_to_target:
                    continue
                survival_time_constraint = edge.find_constraint(
                    name='survival_time')
                if survival_time_constraint is None:
                    continue
                survival_value = survival_time_constraint.value
                survival_operator = survival_time_constraint.operator
                # The 'matches' operator is expressed as equality in the query.
                if survival_operator == 'matches':
                    survival_operator = '=='

        # Set the BKB target from the phenotype node itself.
        target_curie = target_node.ids[0]
        chp_query.add_dynamic_target(target_curie, survival_operator,
                                     survival_value)
        truth_target = (target_curie, '{} {}'.format(survival_operator,
                                                     survival_value))

        # Collect evidence: genes are meta evidence, drugs dynamic evidence.
        for _node_key, node in query_graph.nodes.items():
            category = node.categories[0]
            if category == BIOLINK_GENE_ENTITY:
                chp_query.add_meta_evidence(node.ids[0], 'True')
            if category == BIOLINK_DRUG_ENTITY:
                chp_query.add_dynamic_evidence(node.ids[0], '==', 'True')

        # Remember which result entry holds the requested outcome.
        chp_query.truth_target = truth_target
        return chp_query

    def _run_query(self, chp_query, query_type):
        if query_type == 'simple':
            chp_query = self.joint_reasoner.run_query(chp_query)
            # If a probability was found for the target
            if len(chp_query.result) > 0:
                # If a probability was found for the truth target
                if chp_query.truth_target in chp_query.result:
                    total_unnormalized_prob = 0
                    for target, contrib in chp_query.result.items():
                        prob = max(0, contrib)
                        total_unnormalized_prob += prob
                    chp_query.truth_prob = max([
                        0, chp_query.result[(chp_query.truth_target)]
                    ]) / total_unnormalized_prob
                else:
                    chp_query.truth_prob = 0
            else:
                chp_query.truth_prob = -1
            chp_query.report = None
        else:
            chp_query = self.dynamic_reasoner.run_query(chp_query)
            chp_res_dict = chp_query.result.process_updates(normalize=True)
            try:
                chp_query.truth_prob = max([
                    0, chp_res_dict[chp_query.truth_target[0]][
                        chp_query.truth_target[1]]
                ])
            except KeyError:
                # May need to come back and fix this.
                chp_query.truth_prob = -1

            chp_query.report = None
        return chp_query

    def _construct_trapi_message(self, chp_query, message, query_type=None):
        """ Copy the query graph into the knowledge graph and record one result.

            Each query node becomes a knowledge node (gene and drug nodes are
            named through ``self.curies``; all other nodes are named by their
            curie). Each query edge becomes a knowledge edge, and edges whose
            predicate is supported as "has phenotype" carry the computed
            survival probability as an attribute.

            :param chp_query: answered query providing ``truth_prob``.
            :param message: message whose graphs and results are updated.
            :param query_type: unused.
            :return: the same ``message``, updated in place.
        """
        query_graph = message.query_graph
        knowledge_graph = message.knowledge_graph

        # Nodes: one knowledge node per query node.
        node_bindings = {}
        for qnode_key, qnode in query_graph.nodes.items():
            primary_category = qnode.categories[0]
            curie = qnode.ids[0]
            if primary_category == BIOLINK_GENE_ENTITY:
                label = self.curies[BIOLINK_GENE_ENTITY.get_curie()][curie][0]
            elif primary_category == BIOLINK_DRUG_ENTITY:
                label = self.curies[BIOLINK_DRUG_ENTITY.get_curie()][curie][0]
            else:
                # No name lookup for other categories; the curie is the name.
                label = curie
            knode_key = knowledge_graph.add_node(
                curie,
                label,
                primary_category.get_curie(),
            )
            node_bindings[qnode_key] = [knode_key]

        # Edges: one knowledge edge per query edge.
        edge_bindings = {}
        for qedge_key, qedge in query_graph.edges.items():
            primary_predicate = qedge.predicates[0]
            kedge_key = knowledge_graph.add_edge(
                node_bindings[qedge.subject][0],
                node_bindings[qedge.object][0],
                predicate=primary_predicate.get_curie(),
                relation=qedge.relation,
            )
            edge_bindings[qedge_key] = [kedge_key]

            # Only the phenotype edge carries the probability attribute.
            if not self.check_predicate_support(primary_predicate,
                                                BIOLINK_HAS_PHENOTYPE_ENTITY):
                continue
            knowledge_graph.edges[kedge_key].add_attribute(
                attribute_type_id='Probability of Survival',
                value=chp_query.truth_prob,
                value_type_id=BIOLINK_HAS_CONFIDENCE_LEVEL_ENTITY.get_curie(),
            )

        # Record the single result binding query keys to knowledge keys.
        message.results.add_result(
            node_bindings,
            edge_bindings,
        )
        return message
class OneHopHandlerMixin:
    """ OneHopHandler is the handler for 1-hop queries. That is
        query graphs (QGs) that consist of 2 nodes and a single edge.

        :param query: the query graph sent by the ARA.
        :type query: dict
        :param hosts_filename: a filename for a stored QG. Defaults to None
        :type hosts_filename: str
        :param num_processes_per_host: Not implemented thoroughly, but would be
            used for distributed reasoning.
        :type num_processes_per_host: int
        :param max_results: specific to 1-hop queries, specifies the number of
            wildcard genes to return.
        :type max_results: int
    """
    def _setup_handler(self):
        self.default_survival_target = {
            "EFO:0000714": {
                "op": '>=',
                "value": 970
            }
        }

        # Only do the rest of this if a query is passed
        if self.queries is not None:
            # Setup queries
            self._setup_messages()

            # Instiatate Reasoners
            if self.dynamic_reasoner is None:
                self.dynamic_reasoner = ChpDynamicReasoner(
                    bkb_handler=self.bkb_data_handler,
                    hosts_filename=self.hosts_filename,
                    num_processes_per_host=self.num_processes_per_host)
            if self.joint_reasoner is None:
                self.joint_reasoner = ChpJointReasoner(
                    bkb_handler=self.bkb_data_handler,
                    hosts_filename=self.hosts_filename,
                    num_processes_per_host=self.num_processes_per_host)

    def _setup_messages(self):
        self.queries_dict = defaultdict(list)
        for query in self.queries:
            self.queries_dict[self._get_onehop_type(
                query.message)].append(query)

    def _get_onehop_type(self, message):
        """ Classify a one-hop message by the categories of its query nodes.

            :return: ``'gene_two_hop'`` / ``'drug_two_hop'`` when every node
                is a gene / drug; otherwise ``'standard'`` when no node lacks
                ids, or ``'drug'`` / ``'gene'`` after the category of the
                first node without ids (the wildcard).
            :rtype: str
            :raises ValueError: if the wildcard is neither a drug nor a gene.
        """
        wildcard_type = None
        categories = []
        for _node_id, node in message.query_graph.nodes.items():
            # The first node without ids decides the wildcard type.
            if node.ids is None and wildcard_type is None:
                wildcard_type = node.categories[0]
            categories.append(node.categories[0])

        # Implicit 2-hop queries: both ends share the same category.
        if all(category == BIOLINK_GENE_ENTITY for category in categories):
            return 'gene_two_hop'
        if all(category == BIOLINK_DRUG_ENTITY for category in categories):
            return 'drug_two_hop'

        # Fully specified graph: a standard one-hop query.
        if wildcard_type is None:
            return 'standard'
        if wildcard_type == BIOLINK_DRUG_ENTITY:
            return 'drug'
        if wildcard_type == BIOLINK_GENE_ENTITY:
            return 'gene'
        raise ValueError(
            'Did not understand wildcard type {}.'.format(wildcard_type))

    def check_query(self):
        """ Currently not implemented. Would check validity of query.
        """
        return True

    @staticmethod
    def _process_predicate_proxy(qedge, chp_query):
        predicate_proxy_constraint = qedge.find_constraint('predicate_proxy')
        if predicate_proxy_constraint is None:
            predicate_proxy = get_default_predicate_proxy()
            proxy_constraint = qedge.find_constraint(predicate_proxy)
        else:
            predicate_proxy = predicate_proxy_constraint.value[0]
            proxy_constraint = qedge.find_constraint(predicate_proxy)
        if proxy_constraint is None:
            proxy_operator = get_default_operator(predicate_proxy)
            proxy_value = get_default_value(predicate_proxy)
        else:
            proxy_operator = proxy_constraint.operator
            proxy_value = proxy_constraint.value
        # Setup dynamic target
        chp_query.add_dynamic_target(predicate_proxy, proxy_operator,
                                     proxy_value)
        return chp_query

    @staticmethod
    def _process_predicate_context(qedge, message_type, chp_query):
        evidence = {}
        dynamic_evidence = {}
        predicate_context_constraint = qedge.find_constraint(
            'predicate_context')

        if predicate_context_constraint is not None:
            for context in predicate_context_constraint.value:
                context_curie = get_biolink_entity(context)
                context_constraint = qedge.find_constraint(context)
                # used 2 hop structure where context curie is the proxy
                if context_constraint is None:
                    continue
                if context_curie == BIOLINK_GENE_ENTITY:
                    if message_type == 'gene' or message_type == 'drug_two_hop':
                        if type(context_constraint.value) is list:
                            for _curie in context_constraint.value:
                                chp_query.add_dynamic_evidence(
                                    _curie, '==', 'True')
                        else:
                            chp_query.add_dynamic_evidence(
                                context_constraint.value, '==', 'True')
                    else:
                        if type(context_constraint.value) is list:
                            for _curie in context_constraint.value:
                                chp_query.add_meta_evidence(_curie, 'True')
                        else:
                            chp.add_meta_evidence(_curie, 'True')
                elif context_curie == BIOLINK_DRUG_ENTITY:
                    if message_type == 'drug' or message_type == 'gene_two_hop':
                        if type(context_constraint.value) is list:
                            for _curie in context_constraint.value:
                                chp_query.add_dynamic_evidence(
                                    _curie, '==', 'True')
                        else:
                            chp_query.add_dynamic_evidence(
                                context_constraint.value, '==', 'True')
                    else:
                        if type(context_constraint.value) is list:
                            for _curie in context_constraint.value:
                                chp_query.add_meta_evidence(_curie, 'True')
                        else:
                            chp_query.add_meta_evidence(_curie, 'True')
                else:
                    raise ValueError(
                        'Unsupported context type: {}'.format(context_curie))
        return chp_query

    def _extract_chp_query(self, query, message_type):
        """ Build the CHP query (target plus evidence) for a one-hop query.

            Only the first edge of the query graph is read: its predicate
            proxy becomes the dynamic target and its predicate context
            becomes evidence. Query nodes that carry ids are then added as
            meta evidence: genes and drugs for ``'standard'``, drugs for
            ``'gene'`` / ``'drug_two_hop'`` and genes for ``'drug'`` /
            ``'gene_two_hop'``. Nodes without ids (wildcards) are skipped.

            :param query: query whose ``message.query_graph`` is read.
            :param message_type: one-hop type of the message.
            :return: the populated query, with ``truth_target`` set to
                ``(target, '<op> <value>')``.
            :raises ValueError: if the query graph has no edge.
        """
        # Extract Message
        message = query.message
        query_graph = message.query_graph

        # Initialize CHP BKB Query
        chp_query = ChpQuery(reasoning_type='updating')

        # Grab the first (for a one-hop query, the only) edge.
        first_edge = next(iter(query_graph.edges.items()), None)
        if first_edge is None:
            raise ValueError('One-hop query graph does not contain an edge.')
        _qedge_id, qedge = first_edge

        # Process predicate proxy
        chp_query = self._process_predicate_proxy(qedge, chp_query)
        # Process predicate context
        chp_query = self._process_predicate_context(qedge, message_type,
                                                    chp_query)
        #TODO: Probably need a more robust solution for when no context is
        # provided in wildcard queries and you need it.

        # Node categories that count as evidence for this message type.
        if message_type == 'standard':
            evidence_categories = (BIOLINK_GENE_ENTITY, BIOLINK_DRUG_ENTITY)
        elif message_type == 'gene' or message_type == 'drug_two_hop':
            evidence_categories = (BIOLINK_DRUG_ENTITY,)
        elif message_type == 'drug' or message_type == 'gene_two_hop':
            evidence_categories = (BIOLINK_GENE_ENTITY,)
        else:
            evidence_categories = ()

        for _qnode_id, qnode in query_graph.nodes.items():
            # Wildcard nodes carry no curie to add as evidence.
            if qnode.ids is None:
                continue
            category = qnode.categories[0]
            if any(category == entity for entity in evidence_categories):
                chp_query.add_meta_evidence(qnode.ids[0], 'True')

        target = list(chp_query.dynamic_targets.keys())[0]
        target_spec = chp_query.dynamic_targets[target]
        # Set some other helpful attributes
        chp_query.truth_target = (target, '{} {}'.format(
            target_spec["op"], target_spec["value"]))
        return chp_query

    def _run_query(self, chp_query, query_type):
        """ Runs build BKB query to calculate probability of survival.
            A probability is returned to specificy survival time w.r.t a drug.
            Contributions for each gene are calculuated and classified under
            their true/false target assignments.
        """
        if query_type == 'standard':
            chp_query = self.joint_reasoner.run_query(
                chp_query, interpolation_type='gene')
            # If a probability was found for the target
            if len(chp_query.result) > 0:
                # If a probability was found for the truth target
                if chp_query.truth_target in chp_query.result:
                    total_unnormalized_prob = 0
                    for target, contrib in chp_query.result.items():
                        prob = max(0, contrib)
                        total_unnormalized_prob += prob
                    chp_query.truth_prob = max([
                        0, chp_query.result[(chp_query.truth_target)]
                    ]) / total_unnormalized_prob
                else:
                    chp_query.truth_prob = 0
            else:
                chp_query.truth_prob = -1
            chp_query.report = None
            return chp_query
        else:
            # Do this if a disease node is present
            if query_type == 'gene' or query_type == 'drug_two_hop':
                chp_query = self.joint_reasoner.run_query(
                    chp_query,
                    interpolation_type='drug',
                    contribution_type='gene')
            elif query_type == 'drug' or query_type == 'gene_two_hop':
                chp_query = self.joint_reasoner.run_query(
                    chp_query,
                    interpolation_type='gene',
                    contribution_type='drug')

            chp_res_dict = chp_query.result
            if chp_query.truth_target in chp_res_dict:
                unnormalized_truth_prob = chp_res_dict[chp_query.truth_target]
            else:
                unnormalized_truth_prob = 0

            normalize = 0
            for target, prob in chp_res_dict.items():
                normalize += prob
            unnormalized_false_prob = normalize - unnormalized_truth_prob

            for target in chp_res_dict.keys():
                chp_res_dict[target] /= normalize

            if chp_query.truth_target in chp_res_dict:
                chp_query.truth_prob = chp_res_dict[chp_query.truth_target]
            else:
                chp_query.truth_target = 0

        # organize the contributions over curie then target
        wildcard_contributions = defaultdict(lambda: defaultdict(int))
        for target, curies in chp_query.contributions.items():
            for curie, contrib in curies.items():
                if curie[1] == 'True':
                    wildcard_contributions[curie[0]][target] = contrib

        #for curie in wildcard_contributions.keys():
        # normalize gene contributions by the target and take relative difference
        for curie in wildcard_contributions.keys():
            truth_target_gene_contrib = 0
            nontruth_target_gene_contrib = 0
            for target, contrib in wildcard_contributions[curie].items():
                try:
                    if target[0] == chp_query.truth_target[0] and target[
                            1] == chp_query.truth_target[1]:
                        truth_target_gene_contrib += contrib / unnormalized_truth_prob
                    else:
                        nontruth_target_gene_contrib += contrib / unnormalized_false_prob
                except ZeroDivisionError:
                    continue
            wildcard_contributions[curie][
                'relative'] = truth_target_gene_contrib - nontruth_target_gene_contrib

        if query_type == 'drug_two_hop' or query_type == 'gene_two_hop':
            # Build relative contribution results and added associated edges into knowledge graph
            unsorted_wildcard_contributions = []
            for wildcard, contrib_dict in wildcard_contributions.items():
                unsorted_wildcard_contributions.append(
                    (contrib_dict['relative'], wildcard))
            truncated_sorted_wildcard_contributions = [
                (contrib, wildcard) for contrib, wildcard in sorted(
                    unsorted_wildcard_contributions,
                    key=lambda x: abs(x[0]),
                    reverse=True)
            ][:self.max_results]
            truncated_contribution_list = [
                curie[1] for curie in truncated_sorted_wildcard_contributions
            ]

            chp_query.contributions = None
            wildcard_contributions = defaultdict(lambda: defaultdict(int))
            for contrib in truncated_contribution_list:
                chp_query_extended = copy.deepcopy(chp_query)
                chp_query_extended.add_meta_evidence(contrib, 'True')
                if query_type == 'drug_two_hop':
                    chp_query_extended = self.joint_reasoner.run_query(
                        chp_query_extended, contribution_type='drug')
                else:
                    chp_query_extended = self.joint_reasoner.run_query(
                        chp_query_extended, contribution_type='gene')

                chp_res_dict = chp_query_extended.result
                if chp_query_extended.truth_target in chp_res_dict:
                    extended_unnormalized_truth_prob = chp_res_dict[
                        chp_query_extended.truth_target]
                else:
                    extended_unnormalized_truth_prob = 0
                normalize = 0
                for target, prob in chp_res_dict.items():
                    normalize += prob
                extended_unnormalized_false_prob = normalize - extended_unnormalized_truth_prob

                # organize the contributions over curie then target
                extended_wildcard_contributions = defaultdict(
                    lambda: defaultdict(int))
                for target, curies in chp_query_extended.contributions.items():
                    for curie, contrib in curies.items():
                        if curie[1] == 'True':
                            extended_wildcard_contributions[
                                curie[0]][target] = contrib

                # normalize gene contributions by the target and take relative difference
                for curie in extended_wildcard_contributions.keys():
                    truth_target_gene_contrib = 0
                    nontruth_target_gene_contrib = 0
                    for target, contrib in extended_wildcard_contributions[
                            curie].items():
                        try:
                            if target[0] == chp_query_extended.truth_target[
                                    0] and target[
                                        1] == chp_query_extended.truth_target[
                                            1]:
                                truth_target_gene_contrib += contrib / extended_unnormalized_truth_prob * unnormalized_truth_prob
                            else:
                                nontruth_target_gene_contrib += contrib / extended_unnormalized_false_prob * unnormalized_false_prob
                        except ZeroDivisionError:
                            continue
                    wildcard_contributions[curie]['relative'] += (
                        truth_target_gene_contrib -
                        nontruth_target_gene_contrib)

        chp_query.report = None
        chp_query.wildcard_contributions = wildcard_contributions

        return chp_query

    def _construct_trapi_message(self, chp_query, query, query_type):
        """ Fill the query's message with knowledge graph nodes, edges and results.

            Bound query nodes (those with ids) are added to the knowledge graph
            once. For a 'standard' query a single result is added whose edges
            carry ``chp_query.truth_prob``. For any other query type one result
            is added per wildcard curie, ordered by the absolute value of its
            'relative' contribution and limited to ``self.max_results``.

            :param chp_query: the processed CHP query; ``truth_prob`` is read for
                standard queries, ``wildcard_contributions`` otherwise.
            :param query: object whose ``message`` is updated in place.
            :param query_type: 'standard' or a wildcard type ('gene', 'drug',
                'gene_two_hop', 'drug_two_hop').
            :returns: the ``query`` object that was passed in.
            :raises ValueError: if a bound query node is not a gene, drug or
                disease node.
        """
        # Helpful short cuts
        message = query.message
        qg = message.query_graph
        kg = message.knowledge_graph

        edge_bindings = {}
        node_bindings = {}

        # Process bound nodes; wildcard nodes (no ids) are bound per result below.
        for qnode_id, qnode in qg.nodes.items():
            if qnode.ids is None:
                continue
            category = qnode.categories[0]
            curie = qnode.ids[0]
            if category == BIOLINK_GENE_ENTITY:
                name = self.curies[BIOLINK_GENE_ENTITY.get_curie()][curie][0]
            elif category == BIOLINK_DRUG_ENTITY:
                name = self.curies[BIOLINK_DRUG_ENTITY.get_curie()][curie][0]
            elif category == BIOLINK_DISEASE_ENTITY:
                #TODO: Add diseases to curies; until then the curie doubles as the name.
                name = curie
            else:
                # Without this guard the binding would silently reuse the
                # knowledge node of a previously processed query node.
                raise ValueError(
                    'Unsupported category {} for bound query node {}.'.format(
                        category, qnode_id))
            knode_key = kg.add_node(curie, name, category.get_curie())
            node_bindings[qnode_id] = [knode_key]

        if query_type == 'standard':
            for qedge_key, qedge in qg.edges.items():
                kedge_key = kg.add_edge(
                    node_bindings[qedge.subject][0],
                    node_bindings[qedge.object][0],
                    predicate=qedge.predicates[0].get_curie(),
                )
                edge_bindings[qedge_key] = [kedge_key]
                # Add Attribute
                kg.edges[kedge_key].add_attribute(
                    attribute_type_id='Probability of Survival',
                    value=chp_query.truth_prob,
                    value_type_id=BIOLINK_HAS_CONFIDENCE_LEVEL_ENTITY.
                    get_curie(),
                )
            message.results.add_result(
                node_bindings,
                edge_bindings,
            )
            return query

        # Rank wildcards by the magnitude of their relative contribution.
        ranked_contributions = sorted(
            ((contrib_dict['relative'], wildcard) for wildcard, contrib_dict in
             chp_query.wildcard_contributions.items()),
            key=lambda pair: abs(pair[0]),
            reverse=True)

        gene_query = query_type in ('gene', 'gene_two_hop')
        drug_query = query_type in ('drug', 'drug_two_hop')

        # Add a knowledge graph node, edges and a result for each wildcard
        for contrib, wildcard in ranked_contributions[:self.max_results]:
            _node_bindings = {}
            _edge_bindings = {}
            # Process node bindings
            bad_wildcard = False
            for qnode_id, qnode in qg.nodes.items():
                category = qnode.categories[0]
                is_wildcard = qnode.ids is None
                if category == BIOLINK_GENE_ENTITY and is_wildcard and gene_query:
                    curie_category = BIOLINK_GENE_ENTITY.get_curie()
                elif category == BIOLINK_DRUG_ENTITY and is_wildcard and drug_query:
                    curie_category = BIOLINK_DRUG_ENTITY.get_curie()
                else:
                    _node_bindings[qnode_id] = node_bindings[qnode_id]
                    continue
                try:
                    name = self.curies[curie_category][wildcard][0]
                except KeyError:
                    # Skip wildcards we cannot name rather than failing the
                    # whole response.
                    logger.info("Couldn't find {} in curies[{}]".format(
                        wildcard, curie_category))
                    bad_wildcard = True
                    break
                knode_id = kg.add_node(wildcard, name, category.get_curie())
                _node_bindings[qnode_id] = [knode_id]
            if bad_wildcard:
                continue
            # Process edge bindings
            for qedge_id, qedge in qg.edges.items():
                # NOTE(review): the standard branch passes
                # predicates[0].get_curie() here; confirm which form
                # kg.add_edge expects.
                kedge_id = kg.add_edge(
                    _node_bindings[qedge.subject][0],
                    _node_bindings[qedge.object][0],
                    predicate=qedge.predicates[0],
                )
                kg.edges[kedge_id].add_attribute(
                    attribute_type_id='Contribution',
                    value=contrib,
                    value_type_id=BIOLINK_HAS_EVIDENCE_ENTITY.get_curie(),
                )
                _edge_bindings[qedge_id] = [kedge_id]
            # Process node and edge binding results
            message.results.add_result(
                _node_bindings,
                _edge_bindings,
            )
        return query
Beispiel #8
0
 def setUp(self):
     """ Build the BKB data handler and the joint reasoner that uses it. """
     handler = BkbDataHandler()
     self.bkb_handler = handler
     self.joint_reasoner = ChpJointReasoner(handler)
Beispiel #9
0
class DefaultHandlerMixin:
    def _setup_handler(self):
        # Only do the rest of this if a query is passed
        if self.init_query is not None:
            # Setup queries
            self._setup_queries()

            # Instiatate Reasoners
            if 'default' in self.query_dict:
                if self.dynamic_reasoner is None:
                    self.dynamic_reasoner = ChpDynamicReasoner(
                        bkb_handler=self.bkb_data_handler,
                        hosts_filename=self.hosts_filename,
                        num_processes_per_host=self.num_processes_per_host)
            if 'simple' in self.query_dict:
                if self.joint_reasoner is None:
                    self.joint_reasoner = ChpJointReasoner(
                        bkb_handler=self.bkb_data_handler,
                        hosts_filename=self.hosts_filename,
                        num_processes_per_host=self.num_processes_per_host)

    def _setup_queries(self):
        if type(self.init_query) == list:
            self.query_dict = defaultdict(list)
            self.query_map = []
            for query in self.init_query:
                self.query_map.append(query["query_id"])
                if self._is_simple_query(query):
                    self.query_dict['simple'].append(
                        self._setup_single_query(query))
                else:
                    self.query_dict['default'].append(
                        self._setup_single_query(query))
        else:
            if self._is_simple_query(self.init_query):
                self.query_dict = {
                    "simple": [self._setup_single_query(self.init_query)]
                }
            else:
                self.query_dict = {
                    "default": [self._setup_single_query(self.init_query)]
                }

    def _is_simple_query(self, query):
        """ Check if this is a {0 or 1} drug, {0 or 1} gene, one outcome standard query.
        """
        _found_outcome = False
        _found_disease = False
        _found_gene = False
        _found_drug = False
        for node_key, node in query["query_graph"]["nodes"].items():
            if node["category"] == BIOLINK_PHENOTYPIC_FEATURE:
                # If we've already found the target and there's another phenotypic feature, then this isn't simple.
                if _found_outcome:
                    return False
                else:
                    _found_outcome = True
            if node['category'] == BIOLINK_DISEASE:
                # If we've already found disease and there's another disease, then this isn't simple.
                if _found_disease:
                    return False
                else:
                    _found_disease = True
            if node["category"] == BIOLINK_GENE:
                if _found_gene:
                    return False
                else:
                    _found_gene = True
            if node['category'] == BIOLINK_DRUG:
                if _found_drug:
                    return False
                else:
                    _found_drug = True
        return True

    def _extract_chp_query(self, query, query_type=None):
        """ Translate a query dict into a CHP ``Query``.

            The phenotypic feature node supplies the dynamic target; its
            operator and value come from the ``properties`` of the disease to
            phenotypic feature edge ('qualifier' and 'days'), defaulting to
            ``>= 970`` when that edge has no properties. Gene and drug nodes
            become evidence set to 'True'.

            :param query: dict with a ``"query_graph"`` holding ``nodes`` and
                ``edges`` and, optionally, a ``"query_id"``.
            :param query_type: unused; kept for interface compatibility.
            :returns: the ``Query`` with ``truth_target`` and ``query_id`` set.
            :raises ValueError: if the graph has no phenotypic feature node or
                no disease node linked to it by the expected edge.
        """
        nodes = query["query_graph"]['nodes']
        edges = query["query_graph"]['edges']

        evidence = {}
        targets = []
        dynamic_evidence = {}
        dynamic_targets = {}

        # get phenotype node (when several are present the last one is used)
        target_id = None
        for node_key, node in nodes.items():
            if node['category'] == BIOLINK_PHENOTYPIC_FEATURE:
                target_id = node_key
        if target_id is None:
            raise ValueError(
                'Query graph has no phenotypic feature node to use as target.')
        # Key the target on the phenotype node itself, not on whichever node
        # happened to be iterated last.
        target_curie = nodes[target_id]['id']

        # get the survival constraint from the disease -> phenotype edge
        days = None
        qualifier = None
        found_target_edge = False
        for node_key, node in nodes.items():
            if node['category'] != BIOLINK_DISEASE:
                continue
            for edge in edges.values():
                if (edge['predicate'] ==
                        BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE
                        and edge['subject'] == node_key
                        and edge['object'] == target_id):
                    found_target_edge = True
                    if 'properties' in edge:
                        days = edge['properties']['days']
                        qualifier = edge['properties']['qualifier']
                    else:
                        days = 970
                        qualifier = '>='
        if not found_target_edge:
            raise ValueError(
                'Query graph has no disease node linked to the phenotypic '
                'feature node.')

        # set BKB target
        dynamic_targets[target_curie] = {
            "op": qualifier,
            "value": days,
        }
        truth_target = (target_curie, '{} {}'.format(qualifier, days))

        # get evidence
        # NOTE(review): gene evidence keys carry a leading underscore while
        # drug keys do not; kept as found - confirm this is intended.
        for node in nodes.values():
            if node['category'] == BIOLINK_GENE:
                evidence["_" + node['id']] = 'True'
            if node['category'] == BIOLINK_DRUG:
                evidence[node["id"]] = 'True'

        # produce BKB query
        chp_query = Query(evidence=evidence,
                          targets=targets,
                          dynamic_evidence=dynamic_evidence,
                          dynamic_targets=dynamic_targets,
                          type='updating')
        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        chp_query.query_id = query["query_id"] if 'query_id' in query else None
        return chp_query

    def _run_query(self, chp_query, query_type):
        if query_type == 'simple':
            chp_query = self.joint_reasoner.run_query(chp_query)
            # If a probability was found for the target
            if len(chp_query.result) > 0:
                # If a probability was found for the truth target
                if chp_query.truth_target in chp_query.result:
                    total_unnormalized_prob = 0
                    for target, contrib in chp_query.result.items():
                        prob = max(0, contrib)
                        total_unnormalized_prob += prob
                    chp_query.truth_prob = max([
                        0, chp_query.result[(chp_query.truth_target)]
                    ]) / total_unnormalized_prob
                else:
                    chp_query.truth_prob = 0
            else:
                chp_query.truth_prob = -1
            chp_query.report = None
        else:
            chp_query = self.dynamic_reasoner.run_query(chp_query)
            chp_res_dict = chp_query.result.process_updates(normalize=True)
            chp_query.truth_prob = max([
                0, chp_res_dict[chp_query.truth_target[0]][
                    chp_query.truth_target[1]]
            ])
            chp_query.report = None
        return chp_query

    def _construct_trapi_response(self, chp_query, query_type=None):
        # Get orginal query
        if len(self.init_query) == 1:
            query = self.init_query[0]
            query_id = None
        else:
            for _query in self.init_query:
                if _query["query_id"] == chp_query.query_id:
                    query = _query
                    query_id = query["query_id"]
                    break

        kg = copy.deepcopy(query["query_graph"])
        # update target node info and form edge pair combos for results graph

        node_pairs = dict()
        for node_key in list(kg['nodes'].keys())[:]:
            qg_node_curie = kg['nodes'][node_key].pop('id')
            kg['nodes'][qg_node_curie] = kg['nodes'].pop(node_key)
            node_pairs[node_key] = qg_node_curie
            if kg['nodes'][qg_node_curie]['category'] == BIOLINK_GENE:
                kg['nodes'][qg_node_curie]['name'] = self._get_curie_name(
                    BIOLINK_GENE, qg_node_curie)[0]
            elif kg['nodes'][qg_node_curie]['category'] == BIOLINK_DRUG:
                kg['nodes'][qg_node_curie]['name'] = self._get_curie_name(
                    BIOLINK_DRUG, qg_node_curie)[0]

        edge_pairs = dict()
        knowledge_edges = 0
        for edge_key in list(kg['edges'].keys())[:]:
            kg_id = 'kge{}'.format(knowledge_edges)
            knowledge_edges += 1
            kg['edges'][kg_id] = kg['edges'].pop(edge_key)
            kg['edges'][kg_id]['subject'] = node_pairs[kg['edges'][kg_id]
                                                       ['subject']]
            kg['edges'][kg_id]['object'] = node_pairs[kg['edges'][kg_id]
                                                      ['object']]
            edge_pairs[edge_key] = kg_id
            if kg['edges'][kg_id][
                    'predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE:
                if 'properties' in kg['edges'][kg_id].keys():
                    kg['edges'][kg_id].pop('properties')
                kg['edges'][kg_id]['attributes'] = [{
                    'name':
                    'Probability of Survival',
                    'type':
                    BIOLINK_PROBABILITY,
                    'value':
                    chp_query.truth_prob
                }]

        results = []
        results.append({
            'edge_bindings': {},
            'node_bindings': {},
        })
        for edge_pair_key in edge_pairs:
            results[0]['edge_bindings'][edge_pair_key] = [{
                'id':
                edge_pairs[edge_pair_key]
            }]
        for node_pair_key in node_pairs:
            results[0]['node_bindings'][node_pair_key] = [{
                'id':
                node_pairs[node_pair_key]
            }]

        # query response
        trapi_message = {
            'query_graph': query["query_graph"],
            'knowledge_graph': kg,
            'results': results
        }
        trapi_response = {'message': trapi_message}
        return query_id, trapi_response
    @classmethod
    def setUpClass(cls):
        """ Create the BKB data handler and both reasoners once for the class.

            unittest invokes ``setUpClass`` on the class itself, so it has to
            be a classmethod.
        """
        super(TestBaseHandler, cls).setUpClass()

        cls.bkb_handler = BkbDataHandler()
        cls.dynamic_reasoner = ChpDynamicReasoner(cls.bkb_handler)
        cls.joint_reasoner = ChpJointReasoner(cls.bkb_handler)
Beispiel #11
0
 @classmethod
 def setUpClass(cls):
     """ Create the BKB data handler and joint reasoner once for the class.

         unittest invokes ``setUpClass`` on the class itself, so it has to
         be a classmethod.
     """
     super(TestJointReasoner, cls).setUpClass()
     cls.bkb_handler = BkbDataHandler()
     cls.joint_reasoner = ChpJointReasoner(cls.bkb_handler)
Beispiel #12
0
class OneHopHandlerMixin:
    """ OneHopHandler is the handler for 1-hop queries. That is,
        query graphs (QGs) that consist of 2 nodes and a single edge.

        :param query: the query graph sent by the ARA.
        :type query: dict
        :param hosts_filename: a filename for a stored QG. Defaults to None
        :type hosts_filename: str
        :param num_processes_per_host: Not implemented thoroughly, but would be
            used for distributed reasoning.
        :type num_processes_per_host: int
        :param max_results: specific to 1-hop queries, specifies the number of
            wildcard genes to return.
        :type max_results: int
    """
    def _setup_handler(self):
        self.default_survival_target = {
            "EFO:0000714": {
                "op": '>=',
                "value": 970
            }
        }

        # Only do the rest of this if a query is passed
        if self.messages is not None:
            # Setup queries
            self._setup_messages()

            # Instiatate Reasoners
            if self.dynamic_reasoner is None:
                self.dynamic_reasoner = ChpDynamicReasoner(
                    bkb_handler=self.bkb_data_handler,
                    hosts_filename=self.hosts_filename,
                    num_processes_per_host=self.num_processes_per_host)
            if self.joint_reasoner is None:
                self.joint_reasoner = ChpJointReasoner(
                    bkb_handler=self.bkb_data_handler,
                    hosts_filename=self.hosts_filename,
                    num_processes_per_host=self.num_processes_per_host)

    def _setup_messages(self):
        self.message_dict = defaultdict(list)
        for message in self.messages:
            self.message_dict[self._get_onehop_type(message)].append(message)

    def _get_onehop_type(self, message):
        wildcard_type = None
        for node_id, node in message.query_graph.nodes.items():
            if node.ids is None:
                if wildcard_type is None:
                    wildcard_type = node.categories[0]
        # If standard onehop query
        if wildcard_type is None:
            return 'standard'
        elif wildcard_type == BIOLINK_DRUG_ENTITY:
            return 'drug'
        elif wildcard_type == BIOLINK_GENE_ENTITY:
            return 'gene'
        else:
            raise ValueError(
                'Did not understand wildcard type {}.'.format(wildcard_type))

    def check_query(self):
        """ Currently not implemented. Would check validity of query.
        """
        return True

    @staticmethod
    def _process_predicate_proxy(qedge):
        dynamic_targets = {}
        predicate_proxy_constraint = qedge.find_constraint('predicate_proxy')
        if predicate_proxy_constraint is None:
            predicate_proxy = get_default_predicate_proxy()
            proxy_constraint = qedge.find_constraint(predicate_proxy)
        else:
            predicate_proxy = predicate_proxy_constraint.value[0]
            proxy_constraint = qedge.find_constraint(predicate_proxy)
        if proxy_constraint is None:
            proxy_operator = get_default_operator(predicate_proxy)
            proxy_value = get_default_value(predicate_proxy)
        else:
            proxy_operator = proxy_constraint.operator
            proxy_value = proxy_constraint.value
        # Setup dynamic target
        dynamic_targets[predicate_proxy] = {
            "op": proxy_operator,
            "value": proxy_value,
        }
        return dynamic_targets

    @staticmethod
    def _process_predicate_context(qedge, message_type):
        evidence = {}
        dynamic_evidence = {}
        predicate_context_constraint = qedge.find_constraint(
            'predicate_context')
        if predicate_context_constraint is not None:
            for context in predicate_context_constraint.value:
                context_curie = get_biolink_entity(context)
                context_constraint = qedge.find_constraint(context)
                if context_constraint is None:
                    raise ValueError(
                        'Provided no context details for {}'.format(context))
                if context_curie == BIOLINK_GENE_ENTITY:
                    if message_type == 'gene':
                        if type(context_constraint.value) is list:
                            for _curie in context_constraint.value:
                                dynamic_evidence[_curie] = {
                                    "op": '==',
                                    "value": 'True',
                                }
                        else:
                            dynamic_evidence[context_constraint.value] = {
                                "op": '==',
                                "value": 'True',
                            }
                    else:
                        if type(context_constraint.value) is list:
                            for _curie in context_constraint.value:
                                evidence['_{}'.format(_curie)] = 'True'
                        else:
                            evidence['_{}'.format(_curie)] = 'True'
                elif context_curie == BIOLINK_DRUG_ENTITY:
                    if message_type == 'drug':
                        if type(context_constraint.value) is list:
                            for _curie in context_constraint.value:
                                dynamic_evidence[_curie] = {
                                    "op": '==',
                                    "value": 'True',
                                }
                        else:
                            dynamic_evidence[context_constraint.value] = {
                                "op": '==',
                                "value": 'True',
                            }
                    else:
                        if type(context_constraint.value) is list:
                            for _curie in context_constraint.value:
                                evidence['_{}'.format(_curie)] = 'True'
                        else:
                            evidence['_{}'.format(_curie)] = 'True'
                else:
                    raise ValueError(
                        'Unsupported context type: {}'.format(context_curie))
        return evidence, dynamic_evidence

    def _extract_chp_query(self, message, message_type):
        """ Translate a one-hop message into a CHP ``Query``.

            Bound gene and/or drug nodes become evidence depending on the
            message type, the first edge of the query graph supplies the
            dynamic target (predicate proxy) and any further evidence
            (predicate context).

            :param message: message whose ``query_graph`` is read.
            :param message_type: 'standard', 'gene' or 'drug'.
            :returns: the ``Query`` with ``truth_target`` set.
        """
        # Categories whose nodes are turned into evidence for each message type
        if message_type == 'standard':
            evidence_categories = (BIOLINK_GENE_ENTITY, BIOLINK_DRUG_ENTITY)
        elif message_type == 'gene':
            evidence_categories = (BIOLINK_DRUG_ENTITY, )
        elif message_type == 'drug':
            evidence_categories = (BIOLINK_GENE_ENTITY, )
        else:
            evidence_categories = ()

        evidence = {}
        qnodes = message.query_graph.nodes.values() if evidence_categories else ()
        for qnode in qnodes:
            if any(qnode.categories[0] == category
                   for category in evidence_categories):
                evidence['_{}'.format(qnode.ids[0])] = 'True'

        # Grab edge: only the first edge of the graph is used
        for qedge in message.query_graph.edges.values():
            break
        # Process predicate proxy
        dynamic_targets = self._process_predicate_proxy(qedge)
        # Process predicate context
        context_evidence, context_dynamic_evidence = self._process_predicate_context(
            qedge, message_type)
        evidence.update(context_evidence)
        dynamic_evidence = dict(context_dynamic_evidence)
        #TODO: Probably need a more robust solution for when no context is provided in wildcard queries and you need it.

        target = list(dynamic_targets)[0]
        target_spec = dynamic_targets[target]
        truth_target = (target,
                        '{} {}'.format(target_spec["op"], target_spec["value"]))
        chp_query = Query(evidence=evidence,
                          targets=None,
                          dynamic_evidence=dynamic_evidence,
                          dynamic_targets=dynamic_targets,
                          type='updating')
        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        return chp_query

    def _run_query(self, chp_query, query_type):
        """ Runs build BKB query to calculate probability of survival.
            A probability is returned to specificy survival time w.r.t a drug.
            Contributions for each gene are calculuated and classified under
            their true/false target assignments.
        """
        if query_type == 'standard':
            chp_query = self.joint_reasoner.run_query(chp_query)
            # If a probability was found for the target
            if len(chp_query.result) > 0:
                # If a probability was found for the truth target
                if chp_query.truth_target in chp_query.result:
                    total_unnormalized_prob = 0
                    for target, contrib in chp_query.result.items():
                        prob = max(0, contrib)
                        total_unnormalized_prob += prob
                    chp_query.truth_prob = max([
                        0, chp_query.result[(chp_query.truth_target)]
                    ]) / total_unnormalized_prob
                else:
                    chp_query.truth_prob = 0
            else:
                chp_query.truth_prob = -1
            chp_query.report = None
            return chp_query
        else:
            # Do this if a disease node is present
            if len(chp_query.evidence) == 0:
                # probability of survival
                chp_query = self.joint_reasoner.run_query(chp_query)
                if len(chp_query.result) > 0:
                    # If a probability was found for the truth target
                    if chp_query.truth_target in chp_query.result:
                        total_unnormalized_prob = 0
                        for target, contrib in chp_query.result.items():
                            prob = max(0, contrib)
                            total_unnormalized_prob += prob
                        chp_query.truth_prob = max([
                            0, chp_query.result[(chp_query.truth_target)]
                        ]) / total_unnormalized_prob
                    else:
                        chp_query.truth_prob = 0
                else:
                    chp_query.truth_prob = -1

                # patient_contributions
                num_all = len(self.joint_reasoner.patient_data)
                num_matched = chp_query.truth_prob * num_all
                patient_contributions = defaultdict(lambda: defaultdict(int))
                for patient, feature_dict in self.joint_reasoner.patient_data.items(
                ):
                    for predicate_proxy, proxy_info in chp_query.dynamic_targets.items(
                    ):
                        proxy_op = get_operator(proxy_info["op"])
                        proxy_opp_op = get_opposite_operator(proxy_info["op"])
                        proxy_value = proxy_info["value"]
                        if proxy_op(feature_dict[predicate_proxy],
                                    proxy_value):
                            if num_matched == 0:
                                patient_contributions[(
                                    predicate_proxy,
                                    '{} {}'.format(proxy_op,
                                                   proxy_value))][patient] = 0
                            else:
                                patient_contributions[(
                                    predicate_proxy,
                                    '{} {}'.format(proxy_op, proxy_value)
                                )][patient] = chp_query.truth_prob / num_matched
                        else:
                            if num_matched == 0:
                                patient_contributions[(
                                    predicate_proxy,
                                    '{} {}'.format(proxy_opp_op, proxy_value)
                                )][patient] = (
                                    1 - chp_query.truth_prob) / num_matched
                            else:
                                patient_contributions[(
                                    predicate_proxy,
                                    '{} {}'.format(proxy_opp_op, proxy_value)
                                )][patient] = (1 - chp_query.truth_prob) / (
                                    num_all - num_matched)
                '''
                num_survived = 0
                num_all = len(self.dynamic_reasoner.raw_patient_data.keys())
                str_op = chp_query.dynamic_targets['EFO:0000714']['op']
                opp_op = get_opposite_operator(str_op)
                op = get_operator(str_op)
                days = chp_query.dynamic_targets['EFO:0000714']['value']
                for patient, pat_dict in self.dynamic_reasoner.raw_patient_data.items():
                    if op(pat_dict['survival_time'], days):
                        num_survived += 1
                chp_query.truth_prob = num_survived/num_all
                # patient_contributions
                patient_contributions = defaultdict(lambda: defaultdict(int))
                for patient, pat_dict in self.dynamic_reasoner.raw_patient_data.items():
                    if op(pat_dict['survival_time'], days):
                        if num_survived == 0:
                            patient_contributions[('EFO:0000714', '{} {}'.format(str_op, days))][patient] = 0
                        else:
                            patient_contributions[('EFO:0000714', '{} {}'.format(str_op, days))][patient] = chp_query.truth_prob/num_survived
                    else:
                        if num_survived == 0:
                            patient_contributions[('EFO:0000714', '{} {}'.format(opp_op, days))][patient] = (1-chp_query.truth_prob)/num_all
                        else:
                            patient_contributions[('EFO:0000714', '{} {}'.format(opp_op, days))][patient] = (1-chp_query.truth_prob)/(num_all-num_survived)
                '''
            else:
                if query_type == 'gene':
                    chp_query = self.dynamic_reasoner.run_query(
                        chp_query, bkb_type='drug')
                elif query_type == 'drug':
                    chp_query = self.dynamic_reasoner.run_query(
                        chp_query, bkb_type='gene')
                chp_res_dict = chp_query.result.process_updates()
                chp_res_norm_dict = chp_query.result.process_updates(
                    normalize=True)
                #chp_query.result.summary()
                chp_res_contributions = chp_query.result.process_inode_contributions(
                )
                chp_query.truth_prob = max([
                    0, chp_res_norm_dict[chp_query.truth_target[0]][
                        chp_query.truth_target[1]]
                ])

                # Collect all source inodes and process patient hashes
                patient_contributions = defaultdict(lambda: defaultdict(int))
                for target, contrib_dict in chp_res_contributions.items():
                    target_comp_name, target_state_name = target
                    for inode, contrib in contrib_dict.items():
                        comp_name, state_name = inode
                        if '_Source_' in comp_name:
                            # Split source state name to get patient hashes
                            source_hashes_str = state_name.split('_')[-1]
                            source_hashes = [
                                int(source_hash)
                                for source_hash in source_hashes_str.split(',')
                            ]
                            hash_len = len(source_hashes)
                            # Process patient contributions
                            for _hash in source_hashes:
                                # Normalize to get relative contribution
                                patient_contributions[target][
                                    _hash] += contrib / hash_len  #/ chp_res_dict[target_comp_name][target_state_name]

        # Now iterate through the patient data to translate patient contributions to drug/gene contributions
        wildcard_contributions = defaultdict(lambda: defaultdict(int))
        for target, patient_contrib_dict in patient_contributions.items():
            for patient, contrib in patient_contrib_dict.items():
                if query_type == 'gene':
                    for gene_curie in self.dynamic_reasoner.raw_patient_data[
                            int(patient)]["gene_curies"]:
                        wildcard_contributions[gene_curie][target] += contrib
                elif query_type == 'drug':
                    for drug_curie in self.dynamic_reasoner.raw_patient_data[
                            int(patient)]["drug_curies"]:
                        wildcard_contributions[drug_curie][target] += contrib

        # normalize gene contributions by the target and take relative difference
        for curie in wildcard_contributions.keys():
            truth_target_gene_contrib = 0
            nontruth_target_gene_contrib = 0
            for target, contrib in wildcard_contributions[curie].items():
                if target[0] == chp_query.truth_target[0] and target[
                        1] == chp_query.truth_target[1]:
                    truth_target_gene_contrib += contrib / chp_query.truth_prob
                else:
                    nontruth_target_gene_contrib += contrib / (
                        1 - chp_query.truth_prob)
            wildcard_contributions[curie][
                'relative'] = truth_target_gene_contrib - nontruth_target_gene_contrib

        chp_query.report = None
        chp_query.wildcard_contributions = wildcard_contributions

        return chp_query

    def _construct_trapi_message(self, chp_query, message, query_type):
        """Fill ``message`` with knowledge-graph entries and results.

        Query nodes that carry explicit ids are added to the knowledge graph
        once and bound up front. What happens next depends on ``query_type``:

        * ``'standard'``: one knowledge-graph edge is added per query edge,
          annotated with ``chp_query.truth_prob``, and a single result is
          added to ``message.results``.
        * anything else (this method handles ``'gene'`` and ``'drug'``): the
          entries of ``chp_query.wildcard_contributions`` are ranked by the
          absolute value of their ``'relative'`` contribution, and each of
          the first ``self.max_results`` becomes one result in which the
          wildcard curie is bound to the query node(s) of the matching
          category. A wildcard whose curie is missing from ``self.curies``
          is logged and skipped.

        Args:
            chp_query: Processed query. ``truth_prob`` is read in the
                standard case, ``wildcard_contributions`` otherwise.
            message: Object exposing ``query_graph``, ``knowledge_graph`` and
                ``results``; it is mutated in place.
            query_type: ``'standard'``, ``'gene'`` or ``'drug'``.

        Returns:
            The ``message`` object that was passed in.
        """
        qg = message.query_graph
        kg = message.knowledge_graph

        edge_bindings = {}
        node_bindings = {}

        # Process the nodes that the query graph pins to explicit ids.
        for qnode_id, qnode in qg.nodes.items():
            if qnode.ids is None:
                # Wildcard node: it is bound per result further down.
                continue
            category = qnode.categories[0]
            curie = qnode.ids[0]
            if category == BIOLINK_GENE_ENTITY:
                name = self.curies[BIOLINK_GENE_ENTITY.get_curie()][curie][0]
            elif category == BIOLINK_DRUG_ENTITY:
                name = self.curies[BIOLINK_DRUG_ENTITY.get_curie()][curie][0]
            else:
                #TODO: Add diseases to curies and fix name hack below.
                # Diseases have no name table yet, so the curie doubles as
                # the node name. Any other category takes the same path, so
                # a node is never bound to a key left over from a previous
                # loop iteration.
                name = curie  #TODO: Once curies is fixed, make this a name.
            node_bindings[qnode_id] = [
                kg.add_node(curie, name, category.get_curie())
            ]

        if query_type == 'standard':
            for qedge_key, qedge in qg.edges.items():
                kedge_key = kg.add_edge(
                    node_bindings[qedge.subject][0],
                    node_bindings[qedge.object][0],
                    predicate=qedge.predicates[0].get_curie(),
                    relation=qedge.relation,
                )
                edge_bindings[qedge_key] = [kedge_key]
                # Add Attribute
                kg.edges[kedge_key].add_attribute(
                    attribute_type_id='Probability of Survival',
                    value=chp_query.truth_prob,
                    value_type_id=BIOLINK_HAS_CONFIDENCE_LEVEL_ENTITY.
                    get_curie(),
                )
            message.results.add_result(
                node_bindings,
                edge_bindings,
            )
            return message

        # Wildcard query: the category the wildcard curies belong to.
        # None for any other query type, in which case no node is treated
        # as the wildcard and all bindings come from the fixed nodes.
        wildcard_entity = {
            'gene': BIOLINK_GENE_ENTITY,
            'drug': BIOLINK_DRUG_ENTITY,
        }.get(query_type)

        # Rank wildcards by the magnitude of their relative contribution,
        # largest first. sorted() is stable, so ties keep insertion order.
        ranked_contributions = sorted(
            ((contrib_dict['relative'], wildcard) for wildcard, contrib_dict
             in chp_query.wildcard_contributions.items()),
            key=lambda pair: abs(pair[0]),
            reverse=True,
        )

        # Add kg wildcard nodes and edges, one result per wildcard.
        for contrib, wildcard in ranked_contributions[:self.max_results]:
            _node_bindings = {}
            _edge_bindings = {}
            # Process node bindings
            bad_wildcard = False
            for qnode_id, qnode in qg.nodes.items():
                category = qnode.categories[0]
                if wildcard_entity is not None and category == wildcard_entity:
                    try:
                        name = self.curies[
                            wildcard_entity.get_curie()][wildcard][0]
                    except KeyError:
                        logger.info(
                            "Couldn't find {} in curies[{}]".format(
                                wildcard, wildcard_entity.get_curie()))
                        bad_wildcard = True
                        # Nothing has been added to the kg for this wildcard
                        # yet, so the whole result can simply be dropped.
                        break
                    _node_bindings[qnode_id] = [
                        kg.add_node(wildcard, name, category.get_curie())
                    ]
                else:
                    _node_bindings[qnode_id] = node_bindings[qnode_id]
            if bad_wildcard:
                continue
            # Process edge bindings
            for qedge_id, qedge in qg.edges.items():
                # NOTE(review): the standard branch passes
                # predicates[0].get_curie() while this branch passes the
                # predicate object itself; confirm which form kg.add_edge
                # expects and align the two.
                kedge_id = kg.add_edge(
                    _node_bindings[qedge.subject][0],
                    _node_bindings[qedge.object][0],
                    predicate=qedge.predicates[0],
                    relation=qedge.relation,
                )
                kg.edges[kedge_id].add_attribute(
                    attribute_type_id='Contribution',
                    value=contrib,
                    value_type_id=BIOLINK_HAS_EVIDENCE_ENTITY.get_curie(),
                )
                _edge_bindings[qedge_id] = [kedge_id]
            # Process node and edge binding results
            message.results.add_result(
                _node_bindings,
                _edge_bindings,
            )

        return message