Python ChpDynamicReasoner Examples

Programming Language: Python

Namespace/Package Name: chp.reasoner

Examples at hotexamples.com: 15

Python ChpDynamicReasoner - 15 examples found. These are the top rated real world Python examples of chp.reasoner.ChpDynamicReasoner extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

ChpDynamicReasoner(8)

run_query(7)

Example #1

Show file

    def _setup_handler(self):
        # Only do the rest of this if a query is passed
        if self.init_query is not None:
            # Setup queries
            self._setup_queries()

            # Instiatate Reasoners
            if self.dynamic_reasoner is None:
                self.dynamic_reasoner = ChpDynamicReasoner(
                    bkb_handler=self.bkb_data_handler,
                    hosts_filename=self.hosts_filename,
                    num_processes_per_host=self.num_processes_per_host)

Example #2

Show file

File: apps.py Project: NCATSTranslator/connections_hypothesis_provider

class ChpBrainApiConfig(AppConfig):
    logger.warning('Running CHP Brain API Configuration. May take a minute.')
    name = 'chp_core_brain'

    # Used for distrbuted reasoning
    # Get Hosts File if it exists
    #parent_dir = os.path.dirname(os.path.realpath(__file__))
    #HOSTS_FILENAME = os.path.join(parent_dir, 'hosts')
    #NUM_PROCESSES_PER_HOST = multiprocessing.cpu_count()
    #if not os.path.exists(HOSTS_FILENAME):
    hosts_filename = None
    num_processes_per_host = 0

    # Instantiate BKB handler
    bkb_handler = BkbDataHandler(disease='tcga_gbm',
                                 bkb_major_version='darwin',
                                 bkb_minor_version='2.0')

    logger.info('Instantiating reasoners.')
    # Instantiate Reasoners
    dynamic_reasoner = ChpDynamicReasoner(
        bkb_handler=bkb_handler,
        hosts_filename=hosts_filename,
        num_processes_per_host=num_processes_per_host)
    joint_reasoner = ChpJointReasoner(
        bkb_handler=bkb_handler,
        hosts_filename=hosts_filename,
        num_processes_per_host=num_processes_per_host)

Example #3

Show file

    def _setup_handler(self):
        self.default_survival_target = {
            "EFO:0000714": {
                "op": '>=',
                "value": 970
            }
        }

        # Only do the rest of this if a query is passed
        if self.init_query is not None:
            # Setup queries
            self._setup_queries()

            # Instiatate Reasoners
            if self.dynamic_reasoner is None:
                self.dynamic_reasoner = ChpDynamicReasoner(
                    bkb_handler=self.bkb_data_handler,
                    hosts_filename=self.hosts_filename,
                    num_processes_per_host=self.num_processes_per_host)

Example #4

Show file

File: default_handler_mixin.py Project: NCATSTranslator/connections_hypothesis_provider

    def _setup_handler(self):
        # Only do the rest of this if a message is passed
        if self.messages is not None:
            # Setup messages
            self._setup_messages()

            # Instiatate Reasoners
            if 'default' in self.message_dict:
                if self.dynamic_reasoner is None:
                    self.dynamic_reasoner = ChpDynamicReasoner(
                        bkb_handler=self.bkb_data_handler,
                        hosts_filename=self.hosts_filename,
                        num_processes_per_host=self.num_processes_per_host)
            if 'simple' in self.message_dict:
                if self.joint_reasoner is None:
                    self.joint_reasoner = ChpJointReasoner(
                        bkb_handler=self.bkb_data_handler,
                        hosts_filename=self.hosts_filename,
                        num_processes_per_host=self.num_processes_per_host)

Example #5

Show file

File: test_trapi_interface.py Project: NCATSTranslator/connections_hypothesis_provider

 def setUpClass(cls):
     super(TestOneHopHandler, cls).setUpClass()
     # load in sample query graphs
     with open('query_samples/onehop/standard_queries.json', 'r') as f_:
         cls.standard_queries = json.load(f_)
     with open('query_samples/onehop/wildcard_queries.json', 'r') as f_:
         cls.wildcard_queries = json.load(f_)
     cls.bkb_handler = BkbDataHandler()
     cls.dynamic_reasoner = ChpDynamicReasoner(cls.bkb_handler)
     cls.joint_reasoner = ChpJointReasoner(cls.bkb_handler)

Example #6

Show file

class WildCardHandlerMixin:
    def _setup_handler(self):
        # Only do the rest of this if a query is passed
        if self.init_query is not None:
            # Setup queries
            self._setup_queries()

            # Instiatate Reasoners
            if self.dynamic_reasoner is None:
                self.dynamic_reasoner = ChpDynamicReasoner(
                    bkb_handler=self.bkb_data_handler,
                    hosts_filename=self.hosts_filename,
                    num_processes_per_host=self.num_processes_per_host)

    def _setup_queries(self):
        if type(self.init_query) == list:
            self.query_dict = defaultdict(list)
            self.query_map = []
            for query in self.init_query:
                self.query_map.append(query["query_id"])
                self.query_dict[self._get_wildcard_type(query)].append(self._setup_single_query(query))
        else:
            self.query_dict[self._get_wildcard_type(query)].append(self._setup_single_query(query))

    def _get_wildcard_type(self, query):
        wildcard_type = None
        for node_id, node in query["query_graph"]["nodes"].items():
            if 'id' not in node:
                if wildcard_type is None:
                    wildcard_type = node['category']
                else:
                    sys.exit('You can only have one contribution target. Make sure to leave only one node with a black curie.')
        if wildcard_type == BIOLINK_DRUG:
            return 'drug'
        elif wildcard_type == BIOLINK_GENE:
            return 'gene'
        else:
            raise ValueError('Did not understand wildcard type {}.'.format(wildcard_type))

    def _extract_chp_query(self, query, query_type):
        evidence = {}
        targets = []
        dynamic_evidence = {}
        dynamic_targets = {}
        # ensure we are using all nodes/edges
        total_nodes = 0
        total_edges = 0

        # get phenotype node
        targets = list()
        acceptable_target_curies = ['EFO:0000714']
        self.implicit_survival_node = False
        for node_key in query["query_graph"]['nodes'].keys():
            node = query["query_graph"]['nodes'][node_key]
            if node['category'] == BIOLINK_PHENOTYPIC_FEATURE and node['id'] in acceptable_target_curies:
                target_id = node_key
                total_nodes += 1
        if total_nodes == 0:
            # Use Default Survival
            self.implicit_survival_node = True
            total_nodes += 1
            #acceptable_target_curies_print = ','.join(acceptable_target_curies)
            #sys.exit("Survival Node not found. Node category must be '{}' and id must be in: {}".format(BIOLINK_PHENOTYPIC_FEATURE,
            #                                                                                            acceptable_target_curies_print))
        elif total_nodes > 1:
            sys.exit('Too many target nodes')

        # get disease node info and ensure only 1 disease:
        acceptable_disease_curies = ['MONDO:0007254']
        for node_key in query["query_graph"]['nodes'].keys():
            node = query["query_graph"]['nodes'][node_key]
            if node['category'] == BIOLINK_DISEASE and node['id'] in acceptable_disease_curies:
                disease_id = node_key
                for edge_key in query["query_graph"]['edges'].keys():
                    edge = query["query_graph"]['edges'][edge_key]
                    if edge['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE and edge['subject'] == disease_id and edge['object'] == target_id:
                        if 'properties' in edge.keys():
                            days = edge['properties']['days']
                            qualifier = edge['properties']['qualifier']
                        else:
                            days = 970
                            qualifier = '>='
                        total_edges += 1
                if total_edges > 1:
                    sys.exit('Disease has too many outgoing edges')
                total_nodes += 1

        if self.implicit_survival_node:
            days=970
            qualifier = '>='
            total_edges += 1

        if total_nodes  == 1:
            acceptable_disease_curies_print = ','.join(acceptable_disease_curies)
            sys.exit("Disease node not found. Node type must be '{}' and curie must be in: {}".format(BIOLINK_DISEASE,
                                                                                                      acceptable_disease_curies_print))
        elif total_nodes > 2:
            sys.exit('Too many disease nodes')
        # set BKB target
        dynamic_targets['EFO:0000714'] = {
            "op": qualifier,
            "value": days,
        }
        truth_target = ('EFO:0000714', '{} {}'.format(qualifier, days))

        # get evidence
        for node_key in query["query_graph"]['nodes'].keys():
            # genes
            node = query["query_graph"]['nodes'][node_key]
            if node['category'] == BIOLINK_GENE:
                # check for appropriate gene node structure
                gene_id = node_key
                for edge_key in query["query_graph"]['edges'].keys():
                    edge = query["query_graph"]['edges'][edge_key]
                    if edge['predicate'] == BIOLINK_GENE_TO_DISEASE_PREDICATE and edge['subject'] == gene_id and edge['object'] == disease_id:
                        total_edges += 1
                if total_edges == total_nodes - 1:
                    sys.exit("Gene and disease edge not found. Edge type must be '{}'".format(BIOLINK_GENE_TO_DISEASE_PREDICATE))
                elif total_edges > total_nodes:
                    sys.exit('Gene has too many outgoing edges')
                # check for appropriate gene node curie
                if query_type != 'gene':
                    gene_curie = node['id']
                    if gene_curie in self.curies[BIOLINK_GENE]:
                        gene = gene_curie
                    else:
                        sys.exit('Invalid ENSEMBL Identifier. Must be in form ENSEMBL:<ID>.')
                    evidence["_" + gene] = 'True'
                total_nodes += 1
            # drugs
            if node['category'] == BIOLINK_DRUG:
                # check for appropriate drug node structure
                drug_id = node_key
                for edge_key in query["query_graph"]['edges'].keys():
                    edge = query["query_graph"]['edges'][edge_key]
                    if edge['predicate'] == BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE and edge['subject'] == drug_id and edge['object'] == disease_id:
                        total_edges += 1
                if total_edges == total_nodes - 1:
                    sys.exit("Drug and disease edge not found. Edge type must be '{}'".format(BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE))
                elif total_edges > total_nodes:
                    sys.exit('Drug has too many outgoing edges')
                # check for appropriate drug node curie
                if query_type != 'drug':
                    drug_curie = node['id']
                    if drug_curie in self.curies[BIOLINK_DRUG]:
                        drug = drug_curie
                    else:
                        sys.exit('Invalid CHEMBL Identifier: {}. Must be in form CHEMBL:<ID>'.format(drug_curie))
                    evidence['_' + drug] = 'True'
                total_nodes += 1

        # Temporary solution to no evidence linking
        if len(evidence.keys()) == 0 and len(dynamic_evidence.keys()) == 0:
            self.no_evidence_probability_check = True
        else:
            self.no_evidence_probability_check = False

        # produce BKB query
        chp_query = Query(
            evidence=evidence,
            targets=targets,
            dynamic_evidence=dynamic_evidence,
            dynamic_targets=dynamic_targets,
            type='updating')
        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        chp_query.query_id = query["query_id"] if 'query_id' in query else None
        return chp_query

    def _run_query(self, chp_query, query_type):
        """ Runs build BKB query to calculate probability of survival.
            A probability is returned to specificy survival time w.r.t a drug.
            Contributions for each gene are calculuated and classified under
            their true/false target assignments.
        """

        # temporary solution to no evidence linking
        if not self.no_evidence_probability_check:
            if query_type == 'gene':
                chp_query = self.dynamic_reasoner.run_query(chp_query, bkb_type='drug')
            elif query_type == 'drug':
                chp_query = self.dynamic_reasoner.run_query(chp_query, bkb_type='gene')
            chp_res_dict = chp_query.result.process_updates()
            chp_res_norm_dict = chp_query.result.process_updates(normalize=True)
            #chp_query.result.summary()
            chp_res_contributions = chp_query.result.process_inode_contributions()
            chp_query.truth_prob = max([0, chp_res_norm_dict[chp_query.truth_target[0]][chp_query.truth_target[1]]])

            # Collect all source inodes and process patient hashes
            patient_contributions = defaultdict(lambda: defaultdict(int))
            for target, contrib_dict in chp_res_contributions.items():
                target_comp_name, target_state_name = target
                for inode, contrib in contrib_dict.items():
                    comp_name, state_name = inode
                    if '_Source_' in comp_name:
                        # Split source state name to get patient hashes
                        source_hashes_str = state_name.split('_')[-1]
                        source_hashes = [int(source_hash) for source_hash in source_hashes_str.split(',')]
                        hash_len = len(source_hashes)
                        # Process patient contributions
                        for _hash in source_hashes:
                            # Normalize to get relative contribution
                            patient_contributions[target][_hash] += contrib/hash_len #/ chp_res_dict[target_comp_name][target_state_name]

        else:
            # probability of survival
            num_survived = 0
            num_all = len(self.dynamic_reasoner.raw_patient_data.keys())
            str_op = chp_query.dynamic_targets['EFO:0000714']['op']
            opp_op = get_opposite_operator(str_op)
            op = get_operator(str_op)
            days = chp_query.dynamic_targets['EFO:0000714']['value']
            for patient, pat_dict in self.dynamic_reasoner.raw_patient_data.items():
                if op(pat_dict['survival_time'], days):
                    num_survived += 1
            chp_query.truth_prob = num_survived/num_all

            # patient_contributions
            patient_contributions = defaultdict(lambda: defaultdict(int))
            for patient, pat_dict in self.dynamic_reasoner.raw_patient_data.items():
                if op(pat_dict['survival_time'], days):
                    if num_survived == 0:
                        patient_contributions[('EFO:0000714', '{} {}'.format(str_op, days))][patient] = 0
                    else:
                        patient_contributions[('EFO:0000714', '{} {}'.format(str_op, days))][patient] = chp_query.truth_prob/num_survived
                else:
                    if num_survived == 0:
                        patient_contributions[('EFO:0000714', '{} {}'.format(opp_op, days))][patient] = (1-chp_query.truth_prob)/num_all
                    else:
                        patient_contributions[('EFO:0000714', '{} {}'.format(opp_op, days))][patient] = (1-chp_query.truth_prob)/(num_all-num_survived)

        # Now iterate through the patient data to translate patient contributions to drug/gene contributions
        wildcard_contributions = defaultdict(lambda: defaultdict(int))
        for target, patient_contrib_dict in patient_contributions.items():
            for patient, contrib in patient_contrib_dict.items():
                if query_type == 'gene':
                    for gene_curie in self.dynamic_reasoner.raw_patient_data[patient]["gene_curies"]:
                        wildcard_contributions[gene_curie][target] += contrib
                elif query_type == 'drug':
                    for drug_curie in self.dynamic_reasoner.raw_patient_data[patient]["drug_curies"]:
                        wildcard_contributions[drug_curie][target] += contrib

        # normalize gene contributions by the target and take relative difference
        for curie in wildcard_contributions.keys():
            truth_target_gene_contrib = 0
            nontruth_target_gene_contrib = 0
            for target, contrib in wildcard_contributions[curie].items():
                if target[0] == chp_query.truth_target[0] and target[1] == chp_query.truth_target[1]:
                    truth_target_gene_contrib += contrib / chp_query.truth_prob
                else:
                    nontruth_target_gene_contrib += contrib / (1 - chp_query.truth_prob)
            wildcard_contributions[curie]['relative'] = truth_target_gene_contrib - nontruth_target_gene_contrib

        chp_query.report = None
        chp_query.wildcard_contributions = wildcard_contributions

        return chp_query

    def _construct_trapi_response(self, chp_query, query_type):
        # Get orginal query
        if len(self.init_query) == 1:
            query = self.init_query[0]
            query_id = None
        else:
            for _query in self.init_query:
                if _query["query_id"] == chp_query.query_id:
                    query = _query
                    query_id = query["query_id"]
                    break

        # Construct first result which is the result of the standard probablistic query.
        kg = copy.deepcopy(query["query_graph"])
        # Process Nodes
        node_pairs = defaultdict(None)
        contrib_qg_id = None
        for node_key in list(kg["nodes"].keys())[:]:
            qg_node_curie = kg['nodes'][node_key].pop('id', None)
            if qg_node_curie is not None:
                kg['nodes'][qg_node_curie] = kg['nodes'].pop(node_key)
                if kg['nodes'][qg_node_curie]['category'] == BIOLINK_GENE:
                    kg['nodes'][qg_node_curie]['name'] = self.curies["biolink:Gene"][qg_node_curie][0]
                elif kg['nodes'][qg_node_curie]['category'] == BIOLINK_DRUG:
                    kg['nodes'][qg_node_curie]['name'] = self.curies["biolink:Drug"][qg_node_curie][0]
                node_pairs[node_key] = qg_node_curie
            else:
                kg["nodes"].pop(node_key)

        if not self.implicit_survival_node:
            # Process Edges
            edge_pairs = dict()
            knowledge_edges = 0
            for edge_key in list(kg['edges'].keys())[:]:
                subject_node = kg['edges'][edge_key]['subject']
                if kg['edges'][edge_key]['predicate'] == BIOLINK_GENE_TO_DISEASE_PREDICATE and query['query_graph']['nodes'][subject_node]['category'] == BIOLINK_GENE and query_type == 'gene':
                    kg['edges'].pop(edge_key)
                elif kg['edges'][edge_key]['predicate'] == BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE and query['query_graph']['nodes'][subject_node]['category'] == BIOLINK_DRUG and query_type == 'drug':
                    kg['edges'].pop(edge_key)
                else:
                    kg_id = 'kge{}'.format(knowledge_edges)
                    knowledge_edges += 1
                    kg['edges'][kg_id] = kg['edges'].pop(edge_key)
                    kg['edges'][kg_id]['subject'] = node_pairs[kg['edges'][kg_id]['subject']]
                    kg['edges'][kg_id]['object'] = node_pairs[kg['edges'][kg_id]['object']]
                    edge_pairs[edge_key] = kg_id
                    if kg['edges'][kg_id]['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE:
                        if 'properties' in kg['edges'][kg_id].keys():
                            kg['edges'][kg_id].pop('properties')
                        kg['edges'][kg_id]['attributes'] = [{'name':'Probability of Survival',
                                                             'type':BIOLINK_PROBABILITY,
                                                             'value':chp_query.truth_prob}]

            # Put first result of standard prob query of only curie nodes (i.e. no wildcard nodes where used as evidence)
            results = []
            results.append({'edge_bindings':dict(),
                            'node_bindings':dict()})
            for edge_pair_key in edge_pairs:
                results[0]['edge_bindings'][edge_pair_key] = [{ 'id': str(edge_pairs[edge_pair_key])}]
            for node_pair_key in node_pairs:
                results[0]['node_bindings'][node_pair_key] = [{ 'id': str(node_pairs[node_pair_key])}]

        else:
            knowledge_edges = 0
            kg['edges'] = {}
            results = []

        # Build relative contribution results and added associated edges into knowledge graph
        unsorted_wildcard_contributions = []
        for wildcard, contrib_dict in chp_query.wildcard_contributions.items():
            unsorted_wildcard_contributions.append((contrib_dict['relative'], wildcard))
        sorted_wildcard_contributions = [(contrib,wildcard) for contrib, wildcard in sorted(unsorted_wildcard_contributions, key=lambda x: abs(x[0]), reverse=True)]

        for contrib, wildcard in sorted_wildcard_contributions[:self.max_results]:
            rg = copy.deepcopy(query["query_graph"])
            _node_pairs = {}
            _edge_pairs = {}
            # Process node pairs
            for node_id, node in rg["nodes"].items():
                if node["category"] == BIOLINK_GENE and query_type == 'gene':
                    kg["nodes"][wildcard] = copy.deepcopy(node)
                    kg["nodes"][wildcard].update({"name": self.curies[BIOLINK_GENE][wildcard][0]})
                    _node_pairs[node_id] = wildcard
                elif node["category"] == BIOLINK_DRUG and query_type == 'drug':
                    kg["nodes"][wildcard] = copy.deepcopy(node)
                    kg["nodes"][wildcard].update({"name": self.curies[BIOLINK_DRUG][wildcard][0]})
                    _node_pairs[node_id] = wildcard
                else:
                    _node_pairs[node_id] = node_pairs[node_id]
            # Process edge pairs
            for edge_id, edge in rg["edges"].items():
                subject_node = edge['subject']
                if query_type == 'gene'  and edge["predicate"] == BIOLINK_GENE_TO_DISEASE_PREDICATE and query['query_graph']['nodes'][subject_node]['category'] == BIOLINK_GENE:
                    knowledge_edges += 1
                    kg_edge_id = 'kge{}'.format(knowledge_edges)
                    kg["edges"][kg_edge_id] = copy.deepcopy(edge)
                    kg["edges"][kg_edge_id]["subject"] = _node_pairs[kg["edges"][kg_edge_id]["subject"]]
                    kg["edges"][kg_edge_id]["object"] = _node_pairs[kg["edges"][kg_edge_id]["object"]]
                    #kg["edges"][kg_edge_id]["value"] = contrib
                    kg["edges"][kg_edge_id]["attributes"] = [{'name':'Contribution',
                                                              'type':BIOLINK_CONTRIBUTION,
                                                              'value':contrib}]
                    _edge_pairs[edge_id] = kg_edge_id
                elif query_type == 'drug'  and edge["predicate"] == BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE and query['query_graph']['nodes'][subject_node]['category'] == BIOLINK_DRUG:
                    knowledge_edges += 1
                    kg_edge_id = 'kge{}'.format(knowledge_edges)
                    kg["edges"][kg_edge_id] = copy.deepcopy(edge)
                    kg["edges"][kg_edge_id]["subject"] = _node_pairs[kg["edges"][kg_edge_id]["subject"]]
                    kg["edges"][kg_edge_id]["object"] = _node_pairs[kg["edges"][kg_edge_id]["object"]]
                    #kg["edges"][kg_edge_id]["value"] = contrib
                    kg["edges"][kg_edge_id]["attributes"] = [{'name':'Contribution',
                                                              'type':BIOLINK_CONTRIBUTION,
                                                              'value':contrib}]
                    _edge_pairs[edge_id] = kg_edge_id
                else:
                    _edge_pairs[edge_id] = edge_pairs[edge_id]
            # Process node and edge binding results
            _res = {"edge_bindings": {},
                    "node_bindings": {}}
            for edge_pair_key in _edge_pairs:
                _res["edge_bindings"][edge_pair_key] = [{ "id": str(_edge_pairs[edge_pair_key])}]
            for node_pair_key in _node_pairs:
                _res["node_bindings"][node_pair_key] = [{ "id": str(_node_pairs[node_pair_key])}]
            results.append(_res)

        # query response
        trapi_message = {'query_graph': query["query_graph"],
                        'knowledge_graph': kg,
                        'results': results}
        trapi_response = {'message' : trapi_message}
        return query_id, trapi_response

Example #7

Show file

class OneHopHandlerMixin:
    """ OneHopeHandler is the handler for 1-hop queries. That is
        query graphs (QGs) that consists of 2 nodes and a single edge.

        :param query: the query graph sent by the ARA.
        :type query: dict
        :param hosts_filename: a filename for a stored QG. Defaults to None
        :type hosts_filename: str
        :param num_processes_per_host: Not implemented thouroughly, but would be
            used for distributed reasoning.
        :type num_processes_per_host: int
        :param max_results: specific to 1-hop queries, specifies the number of
            wildcard genes to return.
        :type max_results: int
    """
    def _setup_handler(self):
        self.default_survival_target = {
            "EFO:0000714": {
                "op": '>=',
                "value": 970
            }
        }

        # Only do the rest of this if a query is passed
        if self.init_query is not None:
            # Setup queries
            self._setup_queries()

            # Instiatate Reasoners
            if self.dynamic_reasoner is None:
                self.dynamic_reasoner = ChpDynamicReasoner(
                    bkb_handler=self.bkb_data_handler,
                    hosts_filename=self.hosts_filename,
                    num_processes_per_host=self.num_processes_per_host)

    def _setup_queries(self):
        if type(self.init_query) == list:
            self.query_dict = defaultdict(list)
            self.query_map = []
            for query in self.init_query:
                self.query_map.append(query["query_id"])
                self.query_dict[self._get_wildcard_type(query)].append(
                    self._setup_single_query(query))
        else:
            self.query_dict[self._get_wildcard_type(query)].append(
                self._setup_single_query(query))

    def _get_wildcard_type(self, query):
        wildcard_type = None
        for node_id, node in query["query_graph"]["nodes"].items():
            if 'id' not in node:
                if wildcard_type is None:
                    wildcard_type = node['category']
                else:
                    sys.exit(
                        'You can only have one contribution target. Make sure to leave only one node with a black curie.'
                    )
        if wildcard_type == BIOLINK_DRUG:
            return 'drug'
        elif wildcard_type == BIOLINK_GENE:
            return 'gene'
        else:
            raise ValueError(
                'Did not understand wildcard type {}.'.format(wildcard_type))

    def check_query(self):
        """ Currently not implemented. Would check validity of query.
        """
        return True

    def _extract_chp_query(self, query, query_type=None):
        evidence = {}
        dynamic_targets = {}

        if len(query["query_graph"]['nodes']) > 2 or len(
                query["query_graph"]['edges']) > 1:
            sys.exit('1 hop quries can only have 2 nodes and 1 edge')

        # check edge for source and target
        edge_key = list(query["query_graph"]["edges"].keys())[0]
        edge = query["query_graph"]['edges'][edge_key]
        if 'subject' not in edge.keys() or 'object' not in edge.keys():
            sys.exit(
                'Edge must have both a \'subject\' and and \'object\' key')
        subject = edge['subject']
        obj = edge['object']

        # Get non-wildcard node
        if query_type == 'gene':
            if query["query_graph"]['nodes'][subject][
                    'category'] != BIOLINK_GENE:
                sys.exit('Subject node must be \'category\' {}'.format(
                    BIOLINK_GENE))
            drug_curie = query["query_graph"]['nodes'][obj]['id']
            if drug_curie not in self.curies[BIOLINK_DRUG]:
                sys.exit('Invalid CHEMBL Identifier. Must be CHEMBL:<ID>')
            evidence['_{}'.format(drug_curie)] = 'True'
        elif query_type == 'drug':
            if query["query_graph"]['nodes'][subject][
                    'category'] != BIOLINK_DRUG:
                sys.exit('Subject node must be \'category\' {}'.format(
                    BIOLINK_DRUG))
            gene_curie = query["query_graph"]['nodes'][obj]['id']
            if gene_curie not in self.curies[BIOLINK_GENE]:
                sys.exit('Invalid ENSEMBL Identifier. Must be ENSEMBL:<ID>')
            evidence['_{}'.format(gene_curie)] = 'True'

        # default survival time
        dynamic_targets.update(self.default_survival_target)
        truth_target = ('EFO:0000714', '{} {}'.format(
            self.default_survival_target["EFO:0000714"]["op"],
            self.default_survival_target["EFO:0000714"]["value"]))

        chp_query = Query(evidence=evidence,
                          targets=None,
                          dynamic_evidence=None,
                          dynamic_targets=dynamic_targets,
                          type='updating')
        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        chp_query.query_id = query["query_id"] if 'query_id' in query else None
        return chp_query

    def _run_query(self, chp_query, query_type):
        """ Runs build BKB query to calculate probability of survival.
            A probability is returned to specificy survival time w.r.t a drug.
            Contributions for each gene are calculuated and classified under
            their true/false target assignments.
        """
        if query_type == 'gene':
            chp_query = self.dynamic_reasoner.run_query(chp_query,
                                                        bkb_type='drug')
        elif query_type == 'drug':
            chp_query = self.dynamic_reasoner.run_query(chp_query,
                                                        bkb_type='gene')
        chp_res_dict = chp_query.result.process_updates()
        chp_res_norm_dict = chp_query.result.process_updates(normalize=True)
        #chp_query.result.summary()
        chp_res_contributions = chp_query.result.process_inode_contributions()
        chp_query.truth_prob = max([
            0, chp_res_norm_dict[chp_query.truth_target[0]][
                chp_query.truth_target[1]]
        ])

        #print(chp_res_contributions)

        # Collect all source inodes and process patient hashes
        patient_contributions = defaultdict(lambda: defaultdict(int))
        for target, contrib_dict in chp_res_contributions.items():
            target_comp_name, target_state_name = target
            for inode, contrib in contrib_dict.items():
                comp_name, state_name = inode
                if '_Source_' in comp_name:
                    # Split source state name to get patient hashes
                    source_hashes_str = state_name.split('_')[-1]
                    source_hashes = [
                        int(source_hash)
                        for source_hash in source_hashes_str.split(',')
                    ]
                    hash_len = len(source_hashes)
                    # Process patient contributions
                    for _hash in source_hashes:
                        # Normalize to get relative contribution
                        patient_contributions[target][
                            _hash] += contrib / hash_len  #/ chp_res_dict[target_comp_name][target_state_name]

        # Now iterate through the patient data to translate patient contributions to drug/gene contributions
        wildcard_contributions = defaultdict(lambda: defaultdict(int))
        for target, patient_contrib_dict in patient_contributions.items():
            for patient, contrib in patient_contrib_dict.items():
                if query_type == 'gene':
                    for gene_curie in self.dynamic_reasoner.raw_patient_data[
                            patient]["gene_curies"]:
                        wildcard_contributions[gene_curie][target] += contrib
                elif query_type == 'drug':
                    for drug_curie in self.dynamic_reasoner.raw_patient_data[
                            patient]["drug_curies"]:
                        wildcard_contributions[drug_curie][target] += contrib

        # normalize gene contributions by the target and take relative difference
        for curie in wildcard_contributions.keys():
            truth_target_gene_contrib = 0
            nontruth_target_gene_contrib = 0
            for target, contrib in wildcard_contributions[curie].items():
                if target[0] == chp_query.truth_target[0] and target[
                        1] == chp_query.truth_target[1]:
                    truth_target_gene_contrib += contrib / chp_res_dict[
                        target[0]][target[1]]
                else:
                    nontruth_target_gene_contrib += contrib / chp_res_dict[
                        target[0]][target[1]]
            wildcard_contributions[curie][
                'relative'] = truth_target_gene_contrib - nontruth_target_gene_contrib

        chp_query.report = None
        chp_query.wildcard_contributions = wildcard_contributions

        return chp_query

    def _construct_trapi_response(self, chp_query, query_type):
        # Get orginal query
        if len(self.init_query) == 1:
            query = self.init_query[0]
            query_id = None
        else:
            for _query in self.init_query:
                if _query["query_id"] == chp_query.query_id:
                    query = _query
                    query_id = query["query_id"]
                    break

        kg = copy.deepcopy(query["query_graph"])

        edge_bindings = {}
        node_bindings = {}

        # get edge subject, object, edge label and pop edge
        edge_key = list(kg['edges'].keys())[0]
        edge = kg['edges'][edge_key]
        edge_label = edge['predicate']
        subject = edge['subject']
        obj = edge['object']
        kg['edges'].pop(edge_key)

        # move curie to key
        non_wildcard_curie = kg['nodes'][obj].pop('id')
        kg['nodes'][non_wildcard_curie] = kg['nodes'].pop(obj)
        if query_type == 'gene':
            kg['nodes'][non_wildcard_curie]['name'] = self._get_curie_name(
                BIOLINK_DRUG, non_wildcard_curie)[0]
        elif query_type == 'drug':
            kg['nodes'][non_wildcard_curie]['name'] = self._get_curie_name(
                BIOLINK_GENE, non_wildcard_curie)[0]
        node_bindings[obj] = non_wildcard_curie

        # remove wildcard gene node from kg
        kg['nodes'].pop(subject)

        # Build relative contribution results and added associated edges into knowledge graph
        unsorted_wildcard_contributions = []
        for wildcard, contrib_dict in chp_query.wildcard_contributions.items():
            unsorted_wildcard_contributions.append(
                (contrib_dict['relative'], wildcard))
        sorted_wildcard_contributions = [
            (contrib, wildcard)
            for contrib, wildcard in sorted(unsorted_wildcard_contributions,
                                            key=lambda x: abs(x[0]),
                                            reverse=True)
        ]

        # add kg gene nodes and edges
        edge_count = 0
        node_count = 1
        results = []
        for contrib, wildcard in sorted_wildcard_contributions[:self.
                                                               max_results]:
            if query_type == 'gene':
                kg['nodes'][wildcard] = {
                    "name": self._get_curie_name(BIOLINK_GENE, wildcard)[0],
                    "category": BIOLINK_GENE
                }
                # add edge
                kg['edges']['kge{}'.format(edge_count)] = {
                    "predicate":
                    BIOLINK_CHEMICAL_TO_GENE_PREDICATE,
                    "subject":
                    wildcard,
                    "object":
                    non_wildcard_curie,
                    "attributes": [{
                        'name': 'Contribution',
                        'type': BIOLINK_CONTRIBUTION,
                        'value': contrib
                    }]
                }
            elif query_type == 'drug':
                kg['nodes'][wildcard] = {
                    "name": self._get_curie_name(BIOLINK_DRUG, wildcard)[0],
                    "category": BIOLINK_DRUG
                }
                # add edge
                kg['edges']['kge{}'.format(edge_count)] = {
                    "predicate":
                    BIOLINK_CHEMICAL_TO_GENE_PREDICATE,
                    "subject":
                    wildcard,
                    "object":
                    non_wildcard_curie,
                    "attributes": [{
                        'name': 'Contribution',
                        'type': BIOLINK_CONTRIBUTION,
                        'value': contrib
                    }]
                }
            # add to results
            node_binding = {
                obj: [{
                    'id': non_wildcard_curie
                }],
                subject: [{
                    'id': wildcard
                }]
            }
            edge_binding = {edge_key: [{'id': 'kge{}'.format(edge_count)}]}
            results.append({
                'node_bindings': node_binding,
                'edge_bindings': edge_binding
            })

            edge_count += 1
            node_count += 1

        # query response
        trapi_message = {
            'query_graph': query["query_graph"],
            'knowledge_graph': kg,
            'results': results
        }
        trapi_response = {'message': trapi_message}
        return query_id, trapi_response

Example #8

Show file

File: default_handler_mixin.py Project: NCATSTranslator/connections_hypothesis_provider

class DefaultHandlerMixin:
    def _setup_handler(self):
        # Only do the rest of this if a message is passed
        if self.messages is not None:
            # Setup messages
            self._setup_messages()

            # Instiatate Reasoners
            if 'default' in self.message_dict:
                if self.dynamic_reasoner is None:
                    self.dynamic_reasoner = ChpDynamicReasoner(
                        bkb_handler=self.bkb_data_handler,
                        hosts_filename=self.hosts_filename,
                        num_processes_per_host=self.num_processes_per_host)
            if 'simple' in self.message_dict:
                if self.joint_reasoner is None:
                    self.joint_reasoner = ChpJointReasoner(
                        bkb_handler=self.bkb_data_handler,
                        hosts_filename=self.hosts_filename,
                        num_processes_per_host=self.num_processes_per_host)

    def _setup_messages(self):
        self.message_dict = defaultdict(list)
        for message in self.messages:
            if self._is_simple_message(message):
                self.message_dict['simple'].append(message)
            else:
                self.message_dict['default'].append(message)

    def _is_simple_message(self, message):
        """ Check if this is a {0 or 1} drug, {0 or 1} gene, one outcome standard message.
        """
        _found_outcome = False
        _found_disease = False
        _found_gene = False
        _found_drug = False
        query_graph = message.query_graph
        for node_key, node in query_graph.nodes.items():
            if node.categories[0] == BIOLINK_PHENOTYPIC_FEATURE_ENTITY:
                # If we've already found the target and there's another phenotypic feature, then this isn't simple.
                if _found_outcome:
                    return False
                else:
                    _found_outcome = True
            if node.categories[0] == BIOLINK_DISEASE_ENTITY:
                # If we've already found disease and there's another disease, then this isn't simple.
                if _found_disease:
                    return False
                else:
                    _found_disease = True
            if node.categories[0] == BIOLINK_GENE_ENTITY:
                if _found_gene:
                    return False
                else:
                    _found_gene = True
            if node.categories[0] == BIOLINK_DRUG_ENTITY:
                if _found_drug:
                    return False
                else:
                    _found_drug = True
        return True

    def _extract_chp_query(self, message, message_type=None):
        # Initialize Chp Query
        chp_query = ChpQuery(reasoning_type='updating')
        # Ensure we are using all nodes/edges
        total_nodes = 0
        total_edges = 0

        query_graph = message.query_graph

        # get phenotype node
        targets = list()
        for node_key in query_graph.nodes.keys():
            node = query_graph.nodes[node_key]
            if node.categories[0] == BIOLINK_PHENOTYPIC_FEATURE_ENTITY:
                target_id = node_key
                total_nodes += 1

        survival_value = 970
        survival_operator = '>='

        # get disease node info and ensure only 1 disease:
        for node_key in query_graph.nodes.keys():
            node = query_graph.nodes[node_key]
            if node.categories[0] == BIOLINK_DISEASE_ENTITY:
                disease_id = node_key
                for edge_key in query_graph.edges.keys():
                    edge = query_graph.edges[edge_key]
                    if self.check_predicate_support(
                            edge.predicates[0], BIOLINK_HAS_PHENOTYPE_ENTITY
                    ) and edge.subject == disease_id and edge.object == target_id:
                        survival_time_constraint = edge.find_constraint(
                            name='survival_time')
                        if survival_time_constraint is not None:
                            survival_value = survival_time_constraint.value
                            survival_operator = survival_time_constraint.operator
                            if survival_operator == 'matches':
                                survival_operator = '=='
                        total_edges += 1
                total_nodes += 1
        # set BKB target
        chp_query.add_dynamic_target(node.ids[0], survival_operator,
                                     survival_value)
        truth_target = (node.ids[0], '{} {}'.format(survival_operator,
                                                    survival_value))

        # get evidence
        for node_key in query_graph.nodes.keys():
            # genes
            node = query_graph.nodes[node_key]
            if node.categories[0] == BIOLINK_GENE_ENTITY:
                # check for appropriate gene node structure
                gene_id = node_key
                for edge_key in query_graph.edges.keys():
                    edge = query_graph.edges[edge_key]
                    if self.check_predicate_support(
                            edge.predicates[0],
                            BIOLINK_GENE_ASSOCIATED_WITH_CONDITION_ENTITY
                    ) and edge.subject == gene_id and edge.object == disease_id:
                        total_edges += 1
                # check for appropriate gene node curie
                gene_curie = node.ids[0]
                gene = gene_curie
                chp_query.add_meta_evidence(gene, 'True')
                total_nodes += 1
            # drugs
            if node.categories[0] == BIOLINK_DRUG_ENTITY:
                # check for appropriate drug node structure
                drug_id = node_key
                for edge_key in query_graph.edges.keys():
                    edge = query_graph.edges[edge_key]
                    if self.check_predicate_support(
                            edge.predicates[0], BIOLINK_TREATS_ENTITY
                    ) and edge.subject == drug_id and edge.object == disease_id:
                        total_edges += 1
                # check for appropriate drug node curie
                drug_curie = node.ids[0]
                drug = drug_curie
                chp_query.add_dynamic_evidence(node.ids[0], '==', 'True')
                total_nodes += 1

        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        return chp_query

    def _run_query(self, chp_query, query_type):
        if query_type == 'simple':
            chp_query = self.joint_reasoner.run_query(chp_query)
            # If a probability was found for the target
            if len(chp_query.result) > 0:
                # If a probability was found for the truth target
                if chp_query.truth_target in chp_query.result:
                    total_unnormalized_prob = 0
                    for target, contrib in chp_query.result.items():
                        prob = max(0, contrib)
                        total_unnormalized_prob += prob
                    chp_query.truth_prob = max([
                        0, chp_query.result[(chp_query.truth_target)]
                    ]) / total_unnormalized_prob
                else:
                    chp_query.truth_prob = 0
            else:
                chp_query.truth_prob = -1
            chp_query.report = None
        else:
            chp_query = self.dynamic_reasoner.run_query(chp_query)
            chp_res_dict = chp_query.result.process_updates(normalize=True)
            try:
                chp_query.truth_prob = max([
                    0, chp_res_dict[chp_query.truth_target[0]][
                        chp_query.truth_target[1]]
                ])
            except KeyError:
                # May need to come back and fix this.
                chp_query.truth_prob = -1

            chp_query.report = None
        return chp_query

    def _construct_trapi_message(self, chp_query, message, query_type=None):

        # update target node info and form edge pair combos for results graph

        qg = message.query_graph
        kg = message.knowledge_graph
        node_bindings = {}
        for qnode_key, qnode in qg.nodes.items():
            if qnode.categories[0] == BIOLINK_GENE_ENTITY:
                knode_key = kg.add_node(
                    qnode.ids[0],
                    self.curies[BIOLINK_GENE_ENTITY.get_curie()][qnode.ids[0]]
                    [0],
                    qnode.categories[0].get_curie(),
                )
            elif qnode.categories[0] == BIOLINK_DRUG_ENTITY:
                knode_key = kg.add_node(
                    qnode.ids[0],
                    self.curies[BIOLINK_DRUG_ENTITY.get_curie()][qnode.ids[0]]
                    [0],
                    qnode.categories[0].get_curie(),
                )
            else:
                knode_key = kg.add_node(
                    qnode.ids[0],
                    qnode.ids[0],
                    qnode.categories[0].get_curie(),
                )
            node_bindings[qnode_key] = [knode_key]

        edge_bindings = {}
        for qedge_key, qedge in qg.edges.items():
            kedge_key = kg.add_edge(
                node_bindings[qedge.subject][0],
                node_bindings[qedge.object][0],
                predicate=qedge.predicates[0].get_curie(),
                relation=qedge.relation,
            )
            edge_bindings[qedge_key] = [kedge_key]
            # Add Attribute
            if self.check_predicate_support(qedge.predicates[0],
                                            BIOLINK_HAS_PHENOTYPE_ENTITY):
                kg.edges[kedge_key].add_attribute(
                    attribute_type_id='Probability of Survival',
                    value=chp_query.truth_prob,
                    value_type_id=BIOLINK_HAS_CONFIDENCE_LEVEL_ENTITY.
                    get_curie(),
                )
        # Proces results
        message.results.add_result(
            node_bindings,
            edge_bindings,
        )
        return message

Example #9

Show file

File: test_reasoner.py Project: ehinderer/chp

 def setUp(self):
     self.bkb_handler = BkbDataHandler(
         bkb_major_version='coulomb',
         bkb_minor_version='1.0',
     )
     self.dynamic_reasoner = ChpDynamicReasoner(self.bkb_handler)

Example #10

Show file

File: test_reasoner.py Project: ehinderer/chp

class TestDynamicReasoner(unittest.TestCase):
    def setUp(self):
        self.bkb_handler = BkbDataHandler(
            bkb_major_version='coulomb',
            bkb_minor_version='1.0',
        )
        self.dynamic_reasoner = ChpDynamicReasoner(self.bkb_handler)

    def test_dynamic_reasoner_one_gene(self):
        # Specify evidence
        evidence = {'_ENSEMBL:ENSG00000155657': 'True'}
        # Specify targets
        dynamic_targets = {"EFO:0000714": {"op": '>=', "value": 1000}}
        # Setup query
        query = Query(evidence=evidence, dynamic_targets=dynamic_targets)
        query = self.dynamic_reasoner.run_query(query)
        query.result.summary(include_contributions=False)

    def test_dynamic_reasoner_one_gene_one_drug(self):
        # Specify evidence
        evidence = {
            '_ENSEMBL:ENSG00000155657': 'True',
            'CHEMBL:CHEMBL83': 'True',
        }
        # Specify targets
        dynamic_targets = {"EFO:0000714": {"op": '>=', "value": 1000}}
        # Setup query
        query = Query(evidence=evidence, dynamic_targets=dynamic_targets)
        query = self.dynamic_reasoner.run_query(query)
        query.result.summary(include_contributions=False)

    def test_dynamic_reasoner_two_gene_one_drug(self):
        # Specify evidence
        evidence = {
            '_ENSEMBL:ENSG00000155657': 'True',
            '_ENSEMBL:ENSG00000241973': 'True',
            'CHEMBL:CHEMBL83': 'True',
        }
        # Specify targets
        dynamic_targets = {"EFO:0000714": {"op": '>=', "value": 1000}}
        # Setup query
        query = Query(evidence=evidence, dynamic_targets=dynamic_targets)
        query = self.dynamic_reasoner.run_query(query)
        query.result.summary(include_contributions=False)

    def test_dynamic_reasoner_one_drug_survival(self):
        # Specify evidence
        evidence = {
            '_CHEMBL:CHEMBL83': 'True',
        }
        # Specify targets
        dynamic_targets = {"EFO:0000714": {"op": '>=', "value": 1000}}
        # Setup query
        query = Query(evidence=evidence, dynamic_targets=dynamic_targets)
        query = self.dynamic_reasoner.run_query(query, bkb_type='drug')
        query.result.summary(include_contributions=False)

    def test_dynamic_reasoner_two_drug_survival(self):
        # Specify evidence
        evidence = {
            '_CHEMBL:CHEMBL83': 'True',
            '_CHEMBL:CHEMBL1201247': 'True',
        }
        # Specify targets
        dynamic_targets = {"EFO:0000714": {"op": '>=', "value": 1000}}
        # Setup query
        query = Query(evidence=evidence, dynamic_targets=dynamic_targets)
        query = self.dynamic_reasoner.run_query(query, bkb_type='drug')
        query.result.summary(include_contributions=False)

Example #11

Show file

class DefaultHandlerMixin:
    def _setup_handler(self):
        # Only do the rest of this if a query is passed
        if self.init_query is not None:
            # Setup queries
            self._setup_queries()

            # Instiatate Reasoners
            if 'default' in self.query_dict:
                if self.dynamic_reasoner is None:
                    self.dynamic_reasoner = ChpDynamicReasoner(
                        bkb_handler=self.bkb_data_handler,
                        hosts_filename=self.hosts_filename,
                        num_processes_per_host=self.num_processes_per_host)
            if 'simple' in self.query_dict:
                if self.joint_reasoner is None:
                    self.joint_reasoner = ChpJointReasoner(
                        bkb_handler=self.bkb_data_handler,
                        hosts_filename=self.hosts_filename,
                        num_processes_per_host=self.num_processes_per_host)

    def _setup_queries(self):
        if type(self.init_query) == list:
            self.query_dict = defaultdict(list)
            self.query_map = []
            for query in self.init_query:
                self.query_map.append(query["query_id"])
                if self._is_simple_query(query):
                    self.query_dict['simple'].append(
                        self._setup_single_query(query))
                else:
                    self.query_dict['default'].append(
                        self._setup_single_query(query))
        else:
            if self._is_simple_query(self.init_query):
                self.query_dict = {
                    "simple": [self._setup_single_query(self.init_query)]
                }
            else:
                self.query_dict = {
                    "default": [self._setup_single_query(self.init_query)]
                }

    def _is_simple_query(self, query):
        """ Check if this is a {0 or 1} drug, {0 or 1} gene, one outcome standard query.
        """
        _found_outcome = False
        _found_disease = False
        _found_gene = False
        _found_drug = False
        for node_key, node in query["query_graph"]["nodes"].items():
            if node["category"] == BIOLINK_PHENOTYPIC_FEATURE:
                # If we've already found the target and there's another phenotypic feature, then this isn't simple.
                if _found_outcome:
                    return False
                else:
                    _found_outcome = True
            if node['category'] == BIOLINK_DISEASE:
                # If we've already found disease and there's another disease, then this isn't simple.
                if _found_disease:
                    return False
                else:
                    _found_disease = True
            if node["category"] == BIOLINK_GENE:
                if _found_gene:
                    return False
                else:
                    _found_gene = True
            if node['category'] == BIOLINK_DRUG:
                if _found_drug:
                    return False
                else:
                    _found_drug = True
        return True

    def _extract_chp_query(self, query, query_type=None):
        evidence = {}
        targets = []
        dynamic_evidence = {}
        dynamic_targets = {}
        # ensure we are using all nodes/edges
        total_nodes = 0
        total_edges = 0

        # get phenotype node
        targets = list()
        for node_key in query["query_graph"]['nodes'].keys():
            node = query["query_graph"]['nodes'][node_key]
            if node['category'] == BIOLINK_PHENOTYPIC_FEATURE:
                target_id = node_key
                total_nodes += 1

        # get disease node info and ensure only 1 disease:
        for node_key in query["query_graph"]['nodes'].keys():
            node = query["query_graph"]['nodes'][node_key]
            if node['category'] == BIOLINK_DISEASE:
                disease_id = node_key
                for edge_key in query["query_graph"]['edges'].keys():
                    edge = query["query_graph"]['edges'][edge_key]
                    if edge['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE and edge[
                            'subject'] == disease_id and edge[
                                'object'] == target_id:
                        if 'properties' in edge.keys():
                            days = edge['properties']['days']
                            qualifier = edge['properties']['qualifier']
                        else:
                            days = 970
                            qualifier = '>='
                        total_edges += 1
                total_nodes += 1
        # set BKB target
        dynamic_targets[node["id"]] = {
            "op": qualifier,
            "value": days,
        }
        truth_target = (node["id"], '{} {}'.format(qualifier, days))

        # get evidence
        for node_key in query["query_graph"]['nodes'].keys():
            # genes
            node = query["query_graph"]['nodes'][node_key]
            if node['category'] == BIOLINK_GENE:
                # check for appropriate gene node structure
                gene_id = node_key
                for edge_key in query["query_graph"]['edges'].keys():
                    edge = query["query_graph"]['edges'][edge_key]
                    if edge['predicate'] == BIOLINK_GENE_TO_DISEASE_PREDICATE and edge[
                            'subject'] == gene_id and edge[
                                'object'] == disease_id:
                        total_edges += 1
                # check for appropriate gene node curie
                gene_curie = node['id']
                gene = gene_curie
                evidence["_" + gene] = 'True'
                total_nodes += 1
            # drugs
            if node['category'] == BIOLINK_DRUG:
                # check for appropriate drug node structure
                drug_id = node_key
                for edge_key in query["query_graph"]['edges'].keys():
                    edge = query["query_graph"]['edges'][edge_key]
                    if edge['predicate'] == BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE and edge[
                            'subject'] == drug_id and edge[
                                'object'] == disease_id:
                        total_edges += 1
                # check for appropriate drug node curie
                drug_curie = node['id']
                drug = drug_curie
                evidence[node["id"]] = 'True'
                total_nodes += 1

        # produce BKB query
        chp_query = Query(evidence=evidence,
                          targets=targets,
                          dynamic_evidence=dynamic_evidence,
                          dynamic_targets=dynamic_targets,
                          type='updating')
        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        chp_query.query_id = query["query_id"] if 'query_id' in query else None
        return chp_query

    def _run_query(self, chp_query, query_type):
        if query_type == 'simple':
            chp_query = self.joint_reasoner.run_query(chp_query)
            # If a probability was found for the target
            if len(chp_query.result) > 0:
                # If a probability was found for the truth target
                if chp_query.truth_target in chp_query.result:
                    total_unnormalized_prob = 0
                    for target, contrib in chp_query.result.items():
                        prob = max(0, contrib)
                        total_unnormalized_prob += prob
                    chp_query.truth_prob = max([
                        0, chp_query.result[(chp_query.truth_target)]
                    ]) / total_unnormalized_prob
                else:
                    chp_query.truth_prob = 0
            else:
                chp_query.truth_prob = -1
            chp_query.report = None
        else:
            chp_query = self.dynamic_reasoner.run_query(chp_query)
            chp_res_dict = chp_query.result.process_updates(normalize=True)
            chp_query.truth_prob = max([
                0, chp_res_dict[chp_query.truth_target[0]][
                    chp_query.truth_target[1]]
            ])
            chp_query.report = None
        return chp_query

    def _construct_trapi_response(self, chp_query, query_type=None):
        # Get orginal query
        if len(self.init_query) == 1:
            query = self.init_query[0]
            query_id = None
        else:
            for _query in self.init_query:
                if _query["query_id"] == chp_query.query_id:
                    query = _query
                    query_id = query["query_id"]
                    break

        kg = copy.deepcopy(query["query_graph"])
        # update target node info and form edge pair combos for results graph

        node_pairs = dict()
        for node_key in list(kg['nodes'].keys())[:]:
            qg_node_curie = kg['nodes'][node_key].pop('id')
            kg['nodes'][qg_node_curie] = kg['nodes'].pop(node_key)
            node_pairs[node_key] = qg_node_curie
            if kg['nodes'][qg_node_curie]['category'] == BIOLINK_GENE:
                kg['nodes'][qg_node_curie]['name'] = self._get_curie_name(
                    BIOLINK_GENE, qg_node_curie)[0]
            elif kg['nodes'][qg_node_curie]['category'] == BIOLINK_DRUG:
                kg['nodes'][qg_node_curie]['name'] = self._get_curie_name(
                    BIOLINK_DRUG, qg_node_curie)[0]

        edge_pairs = dict()
        knowledge_edges = 0
        for edge_key in list(kg['edges'].keys())[:]:
            kg_id = 'kge{}'.format(knowledge_edges)
            knowledge_edges += 1
            kg['edges'][kg_id] = kg['edges'].pop(edge_key)
            kg['edges'][kg_id]['subject'] = node_pairs[kg['edges'][kg_id]
                                                       ['subject']]
            kg['edges'][kg_id]['object'] = node_pairs[kg['edges'][kg_id]
                                                      ['object']]
            edge_pairs[edge_key] = kg_id
            if kg['edges'][kg_id][
                    'predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE:
                if 'properties' in kg['edges'][kg_id].keys():
                    kg['edges'][kg_id].pop('properties')
                kg['edges'][kg_id]['attributes'] = [{
                    'name':
                    'Probability of Survival',
                    'type':
                    BIOLINK_PROBABILITY,
                    'value':
                    chp_query.truth_prob
                }]

        results = []
        results.append({
            'edge_bindings': {},
            'node_bindings': {},
        })
        for edge_pair_key in edge_pairs:
            results[0]['edge_bindings'][edge_pair_key] = [{
                'id':
                edge_pairs[edge_pair_key]
            }]
        for node_pair_key in node_pairs:
            results[0]['node_bindings'][node_pair_key] = [{
                'id':
                node_pairs[node_pair_key]
            }]

        # query response
        trapi_message = {
            'query_graph': query["query_graph"],
            'knowledge_graph': kg,
            'results': results
        }
        trapi_response = {'message': trapi_message}
        return query_id, trapi_response

Example #12

Show file

File: test_trapi_interface.py Project: NCATSTranslator/connections_hypothesis_provider

    def setUpClass(cls):
        super(TestBaseHandler, cls).setUpClass()

        cls.bkb_handler = BkbDataHandler()
        cls.dynamic_reasoner = ChpDynamicReasoner(cls.bkb_handler)
        cls.joint_reasoner = ChpJointReasoner(cls.bkb_handler)

Example #13

Show file

 def setUpClass(cls):
     super(TestDynamicReasoner, cls).setUpClass()
     cls.bkb_handler = BkbDataHandler(disease='tcga_brca')
     cls.dynamic_reasoner = ChpDynamicReasoner(cls.bkb_handler)

Example #14

Show file

File: wildcard_handler_mixin.py Project: NCATSTranslator/connections_hypothesis_provider

class WildCardHandlerMixin:
    def _setup_handler(self):
        # Only do the rest of this if a query is passed
        if self.messages is not None:
            # Setup messages
            self._setup_messages()

            # Instiatate Reasoners
            if self.dynamic_reasoner is None:
                self.dynamic_reasoner = ChpDynamicReasoner(
                    bkb_handler=self.bkb_data_handler,
                    hosts_filename=self.hosts_filename,
                    num_processes_per_host=self.num_processes_per_host)

    def _setup_messages(self):
        if type(self.messages) == list:
            self.message_dict = defaultdict(list)
            for message in self.messages:
                self.message_dict[self._get_wildcard_type(message)].append(message)

    def _get_wildcard_type(self, message):
        wildcard_type = None
        for node_id, node in message.query_graph.nodes.items():
            if node.ids is None:
                if wildcard_type is None:
                    wildcard_type = node.categories[0]
        if wildcard_type == BIOLINK_DRUG_ENTITY:
            return 'drug'
        elif wildcard_type == BIOLINK_GENE_ENTITY:
            return 'gene'
        else:
            raise ValueError('Did not understand wildcard type {}.'.format(wildcard_type))

    def _extract_chp_query(self, message, message_type):
        # Initialize CHP BKB Query
        chp_query = ChpQuery(reasoning_type='updating')
        # ensure we are using all nodes/edges
        total_nodes = 0
        total_edges = 0

        query_graph = message.query_graph
        # get phenotype node
        targets = list()
        acceptable_target_curies = ['EFO:0000714']
        self.implicit_survival_node = False
        for node_key in query_graph.nodes.keys():
            node = query_graph.nodes[node_key]
            if node.categories[0] == BIOLINK_PHENOTYPIC_FEATURE_ENTITY and node.ids[0] in acceptable_target_curies:
                target_id = node_key
                total_nodes += 1
        if total_nodes == 0:
            # Use Default Survival
            self.implicit_survival_node = True
            total_nodes += 1
            #acceptable_target_curies_print = ','.join(acceptable_target_curies)
            #sys.exit("Survival Node not found. Node category must be '{}' and id must be in: {}".format(Biolink(BIOLINK_PHENOTYPIC_FEATURE),
            #                                                                                            acceptable_target_curies_print))

        survival_value = 970
        survival_operator = '>='
        # get disease node info and ensure only 1 disease:
        acceptable_disease_curies = ['MONDO:0007254']
        for node_key in query_graph.nodes.keys():
            node = query_graph.nodes[node_key]
            if node.categories[0] == BIOLINK_DISEASE_ENTITY and node.ids[0] in acceptable_disease_curies:
                disease_id = node_key
                for edge_key in query_graph.edges.keys():
                    edge = query_graph.edges[edge_key]
                    if self.check_predicate_support(edge.predicates[0], BIOLINK_HAS_PHENOTYPE_ENTITY) and edge.subject == disease_id and edge.object == target_id:
                        survival_time_constraint = edge.find_constraint(name='survival_time')
                        if survival_time_constraint is not None:
                            survival_value = survival_time_constraint.value
                            survival_operator = survival_time_constraint.operator
                            if survival_operator == 'matches':
                                survival_operator = '=='
                        total_edges += 1
                total_nodes += 1

        if self.implicit_survival_node:
            days=970
            qualifier = '>='
            total_edges += 1

        # set BKB target
        chp_query.add_dynamic_target('EFO:0000714', survival_operator, survival_value)
        truth_target = ('EFO:0000714', '{} {}'.format(survival_operator, survival_value))

        # get evidence
        for node_key in query_graph.nodes.keys():
            # genes
            node = query_graph.nodes[node_key]
            if node.categories[0] == BIOLINK_GENE_ENTITY:
                # check for appropriate gene node structure
                gene_id = node_key
                for edge_key in query_graph.edges.keys():
                    edge = query_graph.edges[edge_key]
                    if self.check_predicate_support(edge.predicates[0], BIOLINK_GENE_ASSOCIATED_WITH_CONDITION_ENTITY) and edge.subject == gene_id and edge.object == disease_id:
                        total_edges += 1
                # check for appropriate gene node curie
                if message_type != 'gene':
                    gene_curie = node.ids[0]
                    if gene_curie in self.curies[BIOLINK_GENE_ENTITY.get_curie()]:
                        gene = gene_curie
                    chp_query.add_meta_evidence(gene, 'True')
                total_nodes += 1
            # drugs
            if node.categories[0] == BIOLINK_DRUG_ENTITY:
                # check for appropriate drug node structure
                drug_id = node_key
                for edge_key in query_graph.edges.keys():
                    edge = query_graph.edges[edge_key]
                    if self.check_predicate_support(edge.predicates[0], BIOLINK_TREATS_ENTITY) and edge.subject == drug_id and edge.object == disease_id:
                        total_edges += 1
                # check for appropriate drug node curie
                if message_type != 'drug':
                    drug_curie = node.ids[0]
                    if drug_curie in self.curies[BIOLINK_DRUG_ENTITY.get_curie()]:
                        drug = drug_curie
                    chp_query.add_meta_evidence(drug, 'True')
                total_nodes += 1

        # Temporary solution to no evidence linking
        if len(chp_query.evidence.keys()) == 0 and len(chp_query.dynamic_evidence.keys()) == 0:
            self.no_evidence_probability_check = True
        else:
            self.no_evidence_probability_check = False

        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        return chp_query

    def _run_query(self, chp_query, query_type):
        """ Runs build BKB query to calculate probability of survival.
            A probability is returned to specificy survival time w.r.t a drug.
            Contributions for each gene are calculuated and classified under
            their true/false target assignments.
        """

        # temporary solution to no evidence linking
        if not self.no_evidence_probability_check:
            if query_type == 'gene':
                chp_query = self.dynamic_reasoner.run_query(chp_query, bkb_type='drug')
            elif query_type == 'drug':
                chp_query = self.dynamic_reasoner.run_query(chp_query, bkb_type='gene')
            chp_res_dict = chp_query.result.process_updates()
            chp_res_norm_dict = chp_query.result.process_updates(normalize=True)
            #chp_query.result.summary()
            chp_res_contributions = chp_query.result.process_inode_contributions()
            try:
                chp_query.truth_prob = max([0, chp_res_dict[chp_query.truth_target[0]][chp_query.truth_target[1]]])
            except KeyError:
                # May need to come back and fix this.
                chp_query.truth_prob = -1

            # Collect all source inodes and process patient hashes
            patient_contributions = defaultdict(lambda: defaultdict(int))
            for target, contrib_dict in chp_res_contributions.items():
                target_comp_name, target_state_name = target
                for inode, contrib in contrib_dict.items():
                    comp_name, state_name = inode
                    if '_Source_' in comp_name:
                        # Split source state name to get patient hashes
                        source_hashes_str = state_name.split('_')[-1]
                        source_hashes = [int(source_hash) for source_hash in source_hashes_str.split(',')]
                        hash_len = len(source_hashes)
                        # Process patient contributions
                        for _hash in source_hashes:
                            # Normalize to get relative contribution
                            patient_contributions[target][_hash] += contrib/hash_len #/ chp_res_dict[target_comp_name][target_state_name]

        else:
            # probability of survival
            num_survived = 0
            num_all = len(self.dynamic_reasoner.raw_patient_data.keys())
            str_op = chp_query.dynamic_targets['EFO:0000714']['op']
            opp_op = get_opposite_operator(str_op)
            op = get_operator(str_op)
            days = chp_query.dynamic_targets['EFO:0000714']['value']
            for patient, pat_dict in self.dynamic_reasoner.raw_patient_data.items():
                if op(pat_dict['survival_time'], days):
                    num_survived += 1
            chp_query.truth_prob = num_survived/num_all

            # patient_contributions
            patient_contributions = defaultdict(lambda: defaultdict(int))
            for patient, pat_dict in self.dynamic_reasoner.raw_patient_data.items():
                if op(pat_dict['survival_time'], days):
                    if num_survived == 0:
                        patient_contributions[('EFO:0000714', '{} {}'.format(str_op, days))][patient] = 0
                    else:
                        patient_contributions[('EFO:0000714', '{} {}'.format(str_op, days))][patient] = chp_query.truth_prob/num_survived
                else:
                    if num_survived == 0:
                        patient_contributions[('EFO:0000714', '{} {}'.format(opp_op, days))][patient] = (1-chp_query.truth_prob)/num_all
                    else:
                        patient_contributions[('EFO:0000714', '{} {}'.format(opp_op, days))][patient] = (1-chp_query.truth_prob)/(num_all-num_survived)

        # Now iterate through the patient data to translate patient contributions to drug/gene contributions
        wildcard_contributions = defaultdict(lambda: defaultdict(int))
        for target, patient_contrib_dict in patient_contributions.items():
            for patient, contrib in patient_contrib_dict.items():
                if query_type == 'gene':
                    for gene_curie in self.dynamic_reasoner.raw_patient_data[patient]["gene_curies"]:
                        wildcard_contributions[gene_curie][target] += contrib
                elif query_type == 'drug':
                    for drug_curie in self.dynamic_reasoner.raw_patient_data[patient]["drug_curies"]:
                        wildcard_contributions[drug_curie][target] += contrib

        # normalize gene contributions by the target and take relative difference
        for curie in wildcard_contributions.keys():
            truth_target_gene_contrib = 0
            nontruth_target_gene_contrib = 0
            for target, contrib in wildcard_contributions[curie].items():
                if target[0] == chp_query.truth_target[0] and target[1] == chp_query.truth_target[1]:
                    truth_target_gene_contrib += contrib / chp_query.truth_prob
                else:
                    nontruth_target_gene_contrib += contrib / (1 - chp_query.truth_prob)
            wildcard_contributions[curie]['relative'] = truth_target_gene_contrib - nontruth_target_gene_contrib

        chp_query.report = None
        chp_query.wildcard_contributions = wildcard_contributions

        return chp_query

    def _construct_trapi_message(self, chp_query, message, query_type=None):

        # update target node info and form edge pair combos for results graph

        qg = message.query_graph
        kg = message.knowledge_graph

        # Process Standard QUery as first result.
        # Process Nodes
        node_bindings = {}
        contrib_qg_id = None
        for qnode_key, qnode in qg.nodes.items():
            if qnode.ids is not None:
                if qnode.categories[0] == BIOLINK_GENE_ENTITY:
                    knode_key = kg.add_node(
                            qnode.ids[0],
                            self.curies[BIOLINK_GENE_ENTITY.get_curie()][qnode.ids[0]][0],
                            qnode.categories[0].get_curie(),
                            )
                elif qnode.categories[0] == BIOLINK_DRUG_ENTITY:
                    knode_key = kg.add_node(
                            qnode.ids[0],
                            self.curies[BIOLINK_DRUG_ENTITY.get_curie()][qnode.ids[0]][0],
                            qnode.categories[0].get_curie(),
                            )
                else:
                    knode_key = kg.add_node(
                            qnode.ids[0],
                            qnode.ids[0],
                            qnode.categories[0].get_curie(),
                            )
                node_bindings[qnode_key] = [knode_key]
        if not self.implicit_survival_node:
            # Process Edges
            edge_bindings = {}
            knowledge_edges = 0
            for qedge_key, qedge in qg.edges.items():
                if not qedge.subject in node_bindings or not qedge.object in node_bindings:
                    continue
                kedge_key = kg.add_edge(
                        node_bindings[qedge.subject][0],
                        node_bindings[qedge.object][0],
                        predicate=qedge.predicates[0].get_curie(),
                        relation=qedge.relation,
                        )
                edge_bindings[qedge_key] = [kedge_key]
                # Add Attribute
                if self.check_predicate_support(qedge.predicates[0], BIOLINK_HAS_PHENOTYPE_ENTITY):
                    kg.edges[kedge_key].add_attribute(
                            attribute_type_id='Probability of Survival',
                            value=chp_query.truth_prob,
                            value_type_id=BIOLINK_HAS_CONFIDENCE_LEVEL_ENTITY.get_curie(),
                            )
                '''
                subject_node = kg['edges'][edge_key]['subject']
                if kg['edges'][edge_key]['predicate'] == BIOLINK_GENE_ENTITY_TO_DISEASE_PREDICATE, is_slot=True) and query['query_graph']['nodes'][subject_node]['category'] == BIOLINK_GENE_ENTITY and query_type == 'gene':
                    kg['edges'].pop(edge_key)
                elif kg['edges'][edge_key]['predicate'] == BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE, is_slot=True) and query['query_graph']['nodes'][subject_node]['category'] == BIOLINK_DRUG_ENTITY and query_type == 'drug':
                    kg['edges'].pop(edge_key)
                else:
                    kg_id = 'kge{}'.format(knowledge_edges)
                    knowledge_edges += 1
                    kg['edges'][kg_id] = kg['edges'].pop(edge_key)
                    kg['edges'][kg_id]['subject'] = node_pairs[kg['edges'][kg_id]['subject']]
                    kg['edges'][kg_id]['object'] = node_pairs[kg['edges'][kg_id]['object']]
                    edge_pairs[edge_key] = kg_id
                    if kg['edges'][kg_id]['predicate'] == BIOLINK_DISEASE_ENTITY_TO_PHENOTYPIC_FEATURE_PREDICATE, is_slot=True):
                        if 'properties' in kg['edges'][kg_id].keys():
                            kg['edges'][kg_id].pop('properties')
                        kg['edges'][kg_id]['attributes'] = [{'name':'Probability of Survival',
                                                             'type':BIOLINK_PROBABILITY,
                                                             'value':chp_query.truth_prob}]
                '''
            # Proces results
            message.results.add_result(
                    node_bindings,
                    edge_bindings,
                    )
            '''
            # Put first result of standard prob query of only curie nodes (i.e. no wildcard nodes where used as evidence)
            results = []
            results.append({'edge_bindings':dict(),
                            'node_bindings':dict()})
            for edge_pair_key in edge_pairs:
                results[0]['edge_bindings'][edge_pair_key] = [{ 'id': str(edge_pairs[edge_pair_key])}]
            for node_pair_key in node_pairs:
                results[0]['node_bindings'][node_pair_key] = [{ 'id': str(node_pairs[node_pair_key])}]
            '''
        #else:
        #    knowledge_edges = 0
        #    kg['edges'] = {}
        #    results = []

        # Build relative contribution results and added associated edges into knowledge graph
        unsorted_wildcard_contributions = []
        for wildcard, contrib_dict in chp_query.wildcard_contributions.items():
            unsorted_wildcard_contributions.append((contrib_dict['relative'], wildcard))
        sorted_wildcard_contributions = [(contrib,wildcard) for contrib, wildcard in sorted(unsorted_wildcard_contributions, key=lambda x: abs(x[0]), reverse=True)]

        for contrib, wildcard in sorted_wildcard_contributions[:self.max_results]:
            #TODO: Fix this!
            if wildcard == 'missing':
                continue
            #rg = copy.deepcopy(query["query_graph"])
            _node_bindings = {}
            _edge_bindings = {}
            # Process node bindings
            bad_wildcard = False
            for qnode_id, qnode in qg.nodes.items():
                if qnode.categories[0] == BIOLINK_GENE_ENTITY and query_type == 'gene':
                    try:
                        knode_id = kg.add_node(
                                wildcard,
                                self.curies[BIOLINK_GENE_ENTITY.get_curie()][wildcard][0],
                                qnode.categories[0].get_curie(),
                                )
                        _node_bindings[qnode_id] = [knode_id]
                    except KeyError:
                        logger.info("Couldn't find {} in curies[{}]".format(wildcard, BIOLINK_GENE_ENTITY.get_curie()))
                        bad_wildcard = True
                elif qnode.categories[0] == BIOLINK_DRUG_ENTITY and query_type == 'drug':
                    knode_id = kg.add_node(
                            wildcard,
                            self.curies[BIOLINK_DRUG_ENTITY.get_curie()][wildcard][0],
                            qnode.categories[0].get_curie(),
                            )
                    _node_bindings[qnode_id] = [knode_id]
                else:
                    _node_bindings[qnode_id] = node_bindings[qnode_id]
            if bad_wildcard:
                continue
            # Process edge bindings
            for qedge_id, qedge in qg.edges.items():
                subject_node = qedge.subject
                object_node = qedge.object
                if query_type == 'gene' and self.check_predicate_support(qedge.predicates[0], BIOLINK_GENE_ASSOCIATED_WITH_CONDITION_ENTITY) and qg.nodes[subject_node].categories[0] == BIOLINK_GENE_ENTITY:
                    kedge_id = kg.add_edge(
                            _node_bindings[qedge.subject][0],
                            _node_bindings[qedge.object][0],
                            predicate=qedge.predicates[0],
                            relation=qedge.relation,
                            )
                    kg.edges[kedge_id].add_attribute(
                            attribute_type_id='Contribution',
                            value=contrib,
                            value_type_id=BIOLINK_HAS_EVIDENCE_ENTITY.get_curie(),
                            )
                    _edge_bindings[qedge_id] = [kedge_id]
                elif query_type == 'gene' and self.check_predicate_support(qedge.predicates[0], BIOLINK_CONDITION_ASSOCIATED_WITH_GENE_ENTITY) and qg.nodes[object_node].categories[0] == BIOLINK_GENE_ENTITY:
                    kedge_id = kg.add_edge(
                            _node_bindings[qedge.subject][0],
                            _node_bindings[qedge.object][0],
                            predicate=qedge.predicates[0],
                            relation=qedge.relation,
                            )
                    kg.edges[kedge_id].add_attribute(
                            attribute_type_id='Contribution',
                            value=contrib,
                            value_type_id=BIOLINK_HAS_EVIDENCE_ENTITY.get_curie(),
                            )
                    _edge_bindings[qedge_id] = [kedge_id]
                elif query_type == 'drug' and self.check_predicate_support(qedge.predicates[0], BIOLINK_TREATS_ENTITY) and qg.nodes[subject_node].categories[0] == BIOLINK_DRUG_ENTITY:
                    kedge_id = kg.add_edge(
                            _node_bindings[qedge.subject][0],
                            _node_bindings[qedge.object][0],
                            predicate=qedge.predicates[0],
                            relation=qedge.relation,
                            )
                    kg.edges[kedge_id].add_attribute(
                            attribute_type_id='Contribution',
                            value=contrib,
                            value_type_id=BIOLINK_HAS_EVIDENCE_ENTITY.get_curie(),
                            )
                    _edge_bindings[qedge_id] = [kedge_id]
                elif query_type == 'drug' and self.check_predicate_support(qedge.predicates[0], BIOLINK_TREATED_BY_ENTITY) and qg.nodes[object_node].categories[0] == BIOLINK_DRUG_ENTITY:
                    kedge_id = kg.add_edge(
                            _node_bindings[qedge.subject][0],
                            _node_bindings[qedge.object][0],
                            predicate=qedge.predicates[0],
                            relation=qedge.relation,
                            )
                    kg.edges[kedge_id].add_attribute(
                            attribute_type_id='Contribution',
                            value=contrib,
                            value_type_id=BIOLINK_HAS_EVIDENCE_ENTITY.get_curie(),
                            )
                    _edge_bindings[qedge_id] = [kedge_id]
                else:
                    _edge_bindings[qedge_id] = edge_bindings[qedge_id]
            # Process node and edge binding results
            message.results.add_result(
                    _node_bindings,
                    _edge_bindings,
                    )
        return message

Example #15

Show file

File: one_hop_handler_mixin.py Project: di2ag/chp

class OneHopHandlerMixin:
    """ OneHopeHandler is the handler for 1-hop queries. That is
        query graphs (QGs) that consists of 2 nodes and a single edge.

        :param query: the query graph sent by the ARA.
        :type query: dict
        :param hosts_filename: a filename for a stored QG. Defaults to None
        :type hosts_filename: str
        :param num_processes_per_host: Not implemented thouroughly, but would be
            used for distributed reasoning.
        :type num_processes_per_host: int
        :param max_results: specific to 1-hop queries, specifies the number of
            wildcard genes to return.
        :type max_results: int
    """
    def _setup_handler(self):
        self.default_survival_target = {
            "EFO:0000714": {
                "op": '>=',
                "value": 970
            }
        }

        # Only do the rest of this if a query is passed
        if self.messages is not None:
            # Setup queries
            self._setup_messages()

            # Instiatate Reasoners
            if self.dynamic_reasoner is None:
                self.dynamic_reasoner = ChpDynamicReasoner(
                    bkb_handler=self.bkb_data_handler,
                    hosts_filename=self.hosts_filename,
                    num_processes_per_host=self.num_processes_per_host)
            if self.joint_reasoner is None:
                self.joint_reasoner = ChpJointReasoner(
                    bkb_handler=self.bkb_data_handler,
                    hosts_filename=self.hosts_filename,
                    num_processes_per_host=self.num_processes_per_host)

    def _setup_messages(self):
        self.message_dict = defaultdict(list)
        for message in self.messages:
            self.message_dict[self._get_onehop_type(message)].append(message)

    def _get_onehop_type(self, message):
        wildcard_type = None
        for node_id, node in message.query_graph.nodes.items():
            if node.ids is None:
                if wildcard_type is None:
                    wildcard_type = node.categories[0]
        # If standard onehop query
        if wildcard_type is None:
            return 'standard'
        elif wildcard_type == BIOLINK_DRUG_ENTITY:
            return 'drug'
        elif wildcard_type == BIOLINK_GENE_ENTITY:
            return 'gene'
        else:
            raise ValueError(
                'Did not understand wildcard type {}.'.format(wildcard_type))

    def check_query(self):
        """ Currently not implemented. Would check validity of query.
        """
        return True

    @staticmethod
    def _process_predicate_proxy(qedge):
        dynamic_targets = {}
        predicate_proxy_constraint = qedge.find_constraint('predicate_proxy')
        if predicate_proxy_constraint is None:
            predicate_proxy = get_default_predicate_proxy()
            proxy_constraint = qedge.find_constraint(predicate_proxy)
        else:
            predicate_proxy = predicate_proxy_constraint.value[0]
            proxy_constraint = qedge.find_constraint(predicate_proxy)
        if proxy_constraint is None:
            proxy_operator = get_default_operator(predicate_proxy)
            proxy_value = get_default_value(predicate_proxy)
        else:
            proxy_operator = proxy_constraint.operator
            proxy_value = proxy_constraint.value
        # Setup dynamic target
        dynamic_targets[predicate_proxy] = {
            "op": proxy_operator,
            "value": proxy_value,
        }
        return dynamic_targets

    @staticmethod
    def _process_predicate_context(qedge, message_type):
        evidence = {}
        dynamic_evidence = {}
        predicate_context_constraint = qedge.find_constraint(
            'predicate_context')
        if predicate_context_constraint is not None:
            for context in predicate_context_constraint.value:
                context_curie = get_biolink_entity(context)
                context_constraint = qedge.find_constraint(context)
                if context_constraint is None:
                    raise ValueError(
                        'Provided no context details for {}'.format(context))
                if context_curie == BIOLINK_GENE_ENTITY:
                    if message_type == 'gene':
                        if type(context_constraint.value) is list:
                            for _curie in context_constraint.value:
                                dynamic_evidence[_curie] = {
                                    "op": '==',
                                    "value": 'True',
                                }
                        else:
                            dynamic_evidence[context_constraint.value] = {
                                "op": '==',
                                "value": 'True',
                            }
                    else:
                        if type(context_constraint.value) is list:
                            for _curie in context_constraint.value:
                                evidence['_{}'.format(_curie)] = 'True'
                        else:
                            evidence['_{}'.format(_curie)] = 'True'
                elif context_curie == BIOLINK_DRUG_ENTITY:
                    if message_type == 'drug':
                        if type(context_constraint.value) is list:
                            for _curie in context_constraint.value:
                                dynamic_evidence[_curie] = {
                                    "op": '==',
                                    "value": 'True',
                                }
                        else:
                            dynamic_evidence[context_constraint.value] = {
                                "op": '==',
                                "value": 'True',
                            }
                    else:
                        if type(context_constraint.value) is list:
                            for _curie in context_constraint.value:
                                evidence['_{}'.format(_curie)] = 'True'
                        else:
                            evidence['_{}'.format(_curie)] = 'True'
                else:
                    raise ValueError(
                        'Unsupported context type: {}'.format(context_curie))
        return evidence, dynamic_evidence

    def _extract_chp_query(self, message, message_type):
        evidence = {}
        dynamic_targets = {}
        dynamic_evidence = {}

        if message_type == 'standard':
            # Setup gene and drug evidence
            for qnode_id, qnode in message.query_graph.nodes.items():
                if qnode.categories[
                        0] == BIOLINK_GENE_ENTITY or qnode.categories[
                            0] == BIOLINK_DRUG_ENTITY:
                    evidence['_{}'.format(qnode.ids[0])] = 'True'
        elif message_type == 'gene':
            for qnode_id, qnode in message.query_graph.nodes.items():
                if qnode.categories[0] == BIOLINK_DRUG_ENTITY:
                    #dynamic_evidence[qnode.ids[0]] = {
                    #        "op": '==',
                    #        "value": 'True',
                    #        }
                    evidence['_{}'.format(qnode.ids[0])] = 'True'
        elif message_type == 'drug':
            for qnode_id, qnode in message.query_graph.nodes.items():
                if qnode.categories[0] == BIOLINK_GENE_ENTITY:
                    #dynamic_evidence[qnode.ids[0]] = {
                    #        "op": '==',
                    #        "value": 'True',
                    #        }
                    evidence['_{}'.format(qnode.ids[0])] = 'True'
        # Grab edge
        for qedge_id, qedge in message.query_graph.edges.items():
            break
        # Process predicate proxy
        dynamic_targets = self._process_predicate_proxy(qedge)
        # Process predicate context
        _evidence, _dynamic_evidence = self._process_predicate_context(
            qedge, message_type)
        evidence.update(_evidence)
        dynamic_evidence.update(_dynamic_evidence)
        #TODO: Probably need a more robust solution for when no context is provided in wildcard queries and you need it.
        #if len(evidence) == 0:
        #    raise ValueError('Did not supply context with a query that required context.')

        target = list(dynamic_targets.keys())[0]
        truth_target = (target,
                        '{} {}'.format(dynamic_targets[target]["op"],
                                       dynamic_targets[target]["value"]))
        chp_query = Query(evidence=evidence,
                          targets=None,
                          dynamic_evidence=dynamic_evidence,
                          dynamic_targets=dynamic_targets,
                          type='updating')
        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        return chp_query

    def _run_query(self, chp_query, query_type):
        """ Runs build BKB query to calculate probability of survival.
            A probability is returned to specificy survival time w.r.t a drug.
            Contributions for each gene are calculuated and classified under
            their true/false target assignments.
        """
        if query_type == 'standard':
            chp_query = self.joint_reasoner.run_query(chp_query)
            # If a probability was found for the target
            if len(chp_query.result) > 0:
                # If a probability was found for the truth target
                if chp_query.truth_target in chp_query.result:
                    total_unnormalized_prob = 0
                    for target, contrib in chp_query.result.items():
                        prob = max(0, contrib)
                        total_unnormalized_prob += prob
                    chp_query.truth_prob = max([
                        0, chp_query.result[(chp_query.truth_target)]
                    ]) / total_unnormalized_prob
                else:
                    chp_query.truth_prob = 0
            else:
                chp_query.truth_prob = -1
            chp_query.report = None
            return chp_query
        else:
            # Do this if a disease node is present
            if len(chp_query.evidence) == 0:
                # probability of survival
                chp_query = self.joint_reasoner.run_query(chp_query)
                if len(chp_query.result) > 0:
                    # If a probability was found for the truth target
                    if chp_query.truth_target in chp_query.result:
                        total_unnormalized_prob = 0
                        for target, contrib in chp_query.result.items():
                            prob = max(0, contrib)
                            total_unnormalized_prob += prob
                        chp_query.truth_prob = max([
                            0, chp_query.result[(chp_query.truth_target)]
                        ]) / total_unnormalized_prob
                    else:
                        chp_query.truth_prob = 0
                else:
                    chp_query.truth_prob = -1

                # patient_contributions
                num_all = len(self.joint_reasoner.patient_data)
                num_matched = chp_query.truth_prob * num_all
                patient_contributions = defaultdict(lambda: defaultdict(int))
                for patient, feature_dict in self.joint_reasoner.patient_data.items(
                ):
                    for predicate_proxy, proxy_info in chp_query.dynamic_targets.items(
                    ):
                        proxy_op = get_operator(proxy_info["op"])
                        proxy_opp_op = get_opposite_operator(proxy_info["op"])
                        proxy_value = proxy_info["value"]
                        if proxy_op(feature_dict[predicate_proxy],
                                    proxy_value):
                            if num_matched == 0:
                                patient_contributions[(
                                    predicate_proxy,
                                    '{} {}'.format(proxy_op,
                                                   proxy_value))][patient] = 0
                            else:
                                patient_contributions[(
                                    predicate_proxy,
                                    '{} {}'.format(proxy_op, proxy_value)
                                )][patient] = chp_query.truth_prob / num_matched
                        else:
                            if num_matched == 0:
                                patient_contributions[(
                                    predicate_proxy,
                                    '{} {}'.format(proxy_opp_op, proxy_value)
                                )][patient] = (
                                    1 - chp_query.truth_prob) / num_matched
                            else:
                                patient_contributions[(
                                    predicate_proxy,
                                    '{} {}'.format(proxy_opp_op, proxy_value)
                                )][patient] = (1 - chp_query.truth_prob) / (
                                    num_all - num_matched)
                '''
                num_survived = 0
                num_all = len(self.dynamic_reasoner.raw_patient_data.keys())
                str_op = chp_query.dynamic_targets['EFO:0000714']['op']
                opp_op = get_opposite_operator(str_op)
                op = get_operator(str_op)
                days = chp_query.dynamic_targets['EFO:0000714']['value']
                for patient, pat_dict in self.dynamic_reasoner.raw_patient_data.items():
                    if op(pat_dict['survival_time'], days):
                        num_survived += 1
                chp_query.truth_prob = num_survived/num_all
                # patient_contributions
                patient_contributions = defaultdict(lambda: defaultdict(int))
                for patient, pat_dict in self.dynamic_reasoner.raw_patient_data.items():
                    if op(pat_dict['survival_time'], days):
                        if num_survived == 0:
                            patient_contributions[('EFO:0000714', '{} {}'.format(str_op, days))][patient] = 0
                        else:
                            patient_contributions[('EFO:0000714', '{} {}'.format(str_op, days))][patient] = chp_query.truth_prob/num_survived
                    else:
                        if num_survived == 0:
                            patient_contributions[('EFO:0000714', '{} {}'.format(opp_op, days))][patient] = (1-chp_query.truth_prob)/num_all
                        else:
                            patient_contributions[('EFO:0000714', '{} {}'.format(opp_op, days))][patient] = (1-chp_query.truth_prob)/(num_all-num_survived)
                '''
            else:
                if query_type == 'gene':
                    chp_query = self.dynamic_reasoner.run_query(
                        chp_query, bkb_type='drug')
                elif query_type == 'drug':
                    chp_query = self.dynamic_reasoner.run_query(
                        chp_query, bkb_type='gene')
                chp_res_dict = chp_query.result.process_updates()
                chp_res_norm_dict = chp_query.result.process_updates(
                    normalize=True)
                #chp_query.result.summary()
                chp_res_contributions = chp_query.result.process_inode_contributions(
                )
                chp_query.truth_prob = max([
                    0, chp_res_norm_dict[chp_query.truth_target[0]][
                        chp_query.truth_target[1]]
                ])

                # Collect all source inodes and process patient hashes
                patient_contributions = defaultdict(lambda: defaultdict(int))
                for target, contrib_dict in chp_res_contributions.items():
                    target_comp_name, target_state_name = target
                    for inode, contrib in contrib_dict.items():
                        comp_name, state_name = inode
                        if '_Source_' in comp_name:
                            # Split source state name to get patient hashes
                            source_hashes_str = state_name.split('_')[-1]
                            source_hashes = [
                                int(source_hash)
                                for source_hash in source_hashes_str.split(',')
                            ]
                            hash_len = len(source_hashes)
                            # Process patient contributions
                            for _hash in source_hashes:
                                # Normalize to get relative contribution
                                patient_contributions[target][
                                    _hash] += contrib / hash_len  #/ chp_res_dict[target_comp_name][target_state_name]

        # Now iterate through the patient data to translate patient contributions to drug/gene contributions
        wildcard_contributions = defaultdict(lambda: defaultdict(int))
        for target, patient_contrib_dict in patient_contributions.items():
            for patient, contrib in patient_contrib_dict.items():
                if query_type == 'gene':
                    for gene_curie in self.dynamic_reasoner.raw_patient_data[
                            int(patient)]["gene_curies"]:
                        wildcard_contributions[gene_curie][target] += contrib
                elif query_type == 'drug':
                    for drug_curie in self.dynamic_reasoner.raw_patient_data[
                            int(patient)]["drug_curies"]:
                        wildcard_contributions[drug_curie][target] += contrib

        # normalize gene contributions by the target and take relative difference
        for curie in wildcard_contributions.keys():
            truth_target_gene_contrib = 0
            nontruth_target_gene_contrib = 0
            for target, contrib in wildcard_contributions[curie].items():
                if target[0] == chp_query.truth_target[0] and target[
                        1] == chp_query.truth_target[1]:
                    truth_target_gene_contrib += contrib / chp_query.truth_prob
                else:
                    nontruth_target_gene_contrib += contrib / (
                        1 - chp_query.truth_prob)
            wildcard_contributions[curie][
                'relative'] = truth_target_gene_contrib - nontruth_target_gene_contrib

        chp_query.report = None
        chp_query.wildcard_contributions = wildcard_contributions

        return chp_query

    def _construct_trapi_message(self, chp_query, message, query_type):

        qg = message.query_graph
        kg = message.knowledge_graph

        edge_bindings = {}
        node_bindings = {}

        # Process nodes
        for qnode_id, qnode in qg.nodes.items():
            if qnode.ids is not None:
                if qnode.categories[0] == BIOLINK_GENE_ENTITY:
                    knode_key = kg.add_node(
                        qnode.ids[0],
                        self.curies[BIOLINK_GENE_ENTITY.get_curie()][
                            qnode.ids[0]][0],
                        qnode.categories[0].get_curie(),
                    )
                elif qnode.categories[0] == BIOLINK_DRUG_ENTITY:
                    knode_key = kg.add_node(
                        qnode.ids[0],
                        self.curies[BIOLINK_DRUG_ENTITY.get_curie()][
                            qnode.ids[0]][0],
                        qnode.categories[0].get_curie(),
                    )
                elif qnode.categories[0] == BIOLINK_DISEASE_ENTITY:
                    #TODO: Add diseases to curies and fix name hack below.
                    knode_key = kg.add_node(
                        qnode.ids[0],
                        qnode.
                        ids[0],  #TODO: Once curies is fixed, make this a name.
                        qnode.categories[0].get_curie(),
                    )
                node_bindings[qnode_id] = [knode_key]
            else:
                wildcard_node = qnode
        if query_type == 'standard':
            for qedge_key, qedge in qg.edges.items():
                kedge_key = kg.add_edge(
                    node_bindings[qedge.subject][0],
                    node_bindings[qedge.object][0],
                    predicate=qedge.predicates[0].get_curie(),
                    relation=qedge.relation,
                )
                edge_bindings[qedge_key] = [kedge_key]
                # Add Attribute
                kg.edges[kedge_key].add_attribute(
                    attribute_type_id='Probability of Survival',
                    value=chp_query.truth_prob,
                    value_type_id=BIOLINK_HAS_CONFIDENCE_LEVEL_ENTITY.
                    get_curie(),
                )
            message.results.add_result(
                node_bindings,
                edge_bindings,
            )
        else:
            # Build relative contribution results and added associated edges into knowledge graph
            unsorted_wildcard_contributions = []
            for wildcard, contrib_dict in chp_query.wildcard_contributions.items(
            ):
                unsorted_wildcard_contributions.append(
                    (contrib_dict['relative'], wildcard))
            sorted_wildcard_contributions = [
                (contrib, wildcard) for contrib, wildcard in sorted(
                    unsorted_wildcard_contributions,
                    key=lambda x: abs(x[0]),
                    reverse=True)
            ]

            # add kg gene nodes and edges
            edge_count = 0
            node_count = 1
            results = []
            for contrib, wildcard in sorted_wildcard_contributions[:self.
                                                                   max_results]:
                _node_bindings = {}
                _edge_bindings = {}
                # Process node bindings
                bad_wildcard = False
                for qnode_id, qnode in qg.nodes.items():
                    if qnode.categories[
                            0] == BIOLINK_GENE_ENTITY and query_type == 'gene':
                        try:
                            knode_id = kg.add_node(
                                wildcard,
                                self.curies[BIOLINK_GENE_ENTITY.get_curie()]
                                [wildcard][0],
                                qnode.categories[0].get_curie(),
                            )
                            _node_bindings[qnode_id] = [knode_id]
                        except KeyError:
                            logger.info(
                                "Couldn't find {} in curies[{}]".format(
                                    wildcard, BIOLINK_GENE))
                            bad_wildcard = True
                    elif qnode.categories[
                            0] == BIOLINK_DRUG_ENTITY and query_type == 'drug':
                        knode_id = kg.add_node(
                            wildcard,
                            self.curies[BIOLINK_DRUG_ENTITY.get_curie()]
                            [wildcard][0],
                            qnode.categories[0].get_curie(),
                        )
                        _node_bindings[qnode_id] = [knode_id]
                    else:
                        _node_bindings[qnode_id] = node_bindings[qnode_id]
                if bad_wildcard:
                    continue
                # Process edge bindings
                for qedge_id, qedge in qg.edges.items():
                    kedge_id = kg.add_edge(
                        _node_bindings[qedge.subject][0],
                        _node_bindings[qedge.object][0],
                        predicate=qedge.predicates[0],
                        relation=qedge.relation,
                    )
                    kg.edges[kedge_id].add_attribute(
                        attribute_type_id='Contribution',
                        value=contrib,
                        value_type_id=BIOLINK_HAS_EVIDENCE_ENTITY.get_curie(),
                    )
                    _edge_bindings[qedge_id] = [kedge_id]
                # Process node and edge binding results
                message.results.add_result(
                    _node_bindings,
                    _edge_bindings,
                )

        return message