def _extract_chp_query(self, query, query_type=None):
        """Build a CHP ``Query`` from a single-edge query graph.

        Only the first edge of the graph is inspected. When ``query_type``
        is ``'gene'`` or ``'drug'`` the ``id`` of the node named by that
        edge's ``object`` is recorded as evidence; any other value leaves
        the evidence empty. The dynamic target is always a copy of
        ``self.default_survival_target``.

        Args:
            query: Mapping with a ``query_graph`` (holding ``nodes`` and
                ``edges`` mappings) and, optionally, a ``query_id``.
            query_type: ``'gene'``, ``'drug'`` or anything else.

        Returns:
            The ``Query`` built from the evidence and dynamic targets, with
            ``truth_target`` and ``query_id`` attributes set on it.
        """
        query_graph = query["query_graph"]

        # Pull the endpoints of the (first) edge.
        first_edge = list(query_graph['edges'].values())[0]
        _subject_key, object_key = first_edge['subject'], first_edge['object']

        # Both supported query types bind the object node's CURIE as evidence.
        evidence = {}
        if query_type == 'gene' or query_type == 'drug':
            bound_curie = query_graph['nodes'][object_key]['id']
            evidence['_{}'.format(bound_curie)] = 'True'

        # The target is always the default survival time.
        survival = self.default_survival_target
        dynamic_targets = dict(survival)
        survival_spec = survival["EFO:0000714"]
        truth_target = (
            'EFO:0000714',
            '{} {}'.format(survival_spec["op"], survival_spec["value"]),
        )

        chp_query = Query(
            evidence=evidence,
            targets=None,
            dynamic_evidence=None,
            dynamic_targets=dynamic_targets,
            type='updating',
        )
        # Extra attributes carried along with the query.
        chp_query.truth_target = truth_target
        chp_query.query_id = query.get("query_id")
        return chp_query
예제 #2
0
    def _extract_chp_query(self, query, query_type=None):
        """Validate a one-hop query graph and turn it into a CHP ``Query``.

        The graph may hold at most two nodes and one edge. For
        ``query_type == 'gene'`` the edge's subject node must have category
        ``BIOLINK_GENE`` and the object node's ``id`` must be a known
        ``BIOLINK_DRUG`` CURIE; for ``'drug'`` the roles are swapped. The
        object node's CURIE is recorded as evidence. Any other
        ``query_type`` skips these checks and leaves the evidence empty.

        Args:
            query: Mapping with a ``query_graph`` (holding ``nodes`` and
                ``edges`` mappings) and, optionally, a ``query_id``.
            query_type: ``'gene'``, ``'drug'`` or anything else.

        Returns:
            The ``Query`` built from the evidence and the default survival
            target, with ``truth_target`` and ``query_id`` attributes set.

        Raises:
            SystemExit: via ``sys.exit`` when any validation step fails.
        """
        query_graph = query["query_graph"]
        nodes = query_graph['nodes']

        if len(nodes) > 2 or len(query_graph['edges']) > 1:
            sys.exit('1 hop quries can only have 2 nodes and 1 edge')

        # Pull the endpoints of the single edge.
        the_edge = list(query_graph['edges'].values())[0]
        if not ('subject' in the_edge and 'object' in the_edge):
            sys.exit("Edge must have both a 'subject' and and 'object' key")
        subject_key = the_edge['subject']
        object_key = the_edge['object']

        # Per query type: the category the subject node must have, the
        # category whose known CURIEs the object node is checked against,
        # and the message used when that CURIE is unknown.
        if query_type == 'gene':
            rule = (BIOLINK_GENE, BIOLINK_DRUG,
                    'Invalid CHEMBL Identifier. Must be CHEMBL:<ID>')
        elif query_type == 'drug':
            rule = (BIOLINK_DRUG, BIOLINK_GENE,
                    'Invalid ENSEMBL Identifier. Must be ENSEMBL:<ID>')
        else:
            rule = None

        evidence = {}
        if rule is not None:
            subject_category, bound_category, unknown_curie_msg = rule
            if nodes[subject_key]['category'] != subject_category:
                sys.exit("Subject node must be 'category' {}".format(
                    subject_category))
            bound_curie = nodes[object_key]['id']
            if bound_curie not in self.curies[bound_category]:
                sys.exit(unknown_curie_msg)
            evidence['_{}'.format(bound_curie)] = 'True'

        # The target is always the default survival time.
        survival = self.default_survival_target
        dynamic_targets = dict(survival)
        survival_spec = survival["EFO:0000714"]
        truth_target = (
            'EFO:0000714',
            '{} {}'.format(survival_spec["op"], survival_spec["value"]),
        )

        chp_query = Query(
            evidence=evidence,
            targets=None,
            dynamic_evidence=None,
            dynamic_targets=dynamic_targets,
            type='updating',
        )
        # Extra attributes carried along with the query.
        chp_query.truth_target = truth_target
        chp_query.query_id = query.get("query_id")
        return chp_query
예제 #3
0
    def _extract_chp_query(self, query, query_type):
        """Translate a survival query graph into a CHP ``Query``.

        The graph must hold exactly one disease node (``MONDO:0007254``)
        and may hold one survival node (``EFO:0000714``) that the disease
        points at. Gene and drug nodes must each have an edge to the
        disease. When no survival node is present, the default target
        (``>= 970``) is used instead.

        Nodes whose category is the one named by ``query_type`` (``'gene'``
        or ``'drug'``) are counted but are neither checked against
        ``self.curies`` nor added to the evidence; all other gene/drug
        nodes are validated and recorded as evidence.

        Side effects:
            Sets ``self.implicit_survival_node`` (True when the default
            survival target was used) and
            ``self.no_evidence_probability_check`` (True when no evidence
            was collected).

        Args:
            query: Mapping with a ``query_graph`` (holding ``nodes`` and
                ``edges`` mappings) and, optionally, a ``query_id``.
            query_type: ``'gene'``, ``'drug'`` or anything else.

        Returns:
            The ``Query`` built from the collected evidence and target,
            with ``truth_target`` and ``query_id`` attributes set on it.

        Raises:
            SystemExit: via ``sys.exit`` when the graph fails validation.
        """
        query_graph = query["query_graph"]
        nodes = query_graph['nodes']

        evidence = {}
        targets = []
        dynamic_evidence = {}
        dynamic_targets = {}
        # Running tallies of recognised nodes/edges. The checks below compare
        # them with each other to detect missing or surplus edges.
        total_nodes = 0
        total_edges = 0

        # Target used when the survival edge carries no 'properties', or when
        # there is no survival node at all.
        default_days = 970
        default_qualifier = '>='

        # get phenotype node
        acceptable_target_curies = ['EFO:0000714']
        # Stays None when the survival node is implicit, so the edge scan
        # below can never hit an unbound name.
        target_id = None
        self.implicit_survival_node = False
        for node_key, node in nodes.items():
            if (node['category'] == BIOLINK_PHENOTYPIC_FEATURE
                    and node['id'] in acceptable_target_curies):
                target_id = node_key
                total_nodes += 1
        if total_nodes == 0:
            # No survival node supplied: fall back to the default target and
            # count it as if the node were present.
            self.implicit_survival_node = True
            total_nodes += 1
        elif total_nodes > 1:
            sys.exit('Too many target nodes')

        # get disease node info and ensure only 1 disease:
        acceptable_disease_curies = ['MONDO:0007254']
        for node_key, node in nodes.items():
            if (node['category'] == BIOLINK_DISEASE
                    and node['id'] in acceptable_disease_curies):
                disease_id = node_key
                for edge in query_graph['edges'].values():
                    if (edge['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE
                            and edge['subject'] == disease_id
                            and target_id is not None
                            and edge['object'] == target_id):
                        if 'properties' in edge:
                            days = edge['properties']['days']
                            qualifier = edge['properties']['qualifier']
                        else:
                            days = default_days
                            qualifier = default_qualifier
                        total_edges += 1
                if total_edges == 0 and not self.implicit_survival_node:
                    # An explicit survival node must be reached from the
                    # disease; without this edge no target could be built.
                    sys.exit(
                        "Disease and target edge not found. Edge type must be '{}'"
                        .format(BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE))
                elif total_edges > 1:
                    sys.exit('Disease has too many outgoing edges')
                total_nodes += 1

        if self.implicit_survival_node:
            # Account for the implied disease -> survival edge.
            days = default_days
            qualifier = default_qualifier
            total_edges += 1

        if total_nodes == 1:
            acceptable_disease_curies_print = ','.join(acceptable_disease_curies)
            sys.exit(
                "Disease node not found. Node type must be '{}' and curie must be in: {}"
                .format(BIOLINK_DISEASE, acceptable_disease_curies_print))
        elif total_nodes > 2:
            sys.exit('Too many disease nodes')

        # set BKB target
        dynamic_targets['EFO:0000714'] = {
            "op": qualifier,
            "value": days,
        }
        truth_target = ('EFO:0000714', '{} {}'.format(qualifier, days))

        # get evidence
        for node_key, node in nodes.items():
            # genes
            if node['category'] == BIOLINK_GENE:
                # check for appropriate gene node structure
                gene_id = node_key
                for edge in query_graph['edges'].values():
                    if (edge['predicate'] == BIOLINK_GENE_TO_DISEASE_PREDICATE
                            and edge['subject'] == gene_id
                            and edge['object'] == disease_id):
                        total_edges += 1
                # Each accepted node must have contributed exactly one edge.
                if total_edges == total_nodes - 1:
                    sys.exit(
                        "Gene and disease edge not found. Edge type must be '{}'"
                        .format(BIOLINK_GENE_TO_DISEASE_PREDICATE))
                elif total_edges > total_nodes:
                    sys.exit('Gene has too many outgoing edges')
                # check for appropriate gene node curie
                if query_type != 'gene':
                    gene_curie = node['id']
                    if gene_curie not in self.curies[BIOLINK_GENE]:
                        sys.exit('Invalid ENSEMBL Identifier. Must be in form ENSEMBL:<ID>.')
                    evidence["_" + gene_curie] = 'True'
                total_nodes += 1
            # drugs
            if node['category'] == BIOLINK_DRUG:
                # check for appropriate drug node structure
                drug_id = node_key
                for edge in query_graph['edges'].values():
                    if (edge['predicate'] == BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE
                            and edge['subject'] == drug_id
                            and edge['object'] == disease_id):
                        total_edges += 1
                if total_edges == total_nodes - 1:
                    sys.exit(
                        "Drug and disease edge not found. Edge type must be '{}'"
                        .format(BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE))
                elif total_edges > total_nodes:
                    sys.exit('Drug has too many outgoing edges')
                # check for appropriate drug node curie
                if query_type != 'drug':
                    drug_curie = node['id']
                    if drug_curie not in self.curies[BIOLINK_DRUG]:
                        sys.exit(
                            'Invalid CHEMBL Identifier: {}. Must be in form CHEMBL:<ID>'
                            .format(drug_curie))
                    evidence['_' + drug_curie] = 'True'
                total_nodes += 1

        # Temporary solution to no evidence linking
        self.no_evidence_probability_check = (
            not evidence and not dynamic_evidence)

        # produce BKB query
        chp_query = Query(
            evidence=evidence,
            targets=targets,
            dynamic_evidence=dynamic_evidence,
            dynamic_targets=dynamic_targets,
            type='updating')
        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        chp_query.query_id = query.get("query_id")
        return chp_query
예제 #4
0
    def _extract_chp_query(self, query, query_type=None):
        """Translate a query graph into a CHP ``Query`` with minimal checks.

        The target is the phenotypic-feature node reached from a disease
        node; the edge's ``properties`` supply the ``days``/``qualifier``
        of the target, defaulting to ``970`` / ``'>='`` when the edge has
        no ``properties``. Every gene and drug node is recorded as
        evidence. No CURIE or structural validation is performed here
        (presumably it happens upstream -- TODO confirm).

        Args:
            query: Mapping with a ``query_graph`` (holding ``nodes`` and
                ``edges`` mappings) and, optionally, a ``query_id``.
            query_type: Not used by this implementation.

        Returns:
            The ``Query`` built from the collected evidence and target,
            with ``truth_target`` and ``query_id`` attributes set on it.

        Raises:
            ValueError: if the graph has no phenotypic-feature node, or no
                disease node with an edge to it, so no target can be built.
        """
        query_graph = query["query_graph"]
        nodes = query_graph['nodes']

        evidence = {}
        targets = []
        dynamic_evidence = {}
        dynamic_targets = {}

        # get phenotype node (the last one wins if several are present)
        target_id = None
        target_curie = None
        for node_key, node in nodes.items():
            if node['category'] == BIOLINK_PHENOTYPIC_FEATURE:
                target_id = node_key
                target_curie = node['id']
        if target_id is None:
            raise ValueError(
                "Query graph has no node of category '{}'".format(
                    BIOLINK_PHENOTYPIC_FEATURE))

        # get the survival spec from the disease -> phenotype edge
        survival_edge_found = False
        for node_key, node in nodes.items():
            if node['category'] == BIOLINK_DISEASE:
                disease_id = node_key
                for edge in query_graph['edges'].values():
                    if (edge['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE
                            and edge['subject'] == disease_id
                            and edge['object'] == target_id):
                        if 'properties' in edge:
                            days = edge['properties']['days']
                            qualifier = edge['properties']['qualifier']
                        else:
                            days = 970
                            qualifier = '>='
                        survival_edge_found = True
        if not survival_edge_found:
            raise ValueError(
                "Query graph has no '{}' edge from a disease node to the "
                "phenotype node".format(
                    BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE))

        # set BKB target, keyed by the phenotype node's own CURIE rather
        # than by whichever node happened to be iterated last.
        dynamic_targets[target_curie] = {
            "op": qualifier,
            "value": days,
        }
        truth_target = (target_curie, '{} {}'.format(qualifier, days))

        # get evidence
        for node in nodes.values():
            # genes
            if node['category'] == BIOLINK_GENE:
                evidence["_" + node['id']] = 'True'
            # drugs
            if node['category'] == BIOLINK_DRUG:
                # NOTE(review): drug evidence is keyed by the bare CURIE
                # while gene evidence gets a '_' prefix -- confirm this
                # matches the knowledge base's naming.
                evidence[node["id"]] = 'True'

        # produce BKB query
        chp_query = Query(evidence=evidence,
                          targets=targets,
                          dynamic_evidence=dynamic_evidence,
                          dynamic_targets=dynamic_targets,
                          type='updating')
        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        chp_query.query_id = query.get("query_id")
        return chp_query
예제 #5
0
    def _extract_chp_query(self, query, query_type=None):
        """Validate a survival query graph and turn it into a CHP ``Query``.

        The graph must hold exactly one survival node (``EFO:0000714``),
        exactly one disease node (``MONDO:0007254``) with a single edge to
        the survival node, and gene/drug nodes that each have an edge to
        the disease. Every node and edge of the graph must be accounted
        for. The survival edge's ``properties`` supply the ``days`` and
        ``qualifier`` of the target, defaulting to ``970`` / ``'>='`` when
        the edge has no ``properties``.

        Args:
            query: Mapping with a ``query_graph`` (holding ``nodes`` and
                ``edges`` mappings) and, optionally, a ``query_id``.
            query_type: Not used by this implementation.

        Returns:
            The ``Query`` built from the collected evidence and target,
            with ``truth_target`` and ``query_id`` attributes set on it.

        Raises:
            SystemExit: via ``sys.exit`` when the graph fails validation.
        """
        query_graph = query["query_graph"]
        nodes = query_graph['nodes']

        evidence = {}
        targets = []
        dynamic_evidence = {}
        dynamic_targets = {}
        # ensure we are using all nodes/edges
        total_nodes = 0
        total_edges = 0

        # get phenotype node
        acceptable_target_curies = ['EFO:0000714']
        for node_key, node in nodes.items():
            if (node['category'] == BIOLINK_PHENOTYPIC_FEATURE
                    and node['id'] in acceptable_target_curies):
                target_id = node_key
                # Remember the target's own CURIE; it keys the BKB target.
                target_curie = node['id']
                total_nodes += 1
        if total_nodes == 0:
            acceptable_target_curies_print = ','.join(acceptable_target_curies)
            sys.exit(
                "Survival Node not found. Node category must be 'biolink:PhenotypicFeature' and id must be in: "
                + acceptable_target_curies_print)
        elif total_nodes > 1:
            sys.exit('Too many target nodes')

        # get disease node info and ensure only 1 disease:
        acceptable_disease_curies = ['MONDO:0007254']
        for node_key, node in nodes.items():
            if (node['category'] == BIOLINK_DISEASE
                    and node['id'] in acceptable_disease_curies):
                disease_id = node_key
                for edge in query_graph['edges'].values():
                    if (edge['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE
                            and edge['subject'] == disease_id
                            and edge['object'] == target_id):
                        if 'properties' in edge:
                            days = edge['properties']['days']
                            qualifier = edge['properties']['qualifier']
                        else:
                            days = 970
                            qualifier = '>='
                        total_edges += 1
                if total_edges == 0:
                    sys.exit(
                        "Disease and target edge not found. Edge type must be 'biolink:DiseaseToPhenotypicFeatureAssociation'"
                    )
                elif total_edges > 1:
                    sys.exit('Disease has too many outgoing edges')
                total_nodes += 1
        if total_nodes == 1:
            acceptable_disease_curies_print = ','.join(
                acceptable_disease_curies)
            sys.exit(
                "Disease node not found. Node type must be 'biolink:Disease' and curie must be in: "
                + acceptable_disease_curies_print)
        elif total_nodes > 2:
            sys.exit('Too many disease nodes')

        # set BKB target, keyed by the survival node's own CURIE rather than
        # by whichever node happened to be iterated last.
        dynamic_targets[target_curie] = {
            "op": qualifier,
            "value": days,
        }
        truth_target = (target_curie, '{} {}'.format(qualifier, days))

        # get evidence
        for node_key, node in nodes.items():
            # genes
            if node['category'] == BIOLINK_GENE:
                # check for appropriate gene node structure
                gene_id = node_key
                for edge in query_graph['edges'].values():
                    if (edge['predicate'] == BIOLINK_GENE_TO_DISEASE_PREDICATE
                            and edge['subject'] == gene_id
                            and edge['object'] == disease_id):
                        total_edges += 1
                # Each accepted node must have contributed exactly one edge.
                if total_edges == total_nodes - 1:
                    sys.exit(
                        "Gene and disease edge not found. Edge type must be '{}'"
                        .format(BIOLINK_GENE_TO_DISEASE_PREDICATE))
                elif total_edges > total_nodes:
                    sys.exit('Gene has too many outgoing edges')
                # check for appropriate gene node curie
                gene_curie = node['id']
                if gene_curie not in self.curies[BIOLINK_GENE]:
                    sys.exit(
                        'Invalid ENSEMBL Identifier. Must be in form ENSEMBL:<ID>.'
                    )
                evidence["_" + gene_curie] = 'True'
                total_nodes += 1
            # drugs
            if node['category'] == BIOLINK_DRUG:
                # check for appropriate drug node structure
                drug_id = node_key
                for edge in query_graph['edges'].values():
                    if (edge['predicate'] == BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE
                            and edge['subject'] == drug_id
                            and edge['object'] == disease_id):
                        total_edges += 1
                if total_edges == total_nodes - 1:
                    sys.exit(
                        "Drug and disease edge not found. Edge type must be '{}'"
                        .format(
                            BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE
                        ))
                elif total_edges > total_nodes:
                    sys.exit('Drug has too many outgoing edges')
                # check for appropriate drug node curie
                drug_curie = node['id']
                if drug_curie not in self.curies[BIOLINK_DRUG]:
                    sys.exit(
                        'Invalid CHEMBL Identifier: {}. Must be in form CHEMBL:<ID>'
                        .format(drug_curie))
                # NOTE(review): drug evidence is keyed by the bare CURIE
                # while gene evidence gets a '_' prefix -- confirm this
                # matches the knowledge base's naming.
                evidence[drug_curie] = 'True'
                total_nodes += 1

        if (total_nodes != len(nodes)
                or total_edges != len(query_graph['edges'])):
            sys.exit('There are extra components in the provided QG structure')

        # produce BKB query
        chp_query = Query(evidence=evidence,
                          targets=targets,
                          dynamic_evidence=dynamic_evidence,
                          dynamic_targets=dynamic_targets,
                          type='updating')
        # Set some other helpful attributes
        chp_query.truth_target = truth_target
        chp_query.query_id = query.get("query_id")
        return chp_query