def _extract_chp_query(self, query, query_type=None):
    """Build a CHP ``Query`` from a one-hop query graph without validation.

    The first edge of the query graph is inspected; when ``query_type`` is
    ``'gene'`` or ``'drug'`` the ``id`` of that edge's object node is recorded
    as ``evidence['_<id>'] = 'True'``. The dynamic target is always a copy of
    ``self.default_survival_target``.

    Args:
        query: Mapping with a ``"query_graph"`` entry containing ``'nodes'``
            and ``'edges'`` mappings, and optionally a ``"query_id"``.
        query_type: ``'gene'`` or ``'drug'``; any other value leaves the
            evidence empty.

    Returns:
        The ``Query`` object, with ``truth_target`` and ``query_id`` set as
        extra attributes (``query_id`` is ``None`` when the query has none).
    """
    query_graph = query["query_graph"]

    # check edge for source and target: only the first edge is considered
    edge_map = query_graph["edges"]
    only_edge = edge_map[list(edge_map)[0]]
    # The subject is not used below, but the lookup is kept so that an edge
    # without a 'subject' key still fails here with KeyError.
    only_edge['subject']
    object_key = only_edge['object']

    # Get non-wildcard node: both query types take the evidence from the
    # object node of the edge.
    evidence = {}
    if query_type in ('gene', 'drug'):
        object_curie = query_graph['nodes'][object_key]['id']
        evidence['_{}'.format(object_curie)] = 'True'

    # default survival time
    survival = self.default_survival_target
    dynamic_targets = {}
    dynamic_targets.update(survival)
    survival_spec = survival["EFO:0000714"]
    truth_target = (
        'EFO:0000714',
        '{} {}'.format(survival_spec["op"], survival_spec["value"]),
    )

    chp_query = Query(
        evidence=evidence,
        targets=None,
        dynamic_evidence=None,
        dynamic_targets=dynamic_targets,
        type='updating',
    )
    # Set some other helpful attributes
    chp_query.truth_target = truth_target
    chp_query.query_id = query["query_id"] if 'query_id' in query else None
    return chp_query
def _extract_chp_query(self, query, query_type=None):
    """Validate a one-hop query graph and build the matching CHP ``Query``.

    The graph may hold at most two nodes and must hold exactly one edge.
    When ``query_type`` is ``'gene'`` or ``'drug'`` the subject node's
    category and the object node's curie are checked, and the object's ``id``
    is recorded as ``evidence['_<id>'] = 'True'``. The dynamic target is
    always a copy of ``self.default_survival_target``.

    Args:
        query: Mapping with a ``"query_graph"`` entry containing ``'nodes'``
            and ``'edges'`` mappings, and optionally a ``"query_id"``.
        query_type: ``'gene'`` requires a ``BIOLINK_GENE`` subject and an
            object id listed in ``self.curies[BIOLINK_DRUG]``; ``'drug'``
            requires a ``BIOLINK_DRUG`` subject and an object id listed in
            ``self.curies[BIOLINK_GENE]``. Any other value skips these checks
            and leaves the evidence empty.

    Returns:
        The ``Query`` object, with ``truth_target`` and ``query_id`` set as
        extra attributes (``query_id`` is ``None`` when the query has none).

    Raises:
        SystemExit: Raised through ``sys.exit`` with a descriptive message
            whenever one of the structural or curie checks fails.
    """
    query_graph = query["query_graph"]
    nodes = query_graph['nodes']
    edges = query_graph['edges']

    # Exactly one edge is required. An empty edge map is rejected here as
    # well, instead of failing with IndexError when the edge is looked up.
    # NOTE(review): the typos in the two messages below ('quries', 'and and')
    # are left untouched in case callers match on the text.
    if len(nodes) > 2 or len(edges) != 1:
        sys.exit('1 hop quries can only have 2 nodes and 1 edge')

    # check edge for source and target
    edge = next(iter(edges.values()))
    if 'subject' not in edge or 'object' not in edge:
        sys.exit(
            'Edge must have both a \'subject\' and and \'object\' key')
    subject = edge['subject']
    obj = edge['object']

    # Get non-wildcard node
    evidence = {}
    if query_type == 'gene':
        if nodes[subject]['category'] != BIOLINK_GENE:
            sys.exit('Subject node must be \'category\' {}'.format(
                BIOLINK_GENE))
        drug_curie = nodes[obj]['id']
        if drug_curie not in self.curies[BIOLINK_DRUG]:
            sys.exit('Invalid CHEMBL Identifier. Must be CHEMBL:<ID>')
        evidence['_{}'.format(drug_curie)] = 'True'
    elif query_type == 'drug':
        if nodes[subject]['category'] != BIOLINK_DRUG:
            sys.exit('Subject node must be \'category\' {}'.format(
                BIOLINK_DRUG))
        gene_curie = nodes[obj]['id']
        if gene_curie not in self.curies[BIOLINK_GENE]:
            sys.exit('Invalid ENSEMBL Identifier. Must be ENSEMBL:<ID>')
        evidence['_{}'.format(gene_curie)] = 'True'

    # default survival time
    dynamic_targets = {}
    dynamic_targets.update(self.default_survival_target)
    survival_spec = self.default_survival_target["EFO:0000714"]
    truth_target = (
        'EFO:0000714',
        '{} {}'.format(survival_spec["op"], survival_spec["value"]),
    )

    chp_query = Query(
        evidence=evidence,
        targets=None,
        dynamic_evidence=None,
        dynamic_targets=dynamic_targets,
        type='updating',
    )
    # Set some other helpful attributes
    chp_query.truth_target = truth_target
    chp_query.query_id = query["query_id"] if 'query_id' in query else None
    return chp_query
def _extract_chp_query(self, query, query_type):
    """Validate a wildcard query graph and build the matching CHP ``Query``.

    The graph is expected to hold one disease node (``MONDO:0007254``),
    optionally one survival node (``EFO:0000714``) reached from the disease,
    and gene/drug nodes with an edge to the disease. When no survival node
    is present the default survival target (``>= 970``) is used and
    ``self.implicit_survival_node`` is set to ``True``.

    Side effects: sets ``self.implicit_survival_node`` and
    ``self.no_evidence_probability_check``.

    Args:
        query: Mapping with a ``"query_graph"`` entry containing ``'nodes'``
            and ``'edges'`` mappings, and optionally a ``"query_id"``.
        query_type: ``'gene'`` or ``'drug'``. Nodes of that category are
            still counted and edge-checked, but their curie is neither
            validated nor added to the evidence.

    Returns:
        The ``Query`` object, with ``truth_target`` and ``query_id`` set as
        extra attributes (``query_id`` is ``None`` when the query has none).

    Raises:
        SystemExit: Raised through ``sys.exit`` with a descriptive message
            whenever one of the structural or curie checks fails.
    """
    query_graph = query["query_graph"]
    nodes = query_graph['nodes']

    evidence = {}
    targets = []
    dynamic_evidence = {}
    dynamic_targets = {}

    # Running tallies of the nodes/edges recognised so far; the gene and drug
    # checks below compare them against each other.
    total_nodes = 0
    total_edges = 0

    # get phenotype node
    acceptable_target_curies = ['EFO:0000714']
    self.implicit_survival_node = False
    # Stays None when the survival node is implicit, so the edge matching
    # below never touches an unbound name.
    target_id = None
    for node_key, node in nodes.items():
        if (node['category'] == BIOLINK_PHENOTYPIC_FEATURE
                and node['id'] in acceptable_target_curies):
            target_id = node_key
            total_nodes += 1
    if total_nodes == 0:
        # Use Default Survival
        self.implicit_survival_node = True
        total_nodes += 1
    elif total_nodes > 1:
        sys.exit('Too many target nodes')

    # get disease node info and ensure only 1 disease:
    acceptable_disease_curies = ['MONDO:0007254']
    for node_key, node in nodes.items():
        if (node['category'] == BIOLINK_DISEASE
                and node['id'] in acceptable_disease_curies):
            disease_id = node_key
            for edge in query_graph['edges'].values():
                if (edge['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE
                        and edge['subject'] == disease_id
                        and target_id is not None
                        and edge['object'] == target_id):
                    if 'properties' in edge:
                        days = edge['properties']['days']
                        qualifier = edge['properties']['qualifier']
                    else:
                        days = 970
                        qualifier = '>='
                    total_edges += 1
            # With an explicit survival node the disease must point at it;
            # otherwise days/qualifier would be unbound when the target is
            # built below.
            if total_edges == 0 and not self.implicit_survival_node:
                sys.exit(
                    "Disease and target edge not found. Edge type must be '{}'".format(
                        BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE))
            if total_edges > 1:
                sys.exit('Disease has too many outgoing edges')
            total_nodes += 1

    if self.implicit_survival_node:
        # The implicit survival node contributes the default target and
        # counts as one (virtual) disease -> survival edge.
        days = 970
        qualifier = '>='
        total_edges += 1

    if total_nodes == 1:
        acceptable_disease_curies_print = ','.join(acceptable_disease_curies)
        sys.exit(
            "Disease node not found. Node type must be '{}' and curie must be in: {}".format(
                BIOLINK_DISEASE, acceptable_disease_curies_print))
    elif total_nodes > 2:
        sys.exit('Too many disease nodes')

    # set BKB target
    dynamic_targets['EFO:0000714'] = {
        "op": qualifier,
        "value": days,
    }
    truth_target = ('EFO:0000714', '{} {}'.format(qualifier, days))

    # get evidence
    for node_key, node in nodes.items():
        # genes
        if node['category'] == BIOLINK_GENE:
            # check for appropriate gene node structure
            gene_id = node_key
            for edge in query_graph['edges'].values():
                if (edge['predicate'] == BIOLINK_GENE_TO_DISEASE_PREDICATE
                        and edge['subject'] == gene_id
                        and edge['object'] == disease_id):
                    total_edges += 1
            if total_edges == total_nodes - 1:
                sys.exit(
                    "Gene and disease edge not found. Edge type must be '{}'".format(
                        BIOLINK_GENE_TO_DISEASE_PREDICATE))
            elif total_edges > total_nodes:
                sys.exit('Gene has too many outgoing edges')
            # check for appropriate gene node curie
            if query_type != 'gene':
                gene_curie = node['id']
                if gene_curie not in self.curies[BIOLINK_GENE]:
                    sys.exit('Invalid ENSEMBL Identifier. Must be in form ENSEMBL:<ID>.')
                evidence["_" + gene_curie] = 'True'
            total_nodes += 1
        # drugs
        if node['category'] == BIOLINK_DRUG:
            # check for appropriate drug node structure
            drug_id = node_key
            for edge in query_graph['edges'].values():
                if (edge['predicate'] == BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE
                        and edge['subject'] == drug_id
                        and edge['object'] == disease_id):
                    total_edges += 1
            if total_edges == total_nodes - 1:
                sys.exit(
                    "Drug and disease edge not found. Edge type must be '{}'".format(
                        BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE))
            elif total_edges > total_nodes:
                sys.exit('Drug has too many outgoing edges')
            # check for appropriate drug node curie
            if query_type != 'drug':
                drug_curie = node['id']
                if drug_curie not in self.curies[BIOLINK_DRUG]:
                    sys.exit(
                        'Invalid CHEMBL Identifier: {}. Must be in form CHEMBL:<ID>'.format(
                            drug_curie))
                evidence['_' + drug_curie] = 'True'
            total_nodes += 1

    # Temporary solution to no evidence linking
    self.no_evidence_probability_check = not evidence and not dynamic_evidence

    # produce BKB query
    chp_query = Query(
        evidence=evidence,
        targets=targets,
        dynamic_evidence=dynamic_evidence,
        dynamic_targets=dynamic_targets,
        type='updating',
    )
    # Set some other helpful attributes
    chp_query.truth_target = truth_target
    chp_query.query_id = query["query_id"] if 'query_id' in query else None
    return chp_query
def _extract_chp_query(self, query, query_type=None):
    """Build a CHP ``Query`` from a multi-hop query graph.

    The phenotypic-feature node is the target, the disease node's edge to it
    supplies the survival threshold (``properties['days']`` and
    ``properties['qualifier']``, defaulting to ``970`` and ``'>='`` when the
    edge has no ``'properties'``), and every gene/drug node becomes evidence.
    No node/edge counts or curies are checked here.
    NOTE(review): presumably the graph is validated by the caller — confirm.

    Args:
        query: Mapping with a ``"query_graph"`` entry containing ``'nodes'``
            and ``'edges'`` mappings, and optionally a ``"query_id"``.
        query_type: Accepted for interface compatibility; not used here.

    Returns:
        The ``Query`` object, with ``truth_target`` and ``query_id`` set as
        extra attributes (``query_id`` is ``None`` when the query has none).

    Raises:
        ValueError: If the graph has no phenotypic-feature node, or no
            disease edge pointing at it.
    """
    query_graph = query["query_graph"]
    nodes = query_graph['nodes']

    evidence = {}
    targets = []
    dynamic_evidence = {}
    dynamic_targets = {}

    # get phenotype node; remember its curie so the target is keyed by the
    # phenotype node itself rather than by whichever node was iterated last
    target_id = None
    target_curie = None
    for node_key, node in nodes.items():
        if node['category'] == BIOLINK_PHENOTYPIC_FEATURE:
            target_id = node_key
            target_curie = node['id']
    if target_id is None:
        raise ValueError(
            'Query graph has no phenotypic feature node to use as target')

    # get disease node info: its edge to the target carries the threshold
    target_edge_found = False
    for node_key, node in nodes.items():
        if node['category'] == BIOLINK_DISEASE:
            disease_id = node_key
            for edge in query_graph['edges'].values():
                if (edge['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE
                        and edge['subject'] == disease_id
                        and edge['object'] == target_id):
                    if 'properties' in edge:
                        days = edge['properties']['days']
                        qualifier = edge['properties']['qualifier']
                    else:
                        days = 970
                        qualifier = '>='
                    target_edge_found = True
    if not target_edge_found:
        raise ValueError(
            'Query graph has no disease edge pointing at the target node')

    # set BKB target
    dynamic_targets[target_curie] = {
        "op": qualifier,
        "value": days,
    }
    truth_target = (target_curie, '{} {}'.format(qualifier, days))

    # get evidence
    # NOTE(review): gene keys are prefixed with '_' while drug keys are not;
    # kept as found — confirm this asymmetry is intended.
    for node in nodes.values():
        # genes
        if node['category'] == BIOLINK_GENE:
            evidence["_" + node['id']] = 'True'
        # drugs
        if node['category'] == BIOLINK_DRUG:
            evidence[node["id"]] = 'True'

    # produce BKB query
    chp_query = Query(
        evidence=evidence,
        targets=targets,
        dynamic_evidence=dynamic_evidence,
        dynamic_targets=dynamic_targets,
        type='updating',
    )
    # Set some other helpful attributes
    chp_query.truth_target = truth_target
    chp_query.query_id = query["query_id"] if 'query_id' in query else None
    return chp_query
def _extract_chp_query(self, query, query_type=None):
    """Validate a multi-hop query graph and build the matching CHP ``Query``.

    The graph must hold exactly one survival node (``EFO:0000714``), exactly
    one disease node (``MONDO:0007254``) with a single edge to the survival
    node, and gene/drug nodes each connected to the disease. The disease to
    survival edge supplies the threshold (``properties['days']`` and
    ``properties['qualifier']``, defaulting to ``970`` and ``'>='`` when the
    edge has no ``'properties'``). Every gene/drug node becomes evidence.

    Args:
        query: Mapping with a ``"query_graph"`` entry containing ``'nodes'``
            and ``'edges'`` mappings, and optionally a ``"query_id"``.
        query_type: Accepted for interface compatibility; not used here.

    Returns:
        The ``Query`` object, with ``truth_target`` and ``query_id`` set as
        extra attributes (``query_id`` is ``None`` when the query has none).

    Raises:
        SystemExit: Raised through ``sys.exit`` with a descriptive message
            whenever a structural or curie check fails, or when the graph
            holds nodes/edges that were not recognised.
    """
    query_graph = query["query_graph"]
    nodes = query_graph['nodes']

    evidence = {}
    targets = []
    dynamic_evidence = {}
    dynamic_targets = {}

    # ensure we are using all nodes/edges
    total_nodes = 0
    total_edges = 0

    # get phenotype node
    acceptable_target_curies = ['EFO:0000714']
    for node_key, node in nodes.items():
        if (node['category'] == BIOLINK_PHENOTYPIC_FEATURE
                and node['id'] in acceptable_target_curies):
            target_id = node_key
            # Remember the curie so the target is keyed by the survival node
            # itself rather than by whichever node was iterated last.
            target_curie = node['id']
            total_nodes += 1
    if total_nodes == 0:
        acceptable_target_curies_print = ','.join(acceptable_target_curies)
        sys.exit(
            'Survival Node not found. Node category must be \'biolink:PhenotypicFeature\' and id must be in: '
            + acceptable_target_curies_print)
    elif total_nodes > 1:
        sys.exit('Too many target nodes')

    # get disease node info and ensure only 1 disease:
    acceptable_disease_curies = ['MONDO:0007254']
    for node_key, node in nodes.items():
        if (node['category'] == BIOLINK_DISEASE
                and node['id'] in acceptable_disease_curies):
            disease_id = node_key
            for edge in query_graph['edges'].values():
                if (edge['predicate'] == BIOLINK_DISEASE_TO_PHENOTYPIC_FEATURE_PREDICATE
                        and edge['subject'] == disease_id
                        and edge['object'] == target_id):
                    if 'properties' in edge:
                        days = edge['properties']['days']
                        qualifier = edge['properties']['qualifier']
                    else:
                        days = 970
                        qualifier = '>='
                    total_edges += 1
            if total_edges == 0:
                sys.exit(
                    'Disease and target edge not found. Edge type must be \'biolink:DiseaseToPhenotypicFeatureAssociation\''
                )
            elif total_edges > 1:
                sys.exit('Disease has too many outgoing edges')
            total_nodes += 1
    if total_nodes == 1:
        acceptable_disease_curies_print = ','.join(acceptable_disease_curies)
        sys.exit(
            'Disease node not found. Node type must be \'biolink:Disease\' and curie must be in: '
            + acceptable_disease_curies_print)
    elif total_nodes > 2:
        sys.exit('Too many disease nodes')

    # set BKB target
    dynamic_targets[target_curie] = {
        "op": qualifier,
        "value": days,
    }
    truth_target = (target_curie, '{} {}'.format(qualifier, days))

    # get evidence
    for node_key, node in nodes.items():
        # genes
        if node['category'] == BIOLINK_GENE:
            # check for appropriate gene node structure
            gene_id = node_key
            for edge in query_graph['edges'].values():
                if (edge['predicate'] == BIOLINK_GENE_TO_DISEASE_PREDICATE
                        and edge['subject'] == gene_id
                        and edge['object'] == disease_id):
                    total_edges += 1
            if total_edges == total_nodes - 1:
                sys.exit(
                    "Gene and disease edge not found. Edge type must be '{}'"
                    .format(BIOLINK_GENE_TO_DISEASE_PREDICATE))
            elif total_edges > total_nodes:
                sys.exit('Gene has too many outgoing edges')
            # check for appropriate gene node curie
            gene_curie = node['id']
            if gene_curie not in self.curies[BIOLINK_GENE]:
                sys.exit(
                    'Invalid ENSEMBL Identifier. Must be in form ENSEMBL:<ID>.'
                )
            evidence["_" + gene_curie] = 'True'
            total_nodes += 1
        # drugs
        if node['category'] == BIOLINK_DRUG:
            # check for appropriate drug node structure
            drug_id = node_key
            for edge in query_graph['edges'].values():
                if (edge['predicate'] == BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE
                        and edge['subject'] == drug_id
                        and edge['object'] == disease_id):
                    total_edges += 1
            if total_edges == total_nodes - 1:
                sys.exit(
                    "Drug and disease edge not found. Edge type must be '{}'"
                    .format(
                        BIOLINK_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_PREDICATE
                    ))
            elif total_edges > total_nodes:
                sys.exit('Drug has too many outgoing edges')
            # check for appropriate drug node curie
            drug_curie = node['id']
            if drug_curie not in self.curies[BIOLINK_DRUG]:
                sys.exit(
                    'Invalid CHEMBL Identifier: {}. Must be in form CHEMBL:<ID>'
                    .format(drug_curie))
            # NOTE(review): gene keys are prefixed with '_' while drug keys
            # are not; kept as found — confirm this asymmetry is intended.
            evidence[drug_curie] = 'True'
            total_nodes += 1

    # Anything left uncounted is a node or edge this handler does not know.
    if (total_nodes != len(nodes)
            or total_edges != len(query_graph['edges'])):
        sys.exit('There are extra components in the provided QG structure')

    # produce BKB query
    chp_query = Query(
        evidence=evidence,
        targets=targets,
        dynamic_evidence=dynamic_evidence,
        dynamic_targets=dynamic_targets,
        type='updating',
    )
    # Set some other helpful attributes
    chp_query.truth_target = truth_target
    chp_query.query_id = query["query_id"] if 'query_id' in query else None
    return chp_query