def make_one_hop_query():
    """
    Write a one-hop query for a single fixed gene with a therapeutic
    wildcard to ``one_hop.json`` in the current working directory.
    """
    one_hop_query = build_query(
        one_hop=True,
        therapeutic_wildcard=True,
        genes=["ENSEMBL:ENSG00000121879"],
    )
    # Serialise first, then write the finished text in one go.
    serialised = json.dumps(one_hop_query)
    with open('one_hop.json', 'w') as out_file:
        out_file.write(serialised)
def make_drug_wildcard_query():
    """
    Write one therapeutic-wildcard query per outcome horizon.

    For each horizon of 1, 2, 5 and 10 years a query is built for a fixed
    gene and disease whose outcome threshold is ``365 * years``, and the
    result is written to its own JSON file named after the horizon.
    """
    horizons_in_years = (1, 2, 5, 10)
    for years in horizons_in_years:
        threshold = 365 * years
        wildcard_query = build_query(
            genes=["ENSEMBL:ENSG00000012048"],
            disease="MONDO:0007254",
            outcome=("EFO:0000714", ">=", threshold),
            therapeutic_wildcard=True,
        )
        out_path = 'dw_brac1_{}yr.json'.format(years)
        with open(out_path, 'w') as out_file:
            out_file.write(json.dumps(wildcard_query))
def make_standard_probablistic_query_one_gene():
    """
    Build a standard probabilistic query with one gene and one drug and
    write it to ``standard_one_gene.json``.
    """
    outcome_constraint = ("EFO:0000714", ">=", 500)
    standard_query = build_query(
        genes=["ENSEMBL:ENSG00000141510"],
        therapeutic="CHEMBL:CHEMBL88",
        disease="MONDO:0007254",
        outcome=outcome_constraint,
    )
    with open('standard_one_gene.json', 'w') as out_file:
        out_file.write(json.dumps(standard_query))
def make_gene_wildcard_query():
    """
    Build a query with one gene wildcard for a fixed drug, disease and
    outcome, and write it to ``gene_wildcard.json``.
    """
    query_kwargs = {
        'therapeutic': "CHEMBL:CHEMBL88",
        'disease': "MONDO:0007254",
        'outcome': ("EFO:0000714", ">=", 500),
        'num_gene_wildcards': 1,
    }
    wildcard_query = build_query(**query_kwargs)
    with open('gene_wildcard.json', 'w') as out_file:
        out_file.write(json.dumps(wildcard_query))
# How many random queries to generate.
NUM_QUERIES = 10

# Seed the generator so the same drugs are drawn on every run.
random.seed(111)

# Fetch the curies known to the TRAPI interface.
logger.info('Getting curies.')
curies = TrapiInterface().get_curies()
logger.info('Got curies.')

# Each query pairs one randomly drawn drug with a single gene wildcard as a
# one-hop query. The drug curies are listed once; every draw picks from
# that same list.
drug_pool = list(curies["biolink:Drug"].keys())
queries = []
while len(queries) < NUM_QUERIES:
    # NOTE(review): the drug is passed as a one-element list rather than a
    # bare curie string - confirm build_query expects that for `therapeutic`.
    drug = [random.choice(drug_pool)]
    q = build_query(
        therapeutic=drug,
        num_gene_wildcards=1,
        one_hop=True,
    )
    print(json.dumps(q, indent=2))
    queries.append(q)

# Persist the generated queries as a pickle.
with open('random_gene_one_hop_queries.pk', 'wb') as pickle_file:
    pickle.dump(queries, pickle_file)
# How many random queries to generate.
NUM_QUERIES = 10

# Seed the generator so the same genes are drawn on every run.
random.seed(111)

# Fetch the curies known to the TRAPI interface.
logger.info('Getting curies.')
curies = TrapiInterface().get_curies()
logger.info('Got curies.')

# Each query pairs one randomly drawn gene with a therapeutic wildcard as a
# one-hop query. The gene curies are listed once; every draw picks from
# that same list.
gene_pool = list(curies["biolink:Gene"].keys())
queries = []
while len(queries) < NUM_QUERIES:
    genes = [random.choice(gene_pool)]
    q = build_query(
        genes=genes,
        therapeutic_wildcard=True,
        one_hop=True,
    )
    print(json.dumps(q, indent=2))
    queries.append(q)

# Persist the generated queries as a pickle.
with open('random_drug_one_hop_queries.pk', 'wb') as pickle_file:
    pickle.dump(queries, pickle_file)
def _answer_query_using_CHP_client(
        self, query_graph: QueryGraph,
        log: ARAXResponse) -> QGOrganizedKnowledgeGraph:
    """
    Answer a single-edge query graph with gene/drug pairs scored by the
    CHP client.

    Only the first edge of ``query_graph`` is processed. Each end of that
    edge must either carry curie id(s) accepted by ``self._check_id`` or,
    when it has no id, a category of gene or drug/chemical_substance. One
    end must be a gene and the other a drug. Every query sent to the CHP
    client uses disease 'MONDO:0007254' and the outcome
    ('EFO:0000714', '>=', self.CHP_survival_threshold).

    Three shapes are handled:
      * both ends pinned to curies - one query per (source, target) pair;
      * only one end pinned - for each pinned curie, one batch of queries
        against every allowable curie of the other end's category.

    :param query_graph: query graph whose first edge is to be expanded.
    :param log: response object that receives debug/warning/error messages.
    :return: a knowledge graph holding one "paired_with" edge per scored
        pair plus the nodes those edges touch. On any validation failure
        an error is logged and the (empty) graph is returned.
    """
    qedge_key = next(qedge_key for qedge_key in query_graph.edges)
    log.debug(
        f"Processing query results for edge {qedge_key} by using CHP client")
    final_kg = QGOrganizedKnowledgeGraph()

    qedge = query_graph.edges[qedge_key]
    source_qnode_key = qedge.subject
    target_qnode_key = qedge.object
    source_qnode = query_graph.nodes[source_qnode_key]
    target_qnode = query_graph.nodes[target_qnode_key]

    # At least one end of the edge has to be pinned to a curie.
    if (source_qnode.id is None) and (target_qnode.id is None):
        log.error(f"Both ends of edge {qedge_key} are None",
                  error_code="BadEdge")
        return final_kg

    # Validate the source end completely before touching the target end,
    # so that errors are reported in subject -> object order.
    source_ok, source_pass_nodes, source_category = self._resolve_chp_qnode(
        source_qnode, source_qnode_key, log)
    if not source_ok:
        return final_kg
    target_ok, target_pass_nodes, target_category = self._resolve_chp_qnode(
        target_qnode, target_qnode_key, log)
    if not target_ok:
        return final_kg

    # The CHP queries always relate a gene to a drug.
    source_kind = self._chp_node_kind(source_pass_nodes, source_category)
    target_kind = self._chp_node_kind(target_pass_nodes, target_category)
    if source_kind == target_kind:
        log.error(
            f"The query nodes in both ends of edge are the same type which is {source_kind}",
            error_code="CategoryError")
        return final_kg
    source_is_drug = source_kind == 'drug'

    # Each helper lazily yields (gene_curie, drug_curie, probability), so
    # edges are added to the graph as soon as their query has been answered.
    if source_pass_nodes is not None and target_pass_nodes is not None:
        scored_pairs = self._chp_score_pinned_pairs(
            source_pass_nodes, target_pass_nodes, source_is_drug)
    elif source_pass_nodes is not None:
        scored_pairs = self._chp_score_against_category(
            source_pass_nodes, source_is_drug, target_category)
    else:
        # Kinds differ, so the pinned target is a drug iff the source is not.
        scored_pairs = self._chp_score_against_category(
            target_pass_nodes, not source_is_drug, source_category)

    # Curie -> qnode key; dicts keep first-seen order and drop duplicates.
    source_dict = dict()
    target_dict = dict()
    for gene_curie, drug_curie, probability in scored_pairs:
        swagger_edge_key, swagger_edge = self._convert_to_swagger_edge(
            gene_curie, drug_curie, "paired_with", probability)
        if source_is_drug:
            source_dict[drug_curie] = source_qnode_key
            target_dict[gene_curie] = target_qnode_key
        else:
            source_dict[gene_curie] = source_qnode_key
            target_dict[drug_curie] = target_qnode_key
        final_kg.add_edge(swagger_edge_key, swagger_edge, qedge_key)

    # Add the nodes once all edges are in: source nodes first, then targets.
    for node_dict in (source_dict, target_dict):
        for curie, qnode_key in node_dict.items():
            swagger_node_key, swagger_node = self._convert_to_swagger_node(
                curie)
            final_kg.add_node(swagger_node_key, swagger_node, qnode_key)

    return final_kg


def _chp_category_label(self, category):
    """Reduce a category such as 'biolink:chemical_substance' to the bare
    lower-case label ('chemicalsubstance') used for comparisons here."""
    return category.replace('biolink:', '').replace('_', '').lower()


def _resolve_chp_qnode(self, qnode, qnode_key, log):
    """
    Validate one end of the query edge.

    :return: ``(ok, pass_nodes, category)``. For a pinned node,
        ``pass_nodes`` is the non-empty collection of curies accepted by
        ``self._check_id`` and ``category`` is None. For an unpinned node,
        ``pass_nodes`` is None and ``category`` is the normalised category
        label. ``ok`` is False when an error was logged (or reported by
        ``self._check_id``) and the caller should stop.
    """
    if qnode.id is not None:
        has_error, pass_nodes, not_pass_nodes = self._check_id(qnode.id, log)
        if has_error:
            return False, None, None
        if len(pass_nodes) == 0:
            # The same failure is reported with the same error code for
            # either end of the edge (the target end used to report it as
            # "CategoryError").
            if isinstance(qnode.id, str):
                log.error(
                    f"The curie id of {qnode.id} is not allowable based on CHP client",
                    error_code="NotAllowable")
            else:
                log.error(
                    f"The curie ids of {qnode.id} are not allowable based on CHP client",
                    error_code="NotAllowable")
            return False, None, None
        # Some curies were rejected but others remain: warn and carry on.
        if len(not_pass_nodes) == 1:
            log.warning(
                f"The curie id of {not_pass_nodes[0]} is not allowable based on CHP client"
            )
        elif len(not_pass_nodes) != 0:
            log.warning(
                f"The curie ids of these nodes {not_pass_nodes} are not allowable based on CHP client"
            )
        return True, pass_nodes, None

    # Unpinned node: it must declare a usable category. A bare string is
    # treated as a one-element list, and a missing/empty category is
    # reported as unsatisfiable instead of raising on `category[0]`.
    categories = qnode.category
    if isinstance(categories, str):
        categories = [categories]
    category = self._chp_category_label(categories[0]) if categories else None
    if category not in ('gene', 'drug', 'chemicalsubstance'):
        log.error(
            f"The category of query node {qnode_key} is unsatisfiable. It has to be drug/chemical_substance or gene",
            error_code="CategoryError")
        return False, None, None
    return True, None, category


def _chp_node_kind(self, pass_nodes, category):
    """Classify one end of the edge as 'drug' or 'gene'.

    A pinned end is classified by whether its first curie is in
    ``self.allowable_drug_curies``; an unpinned end by its category label.
    """
    if pass_nodes is not None:
        is_drug = pass_nodes[0] in self.allowable_drug_curies
    else:
        is_drug = category in ('drug', 'chemicalsubstance')
    return 'drug' if is_drug else 'gene'


def _chp_curie_in_category(self, curie, category):
    """Return True if the synonymizer knows ``curie`` and lists
    ``category`` (normalised label) among all of its categories."""
    # NOTE(review): a single lookup with return_all_categories=True replaces
    # the former pair of lookups; this assumes an unknown curie maps to None
    # regardless of that flag - confirm against the synonymizer.
    info = self.synonymizer.get_canonical_curies(
        curie, return_all_categories=True)[curie]
    if info is None:
        return False
    return any(
        self._chp_category_label(known) == category
        for known in info['all_categories'])


def _chp_score_pinned_pairs(self, source_curies, target_curies,
                            source_is_drug):
    """Query the CHP client once per (source, target) curie pair.

    Yields ``(gene_curie, drug_curie, probability)`` with both curies
    exactly as given by the caller.
    """
    outcome = ('EFO:0000714', '>=', self.CHP_survival_threshold)
    for source_curie, target_curie in itertools.product(
            source_curies, target_curies):
        if source_is_drug:
            gene_curie, drug_curie = target_curie, source_curie
        else:
            gene_curie, drug_curie = source_curie, target_curie
        # The drug is sent to the client with a 'CHEMBL:' prefix.
        q = build_query(
            genes=[gene_curie],
            therapeutic=drug_curie.replace('CHEMBL.COMPOUND:', 'CHEMBL:'),
            disease='MONDO:0007254',
            outcome=outcome)
        response = self.client.query(q)
        yield gene_curie, drug_curie, self.client.get_outcome_prob(response)


def _chp_score_against_category(self, pinned_curies, pinned_is_drug,
                                wildcard_category):
    """Score every pinned curie against all allowable curies of the
    opposite kind that belong to ``wildcard_category``.

    One ``query_all`` batch is sent per pinned curie. Yields
    ``(gene_curie, drug_curie, probability)``; drug curies taken from the
    allowable list are yielded with a 'CHEMBL.COMPOUND:' prefix.
    """
    # The candidate list does not depend on the pinned curie, so it is
    # computed once rather than once per pinned curie.
    if pinned_is_drug:
        candidates = [
            curie for curie in self.allowable_gene_curies
            if self._chp_curie_in_category(curie, wildcard_category)
        ]
    else:
        # Looked up with the 'CHEMBL.COMPOUND:' prefix, queried with 'CHEMBL:'.
        candidates = [
            curie.replace('CHEMBL.COMPOUND:', 'CHEMBL:')
            for curie in self.allowable_drug_curies
            if self._chp_curie_in_category(
                curie.replace('CHEMBL:', 'CHEMBL.COMPOUND:'),
                wildcard_category)
        ]
    if not candidates:
        # Nothing to pair with: avoid sending an empty batch to the client.
        return

    disease = 'MONDO:0007254'
    outcome = ('EFO:0000714', '>=', self.CHP_survival_threshold)
    for pinned_curie in pinned_curies:
        if pinned_is_drug:
            therapeutic = pinned_curie.replace('CHEMBL.COMPOUND:', 'CHEMBL:')
            queries = [
                build_query(genes=[gene], therapeutic=therapeutic,
                            disease=disease, outcome=outcome)
                for gene in candidates
            ]
        else:
            queries = [
                build_query(genes=[pinned_curie], therapeutic=drug,
                            disease=disease, outcome=outcome)
                for drug in candidates
            ]
        # use the query_all endpoint to run the batch of queries
        res = self.client.query_all(queries)
        for result, candidate in zip(res["message"], candidates):
            if pinned_is_drug:
                prob = self.client.get_outcome_prob(result)
                yield candidate, pinned_curie, prob
            else:
                drug_curie = candidate.replace('CHEMBL:', 'CHEMBL.COMPOUND:')
                prob = self.client.get_outcome_prob(result)
                yield pinned_curie, drug_curie, prob
import pickle
import numpy as np
from chp.trapi_interface import TrapiInterface

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Fetch the curies known to the TRAPI interface.
logger.info('Getting curies.')
curies = TrapiInterface().get_curies()
logger.info('Got curies.')

# Ten evenly spaced outcome thresholds from 350 to 5000 (inclusive).
survival_times = list(np.linspace(350, 5000, 10))
genes = list(curies["gene"])

# One query per (gene, threshold) pair, genes in the outer loop; the
# progress bar advances once per gene.
queries = [
    build_query(
        genes=[gene_curie],
        disease='MONDO:0007254',
        outcome=('EFO:0000714', '>=', st),
    )
    for gene_curie in tqdm.tqdm(genes, desc='Building gene queries',
                                leave=False)
    for st in survival_times
]

# Persist the generated queries as a pickle.
with open('simple_queries_1.pk', 'wb') as pickle_file:
    pickle.dump(queries, pickle_file)
logger.setLevel(logging.INFO) # Get curies logger.info('Getting curies.') curies = TrapiInterface().get_curies() #curies = client.curies() logger.info('Got curies.') # Build queries queries = [] # One gene curie queries.append( build_query( genes=['ENSEMBL:ENSG00000155657'], disease='MONDO:0007254', outcome=('EFO:0000714', '>=', 1000), )) # One gene one drug curie queries.append( build_query( genes=['ENSEMBL:ENSG00000155657'], therapeutic='CHEMBL:CHEMBL83', disease='MONDO:0007254', outcome=('EFO:0000714', '>=', 1000), )) # Two gene one drug curie queries.append( build_query(
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Fetch the curies known to the TRAPI interface.
logger.info('Getting curies.')
curies = TrapiInterface().get_curies()
logger.info('Got curies.')

# Ten evenly spaced outcome thresholds from 350 to 5000 (inclusive).
survival_times = list(np.linspace(350, 5000, 10))
genes = list(curies["gene"])
drugs = list(curies["chemical_substance"])

# One query per (gene, drug, threshold) triple: genes outermost, thresholds
# innermost; the progress bar advances once per gene.
queries = [
    build_query(
        genes=[gene_curie],
        therapeutic=drug_curie,
        disease='MONDO:0007254',
        outcome=('EFO:0000714', '>=', st),
    )
    for gene_curie in tqdm.tqdm(genes, desc='Building gene queries',
                                leave=False)
    for drug_curie in drugs
    for st in survival_times
]
print(len(queries))

# Persist the generated queries as a pickle.
with open('all_simple_queries.pk', 'wb') as pickle_file:
    pickle.dump(queries, pickle_file)