def kegg_get_gene_id(g="CDK5", species_code="hsa"):
    """
    Return the KEGG gene identifier for a given common gene symbol.

    Sends a request to the KEGG REST "find" URL for the given species and
    symbol, then scans the returned lines for the first entry whose symbol
    list contains exactly g (the comparison is case-sensitive).

    @param g: The gene common name, like "OAS1"
    @param species_code: The code for the genome where this gene is from.
        See KEGG for more details. hsa is the human genome, mmu is mouse.
    @return: The kegg gene id, or None when the query fails, returns no
        data, or no returned entry lists g among its symbols.
    """
    rc = fetch_data("http://rest.kegg.jp/find/%s/%s" % (species_code, g), {}, method="POST")
    # rc is indexed as (response lines, status code).
    if rc[1] != 200:
        print("Query failed with error code: %d" % rc[1])
        return None
    if len(rc[0]) < 1 or rc[0] == ["\n"]:
        print("Query succeeded, but has no data for gene: %s" % g)
        return None

    for line in rc[0]:
        fields = line.split("\t")
        if len(fields) < 2:
            # Blank or malformed line: there is no symbol column to inspect,
            # so skip it instead of failing with IndexError.
            continue
        # The text after the tab, up to the first ';', is treated as a
        # comma-separated list of symbols for this entry.
        symbols = [s.strip() for s in fields[1].split(";")[0].split(",")]
        if g in symbols:
            return fields[0].strip()
    return None
def kegg_get_pathways_with_gene(g="OAS1", species_code="hsa"):
    """
    Determines the set of known pathways associated to a specific gene.

    The gene symbol is first resolved to a KEGG gene id, the KEGG REST
    "link/pathway" URL is then queried with that id, and finally each
    returned pathway id is mapped to its name.

    @param g: The common gene symbol
    @param species_code: Like "hsa" for human, "mmu" for mouse. See the KEGG
        documentation for available organism codes.
    @return: Tuple (kegg_path_names, kegg_path_ids) of two parallel lists, or
        None when the gene cannot be resolved, the query fails, or no
        pathway ids are returned. A pathway whose name lookup fails is
        reported with the name "unknown".
    """
    kegg_id = kegg_get_gene_id(g, species_code)
    if kegg_id is None:
        print("Error: Could not resolve KEGG gene id for gene name %s" % g)
        return None

    rc = fetch_data("http://rest.kegg.jp/link/pathway/%s" % kegg_id, {}, method="POST")
    # rc is indexed as (response lines, status code).
    if rc[1] != 200:
        print("Query failed with error code: %d" % rc[1])
        return None
    if len(rc[0]) < 1 or rc[0] == ["\n"]:
        print("Query succeeded, but has no pathway ids that include gene: %s" % g)
        return None

    # The pathway id is the second tab-separated column. Lines without a tab
    # (blank or malformed) are skipped rather than raising IndexError.
    kegg_path_ids = []
    for line in rc[0]:
        fields = line.split("\t")
        if len(fields) >= 2:
            kegg_path_ids.append(fields[1].strip())
    if not kegg_path_ids:
        print("Query succeeded, but has no pathway ids that include gene: %s" % g)
        return None

    kegg_path_names = []
    for pid in kegg_path_ids:
        name = kegg_get_pathway_name(pid)
        # Keep the two lists aligned even when a name cannot be fetched.
        kegg_path_names.append("unknown" if name is None else name)
    return (kegg_path_names, kegg_path_ids)
def kegg_get_pathway_info(p="path:hsa05222"):
    """
    Returns all the information for a given kegg pathway identifier.

    @param p: The kegg pathway identifier, like "path:hsa05222"
    @return: The response data exactly as delivered by fetch_data (its first
        element), or None when the status code is not 200 or the response
        carries no data.
    """
    response = fetch_data("http://rest.kegg.jp/get/%s" % p, {}, method="POST")

    status = response[1]
    if status != 200:
        print("Query failed with error code: %d" % status)
        return None

    payload = response[0]
    # A lone "\n" entry is what an empty answer looks like; treat it as no data.
    if len(payload) >= 1 and not payload == ["\n"]:
        return payload

    print("Query succeeded, but has no data for pathway: %s" % p)
    return None
def kegg_get_pathway_name(p="path:hsa05164"):
    """
    Returns the kegg pathway name (description) for a given path identifier.

    Queries the KEGG REST "list" URL and takes the second tab-separated
    column of the first returned line as the name.

    @param p: The kegg pathway identifier
    @return: The pathway name with surrounding whitespace removed, or None
        when the query fails, returns no data, or the first line has no
        name column.
    """
    rc = fetch_data("http://rest.kegg.jp/list/%s" % p, {}, method="POST")
    # rc is indexed as (response lines, status code).
    if rc[1] != 200:
        print("Query failed with error code: %d" % rc[1])
        return None
    if len(rc[0]) < 1 or rc[0] == ["\n"]:
        print("Query succeeded, but has no data for pathway: %s" % p)
        return None

    fields = rc[0][0].split("\t")
    if len(fields) < 2:
        # No tab means no name column; report it as missing data instead of
        # letting an IndexError escape to the caller.
        print("Query succeeded, but has no data for pathway: %s" % p)
        return None
    return fields[1].strip()
def query(self, genelist, homologs=True):
    '''
    Run the annotation query for a list of genes and cache the response.

    self.data is reset to an empty list, the request is posted to self.url,
    and every returned line is stored in self.data with everything from its
    first newline onwards removed. Use other methods of this class to query
    the cached data, for example getDataForAnnotation() to get the data for
    annotation GO:0009615, which is 'response to virus'.

    @param genelist: A list of gene strings like ['RSAD2', 'SERPING1',....]
    @param homologs: If True, then the query includes annotations for
        homologs to the input genelist, else it does not.
    @return: A return code indicating status of request. The data will be
        stored in self.data[] as a list of lines.
    '''
    self.data = []

    gene_csv = ",".join(genelist)
    if homologs:
        homolog_flag = '1'
    else:
        homolog_flag = '0'

    # NOTE(review): tax_id '9606' is hard-coded; presumably the taxonomy id
    # of the target organism -- confirm against the service documentation.
    params = {
        'annot_type': 'gene_ontology',
        'cmd': 'report',
        'homologs': homolog_flag,
        'network': '0',
        'tax_id': '9606',
        'gene_box': gene_csv,
    }

    # Original author's open question, kept as-is: "post doesn't work?"
    raw_data, status = ifr.fetch_data(self.url, params, method='POST')

    # Keep only the text before the first newline of each returned line.
    self.data.extend(ln.split('\n', 1)[0] for ln in raw_data)
    return status