def kooo(cccc):
    """Return the KEGG gene links that share a KO with the best hit for ``cccc``.

    The lookup is done in three KEGG REST requests:

    1. ``kegg_find("genes", cccc)`` - the first whitespace-delimited token of
       the response is taken as the gene identifier.
    2. ``kegg_link("ko", gene)`` - the first ``ko:...`` token of the response
       is taken as the orthology identifier.
    3. ``kegg_link("genes", ko)`` - the raw response text is returned.

    The intermediate KO response is printed with an ``"xxx"`` prefix, as the
    original implementation did.

    Returns:
        The raw text of the final request, or ``""`` when the search finds no
        gene or the gene has no KO link.
    """
    no_result = ""

    hits = REST.kegg_find("genes", cccc).read()
    # Without re.MULTILINE, '^' only anchors at the very start of the text,
    # so this yields at most one token: the first gene id.
    gene_ids = re.findall(r'^\S+', hits)
    if not gene_ids:
        # An empty search result used to raise IndexError here.
        return no_result

    ko_links = REST.kegg_link("ko", gene_ids[0]).read()
    print("xxx", ko_links)
    if len(ko_links) < 4:
        return no_result

    ko_ids = re.findall(r'ko:\S+', ko_links)
    if not ko_ids:
        return no_result

    return REST.kegg_link("genes", ko_ids[0]).read()
def add_kegg_descript2(hit):
    """Look up module, KO number and annotation text for ``hit`` in KEGG.

    The lookup is best-effort: any value that cannot be obtained is reported
    as the string ``"none"`` instead of raising.

    * KEGG: first ``K`` + five digits found in the ``kegg_find`` response.
    * ann: the text following that K number on the same line; when no K
      number is present, the part of the second tab-separated field that
      precedes the first ``;``.
    * module: derived from the ``kegg_link('module', hit)`` response (third
      ``:``-separated piece, text after its last ``_``).

    Returns:
        ``strip_lines_list([module, KEGG, ann])`` with ``ann`` first passed
        through ``reduce_func_len``.
    """
    module = KEGG = ann = "none"

    try:
        desc = REST.kegg_find("genes", hit).read()
    except Exception:
        # Deliberate best-effort: a failed request leaves every field "none".
        desc = None

    if desc is not None:
        try:
            KEGG = re.search(r"K[0-9]{5}", desc).group(0)
        except (AttributeError, TypeError):
            # AttributeError: no match (search returned None).
            KEGG = "none"

        try:
            # The original called .replace() on the Match object itself, which
            # always raised, so this branch never produced a value.
            ann = re.search(r"(?<=K[0-9]{5}).*", desc).group(0).replace("\n", "")
        except (AttributeError, TypeError):
            try:
                ann = desc.split("\t")[1].split(";")[0].replace("\n", "")
            except (IndexError, TypeError):
                ann = "none"

        try:
            mod = REST.kegg_link('module', hit).read()
            module = mod.split(":")[2].split("_")[-1].replace("\n", "")
        except Exception:
            # Request failure or a response without a module link.
            module = "none"

    ann = reduce_func_len(ann)
    return strip_lines_list([module, KEGG, ann])
def __get_all_information(self):
    """Fill ``self.modules`` or ``self.reactions`` for this entry.

    When ``self.byModules`` is true, the text from ``self.get_raw_data()`` is
    scanned line by line. The second whitespace-separated field of the
    ``MODULE`` line and of each following continuation line is appended to
    ``self.modules``.

    Otherwise ``kegg_api.kegg_link("reaction", "path:" + self.id)`` is
    iterated and, for every line, the part after ``:`` in the second
    tab-separated field is appended to ``self.reactions``.
    """
    if self.byModules:
        self.modules = []
        in_module_section = False
        for text in self.get_raw_data().split("\n"):
            if text.startswith("MODULE"):
                in_module_section = True
            elif text[:1].strip():
                # KEGG flat files start every section keyword at column 0 and
                # indent continuation lines, so any other non-indented line
                # closes the MODULE section. The original only recognised
                # DISEASE, DBLINKS, REFERENCE and KO_PATHWAY, so other
                # sections leaked into the module list.
                in_module_section = False
            if not in_module_section:
                continue
            fields = re.sub(r'\s+', "\t", text).split("\t")
            # Blank or single-token lines carry no module id.
            if len(fields) > 1 and fields[1]:
                self.modules.append(fields[1])
    else:
        data = kegg_api.kegg_link("reaction", "path:" + self.id)
        self.reactions = []
        for line in data:
            fields = line.strip().split("\t")
            if len(fields) < 2:
                # Skip blank lines instead of raising IndexError.
                continue
            self.reactions.append(fields[1].split(":")[1])
def __retrieve_reactions(self, pathway):
    """Return the reaction ids linked to ``pathway`` via the KEGG API.

    The raw ``kegg_link('rn', pathway)`` response is converted with
    ``self.__to_dataframe`` and column ``1`` of the result is read; every
    ``'rn:'`` occurrence is removed from each entry.
    """
    raw_links = REST.kegg_link('rn', pathway).read()
    link_table = self.__to_dataframe(raw_links)
    reaction_column = link_table[1]

    reaction_ids = []
    for entry in reaction_column:
        reaction_ids.append(entry.replace('rn:', ''))
    return reaction_ids
def add_kegg_descript2(hit):
    """Look up module, KO number and annotation text for ``hit`` in KEGG.

    The lookup is best-effort: any value that cannot be obtained is reported
    as the string ``"none"`` instead of raising.

    * KEGG: first ``K`` + five digits found in the ``kegg_find`` response.
    * ann: the text following that K number on the same line; when no K
      number is present, the part of the second tab-separated field that
      precedes the first ``;``.
    * module: derived from the ``kegg_link('module', hit)`` response (third
      ``:``-separated piece, text after its last ``_``).

    Returns:
        ``strip_lines_list([module, KEGG, ann])`` with ``ann`` first passed
        through ``reduce_func_len``.
    """
    module = KEGG = ann = "none"

    try:
        desc = REST.kegg_find("genes", hit).read()
    except Exception:
        # Deliberate best-effort: a failed request leaves every field "none".
        desc = None

    if desc is not None:
        try:
            KEGG = re.search(r"K[0-9]{5}", desc).group(0)
        except (AttributeError, TypeError):
            # AttributeError: no match (search returned None).
            KEGG = "none"

        try:
            # The original called .replace() on the Match object itself, which
            # always raised, so this branch never produced a value.
            ann = re.search(r"(?<=K[0-9]{5}).*", desc).group(0).replace("\n", "")
        except (AttributeError, TypeError):
            try:
                ann = desc.split("\t")[1].split(";")[0].replace("\n", "")
            except (IndexError, TypeError):
                ann = "none"

        try:
            mod = REST.kegg_link('module', hit).read()
            module = mod.split(":")[2].split("_")[-1].replace("\n", "")
        except Exception:
            # Request failure or a response without a module link.
            module = "none"

    ann = reduce_func_len(ann)
    return strip_lines_list([module, KEGG, ann])
def _download_links(self, dbs=("pathway", "enzyme", "reaction", "compound")):
    """Download KEGG cross-links between databases and save them as JSON.

    For every ordered pair ``(sourcedb, targetdb)`` drawn from ``dbs``, the
    ``kegg_link(targetdb, sourcedb)`` response is parsed into a dict mapping
    each first-column id to the list of second-column ids on its lines. The
    dict is written to ``<self.path>/links/<sourcedb>_<targetdb>.json``.

    Args:
        dbs: Iterable of KEGG database names. The default is an immutable
            tuple so that it cannot be shared and mutated across calls.

    Returns:
        None. The results are only written to disk.
    """
    links_path = os.path.join(self.path, 'links')

    for sourcedb, targetdb in itertools.permutations(dbs, 2):
        links_raw = REST.kegg_link(targetdb, sourcedb)

        mapping = {}
        for line in links_raw.read().splitlines():
            fields = line.split('\t')
            if len(fields) < 2:
                # An empty result comes back as a blank line; skip it rather
                # than fail on the missing second column.
                continue
            mapping.setdefault(fields[0], []).append(fields[1])

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(links_path, exist_ok=True)
        link_path = os.path.join(links_path, sourcedb + "_" + targetdb + ".json")
        with open(link_path, 'w') as f:
            json.dump(mapping, f, indent=2)
def get_kegg_path_to_gene_map(species):
    """Map KEGG pathways to genes for ``species``.

    Iterates over the lines of ``kegg_link(species, "pathway")``, strips the
    ``path:<species>`` and ``<species>:`` prefixes plus newlines, and splits
    each line on tabs.

    Returns:
        A DataFrame with one row per link and the columns ``kegg_pathway``
        (first field) and ``kegg_gene`` (second field).
    """
    kegg_list = REST.kegg_link(species, "pathway")
    # Escape the species code so it is always matched literally.
    clean_kegg_path_to_gene = re.compile(
        r"path:{0}|{0}:|\n".format(re.escape(species))
    )

    rowdicts = []
    for kegg_info in kegg_list:
        if isinstance(kegg_info, bytes):
            # Some handles yield bytes; the pattern above is a str pattern.
            kegg_info = kegg_info.decode("utf-8")
        kegg_data = clean_kegg_path_to_gene.sub("", kegg_info).split("\t")
        if len(kegg_data) < 2:
            # Blank line (empty result): nothing to record.
            continue
        rowdicts.append(
            {"kegg_pathway": kegg_data[0], "kegg_gene": kegg_data[1]}
        )
    return DataFrame.from_dict(rowdicts)
def get_interaction(target, source):
    """Fetch KEGG links between two databases and save them as a TSV file.

    Each line of ``kegg_link(target, source)`` is split on tabs: the first
    field goes into the ``source`` column and the second into the ``target``
    column. Everything up to and including the last ``:`` is then removed
    from every value. The table is written to
    ``./database/<source>_<target>.tsv`` without an index column.

    Returns:
        The resulting ``pandas.DataFrame``.

    Exits:
        Calls ``sys.exit`` with an explanatory message when the request
        raises ``HTTPError``.
    """
    # Only the request is guarded: it is the only step expected to raise
    # HTTPError.
    try:
        interaction = REST.kegg_link(target, source).readlines()
    except HTTPError:
        sys.exit("ERROR: Please select the available database according to the following URL. (http://www.kegg.jp/kegg/rest/keggapi.html) ")

    source_db = []
    target_db = []
    for line in interaction:
        individual = line.strip().split('\t')
        if len(individual) < 2:
            # Blank line (empty result): skip instead of raising IndexError.
            continue
        source_db.append(individual[0])
        target_db.append(individual[1])

    df_interaction = pd.DataFrame({source: source_db, target: target_db})
    # Raw string; matches exactly what the original '.+(?<=\:)' matched.
    df_interaction.replace(r'.+(?<=:)', '', regex=True, inplace=True)
    df_interaction.to_csv(
        "./database/" + source + '_' + target + '.tsv', sep='\t', index=False
    )
    return df_interaction
def kegg_rest(type: str, argument: str, optional_argument: str = "", sleep_time: float = .5) -> List[str]:
    """Call one of Biopython's KEGG REST functions and return its non-empty lines.

    All empty lines are deleted from the list as they do not contain any
    information.

    Arguments
    ----------
    * type: str ~ The KEGG REST action. Can be either 'info', 'get', 'link',
      'list' or 'find'.
    * argument: str ~ The argument for the KEGG order.
    * optional_argument: str="" ~ The second argument, passed on for the
      'link', 'list' and 'find' actions.
    * sleep_time: float=0.5 ~ The time in seconds to wait after the REST
      action was started and before its result is read, in order to limit
      the request rate.

    Raises
    ----------
    * ValueError ~ If 'type' is not one of the supported actions. Earlier
      this surfaced as an UnboundLocalError after the sleep.
    """
    # Execute the matching Biopython KEGG REST function.
    if type == "info":
        kegg_data = REST.kegg_info(argument)
    elif type == "get":
        kegg_data = REST.kegg_get(argument)
    elif type == "link":
        kegg_data = REST.kegg_link(argument, optional_argument)
    elif type == "list":
        kegg_data = REST.kegg_list(argument, optional_argument)
    elif type == "find":
        kegg_data = REST.kegg_find(argument, optional_argument)
    else:
        raise ValueError(
            "Unknown KEGG REST action '{}'; expected one of "
            "'info', 'get', 'link', 'list', 'find'.".format(type)
        )

    # Wait the sleep time doing nothing.
    time.sleep(sleep_time)

    # One string per line of the KEGG REST result, empty lines dropped.
    return [line for line in kegg_data.read().split("\n") if line]
def get_kegg_path_to_gene_map(species):
    """Map KEGG pathways to genes for ``species``.

    Iterates over the lines of ``kegg_link(species, "pathway")``, decodes
    them as UTF-8 when they expose ``decode``, strips the ``path:<species>``
    and ``<species>:`` prefixes plus newlines, and splits each line on tabs.

    Returns:
        A DataFrame with one row per link and the columns ``kegg_pathway``
        (first field) and ``entrezgene`` (second field).
    """
    kegg_list = REST.kegg_link(species, "pathway")
    # Escape the species code so it is always matched literally.
    clean_kegg_path_to_gene = re.compile(
        r"path:{0}|{0}:|\n".format(re.escape(species))
    )

    rowdicts = []
    for kegg_info in kegg_list:
        try:
            kegg_info = kegg_info.decode("utf-8")
        except AttributeError:
            # Already text; nothing to decode.
            pass
        kegg_data = clean_kegg_path_to_gene.sub("", kegg_info).split("\t")
        if len(kegg_data) < 2:
            # Blank line (empty result): skip instead of raising IndexError.
            continue
        rowdicts.append(
            {"kegg_pathway": kegg_data[0], "entrezgene": kegg_data[1]}
        )
    return DataFrame.from_dict(rowdicts)
def create_link_dicts(target_db, source_db):
    """Build forward and reverse lookup dicts from a KEGG link request.

    Each line of ``kegg_link(target_db, source_db)`` is split on tabs; the
    piece after the first ``:`` of the first field is the source id and the
    piece after the first ``:`` of the second field is the target id.

    Returns:
        ``(source_target_dict, target_source_dict)`` - two plain dicts that
        map an id to the set of ids it is linked with.
    """
    ## Get KEGG ko-reaction mapping--this may be useful for verifying module reaction-kos are associated correctly
    ## The ordering of source and target doesn't matter for the content of the output, just the ordering of the output
    raw_links = REST.kegg_link(target_db, source_db)

    source_target_dict = {}
    target_source_dict = {}
    for line in raw_links.read().splitlines():
        row = line.split('\t')
        if len(row) < 2:
            # Blank line (empty result): skip instead of raising IndexError.
            continue
        s = row[0].split(":")[1]
        t = row[1].split(":")[1]
        source_target_dict.setdefault(s, set()).add(t)
        target_source_dict.setdefault(t, set()).add(s)
    return source_target_dict, target_source_dict
def create_a_to_b_dict(a,b):
    """Create dictionary mapping from KEGG database `a` entries to database `b` entries.

    Each line of ``kegg_link(a, b)`` holds a pair of prefixed ids. Whichever
    column starts with the prefix of `a` is treated as the `a` entry. The
    database prefixes are stripped from both ids.

    Returns:
        A dict mapping each `a` id to the set of `b` ids linked to it.

    Raises:
        KeyError: if `a` or `b` is not one of the database names known to
            the prefix table below.
    """
    abbrv_dict = {"pathway": "path", "brite": "br", "module": "md", "orthology": "ko", "genome": "gn", "compound": "cpd", "glycan": "gl", "reaction": "rn", "rclass": "rc", "enzyme": "ec", "network": "ne", "variant": "hsa_var", "disease": "ds", "drug": "dr", "dgroup": "dg"}
    a_prefix = abbrv_dict[a]
    b_prefix = abbrv_dict[b]

    ab_dict = {}
    for line in REST.kegg_link(a, b):
        pair = line.split()
        if len(pair) < 2:
            # Blank line (empty result): skip instead of raising IndexError.
            continue
        if pair[0].startswith(a_prefix):
            a_entry, b_entry = pair[0], pair[1]
        else:
            a_entry, b_entry = pair[1], pair[0]
        aid = a_entry.split(a_prefix + ":")[1]
        bid = b_entry.split(b_prefix + ":")[1]
        # Collect straight into sets; no second list-to-set pass is needed.
        ab_dict.setdefault(aid, set()).add(bid)
    return ab_dict
if fields[0] == 'GENE': want = 1 ## The line with GENE is different print(fields[2].rstrip(';')) ## We reached the next section of the file elif want == 1 and re.match('^\S', line): sys.exit() ## We're still in the list of genes if want == 1 and len(fields) > 1: print(fields[1].rstrip(';')) # Get genes involved with fatty-acid biosynthesis in Kitasatospora ####Get gene ontology terms######### result = REST.kegg_link("compound", "map00061").read() #print(human_pathways) # Filter all human pathways for repair pathways ####repair_pathways = [] ####for line in human_pathways.rstrip().split("n"): #entry, description = line.split("\t") ####print(line) """ if "repair" in description: repair_pathways.append(entry) #print(repair_pathways) # Get the genes for pathways and add them to a list repair_genes = [] for pathway in repair_pathways: pathway_file = REST.kegg_get(pathway).read() # query and read each pathway print(pathway_file) # iterate through each KEGG pathway file, keeping track of which section
def t_KEGG_Query():
    """Exercise the Bio.KEGG REST wrapper with a fixed series of requests.

    Every request is issued in order, its response is read in full, and the
    URL of the response is printed.
    """
    print("Testing Bio.KEGG.query\n\n")

    requests = (
        # info tests
        (REST.kegg_info, ("kegg",)),
        (REST.kegg_info, ("pathway",)),
        # list tests
        (REST.kegg_list, ("pathway",)),
        (REST.kegg_list, ("pathway", "hsa")),
        (REST.kegg_list, ("organism",)),
        (REST.kegg_list, ("hsa",)),
        (REST.kegg_list, ("T01001",)),
        (REST.kegg_list, ("hsa:10458+ece:Z5100",)),
        (REST.kegg_list, (["hsa:10458", "ece:Z5100"],)),
        (REST.kegg_list, ("cpd:C01290+gl:G00092",)),
        (REST.kegg_list, (["cpd:C01290", "gl:G00092"],)),
        (REST.kegg_list, ("C01290+G00092",)),
        (REST.kegg_list, (["C01290", "G00092"],)),
        # find tests
        (REST.kegg_find, ("genes", "shiga+toxin")),
        (REST.kegg_find, ("genes", ["shiga", "toxin"])),
        (REST.kegg_find, ("compound", "C7H10O5", "formula")),
        (REST.kegg_find, ("compound", "O5C7", "formula")),
        (REST.kegg_find, ("compound", "174.05", "exact_mass")),
        (REST.kegg_find, ("compound", "300-310", "mol_weight")),
        # get tests
        (REST.kegg_get, ("cpd:C01290+gl:G00092",)),
        (REST.kegg_get, (["cpd:C01290", "gl:G00092"],)),
        (REST.kegg_get, ("C01290+G00092",)),
        (REST.kegg_get, (["C01290", "G00092"],)),
        (REST.kegg_get, ("hsa:10458+ece:Z5100",)),
        (REST.kegg_get, (["hsa:10458", "ece:Z5100"],)),
        (REST.kegg_get, ("hsa:10458+ece:Z5100", "aaseq")),
        (REST.kegg_get, (["hsa:10458", "ece:Z5100"], "aaseq")),
        (REST.kegg_get, ("hsa05130", "image")),
        # conv tests
        (REST.kegg_conv, ("eco", "ncbi-geneid")),
        (REST.kegg_conv, ("ncbi-geneid", "eco")),
        (REST.kegg_conv, ("ncbi-gi", "hsa:10458+ece:Z5100")),
        (REST.kegg_conv, ("ncbi-gi", ["hsa:10458", "ece:Z5100"])),
        # link tests
        (REST.kegg_link, ("pathway", "hsa")),
        (REST.kegg_link, ("hsa", "pathway")),
        (REST.kegg_link, ("pathway", "hsa:10458+ece:Z5100")),
        (REST.kegg_link, ("pathway", ["hsa:10458", "ece:Z5100"])),
    )

    for request, arguments in requests:
        resp = request(*arguments)
        resp.read()
        print(resp.url)
] # Dictionary of lists for each pathways' enzymes' ec number pathway_ec = {} # Dictionary of lists of enzymes a bacteria has in KEGG pathway organism_enzymes = {} # Populate dictionary with the organism list for organism in organism_list: organism_enzymes[organism] = {} # Populate dictionary with lists of enzymes in that # pathway for pathway in pathway_list: ec_read = REST.kegg_link('ec', pathway).read() pathway_ec[pathway] = [] for line in ec_read.rstrip().split('\n'): path, ec = line.split('\t') pathway_ec[pathway].append(ec[3:]) count = 0 # Go through each pathway and collect all the orthologs for pathway in pathway_list: print(pathway) orthology_read = REST.kegg_link('ko', pathway).read() # For each ortholog, find the ec number and if # an organism has the gene for it for line in orthology_read.rstrip().split('\n'): pathway, ortholog = line.split('\t') to_ec = REST.kegg_link('ec', ortholog[3:]).read()
def get_kos(pathway):
    """Return the KO links of ``pathway`` as a list of tab-split rows.

    The ``kegg_link('ko', pathway)`` response is split on ``'\\n'`` and each
    piece is split on tabs. Empty pieces are not dropped, so an empty line in
    the response appears as ``['']`` in the result.
    """
    response_text = REST.kegg_link('ko', pathway).read()
    rows = []
    for line in response_text.split('\n'):
        rows.append(line.split('\t'))
    return rows
def get_genes(ko):
    """Return the gene links of ``ko`` as a list of tab-split rows.

    The ``kegg_link('genes', ko)`` response is split on ``'\\n'`` and each
    piece is split on tabs. Empty pieces are not dropped, so an empty line in
    the response appears as ``['']`` in the result.
    """
    response_text = REST.kegg_link('genes', ko).read()
    rows = []
    for line in response_text.split('\n'):
        rows.append(line.split('\t'))
    return rows