def download_all_kegg_pathways(species_code='mmu'):
    """Download every KEGG pathway of a species together with its genes.

    Args:
        species_code: organism code handed to ``REST.kegg_list``
            (default ``'mmu'``).

    Returns:
        dict: maps each pathway identifier (first tab-separated column of
        the pathway listing) to a dict with the keys ``'name'`` (second
        column), ``'geneid'`` (set of int) and ``'gene_symbol'`` (set of
        str).
    """
    pathways_str = REST.kegg_list("pathway", species_code).read()
    pathways = {}
    for row in pathways_str.rstrip().split('\n'):
        if not row:
            # An empty response still yields one empty row; nothing to record.
            continue
        fields = row.split('\t')
        pathways[fields[0]] = {'name': fields[1]}

    def get_genes_for(pathways):
        """Fill the gene id / gene symbol sets of every pathway in place."""
        for pathway, info in pathways.items():
            info['geneid'] = set()
            info['gene_symbol'] = set()
            # Query and read each pathway flat file.
            pathway_file = REST.kegg_get(pathway).read()

            # Walk through the flat file, keeping track of which section we
            # are in; only the GENE section is read.
            current_section = None
            for line in pathway_file.rstrip().split("\n"):
                section = line[:12].strip()  # section names are within 12 columns
                if section:
                    current_section = section
                if current_section != "GENE":
                    continue
                try:
                    gene_identifiers, _ = line[12:].split("; ")[:2]
                    geneid, gene_symbol = gene_identifiers.split()
                    geneid = int(geneid)
                except ValueError:
                    # Line does not have the "<int id> <symbol>; <description>"
                    # shape: discard it (best effort), but no longer swallow
                    # unrelated errors such as KeyboardInterrupt.
                    continue
                info['geneid'].add(geneid)
                info['gene_symbol'].add(gene_symbol)

    get_genes_for(pathways)
    return pathways
def load_taxon_id(self, in_code=None):
    """Look up and cache the taxon id for an organism code via KEGG.

    The value is read from the KEGG ``organism`` and ``genome`` listings and
    stored on ``self.taxon_id``; later calls return the stored value without
    contacting KEGG.

    Args:
        in_code: organism code to look up. When ``None`` the first entry of
            ``self.tables['run_infos'].organism`` is used.

    Returns:
        int: the taxon id.

    TODO: cache the org_table and gen_table in cache/
    """
    if self.taxon_id is not None:
        return self.taxon_id

    import Bio.KEGG.REST as kegg  # requires BioPython 1.65 or later!

    # cStringIO only exists on Python 2; fall back to io.StringIO elsewhere.
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO

    if in_code is None:
        in_code = self.tables['run_infos'].organism[0]

    buf = StringIO(kegg.kegg_list('organism').read())
    try:
        org_table = pd.read_table(buf, sep='\t', header=None)
    finally:
        buf.close()
    # Column 1 is matched against the organism code, column 0 is the KEGG
    # code used below. `.loc` replaces `.ix`, which newer pandas removed.
    kegg_code = org_table.loc[org_table[1] == in_code][0].values[0]

    buf = StringIO(kegg.kegg_list('genome').read())
    try:
        gen_table = pd.read_table(buf, sep='\t', header=None)
    finally:
        buf.close()
    genome_row = gen_table.loc[gen_table[0] == 'genome:' + kegg_code]
    # NOTE(review): assumes the id is the third ', '-separated field of the
    # genome description, up to the first '; ' - confirm against the current
    # KEGG listing format.
    taxon_id = int(genome_row[1].values[0].split(', ')[2].split('; ')[0])

    self.taxon_id = taxon_id
    return taxon_id
def get_kegg_gene_to_external_map(species):
    """Maps kegg genes to external gene names.

    Legacy function for goverlap. Deprecated.

    Every line of ``REST.kegg_list(species)`` is stripped of the
    ``<species>:`` prefix and of newlines, then cut into fields on tab,
    semicolon and newline. The first field is the gene id; the second field
    is split on ``", "`` and each piece yields one row.

    Returns a DataFrame with the columns ``entrezgene`` and ``gene``.
    """
    listing = REST.kegg_list(species)
    prefix_or_newline = re.compile(r"{}:|\n".format(species))
    field_pattern = re.compile(r"[^\t;\n]+")

    records = []
    for raw_line in listing:
        # Lines may arrive as bytes; objects without .decode pass through.
        try:
            raw_line = raw_line.decode("utf-8")
        except AttributeError:
            pass
        fields = field_pattern.findall(prefix_or_newline.sub("", raw_line))
        gene_names = fields[1].split(", ")
        records.extend(
            {"entrezgene": fields[0], "gene": gene_name}
            for gene_name in gene_names
        )
    return DataFrame.from_dict(records)
def fetch_entry_list(database):
    """Fetch the list of all entries of a KEGG database.

    The tab-separated response of ``REST.kegg_list(database)`` is parsed
    without a header row into a DataFrame with the columns ``id`` and
    ``description``.
    """
    handle = REST.kegg_list(database)
    column_names = ['id', 'description']
    return pd.read_csv(handle, sep='\t', header=None, names=column_names)
def get_kegg(self, K_number):
    """Fetch one KEGG entry and store it in the ``kegg_reference`` table.

    Nothing is fetched when a row for ``K_number`` already exists. Otherwise
    the one-line ``REST.kegg_list`` response is split into identifier, name
    and (optional) EC number. If the lookup or the parsing fails, the error
    is written to stderr and a row is still inserted with the fields that
    could not be parsed left as ``None``.

    Args:
        K_number: key looked up in KEGG and used as the row key.

    Returns:
        None.
    """
    print(K_number)
    self.cur.execute(
        """SELECT K_number from kegg_reference where K_number = ?""",
        (K_number, ))
    if self.cur.fetchone():
        return

    name = identifier = ec_number = None
    try:
        listing = REST.kegg_list(K_number).read()
        # Everything after the first tab is "<identifier>;<definition>".
        description = listing.strip().split("\t", 1)[1]
        identifier, definition = description.split(";", 1)
        # An optional trailing "[EC:...]" holds the EC number.
        parts = definition.rstrip("]").split("[EC:")
        name = parts[0]
        if len(parts) == 2:
            ec_number = parts[1]
    except Exception as e:
        # Terminate the line so it does not fuse with the row id logged
        # below; str() keeps the join from failing on non-string keys.
        sys.stderr.write("\t".join([str(K_number), str(e)]) + "\n")
    finally:
        self.cur.execute(
            """INSERT OR IGNORE INTO kegg_reference(K_number, name, identifier, ec_number) VALUES(?,?,?,?)""",
            (K_number, name, identifier, ec_number))
        sys.stderr.write(str(self.cur.lastrowid) + "\n")
        self.conx.commit()
def create_id_name_dict(db):
    """Return a dict mapping each entry id listed for ``db`` to its name.

    Every line of ``REST.kegg_list(db)`` is split on tabs; the first field
    becomes the key and the second field the value.
    """
    listing = REST.kegg_list(db).read()
    rows = [line.split('\t') for line in listing.splitlines()]
    return {row[0]: row[1] for row in rows}
def queryAllPathway(fpathway=None, fpathwayInfo=None, hsa='hsa'):
    """List all KEGG pathways of an organism and optionally save them.

    Args:
        fpathway: if truthy, the list of pathway ids is passed to
            ``saveList`` together with this value.
        fpathwayInfo: if truthy, the list of ``(id, description)`` tuples is
            passed to ``saveList`` together with this value.
        hsa: organism code handed to ``REST.kegg_list`` (default ``'hsa'``).

    Returns:
        list of str: the pathway ids with any ``db:`` prefix removed.
    """
    listing = REST.kegg_list("pathway", hsa).read()
    repair_pathways = []
    repair_pathways_info = []
    for line in listing.rstrip().split("\n"):
        if not line:
            continue  # empty response
        entry, description = line.split("\t")
        # Take the part after the last ':' so that ids with a "path:" prefix
        # and ids without any prefix are both accepted (the former [1] index
        # raised IndexError on prefix-less ids).
        entry = entry.split(':')[-1]
        repair_pathways.append(entry)
        repair_pathways_info.append((entry, description))
    if fpathway:
        saveList(repair_pathways, fpathway)
    if fpathwayInfo:
        saveList(repair_pathways_info, fpathwayInfo)
    return repair_pathways
def _retrieve_lists(dbs):
    """Return, for every database in ``dbs``, the list of its entry ids.

    For each database the ``REST.kegg_list`` response is split into
    tab-separated rows. Ids (first field) are collected through a dict keyed
    by id, so a repeated id appears only once in the resulting list.
    """
    lists = {}
    for db in dbs:
        # Retrieve all entry ids and names.
        listing = REST.kegg_list(db).read()
        rows = [line.split('\t') for line in listing.splitlines()]
        names_by_id = {row[0]: row[1] for row in rows}
        lists[db] = list(names_by_id)
    return lists
def get_pathway_to_definition_map(species):
    """Map kegg paths to their definition.

    Args:
        species: organism code handed to ``REST.kegg_list``; it is also the
            prefix stripped from every pathway id.

    Returns:
        DataFrame with the columns ``kegg_pathway`` and
        ``kegg_pathway_definition``.
    """
    kegg_list = REST.kegg_list("pathway", species)
    clean_kegg_path = re.compile(r"path:{}|\n".format(species))

    rowdicts = []
    for kegg_path_line in kegg_list:
        # Some handles yield bytes; decode so the str pattern applies.
        if isinstance(kegg_path_line, bytes):
            kegg_path_line = kegg_path_line.decode("utf-8")
        kegg_info = clean_kegg_path.sub("", kegg_path_line)
        if not kegg_info:
            continue  # blank line
        pathway, definition = kegg_info.split("\t")
        definition = definition.split(" - ")[0]  # Remove species info
        rowdicts.append({"kegg_pathway": pathway,
                         "kegg_pathway_definition": definition})
    return DataFrame.from_dict(rowdicts)
def get_individual(target):
    """Download the listing of one KEGG database and save it as TSV.

    The listing is written to ``./database/kegg_<target>.tsv``.

    Args:
        target: database name handed to ``REST.kegg_list``; also used as the
            name of the id column.

    Returns:
        DataFrame with the columns ``<target>`` and ``name``.

    Exits the interpreter with an error message on ``HTTPError``.
    """
    try:
        ids = []
        names = []
        for line in REST.kegg_list(target).readlines():
            fields = line.strip().split('\t')
            ids.append(fields[0])
            names.append(fields[1])
        df_individual = pd.DataFrame({target: ids, "name": names})
        # Drop everything up to and including the last ':' of each cell.
        # Raw string: the pattern text is unchanged, but '\:' is no longer
        # an invalid escape sequence in the literal.
        # NOTE(review): this runs on every column, so a name that contains
        # ':' is truncated as well - confirm that is intended.
        df_individual.replace(r'.+(?<=\:)', '', regex=True, inplace=True)
        df_individual.to_csv("./database/" + 'kegg_' + target + '.tsv',
                             sep='\t', index=None)
    except HTTPError:
        sys.exit("ERROR: Please select the available database according to the following URL. (http://www.kegg.jp/kegg/rest/keggapi.html) ")
    return df_individual
def kegg_rest(type: str, argument: str, optional_argument: str = "",
              sleep_time: float = .5) -> List[str]:
    """Call a Biopython KEGG REST function and return the result lines.

    All empty lines are deleted from the list as they do not contain any
    information.

    Arguments
    ----------
    * type: str ~ The KEGG REST action. Can be either 'info', 'get', 'link',
      'list' or 'find'.
    * argument: str ~ The argument for the KEGG order.
    * optional_argument: str="" ~ The second argument, which is passed on
      for the 'link', 'list' and 'find' actions.
    * sleep_time: float=0.5 ~ The time in seconds that is waited after the
      REST action was performed, to avoid contacting the server too often.

    Raises
    ----------
    * ValueError ~ If type is not one of the supported actions. This is
      raised before any request is made and before sleeping.
    """
    # Execute the matching Biopython KEGG REST function.
    if type == "info":
        kegg_data = REST.kegg_info(argument)
    elif type == "get":
        kegg_data = REST.kegg_get(argument)
    elif type == "link":
        kegg_data = REST.kegg_link(argument, optional_argument)
    elif type == "list":
        kegg_data = REST.kegg_list(argument, optional_argument)
    elif type == "find":
        kegg_data = REST.kegg_find(argument, optional_argument)
    else:
        # Previously this fell through and failed later on the unbound
        # result variable, after a pointless sleep.
        raise ValueError(
            "Unknown KEGG REST action {!r}; expected 'info', 'get', 'link', "
            "'list' or 'find'.".format(type))

    # Wait the sleep time doing nothing.
    time.sleep(sleep_time)

    # One string per line of the KEGG REST result, without empty lines.
    lines: List[str] = kegg_data.read().split("\n")
    not_empty_lines: List[str] = [line for line in lines if len(line) > 0]
    return not_empty_lines
def get_kegg_gene_to_external_map(species):
    """Maps kegg genes to external gene names.

    Every line of ``REST.kegg_list(species)`` is stripped of the
    ``<species>:`` prefix and of newlines, then cut into fields on tab,
    semicolon and newline. The first field is the gene id; the second field
    is split on ``", "`` and each piece yields one row.

    Args:
        species: organism code handed to ``REST.kegg_list``.

    Returns:
        DataFrame with the columns ``kegg_gene`` and ``gene``.
    """
    kegg_list = REST.kegg_list(species)
    clean_kegg_info = re.compile(r"{}:|\n".format(species))
    parse_kegg_info = re.compile(r"[^\t;\n]+")

    rowdicts = []
    for kegg_info in kegg_list:
        # Some handles yield bytes; decode so the str patterns apply.
        if isinstance(kegg_info, bytes):
            kegg_info = kegg_info.decode("utf-8")
        kegg_data = parse_kegg_info.findall(clean_kegg_info.sub("", kegg_info))
        if len(kegg_data) < 2:
            continue  # blank line or entry without a name field
        for gene in kegg_data[1].split(", "):
            rowdicts.append({"kegg_gene": kegg_data[0], "gene": gene})
    return DataFrame.from_dict(rowdicts)
def save_all_kegg_pathway_files(paths):
    """Uses the KEGG REST API to find and save all pathway data files for
    each species in the input dictionary.

    Each pathway is written to ``<path>/<pathway id>.txt``, with ``:`` in
    the id replaced by ``_``.

    Args:
        paths (dict of str:str): A mapping between strings referencing
            species and paths to the output directory for each.
    """
    for species, path in paths.items():
        pathways = REST.kegg_list("pathway", species)

        # Create the output directory once per species instead of checking
        # the filesystem again for every single pathway.
        if not os.path.exists(path):
            os.makedirs(path)

        for pathway in pathways:
            fields = pathway.split()
            if not fields:
                continue  # blank line in the listing

            # Get the pathway file contents through the REST API.
            pathway_id = fields[0]
            pathway_file = REST.kegg_get(dbentries=pathway_id).read()

            # Where should the contents of the obtained file be written?
            pathway_id_str = pathway_id.replace(":", "_")
            filename = os.path.join(path, "{}.txt".format(pathway_id_str))
            with open(filename, "w") as outfile:
                outfile.write(pathway_file)
def main():
    """Write one ``in/<species>.gmt`` gene-set file per species.

    For each species code every pathway is fetched from KEGG and the gene
    symbols of its GENE section are collected under the pathway description
    (text before the first ``' - '``). Pathways with more than four
    collected symbols are written, sorted by name, as
    ``<name>\\t\\t<symbols sorted and tab-joined>``.
    """
    # 'hsa', 'dme'
    for pwid in ['dre', 'cel', 'sce', 'mmu']:
        listing = REST.kegg_list('pathway', pwid).read()

        # Get the genes for pathways and add them to a list.
        pathways_dict = defaultdict(list)
        for record in listing.strip().split('\n'):
            entry, description = record.split('\t')
            pathway_name = description.split(' - ')[0]
            # Query and read each pathway.
            pathway_text = REST.kegg_get(entry).read()

            # Walk through the flat file, keeping track of which section we
            # are in; only the GENE section is read.
            current_section = None
            for line in pathway_text.rstrip().split('\n'):
                label = line[:12].strip()  # section names are within 12 columns
                if label != '':
                    current_section = label
                if current_section != 'GENE':
                    continue
                parts = line[12:].split('; ')
                if len(parts) <= 1:
                    continue
                gene_id, gene_symbol = parts[0].split()
                pathways_dict[pathway_name].append(gene_symbol)

        with open('in/{0}.gmt'.format(pwid), 'w') as pw_file:
            for desc in sorted(pathways_dict):
                symbols = pathways_dict[desc]
                if len(symbols) > 4:
                    pw_file.write(
                        '{0}\t\t{1}\n'.format(desc, '\t'.join(sorted(symbols))))
    return None
def find_kegg(genes):
    """Find, for each gene, the "lpl" pathways whose flat file mentions it.

    Args:
        genes: iterable of gene identifiers (strings). A gene matches a
            pathway when it occurs as a substring of any line of the
            pathway flat file.

    Returns:
        dict: maps each matching gene to the single matching pathway entry
        (a str) or, once a gene was seen for a pathway more than once or
        matched several pathways, to a list of entries.
    """
    count = 0
    lpl_pathways = REST.kegg_list("pathway", "lpl").read()
    entries = []
    for line in lpl_pathways.rstrip().split("\n"):
        entry, description = line.split("\t")
        entries.append(entry)
    print(entries)

    # Each pathway file is downloaded at most once and reused for all genes;
    # formerly every (gene, pathway) pair triggered its own request.
    fetched_lines = {}

    pathway = {}
    for gene in genes:
        for entry in entries:
            count += 1  # number of (gene, pathway) pairs examined
            if entry not in fetched_lines:
                fetched_lines[entry] = REST.kegg_get(
                    entry, option=None).readlines()
            # NOTE(review): substring test, so an id that is a prefix of
            # another id matches as well - confirm this is acceptable.
            if not any(gene in text for text in fetched_lines[entry]):
                continue
            print(entry)
            print(gene)
            if gene in pathway:
                # Gene already known: turn the stored value into a list and
                # add the new pathway entry if it is not in there yet.
                known = pathway[gene]
                hits = list(known) if isinstance(known, list) else [known]
                if entry not in hits:
                    hits.append(entry)
                pathway[gene] = hits
            else:
                # First match: add the gene as a new key.
                pathway[gene] = entry
        print(pathway, count)
    print(pathway)
    return pathway
def get_pathway_to_definition_map(species):
    """Map kegg paths to their definition.

    Each line of the pathway listing is decoded if it has a ``decode``
    method, stripped of the ``path:<species>`` prefix and of newlines, and
    split on the tab into pathway id and definition. The definition is cut
    at the first ``" - "``.

    Returns a DataFrame with the columns ``kegg_pathway`` and
    ``kegg_pathway_definition``.
    """
    listing = REST.kegg_list("pathway", species)
    prefix_or_newline = re.compile(r"path:{}|\n".format(species))

    records = []
    for raw_line in listing:
        try:
            raw_line = raw_line.decode("utf-8")
        except AttributeError:
            pass  # no .decode available: use the line as it is
        pathway, definition = prefix_or_newline.sub("", raw_line).split("\t")
        # Remove species info
        short_definition = definition.split(" - ")[0]
        records.append({"kegg_pathway": pathway,
                        "kegg_pathway_definition": short_definition})
    return DataFrame.from_dict(records)
def update_files(base_dir="/data/databases/kegg/"):
    """Refresh the local KEGG listings and download missing ko KGML files.

    Writes ``<base_dir><db>.txt`` for the pathway, ko, cpd and brite
    listings, then, for every map id in the pathway listing, downloads
    ``<base_dir>ko/ko<number>.kgml`` unless that file already exists.

    Args:
        base_dir: directory prefix (with trailing separator) for all files.
    """
    for db in ["pathway", "ko", "cpd", "brite"]:
        # Download first so that a failed request does not truncate a
        # listing file from an earlier run.
        data = REST.kegg_list(db).read()
        with open(base_dir + db + ".txt", "w") as h:
            h.write(data)

    # wget http://www.kegg.jp/kegg-bin/download_htext?htext=br08901.keg&format=json&filedir=
    # wget http://www.kegg.jp/kegg-bin/download_htext?htext=br08001.keg&format=json&filedir=

    with open(base_dir + "pathway.txt") as h:
        pathway_lines = h.readlines()

    for pathway in tqdm(pathway_lines):
        fields = pathway.split()
        if not fields:
            continue  # blank line
        # Accept ids both with and without a "db:" prefix in front of "map".
        map_id = fields[0].split(":")[-1]
        if not map_id.startswith("map"):
            continue
        pw = "ko" + map_id[len("map"):]
        kgmlpath = base_dir + "ko/" + pw + ".kgml"
        if os.path.exists(kgmlpath):
            continue
        # NOTE(review): the file is created before the download, so a failed
        # download leaves an empty file that is never retried. This is kept
        # from the original - confirm whether that is wanted.
        with open(kgmlpath, "w") as h:
            try:
                data = REST.kegg_get(pw, option="kgml").read()
                h.write(data)
                sleep(1)
            except Exception:
                # Best effort: skip pathways that cannot be downloaded, but
                # let KeyboardInterrupt / SystemExit through.
                pass
def get_network(org, opt='ec'):
    """Build a graph from the KGML files of all pathways of an organism.

    For every pathway listed for ``org`` the organism KGML and the
    reference KGML (``"path:" + opt + <last five characters of the id>``)
    are parsed into a ``KeggParser`` object, followed by its relation and
    reaction extraction. Pathways whose download or parsing fails are
    skipped.

    Args:
        org: organism code handed to ``kegg_list``.
        opt: reference prefix (default ``'ec'``); also stored as
            ``graph.ref``.

    Returns:
        The populated ``KeggParser`` object after ``building_graph(2)``.
    """
    # Creating a Parser Object
    graph = k.KeggParser()
    # Store pathways that cannot be retrieved or parsed
    error = []

    # Getting organism pathways; empty lines are dropped. (The former
    # list.remove('') raised ValueError when no empty line was present.)
    listing = r.kegg_list('pathway', org).read()
    list1 = [row for row in listing.split('\n') if row != '']

    print('Retrieving data from KEGG PATHWAY database. ' + str(time.ctime()))

    # For each path getting enzymes and reactions
    for path in list1:
        try:
            path = path.split('\t')
            graph.genes = parse.read(r.kegg_get(path[0], 'kgml'))
            graph.genes_default = parse.read(
                r.kegg_get("path:" + opt + path[0][-5:], 'kgml'))
            graph.path = path
        except Exception:
            error.append(path[0])
            continue
        graph.get_relations()
        graph.get_reactions()

    graph.ref = opt

    # Building Graph
    graph.building_graph(2)
    return (graph)
def amount_pathway_genes():
    """
    Function to measure the amount repair genes in a human pathway.

    Human pathways whose description contains "repair" are selected, the
    gene symbols of their GENE sections are collected without duplicates
    (first occurrence order), and the counts and symbols are printed.

    :return: None
    """
    human_pathways = REST.kegg_list("pathway", "hsa").read()

    # Filter all human pathways for repair pathways
    repair_pathways = []
    for line in human_pathways.rstrip().split("\n"):
        entry, description = line.split("\t")
        if "repair" in description:
            repair_pathways.append(entry)

    # Get the genes for pathways and add them to a list
    repair_genes = []
    seen_symbols = set()  # constant-time duplicate check
    for pathway in repair_pathways:
        pathway_file = REST.kegg_get(pathway).read()  # query and read each pathway

        # Walk through the flat file, keeping track of which section we are
        # in; only the GENE section is read.
        current_section = None
        for line in pathway_file.rstrip().split("\n"):
            section = line[:12].strip()  # section names are within 12 columns
            if section:
                current_section = section
            if current_section != "GENE":
                continue
            # Split once only: descriptions may contain "; " themselves, and
            # lines without a description are skipped instead of crashing
            # the tuple unpacking.
            parts = line[12:].split("; ", 1)
            if len(parts) < 2:
                continue
            id_and_symbol = parts[0].split()
            if len(id_and_symbol) != 2:
                continue
            gene_symbol = id_and_symbol[1]
            if gene_symbol not in seen_symbols:
                seen_symbols.add(gene_symbol)
                repair_genes.append(gene_symbol)

    print("There are %d repair pathways and %d repair genes. The genes are:" %
          (len(repair_pathways), len(repair_genes)))
    print(", ".join(repair_genes))
def t_KEGG_Query():
    """Tests Bio.KEGG API Wrapper

    Runs a fixed sequence of REST calls; for each one the response is read
    and the requested URL is printed.
    """
    print("Testing Bio.KEGG.query\n\n")

    # (REST function name, positional arguments), in execution order.
    calls = [
        # info tests
        ("kegg_info", ("kegg",)),
        ("kegg_info", ("pathway",)),
        # list tests
        ("kegg_list", ("pathway",)),
        ("kegg_list", ("pathway", "hsa")),
        ("kegg_list", ("organism",)),
        ("kegg_list", ("hsa",)),
        ("kegg_list", ("T01001",)),
        ("kegg_list", ("hsa:10458+ece:Z5100",)),
        ("kegg_list", (["hsa:10458", "ece:Z5100"],)),
        ("kegg_list", ("cpd:C01290+gl:G00092",)),
        ("kegg_list", (["cpd:C01290", "gl:G00092"],)),
        ("kegg_list", ("C01290+G00092",)),
        ("kegg_list", (["C01290", "G00092"],)),
        # find tests
        ("kegg_find", ("genes", "shiga+toxin")),
        ("kegg_find", ("genes", ["shiga", "toxin"])),
        ("kegg_find", ("compound", "C7H10O5", "formula")),
        ("kegg_find", ("compound", "O5C7", "formula")),
        ("kegg_find", ("compound", "174.05", "exact_mass")),
        ("kegg_find", ("compound", "300-310", "mol_weight")),
        # get tests
        ("kegg_get", ("cpd:C01290+gl:G00092",)),
        ("kegg_get", (["cpd:C01290", "gl:G00092"],)),
        ("kegg_get", ("C01290+G00092",)),
        ("kegg_get", (["C01290", "G00092"],)),
        ("kegg_get", ("hsa:10458+ece:Z5100",)),
        ("kegg_get", (["hsa:10458", "ece:Z5100"],)),
        ("kegg_get", ("hsa:10458+ece:Z5100", "aaseq")),
        ("kegg_get", (["hsa:10458", "ece:Z5100"], "aaseq")),
        ("kegg_get", ("hsa05130", "image")),
        # conv tests
        ("kegg_conv", ("eco", "ncbi-geneid")),
        ("kegg_conv", ("ncbi-geneid", "eco")),
        ("kegg_conv", ("ncbi-gi", "hsa:10458+ece:Z5100")),
        ("kegg_conv", ("ncbi-gi", ["hsa:10458", "ece:Z5100"])),
        # link tests
        ("kegg_link", ("pathway", "hsa")),
        ("kegg_link", ("hsa", "pathway")),
        ("kegg_link", ("pathway", "hsa:10458+ece:Z5100")),
        ("kegg_link", ("pathway", ["hsa:10458", "ece:Z5100"])),
    ]

    for function_name, arguments in calls:
        # Look the function up per call so that a failure surfaces at the
        # same point of the sequence as with individually written calls.
        resp = getattr(REST, function_name)(*arguments)
        resp.read()
        print(resp.url)
#!/usr/bin/env python # encoding: utf-8 """ @author: zhuhz @file: keggAPI_v1.py @time: 2020/6/17 9:13 """ from Bio.KEGG import REST orthologies = REST.kegg_list('orthology').read() res = 'ko' + '\t' + 'ko name' + '\t' + 'ko des' + '\t' + 'map' + '\t' + 'map name' + '\t' + 'map class' + '\n' for orth in orthologies.rstrip().split('\n'): ko_entry, ko_des = orth.strip().split('\t') ko_id = ko_entry.strip().split(':')[1] try: ko_name, ko_func = ko_des.strip().split('; ', 1) except ValueError: ko_name = ko_des ko_func = ko_des ko_info = REST.kegg_get(ko_entry).read() current_section = None for line in ko_info.rstrip().split('\n'): section = line[:12].strip() if not section == '': current_section = section if current_section == 'PATHWAY': maps = line[12:] for map in maps.rstrip().split('\n'): map_entry, map_name = (
def get_organisms():
    """Return the KEGG organism listing as a list of tab-split rows.

    The response text is split on newlines without stripping it first, so
    a response that ends with a newline produces a final ``['']`` row.
    """
    listing = REST.kegg_list('organism').read()
    rows = []
    for line in listing.split('\n'):
        rows.append(line.split('\t'))
    return rows
#!/usr/bin/env python # encoding: utf-8 """ @author: zhuhz @file: keggAPI.py @time: 2020/6/16 13:58 """ from Bio.KEGG import REST pathways = REST.kegg_list('pathway').read() # print(type(pathways)) res = 'ko' + '\t' + 'ko name' + '\t' + 'ko des' + '\t' + 'module' + '\t' + 'module name' + '\t' + 'map' + '\t' + 'map name' + '\t' + 'map class' + '\n' for pathway in pathways.rstrip().split('\n'): map_entry, map_description = pathway.split('\t') # print(map_entry) map = REST.kegg_get(map_entry).read() # print(type(map)) current_section = None for line in map.rstrip().split('\n'): section = line[:12].strip() if not section == '': current_section = section if current_section == 'NAME': map_name = line[12:] elif current_section == 'CLASS': map_class = line[12:] # elif current_section == 'PATHWAY_MAP': # map_des = line[22:] elif current_section == 'MODULE':
def get_pathways(organism):
    """Return the first tab-separated field of every pathway listing line.

    The response text is split on newlines without stripping it first, so
    a response that ends with a newline produces a final ``''`` element.
    """
    listing = REST.kegg_list('pathway', organism).read()
    identifiers = []
    for line in listing.split('\n'):
        first_field = line.split('\t')[0]
        identifiers.append(first_field)
    return identifiers
from Bio.KEGG import REST

# Script: list the descriptions of all KEGG drug entries that contain a
# search term typed in by the user.

# Databases that can be listed: brite, pathway, genome (gene does not work),
# module, enzyme, glycan, compound, reaction, network, drug, disease
drug = REST.kegg_list('drug').read()

# Read the search term for the drug to look for (e.g. vaccine).
drug_name = input("찾고 싶은 약과 관련된 것을 입력해주세요: ")

# Every line must consist of exactly two tab-separated fields.
drug_list = [
    description
    for entry, description in (
        line.split("\t") for line in drug.rstrip().split("\n")
    )
    if drug_name in description
]
print(drug_list)
from Bio.KEGG import REST from bioservices import Reactome import csv from input import inp #gene_list = ['POLD1', 'POLE3', 'ABO', 'TP53'] gene_list = inp specie = "hsa" human_pathways = REST.kegg_list("pathway", specie).read() human_pathways_dict = {} repair_pathways = [] repair_pathways_dict = {} for line in human_pathways.rstrip().split("\n"): entry, description = line.split("\t") human_pathways_dict[entry] = description if "DNA" in description: repair_pathways.append(entry) repair_pathways_dict[entry] = description rejected = [] gene_dict = dict((gene,[]) for gene in gene_list) i = 0 len_ = len(human_pathways_dict.keys()) for pathway in human_pathways_dict.keys(): i += 1 print str(i) + ' // ' + str(len_) #print pathway
def KEGG(input1, input2):
    """Run one of several KEGG queries selected by two keywords.

    Args:
        input1: the action keyword; when it matches none of the keywords it
            is passed to ``REST.kegg_get`` and parsed as an enzyme record.
        input2: the action argument (entry id, search term, or the record
            field to return for the enzyme lookup).

    Returns:
        Depends on the action: ``None`` for the printing actions, the result
        of ``to_df`` for the table actions, an ``Image`` for the picture
        actions, or a field of the enzyme record.
    """
    # Print overview. The info request is made only here, where it is used,
    # instead of on every call.
    if input1 == "info" and input2 == "alt":
        result = REST.kegg_info("kegg").read()
        return print(result)

    # Get all entries in the PATHWAY database as a dataframe
    if input1 == "pathway_overview" and input2 == "alle":
        result = REST.kegg_list("pathway").read()
        return to_df(result)

    # Show a pathway picture ("alle": the map01100 overview)
    if input1 == "print_pathway":
        if input2 == "alle":
            result = REST.kegg_get("map01100", "image").read()
            img = Image(result, width=1500, height=1000)
        else:
            result = REST.kegg_get(input2, "image").read()
            img = Image(result)
        return img

    # Find a compound, e.g. vanillin
    if input1 == "find_molekyle" and input2 is not None:
        result = REST.kegg_find("compound", input2).read()  # cpd:C00755
        return print(result)

    # Get the entry information of a compound, e.g. cpd:C00755
    if input1 == "info_molekyle" and input2 is not None:
        result = REST.kegg_get(input2).read()
        return print(result)

    # Display the molecular structure of a compound
    if input1 == "molekyle billede" and input2 is not None:
        result = REST.kegg_get(input2, "image").read()  # "cpd:C00755"
        return Image(result)

    if input1 == "Enzyme molekyle" and input2 is not None:
        result = REST.kegg_find("enzyme", input2).read()
        return to_df(result)

    # The enzyme part: input1 is an entry id, input2 selects the field.
    from Bio.KEGG import Enzyme
    request = REST.kegg_get(input1)
    records = Enzyme.parse(request)
    record = list(records)[0]
    if input2 == "reaction":
        return record.reaction
    elif input2 == "classname":
        return record.classname
    elif input2 == "genes":
        genes = list()
        for g in record.genes:
            gene_id, gene_symbol = g
            genes.append(gene_id)
        return genes
    else:
        print("Du har indskrevet nogget der ikke er gældende. Prøv igen")
#13.10.KEGG.py from Bio.KEGG import REST human_pathways = REST.kegg_list("pathway", "hsa").read() hepatitis_pathways = [] for line in human_pathways.rstrip().split("\n"): entry, description = line.split("\t") if "hepatitis" in description.lower(): hepatitis_pathways.append(entry) print(entry, description) print(hepatitis_pathways) hepatitis_genes = [] for pathway in hepatitis_pathways: pathway_file = REST.kegg_get(pathway).read() current_section = None for line in pathway_file.rstrip().split("\n"): section = line[:12].strip() if not section == "": current_section = section if current_section == "GENE": gene_identifiers, gene_description = line[12:].split("; ") gene_id, gene_symbol = gene_identifiers.split() if not gene_symbol in hepatitis_genes: hepatitis_genes.append(gene_symbol) print(
analysis_folder=os.path.dirname(__file__) KEGG_data_folder=os.path.join(analysis_folder, 'KEGG_data/') import urllib.request #Download KEGG onthology if not os.path.isfile(os.path.join(KEGG_data_folder,'ko00001.json')): url='https://www.genome.jp/kegg-bin/download_htext?htext=ko00001&format=json&filedir=' urllib.request.urlretrieve(url, os.path.join(KEGG_data_folder,'ko00001.json')) #get all human pathways hsa_path_list=KEGG_REST.kegg_list('pathway','hsa') identifiers=[] for line in hsa_path_list: identifiers.append(line.partition('\t')[0][5:]) for identifier in identifiers: KGML_handle=KEGG_REST.kegg_get(identifier,option='kgml') file=open(os.path.join(KEGG_data_folder,identifier+'.kgml'),'w') file.write(KGML_handle.read()) file.close() #%% parse_pathways=1 if parse_pathways:
from Bio.KEGG import REST from bioservices import Reactome import csv from input import inp #gene_list = ['POLD1', 'POLE3', 'ABO', 'TP53'] gene_list = inp specie = "hsa" human_pathways = REST.kegg_list("pathway", specie).read() human_pathways_dict = {} repair_pathways = [] repair_pathways_dict = {} for line in human_pathways.rstrip().split("\n"): entry, description = line.split("\t") human_pathways_dict[entry] = description if "DNA" in description: repair_pathways.append(entry) repair_pathways_dict[entry] = description rejected = [] gene_dict = dict((gene, []) for gene in gene_list) i = 0 len_ = len(human_pathways_dict.keys()) for pathway in human_pathways_dict.keys(): i += 1 print str(i) + ' // ' + str(len_) #print pathway pathway_file = REST.kegg_get(pathway).read() current_section = None
from Bio.Seq import Seq
from Bio.KEGG import Enzyme
from Bio.KEGG import REST
from Bio.KEGG.KGML import KGML_parser
from Bio.KEGG import Map

# Script: collect the KEGG organism codes, then print the name and the
# graphics x coordinates of every compound on the hsa01100 KGML map.

#request = REST.kegg_get("ec:5.4.2.2")
#open("ec_5.4.2.2.txt",'w').write(request.read())
#records = Enzyme.parse(open("ec_5.4.2.2.txt"))
#record = list(records)[0]
#print(record.classname)
#print(record.entry)

organisms = REST.kegg_list("organism").read()
# The organism code is the second tab-separated field of each line.
organismlist = [
    line.split("\t")[1] for line in organisms.rstrip().split("\n")
]
#print(organismlist)

#parser = KGML_parser.KGMLparser()
#open("human_map.xml",'w').write(REST.kegg_get("hsa05130",option="kgml").read())
kgml_handle = REST.kegg_get("hsa01100", option="kgml")
human_map = KGML_parser.read(kgml_handle)

cpds = human_map.compounds
for cpd in cpds:
    print(cpd.name)
    graphics = cpd.graphics
    for graphic in graphics:
        print(graphic.x)

rxns = human_map.reaction_entries
from Bio.KEGG import REST import urllib.request import re import sys ##### TO DO ############# #### Get Organism ID from organims name ##### result = REST.kegg_list("pathway", "aga").read() #print(result.split("\t")[0]) pathw_ids = [] pathw = [] for item in result.split("\t"): #print(item) #print("*") tmp2 = item.split("\n") if len(tmp2) > 1: if tmp2[1] != "": pathw.append(tmp2[0]) pathw_ids.append(tmp2[1]) #print(pathw) for pathway in pathw_ids: #pathway = 'hsa00010' # glycolysis url = "http://rest.kegg.jp/get/" + pathway with urllib.request.urlopen(url) as f: lines = f.read().decode('utf-8').splitlines() want = 0 for line in lines: fields = line.split()
def list(db, org="hsa"):
    """Return the KEGG listing of ``db`` for organism ``org`` as bytes.

    Invokes the ``REST.kegg_list`` function from the Biopython module,
    reads the whole response and encodes the text with ``str.encode``.
    """
    handle = REST.kegg_list(db, org)
    text = handle.read()
    return str.encode(text)