def get_genes_from_kegg_pathway(pathway):
    """Fetch a KEGG pathway entry and return the symbols of its genes.

    The KEGG client is created per call with its organism set to 'hsa'.
    The entry identified by ``pathway`` is downloaded, parsed, and its
    'GENE' field is split into two columns.

    :param pathway: KEGG pathway identifier passed straight to ``KEGG.get``.
    :return: tuple holding the second space-separated field of every
        GENE entry (the first field is discarded).
    :raises KeyError: if the parsed entry has no 'GENE' field.
    :raises ValueError: if splitting the entries does not yield exactly
        two columns (this includes an empty gene list).
    """
    from bioservices.kegg import KEGG

    client = KEGG()
    client.organism = 'hsa'

    entry_text = client.get(pathway)
    gene_entries = client.parse(entry_text)['GENE']

    # NOTE(review): assumes every GENE entry is a string of the form
    # "<entrez> <symbol>" -- confirm against the installed bioservices version.
    split_entries = [entry.split(' ') for entry in gene_entries]
    entrez, symbol = zip(*split_entries)
    return symbol
def __connect(self, organism_code):
    """
    Purpose: Create a KEGG service handle bound to a single organism.

    @param organism_code: KEGG organism code to select, e.g. 'hsa'
        for Homo sapiens.
    @return: the newly created KEGG instance, with its ``organism``
        attribute set to organism_code.
    """
    service = KEGG()
    service.organism = organism_code
    return service
# Scratch script: opens a list of ids and queries KEGG for organism "lpl".
# The Entrez/PubMed variant is kept below, commented out, for reference.

# from Bio import Entrez
import sys

from bioservices.kegg import KEGG

k = KEGG()
# Entrez.email = "*****@*****.**"

# The input path is hard-coded; the command-line variant was:
# id_file = open(sys.argv[1], "r")
# A context manager guarantees the handle is closed even when one of the
# KEGG calls below raises.
with open("../data/ids5.txt", "r") as id_file:
    result = ""
    k.organism = "lpl"
    # NOTE(review): get() is called without an entry id, and nothing is
    # ever stored in `result`. bioservices' KEGG.get appears to require
    # the entry id(s) as its first argument -- confirm which ids were
    # meant to be fetched (presumably the lines of id_file).
    k.get()
    # for line in id_file.readlines():
    #

print(result)

# for line in id_file.readlines():
#     handle = Entrez.esearch(db="pubmed", term=line)
#     record = Entrez.read(handle)
#     ids = record["IdList"]
#     print(ids)
""" KEGG module example ==================== Histogram of KEGG pathways relations """ ################################################# # from pylab import * # extract all relations from all pathways from bioservices.kegg import KEGG s = KEGG() s.organism = "hsa" # retrieve more than 260 pathways so it takes time max_pathways = 10 results = [s.parse_kgml_pathway(x) for x in s.pathwayIds[0:max_pathways]] relations = [x['relations'] for x in results] # plot hist([len(this) for this in relations], 20) xlabel('number of relations') ylabel('#') title("number of relations per pathways") grid(True)
Author: Daniel Esposito Date: 28/12/2015 Purpose: Wrapper Class for accessing KEGG via the bioservices interface. So far this class implements methods to obtain all pathways and then all reactions from those pathways in edgelist format tagged with the type of reaction. """ from bioservices.kegg import KEGG from predict.parsing import PPI import pandas as pd # ----------------------------------- UTILS -------------------------------- # kegg = KEGG() kegg.organism = 'hsa' reactions_to_exclude = [ 'missing-interaction', 'indirect-effect', 'expression', 'repression', 'compound', 'hidden-compound' ] def uniprot_cmp(x, y): t = {'P':0, 'Q':1, 'O':2} try: x_num = t[x[0]] except KeyError: x_num = ord(x[0])
import re

from bioservices.kegg import KEGG

# -- KEGG bioservice
bioser = KEGG(cache=True)
bioser.organism = 'hsa'

# NOTE: each print below is a call with one string argument, so it prints
# the same text under Python 2 and also parses under Python 3.

# Get pathways: raw KEGG entry text keyed by pathway id
keggp = {p: bioser.get(p) for p in bioser.pathwayIds}
print('[INFO] Pathways fetched')

# Get reactions: raw KEGG entry text keyed by reaction id
keggr = {r: bioser.get(r) for r in bioser.reactionIds}
print('[INFO] Reactions fetched')

# Get enzymes: raw KEGG entry text keyed by enzyme id
kegge = {e: bioser.get(e) for e in bioser.enzymeIds}
print('[INFO] Enzymes fetched')

# keggc = {c: bioser.get(c) for c in bioser.compoundIds}
# print('[INFO] Compounds fetched')
#
# # Get modules
# keggm = {m: bioser.get(m) for m in bioser.moduleIds}
# print('[INFO] Modules fetched')


# -- KEGG methods
def get_pathway_names(pathways=None):
    """Map pathway ids to the name found on their entry's NAME line.

    The name is the text after ``NAME`` up to the end of that line,
    truncated at the first ``' - '`` separator.

    :param pathways: iterable of pathway ids to look up in ``keggp``;
        when empty or None, every pathway fetched into ``keggp`` is used.
    :return: dict of ``{pathway id: name}``.
    :raises KeyError: if a requested id is not a key of ``keggp``.
    :raises IndexError: if an entry contains no NAME line.
    """
    pathways_ = pathways if pathways else set(keggp)
    # Raw string keeps \s as a regex escape instead of an invalid string escape.
    return {
        p: re.findall(r'NAME\s+(.*)?\n', keggp[p])[0].split(' - ')[0]
        for p in pathways_
    }
""" KEGG module example ==================== Histogram of KEGG pathways relations """ ################################################# # from pylab import * # extract all relations from all pathways from bioservices.kegg import KEGG s = KEGG() s.organism = "hsa" # retrieve more than 260 pathways so it takes time max_pathways = 10 results = [s.parse_kgml_pathway(x) for x in s.pathwayIds[0:max_pathways]] relations = [x['relations'] for x in results] # plot hist([len(this) for this in relations], 20) xlabel('number of relations') ylabel('#') title("number of relations per pathways") grid(True)