def output(self, outputfile):
    """Write a tab-separated compound -> HMDB identifier table.

    Selects the 'meta' organism through pythoncyc, walks every pathway
    returned by ``all_pathways()`` and every compound returned by
    ``compounds_of_pathway()``, and records the first '|HMDB|' entry of
    each compound frame's ``dblinks`` attribute.  Compounds whose frame
    has no ``dblinks`` attribute or no '|HMDB|' key are left out.

    Args:
        outputfile: Path of the file to (over)write.  One line is written
            per mapped compound: ``<compound>\\t<HMDB id>\\n``.
    """
    # PGDB object associated with 'meta' (MetaCyc).
    meta = pythoncyc.select_organism('meta')

    mapping = dict()
    for pway in meta.all_pathways():
        print("PATHWAY: ", pway)
        for compound in meta.compounds_of_pathway(pway):
            # A compound can occur in several pathways; fetch its frame once.
            if compound in mapping:
                continue
            frame_attrs = PFrame(compound, meta, getFrameData=True).__dict__
            if 'dblinks' in frame_attrs and '|HMDB|' in frame_attrs['dblinks']:
                # Only the first element of the HMDB link is kept.
                mapping[compound] = frame_attrs['dblinks']['|HMDB|'][0]

    print("WRITING FILE...")
    # The context manager guarantees the file is flushed and closed, even if
    # a write fails part-way through.
    with open(outputfile, 'w') as outfile:
        for compound, hmdb_id in mapping.items():
            outfile.write(compound + "\t" + hmdb_id + "\n")
def connect_to_ptools(ptools_org):
    """Select an organism in Pathway Tools, or terminate the program.

    Args:
        ptools_org: Organism identifier passed to
            ``pythoncyc.select_organism``.

    Returns:
        The object returned by ``pythoncyc.select_organism``.

    Raises:
        SystemExit: With exit status 1 when selecting the organism fails
            for any reason; an error message is printed first.
    """
    # Function-scope import keeps this block self-contained.
    import sys

    try:
        cyc = pythoncyc.select_organism(ptools_org)
    except Exception:
        # `Exception` (not a bare except) so that Ctrl-C and SystemExit are
        # not misreported as a connection problem.
        print("Error: Could not connect to ptools")
        # Non-zero status lets calling shells/scripts detect the failure.
        sys.exit(1)
    return cyc
def main():
    """Command-line entry point.

    Depending on the parsed options this either lists the available PGDBs,
    writes a MicroDex-compatible pathway file, or builds the reaction graph
    and prints summary counts.  Every path ends by exiting the interpreter.

    Side effects:
        Rebinds the module-level ``cyc`` and may add entries to the
        module-level ``excluded_substrates``.

    Raises:
        SystemExit: Always; status 1 when Pathway Tools cannot be reached,
            status 0 otherwise.
    """
    # Function-scope import keeps this block self-contained.
    import sys

    global cyc

    (opts, args) = parser.parse_args()
    check_arguments(opts)

    # print available organisms
    if opts.list_organisms:
        print("Listing Available PGDBs:")
        org_list = pythoncyc.all_orgids()
        org_list = map(clean_ptools_output, org_list)
        for organism in org_list:
            print(organism)
        sys.exit()

    # connect to ePGDB
    try:
        cyc = pythoncyc.select_organism(opts.ptools_org)
    except Exception:
        # `Exception` rather than a bare except, so Ctrl-C is not reported
        # as a connection failure.
        print("Could not connect to Pathway Tools. Run pathway-tools/pathway-tools -lisp -python-local-only")
        sys.exit(1)

    # create compatible pathway file for MicroDex
    if opts.micro_dex:
        create_micro_dex_file(cyc, opts.output_file)
        sys.exit()

    if os.path.exists("metabolite_count.pk"):
        # Pickles are binary data: open in 'rb' and let `with` close the file.
        # NOTE: pickle.load can execute arbitrary code; only use a
        # metabolite_count.pk that this project produced itself.
        with open("metabolite_count.pk", 'rb') as fh:
            metabolite_count = pickle.load(fh)
        sorted_metabolite_count = sorted(metabolite_count.items(),
                                         key=operator.itemgetter(1),
                                         reverse=True)
        # Among the 500 highest counts, exclude those that reach 1000.
        for metabolite in sorted_metabolite_count[0:500]:
            if metabolite[1] >= 1000:
                excluded_substrates[metabolite[0]] = metabolite[1]

    print("Constructing Graph")
    construct_graph(cyc)

    print("%s edges" % (len(edges),))
    print("%s verticies" % (len(verticies),))
    print("%s substrate_to_verticies" % (len(substrate_to_verticies),))
    print("%s id_index" % (id_index,))
    print("Done")

    sys.exit()
def input(self, inputfile):
    """Load the parameter file, the ID mapping and the abundance header.

    The parameter file holds one ``key<TAB>value`` pair per line and must
    provide at least ``hostname``, ``csvfile`` and ``mapping``.

    Populates:
        self.parameters: dict of the key/value pairs read from ``inputfile``.
        self.meta: result of ``pythoncyc.select_organism('meta')``.
        self.reversemapping: second mapping column -> "X" + first column,
            for every mapping row whose second column is not "NOTFOUND".
        self.microbes: derived microbe name -> original header entry.
        self.metabolites: mapped names of the remaining header entries,
            skipping those mapped to "NOTFOUND".

    Args:
        inputfile: Path of the tab-separated parameter file.
    """
    # All files are opened through context managers so the handles are
    # released even when parsing raises.
    with open(inputfile, 'r') as params:
        self.parameters = dict()
        for line in params:
            contents = line.strip().split('\t')
            self.parameters[contents[0]] = contents[1]

    config.set_host_name(self.parameters['hostname'])
    self.meta = pythoncyc.select_organism('meta')

    with open(self.parameters['csvfile'], 'r') as abundancefile, \
            open(self.parameters['mapping'], 'r') as mappingfile:
        # The first line of the mapping file is read and discarded.
        mappingfile.readline()
        mapping = dict()
        self.reversemapping = dict()
        for line in mappingfile:
            contents = line.strip().split('\t')
            mapping["X" + contents[0]] = contents[1]
            if contents[1] != "NOTFOUND":
                self.reversemapping[contents[1]] = "X" + contents[0]

        # Only the first line of the abundance CSV is needed here.
        firstline = abundancefile.readline().strip()

    entries = firstline.split(',')
    entries.remove(entries[0])  # drop the leading column label

    self.microbes = dict()
    self.metabolites = []
    for entry in entries:
        # Drop the first and last character of the entry
        # (presumably surrounding quotes -- TODO confirm).
        entry2 = entry[1:len(entry) - 1]
        # Microbe: a '.' sits three characters from the end, or "Unassigned".
        if entry2[len(entry2) - 3] == '.' or entry2 == "Unassigned":
            # Not classified as lowest level
            if entry2.find('.') != len(entry2) - 3:
                microbe = entry2[2:len(entry2) - 3]
            else:
                microbe = entry2[0:len(entry2) - 3]
            self.microbes[microbe] = entry
        # Metabolite
        else:
            metabolite = mapping[entry2]
            if metabolite != "NOTFOUND":
                self.metabolites.append(metabolite)
# needs: pathway-tools -python-local-only-non-strict -lisp import pythoncyc import sys import re import pdb meta = pythoncyc.select_organism('meta') pythoncyc.sendQueryToPTools("(select-organism :org-id 'META)") def getReaInfos(pwy): meta.allpwy = meta.all_pathways() #pwy = "ANAEROFRUCAT-PWY" #pwy = "ALL-CHORISMATE-PWY" if meta[pwy] == None: print(pwy, "Pathway does not exist") return ([[], []]) rea_list = [] if meta[pwy].key_reactions != None: keyRea_list = meta[pwy].key_reactions else: keyRea_list = [] check_list = meta[pwy]["reaction_list"] if meta[pwy].sub_pathways != None: check_list.extend( [p for p in meta[pwy].sub_pathways if p not in check_list]) isSuperPwy = False while len(check_list) > 0: ptmp = check_list.pop()
from pprint import pprint, pformat import pythoncyc from pythoncyc.PToolsFrame import PFrame pgdb = dict() pgdb['tigr4'] = pythoncyc.select_organism('spne170187') pgdb['d39'] = pythoncyc.select_organism('spne373153') pgdb['t19f'] = pythoncyc.select_organism('spne487213') def show_gpr(db, rxn): rframe = PFrame(rxn, db) if rframe is not None and rframe.enzymatic_reaction is not None: for er in rframe.enzymatic_reaction: erframe = PFrame(er, db) print " " + er pframe = PFrame(erframe.enzyme, db) genes = [PFrame(g, db).accession_1 for g in pframe.gene] print " " + erframe.enzyme + ' -- ' + " ".join(genes) def show_gprs(rxn): for db in pgdb: print db show_gpr(pgdb[db], rxn) def build_python_command(command, *args, **kwargs): args_str = [pformat(arg) for arg in args]
def PGDB_select(name):
    """Return the object pythoncyc yields when selecting organism *name*."""
    return pythoncyc.select_organism(name)
def get_pathways_from_biocyc(model,
                             csv_fn,
                             biocyc_subsystem_fn,
                             db="meta",
                             add_annotations_to_model=True):
    """
    ** Warning: Must be run using python 2.7 and PathwayTools running in the background **

    Use pythoncyc to extract pathway information from the BioCyc database
    based on the BioCyc annotation of each reaction. The result is a table
    where the rows are reactions (IDs) and the columns are the BioCyc
    pathway annotations. Because BioCyc uses very small pathways, the parent
    pathways ("Subsystem 1" / "Subsystem 2") are used as annotations.

    Some key steps are required to run this function:
    - PathwayTools must be running in the background
    - You need the pythoncyc package (https://github.com/latendre/PythonCyc),
      more info at https://bioinformatics.ai.sri.com/ptools/pythoncyc.html
    - Pythoncyc only works with python 2.7

    # Parameters
    - model: SBML-model (imported with cobrapy)
    - csv_fn: Where to store the created csv-file
    - biocyc_subsystem_fn: This is in general the All_pathways_of_MetaCyC.txt
      file, but can be replaced by similar tab-separated files that are
      indexed by pathway ID and have "Subsystem 1" and "Subsystem 2" columns.
    - db: Which db in BioCyc to use.
    - add_annotations_to_model: A flag used to turn on/off writing the
      biocyc annotations to the model reactions

    # Returns
    The same ``model`` object (annotated in place when
    ``add_annotations_to_model`` is true).

    # Raises
    AssertionError if run under python 3.
    """
    import sys
    assert sys.version_info[0] < 3, ("Can't use PythonCyc with python 3")
    import pythoncyc  # Add this import here, so it is only imported if used

    df_subsystem = pd.read_csv(biocyc_subsystem_fn, sep="\t", index_col=0)
    biocyc_db = pythoncyc.select_organism(db)

    pathway_list = []
    # NOTE(review): the [::2] slice visits only every second reaction. This
    # looks like a leftover from testing -- confirm before relying on the
    # table being complete. Left as-is to preserve existing output.
    for r in model.reactions[::2]:
        print(r.id, end="\t")
        try:
            biocyc = r.annotation["biocyc"]
        except KeyError:
            print()
            continue

        # Fix erroneous annotations
        if biocyc[:5] == "META:":
            biocyc = biocyc[5:]

        r_db = biocyc_db[biocyc]
        try:
            pathways = r_db["in_pathway"]
        except TypeError:
            # Subscripting failed, i.e. the lookup did not return a frame.
            print(biocyc, " is not in sco-db")
            continue

        if not isinstance(pathways, list):
            print("No pathways given for ", biocyc)
            continue

        sub1_list = []
        sub2_list = []
        for pathway in pathways:
            print(pathway, end=", ")
            pwy = pathway.replace("|", "")
            try:
                sub1 = df_subsystem.loc[pwy, "Subsystem 1"].split(
                    "//")[0].strip()
                sub2 = df_subsystem.loc[pwy, "Subsystem 2"].split(
                    "//")[0].strip()
            except KeyError:
                # Pathway is missing from the subsystem table: skip it.
                pass
            else:
                sub1_list.append(sub1)
                sub2_list.append(sub2)

        unique_sub1 = list(set(sub1_list))
        unique_sub2 = list(set(sub2_list))
        pathway_list.append([
            r.id,
            # Join the pathway *list*; joining the loop variable would
            # interleave ';' between the characters of the last pathway ID
            # (and use a stale/undefined name when the list is empty).
            ";".join(pathways),
            ";".join(unique_sub1),
            ";".join(unique_sub2)
        ])
        if len(sub1_list) and add_annotations_to_model:
            r.annotation["biocyc.subsystem1"] = unique_sub1
            r.annotation["biocyc.subsystem2"] = unique_sub2
        print(sub1_list, sub2_list)

    # Passing `columns` to the constructor also works when no rows were
    # collected; assigning df.columns afterwards raises on an empty frame.
    df = pd.DataFrame(
        pathway_list,
        columns=["Reaction ID", "Pathway", "Subsystem 1", "Subsystem 2"])
    df.to_csv(csv_fn)
    return model
def __init__(self):
    """Store the PGDB selected with organism id 'eco' on ``self.pgdb``."""
    selected_pgdb = pythoncyc.select_organism('eco')
    self.pgdb = selected_pgdb
""" Note that this script can only be run if BioCyc's pathway tools is installed locally and is being served via the following command: ./pathway-tools -lisp -python """ #%% import numpy as np import pandas as pd import tqdm as tqdm import pythoncyc annotations = pd.read_csv('../../../data/ecoli_genelist_master.csv') data = pd.read_csv('../../../data/compiled_absolute_measurements.csv') # %% ecocyc = pythoncyc.select_organism('ECOLI') prot_cplx = list(ecocyc.all_protein_complexes()) for frame in ecocyc['Protein-RNA-Complexes']: prot_cplx.append(frame['frameid']) #%% # Instantiate the dataframe. _df = pd.DataFrame([]) for c in tqdm.tqdm(prot_cplx): components, counts = ecocyc.monomers_of_protein(c) if (components is not None) & (counts is not None): for prot, sub in zip(components, counts): if ecocyc[prot].abbrev_name is None: if ecocyc[prot].synonyms is None: gene = prot.split('-')[0][1:]
genes = root.findall("./Gene") for gene in genes: geneID = gene.attrib['frameid'] geneName = gene.findall("common-name")[0].text print "Gene: " + geneName + ", GeneID: " + geneID getPromoter(geneID) # this function pulls the promoterIDs associated with the geneID passed into it def getPromoter(geneID): resp = requests.get("http://ecocyc.org/apixml?fn=transcription-units-of-gene&id=ECOLI:{}".format(geneID)) root=ET.fromstring(resp.text) #print(resp.text) promoters=root.findall("./Transcription-Unit/component/Promoter") #print promoters for promoter in promoters: pID = promoter.attrib['frameid'] #regulon = ecoli.containing_chromosome(pID) print "promoter: ",pID #print(regulon) ecoli = pc.select_organism("ECOLI") pathwayFromCompound("SER") #genesFromPathway("HISTSYN-PWY") # In[ ]:
def selectOrganism(code):
    """Select organism *code* if the Pathway Tools check succeeds.

    Returns the result of ``pythoncyc.select_organism(code)`` when
    ``checkPathwayTools(verbose=False)`` is truthy, otherwise ``None``.
    """
    pathway_tools_ok = checkPathwayTools(verbose=False)
    if not pathway_tools_ok:
        return None
    return pythoncyc.select_organism(code)
def writecsv(data, ofile, delim='\t'): f = open(ofile, 'wb') ofile = csv.writer( f, delimiter=delim ) # dialect='excel',delimiter=';', quotechar='"', quoting=csv.QUOTE_ALL for row in data: #row = [item.encode("utf-8") if isinstance(item, unicode) else str(item) for item in row] ofile.writerow(row) f.close() import pythoncyc as pc ecoli = pc.select_organism('ECOLI') allcofs = ecoli.all_cofactors() allcofs = [cf for cf in allcofs if 'CPD' in cf] cfnames = [ecoli[cf].common_name for cf in allcofs] cofs = ['|PYRIDOXAL_PHOSPHATE|'] rxns = ecoli['|PYRIDOXAL_PHOSPHATE|'].cofactors_of #cmplx=list(set([ecoli[e].enzyme for e in enzs])) genes = [] for r in rxns: