def __call__(self):
    """Collect the reaction sets for this object's pathway.

    Opens a ``Kegg`` handle on ``self._project`` and queries it for
    ``self.path_id``.

    Returns a ``(path_id, dpangenome)`` tuple, where ``dpangenome`` is a
    dict with the keys ``'all'``, ``'core'``, ``'dispensable'``,
    ``'accessory'`` and ``'unique'``.
    """
    path_id = self.path_id
    kegg = Kegg(self._project)

    # Materialise the mapped entries: the getter may hand back a lazy
    # iterable (presumably a DB cursor - TODO confirm).
    mapped = list(kegg.getMappedRPairsReact(path_id))

    # The exclusive query yields four collections, in this fixed order.
    core, dispensable, accessory, unique = \
        kegg.getExclusiveRPairsReact(path_id)

    dpangenome = dict(all=mapped,
                      core=core,
                      dispensable=dispensable,
                      accessory=accessory,
                      unique=unique)
    return path_id, dpangenome
def getPathsReacts(project):
    """Group the project's pathway-reaction links by pathway.

    Returns a dict mapping each pathway ID to the list of reaction IDs
    linked to it, in the order the links are returned by
    ``Kegg.getPathReacts``. Three pathway IDs are always left out
    (presumably the KEGG global overview maps - TODO confirm).
    """
    skipped = ('path:rn01100', 'path:rn01110', 'path:rn01120')

    reactsByPath = {}
    for link in Kegg(project).getPathReacts():
        if link.path_id in skipped:
            continue
        # setdefault creates the list the first time a pathway shows up
        reactsByPath.setdefault(link.path_id, []).append(link.re_id)

    return reactsByPath
except Exception as e: logger.warning('Could not fetch the KEGG DB version (%s)'%str(e)) release = None if proj.isKegg(): if release and proj.kegg < release: logger.warning('A new KEGG DB version is available (%s, was %s)'% (str(release), str(proj.kegg))) fetch = True else: fetch = True if fetch: logger.info('Fetching the whole KEGG metabolic map') if release: logger.info('KEGG DB release %s'%str(release)) kegg = Kegg(project) knet = KeggNet(keeptrying=keeptrying) if not RunThread(knet): return False # Details kegg.addPathways(knet.result.path) logger.info('Added %d Path IDs'%len(knet.result.path)) kegg.addReactions(knet.result.react) logger.info('Added %d Re IDs'%len(knet.result.react)) kegg.addCompounds(knet.result.comp) logger.info('Added %d Co IDs'%len(knet.result.comp)) kegg.addRPairs(knet.result.rpair) logger.info('Added %d RPair IDs'%len(knet.result.rpair)) # Links
def _stripIdPrefix(kegg_id, prefix):
    """Return ``kegg_id`` without ``prefix`` when it starts with it.

    ``str.lstrip(prefix)`` would be wrong here: it removes any leading
    run of the *characters* in ``prefix``, so it can also eat the first
    characters of the identifier itself.
    """
    if kegg_id.startswith(prefix):
        return kegg_id[len(prefix):]
    return kegg_id


def _exportOrgLinks(org_id, links, fname, prefix, emptyMsg, savedMsg):
    """Write ``(protein, KEGG id)`` pairs to a two-column TSV file.

    ``emptyMsg`` is formatted with ``org_id``; ``savedMsg`` with
    ``(count, org_id, fname)``. When ``links`` yields nothing, the
    (empty) file is removed and a warning is logged instead.
    """
    count = 0
    with open(fname, 'w') as fout:
        for prot_id, kegg_id in links:
            fout.write('%s\t%s\n' % (prot_id,
                                     _stripIdPrefix(kegg_id, prefix)))
            count += 1

    if count == 0:
        os.remove(fname)
        logger.warning(emptyMsg % org_id)
    else:
        logger.info(savedMsg % (count, org_id, fname))


def dGenomeExport(project):
    """Export the project's genomic data to files in the working directory.

    For every organism this writes ``<org_id>.faa`` (proteins, FASTA),
    ``ko_<org_id>.tsv`` and ``reactions_<org_id>.tsv`` (the last two are
    deleted again when there is nothing to put in them). When the project
    is flagged as a pangenome and the pangenome is not empty, it also
    writes ``pangenome.tsv`` (group, protein) and
    ``pangenome_category.tsv`` (group, category, '-'-joined organisms).

    Returns ``False`` when the project holds no organism, ``True``
    otherwise.
    """
    # Is there something to be exported?
    organism = Organism(project)
    if organism.howMany() == 0:
        logger.info('No genomic data can be exported at this time')
        return False

    logger.info('Exporting protein data')
    genome = Genome(project)

    for org in organism.getAll():
        fname = '%s.faa' % org.org_id
        # Close the handle deterministically so the FASTA is flushed
        # before its name is reported.
        with open(fname, 'w') as fout:
            nprots = SeqIO.write(list(genome.getRecords(org.org_id)),
                                 fout, 'fasta')
        logger.info('Saved %d proteins from %s (%s)' %
                    (nprots, org.org_id, fname))

    logger.info('Exporting Kegg data')

    logger.info('Exporting KO map data')
    kegg = Kegg(project)

    for org in organism.getAll():
        _exportOrgLinks(org.org_id,
                        kegg.getAllKO(org.org_id),
                        'ko_%s.tsv' % org.org_id,
                        'ko:',
                        'No KO links available for %s',
                        'Saved %d KO links for %s (%s)')

    logger.info('Exporting Kegg reactions data')

    for org in organism.getAll():
        _exportOrgLinks(org.org_id,
                        kegg.getAllReactions(org.org_id),
                        'reactions_%s.tsv' % org.org_id,
                        'rn:',
                        'No Kegg reactions available for %s',
                        'Saved %d Kegg reactions links for %s (%s)')

    proj = Project(project)

    if proj.isPanGenome():
        logger.info('Exporting pangenome data')

        dG = genome.getPanGenome()
        if len(dG) == 0:
            logger.warning('No pangenome available')
        else:
            fname = 'pangenome.tsv'
            with open(fname, 'w') as fout:
                # iteritems: this module targets Python 2
                for group, prots in dG.iteritems():
                    for prot in prots:
                        fout.write('%s\t%s\n' % (group, prot))
            logger.info('Exported %d orthologs (%s)' % (len(dG), fname))

            fname = 'pangenome_category.tsv'
            groupOrgs = genome.getPanGenomeOrgs()
            # The getters are called one at a time, right before their
            # results are consumed, exactly as three separate loops would.
            categories = (('core', genome.getCore),
                          ('accessory', genome.getAcc),
                          ('unique', genome.getUni))
            with open(fname, 'w') as fout:
                for category, getGroups in categories:
                    for group in getGroups():
                        fout.write('%s\t%s\t%s\n' %
                                   (group.group_id,
                                    category,
                                    '-'.join(groupOrgs[group.group_id])))
            logger.info('Exported orthologs informations (%s)' % fname)

    return True
def _emitStat(line, doPrint):
    """Send one statistics line to stdout (``doPrint``) or to the logger."""
    if doPrint:
        # Parenthesised single argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print(line)
    else:
        logger.info(line)


def _tabJoin(fields):
    """Return the ``str()`` of every field, joined by tabs."""
    return '\t'.join([str(x) for x in fields])


def _orgCounts(genome, kegg, org_id):
    """Return ``(proteome size, mapped to kegg, reactions)`` for an organism."""
    return (genome.howMany(org_id),
            kegg.howManyMapped(org_id),
            kegg.howManyReactions(org_id))


def _singleGenomesStats(organism, genome, kegg, svg, doPrint):
    """Emit one statistics row per organism and plot the summary bars."""
    logger.info('Single genomes stats')

    header = '\t'.join(['ID', 'name', 'description', 'proteome size',
                        'mapped to kegg', 'KEGG orthology IDs',
                        'pathways', 'reactions'])
    _emitStat(header, doPrint)

    lOrg = []
    for org in organism.getAll():
        org_id = org.org_id
        name = org.name or 'NONE'
        description = org.description or 'NONE'

        prots = genome.howMany(org_id)
        mapped = kegg.howManyMapped(org_id)
        ko = kegg.howManyKO(org_id)
        react = kegg.howManyReactions(org_id)
        path = kegg.howManyPathways(org_id)

        # Column order follows the header: pathways before reactions
        _emitStat(_tabJoin([org_id, name, description, prots,
                            mapped, ko, path, react]), doPrint)

        lOrg.append([org_id, prots, mapped, react])

    plotMapBars(lOrg, 'Single genomes statistics', 'single', svg)


def _panGenomeStats(genome, kegg, svg, doPrint):
    """Emit the core/accessory/unique rows and draw the pangenome plots."""
    logger.info('Pangenome stats')

    header = '\t'.join(['kind', 'size', 'mapped to kegg',
                        'KEGG orthology IDs', 'pathways', 'reactions'])
    _emitStat(header, doPrint)

    core, acc, uni = (genome.getLenCore(),
                      genome.getLenAcc(),
                      genome.getLenUni())

    rows = []
    lPanGenome = []
    for category, label, size in (('core', 'Core', core),
                                  ('accessory', 'Accessory', acc),
                                  ('unique', 'Unique', uni)):
        # Each count is queried once and shared by the table and the plot
        mapped = kegg.howManyMapped(pangenome=category)
        ko = kegg.howManyKO(pangenome=category)
        path = kegg.howManyPathways(pangenome=category)
        react = kegg.howManyReactions(pangenome=category)

        rows.append(_tabJoin([category, size, mapped, ko, path, react]))
        lPanGenome.append([label, size, mapped, react])

    for row in rows:
        _emitStat(row, doPrint)

    plotMapBars(lPanGenome, 'PanGenome statistics', 'pangenome_stats', svg)
    plotPanGenome(core, acc, uni, svg)


def _mutantsStats(organism, genome, kegg, svg, doPrint):
    """Emit, for every non-mutant organism, its row and its mutants' rows."""
    refs = [org.org_id
            for org in organism.getAll()
            if not organism.isMutant(org.org_id)]

    header = '\t'.join(['ID', 'name', 'description', 'kind',
                        'proteome size', 'mapped to kegg', 'reactions'])

    for ref_id in refs:
        logger.info('Mutants of %s stats' % ref_id)
        _emitStat(header, doPrint)

        muts = list(organism.getOrgMutants(ref_id))

        # Reference counts are needed for the reference row and as the
        # baseline of every mutant: query them once per reference.
        refProts, refMapped, refReact = _orgCounts(genome, kegg, ref_id)

        lOrg = []
        for org_id in [ref_id] + muts:
            org = organism.getOrg(org_id)

            name = org.name or 'NONE'
            description = org.description or 'NONE'
            if org.mkind in ('deletion', 'insertion'):
                mkind = org.mkind
            else:
                mkind = 'wild-type'

            if org_id == ref_id:
                prots, mapped, react = refProts, refMapped, refReact
            else:
                prots, mapped, react = _orgCounts(genome, kegg, org_id)

            # A mutant's own counts are taken relative to its reference:
            # subtracted for a deletion, added for an insertion.
            if mkind == 'deletion':
                prots = refProts - prots
                mapped = refMapped - mapped
                react = refReact - react
            elif mkind == 'insertion':
                prots = refProts + prots
                mapped = refMapped + mapped
                react = refReact + react

            _emitStat(_tabJoin([org_id, name, description, mkind,
                                prots, mapped, react]), doPrint)

            lOrg.append([org_id, prots, mapped, react])

        plotMapBars(lOrg, 'Wild-type (%s) and mutants statistics' % ref_id,
                    '%s' % ref_id, svg)


def dGenomeStats(project, svg=False, doPrint=True):
    """Report genome statistics for the project and draw the related plots.

    The kind of project (as returned by ``dSetKind``) selects the report:
    ``'single'`` / ``'pangenome'`` give per-organism rows, followed by the
    core/accessory/unique rows when the project is flagged as a pangenome;
    ``'mutants'`` gives one table per non-mutant organism.

    project -- project handle passed to the DB wrappers
    svg     -- forwarded unchanged to the plotting functions
    doPrint -- rows go to stdout when true, to the logger otherwise

    Returns ``False`` when the project kind is none of the above, ``True``
    otherwise.
    """
    # Which project are we talking about?
    kind = dSetKind(project)

    proj = Project(project)
    organism = Organism(project)
    genome = Genome(project)
    kegg = Kegg(project)

    if kind in ('single', 'pangenome'):
        _singleGenomesStats(organism, genome, kegg, svg, doPrint)
        if proj.isPanGenome():
            _panGenomeStats(genome, kegg, svg, doPrint)
    elif kind == 'mutants':
        _mutantsStats(organism, genome, kegg, svg, doPrint)
    else:
        logger.info('No statistics can be computed at this time')
        return False

    return True
def fetchKegg(project):
    """Fetch the KEGG metabolic map into the project when needed.

    A fetch is done when the project has no KEGG data yet, or when the
    KEGG release reported online is greater than the one recorded in the
    project. IDs already stored in the project are passed to ``KeggNet``
    as ``avoid``. If the online release cannot be read, a warning is
    logged and the release comparison is skipped.

    Returns ``False`` when the download thread fails, ``True`` otherwise
    (including when nothing had to be fetched).
    """
    from ductape.kegg.kegg import KeggNet, KeggAPI
    from ductape.terminal import RunThread

    # Check if we have to fetch the whole kegg DB
    fetch = False

    proj = Project(project)

    k = KeggAPI()
    try:
        k.getDBVersion()
        release = k.result[1]
    except Exception as e:
        # Best effort: without a release we simply cannot detect updates
        logger.warning("Could not fetch the KEGG DB version (%s)" % str(e))
        release = None

    if proj.isKegg():
        if release and proj.kegg < release:
            logger.warning("A new KEGG DB version is available (%s, was %s)"
                           % (str(release), str(proj.kegg)))
            fetch = True
    else:
        fetch = True

    if fetch:
        logger.info("Fetching the whole KEGG metabolic map")
        if release:
            logger.info("KEGG DB release %s" % str(release))

        kegg = Kegg(project)

        avoid = list(kegg.getAllIDs())

        knet = KeggNet(avoid=avoid)
        if not RunThread(knet):
            return False

        # Details
        kegg.addPathways(knet.result.path)
        logger.info("Added %d Path IDs" % len(knet.result.path))
        kegg.addReactions(knet.result.react)
        logger.info("Added %d Re IDs" % len(knet.result.react))
        kegg.addCompounds(knet.result.comp)
        logger.info("Added %d Co IDs" % len(knet.result.comp))
        kegg.addRPairs(knet.result.rpair)
        logger.info("Added %d RPair IDs" % len(knet.result.rpair))
        # Links
        kegg.addPathReacts(knet.result.pathreact)
        kegg.addReactComps(knet.result.reactcomp)
        kegg.addCompReacts(knet.result.compreact)
        kegg.addPathComps(knet.result.pathcomp)
        kegg.addReactRPairs(knet.result.reactrpair)
        kegg.addRPairReacts(knet.result.rpairreact)
        logger.info("Added Kegg links")
        # HTML maps
        kegg.addPathHtml(knet.result.pathmaps)
        logger.info("Added Kegg maps")

        # Add the release version
        if release:
            proj.setKegg(release)
    else:
        logger.info("KEGG db is up-to-date")

    # Explicit success value: falling off the end returned None, which a
    # truthiness check cannot tell apart from the False failure value.
    return True
def fetchKegg(project, keeptrying=False):
    """Fetch the KEGG metabolic map into the project when needed.

    Connectivity is checked first. A fetch is then done when the project
    has no KEGG data yet, or when the KEGG release reported online is
    greater than the one recorded in the project.

    project    -- project handle passed to ``Project`` and ``Kegg``
    keeptrying -- forwarded unchanged to ``KeggNet``

    Returns ``False`` when the connectivity check or the download thread
    fails, ``True`` otherwise (including when nothing had to be fetched).
    """
    from ductape.kegg.kegg import KeggNet, KeggAPI, BaseKegg
    from ductape.terminal import RunThread

    proj = Project(project)

    logger.info('Checking connectivity')
    checker = BaseKegg()
    try:
        checker.checkConnection()
    except Exception as e:
        logger.error(str(e))
        return False

    api = KeggAPI()
    try:
        api.getDBVersion()
        release = api.result[1]
    except Exception as e:
        # Without a release the update check below is simply skipped
        logger.warning('Could not fetch the KEGG DB version (%s)'%str(e))
        release = None

    # Do we have to fetch the whole kegg DB?
    if not proj.isKegg():
        outdated = True
    elif release and proj.kegg < release:
        logger.warning('A new KEGG DB version is available (%s, was %s)'%
                       (str(release), str(proj.kegg)))
        outdated = True
    else:
        outdated = False

    if not outdated:
        logger.info('KEGG db is up-to-date')
        return True

    logger.info('Fetching the whole KEGG metabolic map')
    if release:
        logger.info('KEGG DB release %s'%str(release))

    kegg = Kegg(project)

    knet = KeggNet(keeptrying=keeptrying)
    if not RunThread(knet):
        return False

    fetched = knet.result

    # Details: store each collection, then report how many items it held
    details = (
        (kegg.addPathways, fetched.path, 'Added %d Path IDs'),
        (kegg.addReactions, fetched.react, 'Added %d Re IDs'),
        (kegg.addCompounds, fetched.comp, 'Added %d Co IDs'),
        (kegg.addRPairs, fetched.rpair, 'Added %d RPair IDs'),
    )
    for store, items, message in details:
        store(items)
        logger.info(message%len(items))

    # Links
    links = (
        (kegg.addPathReacts, fetched.pathreact),
        (kegg.addReactComps, fetched.reactcomp),
        (kegg.addCompReacts, fetched.compreact),
        (kegg.addPathComps, fetched.pathcomp),
        (kegg.addReactRPairs, fetched.reactrpair),
        (kegg.addRPairReacts, fetched.rpairreact),
    )
    for store, items in links:
        store(items)
    logger.info('Added Kegg links')

    # HTML maps
    kegg.addPathHtml(fetched.pathmaps)
    logger.info('Added Kegg maps')

    # Record the release version only when it is known
    if release:
        proj.setKegg(release)

    return True
def fetchKegg(project, keeptrying=False):
    """Bring the project's copy of the KEGG metabolic map up to date.

    Steps: check connectivity, read the online KEGG release, decide
    whether a fetch is required (no KEGG data in the project, or a
    recorded release lower than the online one), then download and store
    pathways, reactions, compounds, RPairs, their links and the pathway
    HTML maps.

    ``keeptrying`` is handed to ``KeggNet`` as is.

    Returns ``True`` on success or when the project is already
    up-to-date, ``False`` when connectivity or the download fails.
    """
    from ductape.kegg.kegg import KeggNet, KeggAPI, BaseKegg
    from ductape.terminal import RunThread

    proj = Project(project)

    logger.info('Checking connectivity')
    connection = BaseKegg()
    try:
        connection.checkConnection()
    except Exception as e:
        logger.error(str(e))
        return False

    versionApi = KeggAPI()
    try:
        versionApi.getDBVersion()
        release = versionApi.result[1]
    except Exception as e:
        logger.warning('Could not fetch the KEGG DB version (%s)' % str(e))
        release = None

    # A project without KEGG data always needs the whole DB; otherwise
    # only a known, newer release triggers a new fetch.
    needsFetch = not proj.isKegg()
    if not needsFetch and release and proj.kegg < release:
        logger.warning('A new KEGG DB version is available (%s, was %s)' %
                       (str(release), str(proj.kegg)))
        needsFetch = True

    if not needsFetch:
        logger.info('KEGG db is up-to-date')
        return True

    logger.info('Fetching the whole KEGG metabolic map')
    if release:
        logger.info('KEGG DB release %s' % str(release))

    kegg = Kegg(project)

    knet = KeggNet(keeptrying=keeptrying)
    if not RunThread(knet):
        return False

    data = knet.result

    # Details
    kegg.addPathways(data.path)
    logger.info('Added %d Path IDs' % len(data.path))

    kegg.addReactions(data.react)
    logger.info('Added %d Re IDs' % len(data.react))

    kegg.addCompounds(data.comp)
    logger.info('Added %d Co IDs' % len(data.comp))

    kegg.addRPairs(data.rpair)
    logger.info('Added %d RPair IDs' % len(data.rpair))

    # Links
    kegg.addPathReacts(data.pathreact)
    kegg.addReactComps(data.reactcomp)
    kegg.addCompReacts(data.compreact)
    kegg.addPathComps(data.pathcomp)
    kegg.addReactRPairs(data.reactrpair)
    kegg.addRPairReacts(data.rpairreact)
    logger.info('Added Kegg links')

    # HTML maps
    kegg.addPathHtml(data.pathmaps)
    logger.info('Added Kegg maps')

    # Add the release version
    if release:
        proj.setKegg(release)

    return True