def _dropIdPrefix(identifier, prefix):
    """Return *identifier* without a leading *prefix*, when it has one.

    ``str.lstrip`` is deliberately not used here: it removes any leading
    run of the given *characters* rather than the literal prefix, so an
    identifier that begins with one of those characters would be truncated.
    """
    if identifier.startswith(prefix):
        return identifier[len(prefix):]
    return identifier


def _writeLinksTable(fname, links, prefix):
    """Write ``(protein, target)`` pairs to *fname* as tab-separated rows.

    *prefix* is dropped from each target identifier before writing. When
    *links* yields nothing, the (empty) file is removed again.

    Returns the number of rows written.
    """
    written = 0
    with open(fname, 'w') as fout:
        for prot_id, target_id in links:
            fout.write('%s\t%s\n' % (prot_id,
                                     _dropIdPrefix(target_id, prefix)))
            written += 1

    if written == 0:
        # Do not leave an empty table behind
        os.remove(fname)

    return written


def dGenomeExport(project):
    """Export the genomic data of *project* to files with relative paths.

    For each organism returned by ``Organism(project).getAll()``:

    * ``<org_id>.faa``: the records from ``Genome.getRecords`` in fasta format
    * ``ko_<org_id>.tsv``: protein / KO pairs from ``Kegg.getAllKO``
    * ``reactions_<org_id>.tsv``: protein / reaction pairs from
      ``Kegg.getAllReactions``

    The two Kegg tables are not kept when they would be empty. If
    ``Project(project).isPanGenome()`` is true and a pangenome is available,
    ``pangenome.tsv`` (group / protein pairs) and ``pangenome_category.tsv``
    (group, category, dash-joined entries of ``Genome.getPanGenomeOrgs``)
    are written as well.

    Returns False when there are no organisms, True otherwise.
    """
    # Is there something to be exported?
    organism = Organism(project)

    if organism.howMany() == 0:
        logger.info('No genomic data can be exported at this time')
        return False

    logger.info('Exporting protein data')

    genome = Genome(project)

    for org in organism.getAll():
        fname = '%s.faa' % org.org_id
        # The context manager guarantees the handle is flushed and closed
        with open(fname, 'w') as fout:
            nprots = SeqIO.write(list(genome.getRecords(org.org_id)),
                                 fout, 'fasta')
        logger.info('Saved %d proteins from %s (%s)' %
                    (nprots, org.org_id, fname))

    logger.info('Exporting Kegg data')

    logger.info('Exporting KO map data')

    kegg = Kegg(project)

    for org in organism.getAll():
        fname = 'ko_%s.tsv' % org.org_id
        nlinks = _writeLinksTable(fname, kegg.getAllKO(org.org_id), 'ko:')

        if nlinks == 0:
            logger.warning('No KO links available for %s' % org.org_id)
        else:
            logger.info('Saved %d KO links for %s (%s)' %
                        (nlinks, org.org_id, fname))

    logger.info('Exporting Kegg reactions data')

    for org in organism.getAll():
        fname = 'reactions_%s.tsv' % org.org_id
        nlinks = _writeLinksTable(fname,
                                  kegg.getAllReactions(org.org_id), 'rn:')

        if nlinks == 0:
            logger.warning('No Kegg reactions available for %s' % org.org_id)
        else:
            logger.info('Saved %d Kegg reactions links for %s (%s)' %
                        (nlinks, org.org_id, fname))

    proj = Project(project)

    if proj.isPanGenome():
        logger.info('Exporting pangenome data')

        dG = genome.getPanGenome()
        if len(dG) == 0:
            logger.warning('No pangenome available')
        else:
            fname = 'pangenome.tsv'
            with open(fname, 'w') as fout:
                # items() works on both Python 2 and Python 3 dictionaries
                for group, prots in dG.items():
                    for prot in prots:
                        fout.write('%s\t%s\n' % (group, prot))

            logger.info('Exported %d orthologs (%s)' % (len(dG), fname))

            fname = 'pangenome_category.tsv'
            dOrgs = genome.getPanGenomeOrgs()
            # Each getter is called only when its category is about to be
            # written, in the order core / accessory / unique
            categories = (('core', genome.getCore),
                          ('accessory', genome.getAcc),
                          ('unique', genome.getUni))
            with open(fname, 'w') as fout:
                for category, getGroups in categories:
                    for group in getGroups():
                        fout.write('%s\t%s\t%s\n' %
                                   (group.group_id, category,
                                    '-'.join(dOrgs[group.group_id])))

            logger.info('Exported orthologs informations (%s)' % fname)

    return True
def _emitStatsLine(line, doPrint):
    """Send one line of statistics to stdout or to the module logger."""
    if doPrint:
        # The parenthesised single-argument form gives the same output with
        # the Python 2 print statement and the Python 3 print function
        print(line)
    else:
        logger.info(line)


def _tabJoin(values):
    """Return *values* converted to strings and joined by tabs."""
    return '\t'.join([str(x) for x in values])


def _singleGenomesStats(organism, genome, kegg, svg, doPrint):
    """Emit one statistics row per organism, then call plotMapBars."""
    logger.info('Single genomes stats')

    # Header
    _emitStatsLine('\t'.join(['ID', 'name', 'description', 'proteome size',
                              'mapped to kegg', 'KEGG orthology IDs',
                              'pathways', 'reactions']), doPrint)

    lOrg = []
    for org in organism.getAll():
        org_id = org.org_id
        name = org.name if org.name else 'NONE'
        description = org.description if org.description else 'NONE'

        prots = genome.howMany(org_id)
        mapped = kegg.howManyMapped(org_id)
        ko = kegg.howManyKO(org_id)
        react = kegg.howManyReactions(org_id)
        path = kegg.howManyPathways(org_id)

        _emitStatsLine(_tabJoin([org_id, name, description, prots,
                                 mapped, ko, path, react]), doPrint)

        lOrg.append([org_id, prots, mapped, react])

    plotMapBars(lOrg, 'Single genomes statistics', 'single', svg)


def _panGenomeStats(genome, kegg, svg, doPrint):
    """Emit the core / accessory / unique rows, then call the plot helpers."""
    logger.info('Pangenome stats')

    # Header
    _emitStatsLine('\t'.join(['kind', 'size', 'mapped to kegg',
                              'KEGG orthology IDs', 'pathways',
                              'reactions']), doPrint)

    core, acc, uni = (genome.getLenCore(), genome.getLenAcc(),
                      genome.getLenUni())

    stats = []
    lPanGenome = []
    for category, size in (('core', core), ('accessory', acc),
                           ('unique', uni)):
        # Each Kegg count is fetched once and shared by the table row and
        # the plot input
        mapped = kegg.howManyMapped(pangenome=category)
        ko = kegg.howManyKO(pangenome=category)
        path = kegg.howManyPathways(pangenome=category)
        react = kegg.howManyReactions(pangenome=category)

        stats.append(_tabJoin([category, size, mapped, ko, path, react]))
        lPanGenome.append([category.capitalize(), size, mapped, react])

    for stat in stats:
        _emitStatsLine(stat, doPrint)

    plotMapBars(lPanGenome, 'PanGenome statistics', 'pangenome_stats', svg)
    plotPanGenome(core, acc, uni, svg)


def _mutantsStats(organism, genome, kegg, svg, doPrint):
    """Emit, for each non-mutant organism, its row and its mutants' rows.

    For a 'deletion' mutant the counts are the reference counts minus the
    mutant's own; for an 'insertion' mutant they are the sum of the two.
    Any other ``mkind`` is reported as 'wild-type' with its own counts.
    """
    refs = [org.org_id
            for org in organism.getAll()
            if not organism.isMutant(org.org_id)]

    # Header
    header = '\t'.join(['ID', 'name', 'description', 'kind', 'proteome size',
                        'mapped to kegg', 'reactions'])

    for ref_id in refs:
        logger.info('Mutants of %s stats' % ref_id)

        _emitStatsLine(header, doPrint)

        muts = list(organism.getOrgMutants(ref_id))

        # Reference counts do not depend on the mutant: fetch them once
        refCounts = (genome.howMany(ref_id),
                     kegg.howManyMapped(ref_id),
                     kegg.howManyReactions(ref_id))

        lOrg = []
        for org_id in [ref_id] + muts:
            org = organism.getOrg(org_id)

            name = org.name if org.name else 'NONE'
            description = org.description if org.description else 'NONE'
            if org.mkind in ['deletion', 'insertion']:
                mkind = org.mkind
            else:
                mkind = 'wild-type'

            if org_id == ref_id:
                ownCounts = refCounts
            else:
                ownCounts = (genome.howMany(org_id),
                             kegg.howManyMapped(org_id),
                             kegg.howManyReactions(org_id))

            if mkind == 'deletion':
                prots, mapped, react = [r - o for r, o
                                        in zip(refCounts, ownCounts)]
            elif mkind == 'insertion':
                prots, mapped, react = [r + o for r, o
                                        in zip(refCounts, ownCounts)]
            else:
                prots, mapped, react = ownCounts

            _emitStatsLine(_tabJoin([org_id, name, description, mkind,
                                     prots, mapped, react]), doPrint)

            lOrg.append([org_id, prots, mapped, react])

        plotMapBars(lOrg, 'Wild-type (%s) and mutants statistics' % ref_id,
                    '%s' % ref_id, svg)


def dGenomeStats(project, svg=False, doPrint=True):
    """Report genomic statistics for *project* as tab-separated rows.

    The kind returned by ``dSetKind(project)`` selects what is reported:

    * 'single' or 'pangenome': one row per organism, followed by the
      core / accessory / unique rows when ``Project.isPanGenome()`` is true
    * 'mutants': for every non-mutant organism, its row and its mutants'

    project -- passed as-is to dSetKind, Project, Organism, Genome and Kegg
    svg -- forwarded unchanged to plotMapBars / plotPanGenome
    doPrint -- True: rows are printed; False: rows go to ``logger.info``

    Returns False for any other kind (nothing is computed), True otherwise.
    """
    # Which project are we talking about?
    kind = dSetKind(project)

    proj = Project(project)
    organism = Organism(project)
    genome = Genome(project)
    kegg = Kegg(project)

    if kind == 'single' or kind == 'pangenome':
        _singleGenomesStats(organism, genome, kegg, svg, doPrint)

        if proj.isPanGenome():
            _panGenomeStats(genome, kegg, svg, doPrint)

    elif kind == 'mutants':
        _mutantsStats(organism, genome, kegg, svg, doPrint)

    else:
        logger.info('No statistics can be computed at this time')
        return False

    return True