def dPanGenomeAdd(project, orthfile):
    '''
    Add an external pangenome

    The file is read as tab-separated text: the first column of each row is
    used as the group identifier and the second column as one member of that
    group (presumably a protein ID -- confirm against Genome.addPanGenome).
    Columns after the second are ignored, blank lines are skipped.

    project  -- value handed to Genome() to open the project
    orthfile -- path to the tab-separated pangenome file

    Returns True once the groups were handed to Genome.addPanGenome and the
    sizes logged; returns False (after logging an error) when the file is
    missing or contains a row with fewer than two columns. In the False case
    nothing is added to the project.
    '''
    if not os.path.exists(orthfile):
        logger.error('Pangenome file %s may not be present'%(orthfile))
        return False

    orth = {}
    # The context manager guarantees the handle is closed, also on early exit
    with open(orthfile) as handle:
        for lineno, line in enumerate(handle, 1):
            line = line.strip()
            if not line:
                # Blank (typically trailing) lines carry no group information
                continue

            fields = line.split('\t')
            if len(fields) < 2:
                # Without a second column there is no member to register:
                # refuse the whole file rather than load a partial pangenome
                logger.error('Pangenome file %s is malformed (line %d): '
                             'expected at least two tab-separated columns'
                             %(orthfile, lineno))
                return False

            orth.setdefault(fields[0], []).append(fields[1])

    gen = Genome(project)
    gen.addPanGenome(orth)

    logger.info('PanGenome size: %d groups'%len(gen.getPanGenome()))
    logger.info('Core size: %d groups'%gen.getLenCore())
    logger.info('Accessory size: %d groups'%gen.getLenAcc())
    logger.info('Unique size: %d groups'%gen.getLenUni())

    return True
def dGenomeStats(project, svg=False, doPrint=True):
    '''
    Report the genomic statistics of a project

    The kind of project (as returned by dSetKind) selects what is reported:
      - 'single' / 'pangenome': one row per organism, followed by the
        core/accessory/unique table when Project.isPanGenome() is true
      - 'mutants': for each non-mutant organism, one table holding the
        reference and its mutants

    Every table is also passed to plotMapBars (and the pangenome sizes to
    plotPanGenome).

    project -- value handed to Project(), Organism(), Genome() and Kegg()
    svg     -- forwarded unchanged to the plotting functions
    doPrint -- True: table lines are printed on stdout;
               False: table lines are sent to logger.info

    Returns True when a report was produced, False when the project kind is
    none of the above.
    '''
    # Which project are we talking about?
    kind = dSetKind(project)

    proj = Project(project)
    organism = Organism(project)
    genome = Genome(project)
    kegg = Kegg(project)

    if kind == 'single' or kind == 'pangenome':
        _genomeStatsSingle(organism, genome, kegg, svg, doPrint)
        if proj.isPanGenome():
            _genomeStatsPanGenome(genome, kegg, svg, doPrint)
    elif kind == 'mutants':
        _genomeStatsMutants(organism, genome, kegg, svg, doPrint)
    else:
        logger.info('No statistics can be computed at this time')
        return False

    return True


def _genomeStatsEmit(line, doPrint):
    ''' Send one table line to stdout (doPrint) or to the logger '''
    if doPrint:
        # Single parenthesised argument: same output whether print is the
        # Python 2 statement or the print() function
        print(line)
    else:
        logger.info(line)


def _genomeStatsSingle(organism, genome, kegg, svg, doPrint):
    ''' Emit and plot one statistics row for each organism '''
    logger.info('Single genomes stats')

    header = '\t'.join(['ID', 'name', 'description', 'proteome size',
                        'mapped to kegg', 'KEGG orthology IDs',
                        'pathways', 'reactions'])
    _genomeStatsEmit(header, doPrint)

    lOrg = []
    for org in organism.getAll():
        org_id = org.org_id
        # Missing name/description are shown with a placeholder
        name = org.name or 'NONE'
        description = org.description or 'NONE'

        prots = genome.howMany(org_id)
        mapped = kegg.howManyMapped(org_id)
        ko = kegg.howManyKO(org_id)
        react = kegg.howManyReactions(org_id)
        path = kegg.howManyPathways(org_id)

        row = [org_id, name, description, prots, mapped, ko, path, react]
        _genomeStatsEmit('\t'.join([str(x) for x in row]), doPrint)

        lOrg.append([org_id, prots, mapped, react])

    plotMapBars(lOrg, 'Single genomes statistics', 'single', svg)


def _genomeStatsPanGenome(genome, kegg, svg, doPrint):
    ''' Emit and plot the core / accessory / unique statistics '''
    logger.info('Pangenome stats')

    header = '\t'.join(['kind', 'size', 'mapped to kegg',
                        'KEGG orthology IDs', 'pathways', 'reactions'])
    _genomeStatsEmit(header, doPrint)

    core, acc, uni = (genome.getLenCore(), genome.getLenAcc(),
                      genome.getLenUni())

    # Each count is fetched once and shared between the table and the plot.
    # NOTE(review): assumes the Kegg.howMany* calls are side-effect free
    # reads -- confirm against the Kegg class
    rows = []
    lPanGenome = []
    for label, category, size in (('Core', 'core', core),
                                  ('Accessory', 'accessory', acc),
                                  ('Unique', 'unique', uni)):
        mapped = kegg.howManyMapped(pangenome=category)
        ko = kegg.howManyKO(pangenome=category)
        path = kegg.howManyPathways(pangenome=category)
        react = kegg.howManyReactions(pangenome=category)

        rows.append('\t'.join([str(x) for x in
                               [category, size, mapped, ko, path, react]]))
        lPanGenome.append([label, size, mapped, react])

    # All rows are computed before the first one is emitted
    for row in rows:
        _genomeStatsEmit(row, doPrint)

    plotMapBars(lPanGenome, 'PanGenome statistics', 'pangenome_stats', svg)
    plotPanGenome(core, acc, uni, svg)


def _genomeStatsMutants(organism, genome, kegg, svg, doPrint):
    ''' Emit and plot, for each reference organism, its mutants table '''
    refs = [org.org_id for org in organism.getAll()
            if not organism.isMutant(org.org_id)]

    header = '\t'.join(['ID', 'name', 'description', 'kind',
                        'proteome size', 'mapped to kegg', 'reactions'])

    for ref_id in refs:
        logger.info('Mutants of %s stats'%ref_id)
        _genomeStatsEmit(header, doPrint)

        muts = list(organism.getOrgMutants(ref_id))

        # The reference counts are needed by every deletion/insertion
        # mutant: fetch them once per reference instead of once per mutant.
        # NOTE(review): assumes the howMany* calls are side-effect free
        # reads -- confirm against the Genome/Kegg classes
        ref_prots = genome.howMany(ref_id)
        ref_mapped = kegg.howManyMapped(ref_id)
        ref_react = kegg.howManyReactions(ref_id)

        lOrg = []
        for org_id in [ref_id] + muts:
            org = organism.getOrg(org_id)

            name = org.name or 'NONE'
            description = org.description or 'NONE'
            # Anything that is not an explicit deletion/insertion is
            # reported as wild-type
            if org.mkind in ('deletion', 'insertion'):
                mkind = org.mkind
            else:
                mkind = 'wild-type'

            if org_id == ref_id:
                # Already fetched above
                prots, mapped, react = ref_prots, ref_mapped, ref_react
            else:
                prots = genome.howMany(org_id)
                mapped = kegg.howManyMapped(org_id)
                react = kegg.howManyReactions(org_id)

            # The counts stored for a mutant are combined with the
            # reference: subtracted for deletions, added for insertions
            if mkind == 'deletion':
                prots = ref_prots - prots
                mapped = ref_mapped - mapped
                react = ref_react - react
            elif mkind == 'insertion':
                prots = ref_prots + prots
                mapped = ref_mapped + mapped
                react = ref_react + react

            row = [org_id, name, description, mkind, prots, mapped, react]
            _genomeStatsEmit('\t'.join([str(x) for x in row]), doPrint)

            lOrg.append([org_id, prots, mapped, react])

        plotMapBars(lOrg, 'Wild-type (%s) and mutants statistics'%ref_id,
                    '%s'%ref_id, svg)