def dGenomeMutAdd(project, mutID, mutfasta):
    '''
    Validate and register the proteome of a mutant

    project  -- handed unchanged to Organism() and Genome()
    mutID    -- identifier of the mutant; it must already be known to
                Organism.isOrg()
    mutfasta -- path of the fasta file, passed as given to
                Genome.addProteome()

    Returns True once the proteome has been added, False (after logging
    an error/warning) when the fasta path does not exist or when the
    organism is not in the project yet
    '''
    fasta_found = os.path.exists(mutfasta)
    if not fasta_found:
        logger.error('Fasta file %s may not be present'%(mutfasta))
        return False

    organisms = Organism(project)
    if organisms.isOrg(mutID):
        genomes = Genome(project)
        genomes.addProteome(mutID, mutfasta)

        # Same evaluation order as the log arguments: kind first, count second
        mutation_kind = organisms.getOrg(mutID).mkind
        n_genes = genomes.howMany(mutID)
        logger.info('Mutant %s (%s) added, having %d mutated genes'
                    %(mutID, mutation_kind, n_genes))
        return True

    logger.warning('Organism %s is not present yet!'%mutID)
    return False
def dGenomeAdd(project, orgID, filename):
    '''
    Validate and register the proteome of a single genome

    project  -- handed unchanged to Organism() and Genome()
    orgID    -- identifier of the organism; it must already be known to
                Organism.isOrg()
    filename -- path of the fasta file; it is made absolute before being
                passed to Genome.addProteome()

    Returns True once the proteome has been added, False (after logging
    an error/warning) when the path does not exist or when the organism
    is not in the project yet
    '''
    fasta_found = os.path.exists(filename)
    if not fasta_found:
        # The message reports the path exactly as the caller supplied it
        logger.error('Fasta file %s may not be present'%(filename))
        return False

    fasta_path = os.path.abspath(filename)

    organisms = Organism(project)
    if organisms.isOrg(orgID):
        genomes = Genome(project)
        genomes.addProteome(orgID, fasta_path)

        n_proteins = genomes.howMany(orgID)
        logger.info('Added genome %s, having %d proteins'%
                    (orgID, n_proteins))
        return True

    logger.warning('Organism %s is not present yet!'%orgID)
    return False
def _emitStatsLine(line, doPrint):
    '''
    Write one statistics line to stdout (doPrint true) or to the logger
    '''
    if doPrint:
        # A single parenthesised argument gives the same output whether
        # print is the Python 2 statement or the Python 3 function
        print(line)
    else:
        logger.info(line)


def _singleGenomesStats(organism, genome, kegg, svg, doPrint):
    '''
    Emit one statistics row per organism, then plot them with plotMapBars
    '''
    logger.info('Single genomes stats')

    header = '\t'.join(['ID', 'name', 'description', 'proteome size',
                        'mapped to kegg', 'KEGG orthology IDs',
                        'pathways', 'reactions'])
    _emitStatsLine(header, doPrint)

    lOrg = []
    for org in organism.getAll():
        org_id = org.org_id
        # Missing (falsy) name/description are reported as 'NONE'
        name = org.name if org.name else 'NONE'
        description = org.description if org.description else 'NONE'

        prots = genome.howMany(org_id)
        mapped = kegg.howManyMapped(org_id)
        ko = kegg.howManyKO(org_id)
        react = kegg.howManyReactions(org_id)
        path = kegg.howManyPathways(org_id)

        # Column order follows the header: pathways before reactions
        row = [org_id, name, description, prots, mapped, ko, path, react]
        _emitStatsLine('\t'.join([str(x) for x in row]), doPrint)

        lOrg.append([org_id, prots, mapped, react])

    plotMapBars(lOrg, 'Single genomes statistics', 'single', svg)


def _panGenomeStats(genome, kegg, svg, doPrint):
    '''
    Emit the core/accessory/unique statistics rows, then plot them
    '''
    logger.info('Pangenome stats')

    header = '\t'.join(['kind', 'size', 'mapped to kegg',
                        'KEGG orthology IDs', 'pathways', 'reactions'])
    _emitStatsLine(header, doPrint)

    core = genome.getLenCore()
    acc = genome.getLenAcc()
    uni = genome.getLenUni()

    # (value of the pangenome= keyword / table label, plot label, size)
    categories = [('core', 'Core', core),
                  ('accessory', 'Accessory', acc),
                  ('unique', 'Unique', uni)]

    lines = []
    lPanGenome = []
    for label, title, size in categories:
        # Each count is queried once and shared by the table and the plot
        mapped = kegg.howManyMapped(pangenome=label)
        ko = kegg.howManyKO(pangenome=label)
        path = kegg.howManyPathways(pangenome=label)
        react = kegg.howManyReactions(pangenome=label)

        row = [label, size, mapped, ko, path, react]
        lines.append('\t'.join([str(x) for x in row]))
        lPanGenome.append([title, size, mapped, react])

    # Rows are emitted only after every count has been collected
    for line in lines:
        _emitStatsLine(line, doPrint)

    plotMapBars(lPanGenome, 'PanGenome statistics', 'pangenome_stats', svg)
    plotPanGenome(core, acc, uni, svg)


def _mutantsStats(organism, genome, kegg, svg, doPrint):
    '''
    Emit and plot, for each non-mutant organism, its row and its mutants' rows
    '''
    refs = [org.org_id for org in organism.getAll()
            if not organism.isMutant(org.org_id)]

    header = '\t'.join(['ID', 'name', 'description', 'kind',
                        'proteome size', 'mapped to kegg', 'reactions'])

    for ref_id in refs:
        logger.info('Mutants of %s stats'%ref_id)
        _emitStatsLine(header, doPrint)

        muts = list(organism.getOrgMutants(ref_id))

        lOrg = []
        for org_id in [ref_id] + muts:
            org = organism.getOrg(org_id)
            name = org.name if org.name else 'NONE'
            description = org.description if org.description else 'NONE'

            # Any mkind other than deletion/insertion is shown as wild-type
            if org.mkind in ('deletion', 'insertion'):
                mkind = org.mkind
            else:
                mkind = 'wild-type'

            own_prots = genome.howMany(org_id)
            own_mapped = kegg.howManyMapped(org_id)
            own_react = kegg.howManyReactions(org_id)

            if org_id == ref_id:
                # The reference is always the first row of the loop: keep
                # its raw counts so the mutant rows do not query them again
                ref_prots = own_prots
                ref_mapped = own_mapped
                ref_react = own_react

            if mkind == 'deletion':
                # The mutant's own counts are subtracted from the reference
                prots = ref_prots - own_prots
                mapped = ref_mapped - own_mapped
                react = ref_react - own_react
            elif mkind == 'insertion':
                # The mutant's own counts are added to the reference
                prots = ref_prots + own_prots
                mapped = ref_mapped + own_mapped
                react = ref_react + own_react
            else:
                prots = own_prots
                mapped = own_mapped
                react = own_react

            row = [org_id, name, description, mkind, prots, mapped, react]
            _emitStatsLine('\t'.join([str(x) for x in row]), doPrint)

            lOrg.append([org_id, prots, mapped, react])

        plotMapBars(lOrg, 'Wild-type (%s) and mutants statistics'%ref_id,
                    '%s'%ref_id, svg)


def dGenomeStats(project, svg=False, doPrint=True):
    '''
    Report and plot the genomic statistics of a project

    The report depends on the value returned by dSetKind(project):
    'single' / 'pangenome' -- one row per organism, followed by the
                              core/accessory/unique rows when
                              Project.isPanGenome() is true
    'mutants'              -- for each non-mutant organism, one table with
                              its own row and one row per mutant
    anything else          -- nothing is computed

    project -- handed unchanged to dSetKind(), Project(), Organism(),
               Genome() and Kegg()
    svg     -- forwarded to plotMapBars() and plotPanGenome()
    doPrint -- when true the tables are printed to stdout, otherwise they
               are sent to logger.info()

    Returns True when statistics were produced, False otherwise
    '''
    # Which project are we talking about?
    kind = dSetKind(project)

    proj = Project(project)
    organism = Organism(project)
    genome = Genome(project)
    kegg = Kegg(project)

    if kind in ('single', 'pangenome'):
        _singleGenomesStats(organism, genome, kegg, svg, doPrint)
        if proj.isPanGenome():
            _panGenomeStats(genome, kegg, svg, doPrint)
    elif kind == 'mutants':
        _mutantsStats(organism, genome, kegg, svg, doPrint)
    else:
        logger.info('No statistics can be computed at this time')
        return False

    return True