Esempio n. 1
0
def dGenomeExport(project):
    # Is there something to be exported?
    organism = Organism(project)
    
    if organism.howMany() == 0:
        logger.info('No genomic data can be exported at this time')
        return False
    else:
        logger.info('Exporting protein data')
        
        genome = Genome(project)
        
        for org in organism.getAll():
            nprots = SeqIO.write([x for x in genome.getRecords(org.org_id)],
                        open('%s.faa'%org.org_id,'w'), 'fasta')
            logger.info('Saved %d proteins from %s (%s)'%(nprots,
                                                          org.org_id,
                                                          '%s.faa'%org.org_id))
            
        logger.info('Exporting Kegg data')
        
        logger.info('Exporting KO map data')
        
        kegg = Kegg(project)
        
        for org in organism.getAll():
            fname = 'ko_%s.tsv'%org.org_id
            fout = open(fname,'w')
            i = 0
            for prot_id, ko_id in kegg.getAllKO(org.org_id):
                fout.write('%s\t%s\n'%(prot_id, ko_id.lstrip('ko:')))
                i += 1
            fout.close()
            
            if i == 0:
                os.remove(fname)
                logger.warning('No KO links available for %s'%org.org_id)
            else:
                logger.info('Saved %d KO links for %s (%s)'%(i, org.org_id,
                                                         fname))
            
        logger.info('Exporting Kegg reactions data')
        
        for org in organism.getAll():
            fname = 'reactions_%s.tsv'%org.org_id
            fout = open(fname,'w')
            i = 0
            for prot_id, re_id in kegg.getAllReactions(org.org_id):
                fout.write('%s\t%s\n'%(prot_id, re_id.lstrip('rn:')))
                i += 1
            fout.close()
            
            if i == 0:
                os.remove(fname)
                logger.warning('No Kegg reactions available for %s'%org.org_id)
            else:
                logger.info('Saved %d Kegg reactions links for %s (%s)'%
                        (i, org.org_id, fname))
            
        proj = Project(project)
        
        if proj.isPanGenome():
            logger.info('Exporting pangenome data')
            
            dG = genome.getPanGenome()
            if len(dG) == 0:
                logger.warning('No pangenome available')
            else:
                fname = 'pangenome.tsv'
                fout = open(fname,'w')
                for group, prots in dG.iteritems():
                    for prot in prots:
                        fout.write('%s\t%s\n'%(group,prot))
                fout.close()
                
                logger.info('Exported %d orthologs (%s)'%(len(dG),fname))
                
                fname = 'pangenome_category.tsv'
                fout = open(fname,'w')
                dG = genome.getPanGenomeOrgs()
                for group in genome.getCore():
                    fout.write('%s\t%s\t%s\n'%(group.group_id,
                                               'core',
                                               '-'.join(dG[group.group_id])))
                for group in genome.getAcc():
                    fout.write('%s\t%s\t%s\n'%(group.group_id,
                                               'accessory',
                                               '-'.join(dG[group.group_id])))
                for group in genome.getUni():
                    fout.write('%s\t%s\t%s\n'%(group.group_id,
                                               'unique',
                                               '-'.join(dG[group.group_id])))
                fout.close()
                
                logger.info('Exported orthologs informations (%s)'%fname)
    
    return True
Esempio n. 2
0
def dGenomeStats(project, svg=False, doPrint=True):
    # Which project are we talking about?
    kind = dSetKind(project)
    
    proj = Project(project)
    organism = Organism(project)
    genome = Genome(project)
    kegg = Kegg(project)
    
    if kind == 'single' or kind == 'pangenome':
        logger.info('Single genomes stats')
        # Single genomes stats
        # Header
        header = '\t'.join( ['ID', 'name', 'description', 'proteome size',
                                'mapped to kegg', 'KEGG orthology IDs',
                                'pathways', 'reactions'] )
        if doPrint:
            print header
        else:
            logger.info(header)
        
        lOrg = []
        for org in organism.getAll():
            org_id = org.org_id
            name = org.name if org.name else 'NONE'
            description = org.description if org.description else 'NONE'
            
            prots = genome.howMany(org_id)
            
            mapped, ko, react, path = (kegg.howManyMapped(org_id),
                                        kegg.howManyKO(org_id),
                                        kegg.howManyReactions(org_id),
                                        kegg.howManyPathways(org_id))
            
            stats = '\t'.join( [str(x) for x in [org_id, name, description,
                                                 prots, mapped, ko, path,
                                                 react]] )
            if doPrint:
                print stats
            else:
                logger.info(stats)
                
            lOrg.append([org_id, prots, mapped, react])
            
        plotMapBars(lOrg, 'Single genomes statistics', 'single', svg)
        
        if proj.isPanGenome():
            logger.info('Pangenome stats')
            # Pangenome stats
            # Header
            header = '\t'.join( ['kind', 'size',
                                    'mapped to kegg', 'KEGG orthology IDs',
                                    'pathways', 'reactions'] )
            if doPrint:
                print header
            else:
                logger.info(header)
                
            core, acc, uni = (genome.getLenCore(), genome.getLenAcc(),
                              genome.getLenUni())

            stats = []
            stats.append('\t'.join( [str(x) for x in ['core', core,
                                 kegg.howManyMapped(pangenome='core'),
                                 kegg.howManyKO(pangenome='core'),
                                 kegg.howManyPathways(pangenome='core'),
                                 kegg.howManyReactions(pangenome='core')]]))
            stats.append('\t'.join( [str(x) for x in ['accessory', acc,
                                 kegg.howManyMapped(pangenome='accessory'),
                                 kegg.howManyKO(pangenome='accessory'),
                                 kegg.howManyPathways(pangenome='accessory'),
                                 kegg.howManyReactions(pangenome='accessory')]]))
            stats.append('\t'.join( [str(x) for x in ['unique', uni,
                                 kegg.howManyMapped(pangenome='unique'),
                                 kegg.howManyKO(pangenome='unique'),
                                 kegg.howManyPathways(pangenome='unique'),
                                 kegg.howManyReactions(pangenome='unique')]]))
            
            for stat in stats:
                if doPrint:
                    print stat
                else:
                    logger.info(stat)
            
            lPanGenome = [['Core', core, kegg.howManyMapped(pangenome='core'),
                           kegg.howManyReactions(pangenome='core')],
                          ['Accessory', acc,
                           kegg.howManyMapped(pangenome='accessory'),
                           kegg.howManyReactions(pangenome='accessory')],
                          ['Unique', uni,
                           kegg.howManyMapped(pangenome='unique'),
                           kegg.howManyReactions(pangenome='unique')]]
 
            plotMapBars(lPanGenome, 'PanGenome statistics', 'pangenome_stats',
                        svg)
            plotPanGenome(core, acc, uni, svg)
    
    elif kind == 'mutants':
        refs = [org.org_id
                    for org in organism.getAll()
                    if not organism.isMutant(org.org_id)]
        
        # Header
        header = '\t'.join( ['ID', 'name', 'description', 'kind', 'proteome size',
                                'mapped to kegg', 'reactions'] )
        
        for ref_id in refs:
            logger.info('Mutants of %s stats'%ref_id)
            
            if doPrint:
                print header
            else:
                logger.info(header)
            
            muts = [x for x in organism.getOrgMutants(ref_id)]
            
            lOrg = []
            for org_id in [ref_id] + muts:
                org = organism.getOrg(org_id)
                
                name = org.name if org.name else 'NONE'
                description = org.description if org.description else 'NONE'
                
                mkind = org.mkind if org.mkind in ['deletion', 'insertion'] else 'wild-type'
                
                if mkind not in ['deletion', 'insertion']:
                    prots = genome.howMany(org_id)
                elif mkind == 'deletion':
                    prots = genome.howMany(ref_id) - genome.howMany(org_id)
                elif mkind == 'insertion':
                    prots = genome.howMany(ref_id) + genome.howMany(org_id)
                
                mapped, react = (kegg.howManyMapped(org_id),
                                kegg.howManyReactions(org_id))
        
                if mkind == 'deletion':
                    mapped = kegg.howManyMapped(ref_id) - mapped
                    react = kegg.howManyReactions(ref_id) - react
                elif mkind == 'insertion':
                    mapped += kegg.howManyMapped(ref_id)
                    react += kegg.howManyReactions(ref_id)
                
                stats = '\t'.join( [str(x) for x in [org_id, name, description,
                                                 mkind, prots, mapped,
                                                 react]] )
                if doPrint:
                    print stats
                else:
                    logger.info(stats)
                
                lOrg.append([org_id, prots, mapped, react])
        
            plotMapBars(lOrg, 'Wild-type (%s) and mutants statistics'%ref_id,
                        '%s'%ref_id, svg)
    
    else:
        logger.info('No statistics can be computed at this time')
        return False

    return True