Esempio n. 1
0
def dGenomeMutAdd(project, mutID, mutfasta):
    '''
    Check and add a mutant
    '''
    if not os.path.exists(mutfasta):
        logger.error('Fasta file %s may not be present'%(mutfasta))
        return False
    
    org = Organism(project)
    if not org.isOrg(mutID):
        logger.warning('Organism %s is not present yet!'%mutID)
        return False
    
    gen = Genome(project)
    gen.addProteome(mutID, mutfasta)
    logger.info('Mutant %s (%s) added, having %d mutated genes'
                %(mutID, org.getOrg(mutID).mkind,gen.howMany(mutID)))
    return True
Esempio n. 2
0
def dGenomeAdd(project, orgID, filename):
    '''
    Add a single genome
    '''
    if not os.path.exists(filename):
        logger.error('Fasta file %s may not be present'%(filename))
        return False
    
    filename = os.path.abspath(filename)
    org = Organism(project)
    if not org.isOrg(orgID):
        logger.warning('Organism %s is not present yet!'%orgID)
        return False
    
    gen = Genome(project)
    gen.addProteome(orgID, filename)
    logger.info('Added genome %s, having %d proteins'%
                (orgID, gen.howMany(orgID)))
    return True
Esempio n. 3
0
def dGenomeStats(project, svg=False, doPrint=True):
    # Which project are we talking about?
    kind = dSetKind(project)
    
    proj = Project(project)
    organism = Organism(project)
    genome = Genome(project)
    kegg = Kegg(project)
    
    if kind == 'single' or kind == 'pangenome':
        logger.info('Single genomes stats')
        # Single genomes stats
        # Header
        header = '\t'.join( ['ID', 'name', 'description', 'proteome size',
                                'mapped to kegg', 'KEGG orthology IDs',
                                'pathways', 'reactions'] )
        if doPrint:
            print header
        else:
            logger.info(header)
        
        lOrg = []
        for org in organism.getAll():
            org_id = org.org_id
            name = org.name if org.name else 'NONE'
            description = org.description if org.description else 'NONE'
            
            prots = genome.howMany(org_id)
            
            mapped, ko, react, path = (kegg.howManyMapped(org_id),
                                        kegg.howManyKO(org_id),
                                        kegg.howManyReactions(org_id),
                                        kegg.howManyPathways(org_id))
            
            stats = '\t'.join( [str(x) for x in [org_id, name, description,
                                                 prots, mapped, ko, path,
                                                 react]] )
            if doPrint:
                print stats
            else:
                logger.info(stats)
                
            lOrg.append([org_id, prots, mapped, react])
            
        plotMapBars(lOrg, 'Single genomes statistics', 'single', svg)
        
        if proj.isPanGenome():
            logger.info('Pangenome stats')
            # Pangenome stats
            # Header
            header = '\t'.join( ['kind', 'size',
                                    'mapped to kegg', 'KEGG orthology IDs',
                                    'pathways', 'reactions'] )
            if doPrint:
                print header
            else:
                logger.info(header)
                
            core, acc, uni = (genome.getLenCore(), genome.getLenAcc(),
                              genome.getLenUni())

            stats = []
            stats.append('\t'.join( [str(x) for x in ['core', core,
                                 kegg.howManyMapped(pangenome='core'),
                                 kegg.howManyKO(pangenome='core'),
                                 kegg.howManyPathways(pangenome='core'),
                                 kegg.howManyReactions(pangenome='core')]]))
            stats.append('\t'.join( [str(x) for x in ['accessory', acc,
                                 kegg.howManyMapped(pangenome='accessory'),
                                 kegg.howManyKO(pangenome='accessory'),
                                 kegg.howManyPathways(pangenome='accessory'),
                                 kegg.howManyReactions(pangenome='accessory')]]))
            stats.append('\t'.join( [str(x) for x in ['unique', uni,
                                 kegg.howManyMapped(pangenome='unique'),
                                 kegg.howManyKO(pangenome='unique'),
                                 kegg.howManyPathways(pangenome='unique'),
                                 kegg.howManyReactions(pangenome='unique')]]))
            
            for stat in stats:
                if doPrint:
                    print stat
                else:
                    logger.info(stat)
            
            lPanGenome = [['Core', core, kegg.howManyMapped(pangenome='core'),
                           kegg.howManyReactions(pangenome='core')],
                          ['Accessory', acc,
                           kegg.howManyMapped(pangenome='accessory'),
                           kegg.howManyReactions(pangenome='accessory')],
                          ['Unique', uni,
                           kegg.howManyMapped(pangenome='unique'),
                           kegg.howManyReactions(pangenome='unique')]]
 
            plotMapBars(lPanGenome, 'PanGenome statistics', 'pangenome_stats',
                        svg)
            plotPanGenome(core, acc, uni, svg)
    
    elif kind == 'mutants':
        refs = [org.org_id
                    for org in organism.getAll()
                    if not organism.isMutant(org.org_id)]
        
        # Header
        header = '\t'.join( ['ID', 'name', 'description', 'kind', 'proteome size',
                                'mapped to kegg', 'reactions'] )
        
        for ref_id in refs:
            logger.info('Mutants of %s stats'%ref_id)
            
            if doPrint:
                print header
            else:
                logger.info(header)
            
            muts = [x for x in organism.getOrgMutants(ref_id)]
            
            lOrg = []
            for org_id in [ref_id] + muts:
                org = organism.getOrg(org_id)
                
                name = org.name if org.name else 'NONE'
                description = org.description if org.description else 'NONE'
                
                mkind = org.mkind if org.mkind in ['deletion', 'insertion'] else 'wild-type'
                
                if mkind not in ['deletion', 'insertion']:
                    prots = genome.howMany(org_id)
                elif mkind == 'deletion':
                    prots = genome.howMany(ref_id) - genome.howMany(org_id)
                elif mkind == 'insertion':
                    prots = genome.howMany(ref_id) + genome.howMany(org_id)
                
                mapped, react = (kegg.howManyMapped(org_id),
                                kegg.howManyReactions(org_id))
        
                if mkind == 'deletion':
                    mapped = kegg.howManyMapped(ref_id) - mapped
                    react = kegg.howManyReactions(ref_id) - react
                elif mkind == 'insertion':
                    mapped += kegg.howManyMapped(ref_id)
                    react += kegg.howManyReactions(ref_id)
                
                stats = '\t'.join( [str(x) for x in [org_id, name, description,
                                                 mkind, prots, mapped,
                                                 react]] )
                if doPrint:
                    print stats
                else:
                    logger.info(stats)
                
                lOrg.append([org_id, prots, mapped, react])
        
            plotMapBars(lOrg, 'Wild-type (%s) and mutants statistics'%ref_id,
                        '%s'%ref_id, svg)
    
    else:
        logger.info('No statistics can be computed at this time')
        return False

    return True