Esempio n. 1
0
def fetchKegg(project):
    '''
    Fetch the whole KEGG metabolic map, if needed, and store it in the project

    The download is performed when the project holds no KEGG data yet or
    when the remote KEGG release compares greater than the one recorded in
    the project. IDs already stored in the project are passed to KeggNet
    through its "avoid" argument.

    Returns False if the download thread fails, True otherwise (including
    when the local data is already up-to-date).
    '''
    from ductape.kegg.kegg import KeggNet, KeggAPI
    from ductape.terminal import RunThread

    # Check if we have to fetch the whole kegg DB
    fetch = False
    proj = Project(project)
    k = KeggAPI()
    try:
        k.getDBVersion()
        release = k.result[1]
    except Exception as e:
        # Best effort: without a release we can still fetch, but we cannot
        # tell whether an existing copy is outdated
        logger.warning("Could not fetch the KEGG DB version (%s)" % str(e))
        release = None
    if proj.isKegg():
        if release and proj.kegg < release:
            logger.warning("A new KEGG DB version is available (%s, was %s)" % (str(release), str(proj.kegg)))
            fetch = True
    else:
        fetch = True

    if fetch:
        logger.info("Fetching the whole KEGG metabolic map")
        if release:
            logger.info("KEGG DB release %s" % str(release))
        kegg = Kegg(project)
        avoid = list(kegg.getAllIDs())

        knet = KeggNet(avoid=avoid)
        if not RunThread(knet):
            return False

        # Details
        kegg.addPathways(knet.result.path)
        logger.info("Added %d Path IDs" % len(knet.result.path))
        kegg.addReactions(knet.result.react)
        logger.info("Added %d Re IDs" % len(knet.result.react))
        kegg.addCompounds(knet.result.comp)
        logger.info("Added %d Co IDs" % len(knet.result.comp))
        kegg.addRPairs(knet.result.rpair)
        logger.info("Added %d RPair IDs" % len(knet.result.rpair))
        # Links
        kegg.addPathReacts(knet.result.pathreact)
        kegg.addReactComps(knet.result.reactcomp)
        kegg.addCompReacts(knet.result.compreact)
        kegg.addPathComps(knet.result.pathcomp)
        kegg.addReactRPairs(knet.result.reactrpair)
        kegg.addRPairReacts(knet.result.rpairreact)
        logger.info("Added Kegg links")
        # HTML maps
        kegg.addPathHtml(knet.result.pathmaps)
        logger.info("Added Kegg maps")

        # Add the release version
        if release:
            proj.setKegg(release)
    else:
        logger.info("KEGG db is up-to-date")

    # Explicit success value: the failure path returns False, so falling
    # off the end (None) would be indistinguishable from a failure
    return True
Esempio n. 2
0
def touchProject(project):
    '''
    Check the project file and refresh its "last" information

    Returns False when isProject rejects the given project, True once the
    project has been updated.
    '''
    if isProject(project):
        proj = Project(project)
        proj.updateLast()
        logger.debug('%s'%str(proj))
        return True

    return False
Esempio n. 3
0
def dInit(project, wdir='.', name='', descr=''):
    '''
    Create a new project file

    Nothing is touched (and False is returned) when the project path
    already exists. Otherwise the database is created, the project is
    registered with its name, description and a "tmp" directory located
    under wdir, and True is returned.
    '''
    if os.path.exists(project):
        # Only the file name is reported, not the full path
        shortname = os.path.basename(project)
        logger.warning('Project %s is already present in %s'%(shortname,wdir))
        return False

    DBBase(project).create()
    newproj = Project(project)
    newproj.addProject(name=name, description=descr,
                       tmp=os.path.join(wdir, 'tmp'))
    logger.info('Project successfully created')
    return True
Esempio n. 4
0
def dGetGenomeSteps(project):
    '''
    Get the analysis that these genomes deserve

    Returns the ordered list of analysis steps still to be run, chosen
    among 'pangenome', 'map2ko' and 'map2kegg'. The choice depends on the
    project kind (as returned by dSetKind), on the project "genome" status
    and on whether a pangenome is already flagged in the project.
    '''
    proj = Project(project)
    proj.getProject()
    status = proj.genome
    pangenome = bool(proj.pangenome)
    kind = dSetKind(project)

    # KEGG mapping steps still missing; they are the same for every kind
    # NOTE(review): status presumably names the last completed mapping step
    if status == 'map2ko':
        mapping = ['map2kegg']
    elif status == 'map2kegg':
        mapping = []
    else:
        mapping = ['map2ko', 'map2kegg']

    if kind == 'mutants' or kind == 'single':
        return mapping

    # Any other kind (including None) may also need the pangenome step,
    # which comes before the mapping steps
    steps = []
    if not pangenome:
        steps.append('pangenome')
    steps.extend(mapping)

    return steps
Esempio n. 5
0
def dSetKind(project):
    '''
    Set the kind of genomic project and return its value

    The kind is 'mutants' when at least one mutant is present, 'single'
    when exactly one organism is present and 'pangenome' when there are
    more. With no organisms the kind is left untouched and None is
    returned.
    '''
    proj = Project(project)
    proj.getProject()
    org = Organism(project)

    # Each count is queried once, so the logged value is the tested one
    mutants = org.howManyMutants()
    if mutants > 0:
        logger.info('%d mutants are present'%mutants)
        proj.setKind('mutants')
        return 'mutants'

    organisms = org.howMany()
    if organisms == 1:
        logger.info('Just one organism is present')
        proj.setKind('single')
        return 'single'
    elif organisms == 0:
        logger.info('No organisms are present yet')
        return None
    else:
        logger.info('%d organisms are present'%organisms)
        proj.setKind('pangenome')
        return 'pangenome'
Esempio n. 6
0
def _stripPrefix(identifier, prefix):
    '''
    Return identifier without its leading prefix (unchanged if absent)
    '''
    # str.lstrip would strip a *set of characters*, not a prefix
    if identifier.startswith(prefix):
        return identifier[len(prefix):]
    return identifier


def _exportLinks(fname, links, prefix):
    '''
    Write (protein ID, linked ID) pairs to fname as a two-columns TSV

    The given prefix is removed from each linked ID.
    Returns the number of rows written.
    '''
    written = 0
    with open(fname, 'w') as fout:
        for prot_id, link_id in links:
            fout.write('%s\t%s\n'%(prot_id, _stripPrefix(link_id, prefix)))
            written += 1
    return written


def dGenomeExport(project):
    '''
    Export the genomic data of the project to files in the current directory

    For each organism: proteins ("<org_id>.faa", fasta), KO links
    ("ko_<org_id>.tsv") and Kegg reactions links
    ("reactions_<org_id>.tsv"); link files that would be empty are removed.
    If the project is flagged as a pangenome and one is available,
    "pangenome.tsv" (group, protein) and "pangenome_category.tsv"
    (group, category, organisms) are written as well.

    Returns False when no organism is present, True otherwise.
    '''
    # Is there something to be exported?
    organism = Organism(project)

    if organism.howMany() == 0:
        logger.info('No genomic data can be exported at this time')
        return False

    logger.info('Exporting protein data')

    genome = Genome(project)

    for org in organism.getAll():
        fname = '%s.faa'%org.org_id
        # Records are collected before the file is opened, so a failure
        # while reading them leaves no empty file behind
        records = list(genome.getRecords(org.org_id))
        with open(fname, 'w') as fout:
            nprots = SeqIO.write(records, fout, 'fasta')
        logger.info('Saved %d proteins from %s (%s)'%(nprots,
                                                      org.org_id,
                                                      fname))

    logger.info('Exporting Kegg data')

    logger.info('Exporting KO map data')

    kegg = Kegg(project)

    for org in organism.getAll():
        fname = 'ko_%s.tsv'%org.org_id
        written = _exportLinks(fname, kegg.getAllKO(org.org_id), 'ko:')

        if written == 0:
            os.remove(fname)
            logger.warning('No KO links available for %s'%org.org_id)
        else:
            logger.info('Saved %d KO links for %s (%s)'%(written, org.org_id,
                                                     fname))

    logger.info('Exporting Kegg reactions data')

    for org in organism.getAll():
        fname = 'reactions_%s.tsv'%org.org_id
        written = _exportLinks(fname, kegg.getAllReactions(org.org_id),
                               'rn:')

        if written == 0:
            os.remove(fname)
            logger.warning('No Kegg reactions available for %s'%org.org_id)
        else:
            logger.info('Saved %d Kegg reactions links for %s (%s)'%
                    (written, org.org_id, fname))

    proj = Project(project)

    if proj.isPanGenome():
        logger.info('Exporting pangenome data')

        dG = genome.getPanGenome()
        if len(dG) == 0:
            logger.warning('No pangenome available')
        else:
            fname = 'pangenome.tsv'
            with open(fname, 'w') as fout:
                # items() is available on both Python 2 and Python 3
                for group, prots in dG.items():
                    for prot in prots:
                        fout.write('%s\t%s\n'%(group,prot))

            logger.info('Exported %d orthologs (%s)'%(len(dG),fname))

            fname = 'pangenome_category.tsv'
            groupOrgs = genome.getPanGenomeOrgs()
            categories = (('core', genome.getCore),
                          ('accessory', genome.getAcc),
                          ('unique', genome.getUni))
            with open(fname, 'w') as fout:
                for category, getGroups in categories:
                    for group in getGroups():
                        fout.write('%s\t%s\t%s\n'%(group.group_id,
                                            category,
                                            '-'.join(groupOrgs[group.group_id])))

            logger.info('Exported orthologs informations (%s)'%fname)

    return True
Esempio n. 7
0
def dGenomeStats(project, svg=False, doPrint=True):
    '''
    Report the genomic statistics of the project as tab-delimited tables

    project -- project handle, passed to Project, Organism, Genome and Kegg
    svg     -- forwarded as-is to plotMapBars and plotPanGenome
    doPrint -- if True the tables are printed to standard output,
               otherwise they are sent to the logger (info level)

    For 'single' and 'pangenome' projects one row per organism is
    produced, plus one row per pangenome category when the project is
    flagged as a pangenome. For 'mutants' projects one table is produced
    for each non-mutant organism and its mutants; the counts of deletion
    and insertion mutants are reported relative to their reference.

    Returns False when the project kind allows no statistics, True
    otherwise.
    '''
    def emit(line):
        # Single-argument print(...) is valid on both Python 2 and 3
        if doPrint:
            print(line)
        else:
            logger.info(line)

    # Which project are we talking about?
    kind = dSetKind(project)

    proj = Project(project)
    organism = Organism(project)
    genome = Genome(project)
    kegg = Kegg(project)

    if kind == 'single' or kind == 'pangenome':
        logger.info('Single genomes stats')
        # Single genomes stats
        # Header
        emit('\t'.join( ['ID', 'name', 'description', 'proteome size',
                         'mapped to kegg', 'KEGG orthology IDs',
                         'pathways', 'reactions'] ))

        lOrg = []
        for org in organism.getAll():
            org_id = org.org_id
            name = org.name if org.name else 'NONE'
            description = org.description if org.description else 'NONE'

            prots = genome.howMany(org_id)

            mapped = kegg.howManyMapped(org_id)
            ko = kegg.howManyKO(org_id)
            react = kegg.howManyReactions(org_id)
            path = kegg.howManyPathways(org_id)

            # Columns follow the header: pathways come before reactions
            emit('\t'.join( [str(x) for x in [org_id, name, description,
                                              prots, mapped, ko, path,
                                              react]] ))

            lOrg.append([org_id, prots, mapped, react])

        plotMapBars(lOrg, 'Single genomes statistics', 'single', svg)

        if proj.isPanGenome():
            logger.info('Pangenome stats')
            # Pangenome stats
            # Header
            emit('\t'.join( ['kind', 'size',
                             'mapped to kegg', 'KEGG orthology IDs',
                             'pathways', 'reactions'] ))

            core, acc, uni = (genome.getLenCore(), genome.getLenAcc(),
                              genome.getLenUni())

            # Each count is queried once and shared by table and plot
            stats = []
            lPanGenome = []
            for category, label, size in (('core', 'Core', core),
                                          ('accessory', 'Accessory', acc),
                                          ('unique', 'Unique', uni)):
                mapped = kegg.howManyMapped(pangenome=category)
                ko = kegg.howManyKO(pangenome=category)
                path = kegg.howManyPathways(pangenome=category)
                react = kegg.howManyReactions(pangenome=category)

                stats.append('\t'.join( [str(x) for x in [category, size,
                                                          mapped, ko, path,
                                                          react]] ))
                lPanGenome.append([label, size, mapped, react])

            for stat in stats:
                emit(stat)

            plotMapBars(lPanGenome, 'PanGenome statistics', 'pangenome_stats',
                        svg)
            plotPanGenome(core, acc, uni, svg)

    elif kind == 'mutants':
        refs = [org.org_id
                    for org in organism.getAll()
                    if not organism.isMutant(org.org_id)]

        # Header
        header = '\t'.join( ['ID', 'name', 'description', 'kind', 'proteome size',
                                'mapped to kegg', 'reactions'] )

        for ref_id in refs:
            logger.info('Mutants of %s stats'%ref_id)

            emit(header)

            muts = list(organism.getOrgMutants(ref_id))

            lOrg = []
            for org_id in [ref_id] + muts:
                org = organism.getOrg(org_id)

                name = org.name if org.name else 'NONE'
                description = org.description if org.description else 'NONE'

                # Anything that is not a deletion/insertion is a wild-type
                mkind = org.mkind if org.mkind in ['deletion', 'insertion'] else 'wild-type'

                prots = genome.howMany(org_id)
                mapped = kegg.howManyMapped(org_id)
                react = kegg.howManyReactions(org_id)

                # Mutant counts are expressed relative to the reference
                if mkind == 'deletion':
                    prots = genome.howMany(ref_id) - prots
                    mapped = kegg.howManyMapped(ref_id) - mapped
                    react = kegg.howManyReactions(ref_id) - react
                elif mkind == 'insertion':
                    prots = genome.howMany(ref_id) + prots
                    mapped += kegg.howManyMapped(ref_id)
                    react += kegg.howManyReactions(ref_id)

                emit('\t'.join( [str(x) for x in [org_id, name, description,
                                                  mkind, prots, mapped,
                                                  react]] ))

                lOrg.append([org_id, prots, mapped, react])

            plotMapBars(lOrg, 'Wild-type (%s) and mutants statistics'%ref_id,
                        '%s'%ref_id, svg)

    else:
        logger.info('No statistics can be computed at this time')
        return False

    return True
Esempio n. 8
0
def fetchKegg(project, keeptrying=False):
    '''
    Fetch the whole KEGG metabolic map, if needed, and store it in the project

    keeptrying is forwarded to KeggNet.

    The download takes place when the project holds no KEGG data yet or
    when the remote KEGG release compares greater than the recorded one.
    Returns False if the connectivity check or the download thread fails,
    True otherwise.
    '''
    from ductape.kegg.kegg import KeggNet, KeggAPI, BaseKegg
    from ductape.terminal import RunThread

    proj = Project(project)

    logger.info('Checking connectivity')
    checker = BaseKegg()
    try:
        checker.checkConnection()
    except Exception as err:
        logger.error(str(err))
        return False

    api = KeggAPI()
    try:
        api.getDBVersion()
        release = api.result[1]
    except Exception as err:
        logger.warning('Could not fetch the KEGG DB version (%s)'%str(err))
        release = None

    # Check if we have to fetch the whole kegg DB
    if not proj.isKegg():
        fetch = True
    elif release and proj.kegg < release:
        logger.warning('A new KEGG DB version is available (%s, was %s)'%
                       (str(release), str(proj.kegg)))
        fetch = True
    else:
        fetch = False

    if not fetch:
        logger.info('KEGG db is up-to-date')
        return True

    logger.info('Fetching the whole KEGG metabolic map')
    if release:
        logger.info('KEGG DB release %s'%str(release))
    kegg = Kegg(project)

    knet = KeggNet(keeptrying=keeptrying)
    if not RunThread(knet):
        return False

    # Details: (storing method, result attribute, log message)
    details = ((kegg.addPathways, 'path', 'Added %d Path IDs'),
               (kegg.addReactions, 'react', 'Added %d Re IDs'),
               (kegg.addCompounds, 'comp', 'Added %d Co IDs'),
               (kegg.addRPairs, 'rpair', 'Added %d RPair IDs'))
    for store, attr, message in details:
        store(getattr(knet.result, attr))
        logger.info(message%len(getattr(knet.result, attr)))

    # Links: (storing method, result attribute)
    links = ((kegg.addPathReacts, 'pathreact'),
             (kegg.addReactComps, 'reactcomp'),
             (kegg.addCompReacts, 'compreact'),
             (kegg.addPathComps, 'pathcomp'),
             (kegg.addReactRPairs, 'reactrpair'),
             (kegg.addRPairReacts, 'rpairreact'))
    for store, attr in links:
        store(getattr(knet.result, attr))
    logger.info('Added Kegg links')

    # HTML maps
    kegg.addPathHtml(knet.result.pathmaps)
    logger.info('Added Kegg maps')

    # Add the release version
    if release:
        proj.setKegg(release)

    return True
Esempio n. 9
0
def fetchKegg(project, keeptrying=False):
    '''
    Make sure the project holds an up-to-date copy of the KEGG metabolic map

    After a connectivity check, the remote KEGG release is compared with
    the one recorded in the project; the whole map is downloaded (with
    keeptrying forwarded to KeggNet) when the project has no KEGG data or
    the remote release compares greater.

    Returns False if the connectivity check or the download thread fails,
    True otherwise.
    '''
    from ductape.kegg.kegg import KeggNet, KeggAPI, BaseKegg
    from ductape.terminal import RunThread

    proj = Project(project)

    logger.info('Checking connectivity')
    connection = BaseKegg()
    try:
        connection.checkConnection()
    except Exception as problem:
        logger.error(str(problem))
        return False

    versionApi = KeggAPI()
    try:
        versionApi.getDBVersion()
        release = versionApi.result[1]
    except Exception as problem:
        logger.warning('Could not fetch the KEGG DB version (%s)' % str(problem))
        release = None

    # A project without KEGG data always needs the whole DB
    fetch = not proj.isKegg()
    if not fetch and release and proj.kegg < release:
        logger.warning('A new KEGG DB version is available (%s, was %s)' %
                       (str(release), str(proj.kegg)))
        fetch = True

    if not fetch:
        logger.info('KEGG db is up-to-date')
        return True

    logger.info('Fetching the whole KEGG metabolic map')
    if release:
        logger.info('KEGG DB release %s' % str(release))
    kegg = Kegg(project)

    knet = KeggNet(keeptrying=keeptrying)
    if not RunThread(knet):
        return False
    net = knet.result

    # Details
    kegg.addPathways(net.path)
    logger.info('Added %d Path IDs' % len(net.path))
    kegg.addReactions(net.react)
    logger.info('Added %d Re IDs' % len(net.react))
    kegg.addCompounds(net.comp)
    logger.info('Added %d Co IDs' % len(net.comp))
    kegg.addRPairs(net.rpair)
    logger.info('Added %d RPair IDs' % len(net.rpair))

    # Links
    kegg.addPathReacts(net.pathreact)
    kegg.addReactComps(net.reactcomp)
    kegg.addCompReacts(net.compreact)
    kegg.addPathComps(net.pathcomp)
    kegg.addReactRPairs(net.reactrpair)
    kegg.addRPairReacts(net.rpairreact)
    logger.info('Added Kegg links')

    # HTML maps
    kegg.addPathHtml(net.pathmaps)
    logger.info('Added Kegg maps')

    # Add the release version
    if release:
        proj.setKegg(release)

    return True