def run(self):
        """Run cycle step 4: gene deactivation for this cycle directory.

        Writes the ``cycle.step4`` start and end timestamps. Unless
        ``logs/gene_deactivation.log`` already exists, one of two things
        happens in between:

        * default: the command built by ``lsc.evolverGeneDeactivationStep``
          is run and its combined stdout/stderr is stored in the log. The
          output is written to ``<log>.tmp`` and then renamed, so the log
          only appears once it has been written completely.
        * ``self.options.noGeneDeactivation`` set: ``intra/evannots.gff`` is
          copied to ``annots.gff`` and the log is created with ``touch``.
        """
        logger.info('CycleStep4 object running, %s' % self.thisDir)
        lsc.verifyDirExists(self.thisDir)
        lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step4.start.xml'))

        outname = os.path.join(self.thisDir, 'logs', 'gene_deactivation.log')
        if not os.path.exists(outname):
            if not self.options.noGeneDeactivation:
                # by default gene deactivation is turned on.
                cmd = lsc.evolverGeneDeactivationStep(self.thisDir, self.thisParentDir)
                p = subprocess.Popen(cmd, cwd = self.getLocalTempDir(),
                                     stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
                out = p.communicate()[0]
                # NOTE(review): the exit status of the command is not checked,
                # so the log is produced even if the command failed -- confirm
                # that this is intended.
                with open(outname + '.tmp', 'w') as f:
                    f.write(out)
                # rename only after the file is closed so the log is atomic.
                os.rename(outname + '.tmp', outname)
            else:
                # this could cause a proliferation of gene creation.
                cmd = [lsc.which('cp')]
                # the paths must come from self.thisDir; a bare 'thisDir'
                # is not defined in this scope.
                cmd.append(os.path.join(self.thisDir, 'intra', 'evannots.gff'))
                cmd.append(os.path.join(self.thisDir, 'annots.gff'))
                cmds = [cmd]
                cmds.append([lsc.which('touch'), outname])
                lsc.runCommands(cmds, self.getLocalTempDir())
        lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step4.end.xml'))
def main():
    """Parse the command line and export each selected cycle's sequence.

    For every directory under ``options.simDir`` (only leaves of the input
    newick tree unless ``options.allCycles`` is set) the following commands
    are handed to ``lsc.runCommands``: convert ``seq.rev`` to ``seq.fa`` and
    produce ``seq.name.fa`` with every fasta header prefixed by the cleaned
    directory name. Each file is written under a ``.tmp`` name and then
    moved into place.
    """
    usage = ('usage: %prog --simDir path/to/dir [options]\n\n'
             '%prog takes in a simulation directory and then extracts\n'
             'the sequence of each leaf node in fasta format and stores them\n'
             'in the respective step\'s directory.')
    parser = OptionParser(usage = usage)
    initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)

    cycleDirs = directoriesOnly(glob.glob(os.path.join(options.simDir, '*')))
    leafs = {}
    extractLeafs(newickTreeParser(options.inputNewick, 0.0), leafs)

    for cycleDir in cycleDirs:
        dirName = os.path.basename(cycleDir)
        if not (options.allCycles or dirName in leafs):
            continue

        # strip brackets and single quotes from the directory name
        cleanName = dirName.replace('[', '').replace(']', '').replace('\'', '')

        rawFasta = os.path.join(cycleDir, 'seq.fa')
        namedFasta = os.path.join(cycleDir, 'seq.name.fa')

        # one row per command: (argv, stdin file or None, stdout file or None)
        steps = [
            ([lsc.which('evolver_cvt'), '-fromrev', os.path.join(cycleDir, 'seq.rev'),
              '-tofasta', rawFasta + '.tmp'], None, None),
            ([lsc.which('mv'), rawFasta + '.tmp', rawFasta], None, None),
            ([lsc.which('sed'), r"s/^>/>%s./;" % cleanName], rawFasta, namedFasta + '.tmp'),
            ([lsc.which('mv'), namedFasta + '.tmp', namedFasta], None, None),
        ]
        cmds = [step[0] for step in steps]
        inPipes = [step[1] for step in steps]
        outPipes = [step[2] for step in steps]

        lsc.runCommands(cmds, os.curdir, outPipes = outPipes, inPipes = inPipes, mode = 's')
    def run(self):
        """Run stats step 3 and chain StatsStep4 as the follow-on target.

        The commands and output pipes come from ``lsc.statsStep3Cmds``; they
        are executed between the ``stats.step3`` start and end timestamps.
        """
        stepDir = self.thisDir
        logger.info('StatsStep3 object running, %s' % stepDir)
        lsc.verifyDirExists(stepDir)

        xmlDir = os.path.join(stepDir, 'xml')
        lsc.createTimestamp(os.path.join(xmlDir, 'stats.step3.start.xml'))

        stepCmds, stepPipes = lsc.statsStep3Cmds(stepDir, self.thisParentDir, self.options)
        lsc.runCommands(stepCmds, self.getLocalTempDir(), outPipes = stepPipes)

        lsc.createTimestamp(os.path.join(xmlDir, 'stats.step3.end.xml'))
        nextStep = StatsStep4(stepDir, self.thisParentDir, self.options)
        self.setFollowOnTarget(nextStep)
    def run(self):
        """Run stats step 4; for a leaf, trigger the simulation wrap-up call.

        The commands and output pipes come from ``lsc.statsStep4Cmds``; they
        are executed between the ``stats.step4`` start and end timestamps.
        """
        stepDir = self.thisDir
        logger.info('StatsStep4 object running, %s' % stepDir)
        lsc.verifyDirExists(stepDir)

        xmlDir = os.path.join(stepDir, 'xml')
        lsc.createTimestamp(os.path.join(xmlDir, 'stats.step4.start.xml'))

        stepCmds, stepPipes = lsc.statsStep4Cmds(stepDir, self.thisParentDir, self.options)
        lsc.runCommands(stepCmds, self.getLocalTempDir(), outPipes = stepPipes)

        lsc.createTimestamp(os.path.join(xmlDir, 'stats.step4.end.xml'))
        if not lsc.isLeaf(stepDir):
            return
        lsc.lastOneOutTurnOffTheLightsSimulation(self.options.simDir, self.options)
 def run(self):
     """Merge the root maf with ``burnin.tmp.maf`` into ``burnin.maf``.

     Nothing is done unless ``burnin.tmp.maf`` exists in the root directory
     and ``burnin.maf`` does not exist yet.
     """
     logger.info('MergeTreeFollow object running, name: %s' % (self.name))
     rootDir = self.options.rootDir
     mergedMaf = os.path.join(rootDir, 'burnin.maf')
     burninTmpMaf = os.path.join(rootDir, 'burnin.tmp.maf')
     if not os.path.exists(burninTmpMaf) or os.path.exists(mergedMaf):
         return

     treelessRootCmd = ['-treelessRoot2=%s' % lsc.burninRootName(self.options)]
     rootMaf = os.path.join(rootDir, self.options.rootName + '.maf')
     mergeCmds = lsc.buildMergeCommand(rootMaf, burninTmpMaf, mergedMaf,
                                       treelessRootCmd, self.name, self.options)
     lsc.runCommands(mergeCmds, self.getLocalTempDir())
 def run(self):
     """Convert ``aln.rev`` in this directory to a maf file, if not yet done.

     The output is ``<alignName>.maf`` when ``self.isLeaf`` is true and
     ``<alignName>.tmp.maf`` otherwise. It is written under a ``.tmp`` name
     and then moved into place.
     """
     logger.info('Extract object running, thisDir: %s' % (self.thisDir))
     ext = '.maf' if self.isLeaf else '.tmp.maf'
     outname = os.path.join(self.thisDir, self.alignName + ext)
     if os.path.exists(outname):
         return

     partial = outname + '.tmp'
     convertCmd = [lsc.which('evolver_cvt'),
                   '-fromrev', os.path.join(self.thisDir, 'aln.rev'),
                   '-tomaf', partial]
     renameCmd = [lsc.which('mv'), partial, outname]
     lsc.runCommands([convertCmd, renameCmd], self.getLocalTempDir())
 def run(self):
     """Run both transalign phases for this directory, then the wrap-up calls.

     Phase one runs the commands from ``lsc.transalignStepCmds_1``; phase two
     is delegated to ``lsc.runTransalignStepCmds_2``. Both happen between the
     ``transalign`` start and end timestamps.
     """
     stepDir, parentDir = self.thisDir, self.thisParentDir
     logger.info('TransalignStep object running, thisDir: %s thisParentDir: %s'
                 % (stepDir, parentDir))
     lsc.verifyDirExists(stepDir)

     xmlDir = os.path.join(stepDir, 'xml')
     lsc.createTimestamp(os.path.join(xmlDir, 'transalign.start.xml'))

     phaseOneCmds, phaseOnePipes = lsc.transalignStepCmds_1(stepDir, parentDir, self.options)
     lsc.runCommands(phaseOneCmds, self.getLocalTempDir(), outPipes = phaseOnePipes)

     lsc.runTransalignStepCmds_2(stepDir, parentDir, self.getLocalTempDir(), self.options)

     lsc.createTimestamp(os.path.join(xmlDir, 'transalign.end.xml'))
     lsc.lastOneOutTurnOffTheLightsCycle(stepDir)
     if not lsc.isLeaf(stepDir):
         return
     lsc.lastOneOutTurnOffTheLightsSimulation(self.options.simDir, self.options)
 def run(self):
     """Perform the 'lookup' merge of this node's maf with its parent maf.

     Skipped at the root and when ``<nodeParent>.maf`` already exists in
     this node's directory.
     """
     logger.info('MergeMafsUp object running, name: %s nodeParent: %s'
                 % (self.name, self.nodeParent))
     # The 'lookup' merge only applies below the root. It combines the result
     # of the 'lookdown' merge (the maf holding this node and all of its
     # descendants) with the node-parent maf, yielding a maf the parent can
     # use when merging its children.
     if self.name == self.options.rootName:
         return

     nodeDir = os.path.join(self.options.simDir, self.name)
     mergedMaf = os.path.join(nodeDir, self.nodeParent + '.maf')
     if os.path.exists(mergedMaf):
         return

     treelessRootCmd = ['-treelessRoot2=%s' % self.nodeParent]
     subtreeMaf = os.path.join(nodeDir, self.name + '.maf')
     parentTmpMaf = os.path.join(nodeDir, self.nodeParent + '.tmp.maf')
     mergeCmds = lsc.buildMergeCommand(subtreeMaf, parentTmpMaf, mergedMaf,
                                       treelessRootCmd, self.name, self.options)
     lsc.runCommands(mergeCmds, self.getLocalTempDir())
 def run(self):
     """Perform the 'lookdown' merge of the two child mafs for this node.

     The merge is skipped when ``<name>.maf`` already exists in this node's
     directory. A MergeMafsUp follow-on target is always set afterwards.
     """
     logger.info('MergeTreeDown object running, name: %s nodeParent: %s'
                 % (self.name, self.nodeParent))
     # one -treelessRootN flag (N = 1 or 2) for each child that is a leaf
     treelessRootCmd = ['-treelessRoot%d=%s' % (idx + 1, self.name)
                        for idx in xrange(0, 2)
                        if self.nodeDict[self.name].children[idx] in self.leafsDict]

     # the 'lookdown' merge is done for every node, the root included.
     simDir = self.options.simDir
     mergedMaf = os.path.join(simDir, self.name, self.name + '.maf')
     if not os.path.exists(mergedMaf):
         kids = self.nodeDict[self.name].children
         firstMaf = os.path.join(simDir, kids[0], self.name + '.maf')
         secondMaf = os.path.join(simDir, kids[1], self.name + '.maf')
         mergeCmds = lsc.buildMergeCommand(firstMaf, secondMaf, mergedMaf,
                                           treelessRootCmd, self.name, self.options)
         lsc.runCommands(mergeCmds, self.getLocalTempDir())

     followOn = MergeMafsUp(self.nt, self.nodeDict, self.nodeParentDict,
                            self.leafsDict, self.nodeParent, self.options)
     self.setFollowOnTarget(followOn)
    def run(self):
        """Run the per-chromosome part of cycle step 2 for ``self.thisChr``.

        Between the chromosome-specific start and end timestamps this runs,
        in order: the evolver intra step, the .rev to fasta conversion, the
        TRF wrapper, and the commands that move the TRF results out of the
        local temp directory.
        """
        chrNameDict, _revChrNameDict = lsc.extractChrNamesDict(self.thisDir)
        chrLabel = chrNameDict[self.thisChr]
        logger.info('CycleStep2Chromosome object running, %s %s %s'
                    % (self.thisDir, self.thisChr, chrLabel))
        lsc.verifyDirExists(self.thisDir)

        xmlDir = os.path.join(self.thisDir, 'xml')
        startXml = os.path.join(xmlDir, 'cycle.step2.%s.start.xml' % chrLabel)
        lsc.createTimestamp(startXml, extra = {'name': self.thisChr})

        # evolver intra on one chromosome
        intraCmds = lsc.evolverIntraStepCmd(self.thisDir, self.theChild, self.thisStepLength,
                                            self.thisChr, self.options.seed,
                                            self.options.paramsDir, self.getLocalTempDir(),
                                            self.options)
        lsc.runCommands(intraCmds, self.getLocalTempDir())

        # evolver conversion from .rev to fasta in localTempDir
        fastaCmds = lsc.evolverIntraStepToFastaCmd(self.thisDir, self.thisStepLength,
                                                   self.thisChr, self.options.paramsDir,
                                                   self.getLocalTempDir())
        lsc.runCommands(fastaCmds, self.getLocalTempDir())

        # trf wrapper
        lsc.callEvolverIntraStepTRFCmd(self.thisDir, self.thisChr, self.getLocalTempDir())

        # move the resulting trf files out of localTempDir
        moveCmds = lsc.evolverIntraStepMoveTRFCmd(self.thisDir, self.thisChr,
                                                  self.getLocalTempDir())
        lsc.runCommands(moveCmds, self.getLocalTempDir(), mode = 'p')

        endXml = os.path.join(xmlDir, 'cycle.step2.%s.end.xml' % chrNameDict[self.thisChr])
        lsc.createTimestamp(endXml, extra = {'name': self.thisChr})
Esempio n. 11
0
def populateRootDir(options):
    """ Create the basic directory structure of a simulation: the root genome
    directory and the parameters directory.

    Also rewrites several path attributes on options: paramsInputDir keeps the
    original paramsDir, while paramsDir, parentDir, simDir and rootDir are
    pointed at locations under options.outDir. Finishes by calling
    lsc.createRootXmls.
    """
    outDir = options.outDir
    paramsOut = os.path.join(outDir, 'parameters')
    # mkdir goes through runCommands too, which keeps the timing of the
    # directory creation and the following cp jobs simple.
    lsc.runCommands([['mkdir', '-p', paramsOut]], outDir)

    paramFiles = ['model.txt']
    if not options.noMEs:
        paramFiles.extend(['model.mes.txt', 'mes.cfg'])

    jobs = [['cp', '-r', options.rootInputDir, os.path.join(outDir, options.rootName)]]
    for fileName in paramFiles:
        jobs.append(['cp', os.path.join(options.paramsDir, fileName), paramsOut])
    lsc.runCommands(jobs, outDir, mode = 'p')

    options.paramsInputDir = options.paramsDir
    options.paramsDir = os.path.abspath(paramsOut)
    options.parentDir = os.path.abspath(os.path.join(outDir, options.rootName))
    options.simDir = os.path.split(options.parentDir)[0]
    options.rootDir = os.path.abspath(os.path.join(options.simDir, options.rootName))
    lsc.createRootXmls(sys.argv, options)
    def run(self):
        """Run cycle step 3 and chain CycleStep4 as the follow-on target.

        Between the ``cycle.step3`` start and end timestamps this:

        1. converts every ``intra/*.dat`` file (the trf output for one
           chromosome) to ``intra/<chr>.trfannots.gff`` unless that file
           already exists; each gff is written to ``.tmp`` and then moved
           into place;
        2. runs the cat / evo / cvt commands returned by
           ``lsc.evolverIntraMergeCmds`` followed by their follow-up commands.

        Raises:
            RuntimeError: if a ``.dat`` file name does not match the expected
                ``chr*.outseq.fa*.dat`` pattern.
        """
        logger.info('CycleStep3 object running, %s' % self.thisDir)
        lsc.verifyDirExists(self.thisDir)
        lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step3.start.xml'))

        # trf
        regex = r'^(chr\S+)\.outseq\.fa.*\.dat'
        # compiled once and reused for every file in the loop below.
        pat = re.compile(regex)
        files = glob.glob(os.path.join(self.thisDir, 'intra', '*.dat'))
        cmds = []
        outPipes = []
        followCmds = []
        followPipes = []
        for f in files:
            # each file is the trf output for one chromosome
            base = os.path.basename(f)
            m = pat.match(base)
            if m is None:
                raise RuntimeError('Regex "%s" failed on filename %s' % (regex, base))
            outname = os.path.join(self.thisDir, 'intra', m.group(1) + '.trfannots.gff')
            if not os.path.exists(outname):
                # convert the .dat to .gff
                cmd = [lsc.which('evolver_trf2gff.py'), f]
                cmds.append(cmd)
                outPipes.append(outname + '.tmp')
                # atomic files
                followCmds.append([lsc.which('mv'), outname + '.tmp', outname])
                followPipes.append(None)
        # all conversions run first, then the renames.
        lsc.runCommands(cmds, self.getLocalTempDir(), outPipes = outPipes, mode = 'p')
        lsc.runCommands(followCmds, self.getLocalTempDir(), outPipes = followPipes, mode = 'p')

        catCmd, evoCmd, cvtCmd, followCmds = lsc.evolverIntraMergeCmds(self.thisDir, self.theChild)

        # only the cat command has its stdout captured (evannots.gff.tmp).
        lsc.runCommands([catCmd, evoCmd, cvtCmd], self.getLocalTempDir(),
                         outPipes = [os.path.join(self.thisDir, 'intra', 'evannots.gff.tmp'), None, None],
                         mode = 'p')
        lsc.runCommands(followCmds, self.getLocalTempDir())

        lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step3.end.xml'))
        self.setFollowOnTarget(CycleStep4(self.thisNewickStr, self.thisParentDir,
                                            self.thisStepLength, self.options))
Esempio n. 13
0
def main():
    """Parse the command line and export each selected cycle's sequence.

    For every directory under ``options.simDir`` (only leaves of the input
    newick tree unless ``options.allCycles`` is set) the following commands
    are handed to ``lsc.runCommands``: convert ``seq.rev`` to ``seq.fa`` and
    produce ``seq.name.fa`` with every fasta header prefixed by the cleaned
    directory name. Each file is written under a ``.tmp`` name and then
    moved into place.
    """
    usage = ('usage: %prog --simDir path/to/dir [options]\n\n'
             '%prog takes in a simulation directory and then extracts\n'
             'the sequence of each leaf node in fasta format and stores them\n'
             'in the respective step\'s directory.')
    parser = OptionParser(usage=usage)
    initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)

    candidates = glob.glob(os.path.join(options.simDir, '*'))
    cycleDirs = directoriesOnly(candidates)
    leafs = {}
    tree = newickTreeParser(options.inputNewick, 0.0)
    extractLeafs(tree, leafs)

    for cycleDir in cycleDirs:
        baseName = os.path.basename(cycleDir)
        if not (options.allCycles or baseName in leafs):
            continue

        # drop brackets and single quotes from the directory name
        cleanName = baseName
        for unwanted in ('[', ']', '\''):
            cleanName = cleanName.replace(unwanted, '')

        fasta = os.path.join(cycleDir, 'seq.fa')
        fastaTmp = os.path.join(cycleDir, 'seq.fa.tmp')
        named = os.path.join(cycleDir, 'seq.name.fa')
        namedTmp = os.path.join(cycleDir, 'seq.name.fa.tmp')

        # rows of (argv, stdin file or None, stdout file or None)
        steps = [
            ([lsc.which('evolver_cvt'), '-fromrev', os.path.join(cycleDir, 'seq.rev'),
              '-tofasta', fastaTmp], None, None),
            ([lsc.which('mv'), fastaTmp, fasta], None, None),
            ([lsc.which('sed'), r"s/^>/>%s./;" % cleanName], fasta, namedTmp),
            ([lsc.which('mv'), namedTmp, named], None, None),
        ]
        cmds, inPipes, outPipes = map(list, zip(*steps))

        lsc.runCommands(cmds, os.curdir, outPipes=outPipes, inPipes=inPipes, mode='s')