def run(self):
    """Run cycle step 4 for ``self.thisDir``: gene deactivation, or its bypass.

    Writes ``cycle.step4.start.xml`` / ``cycle.step4.end.xml`` timestamps under
    ``<thisDir>/xml``. The work in between is skipped when
    ``<thisDir>/logs/gene_deactivation.log`` already exists. Otherwise:

    * by default, the command built by ``lsc.evolverGeneDeactivationStep`` is
      run and its combined stdout/stderr is saved to that log file;
    * when ``self.options.noGeneDeactivation`` is set, ``intra/evannots.gff``
      is copied to ``annots.gff`` and the log file is touched instead.
    """
    logger.info('CycleStep4 object running, %s' % self.thisDir)
    lsc.verifyDirExists(self.thisDir)
    lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step4.start.xml'))

    outname = os.path.join(self.thisDir, 'logs', 'gene_deactivation.log')
    if not os.path.exists(outname):
        if not self.options.noGeneDeactivation:
            # by default gene deactivation is turned on.
            cmd = lsc.evolverGeneDeactivationStep(self.thisDir, self.thisParentDir)
            p = subprocess.Popen(cmd, cwd=self.getLocalTempDir(),
                                 stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            out = p.communicate()[0]
            # NOTE(review): the exit status of the command is not inspected here;
            # confirm whether a non-zero return code should abort the step.
            # Write to a temporary name and rename so the log appears atomically.
            with open(outname + '.tmp', 'w') as f:
                f.write(out)
            os.rename(outname + '.tmp', outname)
        else:
            # this could cause a proliferation of gene creation.
            # Fixed: these paths previously referenced an undefined bare
            # `thisDir`, raising NameError whenever this branch was taken.
            cmd = [lsc.which('cp')]
            cmd.append(os.path.join(self.thisDir, 'intra', 'evannots.gff'))
            cmd.append(os.path.join(self.thisDir, 'annots.gff'))
            cmds = [cmd]
            cmds.append([lsc.which('touch'), outname])
            lsc.runCommands(cmds, self.getLocalTempDir())
    lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step4.end.xml'))
def main():
    """Convert selected cycle directories' ``seq.rev`` files to fasta.

    Every directory directly under ``options.simDir`` is considered. Unless
    ``options.allCycles`` is set, only directories whose basename is a key of
    the leaf dict filled by ``extractLeafs`` are processed. For each one, four
    commands run serially: ``evolver_cvt`` writes ``seq.fa.tmp``, which is
    moved to ``seq.fa``; ``sed`` then prefixes each fasta header with the
    cleaned directory name into ``seq.name.fa.tmp``, which is moved to
    ``seq.name.fa``.
    """
    usage = ('usage: %prog --simDir path/to/dir [options]\n\n'
             '%prog takes in a simulation directory and then extracts\n'
             'the sequence of each leaf node in fasta format and stores them\n'
             "in the respective step's directory.")
    parser = OptionParser(usage=usage)
    initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)

    cycleDirs = directoriesOnly(glob.glob(os.path.join(options.simDir, '*')))
    leafs = {}
    extractLeafs(newickTreeParser(options.inputNewick, 0.0), leafs)

    for d in cycleDirs:
        if not (options.allCycles or os.path.basename(d) in leafs):
            continue

        # Strip brackets and single quotes from the directory name.
        cleanName = os.path.basename(d)
        for unwanted in ('[', ']', '\''):
            cleanName = cleanName.replace(unwanted, '')

        rawFasta = os.path.join(d, 'seq.fa')
        namedFasta = os.path.join(d, 'seq.name.fa')

        # Each entry is (command, stdin file, stdout file).
        steps = [
            ([lsc.which('evolver_cvt'), '-fromrev', os.path.join(d, 'seq.rev'),
              '-tofasta', rawFasta + '.tmp'], None, None),
            ([lsc.which('mv'), rawFasta + '.tmp', rawFasta], None, None),
            ([lsc.which('sed'), r"s/^>/>%s./;" % cleanName],
             rawFasta, namedFasta + '.tmp'),
            ([lsc.which('mv'), namedFasta + '.tmp', namedFasta], None, None),
        ]
        cmds = [step[0] for step in steps]
        inPipes = [step[1] for step in steps]
        outPipes = [step[2] for step in steps]
        lsc.runCommands(cmds, os.curdir, outPipes=outPipes, inPipes=inPipes, mode='s')
def run(self):
    """Run stats step 3 for ``self.thisDir`` and queue StatsStep4 as follow-on.

    The commands and their output pipes come from ``lsc.statsStep3Cmds``;
    start and end timestamp XML files are written around their execution.
    """
    logger.info('StatsStep3 object running, %s' % self.thisDir)
    lsc.verifyDirExists(self.thisDir)

    startXml = os.path.join(self.thisDir, 'xml', 'stats.step3.start.xml')
    lsc.createTimestamp(startXml)

    stepCmds, stepPipes = lsc.statsStep3Cmds(self.thisDir, self.thisParentDir,
                                             self.options)
    lsc.runCommands(stepCmds, self.getLocalTempDir(), outPipes=stepPipes)

    endXml = os.path.join(self.thisDir, 'xml', 'stats.step3.end.xml')
    lsc.createTimestamp(endXml)

    nextStep = StatsStep4(self.thisDir, self.thisParentDir, self.options)
    self.setFollowOnTarget(nextStep)
def run(self):
    """Run stats step 4 for ``self.thisDir``.

    The commands and their output pipes come from ``lsc.statsStep4Cmds``;
    start and end timestamp XML files are written around their execution.
    When ``lsc.isLeaf`` reports this directory as a leaf,
    ``lsc.lastOneOutTurnOffTheLightsSimulation`` is called afterwards.
    """
    logger.info('StatsStep4 object running, %s' % self.thisDir)
    lsc.verifyDirExists(self.thisDir)

    startXml = os.path.join(self.thisDir, 'xml', 'stats.step4.start.xml')
    lsc.createTimestamp(startXml)

    stepCmds, stepPipes = lsc.statsStep4Cmds(self.thisDir, self.thisParentDir,
                                             self.options)
    lsc.runCommands(stepCmds, self.getLocalTempDir(), outPipes=stepPipes)

    endXml = os.path.join(self.thisDir, 'xml', 'stats.step4.end.xml')
    lsc.createTimestamp(endXml)

    if not lsc.isLeaf(self.thisDir):
        return
    lsc.lastOneOutTurnOffTheLightsSimulation(self.options.simDir, self.options)
def run(self):
    """Merge the root maf with ``burnin.tmp.maf`` into ``burnin.maf``.

    Nothing is done unless ``<rootDir>/burnin.tmp.maf`` exists and
    ``<rootDir>/burnin.maf`` does not. The merge commands are produced by
    ``lsc.buildMergeCommand``.
    """
    logger.info('MergeTreeFollow object running, name: %s' % (self.name))
    rootDir = self.options.rootDir
    outname = os.path.join(rootDir, 'burnin.maf')

    burninPresent = os.path.exists(os.path.join(self.options.rootDir, 'burnin.tmp.maf'))
    if not burninPresent or os.path.exists(outname):
        return

    treelessRootCmd = ['-treelessRoot2=%s' % lsc.burninRootName(self.options)]
    rootMaf = os.path.join(self.options.rootDir, self.options.rootName + '.maf')
    burninMaf = os.path.join(self.options.rootDir, 'burnin.tmp.maf')
    mergeCmds = lsc.buildMergeCommand(rootMaf, burninMaf, outname,
                                      treelessRootCmd, self.name, self.options)
    lsc.runCommands(mergeCmds, self.getLocalTempDir())
def run(self):
    """Convert ``<thisDir>/aln.rev`` to maf unless the output already exists.

    The output is ``<alignName>.maf`` when ``self.isLeaf`` is true and
    ``<alignName>.tmp.maf`` otherwise. ``evolver_cvt`` writes to a ``.tmp``
    file which is then moved onto the final name.
    """
    logger.info('Extract object running, thisDir: %s' % (self.thisDir))
    suffix = '.maf' if self.isLeaf else '.tmp.maf'
    outname = os.path.join(self.thisDir, self.alignName + suffix)
    if os.path.exists(outname):
        return

    convert = [lsc.which('evolver_cvt'),
               '-fromrev', os.path.join(self.thisDir, 'aln.rev'),
               '-tomaf', outname + '.tmp']
    publish = [lsc.which('mv'), outname + '.tmp', outname]
    lsc.runCommands([convert, publish], self.getLocalTempDir())
def run(self):
    """Run the transalign step for ``self.thisDir``.

    Executes the commands from ``lsc.transalignStepCmds_1``, then
    ``lsc.runTransalignStepCmds_2``, bracketed by start/end timestamp XML
    files. Afterwards ``lsc.lastOneOutTurnOffTheLightsCycle`` is called, and
    for a leaf directory ``lsc.lastOneOutTurnOffTheLightsSimulation`` as well.
    """
    logger.info('TransalignStep object running, thisDir: %s thisParentDir: %s'
                % (self.thisDir, self.thisParentDir))
    lsc.verifyDirExists(self.thisDir)

    startXml = os.path.join(self.thisDir, 'xml', 'transalign.start.xml')
    lsc.createTimestamp(startXml)

    firstCmds, firstPipes = lsc.transalignStepCmds_1(self.thisDir,
                                                     self.thisParentDir,
                                                     self.options)
    lsc.runCommands(firstCmds, self.getLocalTempDir(), outPipes=firstPipes)
    lsc.runTransalignStepCmds_2(self.thisDir, self.thisParentDir,
                                self.getLocalTempDir(), self.options)

    endXml = os.path.join(self.thisDir, 'xml', 'transalign.end.xml')
    lsc.createTimestamp(endXml)

    lsc.lastOneOutTurnOffTheLightsCycle(self.thisDir)
    if lsc.isLeaf(self.thisDir):
        lsc.lastOneOutTurnOffTheLightsSimulation(self.options.simDir, self.options)
def run(self):
    """Perform the 'lookup' merge for this node; a no-op at the root.

    Merges ``<name>/<name>.maf`` (the result of the 'lookdown' merge, i.e.
    the maf holding all descendant sequences including this node) with
    ``<name>/<nodeParent>.tmp.maf`` to produce ``<name>/<nodeParent>.maf``,
    the maf the parent can use to merge its children. Skipped when that
    output already exists.
    """
    logger.info('MergeMafsUp object running, name: %s nodeParent: %s'
                % (self.name, self.nodeParent))
    # The 'lookup' aspect of the merge only applies below the root.
    if self.name == self.options.rootName:
        return

    outname = os.path.join(self.options.simDir, self.name, self.nodeParent + '.maf')
    if os.path.exists(outname):
        return

    treelessRootCmd = ['-treelessRoot2=%s' % self.nodeParent]
    descendantsMaf = os.path.join(self.options.simDir, self.name, self.name + '.maf')
    parentMaf = os.path.join(self.options.simDir, self.name,
                             self.nodeParent + '.tmp.maf')
    mergeCmds = lsc.buildMergeCommand(descendantsMaf, parentMaf, outname,
                                      treelessRootCmd, self.name, self.options)
    lsc.runCommands(mergeCmds, self.getLocalTempDir())
def run(self):
    """Perform the 'lookdown' merge for this node, then queue MergeMafsUp.

    The two children's ``<name>.maf`` files are merged into
    ``<name>/<name>.maf`` unless that file already exists. A
    ``-treelessRootN=<name>`` flag is passed for each of the two children
    (N = 1, 2) that appears in ``self.leafsDict``. This merge is performed
    for every node, including the root.
    """
    logger.info('MergeTreeDown object running, name: %s nodeParent: %s'
                % (self.name, self.nodeParent))
    treelessRootCmd = [
        '-treelessRoot%d=%s' % (i + 1, self.name)
        for i in (0, 1)
        if self.nodeDict[self.name].children[i] in self.leafsDict
    ]

    outname = os.path.join(self.options.simDir, self.name, self.name + '.maf')
    if not os.path.exists(outname):
        childMafs = [
            os.path.join(self.options.simDir,
                         self.nodeDict[self.name].children[idx],
                         self.name + '.maf')
            for idx in (0, 1)
        ]
        mergeCmds = lsc.buildMergeCommand(childMafs[0], childMafs[1], outname,
                                          treelessRootCmd, self.name, self.options)
        lsc.runCommands(mergeCmds, self.getLocalTempDir())

    followOn = MergeMafsUp(self.nt, self.nodeDict, self.nodeParentDict,
                           self.leafsDict, self.nodeParent, self.options)
    self.setFollowOnTarget(followOn)
def run(self):
    """Run cycle step 2 (evolver intra) for the single chromosome ``self.thisChr``.

    In order: run the evolver intra commands, convert the result from .rev
    to fasta in the local temp dir, call the TRF wrapper, and move the
    resulting TRF files out of the local temp dir. Per-chromosome start and
    end timestamp XML files are written around the work.
    """
    chrNameDict, _ = lsc.extractChrNamesDict(self.thisDir)
    chrLabel = chrNameDict[self.thisChr]
    logger.info('CycleStep2Chromosome object running, %s %s %s'
                % (self.thisDir, self.thisChr, chrLabel))
    lsc.verifyDirExists(self.thisDir)

    startXml = os.path.join(self.thisDir, 'xml',
                            'cycle.step2.%s.start.xml' % chrLabel)
    lsc.createTimestamp(startXml, extra={'name': self.thisChr})

    # evolver intra on one chromosome
    intraCmds = lsc.evolverIntraStepCmd(self.thisDir, self.theChild,
                                        self.thisStepLength, self.thisChr,
                                        self.options.seed, self.options.paramsDir,
                                        self.getLocalTempDir(), self.options)
    lsc.runCommands(intraCmds, self.getLocalTempDir())

    # evolver conversion from .rev to fasta in localTempDir
    fastaCmds = lsc.evolverIntraStepToFastaCmd(self.thisDir, self.thisStepLength,
                                               self.thisChr, self.options.paramsDir,
                                               self.getLocalTempDir())
    lsc.runCommands(fastaCmds, self.getLocalTempDir())

    # trf wrapper
    lsc.callEvolverIntraStepTRFCmd(self.thisDir, self.thisChr, self.getLocalTempDir())

    # move the resulting trf files out of localTempDir
    moveCmds = lsc.evolverIntraStepMoveTRFCmd(self.thisDir, self.thisChr,
                                              self.getLocalTempDir())
    lsc.runCommands(moveCmds, self.getLocalTempDir(), mode='p')

    endXml = os.path.join(self.thisDir, 'xml',
                          'cycle.step2.%s.end.xml' % chrLabel)
    lsc.createTimestamp(endXml, extra={'name': self.thisChr})
def populateRootDir(options):
    """Create the basic directory layout for the root genome and parameters.

    This is the first order of business in a simulation. The root input
    directory is copied to ``<outDir>/<rootName>`` and the model files to
    ``<outDir>/parameters`` (including ``model.mes.txt`` and ``mes.cfg``
    unless ``options.noMEs`` is set).

    ``options`` is then updated in place: ``paramsInputDir`` keeps the
    original ``paramsDir``, and ``paramsDir``, ``parentDir``, ``simDir`` and
    ``rootDir`` are set relative to the new layout. Finally
    ``lsc.createRootXmls`` is called with ``sys.argv`` and the options.
    """
    paramsOut = os.path.join(options.outDir, 'parameters')

    # mkdir is used here for simplicity in timing the creation of the directory
    # and the subsequent cp jobs for parameters.
    lsc.runCommands([['mkdir', '-p', paramsOut]], options.outDir)

    copyJobs = [
        ['cp', '-r', options.rootInputDir,
         os.path.join(options.outDir, options.rootName)],
        ['cp', os.path.join(options.paramsDir, 'model.txt'), paramsOut],
    ]
    if not options.noMEs:
        for meFile in ('model.mes.txt', 'mes.cfg'):
            copyJobs.append(['cp', os.path.join(options.paramsDir, meFile), paramsOut])
    lsc.runCommands(copyJobs, options.outDir, mode='p')

    options.paramsInputDir = options.paramsDir
    options.paramsDir = os.path.abspath(paramsOut)
    options.parentDir = os.path.abspath(os.path.join(options.outDir, options.rootName))
    options.simDir = os.path.dirname(options.parentDir)
    options.rootDir = os.path.abspath(os.path.join(options.simDir, options.rootName))
    lsc.createRootXmls(sys.argv, options)
def run(self):
    """Run cycle step 3 for ``self.thisDir`` and queue CycleStep4 as follow-on.

    For every ``intra/*.dat`` file (the TRF output for one chromosome), a
    ``<chr>.trfannots.gff`` file is produced with ``evolver_trf2gff.py``
    unless it already exists. Output goes to a ``.tmp`` file first and is
    then moved into place. Afterwards the commands returned by
    ``lsc.evolverIntraMergeCmds`` are run. Start and end timestamp XML files
    are written around the work.

    Raises:
        RuntimeError: if a ``.dat`` filename does not match the expected
            ``chr*.outseq.fa*.dat`` pattern.
    """
    logger.info('CycleStep3 object running, %s' % self.thisDir)
    lsc.verifyDirExists(self.thisDir)
    lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step3.start.xml'))

    # trf
    regex = r'^(chr\S+)\.outseq\.fa.*\.dat'
    # Compiled once and used for every file; previously the compiled
    # pattern was built but the loop matched against the raw string instead.
    pat = re.compile(regex)
    files = glob.glob(os.path.join(self.thisDir, 'intra', '*.dat'))
    cmds = []
    outPipes = []
    followCmds = []
    followPipes = []
    for f in files:
        # each file is the trf output for one chromosome
        m = pat.match(os.path.basename(f))
        if m is None:
            raise RuntimeError('Regex "%s" failed on filename %s'
                               % (regex, os.path.basename(f)))
        outname = os.path.join(self.thisDir, 'intra', m.group(1) + '.trfannots.gff')
        if not os.path.exists(outname):
            # convert the .dat to .gff
            cmd = [lsc.which('evolver_trf2gff.py'), f]
            cmds.append(cmd)
            outPipes.append(outname + '.tmp')
            # atomic files
            followCmds.append([lsc.which('mv'), outname + '.tmp', outname])
            followPipes.append(None)
    lsc.runCommands(cmds, self.getLocalTempDir(), outPipes=outPipes, mode='p')
    lsc.runCommands(followCmds, self.getLocalTempDir(), outPipes=followPipes, mode='p')

    catCmd, evoCmd, cvtCmd, followCmds = lsc.evolverIntraMergeCmds(self.thisDir,
                                                                   self.theChild)
    lsc.runCommands([catCmd, evoCmd, cvtCmd], self.getLocalTempDir(),
                    outPipes=[os.path.join(self.thisDir, 'intra', 'evannots.gff.tmp'),
                              None, None],
                    mode='p')
    lsc.runCommands(followCmds, self.getLocalTempDir())
    lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step3.end.xml'))
    self.setFollowOnTarget(CycleStep4(self.thisNewickStr, self.thisParentDir,
                                      self.thisStepLength, self.options))
def main():
    """Extract fasta sequences for the selected cycle directories.

    Directories directly under ``options.simDir`` are processed when
    ``options.allCycles`` is set or their basename is a key of the leaf dict
    filled by ``extractLeafs``. For each, ``seq.rev`` is converted to
    ``seq.fa`` and a copy with every fasta header prefixed by the cleaned
    directory name is written to ``seq.name.fa``. Both outputs are written
    to ``.tmp`` files first and then moved into place; the four commands run
    serially.
    """
    usage = ('usage: %prog --simDir path/to/dir [options]\n\n'
             '%prog takes in a simulation directory and then extracts\n'
             'the sequence of each leaf node in fasta format and stores them\n'
             "in the respective step's directory.")
    parser = OptionParser(usage=usage)
    initOptions(parser)
    options, _ = parser.parse_args()
    checkOptions(options, parser)

    simEntries = glob.glob(os.path.join(options.simDir, '*'))
    cycleDirs = directoriesOnly(simEntries)
    leafs = {}
    tree = newickTreeParser(options.inputNewick, 0.0)
    extractLeafs(tree, leafs)

    for d in cycleDirs:
        if not options.allCycles and os.path.basename(d) not in leafs:
            continue

        # Directory names may carry brackets and single quotes; drop them.
        cleanName = (os.path.basename(d)
                     .replace('[', '')
                     .replace(']', '')
                     .replace('\'', ''))

        fastaTmp = os.path.join(d, 'seq.fa.tmp')
        fasta = os.path.join(d, 'seq.fa')
        namedTmp = os.path.join(d, 'seq.name.fa.tmp')
        named = os.path.join(d, 'seq.name.fa')

        toFasta = [lsc.which('evolver_cvt'),
                   '-fromrev', os.path.join(d, 'seq.rev'),
                   '-tofasta', fastaTmp]
        publishFasta = [lsc.which('mv'), fastaTmp, fasta]
        prefixHeaders = [lsc.which('sed'), r"s/^>/>%s./;" % cleanName]
        publishNamed = [lsc.which('mv'), namedTmp, named]

        # Only the sed step reads from / writes to redirected files.
        cmds = [toFasta, publishFasta, prefixHeaders, publishNamed]
        inPipes = [None, None, fasta, None]
        outPipes = [None, None, namedTmp, None]
        lsc.runCommands(cmds, os.curdir, outPipes=outPipes, inPipes=inPipes, mode='s')