def run(self):
    """Run cycle step 4: gene deactivation for ``self.thisDir``.

    Writes ``xml/cycle.step4.start.xml`` and ``xml/cycle.step4.end.xml``
    timestamps around the work.  ``logs/gene_deactivation.log`` doubles as
    the completion marker: if it already exists the step body is skipped.

    * When ``self.options.noGeneDeactivation`` is false (the default), the
      command returned by ``lsc.evolverGeneDeactivationStep`` is executed
      and its combined stdout/stderr is stored in the log file.
    * Otherwise ``intra/evannots.gff`` is copied to ``annots.gff`` and the
      log file is created empty with ``touch``.
    """
    logger.info('CycleStep4 object running, %s' % self.thisDir)
    lsc.verifyDirExists(self.thisDir)
    lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step4.start.xml'))
    # lsc.subTypeTimestamp(self.thisDir, 'cycle', 'CycleStep4_start')
    outname = os.path.join(self.thisDir, 'logs', 'gene_deactivation.log')
    if not os.path.exists(outname):
        if not self.options.noGeneDeactivation:
            # by default gene deactivation is turned on.
            cmd = lsc.evolverGeneDeactivationStep(self.thisDir, self.thisParentDir)
            p = subprocess.Popen(cmd, cwd=self.getLocalTempDir(),
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
            # NOTE(review): the child's exit status is not inspected here;
            # confirm that is intentional.
            out = p.communicate()[0]
            # Write to a temporary name and rename afterwards so that the
            # log (the completion marker) never exists half-written.
            with open(outname + '.tmp', 'w') as f:
                f.write(out)
            os.rename(outname + '.tmp', outname)
        else:
            # this could cause a proliferation of gene creation.
            # The source and destination must be built from self.thisDir;
            # a bare ``thisDir`` is not defined in this scope.
            cmds = [
                [lsc.which('cp'),
                 os.path.join(self.thisDir, 'intra', 'evannots.gff'),
                 os.path.join(self.thisDir, 'annots.gff')],
                [lsc.which('touch'), outname],
            ]
            lsc.runCommands(cmds, self.getLocalTempDir())
    lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step4.end.xml'))
def main():
    """Convert each selected cycle directory's ``seq.rev`` to named fasta.

    Every directory directly under ``options.simDir`` is considered.  Unless
    ``options.allCycles`` is set, only directories whose basename appears in
    the leaf set extracted from ``options.inputNewick`` are processed.  For
    each one, four commands are handed to ``lsc.runCommands`` in mode 's':
    ``evolver_cvt`` (rev -> ``seq.fa.tmp``), ``mv`` to ``seq.fa``, ``sed``
    prefixing every fasta header with the cleaned directory name (written to
    ``seq.name.fa.tmp``), and a final ``mv`` to ``seq.name.fa``.
    """
    usage = ('usage: %prog --simDir path/to/dir [options]\n\n'
             '%prog takes in a simulation directory and then extracts\n'
             'the sequence of each leaf node in fasta format and stores them\n'
             'in the respective step\'s directory.')
    parser = OptionParser(usage=usage)
    initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)

    cycleDirs = directoriesOnly(glob.glob(os.path.join(options.simDir, '*')))
    leafs = {}
    extractLeafs(newickTreeParser(options.inputNewick, 0.0), leafs)

    for cycleDir in cycleDirs:
        dirName = os.path.basename(cycleDir)
        if not (options.allCycles or dirName in leafs):
            continue

        # Strip brackets and single quotes from the directory name before
        # using it as the fasta header prefix.
        cleanName = dirName.replace('[', '').replace(']', '').replace('\'', '')

        fasta = os.path.join(cycleDir, 'seq.fa')
        namedFasta = os.path.join(cycleDir, 'seq.name.fa')

        # One row per command: (argv, stdin file, stdout file).
        # Outputs go to a .tmp name first and are moved into place after.
        steps = [
            ([lsc.which('evolver_cvt'),
              '-fromrev', os.path.join(cycleDir, 'seq.rev'),
              '-tofasta', fasta + '.tmp'],
             None, None),
            ([lsc.which('mv'), fasta + '.tmp', fasta],
             None, None),
            ([lsc.which('sed'), r"s/^>/>%s./;" % cleanName],
             fasta, namedFasta + '.tmp'),
            ([lsc.which('mv'), namedFasta + '.tmp', namedFasta],
             None, None),
        ]
        cmds = [argv for argv, _, _ in steps]
        inPipes = [src for _, src, _ in steps]
        outPipes = [dst for _, _, dst in steps]
        lsc.runCommands(cmds, os.curdir, outPipes=outPipes, inPipes=inPipes, mode='s')
def run(self):
    """Convert ``aln.rev`` in ``self.thisDir`` to a MAF file if missing.

    The output is ``<alignName>.maf`` when ``self.isLeaf`` is true and
    ``<alignName>.tmp.maf`` otherwise.  Nothing is run when that file
    already exists.  ``evolver_cvt`` writes to ``<output>.tmp`` and a
    following ``mv`` moves it to the final name.
    """
    logger.info('Extract object running, thisDir: %s' % (self.thisDir))
    suffix = '.maf' if self.isLeaf else '.tmp.maf'
    mafPath = os.path.join(self.thisDir, self.alignName + suffix)
    if os.path.exists(mafPath):
        return

    partialPath = mafPath + '.tmp'
    convert = [lsc.which('evolver_cvt'),
               '-fromrev', os.path.join(self.thisDir, 'aln.rev'),
               '-tomaf', partialPath]
    moveIntoPlace = [lsc.which('mv'), partialPath, mafPath]
    lsc.runCommands([convert, moveIntoPlace], self.getLocalTempDir())
def run(self):
    """Run cycle step 3: convert TRF output to GFF and merge annotations.

    Writes ``xml/cycle.step3.start.xml`` and ``xml/cycle.step3.end.xml``
    timestamps around the work.  For every ``intra/*.dat`` file whose
    ``<chr>.trfannots.gff`` does not yet exist, ``evolver_trf2gff.py`` is
    run with its output sent to ``<gff>.tmp`` and then moved into place.
    The commands returned by ``lsc.evolverIntraMergeCmds`` are run next,
    and finally ``CycleStep4`` is registered as the follow-on target.

    Raises:
        RuntimeError: if a ``.dat`` filename does not match the expected
            ``chr*.outseq.fa*.dat`` pattern.
    """
    logger.info('CycleStep3 object running, %s' % self.thisDir)
    lsc.verifyDirExists(self.thisDir)
    lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step3.start.xml'))

    # trf
    regex = r'^(chr\S+)\.outseq\.fa.*\.dat'
    pat = re.compile(regex)
    files = glob.glob(os.path.join(self.thisDir, 'intra', '*.dat'))
    cmds = []
    outPipes = []
    followCmds = []
    followPipes = []
    for f in files:
        # each file is the trf output for one chromosome
        m = pat.match(os.path.basename(f))
        if m is None:
            raise RuntimeError('Regex "%s" failed on filename %s'
                               % (regex, os.path.basename(f)))
        outname = os.path.join(self.thisDir, 'intra', m.group(1) + '.trfannots.gff')
        if not os.path.exists(outname):
            # convert the .dat to .gff
            cmds.append([lsc.which('evolver_trf2gff.py'), f])
            outPipes.append(outname + '.tmp')
            # atomic files: write to .tmp, then move to the final name
            followCmds.append([lsc.which('mv'), outname + '.tmp', outname])
            followPipes.append(None)
    lsc.runCommands(cmds, self.getLocalTempDir(), outPipes=outPipes, mode='p')
    lsc.runCommands(followCmds, self.getLocalTempDir(), outPipes=followPipes, mode='p')

    catCmd, evoCmd, cvtCmd, followCmds = lsc.evolverIntraMergeCmds(self.thisDir, self.theChild)
    # NOTE(review): the meaning of mode 'p' is defined by lsc.runCommands,
    # which is not visible here; confirm these three commands do not depend
    # on each other's output.
    lsc.runCommands([catCmd, evoCmd, cvtCmd], self.getLocalTempDir(),
                    outPipes=[os.path.join(self.thisDir, 'intra', 'evannots.gff.tmp'),
                              None, None],
                    mode='p')
    lsc.runCommands(followCmds, self.getLocalTempDir())
    lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step3.end.xml'))
    self.setFollowOnTarget(CycleStep4(self.thisNewickStr, self.thisParentDir,
                                      self.thisStepLength, self.options))
def main():
    """Produce ``seq.fa`` and ``seq.name.fa`` for the selected cycle dirs.

    Candidate directories are the entries directly under ``options.simDir``
    that pass ``directoriesOnly``.  A directory is processed when
    ``options.allCycles`` is set or its basename is a key in the leaf
    mapping filled by ``extractLeafs`` from ``options.inputNewick``.  Each
    processed directory gets four commands, passed to ``lsc.runCommands``
    in mode 's': convert ``seq.rev`` to fasta, move it to ``seq.fa``,
    rewrite headers with ``sed`` so they start with the cleaned directory
    name, and move the result to ``seq.name.fa``.
    """
    usage = ('usage: %prog --simDir path/to/dir [options]\n\n'
             '%prog takes in a simulation directory and then extracts\n'
             'the sequence of each leaf node in fasta format and stores them\n'
             'in the respective step\'s directory.')
    parser = OptionParser(usage=usage)
    initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)

    candidates = glob.glob(os.path.join(options.simDir, '*'))
    candidates = directoriesOnly(candidates)
    leafs = {}
    tree = newickTreeParser(options.inputNewick, 0.0)
    extractLeafs(tree, leafs)

    for simDir in candidates:
        baseName = os.path.basename(simDir)
        if not (options.allCycles or baseName in leafs):
            continue

        # Header prefix: the directory name without brackets or quotes.
        cleanName = baseName
        for unwanted in ('[', ']', '\''):
            cleanName = cleanName.replace(unwanted, '')

        seqFa = os.path.join(simDir, 'seq.fa')
        seqFaTmp = os.path.join(simDir, 'seq.fa.tmp')
        nameFa = os.path.join(simDir, 'seq.name.fa')
        nameFaTmp = os.path.join(simDir, 'seq.name.fa.tmp')

        # (argv, stdin file, stdout file) for each command, in run order.
        pipeline = (
            ([lsc.which('evolver_cvt'), '-fromrev',
              os.path.join(simDir, 'seq.rev'), '-tofasta', seqFaTmp],
             None, None),
            ([lsc.which('mv'), seqFaTmp, seqFa], None, None),
            ([lsc.which('sed'), r"s/^>/>%s./;" % cleanName], seqFa, nameFaTmp),
            ([lsc.which('mv'), nameFaTmp, nameFa], None, None),
        )
        cmds = []
        inPipes = []
        outPipes = []
        for argv, stdinPath, stdoutPath in pipeline:
            cmds.append(argv)
            inPipes.append(stdinPath)
            outPipes.append(stdoutPath)
        lsc.runCommands(cmds, os.curdir, outPipes=outPipes, inPipes=inPipes, mode='s')