def run(self):
    """Chunk the input sequence and schedule one preprocessing job per chunk.

    Runs ``cactus_blast_chunkSequences`` on ``self.inSequencePath``, adds a
    ``PreprocessChunk`` child target for every chunk it reports and sets a
    ``MergeChunks`` follow-on target that combines the per-chunk outputs into
    ``self.outSequencePath``.
    """
    logger.info("Preparing sequence for preprocessing")
    # Chunk it up; every non-empty line of the chunker's output is one chunk.
    inChunkDirectory = makeSubDir(os.path.join(self.getGlobalTempDir(), "preprocessChunksIn"))
    chunkerOutput = popenCatch(
        "cactus_blast_chunkSequences %s %i 0 %s %s"
        % (getLogLevelString(), self.prepOptions.chunkSize, inChunkDirectory, self.inSequencePath)
    )
    inChunkList = [chunk for chunk in chunkerOutput.split("\n") if chunk != ""]
    outChunkDirectory = makeSubDir(os.path.join(self.getGlobalTempDir(), "preprocessChunksOut"))
    outChunkList = []
    totalChunks = len(inChunkList)
    # Number of chunks handed to each PreprocessChunk. It does not depend on
    # the loop index, so it is computed once instead of once per chunk.
    inChunkNumber = int(max(1, math.ceil(totalChunks * self.prepOptions.proportionToSample)))
    # For each input chunk we create an output chunk; it is the output chunks
    # that get concatenated together.
    for i, inChunk in enumerate(inChunkList):
        outChunk = os.path.join(outChunkDirectory, "chunk_%i" % i)
        outChunkList.append(outChunk)
        assert 0 < inChunkNumber <= totalChunks
        # Window of chunks flanking and including the current chunk. Floor
        # division keeps the slice index an integer on every Python version.
        j = max(0, i - inChunkNumber // 2)
        inChunks = inChunkList[j:j + inChunkNumber]
        if len(inChunks) < inChunkNumber:
            # Wrap around to the start, as if the list were circular.
            inChunks += inChunkList[:inChunkNumber - len(inChunks)]
        assert len(inChunks) == inChunkNumber
        self.addChildTarget(
            PreprocessChunk(
                self.prepOptions, inChunks, float(inChunkNumber) / totalChunks, inChunk, outChunk
            )
        )
    # Follow on to merge chunks.
    self.setFollowOnTarget(MergeChunks(self.prepOptions, outChunkList, self.outSequencePath))
def run(self):
    """Prepare the input sequence and either copy it or hand it to the preprocessor.

    A directory input is first concatenated into a single temporary file.
    Assembly statistics are logged to the master. When the config node has no
    "preprocessor" elements the file is copied to the output path, otherwise a
    ``BatchPreprocessor`` child target is added.
    """
    source = self.inputSequenceFileOrDirectory
    if not os.path.isdir(source):
        inputSequenceFile = source
    else:
        # The files are in a sub-dir, so rip them out into one temp file.
        inputSequenceFile = getTempFile(rootDir=self.getGlobalTempDir())
        members = [os.path.join(source, name) for name in os.listdir(source)]
        catFiles(members, inputSequenceFile)

    assert inputSequenceFile != self.outputSequenceFile

    prepXmlElems = self.configNode.findall("preprocessor")
    analysisString = runCactusAnalyseAssembly(inputSequenceFile)
    statsMessage = (
        "Before running any preprocessing on the assembly: %s got following stats (assembly may be listed as temp file if input sequences from a directory): %s"
        % (source, analysisString)
    )
    self.logToMaster(statsMessage)

    if len(prepXmlElems) != 0:
        logger.info("Adding child batch_preprocessor target")
        self.addChildTarget(
            BatchPreprocessor(prepXmlElems, inputSequenceFile, self.outputSequenceFile, 0)
        )
    else:
        # Nothing to preprocess: just cp the file to the output file.
        system("cp %s %s" % (inputSequenceFile, self.outputSequenceFile))
def runWorkflow_multipleExamples(inputGenFunction,
                                 testNumber=1,
                                 testRestrictions=(TestStatus.TEST_SHORT, TestStatus.TEST_MEDIUM,
                                                   TestStatus.TEST_LONG, TestStatus.TEST_VERY_LONG,),
                                 inverseTestRestrictions=False,
                                 batchSystem="single_machine",
                                 buildAvgs=False, buildReference=False,
                                 buildReferenceSequence=False,
                                 buildCactusPDF=False, buildAdjacencyPDF=False,
                                 buildReferencePDF=False,
                                 makeCactusTreeStats=False, makeMAFs=False,
                                 configFile=None, buildJobTreeStats=False):
    """Run the test workflow on ``testNumber`` generated examples.

    Nothing is run unless the current test status is listed in
    ``testRestrictions`` (or is NOT listed, when ``inverseTestRestrictions``
    is true). Each example gets a fresh temp directory that is removed after
    the workflow finishes.
    """
    statusListed = TestStatus.getTestStatus() in testRestrictions
    if inverseTestRestrictions:
        shouldRun = not statusListed
    else:
        shouldRun = statusListed
    if not shouldRun:
        return

    for test in xrange(testNumber):
        tempDir = getTempDirectory(os.getcwd())
        sequences, newickTreeString = inputGenFunction(regionNumber=test, tempDir=tempDir)
        runWorkflow_TestScript(
            sequences,
            newickTreeString,
            batchSystem=batchSystem,
            buildAvgs=buildAvgs,
            buildReference=buildReference,
            buildCactusPDF=buildCactusPDF,
            buildAdjacencyPDF=buildAdjacencyPDF,
            makeCactusTreeStats=makeCactusTreeStats,
            makeMAFs=makeMAFs,
            configFile=configFile,
            buildJobTreeStats=buildJobTreeStats,
        )
        system("rm -rf %s" % tempDir)
        logger.info("Finished random test %i" % test)
def main():
    """Parse the command line and run the expectation-maximisation trials in a job tree.

    Raises RuntimeError if positional arguments are given or if the job tree
    reports failed jobs.
    """
    # Build the option parser.
    parser = OptionParser(usage="usage: workingDir [options]", version="%prog 0.1")
    options = Options()
    parser.add_option("--sequences", dest="sequences",
                      help="Quoted list of fasta files containing sequences")
    parser.add_option("--alignments", dest="alignments", help="Cigar file ")
    addExpectationMaximisationOptions(parser, options)
    Stack.addJobTreeOptions(parser)

    options, args = parser.parse_args()
    setLoggingFromOptions(options)

    if len(args) != 0:
        raise RuntimeError(
            "Expected no arguments, got %s arguments: %s" % (len(args), " ".join(args)))

    # Log the inputs.
    summary = (
        "Got '%s' sequences, '%s' alignments file, '%s' output model and '%s' iterations of training"
        % (options.sequences, options.alignments, options.outputModel, options.iterations))
    logger.info(summary)

    # This is where jobTree gets invoked.
    rootTarget = Target.makeTargetFn(
        expectationMaximisationTrials,
        args=(options.sequences, options.alignments, options.outputModel, options))
    failedJobs = Stack(rootTarget).startJobTree(options)
    if failedJobs != 0:
        raise RuntimeError("Got failed jobs")
def killJobs(self, jobIDs):
    """Kill the given jobs and wait (bounded) until they report an exit code.

    Each job is removed from the batch system's bookkeeping and a kill is
    requested from Slurm. The method then polls the jobs' exit codes up to
    five times, sleeping five seconds between attempts. A failed kill request
    and jobs still running after the last attempt are logged, not raised.
    """
    for jobID in jobIDs:
        slurmJobID = self.getSlurmJobID(jobID)
        logger.info("DEL: " + str(slurmJobID))
        self.currentjobs.remove(jobID)
        try:
            Slurm.killJob(slurmJobID)
        except Exception:
            # Best effort: the job may already be gone. Record the failure
            # instead of silently discarding it; the polling loop below
            # still checks whether the job really stopped.
            logger.exception("Failed to kill slurm job %s" % str(slurmJobID))
        # NOTE(review): the original author questioned this bookkeeping
        # ("What is this????"); it is kept unchanged.
        del self.jobIDs[self.slurmJobTasks[jobID]]
        del self.slurmJobTasks[jobID]

    toKill = set(jobIDs)
    maxattempts = 5
    attempts = 0
    while toKill and attempts < maxattempts:
        for jobID in list(toKill):
            if SlurmBatchSystem.getJobExitCode(self.slurmJobIDs[jobID]) is not None:
                toKill.remove(jobID)
        if toKill:
            logger.critical(
                "Tried to kill some jobs, but something happened and they are still going, so I'll try again"
            )
            time.sleep(5)
        attempts += 1

    if toKill:
        # Previously the method returned silently here, leaving no trace
        # that some jobs were never confirmed dead.
        logger.critical(
            "Gave up waiting for %i job(s) to die after %i attempts" % (len(toKill), maxattempts)
        )
def runComparisonOfBlastScriptVsNaiveBlast(self, blastMode):
    """Check that cactus_blast output is nearly equivalent to a naive blast run.

    For every unordered pair of species in each encode region, both aligners
    are run and their cigar outputs are validated and compared. With
    ``blastMode == "allAgainstAll"`` both sequences are given as inputs;
    otherwise the second sequence is passed as the target.
    """
    encodeRegions = ["ENm00" + str(regionIndex) for regionIndex in xrange(1, 2)]  # Could go to six
    # Other species to try: "rat", "monodelphis", "macaque", "chimp"
    species = ("human", "mouse", "dog")
    for encodeRegion in encodeRegions:
        regionPath = os.path.join(self.encodePath, encodeRegion)
        for position, species1 in enumerate(species):
            for species2 in species[position + 1:]:
                seqFile1 = os.path.join(regionPath, "%s.%s.fa" % (species1, encodeRegion))
                seqFile2 = os.path.join(regionPath, "%s.%s.fa" % (species2, encodeRegion))

                # Baseline: the simple blast.
                runNaiveBlast(seqFile1, seqFile2, self.tempOutputFile, self.tempDir)
                logger.info("Ran the naive blast okay")

                # The cactus blast pipeline, in a fresh toil directory.
                toilDir = os.path.join(getTempDirectory(self.tempDir), "toil")
                if blastMode != "allAgainstAll":
                    runCactusBlast(sequenceFiles=[seqFile1],
                                   alignmentsFile=self.tempOutputFile2,
                                   toilDir=toilDir,
                                   chunkSize=500000, overlapSize=10000,
                                   targetSequenceFiles=[seqFile2])
                else:
                    runCactusBlast(sequenceFiles=[seqFile1, seqFile2],
                                   alignmentsFile=self.tempOutputFile2,
                                   toilDir=toilDir,
                                   chunkSize=500000, overlapSize=10000)
                logger.info("Ran cactus_blast okay")

                logger.critical("Comparing cactus_blast and naive blast; using mode: %s" % blastMode)
                for cigarFile in (self.tempOutputFile, self.tempOutputFile2):
                    checkCigar(cigarFile)
                compareResultsFile(self.tempOutputFile, self.tempOutputFile2)
def bsub(bsubline):
    """Run a bsub command line through the shell and return the job id it reports.

    ``bsubline`` is a sequence of strings joined with spaces. The job id is
    taken from the second whitespace-separated token of the first output
    line, with any surrounding ``<``/``>`` characters stripped.
    """
    command = " ".join(bsubline)
    process = subprocess.Popen(command, shell=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    firstLine = process.stdout.readline()
    logger.info("BSUB: " + firstLine)
    jobToken = firstLine.strip().split()[1]
    jobID = int(jobToken.strip('<>'))
    logger.debug("Got the job id: %s" % (str(jobID)))
    return jobID
def run(self):
    """Fetch the existing bam files, merge and sort them, then follow on with Snp.

    Files in ``self.files`` that do not exist are skipped. A single fetched
    file is renamed to the merge file; several are merged with
    ``samtools merge``. The sorted result is copied to ``self.outdir``.
    """
    localTempDir = self.getLocalTempDir()
    localfiles = []
    for remotePath in self.files:
        if os.path.exists(remotePath):
            # The running count of fetched files keeps local names unique.
            stem = os.path.basename(remotePath).split('.')[0]
            localname = os.path.join(localTempDir, "%s%d.bam" % (stem, len(localfiles)))
            system("scp -C %s %s" % (remotePath, localname))
            localfiles.append(localname)
        # HACK: files that are missing are silently skipped.

    mergeFile = os.path.join(localTempDir, "merge.bam")
    if len(localfiles) != 1:
        logger.info("Merging bams...\n")
        system("samtools merge %s %s" % (mergeFile, " ".join(localfiles)))
    else:
        system("mv %s %s" % (localfiles[0], mergeFile))

    sortPrefix = os.path.join(localTempDir, "mergeSorted")
    system("samtools sort %s %s" % (mergeFile, sortPrefix))
    system("cp %s.bam %s" % (sortPrefix, self.outdir))

    # Get Snps info:
    self.setFollowOnTarget(Snp(self.outdir, self.options))
def run(self):
    """Schedule every experiment and every sample-to-reference mapping, then plots.

    Adds one ``RunExperiment`` child per experiment, one ``RunMapping`` child
    per (sample, reference) pair, and sets ``Plots`` as the follow-on target.
    """
    setLogLevel("DEBUG")
    options = self.options
    system("mkdir -p %s" % (options.outdir))

    experiments, samples = getExperiments(options.cactusdir)
    for index, exp in enumerate(experiments):
        sample = samples[index]
        logger.info("Experiment %s, sample %s\n" % (exp, sample))
        self.addChildTarget(RunExperiment(options, exp, sample))

    # Map to other refs. The directory layout is going to be:
    #   outdir/
    #       otherRefs/
    #           sampleNA*/
    #               hg19/
    #               apd/
    #               ...
    refdir = os.path.join(options.outdir, "otherRefs")
    system("mkdir -p %s" % refdir)
    for sample in samples:
        sampleDir = os.path.join(refdir, sample)
        readdir = os.path.join(options.readdir, sample)
        system("mkdir -p %s" % sampleDir)
        for ref in options.refs:
            rdir = os.path.join(sampleDir, ref)
            system("mkdir -p %s" % rdir)
            refPath = os.path.join(options.refdir, ref)
            self.addChildTarget(RunMapping(options, refPath, rdir, readdir))

    # Done mapping, now draw the plots.
    plotsDir = os.path.join(options.outdir, "plots")
    self.setFollowOnTarget(Plots(options.outdir, plotsDir, options.cleanup))
def run(self):
    """Copy the gene file into the local temp dir and run cactus_genemapChain on it."""
    localGeneFile = os.path.join(self.getLocalTempDir(), "refgene.bed")
    system("cp %s %s" % (self.geneFile, localGeneFile))
    command = "cactus_genemapChain -c %s -o \"%s\" -s \"%s\" -g \"%s\"" % (
        self.dbStr, self.outputFile, self.refSpecies, localGeneFile)
    system(command)
    logger.info("Done genemapChain for %s\n" % self.region)
def parseJobFile(absFileName):
    """Return the job read from ``absFileName``, or None if reading raises IOError."""
    try:
        return readJob(absFileName)
    except IOError:
        logger.info("Encountered error while parsing job file %s, so we will ignore it" % absFileName)
    return None
def testCactusSetup(self):
    """Feed randomly generated inputs to cactus setup.

    For each of ``self.testNo`` rounds, random sequences and a tree are
    generated, cactus setup is run twice against the same experiment
    database, and the database and temp directory are then removed.
    """
    for test in xrange(self.testNo):
        tempDir = os.path.relpath(getTempDirectory(os.getcwd()))
        sequenceNumber = random.choice(xrange(100))
        sequences, newickTreeString = getCactusInputs_random(
            tempDir=tempDir, sequenceNumber=sequenceNumber)

        # Setup the flower disk.
        dataDir = os.path.join('/data', os.path.relpath(tempDir))
        experiment = getCactusWorkflowExperimentForTest(sequences, newickTreeString, dataDir)
        setupArgs = dict(
            cactusDiskDatabaseString=experiment.getDiskDatabaseString(),
            cactusSequencesPath=os.path.join(experiment.getDbDir(), "cactusSequences"),
            sequences=sequences,
            newickTreeString=newickTreeString,
        )
        # Setup is deliberately invoked twice with identical arguments.
        runCactusSetup(**setupArgs)
        runCactusSetup(**setupArgs)

        experiment.cleanupDb()
        system("rm -rf %s" % tempDir)
        logger.info("Finished test %i of cactus_setup.py", test)
def runCactusTreeStatsToLatexTables(inputFiles, regionNames, outputFile):
    """Run cactus_treeStatsToLatexTables.py on paired stats files and region names.

    ``inputFiles`` and ``regionNames`` must have the same length; they are
    passed on the command line as alternating "file name" pairs.
    """
    assert len(regionNames) == len(inputFiles)
    pairs = []
    for inputFile, regionName in zip(inputFiles, regionNames):
        pairs.append("%s %s" % (inputFile, regionName))
    system("cactus_treeStatsToLatexTables.py --outputFile %s %s" % (outputFile, " ".join(pairs)))
    logger.info("Ran cactus_treeStatsToLatexTables okay")
def parasolRestart():
    """Restart the parasol hub and node, retrying until no nodes are reported dead.

    After each start attempt the output of ``parasol status`` is scanned for a
    "Nodes dead" line whose last field is 0. Until one is seen, parasol is
    stopped again and the start is retried after a five second sleep.
    """
    parasolStop()
    while True:
        machineList = os.path.join(workflowRootPath(), "jobTree", "machineList")
        os.system("paraNode start -hub=localhost")
        os.system("paraHub %s subnet=127.0.0 &" % (machineList,))

        statusFile = getTempFile()
        nodesDead = True
        try:
            popen("parasol status", statusFile)
            statusHandle = open(statusFile, 'r')
            for line in statusHandle:
                if "Nodes dead" not in line:
                    continue
                print(line)
                if int(line.split()[-1]) == 0:
                    nodesDead = False
            statusHandle.close()
        except RuntimeError:
            # A failed status query counts as "still dead"; try again below.
            pass
        os.remove(statusFile)

        if not nodesDead:
            break
        logger.info("Tried to restart the parasol process, but failed, will try again")
        parasolStop()
        time.sleep(5)
    logger.info("Restarted the parasol process")
def run(self):
    """Extract, name-sort and ship the reference region of every bam file.

    For each entry of ``self.files`` the bam is symlinked into the local temp
    dir, indexed if no ``.bai`` exists next to the source, restricted to the
    region stored in ``self.ref2info`` for the file's reference, sorted by
    read name and copied to ``self.sampledir``. A ``file.lst`` listing
    "<sample name>\t<first ref2info field>" per file is moved to
    ``self.sampledir`` at the end.
    """
    localTempDir = self.getLocalTempDir()
    filelst = os.path.join(localTempDir, 'file.lst')
    f = open(filelst, 'w')
    try:
        for bamFile in self.files:
            # Link the necessary file into the local tempdir first.
            bamName = os.path.basename(bamFile.path)
            localbam = os.path.join(localTempDir, bamName)
            system("ln -s %s %s" % (bamFile.path, localbam))
            region = self.ref2info[bamFile.ref][1]
            # Remove the ".bam" suffix only. str.rstrip('.bam') strips any
            # trailing run of the characters '.', 'b', 'a', 'm', which
            # mangles names such as "samba.bam".
            filename = bamName
            if filename.endswith('.bam'):
                filename = filename[:-len('.bam')]
            localout = os.path.join(localTempDir, "%s-sorted" % (filename))
            logger.info("Pre-processing sample %s\n" % (filename))
            f.write("%s\t%s\n" % (filename, self.ref2info[bamFile.ref][0]))
            # Reuse an existing index, otherwise build one.
            if os.path.exists("%s.bai" % bamFile.path):
                system("ln -s %s.bai %s.bai" % (bamFile.path, localbam))
            else:
                system("samtools index %s" % (localbam))
            # Extract the region and sort by name.
            cmd = "samtools view -b %s %s | samtools sort -n - %s" % (localbam, region, localout)
            system(cmd)
            system("scp -C %s.bam %s" % (localout, self.sampledir))
            # Clean up right away.
            system("rm -f %s.bam" % localout)
    finally:
        # Close (and flush) the list before it is moved; moving it first can
        # lose buffered lines when the move is a copy across filesystems.
        f.close()
    system("mv %s %s" % (filelst, self.sampledir))
def run(self):
    """Run the evolver intra step, fasta conversion and TRF for one chromosome.

    Start and end timestamps are written to the ``xml`` sub-directory of
    ``self.thisDir``, named after the chromosome's entry in the name dict.
    """
    chrNameDict, revChrNameDict = lsc.extractChrNamesDict(self.thisDir)
    chrLabel = chrNameDict[self.thisChr]
    logger.info('CycleStep2Chromosome object running, %s %s %s'
                % (self.thisDir, self.thisChr, chrLabel))
    lsc.verifyDirExists(self.thisDir)
    startStamp = os.path.join(self.thisDir, 'xml', 'cycle.step2.%s.start.xml' % chrLabel)
    lsc.createTimestamp(startStamp, extra={'name': self.thisChr})

    # Evolver intra on one chromosome.
    intraCmds = lsc.evolverIntraStepCmd(self.thisDir, self.theChild, self.thisStepLength,
                                        self.thisChr, self.options.seed,
                                        self.options.paramsDir, self.getLocalTempDir(),
                                        self.options)
    lsc.runCommands(intraCmds, self.getLocalTempDir())

    # Evolver conversion from .rev to fasta in localTempDir.
    fastaCmds = lsc.evolverIntraStepToFastaCmd(self.thisDir, self.thisStepLength, self.thisChr,
                                               self.options.paramsDir, self.getLocalTempDir())
    lsc.runCommands(fastaCmds, self.getLocalTempDir())

    # TRF wrapper.
    lsc.callEvolverIntraStepTRFCmd(self.thisDir, self.thisChr, self.getLocalTempDir())

    # Move the resulting trf files out of localTempDir.
    moveCmds = lsc.evolverIntraStepMoveTRFCmd(self.thisDir, self.thisChr, self.getLocalTempDir())
    lsc.runCommands(moveCmds, self.getLocalTempDir(), mode='p')

    endStamp = os.path.join(self.thisDir, 'xml', 'cycle.step2.%s.end.xml' % chrLabel)
    lsc.createTimestamp(endStamp, extra={'name': self.thisChr})
def writeConfig(config):
    """Write the config XML element to the job tree's config file.

    The destination is ``getConfigFileName(config.attrib["job_tree"])``. The
    file handle is closed even if serialisation fails.
    """
    fileHandle = open(getConfigFileName(config.attrib["job_tree"]), 'w')
    try:
        ET.ElementTree(config).write(fileHandle)
    finally:
        # Previously the handle leaked when write() raised.
        fileHandle.close()
    logger.info("Written the config file")
def run(self):
    """Add the first child target: PreProcess if input info was given, else Start."""
    setLogLevel("DEBUG")
    logger.info("Adding experiments to jobTree\n")
    firstTarget = PreProcess if self.options.inputInfo else Start
    self.addChildTarget(firstTarget(self.options))
def runCactusReference(cactusDiskDatabaseString, flowerNames, logLevel=None,
                       matchingAlgorithm=None,
                       referenceEventString=None,
                       permutations=None,
                       useSimulatedAnnealing=None,
                       theta=None,
                       phi=None,
                       maxWalkForCalculatingZ=None,
                       ignoreUnalignedGaps=None,
                       wiggle=None,
                       numberOfNs=None,
                       minNumberOfSequencesToSupportAdjacency=None,
                       makeScaffolds=None):
    """Run cactus_reference and return its non-empty output lines.

    Every optional argument is rendered with ``nameValue`` and appended to the
    command line in a fixed order. ``flowerNames`` is fed to the command on
    standard input.
    """
    logLevel = getLogLevelString2(logLevel)
    # Order matters: it fixes the argument order on the command line.
    optionArgs = [
        nameValue("matchingAlgorithm", matchingAlgorithm),
        nameValue("referenceEventString", referenceEventString),
        nameValue("permutations", permutations, int),
        nameValue("useSimulatedAnnealing", useSimulatedAnnealing, bool),
        nameValue("theta", theta, float),
        nameValue("phi", phi, float),
        nameValue("maxWalkForCalculatingZ", maxWalkForCalculatingZ, int),
        nameValue("ignoreUnalignedGaps", ignoreUnalignedGaps, bool),
        nameValue("wiggle", wiggle, float),
        nameValue("numberOfNs", numberOfNs, int),
        nameValue("minNumberOfSequencesToSupportAdjacency",
                  minNumberOfSequencesToSupportAdjacency, int),
        nameValue("makeScaffolds", makeScaffolds, bool),
    ]
    command = "cactus_reference --cactusDisk '%s' --logLevel %s %s" % (
        cactusDiskDatabaseString, logLevel, " ".join(optionArgs))
    masterMessages = popenCatch(command, stdinString=flowerNames)
    logger.info("Ran cactus_reference okay")
    messages = []
    for message in masterMessages.split("\n"):
        if message != '':
            messages.append(message)
    return messages
def run(self):
    """Load all pickled samples, write the clone-size tables and schedule rarefaction.

    Each entry ``sam`` of ``self.sampledir`` is expected to contain a gzipped
    pickle at ``<sampledir>/<sam>/<sam>``. Text and TeX tables are written to
    ``self.options.outdir`` and a ``DiversityRarefaction`` child is added.
    """
    self.logToMaster("RepSize\n")
    stime = time.time()
    name2sample = {}
    for sam in os.listdir(self.sampledir):
        filepath = os.path.join(self.sampledir, sam, sam)
        # NOTE(review): pickle.load must only be used on trusted files.
        handle = gzip.open(filepath, 'rb')
        try:
            name2sample[sam] = pickle.load(handle)
        finally:
            # Close explicitly rather than leaving one open handle per
            # sample for the garbage collector.
            handle.close()
    logger.info("RepSize, done loading %d samples in %.4f s." %
                (len(name2sample), (time.time() - stime)))
    stime = time.time()

    # Get summary of samples' sizes:
    group2samples = self.options.group2samples
    group2avr = libcommon.get_group_avr(name2sample, group2samples)
    logger.info("RepSize, done computing group_avr in %.4f s." %
                (time.time() - stime))

    txtfile = os.path.join(self.options.outdir, "clonesize.txt")
    repsize.repsize_table(name2sample, txtfile, group2avr, group2samples)
    texfile = os.path.join(self.options.outdir, "clonesize.tex")
    repsize.repsize_table(name2sample, texfile, group2avr, group2samples, True)
    self.addChildTarget(diversity.DiversityRarefaction(self.sampledir, self.options))
def main():
    """Parse the command line and run the MakeAlignments job tree.

    Raises RuntimeError if any positional arguments are given.
    """
    # Construct the arguments: every option is a plain "--name" stored under
    # the same destination name.
    parser = OptionParser()
    optionNames = (
        "haplotypeSequences",
        "newickTree",
        "assembliesDir",
        "outputDir",
        "configFile",
        "minimumNsForScaffoldGap",
        "assemblyEventString",
        "haplotype1EventString",
        "haplotype2EventString",
        "contaminationEventString",
        "featureBedFiles",
        "geneBedFiles",
    )
    for optionName in optionNames:
        parser.add_option("--" + optionName, dest=optionName)
    Stack.addJobTreeOptions(parser)

    options, args = parser.parse_args()
    setLoggingFromOptions(options)

    if len(args) != 0:
        raise RuntimeError("Unrecognised input arguments: %s" % " ".join(args))

    rootTarget = MakeAlignments(newickTree=options.newickTree,
                                haplotypeSequences=options.haplotypeSequences.split(),
                                assembliesDir=options.assembliesDir,
                                outputDir=options.outputDir,
                                configFile=options.configFile,
                                options=options)
    Stack(rootTarget).startJobTree(options)
    logger.info("Done with job tree")
def obtainSystemConstants(self):
    """Set ``self.maxCPU`` and ``self.maxMEM`` from the output of ``lshosts``.

    The header line locates the ``ncpus`` and ``maxmem`` columns; the largest
    value in each column over all host lines is stored (``maxCPU`` as an int,
    ``maxMEM`` as a ``MemoryString``). Entries shown as ``-`` are ignored.

    Raises:
        RuntimeError: if the header lacks either column, a host line has
            fewer columns than the header, or no usable value was found.
    """
    p = subprocess.Popen(["lshosts"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    header = p.stdout.readline().strip().split()
    num_columns = len(header)
    cpu_index = None
    mem_index = None
    for i, column in enumerate(header):
        if column == 'ncpus':
            cpu_index = i
        elif column == 'maxmem':
            mem_index = i
    if cpu_index is None or mem_index is None:
        # The exception used to be constructed but never raised.
        raise RuntimeError("lshosts command does not return ncpus or maxmem columns")

    # The line following the header is skipped, as before.
    # NOTE(review): confirm lshosts really emits a non-host line here.
    p.stdout.readline()

    self.maxCPU = 0
    self.maxMEM = MemoryString("0")
    for line in p.stdout:
        items = line.strip().split()
        if not items:
            # Blank lines carry no host information.
            continue
        if len(items) < num_columns:
            raise RuntimeError("lshosts output has a varying number of columns")
        if items[cpu_index] != '-':
            # Compare numerically; comparing the raw string against an int
            # does not give the maximum.
            ncpus = int(items[cpu_index])
            if ncpus > self.maxCPU:
                self.maxCPU = ncpus
        if items[mem_index] != '-':
            mem = MemoryString(items[mem_index])
            if mem > self.maxMEM:
                self.maxMEM = mem
    p.stdout.close()
    p.wait()

    # Value comparisons instead of "is 0", which tested object identity and
    # could never be true for a MemoryString.
    if self.maxCPU == 0 or not (self.maxMEM > MemoryString("0")):
        raise RuntimeError("lshosts returns null ncpus or maxmem info")
    logger.info("Got the maxCPU: %s, maxMEM: %s" % (self.maxCPU, self.maxMEM))
def runCactusBlast(sequenceFiles, outputFile, jobTreeDir,
                   chunkSize=None, overlapSize=None,
                   logLevel=None,
                   blastString=None,
                   selfBlastString=None,
                   compressFiles=None,
                   lastzMemory=None,
                   targetSequenceFiles=None):
    """Build and run the cactus_blast.py command line.

    Optional settings are rendered with ``nameValue``. ``targetSequenceFiles``,
    when given, is joined with spaces and passed as one quoted value.
    """
    logLevel = getLogLevelString2(logLevel)
    # Order matters: it fixes the argument order on the command line.
    optionArgs = [
        nameValue("chunkSize", chunkSize, int),
        nameValue("overlapSize", overlapSize, int),
        nameValue("blastString", blastString, str),
        nameValue("selfBlastString", selfBlastString, str),
        nameValue("compressFiles", compressFiles, bool),
        nameValue("lastzMemory", lastzMemory, int),
    ]
    if targetSequenceFiles is not None:
        targetSequenceFiles = " ".join(targetSequenceFiles)
    optionArgs.append(nameValue("targetSequenceFiles", targetSequenceFiles, quotes=True))

    command = "cactus_blast.py %s --cigars %s %s --jobTree %s --logLevel %s" % (
        " ".join(sequenceFiles), outputFile, " ".join(optionArgs), jobTreeDir, logLevel)
    logger.info("Running command : %s" % command)
    system(command)
    logger.info("Ran the cactus_blast command okay")
def run(self):
    """Extract, name-sort and ship the reference region of every bam file.

    Each entry of ``self.files`` is symlinked into the local temp dir, indexed
    when no ``.bai`` sits next to the source, cut down to the region recorded
    in ``self.ref2info`` for its reference, sorted by read name and copied to
    ``self.sampledir``. A ``file.lst`` with one
    "<sample name>\t<first ref2info field>" line per file is moved to
    ``self.sampledir`` when all files are done.
    """
    bamSuffix = '.bam'
    localTempDir = self.getLocalTempDir()
    filelst = os.path.join(localTempDir, 'file.lst')
    f = open(filelst, 'w')
    try:
        for bamFile in self.files:
            # Link the necessary file into the local tempdir first.
            localbam = os.path.join(localTempDir, os.path.basename(bamFile.path))
            system("ln -s %s %s" % (bamFile.path, localbam))
            region = self.ref2info[bamFile.ref][1]
            # Drop exactly one trailing ".bam". rstrip('.bam') treats its
            # argument as a set of characters and would also eat trailing
            # 'a', 'b', 'm' and '.' characters of the sample name itself.
            filename = os.path.basename(bamFile.path)
            if filename.endswith(bamSuffix):
                filename = filename[:-len(bamSuffix)]
            localout = os.path.join(localTempDir, "%s-sorted" % (filename))
            logger.info("Pre-processing sample %s\n" % (filename))
            f.write("%s\t%s\n" % (filename, self.ref2info[bamFile.ref][0]))
            # Use the existing index if there is one, else create it.
            if os.path.exists("%s.bai" % bamFile.path):
                system("ln -s %s.bai %s.bai" % (bamFile.path, localbam))
            else:
                system("samtools index %s" % (localbam))
            # Extract the region and sort by name.
            system("samtools view -b %s %s | samtools sort -n - %s" % (localbam, region, localout))
            system("scp -C %s.bam %s" % (localout, self.sampledir))
            # Clean up right away.
            system("rm -f %s.bam" % localout)
    finally:
        # The list must be flushed and closed before it is moved, otherwise
        # buffered lines can be lost when the move copies across filesystems.
        f.close()
    system("mv %s %s" % (filelst, self.sampledir))
def main():
    """Parse (statsFile, regionName) argument pairs and draw the scatter plots.

    Positional arguments alternate between a tree-stats XML file and the
    region name it belongs to.
    """
    # Construct the arguments.
    parser = getBasicOptionParser("usage: %prog [options] treeStatsFiles", "%prog 0.1")
    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    # Get the input data: arguments come in pairs.
    assert len(args) % 2 == 0
    stats = []
    for statsFile, regionName in zip(args[::2], args[1::2]):
        stats.append((ET.parse(statsFile).getroot(), regionName))

    # Make the scatter plots.
    chainScatterPlots(stats)
    blockScatterPlots(stats)
def run(self):
    """Build a temp file tree, fail at random half the time, then issue the next jobs.

    Raises RuntimeError (with no message) when ``random.random()`` exceeds 0.5.
    """
    # Setup a file tree.
    treeRoot = os.path.join(self.getGlobalTempDir(), getRandomAlphaNumericString())
    tempFileTree = TempFileTree(treeRoot)
    fileTreeRootFile = tempFileTree.getTempFile()
    makeFileTree(fileTreeRootFile, self.depth, tempFileTree)
    treePointer = tempFileTree.getTempFile()
    makeTreePointer(fileTreeRootFile, treePointer)
    logger.info("We've set up the file tree")

    if random.random() > 0.5:
        raise RuntimeError()

    # Issue the child and follow on jobs.
    self.addChildTarget(ChildTarget(treePointer))
    self.setFollowOnTarget(DestructFileTree(tempFileTree))
    logger.info("We've added the child target and finished SetupFileTree.run()")
def main():
    """Parse the tuning options, resolve file-based options and run the tuning wrapper."""
    usg = "Usage: %prog [options]\n"
    parser = OptionParser(usage=usg)
    # (short flag, long flag, dest, help, default) for every option.
    optionSpecs = (
        ("-d", "--simList", "sim",
         "List of simulation directories. Default: simulations.lst", "simulations.lst"),
        ("-c", "--configStartFile", "config",
         "cactus_workflow_config.xml", "cactus_workflow_config.xml"),
        ("-o", "--outputDir", "outputDir",
         "Directory for the outputs of the runs. Default: out", "out/"),
        ("-m", "--simTrueMafDir", "simTrueMafDir",
         "Directory for 'true' mafs of the simulations. Default: sim/", "sim/"),
        ("-t", "--tree", "tree",
         "Phylogeny tree of the species of interest, in Newick format.Default: tree", "tree"),
        ("-s", "--species", "species",
         "List of species in the order as they appear in the Newick tree. Default: species.lst",
         "species.lst"),
        ("-j", "--job", "jobFile",
         "Job file containing command to run.", None),
    )
    for shortFlag, longFlag, dest, helpText, default in optionSpecs:
        parser.add_option(shortFlag, longFlag, dest=dest, help=helpText, default=default)

    (options, args) = parser.parse_args()

    # Process options: replace the file names by the values read from them.
    options.outputDir = modify_dirname(options.outputDir)
    check_dir(options.outputDir)
    options.tree = getFirstLine(options.tree)
    options.species = getFirstLine(options.species).split()
    options.sim = getList(options.sim)
    logger.info("Processed options\n")

    # Tuning
    CactusTuningWrapper(options).execute(options.jobFile)
def run(self):
    """Add one CactusWorkflowWrapper child per simulation, then a merge/cleanup follow-on.

    The follow-on ``CactusMergeResultsAndCleanup`` receives the number of
    simulations that were scheduled.
    """
    # Run cactus & evaluations for each simulation.
    logger.info("CactusTuningSimulationsWrapper: going to issue cactus runs for all simulations for parameter %s\n" % (self.paraFile))
    simNum = 0
    for rawSim in self.options.sim:
        sim = modify_dirname(rawSim)
        simName = getRootDir(sim)

        # Path to the sequence file of each species, space separated.
        speciesPaths = [os.path.join(sim, spc) for spc in self.options.species]
        sequenceFiles = " ".join(speciesPaths)
        logger.info("Got sequence files: %s\n" % (sequenceFiles))

        child = CactusWorkflowWrapper(sim, simName, self.options.simTrueMafDir,
                                      self.paraFile, self.outDir, sequenceFiles,
                                      self.options.tree)
        self.addChildTarget(child)
        logger.info("Added child CactusWorkflowWrapper for sim %s and confi %s\n" % (sim, self.paraFile))
        simNum += 1

    # Done scheduling cactus & evaluation steps for all the simulations.
    # Now merge results & clean up.
    logger.info("Done running cactus & evaluations for parameter %s. Now merge results and clean up.\n" % (self.paraFile))
    self.setFollowOnTarget(CactusMergeResultsAndCleanup(simNum, self.outDir, self.options))
    logger.info("Added CactusMergeResultsAndCleanup as FollowOnTarget for %s\n" % (self.outDir))
def progressiveWithSubtreeRootFunction(self, experimentFile, toilDir,
                                       batchSystem, buildAvgs,
                                       buildReference,
                                       buildHal,
                                       buildFasta,
                                       toilStats):
    """Run the progressive alignment on a randomly chosen subtree of the species tree.

    Exists so that runWorkflow_multipleExamples can stay general: specifying
    a subtree root does not make sense for runCactusWorkflow.
    """
    expWrapper = ExperimentWrapper(ET.parse(experimentFile).getroot())
    tree = expWrapper.getTree()
    # Candidate roots: every named internal node (the root of the whole
    # species tree is a valid choice too).
    validNodes = [tree.getName(node)
                  for node in tree.postOrderTraversal()
                  if tree.hasName(node) and not tree.isLeaf(node)]
    subtreeRoot = random.choice(validNodes)
    logger.info("Chose subtree root %s to test from species tree "
                "%s" % (subtreeRoot, NXNewick().writeString(tree)))
    self.progressiveFunction(experimentFile, toilDir, batchSystem,
                             buildAvgs, buildReference,
                             buildHal, buildFasta, toilStats,
                             subtreeRoot)
def run(self):
    """Start the pipeline with PreProcess when input info is given, otherwise with Start."""
    setLogLevel("DEBUG")
    logger.info("Adding experiments to jobTree\n")
    if not self.options.inputInfo:
        self.addChildTarget(Start(self.options))
        return
    self.addChildTarget(PreProcess(self.options))
def runMarginAlign(self, readFastqFile, referenceFastaFile, args=""):
    """Run marginAlign, validate the resulting SAM file and log summary statistics.

    The command is timed, ``self.validateSam`` supplies per-read alignment
    statistics whose averages are logged, and the job tree directory is
    removed afterwards.
    """
    startTime = time.time()
    system("\t".join([
        self.marginAlign, readFastqFile, referenceFastaFile,
        self.outputSamFile,
        "--jobTree=%s" % self.jobTree, args
    ]))
    runTime = time.time() - startTime
    readAlignmentStats = self.validateSam(self.outputSamFile, readFastqFile, referenceFastaFile)

    # Get some stats to print. Lists are built explicitly so numpy receives
    # a sequence even where map() returns a lazy iterator.
    readIdentity = numpy.average(
        [rAS.readIdentity() for rAS in readAlignmentStats])
    alignmentIdentity = numpy.average(
        [rAS.alignmentIdentity() for rAS in readAlignmentStats])
    mismatchesPerAlignedBase = numpy.average(
        [rAS.mismatchesPerAlignedBase() for rAS in readAlignmentStats])
    insertionsPerReadBase = numpy.average(
        [rAS.insertionsPerReadBase() for rAS in readAlignmentStats])
    deletionsPerReadBase = numpy.average(
        [rAS.deletionsPerReadBase() for rAS in readAlignmentStats])

    # The reference and reads paths were swapped in the original message.
    logger.info(
        "Ran marginAlign with args: %s, with reference: %s and reads: %s. "
        "Got Read Identity: %s, Alignment Identity: %s, Mismatches per aligned base: %s, "
        "Insertions per read base: %s, Deletions per read base: %s, Took: %s seconds"
        % (args, referenceFastaFile, readFastqFile,
           readIdentity, alignmentIdentity, mismatchesPerAlignedBase,
           insertionsPerReadBase, deletionsPerReadBase, runTime))
    system("rm -rf %s" % self.jobTree)
def __init__(self, config, maxCpus, maxMemory, workerFn=worker):
    """Set up the job queues and start the worker processes.

    The number of workers is the smaller of the config's "max_threads"
    attribute and ``self.maxCpus``. Each worker runs ``workerFn`` on the
    shared input and output queues as a daemon process.
    """
    # Call the parent constructor.
    AbstractBatchSystem.__init__(self, config, maxCpus, maxMemory)
    self.jobIndex = 0
    self.jobs = {}
    self.maxThreads = int(config.attrib["max_threads"])
    threadCount = min(self.maxThreads, self.maxCpus)
    logger.info("Setting up the thread pool with %i threads given the max threads %i and the max cpus %i"
                % (threadCount, self.maxThreads, self.maxCpus))
    self.maxThreads = threadCount
    self.cpusPerThread = float(self.maxCpus) / float(self.maxThreads)
    # Add the maxThreads to avoid losing memory by rounding.
    self.memoryPerThread = self.maxThreads + float(self.maxMemory) / float(self.maxThreads)
    assert self.cpusPerThread >= 1
    assert self.maxThreads >= 1
    assert self.maxMemory >= 1
    assert self.memoryPerThread >= 1
    self.inputQueue = Queue()
    self.outputQueue = Queue()
    self.workerFn = workerFn
    # Setup the workers.
    for _ in xrange(self.maxThreads):
        process = Process(target=workerFn,
                          args=(self.inputQueue, self.outputQueue))
        process.daemon = True
        process.start()
def issueJobs(self, jobCommands):
    """Submit each job command to parasol and return a {jobID: jobCommand} dict.

    ``jobCommands`` yields (jobCommand, memory, cpu, logFile) tuples, none of
    whose last three fields may be None. A submission is repeated, with a
    five second sleep in between, until parasol's reply contains a job id.
    """
    jobIdPattern = re.compile("your job ([0-9]+).*")
    issuedJobs = {}
    for jobCommand, memory, cpu, logFile in jobCommands:
        assert memory is not None
        assert cpu is not None
        assert logFile is not None
        command = "parasol -verbose -ram=%i -cpu=%i -results=%s add job '%s'" % (
            memory, cpu, self.parasolResultsFile, jobCommand)
        match = None
        while match is None:
            popenParasolCommand(command, self.scratchFile)
            fileHandle = open(self.scratchFile, 'r')
            line = fileHandle.readline()
            fileHandle.close()
            match = jobIdPattern.match(line)
            if match is None:
                # parasol add job returns success even if the job was not
                # properly issued, so only a parsed job id counts.
                logger.info("We failed to properly add the job, we will try again after a sleep")
                time.sleep(5)
        jobID = int(match.group(1))
        logger.debug("Got the job id: %s from line: %s" % (jobID, line))
        assert jobID not in issuedJobs
        issuedJobs[jobID] = jobCommand
        logger.debug("Issued the job command: %s with job id: %i " % (command, jobID))
    return issuedJobs
def runCactusProgressive(inputDir,
                         jobTreeDir,
                         logLevel=None, retryCount=0,
                         batchSystem="single_machine",
                         rescueJobFrequency=None,
                         skipAlignments=False,
                         buildHal=None,
                         buildFasta=None,
                         buildAvgs=False,
                         jobTreeStats=False,
                         maxThreads=None,
                         maxCpus=None,
                         defaultMemory=None,
                         recursive=None,
                         logFile=None,
                         event=None,
                         extraJobTreeArgumentsString="",
                         profileFile=None):
    """Build and run the cactus_progressive.py command line.

    When ``profileFile`` is given, the command is run under cProfile from the
    ``bin`` directory of the cactus root and the profile is written there.
    """
    baseCommand = "cactus_progressive.py %s" % inputDir
    sharedArgs = _fn(jobTreeDir,
                     logLevel, retryCount, batchSystem, rescueJobFrequency, skipAlignments,
                     buildAvgs, None,
                     buildHal,
                     buildFasta,
                     jobTreeStats, maxThreads, maxCpus, defaultMemory, logFile,
                     extraJobTreeArgumentsString=extraJobTreeArgumentsString)
    progressiveArgs = " %s %s" % (nameValue("recursive", recursive, bool),
                                  nameValue("event", event))
    command = baseCommand + " " + sharedArgs + progressiveArgs
    if profileFile is not None:
        command = "python -m cProfile -o %s %s/bin/%s" % (profileFile, cactusRootPath(), command)
    system(command)
    logger.info("Ran the cactus progressive okay")
def run(self):
    """Filters the clones of one sample by size and splits them by status.

    The clones are unpickled from the gzipped self.samplefile. When any of the
    mincount/maxcount/minfreq/maxfreq options is set they are passed through
    filter_by_size. The result is split with filter_by_status into two subsets
    (status flag True and False); each non-empty subset is pickled, gzipped, to
    <outdir>/productive/<name>/ or <outdir>/non_productive/<name>/ respectively
    under the sample file's base name. Finally a follow-on target is set to
    clean up the input sample file.
    """
    # filter by size
    starttime = time.time()
    opts = self.opts
    # NOTE(review): pickle.load must only ever be given trusted files.
    inHandle = gzip.open(self.samplefile, 'rb')
    try:
        clones = pickle.load(inHandle)
    finally:
        inHandle.close()
    if (opts.mincount > 1 or opts.maxcount > 0 or opts.minfreq > 0 or
            opts.maxfreq > 0):
        clones = filter_by_size(clones, opts.mincount, opts.maxcount,
                                opts.minfreq, opts.maxfreq)
    msg = ("Filter_by_size for file %s done in %.4f s" %
           (self.samplefile, time.time() - starttime))
    logger.info(msg)
    starttime = time.time()

    # filter by status
    pclones = filter_by_status(clones, True)
    npclones = filter_by_status(clones, False)

    filename = os.path.basename(self.samplefile)
    for statusDir, subset in (("productive", pclones),
                              ("non_productive", npclones)):
        if not subset:
            continue
        subsetDir = os.path.join(self.outdir, statusDir, self.name)
        system("mkdir -p %s" % subsetDir)
        outHandle = gzip.open(os.path.join(subsetDir, filename), "wb")
        try:
            pickle.dump(subset, outHandle)
        finally:
            # Closing explicitly guarantees the gzip trailer is written.
            outHandle.close()
    msg = ("Filter_by_status for file %s done in %.4f s" %
           (self.samplefile, time.time() - starttime))
    logger.info(msg)
    self.setFollowOnTarget(libcommon.CleanupFile(self.samplefile))
def run(self):
    """Copies the gene file into the local temp dir and runs cactus_genemapChain on it.

    The copy is named "refgene.bed"; the tool is given self.dbStr,
    self.outputFile, self.refSpecies and the path of the copy.
    """
    localCopy = os.path.join(self.getLocalTempDir(), "refgene.bed")
    copyCommand = "cp %s %s" % (self.geneFile, localCopy)
    system(copyCommand)
    chainArguments = (self.dbStr, self.outputFile, self.refSpecies, localCopy)
    chainCommand = "cactus_genemapChain -c %s -o \"%s\" -s \"%s\" -g \"%s\"" % chainArguments
    system(chainCommand)
    logger.info("Done genemapChain for %s\n" % self.region)
def run(self):
    """Runs step 4 of a cycle: gene deactivation (or its bypass).

    Start and end timestamps are written to <thisDir>/xml/. The step's work is
    skipped when <thisDir>/logs/gene_deactivation.log already exists. Otherwise:

    * by default the command from lsc.evolverGeneDeactivationStep is run in the
      local temp dir and its combined stdout/stderr is stored in the log file
      (written to a '.tmp' file first and then renamed into place);
    * with options.noGeneDeactivation set, intra/evannots.gff is copied to
      annots.gff and the log file is merely touched.
    """
    logger.info('CycleStep4 object running, %s' % self.thisDir)
    lsc.verifyDirExists(self.thisDir)
    lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step4.start.xml'))
    outname = os.path.join(self.thisDir, 'logs', 'gene_deactivation.log')
    if not os.path.exists(outname):
        if not self.options.noGeneDeactivation:
            # by default gene deactivation is turned on.
            cmd = lsc.evolverGeneDeactivationStep(self.thisDir, self.thisParentDir)
            p = subprocess.Popen(cmd, cwd=self.getLocalTempDir(),
                                 stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            out = p.communicate()[0]
            f = open(outname + '.tmp', 'w')
            try:
                f.write(out)
            finally:
                f.close()
            # Rename only once the log is complete, so that the existence of
            # outname marks a finished step.
            os.rename(outname + '.tmp', outname)
        else:
            # this could cause a proliferation of gene creation.
            # The paths must come from self.thisDir: a bare 'thisDir' is not
            # defined in this scope and raised a NameError.
            cmd = [lsc.which('cp'),
                   os.path.join(self.thisDir, 'intra', 'evannots.gff'),
                   os.path.join(self.thisDir, 'annots.gff')]
            cmds = [cmd, [lsc.which('touch'), outname]]
            lsc.runCommands(cmds, self.getLocalTempDir())
    lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step4.end.xml'))
def issueJob(self, command, memory, cpu):
    """Adds a single job to parasol and returns its integer parasol job id.

    The requested cpus are added to self.usedCpus. Finished job ids waiting on
    self.outputQueue1 are drained first (without blocking) to release their
    cpus; while more than self.maxCpus are still in use the method then blocks
    on the queue. The parasol "add job" command is repeated, sleeping 5 seconds
    between attempts, until its output line matches "your job <id>".
    """
    self.checkResourceRequest(memory, cpu)
    jobIdPattern = re.compile("your job ([0-9]+).*")
    parasolCommand = "%s -verbose -ram=%i -cpu=%i -results=%s add job '%s'" % \
        (self.parasolCommand, memory, cpu, self.parasolResultsFile, command)

    # Account for the cpus of the new job.
    self.usedCpus += cpu

    # Release the cpus of every job already reported as finished, no waiting.
    while True:
        try:
            finishedID = self.outputQueue1.get_nowait()
            self.usedCpus -= self.jobIDsToCpu.pop(finishedID)
            assert self.usedCpus >= 0
            self.outputQueue1.task_done()
        except Empty:
            break

    # Still over the limit: block until enough jobs have finished.
    while self.usedCpus > self.maxCpus:
        self.usedCpus -= self.jobIDsToCpu.pop(self.outputQueue1.get())
        assert self.usedCpus >= 0
        self.outputQueue1.task_done()

    # Now add the job. Parasol's "add job" returns success even if the job was
    # not properly issued, so retry until a job id is reported.
    match = None
    while match is None:
        line = popenParasolCommand(parasolCommand)[1][0]
        match = jobIdPattern.match(line)
        if match is None:
            logger.info("We failed to properly add the job, we will try again after a sleep")
            time.sleep(5)

    jobID = int(match.group(1))
    self.jobIDsToCpu[jobID] = cpu
    logger.debug("Got the parasol job id: %s from line: %s" % (jobID, line))
    logger.debug("Issued the job command: %s with (parasol) job id: %i " % (parasolCommand, jobID))
    return jobID
def run(self):
    """Builds the cactus alignment for the assembly, then schedules MakeStats1.

    Nothing is rebuilt when <outputDir>/cactusAlignment already exists.
    Otherwise the assembly file is copied into the local temp dir (and
    gunzipped if its name ends in '.gz'), an experiment file is written, the
    cactus workflow is run on a single machine with one thread, the job tree is
    checked for completion, job tree stats are computed, and everything in the
    local temp dir is moved to self.outputDir.
    """
    cactusAlignmentName = "cactusAlignment"
    cactusAlignment = os.path.join(self.outputDir, cactusAlignmentName)
    if not os.path.exists(cactusAlignment):
        # Prepare the assembly
        # First copy it.
        if self.assemblyFile.endswith('.gz'):
            tempAssemblyFile = getTempFile(rootDir=self.getLocalTempDir(), suffix=".gz")
            system("cp %s %s" % (self.assemblyFile, tempAssemblyFile))
            system("gunzip %s" % tempAssemblyFile)
            # gunzip replaces the file with one lacking the '.gz' suffix.
            tempAssemblyFile = tempAssemblyFile[:-3]
            assert os.path.exists(tempAssemblyFile)
        else:
            tempAssemblyFile = getTempFile(rootDir=self.getLocalTempDir(), suffix="")
            system("cp %s %s" % (self.assemblyFile, tempAssemblyFile))
        # Make the supporting temporary files
        tempExperimentFile = getTempFile(rootDir=self.getLocalTempDir())
        tempJobTreeDir = os.path.join(self.getLocalTempDir(), "jobTree")
        # Make the experiment file
        cactusWorkflowExperiment = ExperimentWrapper.createExperimentWrapper(
            sequences=self.haplotypeSequences + [tempAssemblyFile],
            newickTreeString=self.newickTree,
            outputDir=self.getLocalTempDir(),
            configFile=self.configFile)
        cactusWorkflowExperiment.setDbName(cactusAlignmentName)
        # This needs to be set to ensure the thing gets put in the right directory
        cactusWorkflowExperiment.setDbDir(
            os.path.join(self.getLocalTempDir(), cactusWorkflowExperiment.getDbName()))
        cactusWorkflowExperiment.writeXML(tempExperimentFile)
        # Now run cactus workflow
        runCactusWorkflow(experimentFile=tempExperimentFile,
                          jobTreeDir=tempJobTreeDir,
                          buildAvgs=False,
                          buildReference=True,
                          batchSystem="single_machine",
                          maxThreads=1,
                          jobTreeStats=True)
        logger.info("Ran the workflow")
        # Check if the jobtree completed successfully.
        runJobTreeStatusAndFailIfNotComplete(tempJobTreeDir)
        logger.info("Checked the job tree dir")
        # Compute the stats
        tempJobTreeStatsFile = os.path.join(self.getLocalTempDir(), "jobTreeStats.xml")
        system("jobTreeStats --jobTree %s --outputFile %s" % (tempJobTreeDir, tempJobTreeStatsFile))
        # Now copy the true assembly back to the output: the whole local temp
        # dir (experiment file, stats, alignment database) is moved in one go.
        system("mv %s/* %s" % (self.getLocalTempDir(), self.outputDir))
        assert os.path.exists(cactusAlignment)
    # We're done!
    self.addChildTarget(MakeStats1(self.outputDir, cactusAlignment, self.options))
def runCactusBar(cactusDiskDatabaseString, flowerNames, logLevel=None,
                 spanningTrees=None, maximumLength=None,
                 gapGamma=None, matchGamma=None,
                 splitMatrixBiggerThanThis=None,
                 anchorMatrixBiggerThanThis=None,
                 repeatMaskMatrixBiggerThanThis=None,
                 diagonalExpansion=None,
                 constraintDiagonalTrim=None,
                 minimumBlockDegree=None,
                 minimumIngroupDegree=None,
                 minimumOutgroupDegree=None,
                 alignAmbiguityCharacters=None,
                 pruneOutStubAlignments=None,
                 useProgressiveMerging=None,
                 calculateWhichEndsToComputeSeparately=None,
                 largeEndSize=None,
                 endAlignmentsToPrecomputeOutputFile=None,
                 precomputedAlignments=None,
                 ingroupCoverageFile=None,
                 minimumSizeToRescue=None,
                 minimumCoverageToRescue=None,
                 minimumNumberOfSpecies=None):
    """Runs cactus base aligner.

    Every keyword argument is rendered with nameValue and placed on the
    cactus_bar command line; flowerNames is fed to the program on stdin.
    Note that minimumBlockDegree is passed under the option name
    "minimumDegree".

    Returns the non-empty lines of the program's output as a list.
    """
    logLevel = getLogLevelString2(logLevel)
    # The rendered options, listed in the order they appear in the command.
    renderedOptions = [
        nameValue("spanningTrees", spanningTrees, int),
        nameValue("maximumLength", maximumLength, int),
        nameValue("gapGamma", gapGamma, float),
        nameValue("matchGamma", matchGamma, float),
        nameValue("splitMatrixBiggerThanThis", splitMatrixBiggerThanThis, int),
        nameValue("anchorMatrixBiggerThanThis", anchorMatrixBiggerThanThis, int),
        nameValue("repeatMaskMatrixBiggerThanThis", repeatMaskMatrixBiggerThanThis, int),
        nameValue("constraintDiagonalTrim", constraintDiagonalTrim, int),
        nameValue("minimumDegree", minimumBlockDegree, int),
        nameValue("minimumIngroupDegree", minimumIngroupDegree, int),
        nameValue("minimumOutgroupDegree", minimumOutgroupDegree, int),
        nameValue("alignAmbiguityCharacters", alignAmbiguityCharacters, bool),
        nameValue("pruneOutStubAlignments", pruneOutStubAlignments, bool),
        nameValue("diagonalExpansion", diagonalExpansion, int),
        nameValue("useProgressiveMerging", useProgressiveMerging, bool),
        nameValue("calculateWhichEndsToComputeSeparately", calculateWhichEndsToComputeSeparately, bool),
        nameValue("largeEndSize", largeEndSize, int),
        nameValue("endAlignmentsToPrecomputeOutputFile", endAlignmentsToPrecomputeOutputFile, str),
        nameValue("precomputedAlignments", precomputedAlignments, str, quotes=True),
        nameValue("ingroupCoverageFile", ingroupCoverageFile, str, quotes=True),
        nameValue("minimumSizeToRescue", minimumSizeToRescue, int),
        nameValue("minimumCoverageToRescue", minimumCoverageToRescue, float),
        nameValue("minimumNumberOfSpecies", minimumNumberOfSpecies, int),
    ]
    # One " %s" slot per rendered option, after the two fixed arguments.
    commandFormat = "cactus_bar --cactusDisk '%s' --logLevel %s" + " %s" * len(renderedOptions)
    command = commandFormat % tuple([cactusDiskDatabaseString, logLevel] + renderedOptions)
    masterMessages = popenCatch(command, stdinString=flowerNames)
    logger.info("Ran cactus_bar okay")
    return [message for message in masterMessages.split("\n") if message != '']
def issueJob(self, command, memory, cpu):
    """Queues a command for submission and returns the job id assigned to it.

    The id is taken from self.nextJobID (which is then incremented) and is
    recorded in self.currentjobs. The line built by prepareBsub for the cpu and
    memory request, with the command appended, is put on self.newJobsQueue
    together with the id.
    """
    assignedID = self.nextJobID
    self.nextJobID += 1
    self.currentjobs.add(assignedID)
    submission = prepareBsub(cpu, memory) + [command]
    self.newJobsQueue.put((assignedID, submission))
    logger.info("Issued the job command: %s with job id: %s " % (command, str(assignedID)))
    return assignedID
def listChildDirs(jobDir):
    """Returns listChildDirsUnsafe(jobDir), or an empty list if that fails.

    Failures are logged and otherwise ignored. Only ordinary exceptions are
    swallowed, so KeyboardInterrupt and SystemExit still propagate.
    """
    try:
        return listChildDirsUnsafe(jobDir)
    except Exception:
        logger.info("Encountered error while parsing job dir %s, so we will ignore it" % jobDir)
        return []
def run(self):
    """Writes the history distances for run number self.i to run_<i>.dat.

    The reference history id is options.refhistoryid offset by
    histseg.Global_BINWIDTH * self.i. Nothing is done if the output file
    already exists in self.outdir.
    """
    args = self.options
    refhistoryid = args.refhistoryid + histseg.Global_BINWIDTH * self.i
    outfile = os.path.join(self.outdir, "run_%d.dat" % self.i)
    if not os.path.exists(outfile):
        outfh = open(outfile, 'w')
        try:
            logger.info("running %d: get_history_distances_between_mcmc_steps(events, %s, %s, %s, %s, %s) > %s" % (self.i, refhistoryid, "", args.numsteps, args.stepsize, args.stepsize, outfile))
            mcmcdist.get_history_distances_between_mcmc_steps(
                self.events, refhistoryid, "", args.numsteps, args.stepsize,
                args.stepsize, outfh)
        finally:
            # Close explicitly so the results are flushed to disk.
            outfh.close()
def runCactusTreeViewer(graphFile, cactusDiskDatabaseString, flowerName="0", logLevel=None):
    """Runs cactus_treeViewer for the given flower, writing its output to graphFile."""
    viewerArguments = (cactusDiskDatabaseString,
                       flowerName,
                       graphFile,
                       getLogLevelString2(logLevel))
    viewerCommand = "cactus_treeViewer --cactusDisk '%s' --flowerName %s --outputFile %s --logLevel %s" % viewerArguments
    system(viewerCommand)
    logger.info("Created a cactus tree graph")
def loadEnvironment(config):
    """Puts the environment in the pickle file.

    The environment of this process (os.environ) is pickled to the file named
    by getEnvironmentFileName for the config's "job_tree" attribute.
    """
    # Dump out the environment of this process in the environment pickle file.
    fileHandle = open(getEnvironmentFileName(config.attrib["job_tree"]), 'w')
    try:
        cPickle.dump(os.environ, fileHandle)
    finally:
        # Release the handle even if pickling fails.
        fileHandle.close()
    logger.info("Written the environment for the jobs to the environment file")
def testMatchGraph(self):
    """ Tests matchGraph.py program using randGraph.py input

    For each of self.testNo rounds a random graph is generated and matched.
    The output must consist of a "<vertexNum> <edgeNum>" header followed by
    one "<i> <j>" line per edge, such that the vertex count is even, the edge
    count is half the vertex count and every vertex is in exactly one edge.
    """
    for test in range(self.testNo):
        tempInputFile = getTempFile()
        tempOutputFile = getTempFile()
        self.tempFiles.append(tempInputFile)
        self.tempFiles.append(tempOutputFile)
        # Create sample/test input graph file
        system("blossom_randGraph.py > %s" % tempInputFile)
        # Run matchGraph.py
        system("matchGraph.py -e %s -w %s" % (tempInputFile, tempOutputFile))

        # Now check if output is valid
        f = open(tempOutputFile, 'r')
        try:
            lines = [line.rstrip() for line in f]
        finally:
            f.close()
        # Without a header line there is nothing to validate against.
        self.assertTrue(len(lines) > 0)

        (vertexNum, edgeNum) = lines[0].split()
        vertexNum = int(vertexNum)
        edgeNum = int(edgeNum)
        vertexArray = [0] * vertexNum
        # Number of vertices must be even
        self.assertEqual(vertexNum % 2, 0)
        # Number of edges is half the number of vertices
        self.assertEqual(vertexNum // 2, edgeNum)

        for line in lines[1:]:
            (vertexI, vertexJ,) = line.split()
            vertexI = int(vertexI)
            vertexJ = int(vertexJ)
            # Vertex indices must be 0 <= i,j < V. Check before indexing, as a
            # negative index would otherwise silently wrap around.
            self.assertTrue(0 <= vertexI < vertexNum)
            self.assertTrue(0 <= vertexJ < vertexNum)
            vertexArray[vertexI] += 1
            vertexArray[vertexJ] += 1

        # Must have the correct number of edges
        self.assertEqual(edgeNum, len(lines) - 1)

        # Each vertex must be only in one edge
        badCount = len([count for count in vertexArray if count != 1])
        self.assertEqual(badCount, 0)
    logger.info("Ran the test(s) of the matchGraph program okay")
def run(self):
    """Lists the global temp dir, then fails about half of the time.

    A RuntimeError is raised when a random draw exceeds 0.5; otherwise the
    temp files of self.tempFileTree are destroyed.
    """
    logger.info("At the end, this is the contents of the global temp dir...")
    listingCommand = "ls -l %s" % self.getGlobalTempDir()
    system(listingCommand)
    logger.info("And done....")
    draw = random.random()
    if not draw <= 0.5:
        raise RuntimeError()
    self.tempFileTree.destroyTempFiles()
def run(self):
    """Copies the gene file into the local temp dir and runs cactus_genemapHomolog on it.

    The copy is named "refgene.bed". The tool is given self.dbStr,
    self.output1, self.refSpecies and the path of the copy; its standard
    output is redirected to self.output2.
    """
    localCopy = os.path.join(self.getLocalTempDir(), "refgene.bed")
    system("cp %s %s" % (self.geneFile, localCopy))
    homologArguments = (self.dbStr, self.output1, self.refSpecies, localCopy, self.output2)
    command = "cactus_genemapHomolog -c %s -o \"%s\" -s \"%s\" -g \"%s\" > %s" % homologArguments
    system("%s" % command)
    doneMessage = "Done genemapHomolog for %s, command: %s\n" % (self.region, command)
    logger.info(doneMessage)
def parseJobFile(absFileName):
    """Returns the Job read from absFileName, or None if reading it fails.

    Failures are logged and otherwise ignored. Only ordinary exceptions are
    swallowed, so KeyboardInterrupt and SystemExit still propagate.
    """
    try:
        return Job.read(absFileName)
    except Exception:
        logger.info("Encountered error while parsing job file %s, so we will ignore it" % absFileName)
        return None
def obtainSystemConstants(self):
    """
    This should be able to set self.maxCPU and self.maxMEM

    As written, both values are simply set to 0 and then logged.
    """
    self.maxCPU = 0
    self.maxMEM = 0
    # Compare by value: 'is 0' tests object identity and only works by accident.
    if self.maxCPU == 0 or self.maxMEM == 0:
        # NOTE(review): the original constructed a RuntimeError here without
        # raising it. Since both values are unconditionally 0 above, raising
        # would make every call fail, so the condition is only logged -
        # confirm whether a raise is wanted once real values are read.
        logger.info("Can't read ncpus or maxmem info")
    # The original message was labelled maxCPU but printed maxMEM.
    logger.info("Got the maxCPU: %s and the maxMEM: %s" % (self.maxCPU, self.maxMEM))
def run(self):
    """Adds one RunSample child target for every sample of the experiment.

    The sample names come from getList applied to
    <dataDir>/<exp>/sample.lst, which must exist.
    """
    listPathParts = (self.options.dataDir, self.exp, "sample.lst")
    sampleListName = "%s/%s/%s" % listPathParts
    logger.info("sampleListName: %s\n" % (sampleListName))
    assert os.path.exists(sampleListName)
    for name in getList(sampleListName):
        child = RunSample(self.exp, name, "%s/%s" % (self.exp, name), self.options)
        self.addChildTarget(child)
def bsub(bsubline):
    """Runs the space-joined bsubline through the shell and returns the job id.

    The id is parsed from the first line of output (stderr is merged into
    stdout): it is the second whitespace-separated field, stripped of any
    surrounding '<' and '>', converted to an int.
    """
    shellCommand = " ".join(bsubline)
    submission = subprocess.Popen(shellCommand, shell=True,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.STDOUT)
    firstLine = submission.stdout.readline()
    logger.info("BSUB: " + firstLine)
    fields = firstLine.strip().split()
    jobID = int(fields[1].strip('<>'))
    logger.debug("Got the job id: %s" % (str(jobID)))
    return jobID
def runCactusAdjacencyGraphViewer(graphFile, cactusDiskDatabaseString, flowerName="0",
                                  logLevel=None, includeInternalAdjacencies=False):
    """Runs cactus_adjacencyGraphViewer for the given flower, writing to graphFile.

    includeInternalAdjacencies is rendered with nameValue and appended to the
    command line; previously it was rendered but then dropped, so the argument
    had no effect.
    """
    logLevel = getLogLevelString2(logLevel)
    # NOTE(review): presumably nameValue yields an empty string for a False
    # bool, leaving default calls unchanged, and the binary accepts the
    # rendered option - confirm against nameValue and the program's usage.
    includeInternalAdjacencies = nameValue("includeInternalAdjacencies", includeInternalAdjacencies, bool)
    system("cactus_adjacencyGraphViewer --cactusDisk '%s' --flowerName %s --outputFile %s --logLevel %s %s"
           % (cactusDiskDatabaseString, flowerName, graphFile, logLevel, includeInternalAdjacencies))
    logger.info("Created a break point graph of the problem")
def createFirstJob(command, config, memory=None, cpu=None, time=sys.maxint):
    """Adds the first job to the jobtree.

    memory and cpu fall back to the config's "default_memory" and
    "default_cpu" attributes when they are None or sys.maxint. The job is
    created with the config's "try_count" and written to the job file
    directory of the config's "job_tree". The time argument is not used.
    """
    logger.info("Adding the first job")
    if memory is None or memory == sys.maxint:
        memory = float(config.attrib["default_memory"])
    if cpu is None or cpu == sys.maxint:
        cpu = float(config.attrib["default_cpu"])
    job = Job(command=command, memory=memory, cpu=cpu,
              tryCount=int(config.attrib["try_count"]),
              jobDir=getJobFileDirName(config.attrib["job_tree"]))
    job.write()
    logger.info("Added the first job")