def parasolRestart():
    """Function starts the parasol hub and node.
    """
    parasolStop()
    while True:
        machineList = os.path.join(workflowRootPath(), "jobTree", "machineList")
        #pathEnvVar = os.environ["PATH"]
        os.system("paraNode start -hub=localhost")
        #-umask=002 -userPath=%s -sysPath=%s" % (pathEnvVar, pathEnvVar))
        os.system("paraHub %s subnet=127.0.0 &" % (machineList,))
        tempFile = getTempFile()
        dead = True
        try:
            popen("parasol status", tempFile)
            fileHandle = open(tempFile, 'r')
            line = fileHandle.readline()
            while line != '':
                if "Nodes dead" in line:
                    print line
                    if int(line.split()[-1]) == 0:
                        dead = False
                line = fileHandle.readline()
            fileHandle.close()
        except RuntimeError:
            pass
        os.remove(tempFile)
        if not dead:
            break
        else:
            logger.info("Tried to restart the parasol process, but failed, will try again")
            parasolStop()
            time.sleep(5)
    logger.info("Restarted the parasol process")
def find_analyses(target, recordsToAnalyze, templateFastqFiles, complementFastqFiles, references, outputDir):
    """takes a set of records to analyze and finds the corresponding sequences and creates alignment targets"""
    files = {"template": [], "complement": []}
    logger.info("Finding template analyses")
    for fastqFile in templateFastqFiles:
        for name, seq, qual in fastqRead(fastqFile):
            if name in recordsToAnalyze:
                outfile = os.path.join(target.getGlobalTempDir(), "template_" + name)
                files["template"].append(outfile)
                ref_name, ref_start, ref_stop = recordsToAnalyze[name]
                ref_seq = references[ref_name][ref_start:ref_stop]
                analysis = [name, seq, ref_name, ref_seq, outfile]
                target.addChildTarget(Target.makeTargetFn(analyze, args=analysis))
    logger.info("Finding complement analyses")
    for fastqFile in complementFastqFiles:
        for name, seq, qual in fastqRead(fastqFile):
            if name in recordsToAnalyze:
                outfile = os.path.join(target.getGlobalTempDir(), "complement_" + name)
                files["complement"].append(outfile)
                ref_name, ref_start, ref_stop = recordsToAnalyze[name]
                ref_seq = references[ref_name][ref_start:ref_stop]
                analysis = [name, seq, ref_name, ref_seq, outfile]
                target.addChildTarget(Target.makeTargetFn(analyze, args=analysis))
    target.setFollowOnTargetFn(merge, args=(files, outputDir))
def main():
    parser = build_parser()
    Stack.addJobTreeOptions(parser)
    args = parser.parse_args()
    setLoggingFromOptions(args)

    if not os.path.exists(args.outDir):
        os.mkdir(args.outDir)

    if args.overwriteDb is True:
        if os.path.exists(args.mergedDb):
            os.remove(args.mergedDb)
        for g in args.genomes:
            if os.path.exists(os.path.join(args.outDir, g + ".db")):
                os.remove(os.path.join(args.outDir, g + ".db"))

    logger.info("Building paths to the required files")
    alnPslDict = parse_dir(args.genomes, args.dataDir, alignment_ext)
    seqTwoBitDict = parse_dir(args.genomes, args.dataDir, sequence_ext)
    geneCheckBedDict = parse_dir(args.genomes, args.dataDir, gene_check_ext)
    #geneCheckBedDetailsDict = parse_dir(args.genomes, args.geneCheckDir, gene_check_details_ext)

    refSequence = os.path.join(args.dataDir, args.refGenome + ".2bit")
    if not os.path.exists(refSequence):
        raise RuntimeError("Reference genome 2bit not present at {}".format(refSequence))
    args.refSequence = refSequence

    i = Stack(Target.makeTargetFn(build_analysis, args=(alnPslDict, seqTwoBitDict, geneCheckBedDict,
                                                        args.gencodeAttributeMap, args.genomes,
                                                        args.annotationBed, args.outDir, args.primaryKey,
                                                        args.refGenome))).startJobTree(args)

    if i != 0:
        raise RuntimeError("Got failed jobs")

    merge_databases(args.outDir, args.mergedDb, args.genomes)
def issueJobs(self, jobCommands):
    """Issues parasol with job commands.
    """
    issuedJobs = {}
    for jobCommand, memory, cpu, logFile in jobCommands:
        assert memory != None
        assert cpu != None
        assert logFile != None
        pattern = re.compile("your job ([0-9]+).*")
        command = "parasol -verbose -ram=%i -cpu=%i -results=%s add job '%s'" % (memory, cpu, self.parasolResultsFile, jobCommand)
        while True:
            #time.sleep(0.1) #Sleep to let parasol catch up #Apparently unnecessary
            popenParasolCommand(command, self.scratchFile)
            fileHandle = open(self.scratchFile, 'r')
            line = fileHandle.readline()
            fileHandle.close()
            match = pattern.match(line)
            if match != None: #This is because parasol add job will return success, even if the job was not properly issued!
                break
            else:
                logger.info("We failed to properly add the job, we will try again after a sleep")
                time.sleep(5)
        jobID = int(match.group(1))
        logger.debug("Got the job id: %s from line: %s" % (jobID, line))
        assert jobID not in issuedJobs.keys()
        issuedJobs[jobID] = jobCommand
        logger.debug("Issued the job command: %s with job id: %i " % (command, jobID))
    return issuedJobs
def run(self):
    ##########################################
    #Setup a file tree.
    ##########################################
    tempFileTree = TempFileTree(os.path.join(self.getGlobalTempDir(), getRandomAlphaNumericString()))

    fileTreeRootFile = tempFileTree.getTempFile()
    makeFileTree(fileTreeRootFile, self.depth, tempFileTree)

    treePointer = tempFileTree.getTempFile()
    makeTreePointer(fileTreeRootFile, treePointer)
    logger.info("We've set up the file tree")

    ##########################################
    #Issue the child and follow on jobs
    ##########################################
    self.addChildTarget(ChildTarget(treePointer))
    self.setFollowOnTarget(DestructFileTree(tempFileTree))
    logger.info("We've added the child target and finished SetupFileTree.run()")
def get_paired_fastqs(target, genome, institute, tissue, bam, reference, out_dir,
                      name_sorted_sam_path, fwd_fastq_path, rev_fastq_path):
    logger.info("Extracting paired fastqs")
    target.addChildTargetFn(get_fwd, args=(name_sorted_sam_path, fwd_fastq_path))
    target.addChildTargetFn(get_rev, args=(name_sorted_sam_path, rev_fastq_path))
    target.setFollowOnTargetFn(kallisto_paired, args=(genome, institute, tissue, bam, reference, out_dir,
                                                      name_sorted_sam_path, fwd_fastq_path, rev_fastq_path))
def parseJobFile(absFileName):
    try:
        job = ET.parse(absFileName).getroot()
        return job
    except IOError:
        logger.info("Encountered error while parsing job file %s, so we will ignore it" % absFileName)
    return None
def loadEnvironment(config):
    """Puts the environment in the pickle file.
    """
    #Dump out the environment of this process in the environment pickle file.
    fileHandle = open(config.attrib["environment_file"], 'w')
    cPickle.dump(os.environ, fileHandle)
    fileHandle.close()
    logger.info("Written the environment for the jobs to the environment file")
def setupTempFileTrees(config):
    """Load the temp file trees
    """
    config.attrib["job_file_dir"] = TempFileTree(config.attrib["job_file_dir"])
    config.attrib["temp_dir_dir"] = TempFileTree(config.attrib["temp_dir_dir"])
    config.attrib["log_file_dir"] = TempFileTree(config.attrib["log_file_dir"])
    config.attrib["slave_log_file_dir"] = TempFileTree(config.attrib["slave_log_file_dir"])
    logger.info("Setup the temp file trees")
def run(self):
    parasolRestart()
    while True:
        time.sleep(random.choice(xrange(240)))
        if self.kill == True:
            return
        logger.info("Going to kill a parasol/master process")
        killMasterAndParasol()
def emHasTerminated(self):
    if self.iteration < 2:
        return False
    prevLL = self.readLL("params%i.txt" % (self.iteration - 1))
    currLL = self.readLL("params%i.txt" % (self.iteration))
    decrease = ((prevLL - currLL) / currLL)
    logger.info("LL: %5g, Decrease: %3g" % (currLL, 100 * decrease))
    return decrease < self.tolerance
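# Illustrative sketch of the convergence test in emHasTerminated above, using
# hypothetical log-likelihood values (prevLL and currLL below are assumptions,
# not values taken from the source): the relative decrease between successive
# EM iterations is compared against the tolerance.
def _example_em_convergence(tolerance=0.001):
    prevLL, currLL = -1050.0, -1000.0      # hypothetical values from two EM iterations
    decrease = (prevLL - currLL) / currLL  # (-50.0) / (-1000.0) = 0.05
    return decrease < tolerance            # 0.05 < 0.001 -> False, so EM keeps iterating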
def buildAnalyses(target, queries, baseOutDir, bpPenalty, dataPenalty, tightness, keyFile, graph, kmerSize, saveInter):
    logger.info("Starting to build analyses")
    for uuid, queryString in queries.iteritems():
        target.addChildTarget(SlicerModelWrapper(uuid, queryString, baseOutDir, bpPenalty, dataPenalty,
                                                 tightness, keyFile, graph, kmerSize, saveInter))
def run(self): logger.info("Progressive Next: " + self.event) if not self.schedule.isVirtual(self.event): self.addChildTarget(ProgressiveUp(self.options, self.project, self.event)) followOnEvent = self.schedule.followOn(self.event) if followOnEvent is not None: self.addChildTarget(ProgressiveDown(self.options, self.project, followOnEvent, self.schedule))
def createFirstJob(command, config, memory=None, cpu=None, time=sys.maxint):
    """Adds the first job to the jobtree.
    """
    logger.info("Adding the first job")
    if memory == None:
        memory = config.attrib["default_memory"]
    if cpu == None:
        cpu = config.attrib["default_cpu"]
    job = createJob({ "command":command,
                      "memory":str(int(memory)),
                      "cpu":str(int(cpu)),
                      "time":str(float(time)) }, None, config)
    writeJobs([job])
    logger.info("Added the first job")
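# Minimal usage sketch for createFirstJob above; the command string and memory
# request are hypothetical placeholders, and config is assumed to come from
# createJobTree (defined elsewhere in this collection).
def _example_create_first_job(config):
    createFirstJob("myScript.py --job JOB_FILE", config, memory=8589934592, cpu=1)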
def run(self): logger.info("Progressive Down: " + self.event) if not self.options.nonRecursive: deps = self.schedule.deps(self.event) for child in deps: self.addChildTarget(ProgressiveDown(self.options, self.project, child, self.schedule)) self.setFollowOnTarget(ProgressiveNext(self.options, self.project, self.event, self.schedule))
def killJobs(self, jobIDs):
    """Kills the given jobs, represented as Job ids, then checks they are dead by checking
    they are not in the list of issued jobs.
    """
    while True:
        for jobID in jobIDs:
            i = popenParasolCommand("parasol remove job %i" % jobID, tmpFileForStdOut=self.scratchFile, runUntilSuccessful=None)
            logger.info("Tried to remove jobID: %i, with exit value: %i" % (jobID, i))
        runningJobs = self.getIssuedJobIDs()
        if set(jobIDs).difference(set(runningJobs)) == set(jobIDs):
            return
        time.sleep(5)
        logger.critical("Tried to kill some jobs, but something happened and they are still going, so I'll try again")
def restartFailedJobs(config, jobFiles):
    """Traverses through the file tree and resets the restart count of all jobs.
    """
    for absFileName in jobFiles:
        if os.path.isfile(absFileName):
            job = ET.parse(absFileName).getroot()
            logger.info("Restarting job: %s" % job.attrib["file"])
            job.attrib["remaining_retry_count"] = config.attrib["retry_count"]
            if job.attrib["colour"] == "red":
                job.attrib["colour"] = "white"
            #Is a leaf and the job failed when the system went down, but the status did not get updated.
            if job.attrib["colour"] == "grey":
                job.attrib["colour"] = "white"
            writeJobs([job])
def checkFileTreeCounts(rootFile):
    """Check the file tree produced by the test.
    """
    tree = ET.parse(rootFile).getroot()
    i = 0
    children = tree.find("children").findall("child")
    if len(children) == 0:
        i = 1
    else:
        for child in children:
            i += checkFileTreeCounts(child.attrib["file"])
    logger.info("File tree counts: %i %i" % (i, int(tree.attrib["count"])))
    assert i == int(tree.attrib["count"])
    return i
def main():
    #Parse the inputs args/options
    parser = OptionParser(usage="usage: workingDir [options]", version="%prog 0.1")
    Stack.addJobTreeOptions(parser)
    options, args = parser.parse_args()
    setLoggingFromOptions(options)

    if len(args) != 1:
        raise RuntimeError("Expected one argument, got %s arguments: %s" % (len(args), " ".join(args)))
    workingDir = args[0]

    #Assign the input files
    readFastqFiles = [os.path.join(workingDir, "readFastqFiles", i) for i in
                      os.listdir(os.path.join(workingDir, "readFastqFiles"))
                      if ".fq" in i or ".fastq" in i]
    referenceFastaFiles = [os.path.join(workingDir, "referenceFastaFiles", i) for i in
                           os.listdir(os.path.join(workingDir, "referenceFastaFiles"))
                           if ".fa" in i or ".fasta" in i]
    outputDir = os.path.join(workingDir, "output")

    #Log the inputs
    logger.info("Using the following working directory: %s" % workingDir)
    logger.info("Using the following output directory: %s" % outputDir)
    for readFastqFile in readFastqFiles:
        logger.info("Got the following read fastq file: %s" % readFastqFile)
    for referenceFastaFile in referenceFastaFiles:
        logger.info("Got the following reference fasta files: %s" % referenceFastaFile)

    #This line invokes jobTree
    i = Stack(Target.makeTargetFn(setupExperiments, args=(readFastqFiles, referenceFastaFiles,
                                                          mappers, analyses, outputDir))).startJobTree(options)

    if i != 0:
        raise RuntimeError("Got failed jobs")
def runJobTree(command, jobTreeDir, logLevel="DEBUG", retryCount=0, batchSystem="single_machine", rescueJobFrequency=None):
    """A convenience function for running job tree from within a python script.
    """
    if rescueJobFrequency != None:
        rescueJobFrequencyString = "--rescueJobsFrequency %s" % float(rescueJobFrequency)
    else:
        rescueJobFrequencyString = ""
    command = "jobTree --command \"%s\" --jobTree %s --logLevel %s --retryCount %i --batchSystem %s %s" % \
              (command, jobTreeDir, logLevel, retryCount, batchSystem, rescueJobFrequencyString)
    logger.info("Running command : %s" % command)
    system(command)
    logger.info("Ran the jobtree apparently okay")
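# Usage sketch for runJobTree above; the script command and jobTree directory
# are hypothetical placeholders, not paths taken from the source.
def _example_run_job_tree():
    runJobTree("myScript.py --job JOB_FILE", "/tmp/exampleJobTree",
               logLevel="INFO", retryCount=2, batchSystem="single_machine",
               rescueJobFrequency=60)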
def reloadJobTree(jobTree):
    """Load the job tree from a dir.
    """
    logger.info("The job tree appears to already exist, so we'll reload it")
    assert os.path.isfile(os.path.join(jobTree, "config.xml")) #A valid job tree must contain the config file
    assert os.path.isfile(os.path.join(jobTree, "environ.pickle")) #A valid job tree must contain a pickle file which encodes the path environment of the job
    assert os.path.isfile(os.path.join(jobTree, "jobNumber.xml")) #A valid job tree must contain a file which is updated with the number of jobs that have been run.
    assert os.path.isdir(os.path.join(jobTree, "jobs")) #A job tree must have a directory of jobs.
    assert os.path.isdir(os.path.join(jobTree, "tempDirDir")) #A job tree must have a directory of temporary directories (for jobs to make temp files in).
    assert os.path.isdir(os.path.join(jobTree, "logFileDir")) #A job tree must have a directory of log files.
    assert os.path.isdir(os.path.join(jobTree, "slaveLogFileDir")) #A job tree must have a directory of slave log files.
    config = ET.parse(os.path.join(jobTree, "config.xml")).getroot()
    setupTempFileTrees(config)
    batchSystem = loadTheBatchSystem(config)
    logger.info("Reloaded the jobtree")
    return config, batchSystem
def killMasterAndParasol():
    """Method to destroy master process
    """
    tempFile = getTempFile()
    popen("ps -a", tempFile)
    fileHandle = open(tempFile, 'r')
    line = fileHandle.readline()
    #Example parasol state lines:
    #67401 ttys002 0:00.06 /Users/benedictpaten/kent/src/parasol/bin/paraNode start -hub=localhost -log=/tmp/node.2009-07-08.log -umask=002 -userPath=bin:bin/x86_64:bin/i
    #67403 ttys002 0:00.65 /Users/benedictpaten/kent/src/parasol/bin/paraHub -log=/tmp/hub.2009-07-08.log machineList subnet=127.0.0
    #68573 ttys002 0:00.00 /Users/benedictpaten/kent/src/parasol/bin/paraNode start -hub=localhost -log=/tmp/node.2009-07-08.log -umask=002 -userPath=bin:bin/x86_64:bin/i
    while line != '':
        tokens = line.split()
        if 'paraNode' in line or 'paraHub' in line:
            if random.random() > 0.5:
                i = os.system("kill %i" % int(tokens[0]))
                logger.info("Tried to kill parasol process: %i, line: %s, exit value: %i" % (int(tokens[0]), line, i))
                break
        elif 'jobTreeMaster.py' in line:
            logger.info("Have job tree master line")
            if random.random() > 0.5:
                i = os.system("kill %i" % int(tokens[0]))
                logger.info("Tried to kill master process: %i, line: %s, exit value: %i" % (int(tokens[0]), line, i))
                break
        line = fileHandle.readline()
    fileHandle.close()
    os.remove(tempFile)
    parasolRestart()
def testJobTree_Parasol(self):
    """Runs a test program using the job tree, whilst constantly restarting parasol
    by killing the nodes.
    """
    for test in xrange(self.testNo): #Does not run this test when doing short testing
        jobTreeCommand, fileTreeRootFile = setupJobTree(self.tempFileTree, self.jobTreeDir,
                                                        "parasol", depth=self.depth)
        jobTreeCommand += " --rescueJobsFrequency 20"
        #Run the job
        parasolAndMasterKiller = ParasolAndMasterKiller()
        parasolAndMasterKiller.start()
        while True:
            while True:
                process = subprocess.Popen(jobTreeCommand, shell=True)
                sts = os.waitpid(process.pid, 0)
                if sts[1] == 0:
                    logger.info("The job tree master ended, with an okay exit value (using parasol)")
                    break
                else:
                    logger.info("The job tree master ended with an error exit value, restarting: %i" % sts[1])
            if checkEndStateOfJobTree(self.jobTreeDir): #Check the state of the job files
                break
            jobTreeCommand = "jobTreeRun --jobTree %s --logDebug" % self.jobTreeDir
        checkFileTreeCounts(fileTreeRootFile)
        os.system("rm -rf %s" % self.jobTreeDir)
        parasolAndMasterKiller.stopKilling()
        logger.info("Test done okay")
def setupJobTree(tempFileTree, jobTreeDir, batchSystem, depth=2):
    """Sets up a job tree using the jobTreeSetup.py command.
    """
    #Setup a job
    retryCount = random.choice(xrange(1, 10))
    logger.info("Setup the basic files for the test")

    fileTreeRootFile = tempFileTree.getTempFile()
    makeFileTree(fileTreeRootFile, depth, tempFileTree)
    treePointerFile = makeTreePointer(fileTreeRootFile, tempFileTree.getTempFile())

    #Setup the job
    command = "jobTreeTest_CommandFirst.py --treePointer %s --job JOB_FILE" % \
              (treePointerFile)
    jobTreeCommand = "jobTreeRun --jobTree %s --retryCount %i --command '%s' --logLevel=INFO --maxJobDuration 100 --batchSystem %s" % \
                     (jobTreeDir, retryCount, command, batchSystem)
    logger.info("Setup the job okay")
    return (jobTreeCommand, fileTreeRootFile)
def testJobTree(testNo, depth, tempFileTree, jobTreeDir, batchSystem):
    """Runs a test program using the job tree using the single machine batch system.
    """
    for test in xrange(testNo):
        jobTreeCommand, fileTreeRootFile = setupJobTree(tempFileTree, jobTreeDir,
                                                        batchSystem, depth=depth)
        #Run the job
        while True:
            print "job tree command", jobTreeCommand
            process = subprocess.Popen(jobTreeCommand, shell=True)
            sts = os.waitpid(process.pid, 0)
            assert sts[1] == 0
            logger.info("The job tree master ended, with an okay exit value")
            if checkEndStateOfJobTree(jobTreeDir): #Check the state of the job files, exit if none
                break
            jobTreeCommand = "jobTreeRun --jobTree %s --logInfo" % jobTreeDir
        checkFileTreeCounts(fileTreeRootFile)
        os.system("rm -rf %s" % jobTreeDir)
        logger.info("Test done okay")
def createJobTree(options):
    logger.info("Starting to create the job tree setup for the first time")
    options.jobTree = os.path.abspath(options.jobTree)
    os.mkdir(options.jobTree)
    config = ET.Element("config")
    config.attrib["environment_file"] = os.path.join(options.jobTree, "environ.pickle")
    config.attrib["job_number_file"] = os.path.join(options.jobTree, "jobNumber.xml")
    config.attrib["job_file_dir"] = os.path.join(options.jobTree, "jobs")
    config.attrib["temp_dir_dir"] = os.path.join(options.jobTree, "tempDirDir")
    config.attrib["log_file_dir"] = os.path.join(options.jobTree, "logFileDir")
    config.attrib["slave_log_file_dir"] = os.path.join(options.jobTree, "slaveLogFileDir")
    config.attrib["results_file"] = os.path.join(options.jobTree, "results.txt")
    config.attrib["scratch_file"] = os.path.join(options.jobTree, "scratch.txt")
    config.attrib["retry_count"] = str(int(options.retryCount))
    config.attrib["max_job_duration"] = str(float(options.maxJobDuration))
    config.attrib["batch_system"] = options.batchSystem
    config.attrib["job_time"] = str(float(options.jobTime))
    config.attrib["max_log_file_size"] = str(int(options.maxLogFileSize))
    config.attrib["default_memory"] = str(int(options.defaultMemory))
    config.attrib["default_cpu"] = str(int(options.defaultCpu))
    config.attrib["max_jobs"] = str(int(options.maxJobs))
    config.attrib["max_threads"] = str(int(options.maxThreads))
    if options.stats:
        config.attrib["stats"] = os.path.join(options.jobTree, "stats.xml")
        fileHandle = open(config.attrib["stats"], 'w')
        fileHandle.write("<stats>")
        fileHandle.close()

    #Load the batch system.
    batchSystem = loadTheBatchSystem(config)

    #Set the two parameters determining the polling frequency of the system.
    config.attrib["wait_duration"] = str(float(batchSystem.getWaitDuration()))
    if options.waitDuration != None:
        config.attrib["wait_duration"] = str(float(options.waitDuration))
    config.attrib["rescue_jobs_frequency"] = str(float(batchSystem.getRescueJobFrequency()))
    if options.rescueJobsFrequency != None:
        config.attrib["rescue_jobs_frequency"] = str(float(options.rescueJobsFrequency))

    #Write the config file to disk
    fileHandle = open(os.path.join(options.jobTree, "config.xml"), 'w')
    tree = ET.ElementTree(config)
    tree.write(fileHandle)
    fileHandle.close()
    logger.info("Written the config file")

    #Set up the jobNumber file
    fileHandle = open(config.attrib["job_number_file"], 'w')
    ET.ElementTree(ET.Element("job_number", { "job_number":'0' })).write(fileHandle)
    fileHandle.close()

    #Setup the temp file trees.
    setupTempFileTrees(config)

    logger.info("Finished the job tree setup")
    return config, batchSystem
def main():
    parser = OptionParser()
    Stack.addJobTreeOptions(parser)
    options, args = parser.parse_args()
    setLoggingFromOptions(options)

    outputDir = "muscle_compare_2d/output/"

    if not os.path.exists(outputDir):
        logger.info("Output dir {} does not exist. Creating.".format(outputDir))
        os.mkdir(outputDir)
    if len(os.listdir(outputDir)) > 0:
        logger.info("Output dir not empty.")

    if len(args) != 3:
        raise RuntimeError("Error: expected three arguments got %s arguments: %s" % (len(args), " ".join(args)))

    templateRecords = {x.qname for x in pysam.Samfile(args[0]) if not x.is_unmapped}
    complementRecords = {x.qname for x in pysam.Samfile(args[1]) if not x.is_unmapped}

    twodSamFile = pysam.Samfile(args[2])
    twodRecords = {x.qname: x for x in twodSamFile if not x.is_unmapped}

    recordsToAnalyze = dict()
    for name, record in twodRecords.iteritems():
        if name not in templateRecords and name not in complementRecords:
            ref_name = twodSamFile.getrname(record.tid)
            ref_start, ref_stop = int(record.aend - record.alen), int(record.aend)
            recordsToAnalyze[name] = [ref_name, ref_start, ref_stop]

    if os.path.exists("../readFastqFiles/template/") and os.path.exists("../readFastqFiles/complement"):
        templateFastqFiles = [os.path.join("../readFastqFiles/template/", x) for x in
                              os.listdir("../readFastqFiles/template/")
                              if x.endswith(".fastq") or x.endswith(".fq")]
        complementFastqFiles = [os.path.join("../readFastqFiles/complement/", x) for x in
                                os.listdir("../readFastqFiles/complement/")
                                if x.endswith(".fastq") or x.endswith(".fq")]
    else:
        raise RuntimeError("Error: readFastqFiles does not contain template and/or complement folders")

    referenceFastaFiles = [os.path.join("../referenceFastaFiles", x) for x in
                           os.listdir("../referenceFastaFiles")
                           if x.endswith(".fa") or x.endswith(".fasta")]

    if len(referenceFastaFiles) > 0:
        references = {y[0].split(" ")[0]: y[1] for x in referenceFastaFiles for y in fastaRead(x)}
    else:
        raise RuntimeError("Error: no reference fasta files")

    if len(recordsToAnalyze) == 0:
        raise RuntimeError("Error: every mapped twoD read in this set also mapped as template/complement, so there is nothing to analyze.")

    logger.info("Starting to find analyses to run...")

    args = (recordsToAnalyze, templateFastqFiles, complementFastqFiles, references, outputDir)
    i = Stack(Target.makeTargetFn(find_analyses, args=args)).startJobTree(options)

    if i != 0:
        raise RuntimeError("Got {} failed jobs".format(i))
def main():
    ## Make sure we're in the right type of directory
    assert os.path.exists("jobs.list")
    assert os.path.exists("jobsEM.list")
    assert os.path.exists("config.txt")
    assert os.path.exists("configEM.txt")
    assert os.path.exists("params0.txt")
    assert commandAvailable(collectParamsExec)
    assert commandAvailable(mergeSwarm)
    assert commandAvailable(mergeMerge)

    ##
    ## Parse options
    ##
    parser = OptionParser()
    Stack.addJobTreeOptions(parser) # so that the stack will work
    parser.add_option("--jobFile", help="Add as a child of jobFile rather " +
                      "than making a new jobTree")
    options, args = parser.parse_args()
    print "Using Batch System '" + options.batchSystem + "'"

    assert len(args) == 0 or len(args) == 1
    tolerance = 0.001
    if len(args) == 1:
        tolerance = float(args[0])

    logger.info("options: " + str(options))

    ##
    ## Run
    ##
    logger.info("starting first EM iteration")
    s = Stack(ExpectationIteration(0, tolerance, os.getcwd()))
    if options.jobFile:
        s.addToJobFile(options.jobFile)
    else:
        if options.jobTree == None:
            options.jobTree = "./.jobTree"

        failed = s.startJobTree(options)
        if failed:
            print("%d jobs failed" % failed)
        else:
            logger.info("Run complete!")
def main():
    parser = getBasicOptionParser("usage: %prog [options]", "%prog 0.1")

    parser.add_option("--jobTree", dest="jobTree",
                      help="Directory containing the job tree to kill")

    options, args = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    assert len(args) == 0 #This program takes no arguments
    assert options.jobTree != None #The jobtree should not be null
    assert os.path.isdir(options.jobTree) #The job tree must exist if we are going to kill it.
    logger.info("Starting routine to kill running jobs in the jobTree: %s" % options.jobTree)
    config = ET.parse(os.path.join(options.jobTree, "config.xml")).getroot()
    batchSystem = loadTheBatchSystem(config) #This should automatically kill the existing jobs.. so we're good.
    for job in batchSystem.getIssuedJobIDs(): #Just in case we do it again.
        batchSystem.killJobs(job)
    logger.info("All jobs SHOULD have been killed")
def loadTheBatchSystem(config):
    """Load the batch system.
    """
    batchSystemString = config.attrib["batch_system"]
    if batchSystemString == "parasol":
        batchSystem = ParasolBatchSystem(config)
        logger.info("Using the parasol batch system")
    elif batchSystemString == "single_machine" or batchSystemString == "singleMachine":
        batchSystem = SingleMachineBatchSystem(config)
        logger.info("Using the single machine batch system")
    elif batchSystemString == "gridengine" or batchSystemString == "gridEngine":
        batchSystem = GridengineBatchSystem(config)
        logger.info("Using the grid engine machine batch system")
    elif batchSystemString == "acid_test" or batchSystemString == "acidTest":
        batchSystem = SingleMachineBatchSystem(config, workerClass=BadWorker)
        config.attrib["retry_count"] = str(32) #The chance that a job does not complete after 32 goes is one in 4 billion, so you need a lot of jobs before this becomes probable
    else:
        raise RuntimeError("Unrecognised batch system: %s" % batchSystemString)
    return batchSystem
def __init__(self, config):
    AbstractBatchSystem.__init__(self, config) #Call the parent constructor
    #Keep the name of the results file for the pstat2 command..
    self.parasolResultsFile = config.attrib["results_file"]
    #Reset the job queue and results (initially, we do this again once we've killed the jobs)
    self.parasolResultsFileHandle = open(self.parasolResultsFile, 'w')
    self.parasolResultsFileHandle.close() #We lose any previous state in this file, and ensure the file's existence
    self.queuePattern = re.compile("q\s+([0-9]+)")
    self.runningPattern = re.compile("r\s+([0-9]+)\s+[\S]+\s+[\S]+\s+([0-9]+)\s+[\S]+")
    #The scratch file
    self.scratchFile = self.config.attrib["scratch_file"]
    self.killJobs(self.getIssuedJobIDs()) #Kill any jobs on the current stack
    logger.info("Going to sleep for a few seconds to kill any existing jobs")
    time.sleep(5) #Give batch system a second to sort itself out.
    logger.info("Removed any old jobs from the queue")
    #Reset the job queue and results
    self.parasolResultsFileHandle = open(self.parasolResultsFile, 'w')
    self.parasolResultsFileHandle.close() #We lose any previous state in this file, and ensure the file's existence
    self.parasolResultsFileHandle = open(self.parasolResultsFile, 'r')
    logger.info("Reset the results queue")
def main():
    #Parse the inputs args/options
    parser = OptionParser(usage="usage: workingDir [options]", version="%prog 0.1")
    Stack.addJobTreeOptions(parser)
    options, args = parser.parse_args()
    setLoggingFromOptions(options)

    if len(args) != 1:
        raise RuntimeError("Expected one argument, got %s arguments: %s" % (len(args), " ".join(args)))
    workingDir = args[0]

    # call read sampler script; samples 75, 50, and 25% reads
    #SampleReads(workingDir)

    #Create (if necessary) the output dir
    outputDir = os.path.join(workingDir, "output")
    if not os.path.exists(outputDir):
        logger.info("Creating output dir: %s" % outputDir)
        os.mkdir(outputDir)
    else:
        logger.info("Root output dir already exists: %s" % outputDir)

    #Assign/process (uniquify the names of) the input read fastq files
    processedFastqFiles = os.path.join(outputDir, "processedReadFastqFiles")
    if not os.path.exists(processedFastqFiles):
        os.mkdir(processedFastqFiles)

    fastqParentDir = os.path.join(workingDir, "readFastqFiles")
    readFastqFiles = list()
    for fastqSubDir in filter(os.path.isdir, [os.path.join(fastqParentDir, x) for x in os.listdir(fastqParentDir)]):
        readType = os.path.basename(fastqSubDir)
        if not os.path.exists(os.path.join(processedFastqFiles, os.path.basename(fastqSubDir))):
            os.mkdir(os.path.join(processedFastqFiles, readType))
        readFastqFiles.append([readType,
                               [makeFastqSequenceNamesUnique(os.path.join(workingDir, "readFastqFiles", readType, i),
                                                             os.path.join(processedFastqFiles, readType, i))
                                for i in os.listdir(os.path.join(workingDir, "readFastqFiles", readType))
                                if (".fq" in i and i[-3:] == '.fq') or (".fastq" in i and i[-6:] == '.fastq')]])

    #Assign/process (uniquify the names of) the input reference fasta files
    processedFastaFiles = os.path.join(outputDir, "processedReferenceFastaFiles")
    if not os.path.exists(processedFastaFiles):
        os.mkdir(processedFastaFiles)
    referenceFastaFiles = [makeFastaSequenceNamesUnique(os.path.join(workingDir, "referenceFastaFiles", i),
                                                        os.path.join(processedFastaFiles, i))
                           for i in os.listdir(os.path.join(workingDir, "referenceFastaFiles"))
                           if (".fa" in i and i[-3:] == '.fa') or (".fasta" in i and i[-6:] == '.fasta')]

    # call reference mutator script; introduces 1%, and 5% mutations (No nucleotide bias used for now)
    #referenceFastaFiles = mutateReferenceSequences(referenceFastaFiles)

    #Log the inputs
    logger.info("Using the following working directory: %s" % workingDir)
    logger.info("Using the following output directory: %s" % outputDir)
    for readType, readTypeFastqFiles in readFastqFiles:
        logger.info("Got the following read type: %s" % readType)
        for readFastqFile in readTypeFastqFiles:
            logger.info("Got the following read fastq file: %s" % readFastqFile)
    for referenceFastaFile in referenceFastaFiles:
        logger.info("Got the following reference fasta files: %s" % referenceFastaFile)

    #This line invokes jobTree
    i = Stack(Target.makeTargetFn(setupExperiments, args=(readFastqFiles, referenceFastaFiles, mappers,
                                                          analyses, metaAnalyses, outputDir))).startJobTree(options)

    if i != 0:
        raise RuntimeError("Got failed jobs")
def jtFitness():
    ## parse arguments
    parser = OptionParser()
    Stack.addJobTreeOptions(parser)
    parser.add_option("--jobFile", help="Add as a child of jobFile rather " +
                      "than making a new jobTree")
    parser.add_option("-d", "--dogma", dest="dogmaPath", default="")
    parser.add_option("-p", "--pathway", dest="pathwayPath", default="")
    parser.add_option("-b", "--boundaries", dest="discBound", default="")
    parser.add_option("-s", "--shuffle", dest="shuffleNode", default="")
    parser.add_option("-n", "--nulls", dest="nNulls", default="10")
    parser.add_option("-t", "--storedparam", dest="paramFile", default="")

    options, args = parser.parse_args()
    print "Using Batch System '" + options.batchSystem + "'"

    evidList = args
    if (len(evidList) % 2 == 1) | (len(evidList) == 0):
        sys.stderr.write("ERROR: incorrect number of arguments\n")
        sys.exit(1)

    if len(options.discBound) == 0:
        disc = "0.3333;0.6667"
    else:
        disc = options.discBound
    if len(options.dogmaPath) == 0:
        dogma = "%s/%s" % (dogmaDir, dogmaDefault)
    else:
        dogma = options.dogmaPath
    if not dogma.startswith("/"):
        dogma = "%s/%s" % (os.getcwd(), dogma)
    if len(options.pathwayPath) == 0:
        pathway = "%s/%s" % (pathwayDir, pathwayDefault)
    else:
        pathway = options.pathwayPath
    if not pathway.startswith("/"):
        pathway = "%s/%s" % (os.getcwd(), pathway)
    if len(options.shuffleNode) == 0:
        shuffleNode = "NULL"
    else:
        shuffleNode = options.shuffleNode
    nShuffle = int(options.nNulls)
    if len(options.paramFile) == 0:
        paramFile = None
    else:
        paramFile = options.paramFile

    ## clean
    if len(args) == 1:
        if args[0] == "clean":
            print "rm -rf .jobTree fold*"
            os.system("rm -rf .jobTree fold*")
            sys.exit(0)

    ## run
    logger.info("options: " + str(options))
    s = Stack(branchFolds(" ".join(evidList), disc, paramFile, paradigmExec, inferSpec,
                          dogma, pathway, shuffleNode, nShuffle, mFolds, os.getcwd()))
    if options.jobFile:
        s.addToJobFile(options.jobFile)
    else:
        if options.jobTree == None:
            options.jobTree = "./.jobTree"

        failed = s.startJobTree(options)
        if failed:
            print("%d jobs failed" % failed)
        else:
            logger.info("Run complete!")
            system("rm -rf .lastjobTree")
            system("mv .jobTree .lastjobTree")
def main():
    ## parse arguments
    parser = OptionParser(usage="%prog [options] network IPL-matrix features")
    Stack.addJobTreeOptions(parser)
    parser.add_option("--jobFile", help="Add as a child of jobFile rather " +
                      "than making a new jobTree")
    parser.add_option("-w", "--workdir", dest="workdir", help="Common Work directory", default="./")
    parser.add_option("-i", "--ipl", dest="iplFile", default=None)
    parser.add_option("-p", "--pathway", dest="pathwayZip", default=None)
    parser.add_option("-c", "--phenotype", dest="phenotypeFile", default=None)
    parser.add_option("-o", "--oz", dest="outputZip", default=None)
    parser.add_option("-s", "--score", dest="scoreFile", default=None)
    parser.add_option("-f", "--filter", dest="filterParams", default="0.0;0.0")
    parser.add_option("-b", "--background", dest="nBackground", default="0")
    options, args = parser.parse_args()
    print "Using Batch System '" + options.batchSystem + "'"

    ## clean
    if len(args) == 1:
        if args[0] == "clean":
            print "rm -rf real* null* OCCAM__* LAYOUT background.R .jobTree"
            system("rm -rf real* null* OCCAM__* LAYOUT background.R .jobTree")
            sys.exit(0)

    ## parse arguments
    assert ((len(args) == 0) or (len(args) == 2) or (len(args) == 3))
    if len(args) == 0:
        pathwayZip = options.pathwayZip if options.pathwayZip is not None else basepathway
        pathwayLib = os.path.join(options.workdir, "pathway")
        system("unzip %s -d %s" % (pathwayZip, pathwayLib))
        paradigmPathway = None
        for file in os.listdir(pathwayLib):
            if file.endswith("_pathway.tab"):
                paradigmPathway = "%s/%s" % (pathwayLib, file)
                break
        scoreFile = None
        phenotypeFile = options.phenotypeFile
        dataFile = options.iplFile
        sampleList = []
        for sample in retColumns(dataFile):
            if not sample.startswith("na_iter"):
                sampleList.append(sample)
        filterParams = options.filterParams
        nNulls = int(options.nBackground)
        outputZip = options.outputZip
        assert (os.path.exists(paradigmPathway))
        assert (os.path.exists(phenotypeFile))
        assert (os.path.exists(dataFile))
    elif len(args) == 2:
        paradigmPathway = args[0]
        scoreFile = args[1]
        phenotypeFile = None
        dataFile = None
        sampleList = None
        filterParams = options.filterParams
        nNulls = 0
        outputZip = options.outputZip
        assert (os.path.exists(paradigmPathway))
        assert (os.path.exists(scoreFile))
    elif len(args) == 3:
        paradigmPathway = args[0]
        scoreFile = None
        phenotypeFile = args[2]
        dataFile = args[1]
        sampleList = []
        for sample in retColumns(dataFile):
            if not sample.startswith("na_iter"):
                sampleList.append(sample)
        filterParams = options.filterParams
        nNulls = int(options.nBackground)
        outputZip = options.outputZip
        assert (os.path.exists(paradigmPathway))
        assert (os.path.exists(phenotypeFile))
        assert (os.path.exists(dataFile))

    ## run
    logger.info("options: " + str(options))
    logger.info("starting make")
    writeScripts()
    s = Stack(prepareOCCAM(paradigmPathway, scoreFile, phenotypeFile, None, dataFile,
                           sampleList, filterParams, nNulls, outputZip, os.getcwd()))
    if options.jobFile:
        s.addToJobFile(options.jobFile)
    else:
        if options.jobTree == None:
            options.jobTree = "./.jobTree"

        failed = s.startJobTree(options)
        if failed:
            print("%d jobs failed" % failed)
        else:
            logger.info("Run complete!")
            system("rm -rf .lastjobTree")
            system("mv .jobTree .lastjobTree")
def wrapParadigm():
    ## parse arguments
    parser = OptionParser(usage="%prog [options] attachment file:path [attachment file:path ...]")
    Stack.addJobTreeOptions(parser)
    parser.add_option("--jobFile", help="Add as a child of jobFile rather " +
                      "than making a new jobTree")
    parser.add_option("-w", "--workdir", dest="workdir", help="Common Work directory", default="./")
    parser.add_option("-n", "--nulls", dest="nulls", help="Number of Null Samples", default="5")
    parser.add_option("-d", "--dogma", dest="dogmazip", help="Path to PARADIGM Dogma Specification", default=basedogma)
    parser.add_option("-p", "--pathway", dest="pathwayzip", help="Path to PARADIGM Pathway Specification", default=basepathway)
    parser.add_option("-b", "--boundaries", dest="disc", help="Data Discretization Bounds", default="0.33;0.67")
    parser.add_option("-t", "--storedparam", dest="param", help="Initial Parameter Starting Point", default=None)
    parser.add_option("-s", "--skipem", action="store_false", dest="em", help="Skip Running EM", default=True)
    parser.add_option("--fr", "--filter-real", dest="filtered_real", help="Filtered Output", default=None)
    parser.add_option("--fa", "--filter-all", dest="filtered_all", help="Filtered Output", default=None)
    parser.add_option("--ur", "--unfilter-real", dest="unfiltered_real", help="Filtered Output", default=None)
    parser.add_option("--ua", "--unfilter-all", dest="unfiltered_all", help="Filtered Output", default=None)
    options, args = parser.parse_args()
    logger.info("options: " + str(options))
    print "Using Batch System '" + options.batchSystem + "'"

    evidList = []
    for i, element in enumerate(args):
        if i % 2 == 1:
            (fileType, filePath) = args[i].split(":")
            evidList.append("%s:%s" % (fileType, os.path.abspath(filePath)))
        else:
            evidList.append(args[i])

    if (len(evidList) % 2 == 1) | (len(evidList) == 0):
        sys.stderr.write("ERROR: incorrect number of arguments\n")
        sys.exit(1)

    workdir = os.path.abspath(options.workdir)
    nullBatches = int(options.nulls)
    dogmaZip = os.path.abspath(options.dogmazip)
    pathwayZip = os.path.abspath(options.pathwayzip)
    disc = options.disc
    paramFile = os.path.abspath(options.param) if options.param is not None else None
    runEM = options.em

    dogmaLib = os.path.join(workdir, "dogma")
    pathwayLib = os.path.join(workdir, "pathway")
    system("unzip %s -d %s" % (dogmaZip, dogmaLib))
    system("unzip %s -d %s" % (pathwayZip, pathwayLib))

    ## run
    logger.info("starting prepare")
    s = Stack(prepareParadigm(" ".join(evidList), disc, paramFile, nullBatches, paradigmExec,
                              inferSpec, dogmaLib, pathwayLib, runEM, workdir))
    if options.jobFile:
        s.addToJobFile(options.jobFile)
    else:
        if options.jobTree == None:
            options.jobTree = "./.jobTree"

        failed = s.startJobTree(options)
        if failed:
            print("%d jobs failed" % failed)
        else:
            logger.info("Run complete!")
            system("rm -rf .lastjobTree")
            system("mv .jobTree .lastjobTree")
def main():
    parser = OptionParser()
    Stack.addJobTreeOptions(parser)
    options, args = parser.parse_args()
    setLoggingFromOptions(options)

    outputDir = "blast_combined/output/"

    if not os.path.exists(outputDir):
        logger.info("Output dir {} does not exist. Creating.".format(outputDir))
        os.mkdir(outputDir)
    if len(os.listdir(outputDir)) > 0:
        logger.info("Output dir not empty.")

    #find all read fastq files, load into a dict by read type
    readFastqFiles = dict()
    for readType in readTypes:
        readFastqFiles[readType] = [os.path.join("../output/processedReadFastqFiles/", readType, x) for x in
                                    os.listdir(os.path.join("../output/processedReadFastqFiles/", readType))
                                    if x.endswith(".fq") or x.endswith(".fastq")]

    #find all reference fasta files
    referenceFastaFiles = [x for x in os.listdir("../referenceFastaFiles")
                           if x.endswith(".fasta") or x.endswith(".fa")]

    #find all sam files that were analyzed using combinedAnalyses
    samFiles = {}
    for readType in readTypes:
        samFiles[readType] = [(readFastqFile, os.path.join("../output", "analysis_" + readType,
                                                           "experiment_" + os.path.basename(readFastqFile) + "_"
                                                           + referenceFastaFile + "_" + analysis, "mapping.sam"))
                              for readFastqFile, referenceFastaFile, analysis in
                              product(readFastqFiles[readType], referenceFastaFiles, combinedAnalyses)]

    mappedByReadType = defaultdict(set)
    for readType in readTypes:
        for readFastqFileFullPath, samFile in samFiles[readType]:
            readFastqFile = os.path.basename(readFastqFileFullPath)
            mappedNames = {(x.qname, readFastqFile) for x in pysam.Samfile(samFile) if not x.is_unmapped}
            mappedByReadType[readType] = mappedByReadType[readType].union(mappedNames)

    unmappedByReadType = defaultdict(dict)
    for readType in readTypes:
        for readFastqFileFullPath, samFile in samFiles[readType]:
            readFastqFile = os.path.basename(readFastqFileFullPath)
            for name, seq, qual in fastqRead(readFastqFileFullPath):
                name = name.split(" ")[0]
                if (name, readFastqFile) not in mappedByReadType[readType]:
                    unmappedByReadType[readType][(name, readFastqFile)] = seq

    i = Stack(Target.makeTargetFn(find_analyses, args=(unmappedByReadType, outputDir))).startJobTree(options)

    if i != 0:
        raise RuntimeError("Got {} failed jobs".format(i))

    for readType in readTypes:
        #build a counter of blast hits and set of read names that did not map
        blast_hits, no_hits = Counter(), set()
        for query, result in parse_blast(open(os.path.join(outputDir, readType + "_blast_out.txt"))):
            if result is None:
                no_hits.add(query)
            else:
                blast_hits[tuple(result)] += 1 #count number of times each hit was seen

        #write the unmapped hits to a fasta file
        outf = open(os.path.join(outputDir, readType + "_no_hits.fasta"), "w")
        for (name, readFastqFile), seq in unmappedByReadType[readType].iteritems():
            if name in no_hits:
                outf.write(">{}\n{}\n".format(name, seq))
        outf.close()

        #write the blast report
        blast_out = open(os.path.join(outputDir, readType + "_blast_report.txt"), "w")
        blast_out.write("gi|##|gb|##|\tSpecies\tseqID\tCount\n") #header to output
        for result, count in sorted(blast_hits.items(), key=lambda x: -int(x[-1])):
            blast_out.write("{}\t{}\n".format("\t".join(result), count))
        blast_out.close()

        #calculate percents and make a barplot
        blast_count = sum(blast_hits.values())
        unmapped_count = len(unmappedByReadType[readType]) - sum(blast_hits.values())
        mapped_count = len(mappedByReadType[readType])
        #blast_percent = 1.0 * sum(blast_hits.values()) / (len(mappedByReadType[readType]) + len(unmappedByReadType[readType]))
        #unmapped_percent = (1.0 * len(unmappedByReadType[readType]) - sum(blast_hits.values())) / (len(mappedByReadType[readType]) + len(unmappedByReadType[readType]))
        #mapped_percent = 1.0 * len(mappedByReadType[readType]) / (len(mappedByReadType[readType]) + len(unmappedByReadType[readType]))
        outf = open(os.path.join(outputDir, readType + "percents.txt"), "w")
        outf.write("\n".join(map(str, [blast_count, unmapped_count, mapped_count])))
        outf.close()
        #system("Rscript blast_combined/barplot_blast.R {} {} {} {} {}".format(blast_percent, unmapped_percent, mapped_percent, readType, os.path.join(outputDir, readType + "_blast_barplot.pdf")))
        system("Rscript blast_combined/barplot_blast.R {} {} {} {} {}".format(blast_count, unmapped_count, mapped_count,
                                                                              readType, os.path.join(outputDir, readType + "_blast_barplot.pdf")))
def wrapParadigm():
    ## parse arguments
    parser = OptionParser(usage="%prog [options] attachment file:path [attachment file:path ...]")
    Stack.addJobTreeOptions(parser)
    parser.add_option("--jobFile", help="Add as a child of jobFile rather " +
                      "than making a new jobTree")
    parser.add_option("-d", "--dogma", dest="dogmaPath", help="Path to PARADIGM Dogma Specification", default="")
    parser.add_option("-p", "--pathway", dest="pathwayPath", help="Path to PARADIGM Pathway Specification", default="")
    parser.add_option("-b", "--boundaries", dest="discBound", help="Data Discretization Bounds", default="")
    parser.add_option("-n", "--nulls", dest="nullBatches", help="Number of Null Samples", default="5")
    parser.add_option("-t", "--storedparam", dest="paramFile", help="Initial Parameter Starting Point", default="")
    parser.add_option("-s", "--skipem", action="store_false", dest="runEM", help="Skip Running EM", default=True)

    options, args = parser.parse_args()
    print "Using Batch System '" + options.batchSystem + "'"

    evidList = []
    for element in args:
        if element.startswith("rankAllFile"):
            evidList.append(re.sub("rankAllFile", "file", element))
        else:
            evidList.append(element)

    if (len(evidList) % 2 == 1) | (len(evidList) == 0):
        sys.stderr.write("ERROR: incorrect number of arguments\n")
        sys.exit(1)

    if len(options.discBound) == 0:
        disc = "0.3333;0.6667"
    else:
        disc = options.discBound
    if len(options.dogmaPath) == 0:
        dogma = "%s/%s" % (dogmaDir, dogmaDefault)
    else:
        dogma = options.dogmaPath
    if len(options.pathwayPath) == 0:
        pathway = "%s/%s" % (pathwayDir, pathwayDefault)
    else:
        pathway = options.pathwayPath
    nullBatches = int(options.nullBatches)
    if len(options.paramFile) == 0:
        paramFile = None
    else:
        paramFile = options.paramFile
    runEM = options.runEM

    logger.info("options: " + str(options))

    ## run
    logger.info("starting prepare")
    s = Stack(prepareParadigm(" ".join(evidList), disc, paramFile, nullBatches, paradigmExec,
                              inferSpec, dogma, pathway, runEM, os.getcwd()))
    if options.jobFile:
        s.addToJobFile(options.jobFile)
    else:
        if options.jobTree == None:
            options.jobTree = "./.jobTree"

        failed = s.startJobTree(options)
        if failed:
            print("%d jobs failed" % failed)
        else:
            logger.info("Run complete!")
            system("rm -rf .lastjobTree")
            system("mv .jobTree .lastjobTree")