def _relocatePath(basePath, oldPath, name):
    """Rebase oldPath under basePath, keeping the tail that starts at name.

    The portion of oldPath from the first occurrence of name onwards is
    joined onto basePath.

    Raises:
        RuntimeError: if name does not occur in oldPath.  Without this check
            str.find() returns -1 and the slice would silently keep only the
            last character of oldPath, producing a corrupt path.
    """
    start = oldPath.find(name)
    if start < 0:
        raise RuntimeError("Cannot relocate path %s: it does not contain "
                           "the event name %s\n" % (oldPath, name))
    return os.path.join(basePath, oldPath[start:])


def updateProject(path):
    """Rewrite the paths stored in a project after it has been moved.

    Reads the project XML at path and, for every (event name, experiment
    file) entry in its expMap:
      * re-points the entry at <dir of path>/<experiment dir>/<file name>,
      * rebases the experiment's database directory, reference path, HAL
        path and HAL fasta path (whichever are not None) onto the directory
        of path,
      * for "kyoto_tycoon" databases with a host set, replaces the host with
        the current machine's host name,
      * copies the experiment file to "<file>.old" and writes the updated
        experiment back in place.
    Finally the project XML itself is rewritten at path.

    Args:
        path: location of the project XML file.

    Raises:
        RuntimeError: if an experiment file is not found at its expected new
            location, or if a stored path does not contain the event name
            and therefore cannot be relocated.
    """
    mcProj = MultiCactusProject()
    mcProj.readXML(path)
    basePath = os.path.dirname(path)

    # Snapshot the items: entries of expMap are reassigned inside the loop.
    for name, oldPath in list(mcProj.expMap.items()):
        fileName = os.path.basename(oldPath)
        dirName = os.path.dirname(oldPath).rpartition('/')[2]
        newPath = os.path.join(basePath, dirName, fileName)
        if not os.path.isfile(newPath):
            raise RuntimeError("Experiment file %s not found\n" % newPath)
        mcProj.expMap[name] = newPath

        exp = ExperimentWrapper(ET.parse(newPath).getroot())

        # Every path kept in the experiment is relocated the same way.
        # NOTE: the MAF path used to be handled here as well, but its
        # accessors seem to have disappeared from the experiment wrapper.
        relocations = (
            (exp.getDbDir, exp.setDbDir),
            (exp.getReferencePath, exp.setReferencePath),
            (exp.getHALPath, exp.setHALPath),
            (exp.getHALFastaPath, exp.setHALFastaPath),
        )
        for getter, setter in relocations:
            oldValue = getter()
            if oldValue is not None:
                setter(_relocatePath(basePath, oldValue, name))

        # A kyoto tycoon database records a host; point it at this machine.
        if exp.getDbType() == "kyoto_tycoon":
            if exp.getDbHost() is not None:
                exp.setDbHost(socket.gethostname())

        # Keep a backup of the experiment file before overwriting it.
        system("cp %s %s.old" % (newPath, newPath))
        exp.writeXML(newPath)

    mcProj.writeXML(path)
def run(self):
    """Schedule the alignment of this event's subproblem.

    Loads the experiment and config for self.event, checks that enough
    threads/cpus are available when a kyoto tycoon database is used, merges
    the reference/HAL/fasta build switches from the command line with those
    in the config, and then either skips the event (already done and
    overwrite disabled) or cleans up stale output and adds the first
    workflow phase as a child target.  FinishUp is set as the follow-on
    target in both cases.

    Raises:
        RuntimeError: if a kyoto tycoon database is used and fewer than
            three threads (singleMachine) or cpus (other batch systems) per
            parallel event are available.
    """
    logger.info("Progressive Up: " + self.event)

    # open up the experiment
    # note that we copy the path into the options here
    self.options.experimentFile = self.project.expMap[self.event]
    expXml = ET.parse(self.options.experimentFile).getroot()
    experiment = ExperimentWrapper(expXml)
    configXml = ET.parse(experiment.getConfigPath()).getroot()
    configWrapper = ConfigWrapper(configXml)

    # need at least 3 processes for every event when using ktserver:
    # 1 proc to run jobs, 1 proc to run server, 1 proc to run 2ndary server
    if experiment.getDbType() == "kyoto_tycoon":
        maxParallel = min(len(self.project.expMap),
                          configWrapper.getMaxParallelSubtrees())
        if self.options.batchSystem == "singleMachine":
            # Convert once: the option may be a string, and formatting a
            # string with %d would raise TypeError and mask the real error.
            maxThreads = int(self.options.maxThreads)
            if maxThreads < maxParallel * 3:
                raise RuntimeError("At least %d threads are required (only %d were specified) to handle up to %d events using kyoto tycoon. Either increase the number of threads using the --maxThreads option or decrease the number of parallel jobs (currently %d) by adjusting max_parallel_subtrees in the config file" % (maxParallel * 3, maxThreads, maxParallel, configWrapper.getMaxParallelSubtrees()))
        else:
            if int(self.options.maxCpus) < maxParallel * 3:
                raise RuntimeError("At least %d concurrent cpus are required to handle up to %d events using kyoto tycoon. Either increase the number of cpus using the --maxCpus option or decrease the number of parallel jobs (currently %d) by adjusting max_parallel_subtrees in the config file" % (maxParallel * 3, maxParallel, configWrapper.getMaxParallelSubtrees()))

    # take union of command line options and config options for hal and
    # reference.  The tests stay as "== False" (not "not x") so that a value
    # such as None is left untouched, exactly as before.
    if self.options.buildReference == False:
        refNode = findRequiredNode(configXml, "reference")
        self.options.buildReference = getOptionalAttrib(refNode, "buildReference", bool, False)
    halNode = findRequiredNode(configXml, "hal")
    if self.options.buildHal == False:
        self.options.buildHal = getOptionalAttrib(halNode, "buildHal", bool, False)
    if self.options.buildFasta == False:
        self.options.buildFasta = getOptionalAttrib(halNode, "buildFasta", bool, False)

    # get parameters that cactus_workflow stuff wants
    workFlowArgs = CactusWorkflowArguments(self.options)
    # copy over the options so we don't trail them around
    workFlowArgs.buildReference = self.options.buildReference
    workFlowArgs.buildHal = self.options.buildHal
    workFlowArgs.buildFasta = self.options.buildFasta
    workFlowArgs.overwrite = self.options.overwrite
    workFlowArgs.globalLeafEventSet = self.options.globalLeafEventSet

    # From here on use the experiment as seen by the workflow arguments.
    experiment = ExperimentWrapper(workFlowArgs.experimentNode)

    # The event counts as finished when the DONE marker exists and every
    # requested output (reference, HAL + HAL fasta) is already on disk.
    donePath = os.path.join(os.path.dirname(workFlowArgs.experimentFile), "DONE")
    doneDone = os.path.isfile(donePath)
    refDone = not workFlowArgs.buildReference or os.path.isfile(experiment.getReferencePath())
    halDone = not workFlowArgs.buildHal or (os.path.isfile(experiment.getHALFastaPath()) and os.path.isfile(experiment.getHALPath()))

    if not workFlowArgs.overwrite and doneDone and refDone and halDone:
        self.logToMaster("Skipping %s because it is already done and overwrite is disabled" % self.event)
    else:
        system("rm -f %s" % donePath)
        # Overwrite was requested or an output is missing: delete the
        # database files, the sequences file and the reference.
        dbPath = os.path.join(experiment.getDbDir(), experiment.getDbName())
        seqPath = os.path.join(experiment.getDbDir(), "sequences")
        system("rm -f %s* %s %s" % (dbPath, seqPath, experiment.getReferencePath()))

        if workFlowArgs.configWrapper.getDoTrimStrategy() and workFlowArgs.outgroupEventNames is not None:
            # Use the trimming strategy to blast ingroups vs outgroups.
            self.addChildTarget(CactusTrimmingBlastPhase(cactusWorkflowArguments=workFlowArgs, phaseName="trimBlast"))
        else:
            self.addChildTarget(CactusSetupPhase(cactusWorkflowArguments=workFlowArgs, phaseName="setup"))
        logger.info("Going to create alignments and define the cactus tree")

    self.setFollowOnTarget(FinishUp(workFlowArgs, self.project))