def createFileStructure(mcProj, expTemplate, configTemplate, options):
    """Write the project XML plus one experiment XML per project entry.

    The project file goes to ``<options.path>/<options.name>_project.xml``.
    Then, for every ``(name, expPath)`` pair in ``mcProj.expMap``, the
    directory ``<options.path>/<name>`` is created and an experiment covering
    ``name``, its children in ``mcProj.entireTree`` and any outgroups is
    written to ``expPath``.

    Args:
        mcProj: project object; ``writeXML``, ``expMap``, ``entireTree`` and
            ``outgroup.ogMap`` are used.
        expTemplate: template experiment; supplies ``confElem`` (forwarded as
            ``databaseConf``) and ``getSequenceID``.
        configTemplate: template config; only ``getOutgroupStrategy()`` is
            consulted here.
        options: object exposing ``path`` (output directory) and ``name``
            (project name).

    Raises:
        OSError: if one of the directories cannot be created, including when
            the path already exists but is not a directory.
    """
    # exist_ok=True replaces the exists()-then-makedirs() pair, which could
    # fail if the directory appeared between the check and the creation.
    os.makedirs(options.path, exist_ok=True)
    mcProj.writeXML(os.path.join(options.path, "%s_project.xml" % options.name))

    # Iterate over a snapshot of the experiment map.
    for name, expPath in list(mcProj.expMap.items()):
        children = mcProj.entireTree.getChildNames(name)

        # Outgroups are only attached when the config asks for them and the
        # project actually lists some for this genome.
        outgroups = []
        if (configTemplate.getOutgroupStrategy() != 'none'
                and name in mcProj.outgroup.ogMap):
            # Outgroup name is the first element of the ogMap tuples
            outgroups = [entry[0] for entry in mcProj.outgroup.ogMap[name]]

        genomes = children + [name] + outgroups
        # The tree call gets its own copy so that the list handed to the
        # experiment below is a distinct object, as it was originally.
        subtree = mcProj.entireTree.extractSpanningTree(list(genomes))
        exp = ExperimentWrapper.createExperimentWrapper(
            NXNewick().writeString(subtree),
            genomes,
            databaseConf=expTemplate.confElem)

        exp.setRootGenome(name)
        exp.setOutgroupGenomes(outgroups)

        os.makedirs(os.path.join(options.path, name), exist_ok=True)

        # A root that already has a sequence in the template is reused rather
        # than reconstructed.
        sequenceID = expTemplate.getSequenceID(name)
        if sequenceID:
            exp.setRootReconstructed(False)
            exp.setSequenceID(name, sequenceID)
        else:
            exp.setRootReconstructed(True)
        exp.writeXML(expPath)
def run(self):
    """Build the cactus alignment if it is missing, then schedule the stats.

    When ``<self.outputDir>/cactusAlignment`` does not exist, the assembly
    file is copied into the local temp dir (and gunzipped if its name ends in
    ``.gz``), an experiment file is written, the cactus workflow is run on a
    single machine with one thread, job-tree stats are produced, and the
    whole content of the local temp dir is moved into ``self.outputDir``.
    Afterwards a ``MakeStats1`` child target is added.

    Raises:
        AssertionError: if the decompressed assembly or the final alignment
            directory is missing after the corresponding step.
    """
    import shlex  # function-scope import so this block carries its own dependency

    def shellArg(value):
        """Return ``value`` as a single, safely quoted shell word."""
        return shlex.quote(str(value))

    cactusAlignmentName = "cactusAlignment"
    cactusAlignment = os.path.join(self.outputDir, cactusAlignmentName)
    if not os.path.exists(cactusAlignment):
        # The steps below place files under the local temp dir and later move
        # them out of it, so they already rely on it being one stable path.
        localTempDir = self.getLocalTempDir()

        # Prepare the assembly: work on a private copy, decompressed if needed.
        if self.assemblyFile.endswith('.gz'):
            tempAssemblyFile = getTempFile(rootDir=localTempDir, suffix=".gz")
            system("cp %s %s" % (shellArg(self.assemblyFile),
                                 shellArg(tempAssemblyFile)))
            system("gunzip %s" % shellArg(tempAssemblyFile))
            # gunzip drops the ".gz" suffix from the file name.
            tempAssemblyFile = tempAssemblyFile[:-3]
            assert os.path.exists(tempAssemblyFile)
        else:
            tempAssemblyFile = getTempFile(rootDir=localTempDir, suffix="")
            system("cp %s %s" % (shellArg(self.assemblyFile),
                                 shellArg(tempAssemblyFile)))

        # Make the supporting temporary files
        tempExperimentFile = getTempFile(rootDir=localTempDir)
        tempJobTreeDir = os.path.join(localTempDir, "jobTree")

        # Make the experiment file
        cactusWorkflowExperiment = ExperimentWrapper.createExperimentWrapper(
            sequences=self.haplotypeSequences + [tempAssemblyFile],
            newickTreeString=self.newickTree,
            outputDir=localTempDir,
            configFile=self.configFile)
        cactusWorkflowExperiment.setDbName(cactusAlignmentName)
        # This needs to be set to ensure the thing gets put in the right directory
        cactusWorkflowExperiment.setDbDir(
            os.path.join(localTempDir, cactusWorkflowExperiment.getDbName()))
        cactusWorkflowExperiment.writeXML(tempExperimentFile)

        # Now run cactus workflow
        runCactusWorkflow(experimentFile=tempExperimentFile,
                          jobTreeDir=tempJobTreeDir,
                          buildAvgs=False,
                          buildReference=True,
                          batchSystem="single_machine",
                          maxThreads=1,
                          jobTreeStats=True)
        logger.info("Ran the workflow")

        # Check if the jobtree completed successfully.
        runJobTreeStatusAndFailIfNotComplete(tempJobTreeDir)
        logger.info("Checked the job tree dir")

        # Compute the stats
        tempJobTreeStatsFile = os.path.join(localTempDir, "jobTreeStats.xml")
        system("jobTreeStats --jobTree %s --outputFile %s"
               % (shellArg(tempJobTreeDir), shellArg(tempJobTreeStatsFile)))

        # Move everything produced in the temp dir to the output dir. The
        # "/*" stays outside the quotes so the shell still expands the glob.
        system("mv %s/* %s" % (shellArg(localTempDir),
                               shellArg(self.outputDir)))
        assert os.path.exists(cactusAlignment)

    # We're done!
    # NOTE(review): scheduled whether the alignment was just built or already
    # existed - confirm this matches the intended nesting.
    self.addChildTarget(
        MakeStats1(self.outputDir, cactusAlignment, self.options))
def getCactusWorkflowExperimentForTest(sequences, newickTreeString, outputDir, configFile=None, constraints=None, progressive=False, reconstruct=True):
    """Create an ExperimentWrapper for tests, honouring the global database conf.

    The leaves of ``newickTreeString`` (visited in post-order) become the
    experiment's genomes and are paired positionally with ``sequences``; every
    pair is printed and registered through ``setSequenceID``. The HAL and
    FASTA outputs are ``test.hal`` and ``test.fa`` inside ``outputDir``, and
    the root genome is set to ``"reference"``.

    If ``_GLOBAL_DATABASE_CONF_STRING`` is set it is parsed as XML and passed
    as ``databaseConf``; otherwise ``None`` is passed. When ``reconstruct`` is
    true the root is additionally marked as reconstructed.

    Returns the configured experiment wrapper.
    """
    halPath = os.path.join(outputDir, "test.hal")
    fastaPath = os.path.join(outputDir, "test.fa")

    if _GLOBAL_DATABASE_CONF_STRING is None:
        dbConfElem = None
    else:
        dbConfElem = ET.fromstring(_GLOBAL_DATABASE_CONF_STRING)

    # Collect the leaf names in post-order.
    parsedTree = NXNewick().parseString(newickTreeString, addImpliedRoots=False)
    leafNames = []
    for node in parsedTree.postOrderTraversal():
        if parsedTree.isLeaf(node):
            leafNames.append(parsedTree.getName(node))

    experiment = ExperimentWrapper.createExperimentWrapper(
        newickTreeString,
        leafNames,
        outputDir,
        databaseConf=dbConfElem,
        configFile=configFile,
        halFile=halPath,
        fastaFile=fastaPath,
        constraints=constraints,
        progressive=progressive)

    # zip() stops at the shorter input, so surplus genomes or sequences are
    # silently left unpaired.
    for pairing in zip(leafNames, sequences):
        print(pairing)
        experiment.setSequenceID(*pairing)

    experiment.setRootGenome("reference")
    if reconstruct:
        experiment.setRootReconstructed(True)
    return experiment
def getCactusWorkflowExperimentForTest(sequences, newickTreeString, outputDir, configFile=None, constraints=None, progressive=False):
    """Create an ExperimentWrapper for tests, forwarding the global database conf.

    ``sequences``, ``newickTreeString`` and ``outputDir`` are passed through
    positionally. The HAL and FASTA outputs are fixed to ``test.hal`` and
    ``test.fa`` inside ``outputDir``; ``configFile``, ``constraints`` and
    ``progressive`` are forwarded unchanged.

    Returns whatever ``ExperimentWrapper.createExperimentWrapper`` returns.
    """
    # NOTE(review): the global conf is forwarded exactly as stored (it is not
    # parsed here) - confirm createExperimentWrapper expects that form.
    wrapperOptions = dict(
        databaseConf=_GLOBAL_DATABASE_CONF_STRING,
        configFile=configFile,
        halFile=os.path.join(outputDir, "test.hal"),
        fastaFile=os.path.join(outputDir, "test.fa"),
        constraints=constraints,
        progressive=progressive,
    )
    return ExperimentWrapper.createExperimentWrapper(
        sequences, newickTreeString, outputDir, **wrapperOptions)
def run(self):
    """Build the cactus alignment if it is missing, then schedule the stats.

    When ``<self.outputDir>/cactusAlignment`` does not exist, the assembly
    file is copied into the local temp dir (and gunzipped if its name ends in
    ``.gz``), an experiment file is written, the cactus workflow is run on a
    single machine with one thread, job-tree stats are produced, and the
    whole content of the local temp dir is moved into ``self.outputDir``.
    Afterwards a ``MakeStats1`` child target is added.

    Raises:
        AssertionError: if the decompressed assembly or the final alignment
            directory is missing after the corresponding step.
    """
    import shlex  # function-scope import so this block carries its own dependency

    def shellArg(value):
        """Return ``value`` as a single, safely quoted shell word."""
        return shlex.quote(str(value))

    cactusAlignmentName = "cactusAlignment"
    cactusAlignment = os.path.join(self.outputDir, cactusAlignmentName)
    if not os.path.exists(cactusAlignment):
        # The steps below place files under the local temp dir and later move
        # them out of it, so they already rely on it being one stable path.
        localTempDir = self.getLocalTempDir()

        # Prepare the assembly: work on a private copy, decompressed if needed.
        if self.assemblyFile.endswith('.gz'):
            tempAssemblyFile = getTempFile(rootDir=localTempDir, suffix=".gz")
            system("cp %s %s" % (shellArg(self.assemblyFile),
                                 shellArg(tempAssemblyFile)))
            system("gunzip %s" % shellArg(tempAssemblyFile))
            # gunzip drops the ".gz" suffix from the file name.
            tempAssemblyFile = tempAssemblyFile[:-3]
            assert os.path.exists(tempAssemblyFile)
        else:
            tempAssemblyFile = getTempFile(rootDir=localTempDir, suffix="")
            system("cp %s %s" % (shellArg(self.assemblyFile),
                                 shellArg(tempAssemblyFile)))

        # Make the supporting temporary files
        tempExperimentFile = getTempFile(rootDir=localTempDir)
        tempJobTreeDir = os.path.join(localTempDir, "jobTree")

        # Make the experiment file
        cactusWorkflowExperiment = ExperimentWrapper.createExperimentWrapper(
            sequences=self.haplotypeSequences + [tempAssemblyFile],
            newickTreeString=self.newickTree,
            outputDir=localTempDir,
            configFile=self.configFile)
        cactusWorkflowExperiment.setDbName(cactusAlignmentName)
        # This needs to be set to ensure the thing gets put in the right directory
        cactusWorkflowExperiment.setDbDir(
            os.path.join(localTempDir, cactusWorkflowExperiment.getDbName()))
        cactusWorkflowExperiment.writeXML(tempExperimentFile)

        # Now run cactus workflow
        runCactusWorkflow(experimentFile=tempExperimentFile,
                          jobTreeDir=tempJobTreeDir,
                          buildAvgs=False,
                          buildReference=True,
                          batchSystem="single_machine",
                          maxThreads=1,
                          jobTreeStats=True)
        logger.info("Ran the workflow")

        # Check if the jobtree completed successfully.
        runJobTreeStatusAndFailIfNotComplete(tempJobTreeDir)
        logger.info("Checked the job tree dir")

        # Compute the stats
        tempJobTreeStatsFile = os.path.join(localTempDir, "jobTreeStats.xml")
        system("jobTreeStats --jobTree %s --outputFile %s"
               % (shellArg(tempJobTreeDir), shellArg(tempJobTreeStatsFile)))

        # Move everything produced in the temp dir to the output dir. The
        # "/*" stays outside the quotes so the shell still expands the glob.
        system("mv %s/* %s" % (shellArg(localTempDir),
                               shellArg(self.outputDir)))
        assert os.path.exists(cactusAlignment)

    # We're done!
    # NOTE(review): scheduled whether the alignment was just built or already
    # existed - confirm this matches the intended nesting.
    self.addChildTarget(
        MakeStats1(self.outputDir, cactusAlignment, self.options))
def getCactusWorkflowExperimentForTest(sequences, newickTreeString, outputDir, configFile=None, constraints=None, progressive=False):
    """Create an ExperimentWrapper for tests, honouring the global database conf.

    ``sequences``, ``newickTreeString`` and ``outputDir`` are passed through
    positionally. The HAL and FASTA outputs are fixed to ``test.hal`` and
    ``test.fa`` inside ``outputDir``. If ``_GLOBAL_DATABASE_CONF_STRING`` is
    set it is parsed as XML and supplied as ``databaseConf``; otherwise
    ``databaseConf`` is ``None``.

    Returns whatever ``ExperimentWrapper.createExperimentWrapper`` returns.
    """
    halPath = os.path.join(outputDir, "test.hal")
    fastaPath = os.path.join(outputDir, "test.fa")

    # Only parse the global conf when one has actually been set.
    dbConfElem = None
    if _GLOBAL_DATABASE_CONF_STRING is not None:
        dbConfElem = ET.fromstring(_GLOBAL_DATABASE_CONF_STRING)

    experiment = ExperimentWrapper.createExperimentWrapper(
        sequences,
        newickTreeString,
        outputDir,
        databaseConf=dbConfElem,
        configFile=configFile,
        halFile=halPath,
        fastaFile=fastaPath,
        constraints=constraints,
        progressive=progressive)
    return experiment