def createFileStructure(mcProj, expTemplate, configTemplate, options):
    """Write the project XML plus one experiment XML per entry of mcProj.expMap.

    The project file goes to ``<options.path>/<options.name>_project.xml``.
    For every ``name -> expPath`` pair in ``mcProj.expMap`` the directory
    ``<options.path>/<name>`` is created and an experiment is written to
    ``expPath``.  Each experiment covers the subtree of ``mcProj.entireTree``
    spanning ``name``, its children and (unless the configured outgroup
    strategy is ``'none'``) the outgroups listed for ``name`` in
    ``mcProj.outgroup.ogMap``.  ``name`` is set as the experiment's root
    genome; the root is flagged as reconstructed exactly when the template
    has no sequence ID for it.

    Args:
        mcProj: project object; ``expMap``, ``entireTree`` and
            ``outgroup.ogMap`` are read and ``writeXML`` is called.
        expTemplate: template experiment providing ``confElem`` (forwarded
            as ``databaseConf``) and ``getSequenceID``.
        configTemplate: configuration wrapper; only
            ``getOutgroupStrategy()`` is consulted.
        options: object with ``path`` (output directory) and ``name``
            (project name) attributes.
    """
    # exist_ok=True replaces the racy "check exists, then create" sequence.
    os.makedirs(options.path, exist_ok=True)
    mcProj.writeXML(os.path.join(options.path, "%s_project.xml" % options.name))

    # The outgroup strategy does not depend on the experiment being built.
    useOutgroups = configTemplate.getOutgroupStrategy() != 'none'

    for name, expPath in mcProj.expMap.items():
        path = os.path.join(options.path, name)
        children = mcProj.entireTree.getChildNames(name)

        # Get outgroups
        outgroups = []
        if useOutgroups and name in mcProj.outgroup.ogMap:
            # Outgroup name is the first element of the ogMap tuples
            outgroups = [ogEntry[0] for ogEntry in mcProj.outgroup.ogMap[name]]

        genomes = children + [name] + outgroups
        # Each callee receives its own list object, as before, so neither can
        # affect the argument seen by the other.
        subtree = mcProj.entireTree.extractSpanningTree(list(genomes))
        exp = ExperimentWrapper.createExperimentWrapper(
            NXNewick().writeString(subtree),
            genomes,
            databaseConf=expTemplate.confElem)

        exp.setRootGenome(name)
        exp.setOutgroupGenomes(outgroups)

        os.makedirs(path, exist_ok=True)

        # A sequence ID in the template means the root genome already has a
        # sequence; otherwise it is marked as reconstructed.
        sequenceID = expTemplate.getSequenceID(name)
        if sequenceID:
            exp.setRootReconstructed(False)
            exp.setSequenceID(name, sequenceID)
        else:
            exp.setRootReconstructed(True)
        exp.writeXML(expPath)
Example #2
0
 def run(self):
     """Build the cactus alignment for the assembly if needed, then queue stats.

     When ``<self.outputDir>/cactusAlignment`` does not exist yet, this:

     1. copies ``self.assemblyFile`` into the local temp dir (gunzipping it
        when its name ends in ``.gz``),
     2. writes an experiment file built from ``self.haplotypeSequences`` plus
        the copied assembly, ``self.newickTree`` and ``self.configFile``,
     3. runs the cactus workflow on a single machine with one thread and
        checks that the job tree completed,
     4. runs ``jobTreeStats`` and moves everything in the local temp dir
        into ``self.outputDir``.

     In every case a ``MakeStats1`` child target is added afterwards.
     """
     # Local import: only the shell command lines below need it.
     import shlex

     # system() is handed shell command lines (the final "mv" relies on glob
     # expansion), so every interpolated path is quoted to survive whitespace
     # and shell metacharacters.
     quote = shlex.quote

     cactusAlignmentName = "cactusAlignment"
     cactusAlignment = os.path.join(self.outputDir, cactusAlignmentName)
     if not os.path.exists(cactusAlignment):
         localTempDir = self.getLocalTempDir()
         #Prepare the assembly
         #First copy it.
         if self.assemblyFile.endswith('.gz'):
             tempAssemblyFile = getTempFile(rootDir=localTempDir,
                                            suffix=".gz")
             system("cp %s %s" % (quote(self.assemblyFile),
                                  quote(tempAssemblyFile)))
             system("gunzip %s" % quote(tempAssemblyFile))
             #gunzip leaves the decompressed file under the name without ".gz"
             tempAssemblyFile = tempAssemblyFile[:-3]
             assert os.path.exists(tempAssemblyFile)
         else:
             tempAssemblyFile = getTempFile(rootDir=localTempDir, suffix="")
             system("cp %s %s" % (quote(self.assemblyFile),
                                  quote(tempAssemblyFile)))
         #Make the supporting temporary files
         tempExperimentFile = getTempFile(rootDir=localTempDir)
         tempJobTreeDir = os.path.join(localTempDir, "jobTree")
         #Make the experiment file
         cactusWorkflowExperiment = ExperimentWrapper.createExperimentWrapper(
             sequences=self.haplotypeSequences + [tempAssemblyFile],
             newickTreeString=self.newickTree,
             outputDir=localTempDir,
             configFile=self.configFile)
         cactusWorkflowExperiment.setDbName(cactusAlignmentName)
         #This needs to be set to ensure the thing gets put in the right directory
         cactusWorkflowExperiment.setDbDir(
             os.path.join(localTempDir,
                          cactusWorkflowExperiment.getDbName()))
         cactusWorkflowExperiment.writeXML(tempExperimentFile)
         #Now run cactus workflow
         runCactusWorkflow(experimentFile=tempExperimentFile,
                           jobTreeDir=tempJobTreeDir,
                           buildAvgs=False,
                           buildReference=True,
                           batchSystem="single_machine",
                           maxThreads=1,
                           jobTreeStats=True)
         logger.info("Ran the workflow")
         #Check if the jobtree completed successfully.
         runJobTreeStatusAndFailIfNotComplete(tempJobTreeDir)
         logger.info("Checked the job tree dir")
         #Compute the stats
         tempJobTreeStatsFile = os.path.join(localTempDir, "jobTreeStats.xml")
         system("jobTreeStats --jobTree %s --outputFile %s" %
                (quote(tempJobTreeDir), quote(tempJobTreeStatsFile)))
         #Move the whole content of the temp dir (experiment file, stats file,
         #alignment database, ...) to the output; the glob stays outside the
         #quotes so the shell still expands it.
         system("mv %s/* %s" % (quote(localTempDir), quote(self.outputDir)))
         assert os.path.exists(cactusAlignment)
         #We're done!
     self.addChildTarget(
         MakeStats1(self.outputDir, cactusAlignment, self.options))
Example #3
0
def getCactusWorkflowExperimentForTest(sequences,
                                       newickTreeString,
                                       outputDir,
                                       configFile=None,
                                       constraints=None,
                                       progressive=False,
                                       reconstruct=True):
    """Create an ExperimentWrapper for tests, honouring the global database conf.

    The leaf genomes of ``newickTreeString`` (in post-order) are paired with
    ``sequences`` position by position and registered as sequence IDs.  The
    HAL and FASTA outputs are placed at ``<outputDir>/test.hal`` and
    ``<outputDir>/test.fa``, the root genome is set to ``"reference"`` and,
    when ``reconstruct`` is true, the root is flagged as reconstructed.
    """
    testOutputs = {
        "halFile": os.path.join(outputDir, "test.hal"),
        "fastaFile": os.path.join(outputDir, "test.fa"),
    }

    # Only parse the globally configured database conf when one is set.
    if _GLOBAL_DATABASE_CONF_STRING is None:
        dbConfElem = None
    else:
        dbConfElem = ET.fromstring(_GLOBAL_DATABASE_CONF_STRING)

    parsedTree = NXNewick().parseString(newickTreeString,
                                        addImpliedRoots=False)
    leafGenomes = []
    for nodeId in parsedTree.postOrderTraversal():
        if parsedTree.isLeaf(nodeId):
            leafGenomes.append(parsedTree.getName(nodeId))

    experiment = ExperimentWrapper.createExperimentWrapper(
        newickTreeString,
        leafGenomes,
        outputDir,
        databaseConf=dbConfElem,
        configFile=configFile,
        constraints=constraints,
        progressive=progressive,
        **testOutputs)

    # zip() stops at the shorter of the two sequences.
    for genomeAndSequence in zip(leafGenomes, sequences):
        print(genomeAndSequence)
        experiment.setSequenceID(*genomeAndSequence)

    experiment.setRootGenome("reference")
    if reconstruct:
        experiment.setRootReconstructed(True)
    return experiment
Example #4
0
def getCactusWorkflowExperimentForTest(sequences, newickTreeString, outputDir, configFile=None,
                                       constraints=None, progressive=False):
    """Create an ExperimentWrapper for tests, passing along the global database conf.

    The HAL and FASTA outputs are placed at ``<outputDir>/test.hal`` and
    ``<outputDir>/test.fa``; ``_GLOBAL_DATABASE_CONF_STRING`` is forwarded
    unchanged as ``databaseConf``.
    """
    testOutputs = {
        "halFile": os.path.join(outputDir, "test.hal"),
        "fastaFile": os.path.join(outputDir, "test.fa"),
    }
    experiment = ExperimentWrapper.createExperimentWrapper(
        sequences,
        newickTreeString,
        outputDir,
        databaseConf=_GLOBAL_DATABASE_CONF_STRING,
        configFile=configFile,
        constraints=constraints,
        progressive=progressive,
        **testOutputs)
    return experiment
 def run(self):
     """Build the cactus alignment for the assembly if needed, then queue stats.

     When ``<self.outputDir>/cactusAlignment`` does not exist yet, the
     assembly file is copied into the local temp dir (and gunzipped when its
     name ends in ``.gz``), an experiment file is written for
     ``self.haplotypeSequences`` plus that assembly, the cactus workflow is
     run on a single machine with one thread, the job tree is checked for
     completion, ``jobTreeStats`` is run, and the content of the local temp
     dir is moved into ``self.outputDir``.

     A ``MakeStats1`` child target is added in every case.
     """
     # Local import: only the shell command lines below need it.
     import shlex

     def shellCommand(template, *paths):
         # system() receives shell command lines (the final "mv" depends on
         # glob expansion), so each path is quoted before interpolation to
         # cope with whitespace and shell metacharacters.
         return template % tuple(shlex.quote(p) for p in paths)

     cactusAlignmentName = "cactusAlignment"
     cactusAlignment = os.path.join(self.outputDir, cactusAlignmentName)
     if not os.path.exists(cactusAlignment):
         localTempDir = self.getLocalTempDir()
         #Prepare the assembly
         #First copy it.
         if self.assemblyFile.endswith('.gz'):
             tempAssemblyFile = getTempFile(rootDir=localTempDir, suffix=".gz")
             system(shellCommand("cp %s %s", self.assemblyFile, tempAssemblyFile))
             system(shellCommand("gunzip %s", tempAssemblyFile))
             #gunzip leaves the decompressed file under the name without ".gz"
             tempAssemblyFile = tempAssemblyFile[:-3]
             assert os.path.exists(tempAssemblyFile)
         else:
             tempAssemblyFile = getTempFile(rootDir=localTempDir, suffix="")
             system(shellCommand("cp %s %s", self.assemblyFile, tempAssemblyFile))
         #Make the supporting temporary files
         tempExperimentFile = getTempFile(rootDir=localTempDir)
         tempJobTreeDir = os.path.join(localTempDir, "jobTree")
         #Make the experiment file
         cactusWorkflowExperiment = ExperimentWrapper.createExperimentWrapper(
                                              sequences=self.haplotypeSequences + [ tempAssemblyFile ],
                                              newickTreeString=self.newickTree,
                                              outputDir=localTempDir,
                                              configFile=self.configFile)
         cactusWorkflowExperiment.setDbName(cactusAlignmentName)
         #This needs to be set to ensure the thing gets put in the right directory
         cactusWorkflowExperiment.setDbDir(os.path.join(localTempDir, cactusWorkflowExperiment.getDbName()))
         cactusWorkflowExperiment.writeXML(tempExperimentFile)
         #Now run cactus workflow
         runCactusWorkflow(experimentFile=tempExperimentFile, jobTreeDir=tempJobTreeDir,
                           buildAvgs=False, buildReference=True,
                           batchSystem="single_machine", maxThreads=1, jobTreeStats=True)
         logger.info("Ran the workflow")
         #Check if the jobtree completed successfully.
         runJobTreeStatusAndFailIfNotComplete(tempJobTreeDir)
         logger.info("Checked the job tree dir")
         #Compute the stats
         tempJobTreeStatsFile = os.path.join(localTempDir, "jobTreeStats.xml")
         system(shellCommand("jobTreeStats --jobTree %s --outputFile %s",
                             tempJobTreeDir, tempJobTreeStatsFile))
         #Move the whole content of the temp dir (experiment file, stats file,
         #alignment database, ...) to the output; the glob stays outside the
         #quotes so the shell still expands it.
         system(shellCommand("mv %s/* %s", localTempDir, self.outputDir))
         assert os.path.exists(cactusAlignment)
         #We're done!
     self.addChildTarget(MakeStats1(self.outputDir, cactusAlignment, self.options))
Example #6
0
def getCactusWorkflowExperimentForTest(sequences,
                                       newickTreeString,
                                       outputDir,
                                       configFile=None,
                                       constraints=None,
                                       progressive=False):
    """Create an ExperimentWrapper for tests, honouring the global database conf.

    The HAL and FASTA outputs are placed at ``<outputDir>/test.hal`` and
    ``<outputDir>/test.fa``.  If ``_GLOBAL_DATABASE_CONF_STRING`` is set it
    is parsed as XML and passed as ``databaseConf``; otherwise ``None`` is
    passed.
    """
    testOutputs = {
        "halFile": os.path.join(outputDir, "test.hal"),
        "fastaFile": os.path.join(outputDir, "test.fa"),
    }

    # Only parse the globally configured database conf when one is set.
    if _GLOBAL_DATABASE_CONF_STRING is None:
        dbConfElem = None
    else:
        dbConfElem = ET.fromstring(_GLOBAL_DATABASE_CONF_STRING)

    experiment = ExperimentWrapper.createExperimentWrapper(
        sequences,
        newickTreeString,
        outputDir,
        databaseConf=dbConfElem,
        configFile=configFile,
        constraints=constraints,
        progressive=progressive,
        **testOutputs)
    return experiment