Esempio n. 1
0
 def testScriptTree_Example(self):
     """Uses the jobTreeTest code to test the scriptTree Target wrapper.

     Invokes scriptTreeTest_Wrapper.py against self.jobTreeDir once per round
     (self.testNo rounds in total) and calls
     runJobTreeStatusAndFailIfNotComplete on the same directory after each run.
     """
     commandTemplate = "scriptTreeTest_Wrapper.py --jobTree %s --logLevel=DEBUG --retryCount=99"
     for _ in xrange(self.testNo):
         #The command line is rebuilt each round from the current self.jobTreeDir
         system(commandTemplate % self.jobTreeDir)
         runJobTreeStatusAndFailIfNotComplete(self.jobTreeDir)
Esempio n. 2
0
 def testScriptTree_Example2(self):
     """Tests that the global and local temp dirs of a job behave as expected.

     Invokes scriptTreeTest_Wrapper2.py against self.jobTreeDir once per round
     (self.testNo rounds in total) and calls
     runJobTreeStatusAndFailIfNotComplete on the same directory after each run.
     """
     commandTemplate = "scriptTreeTest_Wrapper2.py --jobTree %s --logLevel=DEBUG --retryCount=99"
     for _ in xrange(self.testNo):
         #The command line is rebuilt each round from the current self.jobTreeDir
         system(commandTemplate % self.jobTreeDir)
         runJobTreeStatusAndFailIfNotComplete(self.jobTreeDir)
Esempio n. 3
0
 def runComparisonOfBlastScriptVsNaiveBlast(self, blastMode):
     """Compares cactus_blast output with a naive run of the blast program.

     For every unordered pair of species in each encode region, runNaiveBlast
     writes to self.tempOutputFile and runCactusBlast writes to
     self.tempOutputFile2; the two files are then handed to compareResultsFile
     to check the results are nearly equivalent.

     blastMode -- "allAgainstAll" passes both sequence files to runCactusBlast
                  together; any other value passes the first file as the query
                  and the second via targetSequenceFiles.
     """
     regionNames = [ "ENm00" + str(index) for index in xrange(1,2) ] #Could go to six
     speciesNames = ("human", "mouse", "dog")
     #Other species to try "rat", "monodelphis", "macaque", "chimp"
     for regionName in regionNames:
         regionDir = os.path.join(self.encodePath, regionName)
         for position, firstSpecies in enumerate(speciesNames):
             #Pair each species only with the ones listed after it
             for secondSpecies in speciesNames[position+1:]:
                 firstSeqFile, secondSeqFile = [
                     os.path.join(regionDir, "%s.%s.fa" % (speciesName, regionName))
                     for speciesName in (firstSpecies, secondSpecies) ]

                 #Run the naive blast
                 runNaiveBlast(firstSeqFile, secondSeqFile, self.tempOutputFile)
                 logger.info("Ran the naive blast okay")

                 #Run cactus_blast in a fresh job tree dir
                 jobTreeDir = os.path.join(getTempDirectory(self.tempDir), "jobTree")
                 if blastMode != "allAgainstAll":
                     runCactusBlast([ firstSeqFile ], self.tempOutputFile2, jobTreeDir,
                                    chunkSize=500000, overlapSize=10000,
                                    targetSequenceFiles=[ secondSeqFile ])
                 else:
                     runCactusBlast([ firstSeqFile, secondSeqFile ], self.tempOutputFile2, jobTreeDir,
                                    chunkSize=500000, overlapSize=10000)
                 runJobTreeStatusAndFailIfNotComplete(jobTreeDir)
                 system("rm -rf %s " % jobTreeDir)
                 logger.info("Ran cactus_blast okay")

                 logger.critical("Comparing cactus_blast and naive blast; using mode: %s" % blastMode)
                 compareResultsFile(self.tempOutputFile, self.tempOutputFile2)
Esempio n. 4
0
 def run(self):
     """Builds the cactus alignment under self.outputDir if absent, then adds MakeStats1.

     When <self.outputDir>/cactusAlignment does not exist, the assembly file
     is copied into the local temp dir (and gunzipped when its name ends in
     ".gz"), an experiment file is written, runCactusWorkflow is run with the
     "single_machine" batch system, jobTreeStats is invoked and the contents
     of the local temp dir are moved into self.outputDir. A MakeStats1 child
     target is added whether or not the alignment had to be built.
     """
     cactusAlignmentName = "cactusAlignment"
     cactusAlignment = os.path.join(self.outputDir, cactusAlignmentName)
     if not os.path.exists(cactusAlignment):
         #Prepare the assembly: copy it locally, decompressing when needed
         isGzipped = self.assemblyFile.endswith(".gz")
         tempAssemblyFile = getTempFile(rootDir=self.getLocalTempDir(),
                                        suffix=".gz" if isGzipped else "")
         system("cp %s %s" % (self.assemblyFile, tempAssemblyFile))
         if isGzipped:
             system("gunzip %s" % tempAssemblyFile)
             #gunzip leaves the file under the same name minus the ".gz"
             tempAssemblyFile = tempAssemblyFile[:-3]
             assert os.path.exists(tempAssemblyFile)
         #Make the supporting temporary files
         tempExperimentFile = getTempFile(rootDir=self.getLocalTempDir())
         tempJobTreeDir = os.path.join(self.getLocalTempDir(), "jobTree")
         #Make the experiment file
         experiment = ExperimentWrapper.createExperimentWrapper(
             sequences=self.haplotypeSequences + [tempAssemblyFile],
             newickTreeString=self.newickTree,
             outputDir=self.getLocalTempDir(),
             configFile=self.configFile)
         experiment.setDbName(cactusAlignmentName)
         #This needs to be set to ensure the thing gets put in the right directory
         dbDir = os.path.join(self.getLocalTempDir(), experiment.getDbName())
         experiment.setDbDir(dbDir)
         experiment.writeXML(tempExperimentFile)
         #Now run cactus workflow
         runCactusWorkflow(experimentFile=tempExperimentFile,
                           jobTreeDir=tempJobTreeDir,
                           buildAvgs=False,
                           buildReference=True,
                           batchSystem="single_machine",
                           maxThreads=1,
                           jobTreeStats=True)
         logger.info("Ran the workflow")
         #Check if the jobtree completed successfully.
         runJobTreeStatusAndFailIfNotComplete(tempJobTreeDir)
         logger.info("Checked the job tree dir")
         #Compute the stats
         #NOTE: cactusAlignmentDir is not read again in this method
         cactusAlignmentDir = os.path.join(self.getLocalTempDir(),
                                           cactusAlignmentName)
         tempJobTreeStatsFile = os.path.join(self.getLocalTempDir(),
                                             "jobTreeStats.xml")
         statsCommand = "jobTreeStats --jobTree %s --outputFile %s" % (
             tempJobTreeDir, tempJobTreeStatsFile)
         system(statsCommand)
         #Move everything in the local temp dir to the output dir in one go
         system("mv %s/* %s" % (self.getLocalTempDir(), self.outputDir))
         assert os.path.exists(cactusAlignment)
         #We're done!
     statsTarget = MakeStats1(self.outputDir, cactusAlignment, self.options)
     self.addChildTarget(statsTarget)
Esempio n. 5
0
 def testScriptTree_Example2(self):
     """Tests that the global and local temp dirs of a job behave as expected.

     Each of the self.testNo rounds runs scriptTreeTest_Wrapper2.py on
     self.jobTreeDir and then passes that directory to
     runJobTreeStatusAndFailIfNotComplete.
     """
     wrapperCommand = "scriptTreeTest_Wrapper2.py --jobTree %s --logLevel=DEBUG --retryCount=99"
     for _ in xrange(self.testNo):
         #Format the command from the current self.jobTreeDir on every round
         system(wrapperCommand % self.jobTreeDir)
         runJobTreeStatusAndFailIfNotComplete(self.jobTreeDir)
Esempio n. 6
0
 def testScriptTree_Example(self):
     """Uses the jobTreeTest code to test the scriptTree Target wrapper.

     Each of the self.testNo rounds runs scriptTreeTest_Wrapper.py on
     self.jobTreeDir and then passes that directory to
     runJobTreeStatusAndFailIfNotComplete.
     """
     wrapperCommand = "scriptTreeTest_Wrapper.py --jobTree %s --logLevel=DEBUG --retryCount=99"
     for _ in xrange(self.testNo):
         #Format the command from the current self.jobTreeDir on every round
         system(wrapperCommand % self.jobTreeDir)
         runJobTreeStatusAndFailIfNotComplete(self.jobTreeDir)
Esempio n. 7
0
 def run(self):
     """Creates the cactus alignment in self.outputDir when missing and adds MakeStats1.

     If <self.outputDir>/cactusAlignment is not present, the assembly file is
     copied to the local temp dir (gunzipped first when its name ends in
     ".gz"), an experiment XML file is written, runCactusWorkflow is run with
     batchSystem="single_machine", jobTreeStats is called and everything in
     the local temp dir is moved to self.outputDir. The MakeStats1 child
     target is added in both cases.
     """
     cactusAlignmentName = "cactusAlignment"
     cactusAlignment = os.path.join(self.outputDir, cactusAlignmentName)
     if not os.path.exists(cactusAlignment):
         #Prepare the assembly
         #First copy it, keeping a ".gz" suffix on the copy if it is compressed
         compressed = self.assemblyFile.endswith(".gz")
         copySuffix = ".gz" if compressed else ""
         tempAssemblyFile = getTempFile(rootDir=self.getLocalTempDir(), suffix=copySuffix)
         system("cp %s %s" % (self.assemblyFile, tempAssemblyFile))
         if compressed:
             system("gunzip %s" % tempAssemblyFile)
             #The decompressed file has the same name without the ".gz"
             tempAssemblyFile = tempAssemblyFile[:-3]
             assert os.path.exists(tempAssemblyFile)
         #Make the supporting temporary files
         tempExperimentFile = getTempFile(rootDir=self.getLocalTempDir())
         tempJobTreeDir = os.path.join(self.getLocalTempDir(), "jobTree")
         #Make the experiment file
         workflowExperiment = ExperimentWrapper.createExperimentWrapper(
             sequences=self.haplotypeSequences + [ tempAssemblyFile ],
             newickTreeString=self.newickTree,
             outputDir=self.getLocalTempDir(),
             configFile=self.configFile)
         workflowExperiment.setDbName(cactusAlignmentName)
         #This needs to be set to ensure the thing gets put in the right directory
         experimentDbDir = os.path.join(self.getLocalTempDir(), workflowExperiment.getDbName())
         workflowExperiment.setDbDir(experimentDbDir)
         workflowExperiment.writeXML(tempExperimentFile)
         #Now run cactus workflow
         runCactusWorkflow(experimentFile=tempExperimentFile,
                           jobTreeDir=tempJobTreeDir,
                           buildAvgs=False,
                           buildReference=True,
                           batchSystem="single_machine",
                           maxThreads=1,
                           jobTreeStats=True)
         logger.info("Ran the workflow")
         #Check if the jobtree completed successfully.
         runJobTreeStatusAndFailIfNotComplete(tempJobTreeDir)
         logger.info("Checked the job tree dir")
         #Compute the stats
         #NOTE: cactusAlignmentDir is not read again in this method
         cactusAlignmentDir = os.path.join(self.getLocalTempDir(), cactusAlignmentName)
         tempJobTreeStatsFile = os.path.join(self.getLocalTempDir(), "jobTreeStats.xml")
         jobTreeStatsCommand = "jobTreeStats --jobTree %s --outputFile %s" % (
             tempJobTreeDir, tempJobTreeStatsFile)
         system(jobTreeStatsCommand)
         #Now move the whole content of the local temp dir to the output
         system("mv %s/* %s" % (self.getLocalTempDir(), self.outputDir))
         assert os.path.exists(cactusAlignment)
         #We're done!
     childTarget = MakeStats1(self.outputDir, cactusAlignment, self.options)
     self.addChildTarget(childTarget)
Esempio n. 8
0
 def progressiveFunction(self, experimentFile, jobTreeDir,
                         batchSystem, buildAvgs,
                         buildReference,
                         buildHal,
                         buildFasta,
                         jobTreeStats,
                         subtreeRoot=None):
     """Creates a multi-cactus project from experimentFile and runs it progressively.

     The project is generated in an "exp" subdirectory of a fresh temp
     directory under the current working directory; that temp directory is
     removed once the job tree status check has passed.

     buildReference, buildHal and buildFasta are accepted but are not read
     by this method's body. subtreeRoot is forwarded as the "root" argument
     of runCactusCreateMultiCactusProject.
     """
     scratchDir = getTempDirectory(os.getcwd())
     projectDir = os.path.join(scratchDir, "exp")
     runCactusCreateMultiCactusProject(experimentFile, projectDir,
                                       fixNames=False, root=subtreeRoot)
     logger.info("Put the temporary files in %s" % projectDir)
     projectFile = os.path.join(projectDir, "exp_project.xml")
     runCactusProgressive(projectFile, jobTreeDir,
                          batchSystem=batchSystem,
                          buildAvgs=buildAvgs,
                          jobTreeStats=jobTreeStats)
     runJobTreeStatusAndFailIfNotComplete(jobTreeDir)
     cleanupCommand = "rm -rf %s" % scratchDir
     system(cleanupCommand)
Esempio n. 9
0
 def testBlastRandom(self):
     """Make some sequences, put them in a file, call blast with random parameters
     and check it runs okay.

     Each of the self.testNo rounds writes between 0 and 9 mutated copies of
     one random sequence (each reverse-complemented with probability 0.5) to
     <self.tempDir>/tempSeq.fa, runs runCactusBlast on that file with a
     random chunk size and overlap size, and checks the job tree status.
     The output file is printed with "cat" when the log level is DEBUG.
     """
     tempSeqFile = os.path.join(self.tempDir, "tempSeq.fa")
     self.tempFiles.append(tempSeqFile)
     for test in xrange(self.testNo):
         seqNo = random.choice(xrange(0, 10))
         seq = getRandomSequence(8000)[1]
         #Build all the mutated copies up front, from the same parent sequence
         mutatedSeqs = [ (str(i), mutateSequence(seq, 0.3*random.random())) for i in xrange(seqNo) ]
         #The with block closes the handle even if writing a sequence raises
         with open(tempSeqFile, 'w') as fileHandle:
             for fastaHeader, mutatedSeq in mutatedSeqs:
                 if random.random() > 0.5:
                     mutatedSeq = reverseComplement(mutatedSeq)
                 fastaWrite(fileHandle, fastaHeader, mutatedSeq)
         chunkSize = random.choice(xrange(500, 9000))
         overlapSize = random.choice(xrange(2, 100))
         jobTreeDir = os.path.join(getTempDirectory(self.tempDir), "jobTree")
         runCactusBlast([ tempSeqFile ], self.tempOutputFile, jobTreeDir, chunkSize, overlapSize)
         runJobTreeStatusAndFailIfNotComplete(jobTreeDir)
         if getLogLevelString() == "DEBUG":
             system("cat %s" % self.tempOutputFile)
         system("rm -rf %s " % jobTreeDir)
Esempio n. 10
0
def runWorkflow_TestScript(sequences, newickTreeString, 
                           outputDir=None,
                           batchSystem="single_machine",
                           buildAvgs=False, 
                           buildReference=False,
                           buildHal=False,
                           buildFasta=False,
                           configFile=None,
                           buildJobTreeStats=False,
                           constraints=None,
                           progressive=False,
                           cactusWorkflowFunction=runCactusWorkflow):
    """Runs the workflow and various downstream utilities.

    Writes "experiment.xml" under outputDir, runs cactusWorkflowFunction with
    a "jobTree" directory under outputDir, fails if the job tree did not
    complete, optionally writes "jobTreeStats.xml" under outputDir, and
    finally removes the job tree directory and the experiment file.

    sequences -- sequence dirs/files; an iterable of strings (it is joined
                 with spaces for logging).
    newickTreeString -- the tree handed to getCactusWorkflowExperimentForTest.
    outputDir -- required despite the None default; asserted to be not None.
    batchSystem, buildAvgs, buildReference, buildHal, buildFasta -- forwarded
                 unchanged to cactusWorkflowFunction.
    configFile, constraints, progressive -- forwarded unchanged to
                 getCactusWorkflowExperimentForTest.
    buildJobTreeStats -- forwarded as jobTreeStats to cactusWorkflowFunction;
                 when true, runJobTreeStats is also called afterwards.
    cactusWorkflowFunction -- callable invoked as
                 f(experimentFile, jobTreeDir, **options).

    Returns the experiment object so the calling function can clean up.
    """
    logger.info("Running cactus workflow test script")
    logger.info("Got the following sequence dirs/files: %s" % " ".join(sequences))
    logger.info("Got the following tree %s" % newickTreeString)
    
    #Setup the output dir
    #Identity test: "!= None" would go through the argument's __ne__
    assert outputDir is not None
    logger.info("Using the output dir: %s" % outputDir)
    
    #Setup the flower disk.
    experiment = getCactusWorkflowExperimentForTest(sequences, newickTreeString, 
                                                    outputDir=outputDir,
                                                    configFile=configFile, constraints=constraints,
                                                    progressive=progressive)
    experiment.cleanupDb()
    experimentFile = os.path.join(outputDir, "experiment.xml")
    experiment.writeXML(experimentFile)
    logger.info("The experiment file %s\n" % experimentFile)
   
    #Setup the job tree dir.
    jobTreeDir = os.path.join(outputDir, "jobTree")
    logger.info("Got a job tree dir for the test: %s" % jobTreeDir)
    
    #Run the actual workflow
    cactusWorkflowFunction(experimentFile, jobTreeDir, 
                           batchSystem=batchSystem, buildAvgs=buildAvgs, 
                           buildReference=buildReference,
                           buildHal=buildHal,
                           buildFasta=buildFasta,
                           jobTreeStats=buildJobTreeStats)
    logger.info("Ran the the workflow")
    
    #Check if the jobtree completed successfully.
    runJobTreeStatusAndFailIfNotComplete(jobTreeDir)
    logger.info("Checked the job tree dir")
    
    #Now run various utilities..
    if buildJobTreeStats:
        jobTreeStatsFile = os.path.join(outputDir, "jobTreeStats.xml")
        runJobTreeStats(jobTreeDir, jobTreeStatsFile)
        
    #Now remove the job tree dir and the experiment file
    system("rm -rf %s %s" % (jobTreeDir, experimentFile))   
    
    #Return so calling function can cleanup
    return experiment
Esempio n. 11
0
 def testCactusWorkflow_Blanchette(self):
     """Runs the workflow on blanchette's simulated (colinear) regions.

     Returns immediately unless SON_TRACE_DATASETS is set in the environment.
     Each of the self.testNo rounds cuts a random window of 5000 columns out
     of the true alignment, writes the ungapped sequences of its first nine
     rows to fasta files, runs runCactusWorkflow on them and checks the job
     tree status. If mfaToMaf, cactus_MAFGenerator, mafComparator and
     cactus_treeStats all exit with status 0 for "--help", the true and
     generated MAFs, the tree stats and the mafComparator results are
     produced and printed with "cat".
     """
     if "SON_TRACE_DATASETS" not in os.environ:
         return
     #Binaries that must all be runnable for the comparison step, in check order
     requiredTools = ("mfaToMaf", "cactus_MAFGenerator", "mafComparator", "cactus_treeStats")
     for test in xrange(self.testNo):
         tempFiles = [] #Nothing in this method adds to this list
         tempDir = getTempDirectory(os.getcwd())
         
         trueAlignment = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation", "00.job", "true.mfa")
         
         #Load the true alignment.
         columnAlignment = list(fastaAlignmentRead(trueAlignment))
         fastaHeaders = list(fastaReadHeaders(trueAlignment))
         sequenceNumber = 9
         
         #The tree
         newickTreeString = "((((HUMAN:0.006969, CHIMP:0.009727):0.025291, BABOON:0.044568):0.11,(RAT:0.072818, MOUSE:0.081244):0.260342):0.023260,((DOG:0.07, CAT:0.07):0.087381,(PIG:0.06, COW:0.06):0.104728):0.04);"
         
         #Get random dir
         testDir = getTempDirectory(tempDir)
         
         #random alignment
         alignmentLength = 5000
         randomStart = random.choice(xrange(len(columnAlignment)-alignmentLength))
         subAlignment = columnAlignment[randomStart:randomStart+alignmentLength]
         logger.info("Got a sub alignment, it is %i columns long" % len(subAlignment))
         
         #Get sequences: one (header, ungapped sequence) pair per row
         sequences = [ (fastaHeaders[seqNo],
                        "".join([ column[seqNo] for column in subAlignment if column[seqNo] != '-' ]))
                       for seqNo in xrange(sequenceNumber) ]
         logger.info("Got the sequences")
         
         #Write sequences into temp files
         tempFastaFiles = []
         for seqNo, (header, sequence) in enumerate(sequences):
             logger.info("Making temp file for header: %s, seq: %s" % (header, sequence))
             tempFastaFile = os.path.join(testDir, "%i.fa" % seqNo)
             tempFastaFiles.append(tempFastaFile)
             #The with block closes the handle even if fastaWrite raises
             with open(tempFastaFile, "w") as fileHandle:
                 fastaWrite(fileHandle, header, sequence)
         logger.info("Got the temp sequence files")
         
         experiment = getCactusWorkflowExperimentForTest(tempFastaFiles, newickTreeString, testDir)
         experimentFile = os.path.join(testDir, "experiment.xml")
         experiment.writeXML(experimentFile)
         cactusDiskDatabaseString = experiment.getDiskDatabaseString()
         
         jobTree = os.path.join(testDir, "jobTree")
         
         runCactusWorkflow(experimentFile, jobTree)
         logger.info("Ran the the workflow")
         
         #Check the output alignment
         runJobTreeStatusAndFailIfNotComplete(jobTree)
         logger.info("Checked the job tree dir")
         
         #Output the 'TRUE' alignment file
         #all() stops at the first tool whose --help call fails
         if all(os.system("%s --help > /dev/null 2>&1" % tool) == 0 for tool in requiredTools):
             trueMFAFile = os.path.join(testDir, "true.mfa")
             fastaAlignmentWrite(subAlignment, fastaHeaders, len(fastaHeaders), trueMFAFile)
             trueMAFFile = os.path.join(testDir, "true.maf")
             system("mfaToMaf --mfaFile %s --outputFile %s --logLevel %s" % (trueMFAFile, trueMAFFile, getLogLevelString()))
             system("cat %s" % trueMAFFile)
             
             #Now get mafs for the region.
             mAFFile = os.path.join(testDir, "flower.maf")
             system("cactus_MAFGenerator --flowerName 0 --cactusDisk '%s' --outputFile %s --logLevel %s" % (cactusDiskDatabaseString, mAFFile, getLogLevelString()))
             logger.info("Got the MAFs from the flower disk")
             system("cat %s" % mAFFile)
             
             statsFile = os.path.join(testDir, "stats.xml")
             system("cactus_treeStats --cactusDisk '%s' --flowerName 0 --outputFile %s --logLevel %s" % (cactusDiskDatabaseString, statsFile, getLogLevelString()))
             system("cat %s" % statsFile)
             logger.info("Got the cactus tree stats")
             
             #Now compare the mafs to the output.
             resultsFile = os.path.join(testDir, "results.xml")
             system("mafComparator --mafFile1 %s --mafFile2 %s --outputFile %s --logLevel %s" % (trueMAFFile, mAFFile, resultsFile, getLogLevelString()))
             logger.info("Ran the maf comparator")
             
             system("cat %s" % resultsFile)
             
             #Cleanup
             #NOTE(review): cleanupDb() only runs on this branch - confirm that is intended
             experiment.cleanupDb()
             system("rm -rf %s" % testDir)
             logger.info("Successfully ran test for the problem")
             
         for tempFile in tempFiles:
             os.remove(tempFile)
         system("rm -rf %s" % tempDir)