def testSimulation(self):
    """Run Ortheus on job 00 of the Blanchette simulation data set.

    Only runs when the test status equals TestStatus.TEST_LONG; otherwise the
    method returns immediately without doing anything.
    """
    if TestStatus.getTestStatus() != TestStatus.TEST_LONG:
        return
    inputDir = TestStatus.getPathToDataSets() + "/blanchettesSimulation/00.job"
    resultDir = TestStatus.getPathToDataSets() + "/ortheus/blanchettesSimulationTest"
    treeString = "(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);"
    # File names are listed in the order the species appear in treeString.
    speciesFiles = ("HUMAN", "CHIMP", "BABOON", "RAT", "MOUSE", "COW", "CAT", "DOG")
    seqFiles = []
    for speciesFile in speciesFiles:
        seqFiles.append(inputDir + "/" + speciesFile)
    resultFile = resultDir + "/outputJob1.mfa"
    command = "Ortheus.py -e %s -d '%s' -f %s -j -a -b" % (" ".join(seqFiles), treeString, resultFile)
    print("running command " + command)
    system(command)
def testSimulation(self):
    """Align the eight sequences of Blanchette simulation job 00 with Ortheus.

    The body is skipped unless the test status equals TestStatus.TEST_LONG.
    """
    if TestStatus.getTestStatus() == TestStatus.TEST_LONG:
        jobDir = TestStatus.getPathToDataSets() + "/blanchettesSimulation/00.job"
        outDir = TestStatus.getPathToDataSets() + "/ortheus/blanchettesSimulationTest"
        newick = '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);'
        leafFiles = ["HUMAN", "CHIMP", "BABOON", "RAT", "MOUSE", "COW", "CAT", "DOG"]
        # The three values substituted into the command line, in template order:
        # space-separated input files, the quoted tree, and the output file.
        commandArgs = (
            " ".join(jobDir + "/" + leaf for leaf in leafFiles),
            newick,
            outDir + "/outputJob1.mfa",
        )
        command = "Ortheus.py -e %s -d '%s' -f %s -j -a -b" % commandArgs
        print("running command " + command)
        system(command)
def setUp(self):
    """Set up data set paths for region "ENm001" and a scratch directory.

    Sets encodeRegion, encodePath, regionPath, tempDir and tempOutputFile on
    the test instance.
    """
    unittest.TestCase.setUp(self)
    region = "ENm001"
    encodeRoot = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
    # Scratch directory is created beneath the current working directory.
    scratchDir = getTempDirectory(os.getcwd())

    self.encodeRegion = region
    self.encodePath = encodeRoot
    self.regionPath = os.path.join(encodeRoot, region)
    self.tempDir = scratchDir
    self.tempOutputFile = os.path.join(scratchDir, "results1.txt")
def testEvolver_Primates_Large(self):
    """Run the workflow on the large evolver primate data set."""
    inputDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "primates", "large")
    primateSequences = (
        "simChimp.masked.fa",
        "simGorilla.masked.fa",
        "simHuman.masked.fa",
        "simOrang.masked.fa",
    )

    def primateInputs(regionNumber=0, tempDir=None):
        # Both arguments are accepted but ignored; the inputs are always the same.
        return getInputs(inputDir, primateSequences)

    runWorkflow_multipleExamples(self.id(),
                                 primateInputs,
                                 batchSystem=self.batchSystem,
                                 buildToilStats=True)
def testEvolver_Mammals_Large(self):
    """Run the workflow on the large evolver mammal data set."""
    inputDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "large")
    mammalSequences = (
        "simCow.masked.fa",
        "simDog.masked.fa",
        "simHuman.masked.fa",
        "simMouse.masked.fa",
        "simRat.masked.fa",
    )

    def mammalInputs(regionNumber=0, tempDir=None):
        # Both arguments are accepted but ignored; the inputs are always the same.
        return getInputs(inputDir, mammalSequences)

    runWorkflow_multipleExamples(self.id(),
                                 mammalInputs,
                                 batchSystem=self.batchSystem,
                                 buildToilStats=True)
def run(self):
    """Score every Blanchette repeat against its true alignment and merge the scores.

    For each repeat i in xrange(self.options.blanchetteRepeats):

    * the true alignment <datasets>/blanchettesSimulation/<ii>.job/true.mfa
      (ii is i zero-padded to two digits) is converted to MAF with mfaToMaf
      and renamed with applyNamingToMaf using <outputDir>/<i>/experiment.xml;
    * mafComparator compares it with the predicted alignment, which is
      progressiveCactusAlignment/Anc0/Anc0.maf when
      self.params.vanilla == False and cactusVanilla.maf otherwise;
    * the comparison is copied to <outputDir>/<i>/mafComparison.xml and then
      merged in place with the previous repeat's (already merged) result.

    Finally the last merged result is moved to <outputDir>/mafComparison.xml.
    """
    previousOutputFile = None
    blanchettePath = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation")
    # The tree file is shared by all repeats, so build its path once.
    treeFile = os.path.join(blanchettePath, "tree.newick")
    for i in xrange(self.options.blanchetteRepeats):
        repeatOutputDir = os.path.join(self.outputDir, str(i))

        # Convert the true alignment of this repeat from MFA to MAF.
        trueAlignmentMFA = os.path.join(blanchettePath, "%.2i.job" % i, "true.mfa")
        trueAlignmentMAF = os.path.join(self.getLocalTempDir(), "temp.maf")
        system("mfaToMaf --mfaFile %s --outputFile %s --treeFile %s" % (trueAlignmentMFA, trueAlignmentMAF, treeFile))

        # Rename the true MAF using the naming from the experiment file.
        trueRenamedMAF = trueAlignmentMAF + ".renamed"
        expPath = os.path.join(repeatOutputDir, "experiment.xml")
        applyNamingToMaf(expPath, trueAlignmentMAF, trueRenamedMAF)
        trueAlignmentMAF = trueRenamedMAF

        # "== False" is kept on purpose: "not vanilla" would also select the
        # progressive branch for None or other falsy values.
        if self.params.vanilla == False:
            predictedAlignmentMaf = os.path.join(repeatOutputDir, "progressiveCactusAlignment", "Anc0", "Anc0.maf")
        else:
            predictedAlignmentMaf = os.path.join(repeatOutputDir, "cactusVanilla.maf")

        outputFile = os.path.join(self.getLocalTempDir(), "temp%i" % i)
        system("mafComparator --mafFile1 %s --mafFile2 %s --outputFile %s" % (trueAlignmentMAF, predictedAlignmentMaf, outputFile))
        # Keep the per-repeat comparison before it is overwritten by the merge.
        system("cp %s %s" % (outputFile, os.path.join(repeatOutputDir, "mafComparison.xml")))
        if previousOutputFile is not None:
            system("mergeMafComparatorResults.py --results1 %s --results2 %s --outputFile %s" % (outputFile, previousOutputFile, outputFile))
        previousOutputFile = outputFile
    # NOTE(review): with zero repeats previousOutputFile is still None here and
    # the command is issued with the literal text "None" - confirm that callers
    # always configure at least one repeat.
    system("mv %s %s" % (previousOutputFile, os.path.join(self.outputDir, "mafComparison.xml")))
def run(self):
    """Schedule an alignment of the five evolver mammal loci1 sequences and set up its stats."""
    simDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "loci1")
    chromosomeFiles = ("simHuman.chr6", "simMouse.chr6", "simRat.chr6", "simCow.chr6", "simDog.chr6")
    sequences, newickTreeString = getInputs(simDir, chromosomeFiles)

    # Output directory name is this target's name immediately followed by its params.
    runName = "%s%s" % (self.name, self.params)
    outputDir = os.path.join(self.options.outputDir, runName)

    alignmentTarget = MakeAlignment(self.options, sequences, newickTreeString, outputDir, self.params)
    self.addChildTarget(alignmentTarget)

    trueAlignment = os.path.join(simDir, "all.burnin.maf")
    self.setupStats(outputDir, trueAlignment, self.params)
def getInputs(path, sequenceNames):
    """Return the sequence file paths and newick tree string for a data set directory.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.

    path is joined onto TestStatus.getPathToDataSets(); because os.path.join
    discards earlier components when a later one is absolute, an absolute path
    is used unchanged. Both the sequence files and "tree.newick" are looked up
    in that resolved directory.

    sequenceNames is an iterable of file names inside the directory.

    Returns a tuple (sequences, newickTreeString): the list of sequence file
    paths in the order given, and the result of parseNewickTreeFile on the
    directory's "tree.newick".
    """
    seqPath = os.path.join(TestStatus.getPathToDataSets(), path)
    sequences = [os.path.join(seqPath, sequence) for sequence in sequenceNames]  # Same order as tree
    # Read the tree from the resolved directory so that a relative path finds
    # the tree next to its sequences rather than relative to the working directory.
    newickTreeString = parseNewickTreeFile(os.path.join(seqPath, "tree.newick"))
    return sequences, newickTreeString
def testENm001(self):
    """Run Ortheus on eight ENm001 sequences.

    Only runs when the test status equals TestStatus.TEST_VERY_LONG.
    """
    if TestStatus.getTestStatus() != TestStatus.TEST_VERY_LONG:
        return
    regionDir = TestStatus.getPathToDataSets() + "/MAY-2005/ENm001"
    resultDir = TestStatus.getPathToDataSets() + "/ortheus/encodeTest"
    #treeString = '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);'
    treeString = '((((human:0.006969,chimp:0.009727):0.025291,baboon:0.044568):0.108727,(rat:0.081244,mouse:0.072818):0.260327):0.02326,(cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.048788):0.749525;'
    # File names are listed in the order the species appear in treeString.
    fastaNames = (
        "human.ENm001.fa",
        "chimp.ENm001.fa",
        "baboon.ENm001.fa",
        "rat.ENm001.fa",
        "mouse.ENm001.fa",
        "cow.ENm001.fa",
        "cat.ENm001.fa",
        "dog.ENm001.fa",
    )
    seqFiles = []
    for fastaName in fastaNames:
        seqFiles.append(regionDir + "/" + fastaName)
    resultFile = resultDir + "/outputENm001.mfa"
    command = "Ortheus.py -e %s -d '%s' -f %s -j -a -b" % (" ".join(seqFiles), treeString, resultFile)
    print("running command " + command)
    system(command)
def getCactusInputs_evolverPrimates():
    """Gets the inputs for running cactus_workflow on the evolver primates loci1 data set.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.

    Returns a tuple (sequences, newickTreeString): paths to the four simulated
    chr6 files and the parsed contents of the directory's tree.newick.
    """
    lociDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "primates", "loci1")
    sequences = []
    for chromosomeFile in ("simHuman.chr6", "simChimp.chr6", "simGorilla.chr6", "simOrang.chr6"):
        sequences.append(os.path.join(lociDir, chromosomeFile))
    treePath = os.path.join(lociDir, "tree.newick")
    return sequences, parseNewickTreeFile(treePath)
def run(self):
    """Schedule a human/mouse alignment of the large evolver mammal data set and set up its stats."""
    simDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "large")
    # getInputs also parses the data set's tree file, but that tree is discarded
    # in favour of the hard-coded two-species tree below.
    sequences, _ = getInputs(simDir, ("simHuman.masked.fa", "simMouse.masked.fa"))
    newickTreeString = "(simHuman:0.144018,simMouse:0.356483);"

    runName = "%s%s" % (self.name, self.params)
    outputDir = os.path.join(self.options.outputDir, runName)

    alignmentTarget = MakeAlignment(self.options, sequences, newickTreeString, outputDir, self.params)
    self.addChildTarget(alignmentTarget)

    trueAlignment = os.path.join(simDir, "all.burnin.maf")
    self.setupStats(outputDir, trueAlignment, self.params)
def testEvolver_Mammals_Loci1(self):
    """Run the workflow on the evolver mammals loci1 data set (TEST_MEDIUM).

    Does nothing when the SON_TRACE_DATASETS environment variable is unset.
    """
    if "SON_TRACE_DATASETS" in os.environ:
        inputDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "loci1")
        mammalSequences = (
            "simCow.chr6",
            "simDog.chr6",
            "simHuman.chr6",
            "simMouse.chr6",
            "simRat.chr6",
        )

        def mammalInputs(regionNumber=0, tempDir=None):
            # Both arguments are accepted but ignored; the inputs are always the same.
            return getInputs(inputDir, mammalSequences)

        runWorkflow_multipleExamples(mammalInputs,
                                     testRestrictions=(TestStatus.TEST_MEDIUM,),
                                     batchSystem=self.batchSystem,
                                     buildToilStats=True)
def testEvolver_Mammals_Large(self):
    """Run the workflow on the large evolver mammal data set (TEST_VERY_LONG).

    Does nothing when the SON_TRACE_DATASETS environment variable is unset.
    """
    if os.environ.get("SON_TRACE_DATASETS") is None:
        return
    inputDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "large")
    speciesNames = ("Cow", "Dog", "Human", "Mouse", "Rat")
    # Expands to simCow.masked.fa, simDog.masked.fa, ... in the order above.
    mammalSequences = tuple("sim" + speciesName + ".masked.fa" for speciesName in speciesNames)

    def mammalInputs(regionNumber=0, tempDir=None):
        # Both arguments are accepted but ignored; the inputs are always the same.
        return getInputs(inputDir, mammalSequences)

    runWorkflow_multipleExamples(mammalInputs,
                                 testRestrictions=(TestStatus.TEST_VERY_LONG,),
                                 batchSystem=self.batchSystem,
                                 buildToilStats=True)
def testEvolver_Primates_Large(self):
    """Run the workflow on the large evolver primate data set (TEST_VERY_LONG).

    Does nothing when the SON_TRACE_DATASETS environment variable is unset.
    Job tree stats are requested via buildJobTreeStats.
    """
    if "SON_TRACE_DATASETS" in os.environ:
        inputDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "primates", "large")
        primateSequences = (
            "simChimp.masked.fa",
            "simGorilla.masked.fa",
            "simHuman.masked.fa",
            "simOrang.masked.fa",
        )

        def primateInputs(regionNumber=0, tempDir=None):
            # Both arguments are accepted but ignored; the inputs are always the same.
            return getInputs(inputDir, primateSequences)

        runWorkflow_multipleExamples(primateInputs,
                                     testRestrictions=(TestStatus.TEST_VERY_LONG,),
                                     batchSystem=self.batchSystem,
                                     buildJobTreeStats=True)
def testEvolver_Primates_Loci1(self):
    """Run the workflow on the evolver primates loci1 data set (TEST_SHORT).

    Does nothing when the SON_TRACE_DATASETS environment variable is unset.
    """
    if os.environ.get("SON_TRACE_DATASETS") is None:
        return
    inputDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "primates", "loci1")
    primateSequences = (
        "simChimp.chr6",
        "simGorilla.chr6",
        "simHuman.chr6",
        "simOrang.chr6",
    )

    def primateInputs(regionNumber=0, tempDir=None):
        # Both arguments are accepted but ignored; the inputs are always the same.
        return getInputs(inputDir, primateSequences)

    runWorkflow_multipleExamples(primateInputs,
                                 testRestrictions=(TestStatus.TEST_SHORT,),
                                 batchSystem=self.batchSystem,
                                 buildToilStats=True)
def testEvolver_Mammals_Large(self):
    """Run the workflow on the large evolver mammal data set (TEST_VERY_LONG).

    Does nothing when the SON_TRACE_DATASETS environment variable is unset.
    Job tree stats are requested via buildJobTreeStats.
    """
    if "SON_TRACE_DATASETS" in os.environ:
        inputDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "large")
        mammalSequences = (
            "simCow.masked.fa",
            "simDog.masked.fa",
            "simHuman.masked.fa",
            "simMouse.masked.fa",
            "simRat.masked.fa",
        )

        def mammalInputs(regionNumber=0, tempDir=None):
            # Both arguments are accepted but ignored; the inputs are always the same.
            return getInputs(inputDir, mammalSequences)

        runWorkflow_multipleExamples(mammalInputs,
                                     testRestrictions=(TestStatus.TEST_VERY_LONG,),
                                     batchSystem=self.batchSystem,
                                     buildJobTreeStats=True)
def run(self):
    """Schedule a HUMAN/MOUSE/DOG alignment of Blanchette job 00 and set up its stats."""
    simDir = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation", "00.job")
    # A tuple of the three sequence paths; the tree is hard-coded rather than
    # read from the data set.
    sequences = tuple(os.path.join(simDir, speciesFile) for speciesFile in ("HUMAN", "MOUSE", "DOG"))
    newickTreeString = "((HUMAN:0.144018,MOUSE:0.356483):0.0238,DOG:0.197);"

    runName = "%s%s" % (self.name, self.params)
    outputDir = os.path.join(self.options.outputDir, runName)

    alignmentTarget = MakeAlignment(self.options, sequences, newickTreeString, outputDir, self.params)
    self.addChildTarget(alignmentTarget)

    trueAlignment = os.path.join(simDir, "true.maf")
    self.setupStats(outputDir, trueAlignment, self.params)
def testEvolver_Primates_Large(self):
    """Run the workflow on the large evolver primate data set (TEST_VERY_LONG).

    Does nothing when the SON_TRACE_DATASETS environment variable is unset.
    """
    if os.environ.get("SON_TRACE_DATASETS") is None:
        return
    inputDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "primates", "large")
    speciesNames = ("Chimp", "Gorilla", "Human", "Orang")
    # Expands to simChimp.masked.fa, simGorilla.masked.fa, ... in the order above.
    primateSequences = tuple("sim" + speciesName + ".masked.fa" for speciesName in speciesNames)

    def primateInputs(regionNumber=0, tempDir=None):
        # Both arguments are accepted but ignored; the inputs are always the same.
        return getInputs(inputDir, primateSequences)

    runWorkflow_multipleExamples(primateInputs,
                                 testRestrictions=(TestStatus.TEST_VERY_LONG,),
                                 batchSystem=self.batchSystem,
                                 buildToilStats=True)
def setUp(self):
    """Choose the number of test repetitions and prepare scratch file paths.

    Sets testNo, tempDir, tempFiles, tempOutputFile, tempOutputFile2 and
    encodePath on the test instance.
    """
    self.testNo = TestStatus.getTestSetup(1, 5, 10, 100)
    scratchDir = getTempDirectory(os.getcwd())
    self.tempDir = scratchDir
    unittest.TestCase.setUp(self)

    firstResults = os.path.join(scratchDir, "results1.txt")
    secondResults = os.path.join(scratchDir, "results2.txt")
    self.tempOutputFile = firstResults
    self.tempOutputFile2 = secondResults
    # Both result files are recorded in tempFiles, in creation order.
    self.tempFiles = [firstResults, secondResults]

    self.encodePath = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
def setUp(self):
    """Set up data set paths for region "ENm001", a scratch directory and toil options.

    Sets encodeRegion, encodePath, regionPath, tempDir, tempOutputFile,
    toilDir and toilOptions on the test instance.
    """
    unittest.TestCase.setUp(self)
    region = "ENm001"
    encodeRoot = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
    scratchDir = getTempDirectory(os.getcwd())

    self.encodeRegion = region
    self.encodePath = encodeRoot
    self.regionPath = os.path.join(encodeRoot, region)
    self.tempDir = scratchDir
    self.tempOutputFile = os.path.join(scratchDir, "results1.txt")

    # Default toil options for a job store inside the scratch directory, with
    # caching switched off.
    self.toilDir = os.path.join(scratchDir, "toil")
    toilOptions = Job.Runner.getDefaultOptions(self.toilDir)
    toilOptions.disableCaching = True
    self.toilOptions = toilOptions
def getCactusInputs_chromosomeX(regionNumber=0, tempDir=None):
    """Gets the inputs for running cactus_workflow using some mammalian chromosome X's.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.

    regionNumber and tempDir are accepted but not used.

    Returns a tuple (sequences, newickTreeString): paths to the five fasta
    files in the chr_x data set and the parsed contents of its newickTree.txt.
    """
    chrXDir = os.path.join(TestStatus.getPathToDataSets(), "chr_x")
    sequences = []
    for fastaName in ("cow.fa", "dog.fa", "human.fa", "mouse.fa", "rat.fa"):
        sequences.append(os.path.join(chrXDir, fastaName))
    treePath = os.path.join(chrXDir, "newickTree.txt")
    return sequences, parseNewickTreeFile(treePath)
def testENm001(self):
    """Align eight ENm001 sequences with Ortheus.

    The body is skipped unless the test status equals TestStatus.TEST_VERY_LONG.
    """
    if TestStatus.getTestStatus() == TestStatus.TEST_VERY_LONG:
        encodeDir = TestStatus.getPathToDataSets() + "/MAY-2005/ENm001"
        outDir = TestStatus.getPathToDataSets() + "/ortheus/encodeTest"
        # treeString = '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);'
        newick = "((((human:0.006969,chimp:0.009727):0.025291,baboon:0.044568):0.108727,(rat:0.081244,mouse:0.072818):0.260327):0.02326,(cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.048788):0.749525;"
        fastaNames = [
            "human.ENm001.fa",
            "chimp.ENm001.fa",
            "baboon.ENm001.fa",
            "rat.ENm001.fa",
            "mouse.ENm001.fa",
            "cow.ENm001.fa",
            "cat.ENm001.fa",
            "dog.ENm001.fa",
        ]
        # The three values substituted into the command line, in template order:
        # space-separated input files, the quoted tree, and the output file.
        commandArgs = (
            " ".join(encodeDir + "/" + fastaName for fastaName in fastaNames),
            newick,
            outDir + "/outputENm001.mfa",
        )
        command = "Ortheus.py -e %s -d '%s' -f %s -j -a -b" % commandArgs
        print("running command " + command)
        system(command)
def setUp(self):
    """Prepare scratch file paths and default aligner arguments for a test.

    Sets tempDir, tempFiles, tempOutputFile, tempOutputFile2, encodePath,
    defaultLastzArguments and defaultRealignArguments on the test instance.
    """
    # The base-class hook only needs to run once; the original called it twice.
    unittest.TestCase.setUp(self)
    self.tempDir = getTempDirectory(os.getcwd())
    self.tempFiles = []
    self.tempOutputFile = os.path.join(self.tempDir, "results1.txt")
    self.tempFiles.append(self.tempOutputFile)
    self.tempOutputFile2 = os.path.join(self.tempDir, "results2.txt")
    self.tempFiles.append(self.tempOutputFile2)
    self.encodePath = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
    self.defaultLastzArguments = "--ambiguous=iupac"
    self.defaultRealignArguments = ""
def getCactusInputs_blanchette(regionNumber=0, tempDir=None):
    """Gets the inputs for running cactus_workflow using a blanchette simulated region.

    regionNumber must satisfy 0 <= regionNumber < 50 (checked with an
    assertion); it selects the job directory "<regionNumber as two digits>.job".
    tempDir is accepted but not used.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.

    Returns a tuple (sequences, newickTreeString): the nine species file paths
    for the region and the parsed contents of the data set's tree.newick.
    """
    assert 0 <= regionNumber < 50
    blanchettePath = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation")
    jobDir = "%.2i.job" % regionNumber
    # Same order as tree
    speciesNames = ("HUMAN", "CHIMP", "BABOON", "MOUSE", "RAT", "DOG", "CAT", "PIG", "COW")
    sequences = []
    for species in speciesNames:
        sequences.append(os.path.join(blanchettePath, jobDir, species))
    treePath = os.path.join(blanchettePath, "tree.newick")
    return sequences, parseNewickTreeFile(treePath)
def getCactusInputs_encode(regionNumber=0, tempDir=None):
    """Gets the inputs for running cactus_workflow using an Encode pilot project region.

    regionNumber must satisfy 0 <= regionNumber < 14 (checked with assertions);
    it selects region "ENm%03i" % (regionNumber + 1), i.e. ENm001 .. ENm014.
    tempDir is accepted but not used.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.

    Returns a tuple (sequences, newickTreeString): the seven
    "<species>.<region>.fa" paths inside the region's directory of the
    MAY-2005 data set, and the parsed contents of its reducedTree.newick.
    """
    assert regionNumber >= 0
    assert regionNumber < 14
    encodeRegionString = "ENm%03i" % (regionNumber + 1)
    encodeDatasetPath = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
    sequences = [
        os.path.join(encodeDatasetPath, encodeRegionString, "%s.%s.fa" % (species, encodeRegionString))
        for species in ("human", "chimp", "baboon", "mouse", "rat", "dog", "cow")
    ]
    newickTreeString = parseNewickTreeFile(os.path.join(encodeDatasetPath, "reducedTree.newick"))
    return sequences, newickTreeString
def seqFilePairGenerator():
    """Yield (seqFile1, seqFile2) for every unordered pair of species in each region.

    Currently covers the single region ENm001 and the species human and mouse,
    so exactly one pair is produced.
    """
    ##Get sequences
    encodePath = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
    # Could go to six regions by widening the range.
    regionNames = ["ENm00" + str(regionIndex) for regionIndex in xrange(1, 2)]
    # Other species to try "rat", "monodelphis", "macaque", "chimp", "dog"
    species = ("human", "mouse")
    for regionName in regionNames:
        regionDir = os.path.join(encodePath, regionName)
        for position, firstSpecies in enumerate(species):
            firstFile = os.path.join(regionDir, "%s.%s.fa" % (firstSpecies, regionName))
            # Pair each species only with those after it, so no pair repeats.
            for secondSpecies in species[position + 1:]:
                secondFile = os.path.join(regionDir, "%s.%s.fa" % (secondSpecies, regionName))
                yield firstFile, secondFile
def testAndyYatesFirstExample(self):
    """Run Ortheus on the 36 sequences of the andyYatesExample1 data set.

    The body is skipped unless the test status equals TestStatus.TEST_LONG.
    """
    if TestStatus.getTestStatus() == TestStatus.TEST_LONG:
        filePath = TestStatus.getPathToDataSets(
        ) + "/ortheus/andyYatesExample1"
        # Whitespace-separated file names; the backslashes continue the literal
        # across lines and split() below discards the extra whitespace.
        seqs = "seq1.fa seq2.fa seq3.fa seq4.fa seq5.fa seq6.fa seq7.fa seq8.fa seq9.fa seq10.fa seq11.fa \
        seq12.fa seq13.fa seq14.fa seq15.fa seq16.fa seq17.fa seq18.fa seq19.fa seq20.fa seq21.fa seq22.fa seq23.fa seq24.fa seq25.fa seq26.fa \
        seq27.fa seq28.fa seq29.fa seq30.fa seq31.fa seq32.fa seq33.fa seq34.fa seq35.fa seq36.fa"
        # Prefix every file name with the data set directory.
        seqs = " ".join(["%s/%s" % (filePath, i) for i in seqs.split()])
        # The three %s slots take the sequence list, then filePath twice (for
        # the -f and -g output files). The 36 numbers after -A correspond
        # one-to-one to the 36 sequence files.
        # NOTE(review): "output.16163.tree-a" has no space before "-a"; possibly
        # a separate "-a" flag was intended - confirm before changing.
        command = 'Ortheus.py -l "#-j 0 -e" -e %s -z \
        "(((1012:0.0112,1051:0.0119):0.0026,(1055:0.0015,1052:0.0018):0.0370):0.0022,1054:0.0108,1053:0.0116);" \
        -A 1054 1051 1054 1054 1053 1012 1054 1054 1053 1054 1051 1054 1051 1051 1053 1051 1051 1012 1051 1054 1012 1054 1053 1051 1053 \
        1054 1054 1051 1012 1012 1054 1053 1053 1012 1054 1051 -f %s/output.16163.mfa -g %s/output.16163.tree-a -k "# -A" -m "java -Xmx1800m -Xms1800m" -a -b' % \
            (seqs, filePath, filePath)
        print "running command", command
        system(command)
def testAndyYatesFirstExample(self):
    """Run Ortheus on the 36 sequences of the andyYatesExample1 data set.

    The body is skipped unless the test status equals TestStatus.TEST_LONG.
    """
    if TestStatus.getTestStatus() == TestStatus.TEST_LONG:
        filePath = TestStatus.getPathToDataSets() + "/ortheus/andyYatesExample1"
        # Whitespace-separated file names; the backslashes continue the literal
        # across lines and split() below discards the extra whitespace.
        seqs = "seq1.fa seq2.fa seq3.fa seq4.fa seq5.fa seq6.fa seq7.fa seq8.fa seq9.fa seq10.fa seq11.fa \
        seq12.fa seq13.fa seq14.fa seq15.fa seq16.fa seq17.fa seq18.fa seq19.fa seq20.fa seq21.fa seq22.fa seq23.fa seq24.fa seq25.fa seq26.fa \
        seq27.fa seq28.fa seq29.fa seq30.fa seq31.fa seq32.fa seq33.fa seq34.fa seq35.fa seq36.fa"
        # Prefix every file name with the data set directory.
        seqs = " ".join(["%s/%s" % (filePath, i) for i in seqs.split()])
        # The three %s slots take the sequence list, then filePath twice (for
        # the -f and -g output files). The 36 numbers after -A correspond
        # one-to-one to the 36 sequence files.
        # NOTE(review): "output.16163.tree-a" has no space before "-a"; possibly
        # a separate "-a" flag was intended - confirm before changing.
        command = (
            'Ortheus.py -l "#-j 0 -e" -e %s -z \
        "(((1012:0.0112,1051:0.0119):0.0026,(1055:0.0015,1052:0.0018):0.0370):0.0022,1054:0.0108,1053:0.0116);" \
        -A 1054 1051 1054 1054 1053 1012 1054 1054 1053 1054 1051 1054 1051 1051 1053 1051 1051 1012 1051 1054 1012 1054 1053 1051 1053 \
        1054 1054 1051 1012 1012 1054 1053 1053 1012 1054 1051 -f %s/output.16163.mfa -g %s/output.16163.tree-a -k "# -A" -m "java -Xmx1800m -Xms1800m" -a -b'
            % (seqs, filePath, filePath)
        )
        print "running command", command
        system(command)
def testCactusWorkflow_Blanchette(self):
    """Runs the workflow on blanchette's simulated (colinear) regions.

    Skipped entirely when the SON_TRACE_DATASETS environment variable is unset.

    For each of self.testNo repetitions a random window of 5000 columns is cut
    from the true alignment of job 00, the gap-free sequences of the first nine
    rows are written to temporary fasta files, and the cactus workflow is run
    on them. When mfaToMaf, cactus_MAFGenerator, mafComparator and
    cactus_treeStats all exit with status 0 for --help, the resulting MAF and
    tree stats are also generated and compared against the true sub-alignment.
    """
    if "SON_TRACE_DATASETS" not in os.environ:
        return
    for test in xrange(self.testNo):
        # Nothing in this method appends to tempFiles, so the removal loop at
        # the end of the iteration currently has nothing to delete.
        tempFiles = []
        tempDir = getTempDirectory(os.getcwd())
        trueAlignment = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation", "00.job", "true.mfa")

        #Load the true alignment.
        columnAlignment = list(fastaAlignmentRead(trueAlignment))
        fastaHeaders = list(fastaReadHeaders(trueAlignment))
        sequenceNumber = 9

        #The tree
        newickTreeString = "((((HUMAN:0.006969, CHIMP:0.009727):0.025291, BABOON:0.044568):0.11,(RAT:0.072818, MOUSE:0.081244):0.260342):0.023260,((DOG:0.07, CAT:0.07):0.087381,(PIG:0.06, COW:0.06):0.104728):0.04);"

        #Get random dir
        testDir = getTempDirectory(tempDir)

        #random alignment
        alignmentLength = 5000
        randomStart = random.choice(xrange(len(columnAlignment)-alignmentLength))
        subAlignment = columnAlignment[randomStart:randomStart+alignmentLength]
        logger.info("Got a sub alignment, it is %i columns long" % len(subAlignment))

        #Get sequences: each row of the window with its gap characters removed.
        sequences = [ (fastaHeaders[seqNo], "".join([ column[seqNo] for column in subAlignment if column[seqNo] != '-' ])) for seqNo in xrange(sequenceNumber) ]
        logger.info("Got the sequences")

        #Write sequences into temp files
        tempFastaFiles = []
        for seqNo, (header, sequence) in enumerate(sequences):
            logger.info("Making temp file for header: %s, seq: %s" % (header, sequence))
            tempFastaFile = os.path.join(testDir, "%i.fa" % seqNo)
            tempFastaFiles.append(tempFastaFile)
            # The context manager closes the file even if fastaWrite raises.
            with open(tempFastaFile, "w") as fileHandle:
                fastaWrite(fileHandle, header, sequence)
        logger.info("Got the temp sequence files")

        experiment = getCactusWorkflowExperimentForTest(tempFastaFiles, newickTreeString, testDir)
        experimentFile = os.path.join(testDir, "experiment.xml")
        experiment.writeXML(experimentFile)
        cactusDiskDatabaseString = experiment.getDiskDatabaseString()

        jobTree = os.path.join(testDir, "jobTree")

        runCactusWorkflow(experimentFile, jobTree)
        logger.info("Ran the the workflow")

        #Check the output alignment
        runJobTreeStatusAndFailIfNotComplete(jobTree)
        logger.info("Checked the job tree dir")

        #Output the 'TRUE' alignment file
        # Only compare when all four tools can be run; all() stops at the
        # first tool whose --help check fails, like the original "and" chain.
        requiredTools = ("mfaToMaf", "cactus_MAFGenerator", "mafComparator", "cactus_treeStats")
        if all(os.system("%s --help > /dev/null 2>&1" % tool) == 0 for tool in requiredTools):
            trueMFAFile = os.path.join(testDir, "true.mfa")
            fastaAlignmentWrite(subAlignment, fastaHeaders, len(fastaHeaders), trueMFAFile)
            trueMAFFile = os.path.join(testDir, "true.maf")
            system("mfaToMaf --mfaFile %s --outputFile %s --logLevel %s" % (trueMFAFile, trueMAFFile, getLogLevelString()))
            system("cat %s" % trueMAFFile)

            #Now get mafs for the region.
            mAFFile = os.path.join(testDir, "flower.maf")
            system("cactus_MAFGenerator --flowerName 0 --cactusDisk '%s' --outputFile %s --logLevel %s" % (cactusDiskDatabaseString, mAFFile, getLogLevelString()))
            logger.info("Got the MAFs from the flower disk")
            system("cat %s" % mAFFile)

            statsFile = os.path.join(testDir, "stats.xml")
            system("cactus_treeStats --cactusDisk '%s' --flowerName 0 --outputFile %s --logLevel %s" % (cactusDiskDatabaseString, statsFile, getLogLevelString()))
            system("cat %s" % statsFile)
            logger.info("Got the cactus tree stats")

            #Now compare the mafs to the output.
            resultsFile = os.path.join(testDir, "results.xml")
            system("mafComparator --mafFile1 %s --mafFile2 %s --outputFile %s --logLevel %s" % (trueMAFFile, mAFFile, resultsFile, getLogLevelString()))
            logger.info("Ran the maf comparator")
            system("cat %s" % resultsFile)

        #Cleanup
        # NOTE(review): cleanup is performed once per repetition and regardless
        # of whether the comparison tools were available - confirm this matches
        # the intended nesting.
        experiment.cleanupDb()
        system("rm -rf %s" % testDir)
        logger.info("Successfully ran test for the problem")

        for tempFile in tempFiles:
            os.remove(tempFile)
        system("rm -rf %s" % tempDir)