def setUp(self):
    """Store the test parameters and set up the working directories.

    Records the test count and depth reported by ``TestStatus`` and builds a
    ``TempFileTree`` under the current working directory.
    """
    unittest.TestCase.setUp(self)
    # NOTE(review): the four arguments are presumably one value per test
    # length (short/medium/long/very long) -- confirm against getTestSetup.
    self.testNo = TestStatus.getTestSetup(1, 1, 2, 2)
    self.depth = TestStatus.getTestSetup(1, 2, 3, 5)
    workingDir = os.getcwd()
    # A directory for the job tree to be created in.
    self.jobTreeDir = os.path.join(workingDir, "jobTree")
    # Ensures that the file tree is visible.
    self.tempFileTreeDir = os.path.join(workingDir, "tempFileTree")
    # A place to get temp files from.
    self.tempFileTree = TempFileTree(self.tempFileTreeDir)
def runWorkflow_multipleExamples(inputGenFunction,
                                 testNumber=1,
                                 testRestrictions=(TestStatus.TEST_SHORT, TestStatus.TEST_MEDIUM,
                                                   TestStatus.TEST_LONG, TestStatus.TEST_VERY_LONG,),
                                 inverseTestRestrictions=False,
                                 batchSystem="single_machine",
                                 buildAvgs=False, buildReference=False,
                                 buildReferenceSequence=False,
                                 buildCactusPDF=False, buildAdjacencyPDF=False,
                                 buildReferencePDF=False,
                                 makeCactusTreeStats=False, makeMAFs=False,
                                 configFile=None, buildJobTreeStats=False):
    """A wrapper to run a number of examples.

    The examples run only when the current test status is in
    ``testRestrictions`` (or is NOT in it when ``inverseTestRestrictions`` is
    set).  For each of ``testNumber`` examples a fresh temp directory is made,
    ``inputGenFunction(regionNumber=..., tempDir=...)`` supplies the sequences
    and newick tree, ``runWorkflow_TestScript`` is run, and the temp directory
    is deleted.
    """
    # NOTE(review): buildReferenceSequence and buildReferencePDF are accepted
    # but never forwarded to runWorkflow_TestScript.
    statusListed = TestStatus.getTestStatus() in testRestrictions
    if statusListed == bool(inverseTestRestrictions):
        # Restrictions (after optional inversion) exclude this test status.
        return
    for exampleIndex in xrange(testNumber):
        workDir = getTempDirectory(os.getcwd())
        sequences, newickTreeString = inputGenFunction(regionNumber=exampleIndex,
                                                       tempDir=workDir)
        runWorkflow_TestScript(sequences, newickTreeString,
                               batchSystem=batchSystem,
                               buildAvgs=buildAvgs,
                               buildReference=buildReference,
                               buildCactusPDF=buildCactusPDF,
                               buildAdjacencyPDF=buildAdjacencyPDF,
                               makeCactusTreeStats=makeCactusTreeStats,
                               makeMAFs=makeMAFs,
                               configFile=configFile,
                               buildJobTreeStats=buildJobTreeStats)
        system("rm -rf %s" % workDir)
        logger.info("Finished random test %i" % exampleIndex)
def setUp(self):
    """Store the test parameters, set up working directories, restart parasol.

    Records the test count and depth reported by ``TestStatus``, builds a
    ``TempFileTree`` under the current working directory and finally calls
    ``parasolRestart()``.
    """
    unittest.TestCase.setUp(self)
    self.testNo = TestStatus.getTestSetup(1, 1, 5, 5)
    self.depth = TestStatus.getTestSetup(1, 2, 2, 3)
    workingDir = os.getcwd()
    # A directory for the job tree to be created in.
    self.jobTreeDir = "%s/jobTree" % workingDir
    self.tempFileTreeDir = os.path.join(workingDir, "tempFileTree")
    # A place to get temp files from.
    self.tempFileTree = TempFileTree(self.tempFileTreeDir)
    parasolRestart()
def setUp(self):
    """Create a temp directory and the result-file paths used by the tests.

    ``self.tempFiles`` lists both result files; ``self.encodePath`` points at
    the "MAY-2005" directory inside the data-set path.
    """
    self.testNo = TestStatus.getTestSetup(1, 5, 10, 100)
    self.tempDir = getTempDirectory(os.getcwd())
    unittest.TestCase.setUp(self)
    self.tempOutputFile = os.path.join(self.tempDir, "results1.txt")
    self.tempOutputFile2 = os.path.join(self.tempDir, "results2.txt")
    self.tempFiles = [self.tempOutputFile, self.tempOutputFile2]
    self.encodePath = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
def testSimulation(self):
    """Run Ortheus.py on job 00 of the Blanchette simulation.

    Does nothing unless the test status is ``TestStatus.TEST_LONG``.
    """
    if TestStatus.getTestStatus() != TestStatus.TEST_LONG:
        return
    blanchettePath = "%s/blanchettesSimulation/00.job" % TestStatus.getPathToDataSets()
    outputPath = "%s/ortheus/blanchettesSimulationTest" % TestStatus.getPathToDataSets()
    treeString = "(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);"
    speciesNames = ("HUMAN", "CHIMP", "BABOON", "RAT", "MOUSE", "COW", "CAT", "DOG")
    seqFiles = ["%s/%s" % (blanchettePath, name) for name in speciesNames]
    outputFile = "%s/outputJob1.mfa" % outputPath
    command = "Ortheus.py -e %s -d '%s' -f %s -j -a -b" % (" ".join(seqFiles), treeString, outputFile)
    # A single parenthesised argument prints the same text as the
    # two-item print statement did.
    print("running command %s" % command)
    system(command)
def testCactus_Random(self, matchingAlgorithm):
    """Run the random cactus examples with a config for ``matchingAlgorithm``.

    A config file is obtained from ``getConfigFile(matchingAlgorithm)`` and
    passed to ``runWorkflow_multipleExamples`` with ``buildReference=True``.
    The config file is removed afterwards, including when the workflow raises.
    """
    configFile = getConfigFile(matchingAlgorithm)
    try:
        runWorkflow_multipleExamples(getCactusInputs_random,
                                     testNumber=TestStatus.getTestSetup(),
                                     buildReference=True,
                                     configFile=configFile)
    finally:
        # Remove the config file even if the workflow failed, so failed runs
        # do not leave it behind.
        os.remove(configFile)
def setUp(self):
    """Point the test at ENCODE region ENm001 and create a temp directory.

    ``self.regionPath`` is ``<data sets>/MAY-2005/ENm001`` and
    ``self.tempOutputFile`` is "results1.txt" inside the new temp directory.
    """
    unittest.TestCase.setUp(self)
    self.encodeRegion = "ENm001"
    dataSets = TestStatus.getPathToDataSets()
    self.encodePath = os.path.join(dataSets, "MAY-2005")
    self.regionPath = os.path.join(self.encodePath, self.encodeRegion)
    self.tempDir = getTempDirectory(os.getcwd())
    self.tempOutputFile = os.path.join(self.tempDir, "results1.txt")
def run(self):
    """Compare predicted alignments with the true Blanchette alignments.

    For each repeat ``i`` below ``self.options.blanchetteRepeats``:

    * the true MFA alignment is converted to MAF with ``mfaToMaf`` and passed
      through ``applyNamingToMaf`` together with the repeat's experiment.xml;
    * the predicted MAF (the progressive output, or "cactusVanilla.maf" when
      ``self.params.vanilla`` is not ``False``) is compared against it with
      ``mafComparator``;
    * the comparison is copied to ``<outputDir>/<i>/mafComparison.xml`` and
      merged with the running result of the earlier repeats.

    The final merged result is moved to ``<outputDir>/mafComparison.xml``.
    """
    previousOutputFile = None
    blanchettePath = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation")
    # The tree file is the same for every repeat, so build its path once.
    treeFile = os.path.join(blanchettePath, "tree.newick")
    for i in xrange(self.options.blanchetteRepeats):
        repeatDir = os.path.join(self.outputDir, str(i))
        trueAlignmentMFA = os.path.join(blanchettePath, "%.2i.job" % i, "true.mfa")
        trueAlignmentMAF = os.path.join(self.getLocalTempDir(), "temp.maf")
        system("mfaToMaf --mfaFile %s --outputFile %s --treeFile %s" % (trueAlignmentMFA, trueAlignmentMAF, treeFile))

        trueRenamedMAF = trueAlignmentMAF + ".renamed"
        expPath = os.path.join(repeatDir, "experiment.xml")
        applyNamingToMaf(expPath, trueAlignmentMAF, trueRenamedMAF)
        trueAlignmentMAF = trueRenamedMAF

        # Kept as an explicit comparison: only a value equal to False selects
        # the progressive output (None, for instance, selects the vanilla one).
        if self.params.vanilla == False:
            predictedAlignmentMaf = os.path.join(repeatDir, "progressiveCactusAlignment", "Anc0", "Anc0.maf")
        else:
            predictedAlignmentMaf = os.path.join(repeatDir, "cactusVanilla.maf")

        outputFile = os.path.join(self.getLocalTempDir(), "temp%i" % i)
        system("mafComparator --mafFile1 %s --mafFile2 %s --outputFile %s" % (trueAlignmentMAF, predictedAlignmentMaf, outputFile))
        system("cp %s %s" % (outputFile, os.path.join(repeatDir, "mafComparison.xml")))
        if previousOutputFile is not None:
            # Fold the running total into this repeat's result file.
            system("mergeMafComparatorResults.py --results1 %s --results2 %s --outputFile %s" % (outputFile, previousOutputFile, outputFile))
        previousOutputFile = outputFile
    # NOTE(review): with zero repeats previousOutputFile is still None here and
    # the command below receives the literal text "None".
    system("mv %s %s" % (previousOutputFile, os.path.join(self.outputDir, "mafComparison.xml")))
def getInputs(path, sequenceNames):
    """Return the sequence paths and newick tree for a data set directory.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.

    ``path`` is joined onto ``TestStatus.getPathToDataSets()``; an absolute
    ``path`` is therefore used unchanged (``os.path.join`` semantics).

    Returns ``(sequences, newickTreeString)``: ``sequences`` is a list with one
    path per entry of ``sequenceNames`` (same order as the tree), and the tree
    is parsed from "tree.newick" in the same directory.
    """
    seqPath = os.path.join(TestStatus.getPathToDataSets(), path)
    sequences = [os.path.join(seqPath, sequence) for sequence in sequenceNames]  # Same order as tree
    # Look for the tree next to the sequences.  The raw ``path`` was used here
    # before, which resolved a relative ``path`` against the working directory
    # instead of the data-set root.
    newickTreeString = parseNewickTreeFile(os.path.join(seqPath, "tree.newick"))
    return sequences, newickTreeString
def testAndyYatesFirstExample(self):
    """Run Ortheus.py on the 36 sequences of the "andyYatesExample1" data set.

    Does nothing unless the test status is ``TestStatus.TEST_LONG``.
    """
    if TestStatus.getTestStatus() != TestStatus.TEST_LONG:
        return
    filePath = TestStatus.getPathToDataSets() + "/ortheus/andyYatesExample1"
    # The inputs are seq1.fa .. seq36.fa inside filePath.
    seqs = " ".join("%s/seq%i.fa" % (filePath, number) for number in range(1, 37))
    # NOTE(review): "output.16163.tree-a" may be a missing space before "-a";
    # reproduced unchanged -- confirm against the Ortheus.py options.
    commandTemplate = (
        'Ortheus.py -l "#-j 0 -e" -e %s -z '
        '"(((1012:0.0112,1051:0.0119):0.0026,(1055:0.0015,1052:0.0018):0.0370):0.0022,1054:0.0108,1053:0.0116);" '
        '-A 1054 1051 1054 1054 1053 1012 1054 1054 1053 1054 1051 1054 1051 1051 1053 1051 1051 1012 1051 1054 1012 1054 1053 1051 1053 '
        '1054 1054 1051 1012 1012 1054 1053 1053 1012 1054 1051 -f %s/output.16163.mfa -g %s/output.16163.tree-a -k "# -A" -m "java -Xmx1800m -Xms1800m" -a -b'
    )
    command = commandTemplate % (seqs, filePath, filePath)
    # A single parenthesised argument prints the same text as the
    # two-item print statement did.
    print("running command %s" % command)
    system(command)
def run(self):
    """Schedule an alignment of the evolver mammals "loci1" sequences.

    Adds a ``MakeAlignment`` child target for the five simulated chr6
    sequences and then calls ``setupStats`` with "all.burnin.maf" from the
    same directory.
    """
    simDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "loci1")
    sequenceNames = ("simHuman.chr6", "simMouse.chr6", "simRat.chr6", "simCow.chr6", "simDog.chr6")
    sequences, newickTreeString = getInputs(simDir, sequenceNames)
    # The output directory is named after this target and its parameters.
    outputDir = os.path.join(self.options.outputDir, "%s%s" % (self.name, self.params))
    alignmentTarget = MakeAlignment(self.options, sequences, newickTreeString, outputDir, self.params)
    self.addChildTarget(alignmentTarget)
    trueMaf = os.path.join(simDir, "all.burnin.maf")
    self.setupStats(outputDir, trueMaf, self.params)
def setUp(self):
    """Store the number of random problems and create a temp directory.

    ``self.tempBlossomDirectory`` is the "tempBlossom" path inside the new
    temp directory.
    """
    # This is the number of random problems to solve, handed to the test code.
    self.testNo = TestStatus.getTestSetup(shortTestNo=1,
                                          mediumTestNo=5,
                                          longTestNo=10,
                                          veryLongTestNo=100)
    self.tempFiles = []
    self.tempDir = getTempDirectory(os.getcwd())
    self.tempBlossomDirectory = "%s/tempBlossom" % self.tempDir
    unittest.TestCase.setUp(self)
def getCactusInputs_evolverPrimates(regionNumber=0, tempDir=None):
    """Gets the inputs for running cactus_workflow using some simulated,
    half megabase primate chromosomes.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.

    ``regionNumber`` and ``tempDir`` are accepted (and ignored) so that this
    function can be called with the ``regionNumber=..., tempDir=...`` keywords
    that ``runWorkflow_multipleExamples`` passes to its input generator.
    Calling it with no arguments still works.

    Returns ``(sequences, newickTreeString)``.
    """
    evolverPath = os.path.join(TestStatus.getPathToDataSets(), "evolver", "primates", "loci1")
    seqFiles = ("simHuman.chr6", "simChimp.chr6", "simGorilla.chr6", "simOrang.chr6")
    sequences = [os.path.join(evolverPath, seqFile) for seqFile in seqFiles]
    newickTreeString = parseNewickTreeFile(os.path.join(evolverPath, "tree.newick"))
    return sequences, newickTreeString
def run(self):
    """Schedule a human/mouse alignment of the evolver mammals "large" set.

    The tree returned by ``getInputs`` is discarded in favour of a fixed
    two-species newick string.
    """
    simDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "large")
    # Only the sequence paths are kept; the tree from the data set is replaced
    # by the hard-coded pair tree below.
    sequences, _ = getInputs(simDir, ("simHuman.masked.fa", "simMouse.masked.fa"))
    newickTreeString = "(simHuman:0.144018,simMouse:0.356483);"
    outputDir = os.path.join(self.options.outputDir, "%s%s" % (self.name, self.params))
    alignmentTarget = MakeAlignment(self.options, sequences, newickTreeString, outputDir, self.params)
    self.addChildTarget(alignmentTarget)
    trueMaf = os.path.join(simDir, "all.burnin.maf")
    self.setupStats(outputDir, trueMaf, self.params)
def testEvolver_Primates_Large(self):
    """Run the workflow on the evolver primates "large" data set.

    Returns immediately when SON_TRACE_DATASETS is not in the environment.
    Restricted to ``TestStatus.TEST_VERY_LONG``.
    """
    if "SON_TRACE_DATASETS" not in os.environ:
        return
    inputDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "primates", "large")
    primateSequences = ("simChimp.masked.fa", "simGorilla.masked.fa", "simHuman.masked.fa", "simOrang.masked.fa")

    def inputGenerator(regionNumber=0, tempDir=None):
        # Every example uses the same inputs; both arguments are ignored.
        return getInputs(inputDir, primateSequences)

    runWorkflow_multipleExamples(inputGenerator,
                                 testRestrictions=(TestStatus.TEST_VERY_LONG,),
                                 batchSystem=self.batchSystem,
                                 buildJobTreeStats=True)
def run(self):
    """Schedule a HUMAN/MOUSE/DOG alignment of Blanchette job 00.

    Uses a fixed three-species newick tree and calls ``setupStats`` with
    "true.maf" from the same directory.
    """
    simDir = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation", "00.job")
    # Previously obtained via: getInputs(simDir, ("HUMAN", "MOUSE"))
    sequences = tuple(os.path.join(simDir, species) for species in ("HUMAN", "MOUSE", "DOG"))
    newickTreeString = "((HUMAN:0.144018,MOUSE:0.356483):0.0238,DOG:0.197);"
    outputDir = os.path.join(self.options.outputDir, "%s%s" % (self.name, self.params))
    alignmentTarget = MakeAlignment(self.options, sequences, newickTreeString, outputDir, self.params)
    self.addChildTarget(alignmentTarget)
    trueMaf = os.path.join(simDir, "true.maf")
    self.setupStats(outputDir, trueMaf, self.params)
def testEvolver_Mammals_Large(self):
    """Run the workflow on the evolver mammals "large" data set.

    Returns immediately when SON_TRACE_DATASETS is not in the environment.
    Restricted to ``TestStatus.TEST_VERY_LONG``.
    """
    if "SON_TRACE_DATASETS" not in os.environ:
        return
    inputDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "large")
    mammalSequences = ("simCow.masked.fa", "simDog.masked.fa", "simHuman.masked.fa", "simMouse.masked.fa", "simRat.masked.fa")

    def inputGenerator(regionNumber=0, tempDir=None):
        # Every example uses the same inputs; both arguments are ignored.
        return getInputs(inputDir, mammalSequences)

    runWorkflow_multipleExamples(inputGenerator,
                                 testRestrictions=(TestStatus.TEST_VERY_LONG,),
                                 batchSystem=self.batchSystem,
                                 buildJobTreeStats=True)
def testEvolver_Primates_Loci1(self):
    """Run the workflow on the evolver primates "loci1" data set.

    Returns immediately when SON_TRACE_DATASETS is not in the environment.
    Restricted to ``TestStatus.TEST_SHORT``.
    """
    if "SON_TRACE_DATASETS" not in os.environ:
        return
    inputDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "primates", "loci1")
    primateSequences = ("simChimp.chr6", "simGorilla.chr6", "simHuman.chr6", "simOrang.chr6")

    def inputGenerator(regionNumber=0, tempDir=None):
        # Every example uses the same inputs; both arguments are ignored.
        return getInputs(inputDir, primateSequences)

    runWorkflow_multipleExamples(inputGenerator,
                                 testRestrictions=(TestStatus.TEST_SHORT,),
                                 batchSystem=self.batchSystem,
                                 buildToilStats=True)
def testEvolver_Mammals_Loci1(self):
    """Run the workflow on the evolver mammals "loci1" data set.

    Returns immediately when SON_TRACE_DATASETS is not in the environment.
    Restricted to ``TestStatus.TEST_MEDIUM``.
    """
    if "SON_TRACE_DATASETS" not in os.environ:
        return
    inputDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "loci1")
    mammalSequences = ("simCow.chr6", "simDog.chr6", "simHuman.chr6", "simMouse.chr6", "simRat.chr6")

    def inputGenerator(regionNumber=0, tempDir=None):
        # Every example uses the same inputs; both arguments are ignored.
        return getInputs(inputDir, mammalSequences)

    runWorkflow_multipleExamples(inputGenerator,
                                 testRestrictions=(TestStatus.TEST_MEDIUM,),
                                 batchSystem=self.batchSystem,
                                 buildToilStats=True)
def getCactusInputs_chromosomeX(regionNumber=0, tempDir=None):
    """Gets the inputs for running cactus_workflow using some mammalian
    chromosome X's.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.

    Neither argument is used.  Returns ``(sequences, newickTreeString)`` where
    the tree is parsed from "newickTree.txt" in the "chr_x" data set.
    """
    chrXPath = os.path.join(TestStatus.getPathToDataSets(), "chr_x")
    seqFiles = ("cow.fa", "dog.fa", "human.fa", "mouse.fa", "rat.fa")
    sequences = []
    for seqFile in seqFiles:
        sequences.append(os.path.join(chrXPath, seqFile))
    treePath = os.path.join(chrXPath, "newickTree.txt")
    return sequences, parseNewickTreeFile(treePath)
def testENm001(self):
    """Run Ortheus.py on eight species of ENCODE region ENm001.

    Does nothing unless the test status is ``TestStatus.TEST_VERY_LONG``.
    """
    if TestStatus.getTestStatus() != TestStatus.TEST_VERY_LONG:
        return
    encodePath = "%s/MAY-2005/ENm001" % TestStatus.getPathToDataSets()
    outputPath = "%s/ortheus/encodeTest" % TestStatus.getPathToDataSets()
    # treeString = '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);'
    treeString = "((((human:0.006969,chimp:0.009727):0.025291,baboon:0.044568):0.108727,(rat:0.081244,mouse:0.072818):0.260327):0.02326,(cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.048788):0.749525;"
    fileNames = (
        "human.ENm001.fa",
        "chimp.ENm001.fa",
        "baboon.ENm001.fa",
        "rat.ENm001.fa",
        "mouse.ENm001.fa",
        "cow.ENm001.fa",
        "cat.ENm001.fa",
        "dog.ENm001.fa",
    )
    seqFiles = ["%s/%s" % (encodePath, fileName) for fileName in fileNames]
    outputFile = "%s/outputENm001.mfa" % outputPath
    command = "Ortheus.py -e %s -d '%s' -f %s -j -a -b" % (" ".join(seqFiles), treeString, outputFile)
    # A single parenthesised argument prints the same text as the
    # two-item print statement did.
    print("running command %s" % command)
    system(command)
def setUp(self):
    """Create a temp directory, the result-file paths and default arguments.

    ``self.tempFiles`` lists both result files; ``self.encodePath`` points at
    the "MAY-2005" directory inside the data-set path.
    """
    # The base-class setUp is called once; a second, redundant call that
    # followed the tempFiles initialisation has been dropped.
    unittest.TestCase.setUp(self)
    self.tempDir = getTempDirectory(os.getcwd())
    self.tempOutputFile = os.path.join(self.tempDir, "results1.txt")
    self.tempOutputFile2 = os.path.join(self.tempDir, "results2.txt")
    self.tempFiles = [self.tempOutputFile, self.tempOutputFile2]
    self.encodePath = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
    self.defaultLastzArguments = "--ambiguous=iupac"
    self.defaultRealignArguments = ""
def getCactusInputs_blanchette(regionNumber=0, tempDir=None):
    """Gets the inputs for running cactus_workflow using a blanchette
    simulated region (0 <= regionNumber < 50).

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.

    ``tempDir`` is not used.  Returns ``(sequences, newickTreeString)``; the
    sequences come from the "<NN>.job" directory for ``regionNumber`` and the
    tree from "tree.newick" in the simulation directory.
    """
    assert 0 <= regionNumber < 50
    blanchettePath = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation")
    jobDirName = "%.2i.job" % regionNumber
    # Same order as tree.
    speciesNames = ("HUMAN", "CHIMP", "BABOON", "MOUSE", "RAT", "DOG", "CAT", "PIG", "COW")
    sequences = [os.path.join(blanchettePath, jobDirName, species) for species in speciesNames]
    newickTreeString = parseNewickTreeFile(os.path.join(blanchettePath, "tree.newick"))
    return sequences, newickTreeString
def runWorkflow_multipleExamples(inputGenFunction,
                                 testNumber=1,
                                 testRestrictions=(TestStatus.TEST_SHORT, TestStatus.TEST_MEDIUM,
                                                   TestStatus.TEST_LONG, TestStatus.TEST_VERY_LONG,),
                                 inverseTestRestrictions=False,
                                 batchSystem="single_machine",
                                 buildAvgs=False, buildReference=False,
                                 configFile=None, buildJobTreeStats=False,
                                 useConstraints=False,
                                 cactusWorkflowFunction=runCactusWorkflow,
                                 buildHal=False, buildFasta=False,
                                 progressive=False):
    """A wrapper to run a number of examples.

    The examples run only when the current test status is in
    ``testRestrictions`` (or is NOT in it when ``inverseTestRestrictions`` is
    set).  For each of ``testNumber`` examples a fresh temp directory is made
    and ``inputGenFunction(regionNumber=..., tempDir=...)`` supplies the
    inputs: a ``(sequences, tree, constraints)`` triple when
    ``useConstraints`` is set, otherwise a ``(sequences, tree)`` pair.  After
    ``runWorkflow_TestScript`` returns, the experiment's ``cleanupDb()`` is
    called and the temp directory is deleted.
    """
    statusListed = TestStatus.getTestStatus() in testRestrictions
    if statusListed == bool(inverseTestRestrictions):
        # Restrictions (after optional inversion) exclude this test status.
        return
    for exampleIndex in xrange(testNumber):
        workDir = getTempDirectory(os.getcwd())
        generated = inputGenFunction(regionNumber=exampleIndex, tempDir=workDir)
        if useConstraints:
            sequences, newickTreeString, constraints = generated
        else:
            sequences, newickTreeString = generated
            constraints = None
        experiment = runWorkflow_TestScript(sequences, newickTreeString,
                                            outputDir=workDir,
                                            batchSystem=batchSystem,
                                            buildAvgs=buildAvgs,
                                            buildReference=buildReference,
                                            buildHal=buildHal,
                                            buildFasta=buildFasta,
                                            configFile=configFile,
                                            buildJobTreeStats=buildJobTreeStats,
                                            constraints=constraints,
                                            progressive=progressive,
                                            cactusWorkflowFunction=cactusWorkflowFunction)
        experiment.cleanupDb()
        system("rm -rf %s" % workDir)
        logger.info("Finished random test %i" % exampleIndex)
def getCactusInputs_encode(regionNumber=0, tempDir=None):
    """Gets the inputs for running cactus_workflow using an Encode pilot
    project region (0 <= regionNumber < 14, i.e. ENm001 .. ENm014).

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.

    ``tempDir`` is not used.  Returns ``(sequences, newickTreeString)``; the
    tree is parsed from "reducedTree.newick" in the "MAY-2005" data set.
    """
    assert 0 <= regionNumber < 14
    # Region names are one-based: regionNumber 0 is "ENm001".
    encodeRegionString = "ENm%03i" % (regionNumber + 1)
    encodeDatasetPath = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
    speciesNames = ("human", "chimp", "baboon", "mouse", "rat", "dog", "cow")
    sequences = []
    for species in speciesNames:
        fileName = "%s.%s.fa" % (species, encodeRegionString)
        sequences.append(os.path.join(encodeDatasetPath, encodeRegionString, fileName))
    newickTreeString = parseNewickTreeFile(os.path.join(encodeDatasetPath, "reducedTree.newick"))
    return sequences, newickTreeString
def seqFilePairGenerator():
    """Yield ``(seqFile1, seqFile2)`` for each species pair in each region.

    Pairs are unordered combinations of ``species`` (each species paired with
    every later one), taken per ENCODE region under "MAY-2005".  As written
    this covers region ENm001 and the single pair human/mouse.
    """
    # Get sequences.
    encodePath = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
    # Could go to six.
    encodeRegions = ["ENm00" + str(number) for number in xrange(1, 2)]
    # Other species to try: "dog", "chimp", "rat", "monodelphis", "macaque".
    species = ("human", "mouse")
    for encodeRegion in encodeRegions:
        regionPath = os.path.join(encodePath, encodeRegion)
        for position, species1 in enumerate(species):
            seqFile1 = os.path.join(regionPath, "%s.%s.fa" % (species1, encodeRegion))
            for species2 in species[position + 1:]:
                seqFile2 = os.path.join(regionPath, "%s.%s.fa" % (species2, encodeRegion))
                yield seqFile1, seqFile2
def setUp(self):
    """Store the test count from ``TestStatus`` and start an empty temp-file list."""
    testCount = TestStatus.getTestSetup()
    self.testNo = testCount
    self.tempFiles = []
    unittest.TestCase.setUp(self)
def testCactusNormalisation_Random(self):
    """Run the workflow on random inputs, once per configured test."""
    exampleCount = TestStatus.getTestSetup()
    runWorkflow_multipleExamples(getCactusInputs_random, testNumber=exampleCount)
def setUp(self):
    """Store the test count obtained from ``TestStatus.getTestSetup()``."""
    testCount = TestStatus.getTestSetup()
    self.testNo = testCount
    unittest.TestCase.setUp(self)
def setUp(self):
    """Store the test count and select the "parasol" batch system."""
    testCount = TestStatus.getTestSetup(3, 10, 0, 0)
    self.testNo = testCount
    self.batchSystem = "parasol"
    unittest.TestCase.setUp(self)
def testCactusNormalisation_Random(self):
    """Run the workflow on random inputs, once per configured test.

    This test's id (``self.id()``) is passed as the first positional argument.
    """
    testId = self.id()
    exampleCount = TestStatus.getTestSetup()
    runWorkflow_multipleExamples(testId, getCactusInputs_random, testNumber=exampleCount)
def setUp(self):
    """Store the test count and create a temp directory for the job tree."""
    unittest.TestCase.setUp(self)
    self.testNo = TestStatus.getTestSetup(1, 2, 10, 10)
    workingDir = os.getcwd()
    self.tempDir = getTempDirectory(workingDir)
    # A directory for the job tree to be created in.
    self.jobTreeDir = os.path.join(self.tempDir, "jobTree")
def testCactus_Random(self):
    """Build MAFs from cactusDisks containing trees, faces and a reference
    (the output will include the MAFs ordered by reference).
    """
    exampleCount = TestStatus.getTestSetup()
    runWorkflow_multipleExamples(getCactusInputs_random,
                                 testNumber=exampleCount,
                                 makeMAFs=True)
def testCactusCore_Random(self):
    """Run the random workflow once per configured test, each with a fresh
    random config file.

    Each config file comes from ``getRandomConfigFile()`` and is removed after
    its run, including when the workflow raises.
    """
    for _ in xrange(TestStatus.getTestSetup()):
        randomConfigFile = getRandomConfigFile()
        try:
            runWorkflow_multipleExamples(getCactusInputs_random,
                                         configFile=randomConfigFile)
        finally:
            # Remove the config file even on failure so it is not left behind.
            os.remove(randomConfigFile)
def setUp(self):
    """Store the test count, create a temp directory and an empty temp-file list."""
    testCount = TestStatus.getTestSetup()
    self.testNo = testCount
    unittest.TestCase.setUp(self)
    workingDir = os.getcwd()
    self.tempDir = getTempDirectory(workingDir)
    self.tempFiles = []
def setUp(self):
    """Store the test count obtained from ``TestStatus.getTestSetup(1, 2, 10, 10)``."""
    unittest.TestCase.setUp(self)
    testCount = TestStatus.getTestSetup(1, 2, 10, 10)
    self.testNo = testCount
def testCactus_Random(self):
    """Run the workflow on random inputs with ``buildAvgs`` enabled."""
    exampleCount = TestStatus.getTestSetup()
    runWorkflow_multipleExamples(getCactusInputs_random,
                                 testNumber=exampleCount,
                                 buildAvgs=True)
def testCactusCore_Random(self):
    """Run the random workflow once per configured test, each with a fresh
    random config file.

    Each config file comes from ``getRandomConfigFile()`` and is removed after
    its run, including when the workflow raises.
    """
    for _ in xrange(TestStatus.getTestSetup()):
        randomConfigFile = getRandomConfigFile()
        try:
            runWorkflow_multipleExamples(getCactusInputs_random,
                                         configFile=randomConfigFile)
        finally:
            # Remove the config file even on failure so it is not left behind.
            os.remove(randomConfigFile)
def testCactusRecursiveHalGenerator_Random(self):
    """Run the workflow on random inputs with reference, HAL and FASTA
    building enabled.
    """
    exampleCount = TestStatus.getTestSetup()
    runWorkflow_multipleExamples(getCactusInputs_random,
                                 testNumber=exampleCount,
                                 buildReference=True,
                                 buildHal=True,
                                 buildFasta=True)
def setUp(self):
    """Store the test count and the result of ``randomTreeSet()``."""
    testCount = TestStatus.getTestSetup()
    self.testNo = testCount
    unittest.TestCase.setUp(self)
    self.trees = randomTreeSet()