예제 #1
0
 def testSimulation(self):
     if TestStatus.getTestStatus() == TestStatus.TEST_LONG:
         blanchettePath = TestStatus.getPathToDataSets() + "/blanchettesSimulation/00.job"
         outputPath = TestStatus.getPathToDataSets() + "/ortheus/blanchettesSimulationTest"
         treeString = "(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);"
         seqFiles = ["HUMAN", "CHIMP", "BABOON", "RAT", "MOUSE", "COW", "CAT", "DOG"]
         seqFiles = [blanchettePath + "/" + i for i in seqFiles]
         outputFile = outputPath + "/outputJob1.mfa"
         command = "Ortheus.py -e %s -d '%s' -f %s -j -a -b" % (" ".join(seqFiles), treeString, outputFile)
         print "running command", command
         system(command)
예제 #2
0
 def testSimulation(self):
     if TestStatus.getTestStatus() == TestStatus.TEST_LONG:
         blanchettePath = TestStatus.getPathToDataSets(
         ) + "/blanchettesSimulation/00.job"
         outputPath = TestStatus.getPathToDataSets(
         ) + "/ortheus/blanchettesSimulationTest"
         treeString = '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);'
         seqFiles = [
             "HUMAN", "CHIMP", "BABOON", "RAT", "MOUSE", "COW", "CAT", "DOG"
         ]
         seqFiles = [blanchettePath + "/" + i for i in seqFiles]
         outputFile = outputPath + "/outputJob1.mfa"
         command = "Ortheus.py -e %s -d '%s' -f %s -j -a -b" % \
         (" ".join(seqFiles), treeString, outputFile)
         print "running command", command
         system(command)
예제 #3
0
 def setUp(self):
     """Set up paths for the ENm001 ENCODE region and a temporary directory.

     Populates ``encodeRegion``, ``encodePath``, ``regionPath``, ``tempDir``
     and ``tempOutputFile`` on the test instance.
     """
     unittest.TestCase.setUp(self)
     self.encodeRegion = region = "ENm001"
     self.encodePath = encodeDir = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
     self.regionPath = os.path.join(encodeDir, region)
     # getTempDirectory is handed the current working directory.
     self.tempDir = scratchDir = getTempDirectory(os.getcwd())
     self.tempOutputFile = os.path.join(scratchDir, "results1.txt")
예제 #4
0
 def testEvolver_Primates_Large(self):
     """Run the multi-example workflow on the large evolver primates data set."""
     datasetDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "primates", "large")
     fastaNames = ("simChimp.masked.fa", "simGorilla.masked.fa", "simHuman.masked.fa", "simOrang.masked.fa")

     def inputGenerator(regionNumber=0, tempDir=None):
         # Both parameters are accepted but ignored; the inputs are fixed.
         return getInputs(datasetDir, fastaNames)

     runWorkflow_multipleExamples(self.id(),
                                  inputGenerator,
                                  batchSystem=self.batchSystem,
                                  buildToilStats=True)
예제 #5
0
 def testEvolver_Mammals_Large(self):
     """Run the multi-example workflow on the large evolver mammals data set."""
     datasetDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "large")
     fastaNames = ("simCow.masked.fa", "simDog.masked.fa", "simHuman.masked.fa", "simMouse.masked.fa", "simRat.masked.fa")

     def inputGenerator(regionNumber=0, tempDir=None):
         # Both parameters are accepted but ignored; the inputs are fixed.
         return getInputs(datasetDir, fastaNames)

     runWorkflow_multipleExamples(self.id(),
                                  inputGenerator,
                                  batchSystem=self.batchSystem,
                                  buildToilStats=True)
 def run(self):
     """Score every predicted Blanchette alignment against its true alignment.

     For each repeat ``i`` below ``self.options.blanchetteRepeats`` the true
     MFA alignment is converted to MAF with ``mfaToMaf``, renamed with
     ``applyNamingToMaf`` using that repeat's ``experiment.xml``, and compared
     to the predicted MAF with ``mafComparator``.  Each comparison is copied
     to ``<outputDir>/<i>/mafComparison.xml`` and then merged with the result
     carried over from the earlier repeats.  The final merged result is moved
     to ``<outputDir>/mafComparison.xml``; with zero repeats nothing is moved.
     """
     previousOutputFile = None
     blanchettePath = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation")
     # The tree file does not depend on the repeat, so resolve it once.
     treeFile = os.path.join(blanchettePath, "tree.newick")
     for i in xrange(self.options.blanchetteRepeats):
         repeatDir = os.path.join(self.outputDir, str(i))

         trueAlignmentMFA = os.path.join(blanchettePath, "%.2i.job" % i, "true.mfa")
         trueAlignmentMAF = os.path.join(self.getLocalTempDir(), "temp.maf")
         system("mfaToMaf --mfaFile %s --outputFile %s --treeFile %s" % (trueAlignmentMFA, trueAlignmentMAF, treeFile))

         # From here on the renamed copy stands in for the true alignment.
         trueRenamedMAF = trueAlignmentMAF + ".renamed"
         expPath = os.path.join(repeatDir, "experiment.xml")
         applyNamingToMaf(expPath, trueAlignmentMAF, trueRenamedMAF)
         trueAlignmentMAF = trueRenamedMAF

         # Kept as an explicit comparison with False so that any other value
         # (e.g. None) still selects the vanilla output, as before.
         if self.params.vanilla == False:
             predictedAlignmentMaf = os.path.join(repeatDir, "progressiveCactusAlignment", "Anc0", "Anc0.maf")
         else:
             predictedAlignmentMaf = os.path.join(repeatDir, "cactusVanilla.maf")

         outputFile = os.path.join(self.getLocalTempDir(), "temp%i" % i)
         system("mafComparator --mafFile1 %s --mafFile2 %s --outputFile %s" % (trueAlignmentMAF, predictedAlignmentMaf, outputFile))
         # The per-repeat copy is taken before merging, so it holds this repeat only.
         system("cp %s %s" % (outputFile, os.path.join(repeatDir, "mafComparison.xml")))
         if previousOutputFile is not None:
             # The merged result overwrites this repeat's output file.
             system("mergeMafComparatorResults.py --results1 %s --results2 %s --outputFile %s" % (outputFile, previousOutputFile, outputFile))
         previousOutputFile = outputFile

     # Without any repeats there is no result file; avoid running "mv None ...".
     if previousOutputFile is not None:
         system("mv %s %s" % (previousOutputFile, os.path.join(self.outputDir, "mafComparison.xml")))
 def run(self):
     """Add an alignment child target for the evolver mammals loci1 data and set up its stats."""
     simDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "loci1")
     chromosomeNames = ("simHuman.chr6", "simMouse.chr6", "simRat.chr6", "simCow.chr6", "simDog.chr6")
     sequences, newickTreeString = getInputs(simDir, chromosomeNames)
     # The output directory name is the target name followed by the string
     # form of the parameters.
     outputDir = os.path.join(self.options.outputDir, "%s%s"  % (self.name, self.params))
     alignmentTarget = MakeAlignment(self.options, sequences, newickTreeString, outputDir,
                                     self.params)
     self.addChildTarget(alignmentTarget)
     referenceMaf = os.path.join(simDir, "all.burnin.maf")
     self.setupStats(outputDir, referenceMaf, self.params)
예제 #8
0
def getInputs(path, sequenceNames):
    """Return the sequence file paths and Newick tree for a data set directory.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.

    path -- data set directory.  It is joined onto
        TestStatus.getPathToDataSets(), so an absolute path is used as is
        and a relative path is resolved inside the datasets root.
    sequenceNames -- file names inside that directory (the original note
        says these are in the same order as the tree).

    Returns a (sequences, newickTreeString) pair, where the tree is read
    from "tree.newick" in the same resolved directory as the sequences.
    """
    seqPath = os.path.join(TestStatus.getPathToDataSets(), path)
    sequences = [os.path.join(seqPath, sequence) for sequence in sequenceNames]
    # Use the resolved directory for the tree too; joining onto the raw
    # ``path`` would look for the tree relative to the current working
    # directory whenever ``path`` is relative.
    newickTreeString = parseNewickTreeFile(os.path.join(seqPath, "tree.newick"))
    return sequences, newickTreeString
예제 #9
0
파일: test.py 프로젝트: rndw/cactus
def getInputs(path, sequenceNames):
    """Return the sequence file paths and Newick tree for a data set directory.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.

    path -- data set directory.  It is joined onto
        TestStatus.getPathToDataSets(), so an absolute path is used as is
        and a relative path is resolved inside the datasets root.
    sequenceNames -- file names inside that directory (the original note
        says these are in the same order as the tree).

    Returns a (sequences, newickTreeString) pair, where the tree is read
    from "tree.newick" in the same resolved directory as the sequences.
    """
    seqPath = os.path.join(TestStatus.getPathToDataSets(), path)
    sequences = [os.path.join(seqPath, sequence) for sequence in sequenceNames]
    # Use the resolved directory for the tree too; joining onto the raw
    # ``path`` would look for the tree relative to the current working
    # directory whenever ``path`` is relative.
    newickTreeString = parseNewickTreeFile(os.path.join(seqPath, "tree.newick"))
    return sequences, newickTreeString
예제 #10
0
 def testENm001(self):
     if TestStatus.getTestStatus() == TestStatus.TEST_VERY_LONG:
         encodePath = TestStatus.getPathToDataSets() + "/MAY-2005/ENm001"
         outputPath = TestStatus.getPathToDataSets() + "/ortheus/encodeTest"
         #treeString = '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);'
         treeString = '((((human:0.006969,chimp:0.009727):0.025291,baboon:0.044568):0.108727,(rat:0.081244,mouse:0.072818):0.260327):0.02326,(cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.048788):0.749525;'
         seqFiles = [
             "human.ENm001.fa", "chimp.ENm001.fa", "baboon.ENm001.fa",
             "rat.ENm001.fa", "mouse.ENm001.fa", "cow.ENm001.fa",
             "cat.ENm001.fa", "dog.ENm001.fa"
         ]
         seqFiles = [encodePath + "/" + i for i in seqFiles]
         outputFile = outputPath + "/outputENm001.mfa"
         command = "Ortheus.py -e %s -d '%s' -f %s -j -a -b" % \
         (" ".join(seqFiles), treeString, outputFile)
         print "running command", command
         system(command)
예제 #11
0
파일: test.py 프로젝트: rndw/cactus
def getCactusInputs_evolverPrimates():
    """Return the inputs for running cactus_workflow on simulated primate chromosomes.

    The result is a (sequences, newickTreeString) pair built from the
    evolver/primates/loci1 directory under the datasets root.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.
    """
    evolverPath = os.path.join(TestStatus.getPathToDataSets(), "evolver", "primates", "loci1")
    chromosomeNames = ("simHuman.chr6", "simChimp.chr6", "simGorilla.chr6", "simOrang.chr6")
    sequences = [os.path.join(evolverPath, name) for name in chromosomeNames]
    treePath = os.path.join(evolverPath, "tree.newick")
    return sequences, parseNewickTreeFile(treePath)
    def run(self):
        """Add a human/mouse alignment child target for the large evolver mammals data and set up its stats."""
        simDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "large")
        # Only the sequence paths are taken from getInputs; the tree it
        # returns is replaced by the fixed two-species tree below.
        sequences, _ = getInputs(simDir, ("simHuman.masked.fa", "simMouse.masked.fa"))
        newickTreeString = "(simHuman:0.144018,simMouse:0.356483);"

        outputDir = os.path.join(self.options.outputDir, "%s%s"  % (self.name, self.params))
        alignmentTarget = MakeAlignment(self.options, sequences, newickTreeString, outputDir,
                                        self.params)
        self.addChildTarget(alignmentTarget)
        referenceMaf = os.path.join(simDir, "all.burnin.maf")
        self.setupStats(outputDir, referenceMaf, self.params)
예제 #13
0
 def testEvolver_Mammals_Loci1(self):
     """Run the multi-example workflow on the evolver mammals loci1 data set.

     Returns immediately when SON_TRACE_DATASETS is not in the environment.
     """
     if os.environ.get("SON_TRACE_DATASETS") is None:
         return
     datasetDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "loci1")
     chromosomeNames = ("simCow.chr6", "simDog.chr6", "simHuman.chr6", "simMouse.chr6", "simRat.chr6")

     def inputGenerator(regionNumber=0, tempDir=None):
         # Both parameters are accepted but ignored; the inputs are fixed.
         return getInputs(datasetDir, chromosomeNames)

     runWorkflow_multipleExamples(inputGenerator,
                                  testRestrictions=(TestStatus.TEST_MEDIUM,),
                                  batchSystem=self.batchSystem,
                                  buildToilStats=True)
예제 #14
0
def getCactusInputs_evolverPrimates():
    """Return the inputs for running cactus_workflow on simulated primate chromosomes.

    The result is a (sequences, newickTreeString) pair built from the
    evolver/primates/loci1 directory under the datasets root.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.
    """
    evolverPath = os.path.join(TestStatus.getPathToDataSets(), "evolver", "primates", "loci1")
    chromosomeNames = ("simHuman.chr6", "simChimp.chr6", "simGorilla.chr6", "simOrang.chr6")
    sequences = [os.path.join(evolverPath, name) for name in chromosomeNames]
    treePath = os.path.join(evolverPath, "tree.newick")
    return sequences, parseNewickTreeFile(treePath)
예제 #15
0
 def testEvolver_Mammals_Large(self):
     """Run the multi-example workflow on the large evolver mammals data set.

     Returns immediately when SON_TRACE_DATASETS is not in the environment.
     """
     if os.environ.get("SON_TRACE_DATASETS") is None:
         return
     datasetDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "large")
     fastaNames = ("simCow.masked.fa", "simDog.masked.fa", "simHuman.masked.fa", "simMouse.masked.fa", "simRat.masked.fa")

     def inputGenerator(regionNumber=0, tempDir=None):
         # Both parameters are accepted but ignored; the inputs are fixed.
         return getInputs(datasetDir, fastaNames)

     runWorkflow_multipleExamples(inputGenerator,
                                  testRestrictions=(TestStatus.TEST_VERY_LONG,),
                                  batchSystem=self.batchSystem,
                                  buildToilStats=True)
예제 #16
0
 def testEvolver_Primates_Large(self):
     """Run the multi-example workflow on the large evolver primates data set.

     Returns immediately when SON_TRACE_DATASETS is not in the environment.
     """
     if os.environ.get("SON_TRACE_DATASETS") is None:
         return
     datasetDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "primates", "large")
     fastaNames = ("simChimp.masked.fa", "simGorilla.masked.fa", "simHuman.masked.fa", "simOrang.masked.fa")

     def inputGenerator(regionNumber=0, tempDir=None):
         # Both parameters are accepted but ignored; the inputs are fixed.
         return getInputs(datasetDir, fastaNames)

     runWorkflow_multipleExamples(inputGenerator,
                                  testRestrictions=(TestStatus.TEST_VERY_LONG,),
                                  batchSystem=self.batchSystem,
                                  buildJobTreeStats=True)
예제 #17
0
 def testEvolver_Primates_Loci1(self):
     """Run the multi-example workflow on the evolver primates loci1 data set.

     Returns immediately when SON_TRACE_DATASETS is not in the environment.
     """
     if os.environ.get("SON_TRACE_DATASETS") is None:
         return
     datasetDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "primates", "loci1")
     chromosomeNames = ("simChimp.chr6", "simGorilla.chr6", "simHuman.chr6", "simOrang.chr6")

     def inputGenerator(regionNumber=0, tempDir=None):
         # Both parameters are accepted but ignored; the inputs are fixed.
         return getInputs(datasetDir, chromosomeNames)

     runWorkflow_multipleExamples(inputGenerator,
                                  testRestrictions=(TestStatus.TEST_SHORT,),
                                  batchSystem=self.batchSystem,
                                  buildToilStats=True)
예제 #18
0
 def testEvolver_Mammals_Large(self):
     """Run the multi-example workflow on the large evolver mammals data set.

     Returns immediately when SON_TRACE_DATASETS is not in the environment.
     """
     if os.environ.get("SON_TRACE_DATASETS") is None:
         return
     datasetDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "mammals", "large")
     fastaNames = ("simCow.masked.fa", "simDog.masked.fa", "simHuman.masked.fa", "simMouse.masked.fa", "simRat.masked.fa")

     def inputGenerator(regionNumber=0, tempDir=None):
         # Both parameters are accepted but ignored; the inputs are fixed.
         return getInputs(datasetDir, fastaNames)

     runWorkflow_multipleExamples(inputGenerator,
                                  testRestrictions=(TestStatus.TEST_VERY_LONG,),
                                  batchSystem=self.batchSystem,
                                  buildJobTreeStats=True)
 def run(self):
     """Add a human/mouse/dog alignment child target for Blanchette job 00 and set up its stats."""
     simDir = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation", "00.job")
     # The tree is hard-coded for these three species rather than read via
     # getInputs.
     sequences = tuple(os.path.join(simDir, species) for species in ("HUMAN", "MOUSE", "DOG"))
     newickTreeString = "((HUMAN:0.144018,MOUSE:0.356483):0.0238,DOG:0.197);"
     outputDir = os.path.join(self.options.outputDir, "%s%s"  % (self.name, self.params))
     alignmentTarget = MakeAlignment(self.options, sequences, newickTreeString, outputDir,
                                     self.params)
     self.addChildTarget(alignmentTarget)
     referenceMaf = os.path.join(simDir, "true.maf")
     self.setupStats(outputDir, referenceMaf, self.params)
예제 #20
0
 def testEvolver_Primates_Large(self):
     """Run the multi-example workflow on the large evolver primates data set.

     Returns immediately when SON_TRACE_DATASETS is not in the environment.
     """
     if os.environ.get("SON_TRACE_DATASETS") is None:
         return
     datasetDir = os.path.join(TestStatus.getPathToDataSets(), "evolver", "primates", "large")
     fastaNames = ("simChimp.masked.fa", "simGorilla.masked.fa", "simHuman.masked.fa", "simOrang.masked.fa")

     def inputGenerator(regionNumber=0, tempDir=None):
         # Both parameters are accepted but ignored; the inputs are fixed.
         return getInputs(datasetDir, fastaNames)

     runWorkflow_multipleExamples(inputGenerator,
                                  testRestrictions=(TestStatus.TEST_VERY_LONG,),
                                  batchSystem=self.batchSystem,
                                  buildToilStats=True)
예제 #21
0
 def setUp(self):
     """Set the test count, a temporary directory with two result files, and the ENCODE path.

     Populates ``testNo``, ``tempDir``, ``tempFiles``, ``tempOutputFile``,
     ``tempOutputFile2`` and ``encodePath`` on the test instance.
     """
     self.testNo = TestStatus.getTestSetup(1, 5, 10, 100)
     self.tempDir = scratchDir = getTempDirectory(os.getcwd())
     unittest.TestCase.setUp(self)
     firstResults = os.path.join(scratchDir, "results1.txt")
     secondResults = os.path.join(scratchDir, "results2.txt")
     self.tempOutputFile = firstResults
     self.tempOutputFile2 = secondResults
     # Both result files are recorded in tempFiles, in this order.
     self.tempFiles = [firstResults, secondResults]
     self.encodePath = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
예제 #22
0
 def setUp(self):
     """Set up ENm001 paths, a temporary directory and default Toil options.

     Populates ``encodeRegion``, ``encodePath``, ``regionPath``, ``tempDir``,
     ``tempOutputFile``, ``toilDir`` and ``toilOptions`` on the test instance;
     caching is disabled on the Toil options.
     """
     unittest.TestCase.setUp(self)
     self.encodeRegion = region = "ENm001"
     self.encodePath = encodeDir = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
     self.regionPath = os.path.join(encodeDir, region)
     self.tempDir = scratchDir = getTempDirectory(os.getcwd())
     self.tempOutputFile = os.path.join(scratchDir, "results1.txt")
     # The Toil directory lives inside the temporary directory.
     self.toilDir = os.path.join(scratchDir, "toil")
     toilOptions = Job.Runner.getDefaultOptions(self.toilDir)
     self.toilOptions = toilOptions
     toilOptions.disableCaching = True
예제 #23
0
def getCactusInputs_chromosomeX(regionNumber=0, tempDir=None):
    """Return the inputs for running cactus_workflow on some mammalian X chromosomes.

    The result is a (sequences, newickTreeString) pair built from the chr_x
    directory under the datasets root.  Neither regionNumber nor tempDir is
    used.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.
    """
    chrXPath = os.path.join(TestStatus.getPathToDataSets(), "chr_x")
    fastaNames = ("cow.fa", "dog.fa", "human.fa", "mouse.fa", "rat.fa")
    sequences = [os.path.join(chrXPath, fastaName) for fastaName in fastaNames]
    treePath = os.path.join(chrXPath, "newickTree.txt")
    return sequences, parseNewickTreeFile(treePath)
예제 #24
0
파일: test.py 프로젝트: rndw/cactus
def getCactusInputs_chromosomeX(regionNumber=0, tempDir=None):
    """Return the inputs for running cactus_workflow on some mammalian X chromosomes.

    The result is a (sequences, newickTreeString) pair built from the chr_x
    directory under the datasets root.  Neither regionNumber nor tempDir is
    used.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.
    """
    chrXPath = os.path.join(TestStatus.getPathToDataSets(), "chr_x")
    fastaNames = ("cow.fa", "dog.fa", "human.fa", "mouse.fa", "rat.fa")
    sequences = [os.path.join(chrXPath, fastaName) for fastaName in fastaNames]
    treePath = os.path.join(chrXPath, "newickTree.txt")
    return sequences, parseNewickTreeFile(treePath)
예제 #25
0
 def setUp(self):
     """Set the test count, a temporary directory with two result files, and the ENCODE path.

     Populates ``testNo``, ``tempDir``, ``tempFiles``, ``tempOutputFile``,
     ``tempOutputFile2`` and ``encodePath`` on the test instance.
     """
     self.testNo = TestStatus.getTestSetup(1, 5, 10, 100)
     self.tempDir = scratchDir = getTempDirectory(os.getcwd())
     unittest.TestCase.setUp(self)
     firstResults = os.path.join(scratchDir, "results1.txt")
     secondResults = os.path.join(scratchDir, "results2.txt")
     self.tempOutputFile = firstResults
     self.tempOutputFile2 = secondResults
     # Both result files are recorded in tempFiles, in this order.
     self.tempFiles = [firstResults, secondResults]
     self.encodePath = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
예제 #26
0
 def testENm001(self):
     if TestStatus.getTestStatus() == TestStatus.TEST_VERY_LONG:
         encodePath = TestStatus.getPathToDataSets() + "/MAY-2005/ENm001"
         outputPath = TestStatus.getPathToDataSets() + "/ortheus/encodeTest"
         # treeString = '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);'
         treeString = "((((human:0.006969,chimp:0.009727):0.025291,baboon:0.044568):0.108727,(rat:0.081244,mouse:0.072818):0.260327):0.02326,(cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.048788):0.749525;"
         seqFiles = [
             "human.ENm001.fa",
             "chimp.ENm001.fa",
             "baboon.ENm001.fa",
             "rat.ENm001.fa",
             "mouse.ENm001.fa",
             "cow.ENm001.fa",
             "cat.ENm001.fa",
             "dog.ENm001.fa",
         ]
         seqFiles = [encodePath + "/" + i for i in seqFiles]
         outputFile = outputPath + "/outputENm001.mfa"
         command = "Ortheus.py -e %s -d '%s' -f %s -j -a -b" % (" ".join(seqFiles), treeString, outputFile)
         print "running command", command
         system(command)
예제 #27
0
 def setUp(self):
     """Set up a temporary directory with two result files, the ENCODE path and default aligner arguments.

     Populates ``tempDir``, ``tempFiles``, ``tempOutputFile``,
     ``tempOutputFile2``, ``encodePath``, ``defaultLastzArguments`` and
     ``defaultRealignArguments`` on the test instance.
     """
     # The base-class hook is invoked exactly once.
     unittest.TestCase.setUp(self)
     self.tempDir = getTempDirectory(os.getcwd())
     self.tempOutputFile = os.path.join(self.tempDir, "results1.txt")
     self.tempOutputFile2 = os.path.join(self.tempDir, "results2.txt")
     # Both result files are recorded in tempFiles, in this order.
     self.tempFiles = [self.tempOutputFile, self.tempOutputFile2]
     self.encodePath = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
     self.defaultLastzArguments = "--ambiguous=iupac"
     self.defaultRealignArguments = ""
예제 #28
0
def getCactusInputs_blanchette(regionNumber=0, tempDir=None):
    """Return the inputs for running cactus_workflow on a Blanchette simulated region.

    regionNumber selects the "<NN>.job" directory and must satisfy
    0 <= regionNumber < 50 (checked with an assertion).  tempDir is not used.
    The result is a (sequences, newickTreeString) pair.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.
    """
    assert 0 <= regionNumber < 50
    blanchettePath = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation")
    jobDir = os.path.join(blanchettePath, "%.2i.job" % regionNumber)
    # Same order as tree
    speciesNames = ("HUMAN", "CHIMP", "BABOON", "MOUSE", "RAT", "DOG", "CAT", "PIG", "COW")
    sequences = [os.path.join(jobDir, species) for species in speciesNames]
    treePath = os.path.join(blanchettePath, "tree.newick")
    return sequences, parseNewickTreeFile(treePath)
예제 #29
0
파일: test.py 프로젝트: rndw/cactus
def getCactusInputs_blanchette(regionNumber=0, tempDir=None):
    """Return the inputs for running cactus_workflow on a Blanchette simulated region.

    regionNumber selects the "<NN>.job" directory and must satisfy
    0 <= regionNumber < 50 (checked with an assertion).  tempDir is not used.
    The result is a (sequences, newickTreeString) pair.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.
    """
    assert 0 <= regionNumber < 50
    blanchettePath = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation")
    jobDir = os.path.join(blanchettePath, "%.2i.job" % regionNumber)
    # Same order as tree
    speciesNames = ("HUMAN", "CHIMP", "BABOON", "MOUSE", "RAT", "DOG", "CAT", "PIG", "COW")
    sequences = [os.path.join(jobDir, species) for species in speciesNames]
    treePath = os.path.join(blanchettePath, "tree.newick")
    return sequences, parseNewickTreeFile(treePath)
예제 #30
0
파일: test.py 프로젝트: rndw/cactus
def getCactusInputs_encode(regionNumber=0, tempDir=None):
    """Return the inputs for running cactus_workflow on an Encode pilot project region.

    regionNumber must satisfy 0 <= regionNumber < 14 (checked with an
    assertion); it maps to the region name "ENm%03i" % (regionNumber + 1).
    tempDir is not used.  The result is a (sequences, newickTreeString) pair.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.
    """
    assert 0 <= regionNumber < 14
    encodeRegionString = "ENm%03i" % (regionNumber+1)
    encodeDatasetPath = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
    regionDir = os.path.join(encodeDatasetPath, encodeRegionString)
    speciesNames = ("human", "chimp", "baboon", "mouse", "rat", "dog", "cow")
    # Each file is named "<species>.<region>.fa" inside the region directory.
    sequences = [os.path.join(regionDir, "%s.%s.fa" % (species, encodeRegionString))
                 for species in speciesNames]
    treePath = os.path.join(encodeDatasetPath, "reducedTree.newick")
    return sequences, parseNewickTreeFile(treePath)
예제 #31
0
def getCactusInputs_encode(regionNumber=0, tempDir=None):
    """Return the inputs for running cactus_workflow on an Encode pilot project region.

    regionNumber must satisfy 0 <= regionNumber < 14 (checked with an
    assertion); it maps to the region name "ENm%03i" % (regionNumber + 1).
    tempDir is not used.  The result is a (sequences, newickTreeString) pair.

    Requires setting SON_TRACE_DATASETS variable and having access to datasets.
    """
    assert 0 <= regionNumber < 14
    encodeRegionString = "ENm%03i" % (regionNumber+1)
    encodeDatasetPath = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
    regionDir = os.path.join(encodeDatasetPath, encodeRegionString)
    speciesNames = ("human", "chimp", "baboon", "mouse", "rat", "dog", "cow")
    # Each file is named "<species>.<region>.fa" inside the region directory.
    sequences = [os.path.join(regionDir, "%s.%s.fa" % (species, encodeRegionString))
                 for species in speciesNames]
    treePath = os.path.join(encodeDatasetPath, "reducedTree.newick")
    return sequences, parseNewickTreeFile(treePath)
예제 #32
0
def seqFilePairGenerator():
    """Yield each unordered pair of species sequence files for every ENCODE region.

    With the current settings (region ENm001 only; species human and mouse)
    exactly one (seqFile1, seqFile2) pair is produced.
    """
    encodePath = os.path.join(TestStatus.getPathToDataSets(), "MAY-2005")
    # Only ENm001 for now; the range could go up to six.
    encodeRegions = ["ENm00%i" % number for number in xrange(1, 2)]
    # Other species to try: "dog", "chimp", "rat", "monodelphis", "macaque".
    species = ("human", "mouse")
    for encodeRegion in encodeRegions:
        regionPath = os.path.join(encodePath, encodeRegion)
        fastaPaths = [os.path.join(regionPath, "%s.%s.fa" % (name, encodeRegion)) for name in species]
        # Pair every file with each file that comes after it.
        for index, seqFile1 in enumerate(fastaPaths):
            for seqFile2 in fastaPaths[index + 1:]:
                yield seqFile1, seqFile2
예제 #33
0
    def testAndyYatesFirstExample(self):
        if TestStatus.getTestStatus() == TestStatus.TEST_LONG:
            filePath = TestStatus.getPathToDataSets(
            ) + "/ortheus/andyYatesExample1"
            seqs = "seq1.fa seq2.fa seq3.fa seq4.fa seq5.fa seq6.fa seq7.fa seq8.fa seq9.fa seq10.fa seq11.fa \
            seq12.fa seq13.fa seq14.fa seq15.fa seq16.fa seq17.fa seq18.fa seq19.fa seq20.fa seq21.fa seq22.fa seq23.fa seq24.fa seq25.fa seq26.fa \
            seq27.fa seq28.fa seq29.fa seq30.fa seq31.fa seq32.fa seq33.fa seq34.fa seq35.fa seq36.fa"

            seqs = " ".join(["%s/%s" % (filePath, i) for i in seqs.split()])
            command = 'Ortheus.py -l "#-j 0 -e" -e %s -z \
            "(((1012:0.0112,1051:0.0119):0.0026,(1055:0.0015,1052:0.0018):0.0370):0.0022,1054:0.0108,1053:0.0116);" \
            -A 1054 1051 1054 1054 1053 1012 1054 1054 1053 1054 1051 1054 1051 1051 1053 1051 1051 1012 1051 1054 1012 1054 1053 1051 1053 \
            1054 1054 1051 1012 1012 1054 1053 1053 1012 1054 1051 -f %s/output.16163.mfa -g %s/output.16163.tree-a -k "# -A" -m "java -Xmx1800m -Xms1800m" -a -b'                                                                                                                                                                   % \
            (seqs, filePath, filePath)
            print "running command", command
            system(command)
예제 #34
0
 def testAndyYatesFirstExample(self):
     if TestStatus.getTestStatus() == TestStatus.TEST_LONG:
         filePath = TestStatus.getPathToDataSets() + "/ortheus/andyYatesExample1"
         seqs = "seq1.fa seq2.fa seq3.fa seq4.fa seq5.fa seq6.fa seq7.fa seq8.fa seq9.fa seq10.fa seq11.fa \
         seq12.fa seq13.fa seq14.fa seq15.fa seq16.fa seq17.fa seq18.fa seq19.fa seq20.fa seq21.fa seq22.fa seq23.fa seq24.fa seq25.fa seq26.fa \
         seq27.fa seq28.fa seq29.fa seq30.fa seq31.fa seq32.fa seq33.fa seq34.fa seq35.fa seq36.fa"
         seqs = " ".join(["%s/%s" % (filePath, i) for i in seqs.split()])
         command = (
             'Ortheus.py -l "#-j 0 -e" -e %s -z \
         "(((1012:0.0112,1051:0.0119):0.0026,(1055:0.0015,1052:0.0018):0.0370):0.0022,1054:0.0108,1053:0.0116);" \
         -A 1054 1051 1054 1054 1053 1012 1054 1054 1053 1054 1051 1054 1051 1051 1053 1051 1051 1012 1051 1054 1012 1054 1053 1051 1053 \
         1054 1054 1051 1012 1012 1054 1053 1053 1012 1054 1051 -f %s/output.16163.mfa -g %s/output.16163.tree-a -k "# -A" -m "java -Xmx1800m -Xms1800m" -a -b'
             % (seqs, filePath, filePath)
         )
         print "running command", command
         system(command)
예제 #35
0
 def testCactusWorkflow_Blanchette(self): 
     """Runs the workflow on blanchette's simulated (colinear) regions.

     Returns immediately when SON_TRACE_DATASETS is not in the environment.
     Otherwise, for each of ``self.testNo`` rounds, a random 5000-column
     window is cut from the true alignment of job 00, the ungapped sequences
     from that window are written to temporary fasta files, and the cactus
     workflow is run on them.  If the external comparison tools are all
     available, the result is exported as MAF and compared with the true
     sub-alignment using mafComparator.
     """
     if "SON_TRACE_DATASETS" not in os.environ:
         return
     for test in xrange(self.testNo):
         # NOTE(review): nothing in this method appends to tempFiles, so the
         # removal loop at the end of each round appears to do nothing.
         tempFiles = []
         tempDir = getTempDirectory(os.getcwd())
         
         trueAlignment = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation", "00.job", "true.mfa")
         
         #Load the true alignment.
         columnAlignment = [ i for i in  fastaAlignmentRead(trueAlignment) ]
         fastaHeaders = [ i for i in fastaReadHeaders(trueAlignment) ]
         # Number of sequences taken from each column; the tree below names nine species.
         sequenceNumber = 9
         
         #The tree
         newickTreeString = "((((HUMAN:0.006969, CHIMP:0.009727):0.025291, BABOON:0.044568):0.11,(RAT:0.072818, MOUSE:0.081244):0.260342):0.023260,((DOG:0.07, CAT:0.07):0.087381,(PIG:0.06, COW:0.06):0.104728):0.04);"
         
         #Get random dir
         testDir = getTempDirectory(tempDir)
         
         #random alignment
         alignmentLength = 5000
         # The start is drawn so that a full window of alignmentLength columns fits.
         # NOTE(review): this raises if the alignment has alignmentLength columns or fewer.
         randomStart = random.choice(xrange(len(columnAlignment)-alignmentLength))
         subAlignment = columnAlignment[randomStart:randomStart+alignmentLength]
         logger.info("Got a sub alignment, it is %i columns long" % len(subAlignment))
         
         #Get sequences
         # Each sequence is its column characters joined together with gap ('-') characters dropped.
         sequences = [ (fastaHeaders[seqNo], "".join([ column[seqNo] for column in subAlignment if column[seqNo] != '-' ])) for seqNo in xrange(sequenceNumber) ]
         logger.info("Got the sequences")
         
         #Write sequences into temp files
         tempFastaFiles = []
         for seqNo in xrange(sequenceNumber):
             header, sequence = sequences[seqNo]
             logger.info("Making temp file for header: %s, seq: %s" % (header, sequence))
             tempFastaFile = os.path.join(testDir, "%i.fa" % seqNo)
             tempFastaFiles.append(tempFastaFile)
             fileHandle = open(tempFastaFile, "w")
             fastaWrite(fileHandle, header, sequence)
             fileHandle.close()
         logger.info("Got the temp sequence files")
         
         # Build the experiment description and write it next to the sequences.
         experiment = getCactusWorkflowExperimentForTest(tempFastaFiles, newickTreeString, testDir)
         experimentFile = os.path.join(testDir, "experiment.xml")
         experiment.writeXML(experimentFile)
         cactusDiskDatabaseString = experiment.getDiskDatabaseString()
         
         jobTree = os.path.join(testDir, "jobTree")
         
         runCactusWorkflow(experimentFile, jobTree)
         logger.info("Ran the the workflow")
         
         #Check the output alignment
         runJobTreeStatusAndFailIfNotComplete(jobTree)
         logger.info("Checked the job tree dir")
         
         #Output the 'TRUE' alignment file
         # The comparison only runs when all four external tools respond to --help.
         # NOTE(review): experiment.cleanupDb() is inside this branch, so it is
         # skipped when any of the tools is missing.
         if os.system("mfaToMaf --help > /dev/null 2>&1") == 0 and\
            os.system("cactus_MAFGenerator --help > /dev/null 2>&1") == 0 and\
            os.system("mafComparator --help > /dev/null 2>&1") == 0 and\
            os.system("cactus_treeStats --help > /dev/null 2>&1") == 0:
             trueMFAFile = os.path.join(testDir, "true.mfa")
             fastaAlignmentWrite(subAlignment, fastaHeaders, len(fastaHeaders), trueMFAFile)
             trueMAFFile = os.path.join(testDir, "true.maf")
             system("mfaToMaf --mfaFile %s --outputFile %s --logLevel %s" % (trueMFAFile, trueMAFFile, getLogLevelString()))
             system("cat %s" % trueMAFFile)
             
             #Now get mafs for the region.
             mAFFile = os.path.join(testDir, "flower.maf")
             system("cactus_MAFGenerator --flowerName 0 --cactusDisk '%s' --outputFile %s --logLevel %s" % (cactusDiskDatabaseString, mAFFile, getLogLevelString()))
             logger.info("Got the MAFs from the flower disk")
             system("cat %s" % mAFFile)
             
             statsFile = os.path.join(testDir, "stats.xml")
             system("cactus_treeStats --cactusDisk '%s' --flowerName 0 --outputFile %s --logLevel %s" % (cactusDiskDatabaseString, statsFile, getLogLevelString()))
             system("cat %s" % statsFile)
             logger.info("Got the cactus tree stats")
             
             #Now compare the mafs to the output.
             resultsFile = os.path.join(testDir, "results.xml")
             system("mafComparator --mafFile1 %s --mafFile2 %s --outputFile %s --logLevel %s" % (trueMAFFile, mAFFile, resultsFile, getLogLevelString()))
             logger.info("Ran the maf comparator")
             
             system("cat %s" % resultsFile)
             
             #Cleanup
             experiment.cleanupDb()
             system("rm -rf %s" % testDir)
             logger.info("Successfully ran test for the problem")
             
         for tempFile in tempFiles:
             os.remove(tempFile)
         # Remove the whole temporary directory passed to getTempDirectory when testDir was created.
         system("rm -rf %s" % tempDir)