def testCactusSetup(self):
    """Feed randomly generated inputs to cactus setup, ``self.testNo`` times.

    Each iteration creates a temporary directory under the current working
    directory, generates random sequences and a newick tree inside it, builds
    a workflow experiment, and calls ``runCactusSetup`` twice with identical
    arguments before cleaning up the experiment database and the directory.
    """
    for testIndex in xrange(self.testNo):
        # The temp directory is handled as a path relative to the cwd.
        workDir = os.path.relpath(getTempDirectory(os.getcwd()))
        seqCount = random.choice(xrange(100))
        sequences, newickTreeString = getCactusInputs_random(
            tempDir=workDir, sequenceNumber=seqCount)

        # Setup the flower disk. The experiment is handed the same relative
        # path joined under '/data'.
        experiment = getCactusWorkflowExperimentForTest(
            sequences,
            newickTreeString,
            os.path.join('/data', os.path.relpath(workDir)))
        setupKwargs = dict(
            cactusDiskDatabaseString=experiment.getDiskDatabaseString(),
            cactusSequencesPath=os.path.join(experiment.getDbDir(),
                                             "cactusSequences"),
            sequences=sequences,
            newickTreeString=newickTreeString)

        # Setup is invoked twice with the same arguments (presumably to check
        # that re-running against an existing disk works -- TODO confirm).
        for _ in xrange(2):
            runCactusSetup(**setupKwargs)

        experiment.cleanupDb()
        system("rm -rf %s" % workDir)
        logger.info("Finished test %i of cactus_setup.py", testIndex)
def testCactusSetup(self):
    """Feed randomly generated inputs to cactus setup, ``self.testNo`` times.

    Each iteration creates a temporary directory under the current working
    directory, generates random sequences and a newick tree inside it, builds
    a workflow experiment on that directory, and calls ``runCactusSetup``
    twice with identical arguments before removing the experiment database
    and the directory.
    """
    for iteration in xrange(self.testNo):
        scratchDir = getTempDirectory(os.getcwd())
        seqCount = random.choice(xrange(100))
        sequences, newickTreeString = getCactusInputs_random(
            tempDir=scratchDir, sequenceNumber=seqCount)

        # Setup the flower disk.
        experiment = getCactusWorkflowExperimentForTest(
            sequences, newickTreeString, scratchDir)
        diskString = experiment.getDiskDatabaseString()

        # Setup is invoked twice with the same arguments (presumably to check
        # that re-running against an existing disk works -- TODO confirm).
        for _ in xrange(2):
            runCactusSetup(diskString, sequences, newickTreeString)

        experiment.cleanupDb()
        system("rm -rf %s" % scratchDir)
        logger.info("Finished test %i of cactus_setup.py", iteration)
def testCactus_Random_fixedAncestor(self):
    """Check that cactus does not crash when aligning to a fixed ancestral sequence.

    Random inputs are requested with ``treeLeafNumber=3``; the last sequence
    is removed from the list and registered as the sequence of the node named
    "root", with root reconstruction switched off. The remaining sequences
    become the leaves of a star tree, and the experiment is written to XML
    and passed to ``self.progressiveFunction``.
    """
    sequences, _ = getCactusInputs_random(treeLeafNumber=3)
    # The final sequence plays the role of the fixed ancestor.
    rootSeq = sequences.pop()

    # Create a star tree: every remaining sequence index is a leaf with
    # branch length 1.0 hanging directly off "root".
    leafLabels = []
    for leafIndex in range(len(sequences)):
        leafLabels.append("%s:1.0" % leafIndex)
    tree = "(" + ",".join(leafLabels) + ")root;"

    outputDir = getTempDirectory()
    experimentFile = os.path.join(outputDir, "experiment.xml")
    jobTreeDir = os.path.join(outputDir, "jobTree")

    experiment = getCactusWorkflowExperimentForTest(
        sequences, tree, outputDir, progressive=True)
    experiment.setSequenceID("root", rootSeq)
    experiment.setRootReconstructed(False)
    experiment.writeXML(experimentFile)

    self.progressiveFunction(
        experimentFile, jobTreeDir, 'singleMachine', False, True, True, False)
def testCactusWorkflow_Blanchette(self):
    """Run the workflow on Blanchette's simulated (colinear) regions.

    Returns immediately unless the ``SON_TRACE_DATASETS`` environment
    variable is set. Otherwise, for each of ``self.testNo`` iterations:

    * a random window of 5000 columns is cut from the true alignment,
    * the gap-free sequence of each of the first nine rows is written to its
      own fasta file,
    * the cactus workflow is run on those files and the job tree is checked
      for completion,
    * if ``mfaToMaf``, ``cactus_MAFGenerator``, ``mafComparator`` and
      ``cactus_treeStats`` all respond to ``--help``, the true and computed
      MAFs plus tree statistics are generated, compared and printed,
    * the experiment database and the temporary directories are removed.
    """
    if "SON_TRACE_DATASETS" not in os.environ:
        return

    # External programs needed for the optional alignment comparison; they
    # are probed in this order and probing stops at the first failure.
    comparisonTools = ("mfaToMaf", "cactus_MAFGenerator",
                       "mafComparator", "cactus_treeStats")

    for test in xrange(self.testNo):
        tempDir = getTempDirectory(os.getcwd())

        trueAlignment = os.path.join(TestStatus.getPathToDataSets(),
                                     "blanchettesSimulation", "00.job",
                                     "true.mfa")

        # Load the true alignment.
        columnAlignment = list(fastaAlignmentRead(trueAlignment))
        fastaHeaders = list(fastaReadHeaders(trueAlignment))
        sequenceNumber = 9

        # The tree (nine leaves, matching sequenceNumber).
        newickTreeString = "((((HUMAN:0.006969, CHIMP:0.009727):0.025291, BABOON:0.044568):0.11,(RAT:0.072818, MOUSE:0.081244):0.260342):0.023260,((DOG:0.07, CAT:0.07):0.087381,(PIG:0.06, COW:0.06):0.104728):0.04);"

        # Get random dir
        testDir = getTempDirectory(tempDir)

        # Random alignment window.
        alignmentLength = 5000
        randomStart = random.choice(
            xrange(len(columnAlignment) - alignmentLength))
        subAlignment = columnAlignment[randomStart:randomStart + alignmentLength]
        logger.info("Got a sub alignment, it is %i columns long",
                    len(subAlignment))

        # Get sequences: one (header, residues) pair per row, gaps dropped.
        sequences = [
            (fastaHeaders[seqNo],
             "".join([column[seqNo] for column in subAlignment
                      if column[seqNo] != '-']))
            for seqNo in xrange(sequenceNumber)
        ]
        logger.info("Got the sequences")

        # Write sequences into temp files, one fasta file per sequence.
        tempFastaFiles = []
        for seqNo, (header, sequence) in enumerate(sequences):
            logger.info("Making temp file for header: %s, seq: %s",
                        header, sequence)
            tempFastaFile = os.path.join(testDir, "%i.fa" % seqNo)
            tempFastaFiles.append(tempFastaFile)
            # 'with' closes the handle even if fastaWrite raises.
            with open(tempFastaFile, "w") as fileHandle:
                fastaWrite(fileHandle, header, sequence)
        logger.info("Got the temp sequence files")

        experiment = getCactusWorkflowExperimentForTest(
            tempFastaFiles, newickTreeString, testDir)
        experimentFile = os.path.join(testDir, "experiment.xml")
        experiment.writeXML(experimentFile)
        cactusDiskDatabaseString = experiment.getDiskDatabaseString()

        jobTree = os.path.join(testDir, "jobTree")

        runCactusWorkflow(experimentFile, jobTree)
        logger.info("Ran the the workflow")

        # Check the output alignment
        runJobTreeStatusAndFailIfNotComplete(jobTree)
        logger.info("Checked the job tree dir")

        # The comparison only runs when every external tool is available.
        toolsAvailable = all(
            os.system("%s --help > /dev/null 2>&1" % tool) == 0
            for tool in comparisonTools)
        if toolsAvailable:
            # Output the 'TRUE' alignment file
            trueMFAFile = os.path.join(testDir, "true.mfa")
            fastaAlignmentWrite(subAlignment, fastaHeaders,
                                len(fastaHeaders), trueMFAFile)
            trueMAFFile = os.path.join(testDir, "true.maf")
            system("mfaToMaf --mfaFile %s --outputFile %s --logLevel %s"
                   % (trueMFAFile, trueMAFFile, getLogLevelString()))
            system("cat %s" % trueMAFFile)

            # Now get mafs for the region.
            mAFFile = os.path.join(testDir, "flower.maf")
            system("cactus_MAFGenerator --flowerName 0 --cactusDisk '%s' --outputFile %s --logLevel %s"
                   % (cactusDiskDatabaseString, mAFFile, getLogLevelString()))
            logger.info("Got the MAFs from the flower disk")
            system("cat %s" % mAFFile)

            statsFile = os.path.join(testDir, "stats.xml")
            system("cactus_treeStats --cactusDisk '%s' --flowerName 0 --outputFile %s --logLevel %s"
                   % (cactusDiskDatabaseString, statsFile, getLogLevelString()))
            system("cat %s" % statsFile)
            logger.info("Got the cactus tree stats")

            # Now compare the mafs to the output.
            resultsFile = os.path.join(testDir, "results.xml")
            system("mafComparator --mafFile1 %s --mafFile2 %s --outputFile %s --logLevel %s"
                   % (trueMAFFile, mAFFile, resultsFile, getLogLevelString()))
            logger.info("Ran the maf comparator")
            system("cat %s" % resultsFile)

        # Cleanup. Done whether or not the optional comparison ran, so the
        # experiment database is not left behind when the tools are missing.
        experiment.cleanupDb()
        system("rm -rf %s" % testDir)
        logger.info("Successfully ran test for the problem")
        system("rm -rf %s" % tempDir)