Ejemplo n.º 1
0
 def testFastaReadWriteC(self):
     """Tests consistency with C version of this function.

     Each of the self.testNo rounds writes a random number (0-9) of random
     sequences to a temp file with fastaWrite, runs the external
     sonLib_fastaCTest command on that file and a second temp file, then
     reads the second file back with fastaRead and asserts that each record
     equals the corresponding record written, in the same order. Records
     read back are also echoed to stdout.
     """
     tempFile = getTempFile()
     self.tempFiles.append(tempFile)
     tempFile2 = getTempFile()
     self.tempFiles.append(tempFile2)
     for test in xrange(0, self.testNo):
         fastaNumber = random.choice(xrange(10))
         l = [ getRandomSequence() for i in xrange(fastaNumber) ]
         # Context managers close the handles even if a later step raises.
         with open(tempFile, 'w') as fileHandle:
             for name, seq in l:
                 fastaWrite(fileHandle, name, seq)

         command = "sonLib_fastaCTest %s %s" % (tempFile, tempFile2)

         # Parenthesised single-argument form prints the same text on
         # Python 2 and Python 3.
         print(command)

         system(command)

         # Reverse so that pop() hands back the records in written order.
         l.reverse()
         with open(tempFile2, 'r') as fileHandle:
             for i in fastaRead(fileHandle):
                 name, seq = i
                 assert i == l.pop()
                 fastaWrite(sys.stdout, name, seq)
Ejemplo n.º 2
0
 def makeHub(self):
     """Build an assembly hub for this test set under outputDir/hub.

     Runs hal2assemblyHub.py on the test set's HAL file, passing the test
     set label as the hub name and as both the long and short labels. When
     the misalignmentWigTrack evaluation option is set, every path matched
     by wigDir/* is passed as a comma-separated --wigDirs list.
     """
     hubArgs = (self.label, self.label, self.label, self.hal,
                getTempDirectory(), os.path.join(self.outputDir, "hub"))
     hubCommand = "hal2assemblyHub.py --hub %s --longLabel %s --shortLabel %s %s --jobTree %s/jobTree %s" % hubArgs
     wigTrack = self.getOption("Evaluation", "misalignmentWigTrack")
     if wigTrack is not None:
         wigDirs = glob(os.path.join(self.wigDir, '*'))
         hubCommand += " --wigDirs %s --nowigLiftover" % ",".join(wigDirs)
     system(hubCommand)
Ejemplo n.º 3
0
    def getCoalescences(self):
        """Run the "correct-coalescences" evaluation for this test set.

        Invokes scoreHalPhylogenies.py on the test set's HAL file. The
        reference genome comes from the coalescenceRefGenome option in the
        Evaluation section of the config, and the result path passed to the
        script is coalescences.xml inside the output directory.
        """
        referenceGenome = self.getOption("Evaluation", "coalescenceRefGenome")
        jobTreeParent = getTempDirectory()
        resultPath = os.path.join(self.outputDir, "coalescences.xml")
        commandArgs = (jobTreeParent, self.hal, referenceGenome, resultPath)
        system("scoreHalPhylogenies.py --jobTree %s/jobTree %s %s %s" % commandArgs)
Ejemplo n.º 4
0
    def align(self, progressiveCactusDir, configFile):
        """Run the alignment and keep a copy of its log.

        Changes the working directory to self.path, runs
        bin/runProgressiveCactus.sh from progressiveCactusDir with --stats,
        then copies cactus.log from the work directory into the output
        directory.
        """
        os.chdir(self.path)

        configArg = nameValue("config", configFile)
        rootArg = nameValue("root", self.getOption("Alignment", "root"))
        runArgs = (progressiveCactusDir, configArg, rootArg, self.seqFile,
                   self.workDir, self.hal)
        system("%s/bin/runProgressiveCactus.sh --stats %s %s %s %s %s" % runArgs)

        # Keep the alignment log next to the other results.
        logPath = os.path.join(self.workDir, "cactus.log")
        system("cp %s %s" % (logPath, self.outputDir))
Ejemplo n.º 5
0
    def makeDotplot(self):
        """Puts a dotplot in dotplot.pdf, given the dotplot option

        The dotplot option (Evaluation section) has the format:
        "genomeX.seqX,genomeY.seqY"

        The pattern currently in use does not match a ":start-end" range
        on the first sequence, and ignores anything from the first ":"
        onwards on the second sequence.

        Raises:
            ValueError: if the option is unset or does not match the format.
        """
        dotplotString = self.getOption("Evaluation", "dotplot")
        # Candidate pattern with optional coordinate ranges (not enabled):
        # r'(.*?)\.([^:,]*):?([0-9]*)?-?([0-9]*)?,(.*?)\.([^:]*):?([0-9]*)?-?([0-9]*)?'
        match = None
        if dotplotString is not None:
            match = re.match(r'(.*?)\.([^:,]*),(.*?)\.([^:]*)', dotplotString)
        if match is None:
            # Fail with the offending value instead of an opaque
            # TypeError/AttributeError from re.match / match.groups().
            raise ValueError(
                "Evaluation option 'dotplot' must look like "
                "'genomeX.seqX,genomeY.seqY', got %r" % (dotplotString,))
        genomeX, seqX, genomeY, seqY = match.groups()
        tempFile = os.path.join(self.workDir, "tmp.dotplot")
        system("runDotplot.py %s %s %s %s %s > %s" % \
               (self.hal, genomeX, seqX, genomeY, seqY, tempFile))
        system("plotDotplot.R %s %s" % (tempFile, os.path.join(self.outputDir,
                                                               "dotplot.pdf")))
Ejemplo n.º 6
0
    def getMafComparatorXML(self):
        """Find the precision and recall relative to the true alignment by
        running mafComparator.

        Assumes that the test set config has specified a true MAF
        containing only sequence names (not UCSC-styled "genome.chr"
        names).

        Also parses the "wiggle" parts of the XML into proper .wig
        files, if the misalignmentWigTrack option is enabled in the
        test set's config.

        The comparator output is written to mafComparator.xml in the
        output directory; the .wig files go under
        wigDir/underalignment/<genome>/ and wigDir/overalignment/<genome>/.
        """
        truth = self.getOption('Evaluation', 'truth')
        # Looked up once; None means no wiggle track was requested.
        wigTrack = self.getOption("Evaluation", "misalignmentWigTrack")

        # Extract the maf for our alignment
        test = os.path.join(getTempDirectory(), 'test.maf')
        system("hal2maf --onlySequenceNames --global --noAncestors %s %s" % \
               (self.hal, test))

        xmlPath = os.path.join(self.outputDir, "mafComparator.xml")
        comparatorCmd = "mafComparator  --samples 20000000 --maf1 %s --maf2 %s --out %s" % (truth, test, xmlPath)
        if wigTrack is not None:
            # Add the options to generate the requested wiggle track
            comparatorCmd += " " + nameValue("wigglePairs", wigTrack)
            comparatorCmd += " --wiggleBinLength 1"
        system(comparatorCmd)

        if wigTrack is None:
            return

        # Extract the wiggle files. The part of the option before the first
        # ":" is the sequence whose genome names the output directories.
        genome = getGenomeForSequence(self.hal, wigTrack.split(":")[0])
        underalignmentDir = os.path.join(self.wigDir, "underalignment", genome)
        overalignmentDir = os.path.join(self.wigDir, "overalignment", genome)
        system("mkdir -p %s %s" % (underalignmentDir, overalignmentDir))
        underalignmentPath = os.path.join(underalignmentDir, genome + ".wig")
        overalignmentPath = os.path.join(overalignmentDir, genome + ".wig")
        getWigsFromXML(xmlPath, underalignmentPath, overalignmentPath)
Ejemplo n.º 7
0
    def testCigarReadWrite(self):
        """Tests the C code for reading and writing cigars against the python parser for cigars.

        Each of the self.testNo rounds writes a random number (0-9) of
        random pairwise alignments to a temp file with cigarWrite, runs the
        external sonLib_cigarTest command on that file, then re-reads the
        file with cigarRead and asserts that the alignments match the ones
        written, in order and in number. Both versions of every alignment
        are echoed to stdout before being compared.
        """
        tempFile = getTempFile()
        self.tempFiles.append(tempFile)
        for test in xrange(0, self.testNo):
            pairwiseAlignmentNumber = random.choice(xrange(10))
            l = [
                getRandomPairwiseAlignment()
                for i in xrange(pairwiseAlignmentNumber)
            ]

            keepProbs = random.random() > 0.5
            if not keepProbs:
                # Scores are zeroed up front so the in-memory alignments
                # compare equal to what is read back without scores.
                for pA in l:
                    for op in pA.operationList:
                        op.score = 0.0

            # Context managers close the handles even if a later step raises.
            with open(tempFile, 'w') as fileHandle:
                for pairwiseAlignment in l:
                    cigarWrite(fileHandle, pairwiseAlignment, keepProbs)

            #Now call sonLib_cigarsTest and read and write chains
            command = "sonLib_cigarTest %s %s" % (tempFile, keepProbs)
            system(command)

            #Now check the chain is okay
            # Reverse so that pop() hands back alignments in written order.
            l.reverse()
            with open(tempFile, 'r') as fileHandle:
                for pairwiseAlignment in cigarRead(fileHandle):
                    pairwiseAlignment2 = l.pop()
                    cigarWrite(sys.stdout, pairwiseAlignment, keepProbs)
                    cigarWrite(sys.stdout, pairwiseAlignment2, keepProbs)
                    assert pairwiseAlignment == pairwiseAlignment2
                # Nothing written may be missing from the file read back.
                assert len(l) == 0
Ejemplo n.º 8
0
    def testCigarReadWrite(self):
        """Tests the C code for reading and writing cigars against the python parser for cigars.

        Each of the self.testNo rounds writes a random number (0-9) of
        random pairwise alignments to a temp file with cigarWrite, runs the
        external sonLib_cigarTest command on that file, then re-reads the
        file with cigarRead and asserts that the alignments match the ones
        written, in order and in number. Both versions of every alignment
        are echoed to stdout before being compared.
        """
        tempFile = getTempFile()
        self.tempFiles.append(tempFile)
        for test in xrange(0, self.testNo):
            pairwiseAlignmentNumber = random.choice(xrange(10))
            l = [ getRandomPairwiseAlignment() for i in xrange(pairwiseAlignmentNumber) ]

            keepProbs = random.random() > 0.5
            if not keepProbs:
                # Scores are zeroed up front so the in-memory alignments
                # compare equal to what is read back without scores.
                for pA in l:
                    for op in pA.operationList:
                        op.score = 0.0

            # Context managers close the handles even if a later step raises.
            with open(tempFile, 'w') as fileHandle:
                for pairwiseAlignment in l:
                    cigarWrite(fileHandle, pairwiseAlignment, keepProbs)

            #Now call sonLib_cigarsTest and read and write chains
            command = "sonLib_cigarTest %s %s" % (tempFile, keepProbs)
            system(command)

            #Now check the chain is okay
            # Reverse so that pop() hands back alignments in written order.
            l.reverse()
            with open(tempFile, 'r') as fileHandle:
                for pairwiseAlignment in cigarRead(fileHandle):
                    pairwiseAlignment2 = l.pop()
                    cigarWrite(sys.stdout, pairwiseAlignment, keepProbs)
                    cigarWrite(sys.stdout, pairwiseAlignment2, keepProbs)
                    assert pairwiseAlignment == pairwiseAlignment2
                # Nothing written may be missing from the file read back.
                assert len(l) == 0
def pipeline(target, opts):
    """Set up progressiveCactus, schedule the test sets, and record provenance.

    Initializes progressiveCactus inside the target's global temp
    directory, adds every test set returned by setupTestSets as a child
    target, redirects sys.stderr to the file "log" in opts.outputDir
    (creating that directory if needed), and writes the progressiveCactus
    and cactus git commits plus the cactus config used into
    opts.outputDir.

    Side effects: opts.progressiveCactusDir is overwritten, sys.stderr is
    replaced, and the process working directory is changed.
    """
    globalTempDir = target.getGlobalTempDir()

    # setup progressiveCactus to point to the right commit, and run
    # make. The setup step is handed the temp dir itself at this point.
    opts.progressiveCactusDir = globalTempDir
    initializeProgressiveCactus(opts)

    # FIXME this is terrible
    opts.progressiveCactusDir = os.path.join(globalTempDir, "progressiveCactus")

    testSets = setupTestSets(opts)

    # ensure our output dir exists, and redirect our stderr there for
    # logging purposes.
    outputDir = opts.outputDir
    if not os.path.isdir(outputDir):
        os.mkdir(outputDir)
    logPath = os.path.join(outputDir, "log")
    sys.stderr = open(logPath, 'w')

    for testSet in testSets:
        target.addChildTarget(testSet)

    # Put git commit in the output dir
    cactusDir = os.path.join(opts.progressiveCactusDir, "submodules/cactus")
    os.chdir(opts.progressiveCactusDir)
    system("git rev-parse HEAD > %s/progressiveCactus_version" % outputDir)
    os.chdir(cactusDir)
    system("git rev-parse HEAD > %s/cactus_version" % outputDir)

    # Put config in the output dir; without an explicit config file the
    # default config from the cactus submodule was used.
    if opts.cactusConfigFile is not None:
        configSource = opts.cactusConfigFile
    else:
        configSource = os.path.join(opts.progressiveCactusDir,
                                    "submodules/cactus/cactus_progressive_config.xml")
    system("cp %s %s/config.xml" % (configSource, outputDir))
Ejemplo n.º 10
0
def initializeProgressiveCactus(opts):
    """Points progressiveCactus to the correct commit and compiles.

    Clones progressiveCactus into opts.progressiveCactusDir, checks out
    opts.progressiveCactusBranch, pulls, and updates the submodules. If
    opts.cactusBranch is not None, that branch is checked out in
    submodules/cactus. Finally runs make in the clone.

    The process working directory is left in the clone
    (<opts.progressiveCactusDir>/progressiveCactus).
    """
    # Resolve both directories before the first chdir so that a relative
    # opts.progressiveCactusDir keeps meaning the same place throughout.
    parentDir = os.path.abspath(opts.progressiveCactusDir)
    cloneDir = os.path.join(parentDir, "progressiveCactus")

    os.chdir(parentDir)
    system("git clone https://github.com/glennhickey/progressiveCactus.git")
    os.chdir(cloneDir)
    system("git fetch")
    system("git checkout %s" % (opts.progressiveCactusBranch))
    system("git pull")
    system("git submodule update --init --recursive")
    if opts.cactusBranch is not None:
        os.chdir(os.path.join(cloneDir, "submodules/cactus"))
        system("git fetch")
        system("git checkout %s" % (opts.cactusBranch))
        # Return to the clone itself, not to opts.progressiveCactusDir
        # (its parent), so make runs in the same directory as it does
        # when no cactusBranch is given.
        os.chdir(cloneDir)
    system("make")
Ejemplo n.º 11
0
 def getCoverage(self):
     """Write the halStats all-by-all coverage output to outputDir/coverage."""
     coveragePath = os.path.join(self.outputDir, "coverage")
     statsCommand = "halStats --allCoverage %s > %s" % (self.hal, coveragePath)
     system(statsCommand)
Ejemplo n.º 12
0
 def tearDown(self):
     """Remove the temp directory and every registered temp file.

     Runs the base TestCase tearDown first, then deletes self.tempDir
     recursively and removes each path listed in self.tempFiles.
     """
     unittest.TestCase.tearDown(self)
     cleanupCommand = "rm -rf %s" % self.tempDir
     system(cleanupCommand)
     for registeredPath in self.tempFiles:
         os.remove(registeredPath)