Ejemplo n.º 1
0
    def testCactusRealign(self):
        """Check that realignment with default arguments keeps alignment coordinates.

        For every sequence pair yielded by seqFilePairGenerator(), runs lastz,
        passes its output through cactus realign with the default realign
        arguments, and asserts (via sameCoordinates) that each realigned cigar
        matches the lastz cigar found on the corresponding line.
        """
        def readAlignmentLines(path):
            # Close the file deterministically and drop blank lines. Lines
            # read from a file keep their trailing newline, so comparing a
            # line against '' never filters anything.
            with open(path, 'r') as fileHandle:
                return [line for line in fileHandle if line.strip()]

        for seqFile1, seqFile2 in seqFilePairGenerator():
            lastzOutput = getTempFile(rootDir=self.tempDir)
            runLastz(seqFile1,
                     seqFile2,
                     alignmentsFile=lastzOutput,
                     lastzArguments=self.defaultLastzArguments,
                     work_dir=self.tempDir)
            realignOutput = getTempFile(rootDir=self.tempDir)
            runCactusRealign(seqFile1,
                             seqFile2,
                             inputAlignmentsFile=lastzOutput,
                             outputAlignmentsFile=realignOutput,
                             realignArguments=self.defaultRealignArguments,
                             work_dir=self.tempDir)

            # Lines are paired positionally; zip stops at the shorter file.
            # The realign file is listed first so each name matches its source.
            for realignLine, lastzLine in zip(
                    readAlignmentLines(realignOutput),
                    readAlignmentLines(lastzOutput)):
                realignCigar = cigarReadFromString(realignLine)
                lastzCigar = cigarReadFromString(lastzLine)
                self.assertTrue(realignCigar.sameCoordinates(lastzCigar))
Ejemplo n.º 2
0
    def run(self, fileStore):
        """Align the two stored sequence files with lastz and store the result.

        Reads both sequence files from the file store (decompressing them when
        blastOptions.compressFiles is set), runs lastz, optionally runs cactus
        realign over the lastz output when blastOptions.realign is set, then
        passes the alignments through cactus_blast_convertCoordinates.

        :param fileStore: object providing readGlobalFile, getLocalTempFile
            and writeGlobalFile.
        :return: the value returned by fileStore.writeGlobalFile for the
            coordinate-converted results file.
        """
        firstSeqPath = fileStore.readGlobalFile(self.seqFileID1)
        secondSeqPath = fileStore.readGlobalFile(self.seqFileID2)
        options = self.blastOptions
        if options.compressFiles:
            firstSeqPath = decompressFastaFile(
                firstSeqPath, fileStore.getLocalTempFile())
            secondSeqPath = decompressFastaFile(
                secondSeqPath, fileStore.getLocalTempFile())

        alignmentsPath = fileStore.getLocalTempFile()
        runLastz(firstSeqPath, secondSeqPath, alignmentsPath,
                 lastzArguments=options.lastzArguments,
                 gpuLastz=options.gpuLastz)

        if options.realign:
            # From here on the realigned file stands in for the lastz output.
            realignedPath = fileStore.getLocalTempFile()
            runCactusRealign(firstSeqPath, secondSeqPath,
                             inputAlignmentsFile=alignmentsPath,
                             outputAlignmentsFile=realignedPath,
                             realignArguments=options.realignArguments)
            alignmentsPath = realignedPath

        convertedPath = fileStore.getLocalTempFile()
        conversionCommand = [
            "cactus_blast_convertCoordinates",
            alignmentsPath,
            convertedPath,
            str(options.roundsOfCoordinateConversion),
        ]
        cactus_call(parameters=conversionCommand)
        logger.info("Ran the blast okay")
        return fileStore.writeGlobalFile(convertedPath)
Ejemplo n.º 3
0
    def testCactusRealignDummy(self):
        """Check that "--rescoreOriginalAlignment" reproduces the lastz alignments.

        For every sequence pair yielded by seqFilePairGenerator(), runs lastz,
        then cactus realign with " --rescoreOriginalAlignment" appended to the
        default realign arguments, and asserts that each realigned cigar is
        not None and compares equal to the lastz cigar on the corresponding
        line.
        """
        def readAlignmentLines(path):
            # Close the file deterministically and drop blank lines. Lines
            # read from a file keep their trailing newline, so comparing a
            # line against '' never filters anything.
            with open(path, 'r') as fileHandle:
                return [line for line in fileHandle if line.strip()]

        for seqFile1, seqFile2 in seqFilePairGenerator():
            lastzOutput = getTempFile(rootDir=self.tempDir)
            runLastz(seqFile1,
                     seqFile2,
                     alignmentsFile=lastzOutput,
                     lastzArguments=self.defaultLastzArguments)
            realignOutput = getTempFile(rootDir=self.tempDir)
            runCactusRealign(seqFile1,
                             seqFile2,
                             inputAlignmentsFile=lastzOutput,
                             outputAlignmentsFile=realignOutput,
                             realignArguments=self.defaultRealignArguments +
                             " --rescoreOriginalAlignment")

            # Lines are paired positionally; zip stops at the shorter file.
            # The realign file is listed first so the None check below really
            # inspects the realigned cigar rather than the lastz one.
            for realignLine, lastzLine in zip(
                    readAlignmentLines(realignOutput),
                    readAlignmentLines(lastzOutput)):
                realignCigar = cigarReadFromString(realignLine)
                lastzCigar = cigarReadFromString(lastzLine)
                self.assertIsNotNone(realignCigar)
                self.assertEqual(realignCigar, lastzCigar)
Ejemplo n.º 4
0
    def testCactusRealignSplitSequences(self):
        """Check that splitting long indels during realignment keeps coverage unchanged.

        For every sequence pair yielded by seqFilePairGenerator(), realigns the
        lastz output twice -- once with the default realign arguments and once
        with " --splitIndelsLongerThanThis 100" appended -- and asserts that
        runCactusCoverage returns equal results for the two outputs on each of
        the two sequence files.
        """
        for seqFile1, seqFile2 in seqFilePairGenerator():
            lastzOutput = getTempFile(rootDir=self.tempDir)
            runLastz(seqFile1, seqFile2, alignmentsFile=lastzOutput,
                     lastzArguments=self.defaultLastzArguments)

            realignOutput = getTempFile(rootDir=self.tempDir)
            splitRealignOutput = getTempFile(rootDir=self.tempDir)
            try:
                runCactusRealign(seqFile1, seqFile2, inputAlignmentsFile=lastzOutput,
                                 outputAlignmentsFile=realignOutput,
                                 realignArguments=self.defaultRealignArguments)
                runCactusRealign(seqFile1, seqFile2, inputAlignmentsFile=lastzOutput,
                                 outputAlignmentsFile=splitRealignOutput,
                                 realignArguments=self.defaultRealignArguments + " --splitIndelsLongerThanThis 100")

                # Coverage is compared on seqFile1 first, then on seqFile2.
                for seqFile in (seqFile1, seqFile2):
                    splitRealignCoverage = runCactusCoverage(seqFile, splitRealignOutput)
                    realignCoverage = runCactusCoverage(seqFile, realignOutput)
                    self.assertEqual(splitRealignCoverage, realignCoverage)
            finally:
                # Remove the realign outputs even when a tool call or an
                # assertion fails, so a failing pair leaves nothing behind.
                for outputPath in (realignOutput, splitRealignOutput):
                    if os.path.exists(outputPath):
                        os.remove(outputPath)
Ejemplo n.º 5
0
    def testCactusRealignRescoreByIdentityAndProb(self):
        """Check that each rescoring mode of cactus realign yields scores in [0, 100].

        For every sequence pair yielded by seqFilePairGenerator(), runs lastz
        and then cactus realign three times, appending " --rescoreByIdentity",
        " --rescoreByPosteriorProb" and " --rescoreByIdentityIgnoringGaps" in
        turn to the default realign arguments. Asserts that the score of every
        cigar in each of the three outputs is between 0 and 100.0 inclusive.
        """
        def readAlignmentLines(path):
            # Close the file deterministically and drop blank lines. Lines
            # read from a file keep their trailing newline, so comparing a
            # line against '' never filters anything.
            with open(path, 'r') as fileHandle:
                return [line for line in fileHandle if line.strip()]

        for seqFile1, seqFile2 in seqFilePairGenerator():
            lastzOutput = getTempFile(rootDir=self.tempDir)
            runLastz(seqFile1,
                     seqFile2,
                     alignmentsFile=lastzOutput,
                     lastzArguments=self.defaultLastzArguments,
                     work_dir=self.tempDir)

            realignByIdentityOutput = getTempFile(rootDir=self.tempDir)
            runCactusRealign(seqFile1,
                             seqFile2,
                             inputAlignmentsFile=lastzOutput,
                             outputAlignmentsFile=realignByIdentityOutput,
                             realignArguments=self.defaultRealignArguments +
                             " --rescoreByIdentity",
                             work_dir=self.tempDir)

            realignByPosteriorProbOutput = getTempFile(rootDir=self.tempDir)
            runCactusRealign(seqFile1,
                             seqFile2,
                             inputAlignmentsFile=lastzOutput,
                             outputAlignmentsFile=realignByPosteriorProbOutput,
                             realignArguments=self.defaultRealignArguments +
                             " --rescoreByPosteriorProb",
                             work_dir=self.tempDir)

            realignByIdentityIgnoringGapsOutput = getTempFile(
                rootDir=self.tempDir)
            runCactusRealign(
                seqFile1,
                seqFile2,
                inputAlignmentsFile=lastzOutput,
                outputAlignmentsFile=realignByIdentityIgnoringGapsOutput,
                realignArguments=self.defaultRealignArguments +
                " --rescoreByIdentityIgnoringGaps",
                work_dir=self.tempDir)

            # The lastz lines only bound the iteration (zip stops at the
            # shortest input); their cigars are not inspected by this test.
            for realignLineByIdentity, realignLineByPosteriorProb, \
                    realignLineByIdentityIgnoringGaps, _lastzLine in zip(
                        readAlignmentLines(realignByIdentityOutput),
                        readAlignmentLines(realignByPosteriorProbOutput),
                        readAlignmentLines(realignByIdentityIgnoringGapsOutput),
                        readAlignmentLines(lastzOutput)):
                rescoredCigars = (
                    cigarReadFromString(realignLineByIdentity),
                    cigarReadFromString(realignLineByPosteriorProb),
                    cigarReadFromString(realignLineByIdentityIgnoringGaps),
                )
                # Check scores are as expected
                for rescoredCigar in rescoredCigars:
                    self.assertGreaterEqual(rescoredCigar.score, 0)
                    self.assertLessEqual(rescoredCigar.score, 100.0)
Ejemplo n.º 6
0
def runNaiveBlast(seqFile1, seqFile2, outputFile, tempDir, lastzArguments=""):
    """Run lastz once over whole copies of the two sequence files and time it.

    Copies seqFile1 and seqFile2 into tempDir as "seq1.fa" and "seq2.fa",
    runs lastz on the copies with the alignments written to outputFile, and
    returns the elapsed time in seconds (the copying is included).
    """
    began = time.time()
    copyOfSeq1, copyOfSeq2 = [os.path.join(tempDir, fileName)
                              for fileName in ("seq1.fa", "seq2.fa")]
    for original, destination in ((seqFile1, copyOfSeq1),
                                  (seqFile2, copyOfSeq2)):
        shutil.copyfile(original, destination)
    runLastz(copyOfSeq1, copyOfSeq2,
             alignmentsFile=outputFile,
             lastzArguments=lastzArguments)
    elapsed = time.time() - began
    return elapsed
Ejemplo n.º 7
0
def runNaiveBlast(seqFile1, seqFile2, outputFile, tempDir, lastzArguments=""):
    """Time a single, unsplit lastz run over copies of two sequence files.

    The inputs are first copied to "seq1.fa" and "seq2.fa" inside tempDir;
    lastz then runs on those copies and writes its alignments to outputFile.
    Returns the seconds elapsed from before the copies to after lastz ends.
    """
    timerStart = time.time()
    stagedFirst = os.path.join(tempDir, "seq1.fa")
    stagedSecond = os.path.join(tempDir, "seq2.fa")
    stagingPlan = [(seqFile1, stagedFirst), (seqFile2, stagedSecond)]
    for sourcePath, stagedPath in stagingPlan:
        shutil.copyfile(sourcePath, stagedPath)
    runLastz(
        stagedFirst,
        stagedSecond,
        alignmentsFile=outputFile,
        lastzArguments=lastzArguments,
    )
    timerStop = time.time()
    return timerStop - timerStart
Ejemplo n.º 8
0
 def testCactusRealign(self):
     """Check that realignment with default arguments keeps alignment coordinates.

     For every sequence pair yielded by seqFilePairGenerator(), runs lastz,
     passes its output through cactus realign with the default realign
     arguments, and asserts (via sameCoordinates) that each realigned cigar
     matches the lastz cigar found on the corresponding line.
     """
     def readAlignmentLines(path):
         # Close the file deterministically and drop blank lines. Lines read
         # from a file keep their trailing newline, so comparing a line
         # against '' never filters anything.
         with open(path, 'r') as fileHandle:
             return [line for line in fileHandle if line.strip()]

     for seqFile1, seqFile2 in seqFilePairGenerator():
         lastzOutput = getTempFile(rootDir=self.tempDir)
         runLastz(seqFile1, seqFile2, alignmentsFile=lastzOutput,
                  lastzArguments=self.defaultLastzArguments)
         realignOutput = getTempFile(rootDir=self.tempDir)
         runCactusRealign(seqFile1, seqFile2, inputAlignmentsFile=lastzOutput,
                          outputAlignmentsFile=realignOutput,
                          realignArguments=self.defaultRealignArguments)

         # Lines are paired positionally; zip stops at the shorter file.
         # The realign file is listed first so each name matches its source.
         for realignLine, lastzLine in zip(readAlignmentLines(realignOutput),
                                           readAlignmentLines(lastzOutput)):
             realignCigar = cigarReadFromString(realignLine)
             lastzCigar = cigarReadFromString(lastzLine)
             self.assertTrue(realignCigar.sameCoordinates(lastzCigar))
Ejemplo n.º 9
0
    def testCactusRealignDummy(self):
        """Check that "--rescoreOriginalAlignment" reproduces the lastz alignments.

        For every sequence pair yielded by seqFilePairGenerator(), runs lastz,
        then cactus realign with " --rescoreOriginalAlignment" appended to the
        default realign arguments, and asserts that each realigned cigar is
        not None and compares equal to the lastz cigar on the corresponding
        line.
        """
        def readAlignmentLines(path):
            # Close the file deterministically and drop blank lines. Lines
            # read from a file keep their trailing newline, so comparing a
            # line against '' never filters anything.
            with open(path, 'r') as fileHandle:
                return [line for line in fileHandle if line.strip()]

        for seqFile1, seqFile2 in seqFilePairGenerator():
            lastzOutput = getTempFile(rootDir=self.tempDir)
            runLastz(seqFile1, seqFile2, alignmentsFile=lastzOutput,
                     lastzArguments=self.defaultLastzArguments)
            realignOutput = getTempFile(rootDir=self.tempDir)
            runCactusRealign(seqFile1, seqFile2, inputAlignmentsFile=lastzOutput,
                             outputAlignmentsFile=realignOutput,
                             realignArguments=self.defaultRealignArguments + " --rescoreOriginalAlignment")

            # Lines are paired positionally; zip stops at the shorter file.
            # The realign file is listed first so the None check below really
            # inspects the realigned cigar rather than the lastz one.
            for realignLine, lastzLine in zip(readAlignmentLines(realignOutput),
                                              readAlignmentLines(lastzOutput)):
                realignCigar = cigarReadFromString(realignLine)
                lastzCigar = cigarReadFromString(lastzLine)
                self.assertIsNotNone(realignCigar)
                self.assertEqual(realignCigar, lastzCigar)
Ejemplo n.º 10
0
    def testCactusRealignRescoreByIdentityAndProb(self):
        """Check that each rescoring mode of cactus realign yields scores in [0, 100].

        For every sequence pair yielded by seqFilePairGenerator(), runs lastz
        and then cactus realign three times, appending " --rescoreByIdentity",
        " --rescoreByPosteriorProb" and " --rescoreByIdentityIgnoringGaps" in
        turn to the default realign arguments. Asserts that the score of every
        cigar in each of the three outputs is between 0 and 100.0 inclusive.
        """
        def readAlignmentLines(path):
            # Close the file deterministically and drop blank lines. Lines
            # read from a file keep their trailing newline, so comparing a
            # line against '' never filters anything.
            with open(path, 'r') as fileHandle:
                return [line for line in fileHandle if line.strip()]

        for seqFile1, seqFile2 in seqFilePairGenerator():
            lastzOutput = getTempFile(rootDir=self.tempDir)
            runLastz(seqFile1, seqFile2, alignmentsFile=lastzOutput,
                     lastzArguments=self.defaultLastzArguments)

            realignByIdentityOutput = getTempFile(rootDir=self.tempDir)
            runCactusRealign(seqFile1, seqFile2, inputAlignmentsFile=lastzOutput,
                             outputAlignmentsFile=realignByIdentityOutput,
                             realignArguments=self.defaultRealignArguments + " --rescoreByIdentity")

            realignByPosteriorProbOutput = getTempFile(rootDir=self.tempDir)
            runCactusRealign(seqFile1, seqFile2, inputAlignmentsFile=lastzOutput,
                             outputAlignmentsFile=realignByPosteriorProbOutput,
                             realignArguments=self.defaultRealignArguments + " --rescoreByPosteriorProb")

            realignByIdentityIgnoringGapsOutput = getTempFile(rootDir=self.tempDir)
            runCactusRealign(seqFile1, seqFile2, inputAlignmentsFile=lastzOutput,
                             outputAlignmentsFile=realignByIdentityIgnoringGapsOutput,
                             realignArguments=self.defaultRealignArguments + " --rescoreByIdentityIgnoringGaps")

            # The lastz lines only bound the iteration (zip stops at the
            # shortest input); their cigars are not inspected by this test.
            for realignLineByIdentity, realignLineByPosteriorProb, \
                    realignLineByIdentityIgnoringGaps, _lastzLine in zip(
                        readAlignmentLines(realignByIdentityOutput),
                        readAlignmentLines(realignByPosteriorProbOutput),
                        readAlignmentLines(realignByIdentityIgnoringGapsOutput),
                        readAlignmentLines(lastzOutput)):
                rescoredCigars = (
                    cigarReadFromString(realignLineByIdentity),
                    cigarReadFromString(realignLineByPosteriorProb),
                    cigarReadFromString(realignLineByIdentityIgnoringGaps),
                )
                # Check scores are as expected
                for rescoredCigar in rescoredCigars:
                    self.assertGreaterEqual(rescoredCigar.score, 0)
                    self.assertLessEqual(rescoredCigar.score, 100.0)
Ejemplo n.º 11
0
    def run(self, fileStore):
        """Align the two stored sequence files with lastz and store the result.

        Reads both sequence files from the file store (decompressing them when
        blastOptions.compressFiles is set), runs lastz, optionally runs cactus
        realign over the lastz output when blastOptions.realign is set, then
        passes the alignments through cactus_blast_convertCoordinates.

        :param fileStore: object providing readGlobalFile, getLocalTempFile
            and writeGlobalFile.
        :return: the value returned by fileStore.writeGlobalFile for the
            coordinate-converted results file.
        """
        firstSeqPath = fileStore.readGlobalFile(self.seqFileID1)
        secondSeqPath = fileStore.readGlobalFile(self.seqFileID2)
        options = self.blastOptions
        if options.compressFiles:
            firstSeqPath = decompressFastaFile(
                firstSeqPath, fileStore.getLocalTempFile())
            secondSeqPath = decompressFastaFile(
                secondSeqPath, fileStore.getLocalTempFile())

        alignmentsPath = fileStore.getLocalTempFile()
        runLastz(firstSeqPath, secondSeqPath, alignmentsPath,
                 lastzArguments=options.lastzArguments)

        if options.realign:
            # From here on the realigned file stands in for the lastz output.
            realignedPath = fileStore.getLocalTempFile()
            runCactusRealign(firstSeqPath, secondSeqPath,
                             inputAlignmentsFile=alignmentsPath,
                             outputAlignmentsFile=realignedPath,
                             realignArguments=options.realignArguments)
            alignmentsPath = realignedPath

        convertedPath = fileStore.getLocalTempFile()
        conversionCommand = [
            "cactus_blast_convertCoordinates",
            alignmentsPath,
            convertedPath,
            str(options.roundsOfCoordinateConversion),
        ]
        cactus_call(parameters=conversionCommand)
        logger.info("Ran the blast okay")
        return fileStore.writeGlobalFile(convertedPath)