Esempi in Python per Hmm, esempi in Python per cPecan.cPecanEm.Hmm

Esempio n. 1

0

Mostra file

File: marginCallerLib.py Progetto: isovic/marginAlign

def loadHmmSubstitutionMatrix(hmmFile):
    """Load the row-normalised substitution matrix from an HMM file.

    The first len(BASES)**2 emissions of the loaded HMM are treated as
    consecutive rows of len(BASES) entries, and each entry is divided by the
    sum of the row it belongs to.

    Args:
        hmmFile: path passed to Hmm.loadHmm.

    Returns:
        A dict mapping every pair from product(BASES, BASES) to its
        normalised emission value.
    """
    hmm = Hmm.loadHmm(hmmFile)
    rowLength = len(BASES)
    emissions = hmm.emissions[:rowLength**2]
    #Sum every row once instead of once per matrix entry
    rowTotals = [sum(emissions[start:start + rowLength])
                 for start in range(0, len(emissions), rowLength)]
    #Floor division keeps the row index an integer under both Python 2 and 3,
    #and a list comprehension never reads a half-built, lazily evaluated result
    normalised = [value / rowTotals[index // rowLength]
                  for index, value in enumerate(emissions)]
    return dict(zip(product(BASES, BASES), normalised))

Esempio n. 2

0

Mostra file

File: marginCallerLib.py Progetto: tmfarrell/ont_dap

def loadHmmSubstitutionMatrix(hmmFile):
    """Load the row-normalised substitution matrix from an HMM file.

    The first len(BASES)**2 emissions of the loaded HMM are treated as
    consecutive rows of len(BASES) entries, and each entry is divided by the
    sum of the row it belongs to.

    Args:
        hmmFile: path passed to Hmm.loadHmm.

    Returns:
        A dict mapping every pair from product(BASES, BASES) to its
        normalised emission value.
    """
    hmm = Hmm.loadHmm(hmmFile)
    rowLength = len(BASES)
    emissions = hmm.emissions[:rowLength**2]
    #Sum every row once instead of once per matrix entry
    rowTotals = [
        sum(emissions[start:start + rowLength])
        for start in range(0, len(emissions), rowLength)
    ]
    #Floor division keeps the row index an integer under both Python 2 and 3,
    #and a list comprehension never reads a half-built, lazily evaluated result
    normalised = [
        value / rowTotals[index // rowLength]
        for index, value in enumerate(emissions)
    ]
    return dict(zip(product(BASES, BASES), normalised))

Esempio n. 3

0

Mostra file

File: cPecanEmTest.py Progetto: adderan/cPecan

 def testCPecanEmMultipleTrials(self):
     """Run cPecanEm with multiple random-start trials and check its outputs.

     For every sequence pair this computes alignments, trains with three
     trials of five iterations, checks that the "maxLikelihood" attribute of
     the XML model equals the likelihood of the HMM written to the output
     model file, recomputes the alignments with the emitted blast scoring
     matrix and finally runs cPecanModifyHmm on the model in place.
     """
     realignOptions = "--diagonalExpansion=6 --splitMatrixBiggerThanThis=100"
     trials = 3
     for seqFile1, seqFile2 in seqFilePairGenerator():
         tempDir = getTempDirectory(rootDir=os.getcwd())
         try:
             jobTreeDir = os.path.join(tempDir, "jobTree")
             alignmentsFile = os.path.join(tempDir, "alignments.cigars")
             computeAlignments(seqFile1, seqFile2, alignmentsFile)
             logger.info("Computed alignments for seqs %s and %s" % (seqFile1, seqFile2))
             outputModelFile = os.path.join(tempDir, "outputModel.txt")
             outputModelXMLFile = os.path.join(tempDir, "outputModel.xml")
             outputBlastFile = os.path.join(tempDir, "outputBlast.txt")
             #Train several randomly started models, keeping the HMM of every trial
             runCPecanEm(sequenceFiles=[ seqFile1, seqFile2 ],
                         alignmentsFile=alignmentsFile, outputModelFile=outputModelFile,
                         jobTreeDir=jobTreeDir,
                         trials=trials,
                         outputTrialHmms=True,
                         iterations=5, randomStart=True, logLevel=getLogLevelString(),
                         optionsToRealign=realignOptions,
                         outputXMLModelFile=outputModelXMLFile,
                         blastScoringMatrixFile=outputBlastFile)
             #Each trial's HMM is written next to the output model with a "_<trial index>" suffix
             trialHmms = [ Hmm.loadHmm(outputModelFile + ("_%i" % i)) for i in range(trials) ]
             hmm = Hmm.loadHmm(outputModelFile)
             node = ET.parse(outputModelXMLFile).getroot()
             logger.info("After multiple trials and iterations of EM the best likelihood found was %s, the likelihoods of the variants were: %s" %
                         (hmm.likelihood, " ".join(str(trialHmm.likelihood) for trialHmm in trialHmms)))

             matchProbs, gapOpen, gapExtend = makeBlastScoringMatrix(hmm, ("ACTG",))
             logger.info("Gap open: %s, Gap extend: %s, Match probs %s" % (gapOpen, gapExtend, " ".join(map(str, matchProbs))))

             #assertEqual reports both values when the comparison fails
             self.assertEqual(float(node.attrib["maxLikelihood"]), hmm.likelihood)

             #Now use the blast file to compute a new matrix
             computeAlignments(seqFile1, seqFile2, alignmentsFile, lastzArguments=("--ambiguous=iupac --scores=%s" % outputBlastFile))

             #Run modifyHmm to check it works; the results only need to load/parse
             system("cPecanModifyHmm %s %s --gcContent=0.5 --substitutionRate=0.05 --setFlatIndelEmissions" % (outputModelFile, outputModelFile))
             Hmm.loadHmm(outputModelFile)
             ET.parse(outputModelXMLFile).getroot()
         finally:
             #Remove the temporary directory even if a step above failed
             system("rm -rf %s" % tempDir)

Esempio n. 4

0

Mostra file

File: cPecanEmTest.py Progetto: ArtRand/cPecan

 def testHMMToBlast(self):
     """Write a fixed HMM to a temporary file and convert it to a blast scoring matrix.

     The HMM is loaded back with Hmm.loadHmm, passed to makeBlastScoringMatrix,
     and the resulting matrix is written to stdout in lastz format and logged.
     """
     import os #Local import: only needed to remove the temporary file
     hmmFile = getTempFile()
     try:
         #The with block closes the handle even if a write fails
         with open(hmmFile, 'w') as fH:
             ##This is an HMM trained from some nanopore data.
             fH.write("1 0.769849545837 0.192124461785 0.0373796764444 0.000454412327473 0.000191903606847 0.615582885081 0.384417114919 0.0 0.0 0.0 0.360492263331 0.0 0.639507736669 0.0 0.0 0.000837020116237 0.0 0.0 0.999162979884 0.0 0.00263503613846 0.0 0.0 0.0 0.997364963862 -83964693614.2\n")
             fH.write("0.124467347093 0.0510185372341 0.055395149667 0.0165710761929 0.0424721479638 0.107314387884 0.0326918092026 0.0169710192012 0.0512318749092 0.0282356959149 0.112202089573 0.0215204542575 0.0384239342042 0.0479915657848 0.046228721193 0.207264189725 0.0660896084237 0.0593325150728 0.0603492612177 0.0582600197325 0.0584825157865 0.0522874453523 0.0584394811677 0.0575754515175 0.0527867639602 0.0495513728754 0.0503223877237 0.0558605997644 0.0825830058041 0.076482194432 0.0786344471539 0.0829629300156 0.061340281862 0.0603951822769 0.0697104777685 0.0624365718074 0.0515829309891 0.044694507015 0.0563446095066 0.0500495229974 0.0535285782675 0.0498949344494 0.0552268591741 0.0513874548672 0.0834191759666 0.0807522229048 0.088970651067 0.0802660390805 0.0585941736742 0.0666707645145 0.0671013181763 0.0540169346484 0.0546857600483 0.0614356162075 0.0637011684438 0.0493325269862 0.0497656880321 0.0565185406621 0.0574882564972 0.0453586109249 0.0756527827468 0.0866857198132 0.0833713253471 0.0696208132772 0.0489551609873 0.0485086714065 0.055299084522 0.0480626877811 0.0360480113306 0.0352929139202 0.0404027968092 0.0362560298668 0.0504819795621 0.0505241023095 0.0581098587574 0.0526378897109 0.107006368897 0.106127475919 0.11956635493 0.10672061329")
         hmm = Hmm.loadHmm(hmmFile)
     finally:
         #The file is not needed once the HMM is in memory (or loading failed)
         os.remove(hmmFile)
     matchProbs, gapOpen, gapExtend = makeBlastScoringMatrix(hmm, ("TTTGG",))
     writeLastzScoringMatrix(sys.stdout, matchProbs, gapOpen, gapExtend)
     logger.info("Gap open: %s, Gap extend: %s, Match probs %s" % (gapOpen, gapExtend, " ".join(map(str, matchProbs))))

Esempio n. 5

0

Mostra file

File: cPecanEmTest.py Progetto: ArtRand/cPecan

 def testCPecanEm(self):
     """Check that further EM iterations increase the model likelihood.

     For every model type and sequence pair, a model is trained for one
     iteration from a non-random start, then training continues from that
     model for five more iterations. The test asserts that the likelihood of
     the second model is strictly greater than that of the first.
     """
     realignOptions = "--diagonalExpansion=6 --splitMatrixBiggerThanThis=100"
     iterations = 5
     trial = 0
     for modelType in ("fiveState", "fiveStateAsymmetric", "threeState", "threeStateAsymmetric"):
         for seqFile1, seqFile2 in seqFilePairGenerator():
             tempDir = getTempDirectory(rootDir=os.getcwd())
             try:
                 jobTreeDir = os.path.join(tempDir, "jobTree")
                 alignmentsFile = os.path.join(tempDir, "alignments.cigars")
                 computeAlignments(seqFile1, seqFile2, alignmentsFile)
                 logger.info("Computed alignments for seqs %s and %s" % (seqFile1, seqFile2))
                 outputModelFile = os.path.join(tempDir, "outputModel.txt")
                 #First run the script to generate a model and do one iteration of EM to
                 #get the likelihood to compare with the final likelihood
                 runCPecanEm(sequenceFiles=[ seqFile1, seqFile2 ],
                             alignmentsFile=alignmentsFile, outputModelFile=outputModelFile,
                             modelType=modelType,
                             jobTreeDir=jobTreeDir,
                             iterations=1, trials=1, randomStart=False, logLevel=getLogLevelString(),
                             setJukesCantorStartingEmissions=0.2,
                             #useDefaultModelAsStart=,
                             trainEmissions=True,
                             tieEmissions=True,
                             optionsToRealign=realignOptions)
                 hmm = Hmm.loadHmm(outputModelFile)
                 system("rm -rf %s" % jobTreeDir) #Cleanup the old jobTree
                 logger.info("For trial %s the likelihood after 1 iteration of EM is %s" % (trial, hmm.likelihood))
                 #Continue training from the model just written, overwriting it in place
                 runCPecanEm(sequenceFiles=[ seqFile1, seqFile2 ],
                             alignmentsFile=alignmentsFile, outputModelFile=outputModelFile, jobTreeDir=jobTreeDir,
                             optionsToRealign=realignOptions,
                             iterations=iterations, inputModelFile=outputModelFile, logLevel=getLogLevelString(),
                             maxAlignmentLengthPerJob=10000) #, updateTheBand=True)
                 hmm2 = Hmm.loadHmm(outputModelFile)
                 logger.info("For trial %s the likelihood after a further %s iterations of EM is %s" % (trial, iterations, hmm2.likelihood))
                 #The message shows both likelihoods when the assertion fails
                 self.assertTrue(hmm.likelihood < hmm2.likelihood,
                                 "Likelihood did not increase: %s -> %s" % (hmm.likelihood, hmm2.likelihood))
                 hmm2.normalise()
                 logger.info("Final transitions: %s" % " ".join(map(str, hmm2.transitions)))
                 logger.info("Final emissions: %s" % " ".join(map(str, hmm2.emissions)))
             finally:
                 #Remove the temporary directory even if a step above failed
                 system("rm -rf %s" % tempDir)
             trial += 1

Esempio n. 6

0

Mostra file

File: cPecanModifyHmm.py Progetto: adderan/cPecan

def main():
    """Command-line entry point: load an HMM, modify it and write it out.

    Usage: inputModel outputModel [options]

    The input model is loaded with Hmm.loadHmm. If --gcContent is given the
    model is passed to normaliseHmmByReferenceGCContent; it is then always
    passed to modifyHmmEmissionsByExpectedVariationRate with
    --substitutionRate (default 0.0); and if --setFlatIndelEmissions is set
    it is passed to setHmmIndelEmissionsToBeFlat. The result is written to
    the output model path.

    Raises:
        RuntimeError: if the number of positional arguments is not two, or
            if --gcContent / --substitutionRate lies outside [0, 1].
    """
    #Parse the inputs args/options
    parser = OptionParser(usage="usage: inputModel outputModel [options]", 
                          version="%prog 0.1")

    parser.add_option("--substitutionRate", dest="substitutionRate", 
                      help="The probability per base of a difference between \
                      the sequenced reference and the reference the reads are aligned to. \
                      Value must be between 0 and 1.", 
                      default=0.00, type=float)
    
    parser.add_option("--gcContent", dest="gcContent", 
                      help="The desired GC content of the model. \
                      By default no adjustment is made; value must be between 0 and 1.", 
                      default=None, type=float)
    
    parser.add_option("--setFlatIndelEmissions", dest="setFlatIndelEmissions", 
                      help="Set all indel emissions probability to be equal regardless of base.", 
                      default=False, action="store_true")

    #Parse the options/arguments
    options, args = parser.parse_args()
    
    #Print help message if no input
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    
    #Exit if the arguments are not what we expect
    if len(args) != 2:
        raise RuntimeError("Expected two arguments, got: %s" % " ".join(args))

    #Use the parsed positional arguments rather than raw sys.argv slots, so
    #that options placed before the file names do not get read as paths
    inputModelFile, outputModelFile = args

    #Load HMM
    hmm = Hmm.loadHmm(inputModelFile)

    #Normalise background emission frequencies, if a GC content was given
    if options.gcContent is not None:
        if options.gcContent < 0 or options.gcContent > 1.0:
            raise RuntimeError("GC content is not a value between 0 and 1, got: %s" % options.gcContent)
        normaliseHmmByReferenceGCContent(hmm, options.gcContent)
    
    #Modify match emissions by proposed substitution rate
    if options.substitutionRate < 0 or options.substitutionRate > 1.0:
        raise RuntimeError("Substitution rate is not a value between 0 and 1, got: %s" % options.substitutionRate)
    modifyHmmEmissionsByExpectedVariationRate(hmm, options.substitutionRate)
    
    if options.setFlatIndelEmissions:
        setHmmIndelEmissionsToBeFlat(hmm)
    
    #Write out HMM
    hmm.write(outputModelFile)

Esempio n. 7

0

Mostra file

File: cPecanEmTest.py Progetto: pbasting/cactus

 def testHMMToBlast(self):
     """Write a fixed HMM to a temporary file and convert it to a blast scoring matrix.

     The HMM is loaded back with Hmm.loadHmm, passed to makeBlastScoringMatrix,
     and the resulting matrix is written to stdout in lastz format and logged.
     """
     import os  #Local import: only needed to remove the temporary file
     hmmFile = getTempFile()
     try:
         #The with block closes the handle even if a write fails
         with open(hmmFile, 'w') as fH:
             ##This is an HMM trained from some nanopore data.
             fH.write(
                 "1 0.769849545837 0.192124461785 0.0373796764444 0.000454412327473 0.000191903606847 0.615582885081 0.384417114919 0.0 0.0 0.0 0.360492263331 0.0 0.639507736669 0.0 0.0 0.000837020116237 0.0 0.0 0.999162979884 0.0 0.00263503613846 0.0 0.0 0.0 0.997364963862 -83964693614.2\n"
             )
             fH.write(
                 "0.124467347093 0.0510185372341 0.055395149667 0.0165710761929 0.0424721479638 0.107314387884 0.0326918092026 0.0169710192012 0.0512318749092 0.0282356959149 0.112202089573 0.0215204542575 0.0384239342042 0.0479915657848 0.046228721193 0.207264189725 0.0660896084237 0.0593325150728 0.0603492612177 0.0582600197325 0.0584825157865 0.0522874453523 0.0584394811677 0.0575754515175 0.0527867639602 0.0495513728754 0.0503223877237 0.0558605997644 0.0825830058041 0.076482194432 0.0786344471539 0.0829629300156 0.061340281862 0.0603951822769 0.0697104777685 0.0624365718074 0.0515829309891 0.044694507015 0.0563446095066 0.0500495229974 0.0535285782675 0.0498949344494 0.0552268591741 0.0513874548672 0.0834191759666 0.0807522229048 0.088970651067 0.0802660390805 0.0585941736742 0.0666707645145 0.0671013181763 0.0540169346484 0.0546857600483 0.0614356162075 0.0637011684438 0.0493325269862 0.0497656880321 0.0565185406621 0.0574882564972 0.0453586109249 0.0756527827468 0.0866857198132 0.0833713253471 0.0696208132772 0.0489551609873 0.0485086714065 0.055299084522 0.0480626877811 0.0360480113306 0.0352929139202 0.0404027968092 0.0362560298668 0.0504819795621 0.0505241023095 0.0581098587574 0.0526378897109 0.107006368897 0.106127475919 0.11956635493 0.10672061329"
             )
         hmm = Hmm.loadHmm(hmmFile)
     finally:
         #The file is not needed once the HMM is in memory (or loading failed)
         os.remove(hmmFile)
     matchProbs, gapOpen, gapExtend = makeBlastScoringMatrix(
         hmm, ("TTTGG", ))
     writeLastzScoringMatrix(sys.stdout, matchProbs, gapOpen, gapExtend)
     logger.info("Gap open: %s, Gap extend: %s, Match probs %s" %
                 (gapOpen, gapExtend, " ".join(map(str, matchProbs))))

Esempio n. 8

0

Mostra file

File: marginAlignLib.py Progetto: tmfarrell/ont_dap

def learnModelFromSamFileTargetFn2(target, unnormalisedOutputModel, options):
    """Post-process the unnormalised HMM and write it to options.outputModel.

    The model stored at unnormalisedOutputModel is loaded, passed to
    setHmmIndelEmissionsToBeFlat and then to normaliseHmmByReferenceGCContent
    with a fixed GC content of 0.5. The target argument is not used here.
    """
    referenceGcContent = 0.5  #Fixed value; not read from options
    trainedHmm = Hmm.loadHmm(unnormalisedOutputModel)
    setHmmIndelEmissionsToBeFlat(trainedHmm)
    normaliseHmmByReferenceGCContent(trainedHmm, referenceGcContent)
    trainedHmm.write(options.outputModel)

Esempio n. 9

0

Mostra file

File: cPecanModifyHmm.py Progetto: pbasting/cactus

def main():
    """Command-line entry point: load an HMM, modify it and write it out.

    Usage: inputModel outputModel [options]

    The input model is loaded with Hmm.loadHmm. If --gcContent is given the
    model is passed to normaliseHmmByReferenceGCContent; it is then always
    passed to modifyHmmEmissionsByExpectedVariationRate with
    --substitutionRate (default 0.0); and if --setFlatIndelEmissions is set
    it is passed to setHmmIndelEmissionsToBeFlat. The result is written to
    the output model path.

    Raises:
        RuntimeError: if the number of positional arguments is not two, or
            if --gcContent / --substitutionRate lies outside [0, 1].
    """
    #Parse the inputs args/options
    parser = OptionParser(usage="usage: inputModel outputModel [options]",
                          version="%prog 0.1")

    parser.add_option("--substitutionRate",
                      dest="substitutionRate",
                      help="The probability per base of a difference between \
                      the sequenced reference and the reference the reads are aligned to. \
                      Value must be between 0 and 1.",
                      default=0.00,
                      type=float)

    parser.add_option("--gcContent",
                      dest="gcContent",
                      help="The desired GC content of the model. \
                      By default no adjustment is made; value must be between 0 and 1.",
                      default=None,
                      type=float)

    parser.add_option(
        "--setFlatIndelEmissions",
        dest="setFlatIndelEmissions",
        help=
        "Set all indel emissions probability to be equal regardless of base.",
        default=False,
        action="store_true")

    #Parse the options/arguments
    options, args = parser.parse_args()

    #Print help message if no input
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    #Exit if the arguments are not what we expect
    if len(args) != 2:
        raise RuntimeError("Expected two arguments, got: %s" % " ".join(args))

    #Use the parsed positional arguments rather than raw sys.argv slots, so
    #that options placed before the file names do not get read as paths
    inputModelFile, outputModelFile = args

    #Load HMM
    hmm = Hmm.loadHmm(inputModelFile)

    #Normalise background emission frequencies, if a GC content was given
    if options.gcContent is not None:
        if options.gcContent < 0 or options.gcContent > 1.0:
            raise RuntimeError(
                "GC content is not a value between 0 and 1, got: %s" %
                options.gcContent)
        normaliseHmmByReferenceGCContent(hmm, options.gcContent)

    #Modify match emissions by proposed substitution rate
    if options.substitutionRate < 0 or options.substitutionRate > 1.0:
        raise RuntimeError(
            "Substitution rate is not a value between 0 and 1, got: %s" %
            options.substitutionRate)
    modifyHmmEmissionsByExpectedVariationRate(hmm, options.substitutionRate)

    if options.setFlatIndelEmissions:
        setHmmIndelEmissionsToBeFlat(hmm)

    #Write out HMM
    hmm.write(outputModelFile)

Esempio n. 10

0

Mostra file

 def checkHmm(self, hmmFile):
     """Check an HMM file by loading it and discarding the result.

     Per the original author's note, loading performs a bunch of internal
     consistency checks.
     """
     Hmm.loadHmm(hmmFile)

Esempio n. 11

0

Mostra file

File: tests.py Progetto: tmfarrell/ont_dap

 def checkHmm(self, hmmFile):
     """Check an HMM file by loading it and discarding the result.

     Per the original author's note, loading performs a bunch of internal
     consistency checks.
     """
     Hmm.loadHmm(hmmFile)

Esempio n. 12

0

Mostra file

File: marginAlignLib.py Progetto: jpfeil/marginAlign

def learnModelFromSamFileTargetFn2(target, unnormalisedOutputModel, options):
    """Post-process the unnormalised HMM and write it to options.outputModel.

    The model stored at unnormalisedOutputModel is loaded, passed to
    setHmmIndelEmissionsToBeFlat and then to normaliseHmmByReferenceGCContent
    with a fixed GC content of 0.5. The target argument is not used here.
    """
    referenceGcContent = 0.5  #Fixed value; not read from options
    trainedHmm = Hmm.loadHmm(unnormalisedOutputModel)
    setHmmIndelEmissionsToBeFlat(trainedHmm)
    normaliseHmmByReferenceGCContent(trainedHmm, referenceGcContent)
    trainedHmm.write(options.outputModel)

Esempio n. 13

0

Mostra file

File: cPecanEmTest.py Progetto: pbasting/cactus

    def testCPecanEmMultipleTrials(self):
        """Run cPecanEm with multiple random-start trials and check its outputs.

        For every sequence pair this computes alignments, trains with three
        trials of five iterations, checks that the "maxLikelihood" attribute
        of the XML model equals the likelihood of the HMM written to the
        output model file, recomputes the alignments with the emitted blast
        scoring matrix and finally runs cPecanModifyHmm on the model in place.
        """
        realignOptions = "--diagonalExpansion=6 --splitMatrixBiggerThanThis=100"
        trials = 3
        for seqFile1, seqFile2 in seqFilePairGenerator():
            tempDir = getTempDirectory(rootDir=os.getcwd())
            try:
                jobTreeDir = os.path.join(tempDir, "jobTree")
                alignmentsFile = os.path.join(tempDir, "alignments.cigars")
                computeAlignments(seqFile1, seqFile2, alignmentsFile)
                logger.info("Computed alignments for seqs %s and %s" %
                            (seqFile1, seqFile2))
                outputModelFile = os.path.join(tempDir, "outputModel.txt")
                outputModelXMLFile = os.path.join(tempDir, "outputModel.xml")
                outputBlastFile = os.path.join(tempDir, "outputBlast.txt")
                #Train several randomly started models, keeping the HMM of every trial
                runCPecanEm(
                    sequenceFiles=[seqFile1, seqFile2],
                    alignmentsFile=alignmentsFile,
                    outputModelFile=outputModelFile,
                    jobTreeDir=jobTreeDir,
                    trials=trials,
                    outputTrialHmms=True,
                    iterations=5,
                    randomStart=True,
                    logLevel=getLogLevelString(),
                    optionsToRealign=realignOptions,
                    outputXMLModelFile=outputModelXMLFile,
                    blastScoringMatrixFile=outputBlastFile)
                #Each trial's HMM is written next to the output model with a "_<trial index>" suffix
                trialHmms = [
                    Hmm.loadHmm(outputModelFile + ("_%i" % i))
                    for i in range(trials)
                ]
                hmm = Hmm.loadHmm(outputModelFile)
                node = ET.parse(outputModelXMLFile).getroot()
                logger.info(
                    "After multiple trials and iterations of EM the best likelihood found was %s, the likelihoods of the variants were: %s"
                    % (hmm.likelihood, " ".join(
                        str(trialHmm.likelihood) for trialHmm in trialHmms)))

                matchProbs, gapOpen, gapExtend = makeBlastScoringMatrix(
                    hmm, ("ACTG", ))
                logger.info("Gap open: %s, Gap extend: %s, Match probs %s" %
                            (gapOpen, gapExtend, " ".join(map(str, matchProbs))))

                #assertEqual reports both values when the comparison fails
                self.assertEqual(
                    float(node.attrib["maxLikelihood"]), hmm.likelihood)

                #Now use the blast file to compute a new matrix
                computeAlignments(seqFile1,
                                  seqFile2,
                                  alignmentsFile,
                                  lastzArguments=("--ambiguous=iupac --scores=%s" %
                                                  outputBlastFile))

                #Run modifyHmm to check it works; the results only need to load/parse
                system(
                    "cPecanModifyHmm %s %s --gcContent=0.5 --substitutionRate=0.05 --setFlatIndelEmissions"
                    % (outputModelFile, outputModelFile))
                Hmm.loadHmm(outputModelFile)
                ET.parse(outputModelXMLFile).getroot()
            finally:
                #Remove the temporary directory even if a step above failed
                system("rm -rf %s" % tempDir)

Esempio n. 14

0

Mostra file

File: cPecanEmTest.py Progetto: pbasting/cactus

 def testCPecanEm(self):
     """Check that further EM iterations increase the model likelihood.

     For every model type and sequence pair, a model is trained for one
     iteration from a non-random start, then training continues from that
     model for five more iterations. The test asserts that the likelihood of
     the second model is strictly greater than that of the first.
     """
     realignOptions = "--diagonalExpansion=6 --splitMatrixBiggerThanThis=100"
     iterations = 5
     trial = 0
     for modelType in ("fiveState", "fiveStateAsymmetric", "threeState",
                       "threeStateAsymmetric"):
         for seqFile1, seqFile2 in seqFilePairGenerator():
             tempDir = getTempDirectory(rootDir=os.getcwd())
             try:
                 jobTreeDir = os.path.join(tempDir, "jobTree")
                 alignmentsFile = os.path.join(tempDir, "alignments.cigars")
                 computeAlignments(seqFile1, seqFile2, alignmentsFile)
                 logger.info("Computed alignments for seqs %s and %s" %
                             (seqFile1, seqFile2))
                 outputModelFile = os.path.join(tempDir, "outputModel.txt")
                 #First run the script to generate a model and do one iteration of EM to
                 #get the likelihood to compare with the final likelihood
                 runCPecanEm(
                     sequenceFiles=[seqFile1, seqFile2],
                     alignmentsFile=alignmentsFile,
                     outputModelFile=outputModelFile,
                     modelType=modelType,
                     jobTreeDir=jobTreeDir,
                     iterations=1,
                     trials=1,
                     randomStart=False,
                     logLevel=getLogLevelString(),
                     setJukesCantorStartingEmissions=0.2,
                     #useDefaultModelAsStart=,
                     trainEmissions=True,
                     tieEmissions=True,
                     optionsToRealign=realignOptions)
                 hmm = Hmm.loadHmm(outputModelFile)
                 system("rm -rf %s" % jobTreeDir)  #Cleanup the old jobTree
                 logger.info(
                     "For trial %s the likelihood after 1 iteration of EM is %s"
                     % (trial, hmm.likelihood))
                 #Continue training from the model just written, overwriting it in place
                 runCPecanEm(
                     sequenceFiles=[seqFile1, seqFile2],
                     alignmentsFile=alignmentsFile,
                     outputModelFile=outputModelFile,
                     jobTreeDir=jobTreeDir,
                     optionsToRealign=realignOptions,
                     iterations=iterations,
                     inputModelFile=outputModelFile,
                     logLevel=getLogLevelString(),
                     maxAlignmentLengthPerJob=10000)  #, updateTheBand=True)
                 hmm2 = Hmm.loadHmm(outputModelFile)
                 logger.info(
                     "For trial %s the likelihood after a further %s iterations of EM is %s"
                     % (trial, iterations, hmm2.likelihood))
                 #The message shows both likelihoods when the assertion fails
                 self.assertTrue(
                     hmm.likelihood < hmm2.likelihood,
                     "Likelihood did not increase: %s -> %s" %
                     (hmm.likelihood, hmm2.likelihood))
                 hmm2.normalise()
                 logger.info("Final transitions: %s" %
                             " ".join(map(str, hmm2.transitions)))
                 logger.info("Final emissions: %s" %
                             " ".join(map(str, hmm2.emissions)))
             finally:
                 #Remove the temporary directory even if a step above failed
                 system("rm -rf %s" % tempDir)
             trial += 1