Python RateEstimation.getQMatrix Examples

Programming Language: Python

Namespace/Package Name: CGAT

Class/Type: RateEstimation

Method/Function: getQMatrix

Examples at hotexamples.com: 4

Python RateEstimation.getQMatrix - 4 examples found. These are the top rated real world Python examples of CGAT.RateEstimation.getQMatrix extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

countSubstitutions(2)

getDistanceGTR(2)

getQMatrix(2)

getRateMatrix(2)

evaluateCodonPair(1)

setFrequencies(1)

Example #1

Show file

# Maps each ``options.shared_rates`` mode to four flags, one per rate
# parameter in the order (Rs, Rn, Ri, Rv).  A true flag means the parameter
# name carries the block prefix; a false flag means the un-prefixed name is
# used.  Modes not listed here prefix all four parameters.
_BLOCK_SPECIFIC_RATES = {
    "all": (False, False, False, False),
    "kappa": (True, True, False, False),
    "kappa-ds": (False, True, False, False),
    "omega": (False, False, True, True),
    "omega-ds": (False, False, True, False),
    "ds": (False, True, True, True),
}


def _rate_prefixes(shared_rates, block):
    """Return the name prefixes for (Rs, Rn, Ri, Rv) within ``block``."""
    flags = _BLOCK_SPECIFIC_RATES.get(shared_rates, (True, True, True, True))
    return tuple(block if flag else "" for flag in flags)


def processMali(mali, options):
    """Train the block-wise XRate grammar on ``mali`` and report its rates.

    For every block one tab-separated line is written to ``options.stdout``
    with the columns: annotation code, block prefix, dN, dS, omega, four
    ``na`` placeholders, kappa, log-likelihood, ``na`` and the number of
    columns annotated with the block's code.  If ``options.with_rho`` is
    set, rN, rS, t, rN0, rS0 and t0 follow.  A free-text message ends the
    line.

    Arguments
    ---------
    mali :
        the multiple alignment; it is modified in place (annotation added,
        identifiers renamed, optionally gaps removed).
    options :
        option object; the attributes read here are ``block_size``,
        ``separator``, ``xrate_min_increment``, ``clean_mali``,
        ``input_filename_tree``, ``loglevel``, ``stdlog``, ``shared_rates``,
        ``value_format``, ``with_rho`` and ``stdout``.

    Raises
    ------
    ValueError
        if the alignment has no columns.
    """
    ncols = mali.getNumColumns()

    if ncols == 0:
        # String exceptions are not valid; raise a real exception instead.
        raise ValueError("refusing to process empty alignment.")

    # add annotation of states
    if options.block_size is not None:
        if options.block_size < 1:
            # values below 1 are applied as a fraction of ncols / 3
            size = int(float(ncols) / 3.0 * options.block_size) * 3
        else:
            size = int(options.block_size) * 3

        size = min(size, ncols)
        mali.addAnnotation("STATE", "N" * size + "C" * (ncols - size))

    # remove gene ids: keep only the part before the separator.
    # Iterate over a copy because entries are renamed inside the loop.
    for identifier in list(mali.getIdentifiers()):
        if options.separator in identifier:
            species = identifier.split(options.separator)[0]
            mali.rename(identifier, species)

    map_new2old = mali.mapIdentifiers()
    map_old2new = IOTools.getInvertedDictionary(map_new2old, make_unique=True)

    xgram = XGram.XGram()

    if options.xrate_min_increment:
        xgram.setMinIncrement(options.xrate_min_increment)

    # remove empty columns and masked columns
    if options.clean_mali:
        mali.mGapChars = mali.mGapChars + ("n", "N")
        mali.removeGaps(minimum_gaps=1, frame=3)

    if options.input_filename_tree:
        # Close the tree file once it has been parsed.
        with open(options.input_filename_tree, "r") as infile:
            nexus = TreeTools.Newick2Nexus(infile)
            tree = nexus.trees[0]
        tree.relabel(map_old2new)
    else:
        tree = None

    annotation = mali.getAnnotation("STATE")
    chars = set(annotation)
    for c in chars:
        assert c in ("N", "C"), \
            "unknown annotation %s: only 'N' and 'C' are recognized" % c

    if len(chars) == 1:
        if options.loglevel >= 1:
            options.stdlog.write("# WARNING: only a single block\n")
        # A set cannot be indexed; fetch its only member through an iterator.
        blocks = (("B0_", next(iter(chars))), )
    else:
        blocks = (("B0_", "N"), ("B1_", "C"))

    # prepareGrammar returns a possibly updated alignment along with the
    # training result, so the annotation is fetched again afterwards.
    result, mali, ids = prepareGrammar(xgram, mali, tree, map_old2new, blocks,
                                       options)

    trained_model = result.getModel()

    pis, matrices = RateEstimation.getRateMatrix(trained_model)

    annotation = mali.getAnnotation("STATE")

    for block, code in blocks:

        terminals = ("%sCOD0" % block, "%sCOD1" % block, "%sCOD2" % block)

        pi = pis[terminals]

        prefix_rs, prefix_rn, prefix_ri, prefix_rv = _rate_prefixes(
            options.shared_rates, block)

        grammar = trained_model.mGrammar
        rs = grammar.getParameter('%sRs' % prefix_rs)
        rn = grammar.getParameter('%sRn' % prefix_rn)
        ri = grammar.getParameter('%sRi' % prefix_ri)
        rv = grammar.getParameter('%sRv' % prefix_rv)

        nchars = annotation.count(code)

        msg = "iter=%i Rs=%6.4f Rn=%6.4f Ri=%6.4f Rv=%6.4f" % (
            result.getNumIterations(), rs, rn, ri, rv)

        try:
            # rate matrix for the estimated parameters
            Q, t = RateEstimation.getQMatrix(pi,
                                             Rsi=rs * ri,
                                             Rsv=rs * rv,
                                             Rni=rn * ri,
                                             Rnv=rn * rv)

            # rate matrix with Rs and Rn both replaced by their mean
            avg_omega = (rs + rn) / 2.0
            Q0, t0 = RateEstimation.getQMatrix(pi,
                                               Rsi=ri * avg_omega,
                                               Rsv=rv * avg_omega,
                                               Rni=ri * avg_omega,
                                               Rnv=rv * avg_omega)

            # rate matrix with Ri and Rv both replaced by their mean
            avg_kappa = (ri + rv) / 2.0
            Q1, t1 = RateEstimation.getQMatrix(pi,
                                               Rsi=rs * avg_kappa,
                                               Rsv=rs * avg_kappa,
                                               Rni=rn * avg_kappa,
                                               Rnv=rn * avg_kappa)

            rI, rV, rS, rN = RateEstimation.countSubstitutions(pi, Q)
            rI0, rV0, rS0, rN0 = RateEstimation.countSubstitutions(pi, Q0)
            rI1, rV1, rS1, rN1 = RateEstimation.countSubstitutions(pi, Q1)

            dS = rS / (3 * rS0) * t
            dN = rN / (3 * rN0) * t

            # NOTE(review): kappa is normalised with the Q0 counts here; the
            # Q1 counts (rI1, rV1) are computed but unused - confirm that
            # this is intended.
            o_kappa = options.value_format % (rI / rI0 * rV0 / rV)
            o_omega = options.value_format % (dN / dS)

            o_dn = options.value_format % dN
            o_ds = options.value_format % dS
            o_rn = options.value_format % rN
            o_rs = options.value_format % rS
            o_rn0 = options.value_format % rN0
            o_rs0 = options.value_format % rS0
            o_t = options.value_format % t
            o_t0 = options.value_format % t0

        except ZeroDivisionError:
            # report every derived value as missing
            o_kappa = o_omega = "na"
            o_dn = o_ds = "na"
            o_rn = o_rs = "na"
            o_rn0 = o_rs0 = "na"
            o_t = o_t0 = "na"
            msg = "insufficient data to estimate rate matrix."

        options.stdout.write("\t".join(
            map(str, (code, block, o_dn, o_ds, o_omega, "na", "na", "na", "na",
                      o_kappa, result.getLogLikelihood(), "na", nchars))))

        if options.with_rho:
            options.stdout.write(
                "\t" +
                "\t".join(map(str, (o_rn, o_rs, o_t, o_rn0, o_rs0, o_t0))))

        options.stdout.write("\t%s\n" % msg)

Example #2

Show file

File: mali2kaks.py Project: Charlie-George/cgat

def outputXRateResult(mali, result, rsi, rsv, rni, rnv, msg):
    """output the results of running the Xrate four parameter grammar.

    One tab-separated line is written to ``options.stdout`` (``options`` is
    a module-level name, not a parameter) with the columns: id of the first
    entry, id of the second entry, dN, dS, omega, N, S, two ``na``
    placeholders, kappa, log-likelihood and ``na``.  If
    ``options.with_rho`` is set, rN, rS, t, rN0, rS0 and t0 follow; if
    ``options.with_counts`` is set, the result of
    ``Genomics.CalculatePairIndices`` for the first two sequences follows.
    The line ends with ``msg`` and the stream is flushed.

    Arguments
    ---------
    mali :
        alignment; its first two identifiers are reported.
    result :
        training result providing ``getModel()`` and ``getLogLikelihood()``.
    rsi, rsv, rni, rnv :
        the four rates passed to ``RateEstimation.getQMatrix`` as ``Rsi``,
        ``Rsv``, ``Rni`` and ``Rnv``.  If ``rsi`` is None, all derived
        values are reported as ``na``.
    msg :
        free-text message appended to the output line; replaced if ``rsi``
        is None, extended otherwise.
    """
    ids = mali.getIdentifiers()

    # only the frequencies are needed; the matrix returned with them is not
    pi, _ = RateEstimation.getRateMatrix(result.getModel(),
                                         terminals=('COD0', 'COD1', 'COD2'))

    if rsi is None:
        o_dn, o_ds, o_omega = "na", "na", "na"
        o_rn, o_rn0, o_rs, o_rs0 = "na", "na", "na", "na"
        o_t, o_t0 = "na", "na"
        o_N, o_S = "na", "na"
        # A plain string: a trailing comma here would create a 1-tuple that
        # is printed as "('na',)" in the kappa column.
        o_kappa = "na"
        msg = "estimated rate parameters are zero"
    else:
        Q, t = RateEstimation.getQMatrix(pi,
                                         Rsi=rsi,
                                         Rsv=rsv,
                                         Rni=rni,
                                         Rnv=rnv)

        # get rate matrix as if omega was set to 1
        Q0, t0 = RateEstimation.getQMatrix(pi,
                                           Rsi=(rsi + rni) / 2.0,
                                           Rsv=(rsv + rnv) / 2.0,
                                           Rni=(rsi + rni) / 2.0,
                                           Rnv=(rsv + rnv) / 2.0)

        # get rate matrix as if kappa was set to 1
        Q1, t1 = RateEstimation.getQMatrix(pi,
                                           Rsi=(rsi + rsv) / 2.0,
                                           Rsv=(rsi + rsv) / 2.0,
                                           Rni=(rni + rnv) / 2.0,
                                           Rnv=(rni + rnv) / 2.0)

        rI, rV, rS, rN = RateEstimation.countSubstitutions(pi, Q)
        rI0, rV0, rS0, rN0 = RateEstimation.countSubstitutions(pi, Q0)
        rI1, rV1, rS1, rN1 = RateEstimation.countSubstitutions(pi, Q1)

        # NOTE(review): an earlier comment here referred to a 64.0/61.0
        # factor ("xrate does not normalize the terminals"), but no such
        # factor is applied below - confirm whether one is required.
        dS = rS / (3 * rS0) * t
        dN = rN / (3 * rN0) * t

        o_omega = options.value_format % (dN / dS)
        o_dn = options.value_format % dN
        o_ds = options.value_format % dS
        o_rn = options.value_format % rN
        o_rs = options.value_format % rS
        o_rn0 = options.value_format % rN0
        o_rs0 = options.value_format % rS0
        o_t = options.value_format % t
        o_t0 = options.value_format % t0
        o_S = options.value_format % (mali.getNumColumns() * rS0)
        o_N = options.value_format % (mali.getNumColumns() * rN0)

        # kappa is given normalized by sites like omega
        o_kappa = options.value_format % (rI / rI1 * rV1 / rV)

        # kappa1 is given by the ratio of the rates NOT normalized by the
        # sites.
        kappa1 = options.value_format % ((rsi + rni) / (rsv + rnv))
        msg += " rI/rV=%f rI0/rV0=%f kappa1=%s" % (rI / rV,
                                                   rI0 / rV0,
                                                   kappa1)

    columns = (mali.getEntry(ids[0]).mId,
               mali.getEntry(ids[1]).mId,
               o_dn, o_ds, o_omega,
               o_N, o_S, "na", "na",
               o_kappa, result.getLogLikelihood(),
               "na")
    options.stdout.write("\t".join(map(str, columns)))

    if options.with_rho:
        options.stdout.write("\t" + "\t".join(map(str, (o_rn, o_rs, o_t,
                                                        o_rn0, o_rs0, o_t0))))

    if options.with_counts:
        info = Genomics.CalculatePairIndices(mali[ids[0]], mali[ids[1]])
        options.stdout.write("\t%s" % (str(info)))

    options.stdout.write("\t%s\n" % msg)
    options.stdout.flush()

Example #3

Show file

def outputXRateResult(mali, result, rsi, rsv, rni, rnv, msg):
    """output the results of running the Xrate four parameter grammar.

    Writes a single tab-separated line to ``options.stdout`` (``options``
    is a module-level name, not a parameter).  Columns: id of the first
    entry, id of the second entry, dN, dS, omega, N, S, ``na``, ``na``,
    kappa, log-likelihood, ``na``.  With ``options.with_rho`` the values
    rN, rS, t, rN0, rS0, t0 are appended; with ``options.with_counts`` the
    result of ``Genomics.CalculatePairIndices`` for the first two sequences
    is appended.  ``msg`` closes the line and the stream is flushed.

    Arguments
    ---------
    mali :
        alignment; its first two identifiers are reported.
    result :
        training result providing ``getModel()`` and ``getLogLikelihood()``.
    rsi, rsv, rni, rnv :
        the four rates handed to ``RateEstimation.getQMatrix`` as ``Rsi``,
        ``Rsv``, ``Rni`` and ``Rnv``.  When ``rsi`` is None every derived
        value is reported as ``na``.
    msg :
        free-text message for the end of the line; replaced when ``rsi``
        is None, extended otherwise.
    """
    ids = mali.getIdentifiers()

    # the matrix returned alongside the frequencies is not used here
    pi, _ = RateEstimation.getRateMatrix(result.getModel(),
                                         terminals=('COD0', 'COD1',
                                                    'COD2'))

    if rsi is None:
        o_dn, o_ds, o_omega = "na", "na", "na"
        o_rn, o_rn0, o_rs, o_rs0 = "na", "na", "na", "na"
        o_t, o_t0 = "na", "na"
        o_N, o_S = "na", "na"
        # Must be a plain string; with a trailing comma this becomes a
        # 1-tuple and the kappa column would read "('na',)".
        o_kappa = "na"
        msg = "estimated rate parameters are zero"
    else:
        Q, t = RateEstimation.getQMatrix(pi,
                                         Rsi=rsi,
                                         Rsv=rsv,
                                         Rni=rni,
                                         Rnv=rnv)

        # averages over the synonymous/non-synonymous pair of each kind
        mean_i = (rsi + rni) / 2.0
        mean_v = (rsv + rnv) / 2.0

        ## get rate matrix as if omega was set to 1
        Q0, t0 = RateEstimation.getQMatrix(pi,
                                           Rsi=mean_i,
                                           Rsv=mean_v,
                                           Rni=mean_i,
                                           Rnv=mean_v)

        # averages over the transition/transversion pair of each kind
        mean_s = (rsi + rsv) / 2.0
        mean_n = (rni + rnv) / 2.0

        ## get rate matrix as if kappa was set to 1
        Q1, t1 = RateEstimation.getQMatrix(pi,
                                           Rsi=mean_s,
                                           Rsv=mean_s,
                                           Rni=mean_n,
                                           Rnv=mean_n)

        rI, rV, rS, rN = RateEstimation.countSubstitutions(pi, Q)
        rI0, rV0, rS0, rN0 = RateEstimation.countSubstitutions(pi, Q0)
        rI1, rV1, rS1, rN1 = RateEstimation.countSubstitutions(pi, Q1)

        # NOTE(review): the previous comment mentioned a 64.0/61.0 factor
        # ("xrate does not normalize the terminals"); the code below does
        # not apply one - confirm whether it should.
        dS = rS / (3 * rS0) * t
        dN = rN / (3 * rN0) * t

        o_omega = options.value_format % (dN / dS)
        o_dn = options.value_format % dN
        o_ds = options.value_format % dS
        o_rn = options.value_format % rN
        o_rs = options.value_format % rS
        o_rn0 = options.value_format % rN0
        o_rs0 = options.value_format % rS0
        o_t = options.value_format % t
        o_t0 = options.value_format % t0
        o_S = options.value_format % (mali.getNumColumns() * rS0)
        o_N = options.value_format % (mali.getNumColumns() * rN0)

        ## kappa is given normalized by sites like omega
        o_kappa = options.value_format % (rI / rI1 * rV1 / rV)

        ## kappa1 is given by the ratio of the rates NOT normalized by the sites.
        msg += " rI/rV=%f rI0/rV0=%f kappa1=%s" % (rI / rV, rI0 / rV0,
                                                   options.value_format %
                                                   ((rsi + rni) / (rsv + rnv)))

    first_id = mali.getEntry(ids[0]).mId
    second_id = mali.getEntry(ids[1]).mId
    options.stdout.write("\t".join(
        map(str, (first_id, second_id, o_dn, o_ds, o_omega, o_N, o_S,
                  "na", "na", o_kappa, result.getLogLikelihood(), "na"))))

    if options.with_rho:
        options.stdout.write(
            "\t" + "\t".join(map(str, (o_rn, o_rs, o_t, o_rn0, o_rs0, o_t0))))

    if options.with_counts:
        info = Genomics.CalculatePairIndices(mali[ids[0]], mali[ids[1]])
        options.stdout.write("\t%s" % (str(info)))

    options.stdout.write("\t%s\n" % msg)
    options.stdout.flush()

Example #4

Show file

File: xrate_tms.py Project: BioinformaticsArchive/cgat

# For each ``options.shared_rates`` mode: which of the rate parameters
# (Rs, Rn, Ri, Rv), in that order, are looked up under the block prefix.
# Parameters flagged False are looked up without a prefix.  Any mode not
# listed here uses the block prefix for all four parameters.
_BLOCK_SPECIFIC_RATES = {
    "all": (False, False, False, False),
    "kappa": (True, True, False, False),
    "kappa-ds": (False, True, False, False),
    "omega": (False, False, True, True),
    "omega-ds": (False, False, True, False),
    "ds": (False, True, True, True),
}


def _rate_prefixes(shared_rates, block):
    """Return the name prefixes for (Rs, Rn, Ri, Rv) within ``block``."""
    flags = _BLOCK_SPECIFIC_RATES.get(shared_rates, (True, True, True, True))
    return tuple(block if flag else "" for flag in flags)


def processMali(mali, options):
    """Train the block-wise XRate grammar on ``mali`` and report its rates.

    Writes one tab-separated line per block to ``options.stdout``.  The
    columns are: annotation code, block prefix, dN, dS, omega, four ``na``
    placeholders, kappa, log-likelihood, ``na`` and the number of columns
    annotated with the block's code.  With ``options.with_rho`` the values
    rN, rS, t, rN0, rS0 and t0 are appended.  A free-text message ends the
    line.

    Arguments
    ---------
    mali :
        the multiple alignment; it is modified in place (annotation added,
        identifiers renamed, optionally gaps removed).
    options :
        option object; the attributes read here are ``block_size``,
        ``separator``, ``xrate_min_increment``, ``clean_mali``,
        ``input_filename_tree``, ``loglevel``, ``stdlog``, ``shared_rates``,
        ``value_format``, ``with_rho`` and ``stdout``.

    Raises
    ------
    ValueError
        if the alignment has no columns.
    """
    ncols = mali.getNumColumns()

    if ncols == 0:
        # Raising a bare string is not valid; use a real exception type.
        raise ValueError("refusing to process empty alignment.")

    ## add annotation of states
    if options.block_size is not None:
        if options.block_size < 1:
            # values below 1 are applied as a fraction of ncols / 3
            size = int(float(ncols) / 3.0 * options.block_size) * 3
        else:
            size = int(options.block_size) * 3

        size = min(size, ncols)
        mali.addAnnotation("STATE", "N" * size + "C" * (ncols - size))

    ## remove gene ids: keep only the part before the separator.
    ## A copy is iterated because entries are renamed inside the loop.
    for identifier in list(mali.getIdentifiers()):
        if options.separator in identifier:
            species = identifier.split(options.separator)[0]
            mali.rename(identifier, species)

    map_new2old = mali.mapIdentifiers()
    map_old2new = IOTools.getInvertedDictionary(map_new2old, make_unique=True)

    xgram = XGram.XGram()

    if options.xrate_min_increment:
        xgram.setMinIncrement(options.xrate_min_increment)

    # remove empty columns and masked columns
    if options.clean_mali:
        mali.mGapChars = mali.mGapChars + ("n", "N")
        mali.removeGaps(minimum_gaps=1, frame=3)

    if options.input_filename_tree:
        # The file handle is released as soon as the tree has been parsed.
        with open(options.input_filename_tree, "r") as infile:
            nexus = TreeTools.Newick2Nexus(infile)
            tree = nexus.trees[0]
        tree.relabel(map_old2new)
    else:
        tree = None

    annotation = mali.getAnnotation("STATE")
    chars = set(annotation)
    for c in chars:
        assert c in ("N", "C"), \
            "unknown annotation %s: only 'N' and 'C' are recognized" % c

    if len(chars) == 1:
        if options.loglevel >= 1:
            options.stdlog.write("# WARNING: only a single block\n")
        # Sets do not support indexing; take the single member via iteration.
        blocks = (("B0_", next(iter(chars))), )
    else:
        blocks = (("B0_", "N"),
                  ("B1_", "C"))

    # prepareGrammar hands back a possibly updated alignment together with
    # the training result, hence the annotation is read again below.
    result, mali, ids = prepareGrammar(xgram, mali, tree, map_old2new, blocks,
                                       options)

    trained_model = result.getModel()

    pis, matrices = RateEstimation.getRateMatrix(trained_model)

    annotation = mali.getAnnotation("STATE")

    for block, code in blocks:

        terminals = ("%sCOD0" % block,
                     "%sCOD1" % block,
                     "%sCOD2" % block)

        pi = pis[terminals]

        prefix_rs, prefix_rn, prefix_ri, prefix_rv = _rate_prefixes(
            options.shared_rates, block)

        grammar = trained_model.mGrammar
        rs = grammar.getParameter('%sRs' % prefix_rs)
        rn = grammar.getParameter('%sRn' % prefix_rn)
        ri = grammar.getParameter('%sRi' % prefix_ri)
        rv = grammar.getParameter('%sRv' % prefix_rv)

        nchars = annotation.count(code)

        msg = "iter=%i Rs=%6.4f Rn=%6.4f Ri=%6.4f Rv=%6.4f" % (
            result.getNumIterations(), rs, rn, ri, rv)

        try:
            # rate matrix for the estimated parameters
            Q, t = RateEstimation.getQMatrix(pi,
                                             Rsi=rs * ri,
                                             Rsv=rs * rv,
                                             Rni=rn * ri,
                                             Rnv=rn * rv)

            # rate matrix with Rs and Rn both replaced by their mean
            avg_omega = (rs + rn) / 2.0
            Q0, t0 = RateEstimation.getQMatrix(pi,
                                               Rsi=ri * avg_omega,
                                               Rsv=rv * avg_omega,
                                               Rni=ri * avg_omega,
                                               Rnv=rv * avg_omega)

            # rate matrix with Ri and Rv both replaced by their mean
            avg_kappa = (ri + rv) / 2.0
            Q1, t1 = RateEstimation.getQMatrix(pi,
                                               Rsi=rs * avg_kappa,
                                               Rsv=rs * avg_kappa,
                                               Rni=rn * avg_kappa,
                                               Rnv=rn * avg_kappa)

            rI, rV, rS, rN = RateEstimation.countSubstitutions(pi, Q)
            rI0, rV0, rS0, rN0 = RateEstimation.countSubstitutions(pi, Q0)
            rI1, rV1, rS1, rN1 = RateEstimation.countSubstitutions(pi, Q1)

            dS = rS / (3 * rS0) * t
            dN = rN / (3 * rN0) * t

            # NOTE(review): kappa is normalised with the Q0 counts here; the
            # Q1 counts (rI1, rV1) are computed but unused - confirm that
            # this is intended.
            o_kappa = options.value_format % (rI / rI0 * rV0 / rV)
            o_omega = options.value_format % (dN / dS)

            o_dn = options.value_format % dN
            o_ds = options.value_format % dS
            o_rn = options.value_format % rN
            o_rs = options.value_format % rS
            o_rn0 = options.value_format % rN0
            o_rs0 = options.value_format % rS0
            o_t = options.value_format % t
            o_t0 = options.value_format % t0

        except ZeroDivisionError:
            # report every derived value as missing
            o_kappa = o_omega = "na"
            o_dn = o_ds = "na"
            o_rn = o_rs = "na"
            o_rn0 = o_rs0 = "na"
            o_t = o_t0 = "na"
            msg = "insufficient data to estimate rate matrix."

        options.stdout.write("\t".join(map(str, (
            code, block,
            o_dn, o_ds, o_omega,
            "na", "na", "na", "na",
            o_kappa,
            result.getLogLikelihood(),
            "na",
            nchars))))

        if options.with_rho:
            options.stdout.write("\t" + "\t".join(map(str, (o_rn, o_rs, o_t,
                                                            o_rn0, o_rs0, o_t0))))

        options.stdout.write("\t%s\n" % msg)