예제 #1
0
def printPairs(pairs, mali, map_new2old, options):
    """Write one tab-separated result line per pair to ``options.stdout``.

    Each line starts with the ``mId`` of the second sequence followed by
    the ``mId`` of the first one, then the pair's rate estimates.  Extra
    columns are appended when ``options.with_rho`` and/or
    ``options.with_counts`` are set.  The line always ends with the
    pair's error message, and the stream is flushed after every pair.

    Arguments
    ---------
    pairs : iterable
        Result objects exposing ``mName1``, ``mName2``, ``mKa``, ``mKs``
        and the other ``m*`` attributes read below.
    mali : object
        Alignment providing ``getEntry(name)`` and item access by name.
    map_new2old : object
        Not used by this function.
    options : object
        Needs ``stdout``, ``with_rho`` and ``with_counts``.

    Returns
    -------
    int
        The number of pairs written.
    """
    written = 0
    for written, entry in enumerate(pairs, start=1):
        out = options.stdout

        # mandatory columns; the two "na" entries are fixed placeholders
        core_columns = [
            mali.getEntry(entry.mName2).mId,
            mali.getEntry(entry.mName1).mId,
            entry.mKa,
            entry.mKs,
            entry.mKaks,
            entry.mN,
            entry.mS,
            "na",
            "na",
            entry.mKappa,
            entry.mLogLikelihood,
            entry.mTau,
        ]
        out.write("\t".join(str(value) for value in core_columns))

        if options.with_rho:
            rho_columns = [
                entry.mRn,
                entry.mRs,
                entry.mBranchLength,
                entry.mRn0,
                entry.mRs0,
                "na",
            ]
            out.write("\t" + "\t".join(str(value) for value in rho_columns))

        if options.with_counts:
            counts = Genomics.CalculatePairIndices(mali[entry.mName1],
                                                   mali[entry.mName2])
            out.write("\t%s" % (str(counts)))

        out.write("\t" + entry.mError + "\n")
        out.flush()

    return written
예제 #2
0
def printPair(pair, mali, map_new2old, options, msg="", noutput=0):
    """Write the result for a single sequence pair to ``options.stdout``.

    The first two identifiers of *mali* are taken as the pair.

    * ``options.output_format == "list"``: one tab-separated record with
      the original sequence names (looked up in *map_new2old*), the
      distance, the log-likelihood, alpha, kappa and *msg*.
    * ``options.output_format == "tree"``: a ``>pairN`` label followed
      directly by ``pair.mTree``.

    If ``options.with_counts`` is set, the pair counts computed by
    ``Genomics.CalculatePairIndices`` are appended.  The output is always
    terminated by a newline.

    Arguments
    ---------
    pair : object
        Result with ``mDistanceMatrix``, ``mLogLikelihood``, ``mAlpha``,
        ``mKappa`` (list format) or ``mTree`` (tree format).
    mali : object
        Alignment providing ``getIdentifiers()`` and item access.
    map_new2old : mapping
        Maps identifiers in *mali* to the names to print (list format).
    options : object
        Needs ``stdout``, ``output_format``, ``format``, ``with_counts``
        and, when counts are requested, ``is_codons``.
    msg : str
        Free-text message appended as the last list column.
    noutput : int
        Number of pairs already written; the tree label is
        ``noutput + 1``.  The original code read an undefined name
        ``noutput`` here and raised NameError for tree output, so it is
        now an optional argument defaulting to 0.

    Returns
    -------
    int
        Always 1 (the number of pairs written).
    """

    ids = mali.getIdentifiers()
    if options.output_format == "list":
        options.stdout.write("\t".join(
            (map_new2old[ids[0]], map_new2old[ids[1]],
             options.format % pair.mDistanceMatrix[ids[0]][ids[1]],
             options.format % pair.mLogLikelihood,
             printValue(pair.mAlpha, options.format),
             printValue(pair.mKappa, options.format),
             msg)))

    elif options.output_format == "tree":
        # NOTE(review): no newline separates the label from the tree;
        # kept as in the original output - confirm this is intended.
        options.stdout.write(">pair%i" % (noutput + 1))
        options.stdout.write("%s\n" % pair.mTree)

    if options.with_counts:
        # note the argument order: second sequence first
        info = Genomics.CalculatePairIndices(mali[ids[1]],
                                             mali[ids[0]],
                                             with_codons=options.is_codons)
        options.stdout.write("\t%s" % (str(info)))

    options.stdout.write("\n")

    return 1
예제 #3
0
def outputXRateResult(mali, result, rsi, rsv, rni, rnv, msg):
    """Write the results of an xrate four-parameter grammar run.

    One tab-separated line is written for the first two sequences of
    *mali*: ids, dN, dS, omega, N, S, two "na" placeholders, kappa, the
    log-likelihood and a final "na".  Rate columns and pair counts are
    appended when ``options.with_rho`` / ``options.with_counts`` are set,
    and *msg* is written last.

    This function reads a module-level ``options`` object (``stdout``,
    ``value_format``, ``with_rho``, ``with_counts``); it is not passed in.

    Arguments
    ---------
    mali : object
        Alignment providing ``getIdentifiers()``, ``getEntry()``,
        ``getNumColumns()`` and item access.
    result : object
        xrate result providing ``getModel()`` and ``getLogLikelihood()``.
    rsi, rsv, rni, rnv : float or None
        The four rate parameters passed to ``RateEstimation.getQMatrix``
        as ``Rsi``, ``Rsv``, ``Rni`` and ``Rnv``.  If *rsi* is None all
        derived columns are reported as "na".
    msg : str
        Message for the last column; replaced when *rsi* is None and
        extended with rate ratios otherwise.
    """
    ids = mali.getIdentifiers()

    pi, matrix = RateEstimation.getRateMatrix(result.getModel(),
                                              terminals=('COD0', 'COD1',
                                                         'COD2'))

    if rsi is None:
        o_dn, o_ds, o_omega = "na", "na", "na"
        o_rn, o_rn0, o_rs, o_rs0 = "na", "na", "na", "na"
        o_t, o_t0 = "na", "na"
        o_N, o_S = "na", "na"
        # plain string: a trailing comma here used to create the tuple
        # ("na",), which was printed as "('na',)" in the kappa column.
        o_kappa = "na"
        msg = "estimated rate parameters are zero"
    else:
        Q, t = RateEstimation.getQMatrix(pi,
                                         Rsi=rsi,
                                         Rsv=rsv,
                                         Rni=rni,
                                         Rnv=rnv)

        # get rate matrix as if omega was set to 1
        Q0, t0 = RateEstimation.getQMatrix(pi,
                                           Rsi=(rsi + rni) / 2.0,
                                           Rsv=(rsv + rnv) / 2.0,
                                           Rni=(rsi + rni) / 2.0,
                                           Rnv=(rsv + rnv) / 2.0)

        # get rate matrix as if kappa was set to 1
        Q1, t1 = RateEstimation.getQMatrix(pi,
                                           Rsi=(rsi + rsv) / 2.0,
                                           Rsv=(rsi + rsv) / 2.0,
                                           Rni=(rni + rnv) / 2.0,
                                           Rnv=(rni + rnv) / 2.0)

        rI, rV, rS, rN = RateEstimation.countSubstitutions(pi, Q)
        rI0, rV0, rS0, rN0 = RateEstimation.countSubstitutions(pi, Q0)
        rI1, rV1, rS1, rN1 = RateEstimation.countSubstitutions(pi, Q1)

        # NOTE(review): an earlier comment here referred to a 64.0/61.0
        # correction because xrate does not normalize the terminals; no
        # such factor appears in the expressions below - confirm.
        dS = rS / (3 * rS0) * t
        dN = rN / (3 * rN0) * t

        o_omega = options.value_format % (dN / dS)
        o_dn = options.value_format % dN
        o_ds = options.value_format % dS
        o_rn = options.value_format % rN
        o_rs = options.value_format % rS
        o_rn0 = options.value_format % rN0
        o_rs0 = options.value_format % rS0
        o_t = options.value_format % t
        o_t0 = options.value_format % t0
        o_S = options.value_format % (mali.getNumColumns() * rS0)
        o_N = options.value_format % (mali.getNumColumns() * rN0)

        # kappa is given normalized by sites like omega
        o_kappa = options.value_format % (rI / rI1 * rV1 / rV)

        # kappa1 is given by the ratio of the rates NOT normalized by the
        # sites.
        msg += " rI/rV=%f rI0/rV0=%f kappa1=%s" % (rI / rV, rI0 / rV0,
                                                   options.value_format %
                                                   ((rsi + rni) / (rsv + rnv)))

    options.stdout.write("\t".join(
        map(str, (mali.getEntry(ids[0]).mId, mali.getEntry(
            ids[1]).mId, o_dn, o_ds, o_omega, o_N, o_S, "na", "na", o_kappa,
                  result.getLogLikelihood(), "na"))))

    if options.with_rho:
        options.stdout.write(
            "\t" + "\t".join(map(str, (o_rn, o_rs, o_t, o_rn0, o_rs0, o_t0))))

    if options.with_counts:
        info = Genomics.CalculatePairIndices(mali[ids[0]], mali[ids[1]])
        options.stdout.write("\t%s" % (str(info)))

    options.stdout.write("\t%s\n" % msg)
    options.stdout.flush()
예제 #4
0
def runXrateSN(xgram, mali, options):
    """Run xrate with the sn.eg grammar and write one result line.

    The grammar is trained via ``prepareGrammar``.  The trained
    parameters ``s``, ``n``, ``k`` and ``not_k`` are read from the model
    and used to build codon rate matrices, from which dS, dN and kappa
    are derived.  The result is written as a single tab-separated line to
    ``options.stdout``.

    If ``s`` or ``n`` is None, all derived columns are reported as "na".
    This check is performed *before* any arithmetic on the parameters;
    the original code only checked afterwards, when the computation
    would already have failed on a None value.

    Arguments
    ---------
    xgram : object
        xrate wrapper, passed on to ``prepareGrammar``.
    mali : object
        Input alignment; replaced by the alignment returned from
        ``prepareGrammar``.
    options : object
        Needs ``stdout``, ``value_format`` and ``with_counts``.
    """

    result, mali, ids = prepareGrammar(xgram, mali, options)
    trained_model = result.getModel()

    pi, matrix = evaluateGrammar(trained_model)

    def getQMatrix(pi, k, s, n):
        """Build a codon rate matrix and return ``(Q, trace)``.

        Only codon pairs flagged as single changes by
        ``RateEstimation.evaluateCodonPair`` get an entry.  The entry is
        ``s`` (synonymous) or ``n`` (non-synonymous), multiplied by ``k``
        when the change is not a transition, times ``pi[codon_j]``.

        Diagonal elements are set to the negative of the row sums.
        Every entry is then divided by ``trace``, the pi-weighted sum of
        the row sums, which is also returned.
        """

        codons = Bio.Data.CodonTable.standard_dna_table.forward_table.keys()

        Q = initializeQMatrix(codons)

        trace = 0.0
        for codon_i in codons:
            row_sum = 0.0
            for codon_j in codons:
                if codon_i == codon_j:
                    continue

                is_single, is_synonymous, is_transition = \
                    RateEstimation.evaluateCodonPair(codon_i, codon_j)

                if not is_single:
                    continue

                v = s if is_synonymous else n
                if not is_transition:
                    v = v * k

                v *= pi[codon_j]
                Q[codon_i][codon_j] = v
                row_sum += v

            Q[codon_i][codon_i] = -row_sum
            trace += pi[codon_i] * row_sum

        for codon_i in codons:
            for codon_j in codons:
                Q[codon_i][codon_j] /= trace

        return Q, trace

    s = trained_model.mGrammar.getParameter('s')
    n = trained_model.mGrammar.getParameter('n')
    k = trained_model.mGrammar.getParameter('k')
    not_k = trained_model.mGrammar.getParameter('not_k')

    if s is None or n is None:
        o_dn, o_ds, o_omega = "na", "na", "na"
        o_rn, o_rn0, o_rs, o_rs0 = "na", "na", "na", "na"
        o_t, o_t0 = "na", "na"
        # plain string: a trailing comma here used to create the tuple
        # ("na",), which was printed as "('na',)" in the kappa column.
        o_kappa = "na"
        msg = "estimated rate parameters are zero"
    else:
        Q, t = getQMatrix(pi, k, s, n)
        # reference matrix with both s and n set to 1
        Q0, t0 = getQMatrix(pi, k, 1, 1)

        ri, rv, rS, rN = countSubstitutions(pi, Q)
        ri0, rv0, rS0, rN0 = countSubstitutions(pi, Q0)

        kappa = ri / rv
        dS = rS / (3 * rS0) * t
        dN = rN / (3 * rN0) * t

        o_omega = options.value_format % (n / s)
        o_dn = options.value_format % dN
        o_ds = options.value_format % dS
        o_rn = options.value_format % rN
        o_rs = options.value_format % rS
        o_rn0 = options.value_format % rN0
        o_rs0 = options.value_format % rS0
        o_t = options.value_format % t
        o_t0 = options.value_format % t0
        o_kappa = options.value_format % kappa
        msg = "iter=%i s=%6.4f n=%6.4f k=%6.4f ~k=%6.4f" % (
            result.getNumIterations(), s, n, k, not_k)

    options.stdout.write("\t".join(
        map(str, (mali.getEntry(ids[0]).mId, mali.getEntry(
            ids[1]).mId, o_dn, o_ds, o_omega, "na", "na", "na", "na", o_kappa,
                  result.getLogLikelihood(), "na", o_rn, o_rs, o_t, o_rn0,
                  o_rs0, o_t0))))

    if options.with_counts:
        info = Genomics.CalculatePairIndices(mali[ids[0]], mali[ids[1]])
        options.stdout.write("\t%s" % (str(info)))

    options.stdout.write("\t%s\n" % msg)
예제 #5
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads fasta records from ``options.stdin`` two at a time and treats
    each consecutive pair as an aligned pair of sequences.  For every
    pair and every filter in ``--filters``, the fields in ``--fields``
    are computed and written as one tab-separated row to
    ``options.stdout``.  A trailing unpaired record is ignored.

    Raises
    ------
    ValueError
        If the two sequences of a pair differ in length, if the first
        sequence's length is not a multiple of 3, or if an unknown field
        name is requested.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: fasta2distances.py 2781 2009-09-10 11:33:14Z andreas $")

    parser.add_option("--filters", dest="filters", type="string",
                      help="Filters to use for filtering sequences [all|codon1|codon2|codon3|d4].")
    # the help text used to advertise "nunaligned1|nunaligned2", which are
    # not accepted field names.
    parser.add_option("--fields", dest="fields", type="string",
                      help="Fields to output [aligned|unaligned1|unaligned2|identical|transitions|transversions|jc69|t92].")

    parser.set_defaults(
        filename_map=None,
        filters="all,codon1,codon2,codon3,d4",
        gap_char="-",
        fields="aligned,unaligned1,unaligned2,identical,transitions,transversions,jc69,t92",
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    options.filters = options.filters.split(",")
    options.fields = options.fields.split(",")

    iterator = FastaIterator.FastaIterator(options.stdin)

    # one column per (filter, field) combination
    headers = ["id1", "id2"]
    for f in options.filters:
        headers.extend("%s_%s" % (f, x) for x in options.fields)

    options.stdout.write("\t".join(headers) + "\n")

    # fields that are plain integer counters on the pair-info object
    count_attributes = {
        "aligned": "mNAligned",
        "unaligned1": "mNUnaligned1",
        "unaligned2": "mNUnaligned2",
        "transversions": "mNTransversions",
        "transitions": "mNTransitions",
        "identical": "mNIdentical",
    }

    while True:
        # the iterator may signal the end of input either by raising
        # StopIteration or by yielding None
        try:
            first_record = next(iterator)
            if first_record is None:
                break
            second_record = next(iterator)
            if second_record is None:
                break
        except StopIteration:
            break

        if len(first_record.sequence) != len(second_record.sequence):
            raise ValueError("sequences %s and %s of unequal length" % (
                first_record.title, second_record.title))

        if len(first_record.sequence) % 3 != 0:
            raise ValueError(
                "sequence %s not multiple of 3" % first_record.title)

        result = []
        for f in options.filters:

            s1, s2 = FilterAlignedPairForPositions(first_record.sequence,
                                                   second_record.sequence,
                                                   f)

            info = Genomics.CalculatePairIndices(s1, s2, options.gap_char)

            for field in options.fields:

                if field in count_attributes:
                    c = "%i" % getattr(info, count_attributes[field])
                elif field == "jc69":
                    # a ValueError from the distance function is reported
                    # as "nan" instead of aborting the run
                    try:
                        c = "%6.4f" % CalculateDistanceJC69(info)[0]
                    except ValueError:
                        c = "nan"
                elif field == "t92":
                    try:
                        c = "%6.4f" % CalculateDistanceT92(info)[0]
                    except ValueError:
                        c = "nan"
                else:
                    raise ValueError("Unknown field %s" % field)

                result.append(c)

        options.stdout.write("%s\t%s\t%s\n" % (first_record.title,
                                               second_record.title,
                                               "\t".join(result)))

    E.Stop()
예제 #6
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads a multiple alignment (or, with ``--pairwise``, a collection of
    sequences taken two at a time), determines the list of sequence pairs
    to compare and computes a distance for each pair with the selected
    method.  Nucleotide input is dispatched to baseml, dnadist or xrate
    where requested; otherwise distances are computed in this script and
    written as tab-separated rows to ``options.stdout``.

    Raises
    ------
    ValueError
        For an odd number of sequences in pairwise mode, an unknown
        ``--sites`` value, or a distance that is not supported by the
        selected alphabet/method combination.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: mali2rates.py 2781 2009-09-10 11:33:14Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-i",
                      "--input-format",
                      dest="input_format",
                      type="choice",
                      choices=("plain", "fasta", "clustal", "stockholm",
                               "phylip"),
                      help="input format of multiple alignment")

    parser.add_option(
        "-s",
        "--sites",
        dest="sites",
        type="string",
        help="sites to use [default=%default].",
    )

    parser.add_option(
        "-f",
        "--file",
        dest="filename",
        type="string",
        help="filename of multiple alignment (- for stdin) [default=%default].",
        metavar="FILE")

    parser.add_option("-o",
                      "--format",
                      dest="format",
                      type="string",
                      help="format [default=%default].",
                      metavar="format")

    parser.add_option(
        "-d",
        "--distance",
        dest="distance",
        type="choice",
        choices=("PID", "T92", "JC69", "POVL", "F84", "LogDet", "K80", "F81",
                 "HKY85", "TN93", "REV", "UNREST", "REVU", "UNRESTU", "JTT",
                 "PMB", "PAM", "Kimura", "CategoriesModel"),
        help="method to use for distance calculation [default=%default].")

    parser.add_option("--method",
                      dest="method",
                      type="choice",
                      choices=("phylip", "baseml", "own", "xrate"),
                      help="program to use for rate calculation.")

    parser.add_option("--output-format",
                      dest="output_format",
                      type="choice",
                      choices=("list", "tree"),
                      help="output format.")

    parser.add_option(
        "-m",
        "--min-sites",
        dest="min_sites",
        type="int",
        help="minimum number of sites for output[default=%default].",
    )

    parser.add_option(
        "-a",
        "--alphabet",
        dest="alphabet",
        type="choice",
        choices=("aa", "na", "auto"),
        help="alphabet to use.",
    )

    parser.add_option("-t",
                      "--filename-tree",
                      dest="filename_tree",
                      type="string",
                      help="filename with tree information.")

    parser.add_option("--set-alpha",
                      dest="alpha",
                      type="float",
                      help="initial alpha value.")

    parser.add_option("--fix-alpha",
                      dest="fix_alpha",
                      action="store_true",
                      help="do not estimate alpha.")

    parser.add_option("--set-kappa",
                      dest="kappa",
                      type="float",
                      help="initial kappa value.")

    parser.add_option("--fix-kappa",
                      dest="fix_kappa",
                      action="store_true",
                      help="do not estimate kappa.")

    parser.add_option("--dump",
                      dest="dump",
                      action="store_true",
                      help="dump output.")

    parser.add_option("--test",
                      dest="test",
                      action="store_true",
                      help="test run - does not clean up.")

    parser.add_option("--pairwise",
                      dest="pairwise",
                      action="store_true",
                      help="force pairwise comparison.")

    parser.add_option(
        "--set-clean-data",
        dest="clean_data",
        type="choice",
        choices=("0", "1"),
        help=
        "PAML should cleanup data:  0=only gaps within pair are removed, 1=columns in the mali with gaps are removed."
    )

    parser.add_option(
        "--with-counts",
        dest="with_counts",
        action="store_true",
        help=
        "output counts of aligned positions, transitions and transversions.")

    parser.add_option("-w",
                      "--write",
                      dest="write",
                      type="choice",
                      action="append",
                      choices=("input", "trained", "all"),
                      help="output sections to write for xrate.")

    parser.add_option("--output-pattern",
                      dest="output_pattern",
                      type="string",
                      help="output pattern for output files.")

    # type given by name, consistent with the other float options
    parser.add_option("--xrate-min-increment",
                      dest="xrate_min_increment",
                      type="float",
                      help="minimum increment to stop iteration in xrate.")

    parser.set_defaults(
        input_format="fasta",
        filename_tree=None,
        with_counts=False,
        sites="d4",
        distance="T92",
        min_sites=1,
        filename="-",
        alphabet="auto",
        format="%6.4f",
        method="phylip",
        kappa=None,
        fix_kappa=False,
        alpha=None,
        fix_alpha=False,
        dump=False,
        clean_data=None,
        output_format="list",
        iteration="all-vs-all",
        pairwise=False,
        report_step=1000,
        output_pattern="%s.eg",
        write=[],
        test_xrate=False,
        xrate_min_increment=None,
        is_codons=False,
    )

    (options, args) = E.Start(parser)

    # read multiple alignment
    if options.pairwise:
        # read sequences, but not as a multiple alignment. This permits
        # multiple names.
        mali = Mali.SequenceCollection()
        options.iteration = "pairwise"
    else:
        mali = Mali.Mali()

    if options.filename != "-":
        infile = open(options.filename, "r")
    else:
        infile = sys.stdin

    # make sure a file opened here is closed even if parsing fails;
    # stdin is left open.
    try:
        mali.readFromFile(infile, format=options.input_format)
    finally:
        if options.filename != "-":
            infile.close()

    ids = mali.getIdentifiers()

    if options.alphabet == "auto":
        # treat the input as nucleotides if fewer than 10% of all
        # characters remain after removing a, c, g, t, x and n
        s = "".join(map(lambda x: x.mString, mali.values())).lower()
        ss = re.sub("[acgtxn]", "", s)
        if float(len(ss)) < (len(s) * 0.1):
            options.alphabet = "na"
            if mali.getNumColumns() % 3 == 0:
                options.is_codons = True
        else:
            options.alphabet = "aa"

        if options.loglevel >= 1:
            options.stdlog.write("# autodetected alphabet: %s\n" %
                                 options.alphabet)

    npairs = 0
    nskipped_length = 0
    nskipped_distance = 0

    # pairs are stored as index tuples into ``ids``
    pairs = []
    if options.iteration == "all-vs-all":
        for x in range(len(ids) - 1):
            for y in range(x + 1, len(ids)):
                pairs.append((x, y))
    elif options.iteration == "first-vs-all":
        for y in range(1, len(ids)):
            pairs.append((0, y))
    elif options.iteration == "pairwise":
        if len(ids) % 2 != 0:
            raise ValueError(
                "uneven number of sequences (%i) not compatible with --iteration=pairwise"
                % len(ids))
        for x in range(0, len(ids), 2):
            pairs.append((x, x + 1))

    if options.alphabet == "na":

        if options.method == "baseml":
            runBaseML(mali, pairs, options)
        elif options.method == "phylip" and options.distance in ("F84", "K80",
                                                                 "JC69",
                                                                 "LogDet"):
            runDNADIST(mali, pairs, options)
        elif options.method == "xrate":
            runXrate(mali, pairs, options)
        else:
            if options.is_codons:
                h = Genomics.SequencePairInfoCodons().getHeader()
            else:
                h = Genomics.SequencePairInfo().getHeader()
            options.stdout.write("seq1\tseq2\tdist\tvar\t%s\n" % (h))

            for x, y in pairs:
                id_x = ids[x]
                npairs += 1

                id_y = ids[y]

                info = Genomics.CalculatePairIndices(
                    mali[id_x], mali[id_y], with_codons=options.is_codons)

                if options.distance in ("T92", "JC69"):
                    if options.sites == "d4":
                        # NOTE(review): the degenerate sites are only used
                        # for the minimum-length check; the distance below
                        # is computed from ``info`` (full sequences) -
                        # confirm this is intended.
                        seq1, seq2 = Genomics.GetDegenerateSites(mali[id_x],
                                                                 mali[id_y],
                                                                 position=3,
                                                                 degeneracy=4)

                        if len(seq1) < options.min_sites:
                            nskipped_length += 1
                            continue
                    else:
                        raise ValueError("unknown sites %s" % options.sites)

                if options.distance == "T92":
                    distance, variance = CalculateDistanceT92(info)
                elif options.distance == "JC69":
                    distance, variance = CalculateDistanceJC69(info)
                elif options.distance == "PID":
                    distance, variance = CalculateDistancePID(
                        mali[id_x], mali[id_y])
                elif options.distance == "POVL":
                    distance, variance = CalculateDistancePOVL(
                        mali[id_x], mali[id_y])
                else:
                    # previously ``distance`` was left unbound (or stale
                    # from the previous pair) in this case
                    raise ValueError(
                        "distance %s not implemented for method %s" %
                        (options.distance, options.method))

                if distance >= 0:
                    options.stdout.write("\t".join(
                        map(str, (id_x, id_y, options.format % distance,
                                  options.format % variance, info))) + "\n")
                else:
                    nskipped_distance += 1

    elif options.alphabet == "aa":

        if options.distance in ("JTT", "PMB", "PAM", "Kimura",
                                "CategoriesModel"):

            # use phylip for these
            phylip = WrapperPhylip.Phylip()
            phylip.setProgram("protdist")
            phylip.setMali(mali)

            # each "D" advances protdist's model selection by one step
            # starting from JTT. The PMB test used to be misspelled
            # "PMG", so PMB was silently run without any "D".
            phylip_options = []
            if options.distance == "PMB":
                phylip_options += ["D"] * 1
            elif options.distance == "PAM":
                phylip_options += ["D"] * 2
            elif options.distance == "Kimura":
                phylip_options += ["D"] * 3
            elif options.distance == "CategoriesModel":
                phylip_options += ["D"] * 4

            phylip_options.append("Y")
            phylip.setOptions(phylip_options)
            result = phylip.run()

            writePhylipResult(result, options)

        else:
            options.stdout.write("id1\tid2\tdist\tvar\n")

            # iterate over all pairs of sequences
            for x, y in pairs:
                id_x = ids[x]
                npairs += 1

                id_y = ids[y]

                if options.distance == "PID":
                    distance, variance = CalculateDistancePID(
                        mali[id_x], mali[id_y])
                elif options.distance == "POVL":
                    # percentage overlap
                    distance, variance = CalculateDistancePOVL(
                        mali[id_x], mali[id_y])
                else:
                    # previously ``distance`` was left unbound here
                    raise ValueError(
                        "distance %s not implemented for amino acid sequences"
                        % options.distance)

                if distance >= 0:
                    options.stdout.write("\t".join(
                        (id_x, id_y, options.format % distance,
                         options.format % variance)) + "\n")
                else:
                    nskipped_distance += 1

    if options.loglevel >= 1:
        options.stdlog.write(
            "# nseqs=%i, npairs=%i, nskipped_length=%i, nskipped_distance=%i\n"
            % (len(ids), npairs, nskipped_length, nskipped_distance))

    E.Stop()
예제 #7
0
def runXrate(mali, pairs, options):
    """Estimate pairwise distances with xrate and write them to stdout.

    For every ``(x, y)`` index pair, a two-sequence alignment is written
    in stockholm format to a temporary file and the DNA model selected by
    ``options.distance`` (K80, JC69 or REV) is trained on it (for JC69 a
    tree is built instead).  One tab-separated line per pair is written
    to ``options.stdout``.

    With ``options.test_xrate`` set, the model is instead trained from a
    grid of initial ``alpha``/``beta`` values and one line is written per
    grid point.

    The temporary directory is removed when the function exits, also if
    an error occurs.

    Arguments
    ---------
    mali : object
        Alignment providing ``getIdentifiers()``, ``getSequence()`` and
        item access.
    pairs : iterable of (int, int)
        Index pairs into the identifiers of *mali*.
    options : object
        Needs ``distance``, ``format``, ``output_format``,
        ``with_counts``, ``is_codons``, ``test_xrate``,
        ``xrate_min_increment`` and ``stdout``.

    Raises
    ------
    ValueError
        If ``options.distance`` is not one of K80, JC69 or REV.
    """

    from XGram.Generator.Prebuilt import DNA
    from XGram.Model import Annotation
    # this import also binds the name ``XGram`` used below
    import XGram.Run

    # ``ids`` was used below without ever being defined in this function
    ids = mali.getIdentifiers()

    xgram = XGram.XGram()
    if options.xrate_min_increment:
        xgram.setMinIncrement(options.xrate_min_increment)

    ninput = 0

    if options.distance == "K80":
        model = DNA.buildModel(substitution_model="k80")
    elif options.distance == "JC69":
        model = DNA.buildModel(substitution_model="jc69")
    elif options.distance == "REV":
        model = DNA.buildModel(substitution_model="gtr")
    else:
        raise ValueError(
            "distance %s not implemented for xrate" % (options.distance))

    tempdir = tempfile.mkdtemp()
    try:
        data = tempdir + "/data"

        writeModel(model, "input", options)

        if options.output_format == "list":
            options.stdout.write("\t".join(
                ("seq1", "seq2", "distance", "lnL", "alpha", "kappa", "msg")))

            if options.with_counts:
                options.stdout.write("\t%s" %
                                     Genomics.SequencePairInfo().getHeader())
            options.stdout.write("\n")

        for x, y in pairs:

            m1 = mali.getSequence(ids[x])
            ninput += 1
            temp_mali = Mali.Mali()
            m2 = mali.getSequence(ids[y])

            temp_mali.addSequence(m1.mId, m1.mFrom, m1.mTo, m1.mString)
            temp_mali.addSequence(m2.mId, m2.mFrom, m2.mTo, m2.mString)

            # the alignment is written together with a two-taxon tree
            # (first branch length fixed to 1.0) as a stockholm annotation
            with open(data, "w") as outfile:
                temp_mali.writeToFile(
                    outfile,
                    format="stockholm",
                    write_ranges=False,
                    options=("#=GF NH (%s:1.0)%s;" %
                             tuple(temp_mali.getIdentifiers()), ))

            o_alpha, o_kappa = "na", "na"
            o_distance = "na"
            msg = ""

            if options.test_xrate:
                # train from a grid of starting values
                for alpha in (0.1, 0.5, 1.0, 1.5):
                    for beta in (0.1, 0.5, 1.0, 1.5):
                        model.mGrammar.setParameter("alpha", alpha)
                        model.mGrammar.setParameter("beta", beta)
                        result = xgram.train(model, data)
                        trained_model = result.getModel()
                        xalpha, xbeta = \
                            (trained_model.mGrammar.getParameter('alpha'),
                             trained_model.mGrammar.getParameter('beta'))
                        # this assumes that the branch length in the input
                        # is normalized to 1
                        # this is the normalization constant
                        o_distance = options.format % (2 * xbeta + xalpha)
                        o_kappa = options.format % (xalpha / xbeta)

                        msg = "alpha=%6.4f, beta=%6.4f" % (xalpha, xbeta)

                        options.stdout.write("\t".join(
                            ("%f" % alpha, "%f" % beta, o_distance,
                             options.format % result.getLogLikelihood(),
                             o_alpha, o_kappa, msg)))
                        options.stdout.write("\n")
                continue

            options.stdout.write("%s\t%s\t" % (m1.mId, m2.mId))

            if options.distance == "K80":
                result = xgram.train(model, data)
                trained_model = result.getModel()
                alpha, beta = \
                    (trained_model.mGrammar.getParameter('alpha'),
                     trained_model.mGrammar.getParameter('beta'))
                # this assumes that the branch length in the input is
                # normalized to 1
                # this is the normalization constant
                o_distance = options.format % (2 * beta + alpha)
                o_kappa = options.format % (alpha / beta)

                msg = "alpha=%6.4f, beta=%6.4f" % (alpha, beta)

            elif options.distance == "REV":
                result = xgram.train(model, data)
                trained_model = result.getModel()
                alpha, beta, gamma, delta, epsilon, theta = \
                    (trained_model.mGrammar.getParameter('alpha'),
                     trained_model.mGrammar.getParameter('beta'),
                     trained_model.mGrammar.getParameter('gamma'),
                     trained_model.mGrammar.getParameter('delta'),
                     trained_model.mGrammar.getParameter('epsilon'),
                     trained_model.mGrammar.getParameter('theta'))

                pi = trained_model.evaluateTerminalFrequencies(
                    ('A0', ))[('A0', )]
                matrix = trained_model.evaluateRateMatrix(('A0', ))[('A0', )]
                q, d = RateEstimation.getDistanceGTR(pi, matrix)
                o_distance = options.format % (d)
                o_kappa = ""
                msg = "alpha=%6.4f, beta=%6.4f, gamma=%6.4f, delta=%6.4f, epsilon=%6.4f, theta=%6.4f" % (
                    alpha, beta, gamma, delta, epsilon, theta)

            elif options.distance == "JC69":
                result = xgram.buildTree(model, data)

                tree = result.getTree()
                # multiply distance by tree, as rates are set to 1 and
                # thus the matrix is scaled by a factor of 3
                o_distance = options.format % (
                    3.0 *
                    float(re.search(r"\(\S+:([0-9.]+)\)", tree).groups()[0]))
                o_kappa = "na"
                msg = ""

            writeModel(result.mModel, "trained", options)

            options.stdout.write("\t".join(
                (o_distance, options.format % result.getLogLikelihood(),
                 o_alpha, o_kappa, msg)))

            if options.with_counts:
                info = Genomics.CalculatePairIndices(
                    mali[ids[x]],
                    mali[ids[y]],
                    with_codons=options.is_codons)
                options.stdout.write("\t%s" % (str(info)))

            options.stdout.write("\n")
    finally:
        shutil.rmtree(tempdir)