Python RateMatrix.get_jukes_cantor_rate_matrixの例、RateMatrix.get_jukes_cantor_rate_matrix, ContinuousTimeMarkovModel Pythonの例

コード例 #1

0

ファイルを表示

ファイル: 20080123b.py プロジェクト: BIGtigr/xgcode

def get_response_content(fs):
    """
    Simulate ancestral sequences under Jukes-Cantor and return them as text.
    The tree is parsed from fs.tree and the alignment from fs.fasta.
    One string per tree node is requested from the simulated alignment,
    following a preorder traversal of the tree.
    @param fs: an object with 'tree' and 'fasta' string attributes
    @return: the per-node strings joined by newlines, newline terminated
    """
    # read and validate the user-supplied tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # read the user-supplied alignment and force it to nucleotides
    try:
        observed = Fasta.Alignment(fs.fasta.splitlines())
        observed.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build the Jukes-Cantor rate matrix object over the nucleotide states
    nt_states = list('ACGT')
    jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
    jc_rows = MatrixUtil.dict_to_row_major(jc_dict, nt_states, nt_states)
    jc_model = RateMatrix.RateMatrix(jc_rows, nt_states)
    # simulate the ancestral alignment
    try:
        augmented = PhyLikelihood.simulate_ancestral_alignment(
            tree, observed, jc_model)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # collect one entry per node in preorder
    entries = [
        augmented.get_fasta_sequence(node.name) for node in tree.preorder()
    ]
    return '\n'.join(entries) + '\n'

コード例 #2

0

ファイルを表示

ファイル: 20120403a.py プロジェクト: BIGtigr/xgcode

 def __call__(self, X_logs):
     """
     Evaluate the objective function at a vector of log branch rates.
     Entry v of X_logs is the log of the rate on the branch above vertex v.
     Each branch length in self.B is multiplied by the rate of its
     child vertex before the log likelihood of self.alignment is computed
     under the Jukes-Cantor rate matrix.
     @param X_logs: vector of branch rate logs
     @return: negative log likelihood
     """
     rates = [math.exp(x) for x in X_logs]
     # scale each branch by the rate indexed by its child vertex
     scaled = {}
     for v_parent, v_child in self.R:
         branch = frozenset((v_parent, v_child))
         scaled[branch] = rates[v_child] * self.B[branch]
     # rebuild a newick tree from the rescaled branch lengths
     tree = Newick.parse(
             FtreeIO.RBN_to_newick(self.R, scaled, self.N_leaves),
             Newick.NewickTree)
     # define the Jukes-Cantor rate matrix object
     states = list('ACGT')
     jc_rows = MatrixUtil.dict_to_row_major(
             RateMatrix.get_jukes_cantor_rate_matrix(), states, states)
     jc_model = RateMatrix.RateMatrix(jc_rows, states)
     # the quantity to be minimized is the negated log likelihood
     return -PhyLikelihood.get_log_likelihood(
             tree, self.alignment, jc_model)

コード例 #3

0

ファイルを表示

ファイル: 20080129b.py プロジェクト: BIGtigr/xgcode

def simulate_branch_path(tree, node):
    """
    Sample a nucleotide history along the branch between a node and its parent.
    The history is sampled given the states already stored on the node
    and on its parent, and every sampled event is inserted into the tree
    as a new node on that branch.
    Branch colors are hex strings: A and G map to red shades,
    T and C map to blue shades.
    @param tree: a SpatialTree with simulated nucleotides at each node
    @param node: the node that defines the branch on which to simulate a history
    """
    color_of = {
        'A': 'FF4444',
        'G': 'FF8888',
        'T': '4444FF',
        'C': '8888FF',
    }
    node.branch_color = color_of[node.state]
    jc_rates = RateMatrix.get_jukes_cantor_rate_matrix()
    start_state, end_state = node.parent.state, node.state
    # keep sampling until the sampler returns something other than None
    history = None
    while history is None:
        history = PathSampler.get_nielsen_sample(
            start_state, end_state, 'ACGT', node.blen, jc_rates)
    # insert one new node per sampled event, walking away from the parent
    upstream = node.parent
    previous_time = 0
    for event_time, event_state in history:
        inserted = SpatialTree.SpatialTreeNode()
        inserted.name = node.name
        inserted.state = event_state
        inserted.branch_color = color_of[upstream.state]
        # node.blen is read again on every pass rather than cached
        fraction = (event_time - previous_time) / float(node.blen)
        tree.insert_node(inserted, upstream, node, fraction)
        previous_time = event_time
        upstream = inserted

コード例 #4

0

ファイルを表示

ファイル: 20080123b.py プロジェクト: argriffing/xgcode

def get_response_content(fs):
    """
    Return simulated ancestral sequences for a tree and an alignment.
    The tree comes from fs.tree and the alignment from fs.fasta;
    the ancestral alignment is simulated with a Jukes-Cantor rate matrix.
    @param fs: an object with 'tree' and 'fasta' string attributes
    @return: one string per tree node in preorder, each newline terminated
    """
    # parse and check the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # parse the alignment and force it to nucleotides
    try:
        alignment = Fasta.Alignment(fs.fasta.splitlines())
        alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # wrap the Jukes-Cantor rates in a rate matrix object
    states = list('ACGT')
    model = RateMatrix.RateMatrix(
            MatrixUtil.dict_to_row_major(
                RateMatrix.get_jukes_cantor_rate_matrix(), states, states),
            states)
    # replace the alignment by the simulated ancestral alignment
    try:
        alignment = PhyLikelihood.simulate_ancestral_alignment(
                tree, alignment, model)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # the tree traversal defines the order of the output entries
    return '\n'.join(
            alignment.get_fasta_sequence(node.name)
            for node in tree.preorder()) + '\n'

コード例 #5

0

ファイルを表示

ファイル: 20080129b.py プロジェクト: argriffing/xgcode

def simulate_branch_path(tree, node):
    """
    Simulate a colored nucleotide history on the branch above a node.
    The sampled path starts at the state of the parent and ends at
    the state of the node; each event becomes a new node on the branch.
    A and G are given red hex colors; T and C are given blue hex colors.
    @param tree: a SpatialTree with simulated nucleotides at each node
    @param node: the node that defines the branch on which to simulate a history
    """
    palette = {
            'A': 'FF4444',
            'G': 'FF8888',
            'T': '4444FF',
            'C': '8888FF',
            }
    node.branch_color = palette[node.state]
    rate_matrix = RateMatrix.get_jukes_cantor_rate_matrix()
    state_at_parent = node.parent.state
    state_at_node = node.state
    # retry for as long as the sampler returns None
    while True:
        events = PathSampler.get_nielsen_sample(
                state_at_parent, state_at_node, 'ACGT',
                node.blen, rate_matrix)
        if events is not None:
            break
    # chain the new nodes between the parent and the node
    above = node.parent
    t_prev = 0
    for t_event, state in events:
        piece = SpatialTree.SpatialTreeNode()
        piece.name = node.name
        piece.state = state
        piece.branch_color = palette[above.state]
        # node.blen is looked up at each insertion, not once up front
        tree.insert_node(
                piece, above, node, (t_event - t_prev) / float(node.blen))
        t_prev = t_event
        above = piece

コード例 #6

0

ファイルを表示

def demo_rejection_sampling():
    path_length = 2
    jukes_cantor_rate_matrix = RateMatrix.get_jukes_cantor_rate_matrix()
    states = 'ACGT'
    n = 100000
    nielsen_event_count = 0
    nielsen_path_count = 0
    nielsen_first_time_sum = 0
    nielsen_dwell = dict((c, 0) for c in states)
    rejection_event_count = 0
    rejection_path_count = 0
    rejection_first_time_sum = 0
    rejection_dwell = dict((c, 0) for c in states)
    for i in range(n):
        initial_state = 'A'
        terminal_state = 'C'
        events = get_rejection_sample(initial_state, terminal_state, states, path_length, jukes_cantor_rate_matrix)
        if events is not None:
            assert events
            rejection_path_count += 1
            rejection_event_count += len(events)
            t, state = events[0]
            rejection_first_time_sum += t
            extended = [(0, initial_state)] + events + [(path_length, terminal_state)]
            for (t0, state0), (t1, state1) in zip(extended[:-1], extended[1:]):
                rejection_dwell[state0] += t1 - t0
        events = get_nielsen_sample(initial_state, terminal_state, states, path_length, jukes_cantor_rate_matrix)
        if events is not None:
            assert events
            nielsen_path_count += 1
            nielsen_event_count += len(events)
            t, state = events[0]
            nielsen_first_time_sum += t
            extended = [(0, initial_state)] + events + [(path_length, terminal_state)]
            for (t0, state0), (t1, state1) in zip(extended[:-1], extended[1:]):
                nielsen_dwell[state0] += t1 - t0
    expected_fraction = RateMatrix.get_jukes_cantor_transition_matrix(path_length)[(initial_state, terminal_state)]
    print 'testing the rejection sampling:'
    print 'expected fraction:', expected_fraction
    print 'observed fraction:', rejection_path_count / float(n)
    print 'comparing rejection sampling and nielsen sampling:'
    rejection_method_fraction = rejection_event_count / float(rejection_path_count)
    nielsen_method_fraction = nielsen_event_count / float(nielsen_path_count)
    print 'rejection method fraction:', rejection_method_fraction
    print 'nielsen method fraction:', nielsen_method_fraction
    print 'comparing time of first event:'
    print 'rejection method first event time mean:', rejection_first_time_sum / float(rejection_path_count)
    print 'nielsen method first event time mean:', nielsen_first_time_sum / float(nielsen_path_count)
    print 'comparing the duration spent in each state:'
    print 'rejection:'
    for state, t in rejection_dwell.items():
        print '\t%s: %f' % (state, t/float(rejection_path_count))
    print 'nielsen:'
    for state, t in nielsen_dwell.items():
        print '\t%s: %f' % (state, t/float(nielsen_path_count))

コード例 #7

0

ファイルを表示

ファイル: 20080129b.py プロジェクト: BIGtigr/xgcode

def get_response_content(fs):
    """
    Draw a tree whose branches carry a simulated nucleotide history.
    The tree is parsed from fs.tree and nucleotides for named tips are
    read from fs.column.  Ancestral states are simulated with a
    Jukes-Cantor rate matrix, a path is simulated on every non-root
    branch, and the laid out tree is rendered as a 640 by 480 image.
    @param fs: an object with 'tree', 'column' and 'imageformat' attributes
    @return: the value returned by DrawTreeImage.get_tree_image
    """
    # get a properly formatted newick tree with branch lengths
    tree = Newick.parse(fs.tree, SpatialTree.SpatialTree)
    tree.assert_valid()
    if tree.has_negative_branch_lengths():
        raise HandlingError(
            'drawing a tree with negative branch lengths is not implemented')
    tree.add_branch_lengths()
    # read the name to nucleotide mapping from the column text;
    # membership is tested against single letters only
    valid_letters = list('acgtACGT')
    name_to_nucleotide = {}
    for line in iterutils.stripped_lines(fs.column.splitlines()):
        name, letter = SnippetUtil.get_state_value_pair(line)
        if letter not in valid_letters:
            raise HandlingError('"%s" is not a valid nucleotide' % letter)
        if name in name_to_nucleotide:
            raise HandlingError('the name "%s" was duplicated' % name)
        name_to_nucleotide[name] = letter.upper()
    # store the requested nucleotide on each named tip
    for name, letter in name_to_nucleotide.items():
        try:
            tip = tree.get_unique_node(name)
        except Newick.NewickSearchError as e:
            raise HandlingError(e)
        if tip.children:
            raise HandlingError(
                'constraints on internal nodes are not implemented')
        tip.state = letter
    # build the Jukes-Cantor rate matrix object
    states = list('ACGT')
    jc_rows = MatrixUtil.dict_to_row_major(
        RateMatrix.get_jukes_cantor_rate_matrix(), states, states)
    jc_model = RateMatrix.RateMatrix(jc_rows, states)
    # simulate the ancestral nucleotides
    jc_model.simulate_ancestral_states(tree)
    # simulate a path on each branch;
    # this breaks up the branch into a linear sequence of nodes and adds color
    for node in tree.gen_non_root_nodes():
        simulate_branch_path(tree, node)
    # lay out the tree and draw it
    EqualArcLayout.do_layout(tree)
    try:
        ext = Form.g_imageformat_to_ext[fs.imageformat]
        return DrawTreeImage.get_tree_image(tree, (640, 480), ext)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)

コード例 #8

0

ファイルを表示

ファイル: 20080129b.py プロジェクト: argriffing/xgcode

def get_response_content(fs):
    """
    Render a tree image with a simulated nucleotide path on each branch.
    Tip nucleotides are taken from fs.column, the remaining states are
    simulated with a Jukes-Cantor rate matrix, and each non-root branch
    is split into colored pieces by simulate_branch_path.
    @param fs: an object with 'tree', 'column' and 'imageformat' attributes
    @return: the value returned by DrawTreeImage.get_tree_image
    """
    # parse the tree and make sure it can be drawn
    tree = Newick.parse(fs.tree, SpatialTree.SpatialTree)
    tree.assert_valid()
    if tree.has_negative_branch_lengths():
        raise HandlingError(
                'drawing a tree with negative branch lengths '
                'is not implemented')
    tree.add_branch_lengths()
    # parse the column string into a map from name to upper case nucleotide
    allowed = list('acgtACGT')
    name_to_nucleotide = {}
    for line in iterutils.stripped_lines(fs.column.splitlines()):
        pair = SnippetUtil.get_state_value_pair(line)
        name_string, nucleotide_string = pair
        # only entries equal to one of the allowed letters pass
        if nucleotide_string not in allowed:
            raise HandlingError(
                    '"%s" is not a valid nucleotide' % nucleotide_string)
        if name_string in name_to_nucleotide:
            raise HandlingError(
                    'the name "%s" was duplicated' % name_string)
        name_to_nucleotide[name_string] = nucleotide_string.upper()
    # augment the tips with the nucleotide letters
    for name_string, nucleotide in name_to_nucleotide.items():
        try:
            target = tree.get_unique_node(name_string)
        except Newick.NewickSearchError as e:
            raise HandlingError(e)
        if target.children:
            raise HandlingError(
                    'constraints on internal nodes are not implemented')
        target.state = nucleotide
    # get the Jukes-Cantor rate matrix object
    ordered_states = list('ACGT')
    model = RateMatrix.RateMatrix(
            MatrixUtil.dict_to_row_major(
                RateMatrix.get_jukes_cantor_rate_matrix(),
                ordered_states, ordered_states),
            ordered_states)
    # simulate the ancestral nucleotides
    model.simulate_ancestral_states(tree)
    # each call breaks a branch into a sequence of colored nodes
    for branch_node in tree.gen_non_root_nodes():
        simulate_branch_path(tree, branch_node)
    # do the layout and draw the image
    EqualArcLayout.do_layout(tree)
    try:
        ext = Form.g_imageformat_to_ext[fs.imageformat]
        return DrawTreeImage.get_tree_image(tree, (640, 480), ext)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)

コード例 #9

0

ファイルを表示

ファイル: PhyLikelihood.py プロジェクト: BIGtigr/xgcode

 def test_simulation(self):
     """
     Run the ancestral alignment simulation on a hard-coded five taxon tree.
     The simulated result is not inspected; the call only has to complete.
     """
     # Parse and validate the example tree.
     newick_text = (
             '(((Human:0.1, Chimpanzee:0.2)to-chimp:0.8, Gorilla:0.3)'
             'to-gorilla:0.7, Orangutan:0.4, Gibbon:0.5)all;')
     tree = Newick.parse(newick_text, Newick.NewickTree)
     tree.assert_valid()
     # Read the example alignment.
     alignment = Fasta.Alignment(StringIO(Fasta.brown_example_alignment))
     # Build the Jukes-Cantor rate matrix object.
     states = list('ACGT')
     jc_rows = MatrixUtil.dict_to_row_major(
             RateMatrix.get_jukes_cantor_rate_matrix(), states, states)
     jc_model = RateMatrix.RateMatrix(jc_rows, states)
     # Simulate ancestral states.
     simulate_ancestral_alignment(tree, alignment, jc_model)

コード例 #10

0

ファイルを表示

ファイル: 20080828a.py プロジェクト: BIGtigr/xgcode

 def gen_distance_matrices(self, count, max_steps):
     """
     Yield (ordered sequence list, distance matrix) pairs.
     Alignments are simulated on self.tree under a Jukes-Cantor model and
     a distance matrix is estimated from each one.  A sample with a zero
     or an infinite off-diagonal distance is rejected and counted.
     The generator stops once self.get_complexity() reaches max_steps
     or once count samples have been accepted.
     @param count: the requested number of distance matrices
     @param max_steps: an upper bound on the allowed number of steps
     """
     # define the jukes cantor model
     states = list('ACGT')
     model = RateMatrix.RateMatrix(
         MatrixUtil.dict_to_row_major(
             RateMatrix.get_jukes_cantor_rate_matrix(), states, states),
         states)
     # record the requested number of samples
     self.requested_matrix_count = count
     # rejection sampling until the step budget or the quota is reached
     while not (self.get_complexity() >= max_steps
                or self.accepted_sample_count >= count):
         # simulate an alignment from the tree
         alignment = PhyLikelihood.simulate_alignment(
             self.tree, model, self.sequence_length)
         # order the simulated sequences by self.ordered_names
         by_name = dict(zip(alignment.headers, alignment.sequences))
         sequence_list = [by_name[name] for name in self.ordered_names]
         # estimate the distance matrix
         distance_matrix = JC69.get_ML_distance_matrix(sequence_list)
         # gather the off-diagonal entries to look for degeneracies
         off_diagonal = [
             value
             for i, row in enumerate(distance_matrix)
             for j, value in enumerate(row)
             if i != j
         ]
         # a zero entry takes precedence over an infinite entry
         if any(value == 0.0 for value in off_diagonal):
             self.rejected_zero_sample_count += 1
         elif any(value == float('inf') for value in off_diagonal):
             self.rejected_inf_sample_count += 1
         else:
             self.accepted_sample_count += 1
             yield sequence_list, distance_matrix

コード例 #11

0

ファイルを表示

ファイル: PhyLikelihood.py プロジェクト: BIGtigr/xgcode

 def test_likelihood(self):
     """
     Compare the Jukes-Cantor log likelihood of the example data to a
     stored reference value.
     """
     # Parse and validate the example tree.
     tree = Newick.parse(Newick.brown_example_tree, Newick.NewickTree)
     tree.assert_valid()
     # Read the example alignment.
     alignment = Fasta.Alignment(StringIO(Fasta.brown_example_alignment))
     # Build the Jukes-Cantor rate matrix object.
     states = list('ACGT')
     jc_rows = MatrixUtil.dict_to_row_major(
             RateMatrix.get_jukes_cantor_rate_matrix(), states, states)
     jc_model = RateMatrix.RateMatrix(jc_rows, states)
     # The computed value must agree with the reference value.
     observed = get_log_likelihood(tree, alignment, jc_model)
     self.assertAlmostEqual(observed, -4146.26547208)

コード例 #12

0

ファイルを表示

 def test_jukes_cantor_rejection(self):
     """
     Check the acceptance count of the rejection sampler.
     The sampler is called n times for a path from A to C, and the number
     of calls that do not return None is compared with n times the
     Jukes-Cantor A to C transition probability for the path length.
     The test allows a deviation of three binomial standard deviations.
     """
     path_length = 1
     jukes_cantor_rate_matrix = RateMatrix.get_jukes_cantor_rate_matrix()
     states = 'ACGT'
     n = 200
     # count the calls that produce a path
     observed = 0
     for i in range(n):
         events = get_rejection_sample(
                 'A', 'C', states, path_length, jukes_cantor_rate_matrix)
         if events is not None:
             observed += 1
     # mean and variance of a binomial count with success probability p
     transition = RateMatrix.get_jukes_cantor_transition_matrix(path_length)
     p = transition[('A', 'C')]
     expected = n*p
     variance = n*p*(1-p)
     errstr = 'observed: %f  expected: %f' % (observed, expected)
     # assertTrue is used because the failUnless alias is deprecated
     self.assertTrue(
             abs(observed - expected) < 3*math.sqrt(variance), errstr)

コード例 #13

0

ファイルを表示

ファイル: 20080828a.py プロジェクト: argriffing/xgcode

 def gen_distance_matrices(self, count, max_steps):
     """
     Generate (ordered sequence list, distance matrix) pairs.
     Each pass simulates an alignment on self.tree with a Jukes-Cantor
     model, estimates a distance matrix, and either yields the pair or
     counts it as rejected because of a zero or infinite off-diagonal entry.
     Generation ends when self.get_complexity() reaches max_steps
     or when count pairs have been accepted.
     @param count: the requested number of distance matrices
     @param max_steps: an upper bound on the allowed number of steps
     """
     # define the jukes cantor rate matrix
     nt_states = list('ACGT')
     jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
     jc_rows = MatrixUtil.dict_to_row_major(
             jc_dict, nt_states, nt_states)
     model = RateMatrix.RateMatrix(jc_rows, nt_states)
     # record the requested number of samples
     self.requested_matrix_count = count
     inf = float('inf')
     # do some rejection sampling
     while True:
         out_of_steps = self.get_complexity() >= max_steps
         if out_of_steps or self.accepted_sample_count >= count:
             break
         # simulate an alignment from the tree
         alignment = PhyLikelihood.simulate_alignment(
                 self.tree, model, self.sequence_length)
         # pick the sequences out of the alignment in the requested order
         lookup = dict(zip(alignment.headers, alignment.sequences))
         sequence_list = [lookup[name] for name in self.ordered_names]
         # get the estimated distance matrix
         distance_matrix = JC69.get_ML_distance_matrix(sequence_list)
         # scan the off-diagonal entries for degenerate values
         saw_zero = False
         saw_inf = False
         for row_index, row in enumerate(distance_matrix):
             for col_index, value in enumerate(row):
                 if row_index == col_index:
                     continue
                 if value == 0.0:
                     saw_zero = True
                 if value == inf:
                     saw_inf = True
         # zeros are counted in preference to infinities
         if saw_zero:
             self.rejected_zero_sample_count += 1
         elif saw_inf:
             self.rejected_inf_sample_count += 1
         else:
             self.accepted_sample_count += 1
             yield sequence_list, distance_matrix

コード例 #14

0

ファイルを表示

ファイル: 20080122a.py プロジェクト: BIGtigr/xgcode

def get_response_content(fs):
    """
    Report the Jukes-Cantor log likelihood of an alignment on a tree.
    @param fs: an object with 'tree' and 'fasta' string attributes
    @return: the log likelihood converted to a string, newline terminated
    """
    # parse and check the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # parse the alignment and force it to nucleotides
    try:
        alignment = Fasta.Alignment(fs.fasta.splitlines())
        alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build the Jukes-Cantor rate matrix object
    states = list('ACGT')
    jc_rows = MatrixUtil.dict_to_row_major(
            RateMatrix.get_jukes_cantor_rate_matrix(), states, states)
    jc_model = RateMatrix.RateMatrix(jc_rows, states)
    # compute the log likelihood and format the response
    return str(PhyLikelihood.get_log_likelihood(
            tree, alignment, jc_model)) + '\n'

コード例 #15

0

ファイルを表示

ファイル: 20120403a.py プロジェクト: BIGtigr/xgcode

def get_response_content(fs):
    """
    Build the text of a BEAST xml document from freshly sampled data.
    A tree is sampled with kingman.sample, rates are sampled on its
    branches, and an alignment is sampled on the rate-scaled tree.
    Jukes-Cantor log likelihoods of the sampled alignment are computed
    for the rate-scaled tree and for the unscaled tree, and are written
    into xml comments ahead of the taxon, alignment, tree, clock,
    likelihood and mcmc sections.
    @param fs: an object with 'nleaves' and 'nsites' attributes
    @return: the text accumulated in the StringIO buffer
    """
    # init the response and get the user variables
    out = StringIO()
    nleaves = fs.nleaves
    # NOTE(review): nvertices only feeds nbranches, and nbranches is only
    # referenced by the commented-out optimization code further down
    nvertices = nleaves * 2 - 1
    nbranches = nvertices - 1
    nsites = fs.nsites
    # sample the coalescent tree with timelike branch lengths
    R, B = kingman.sample(fs.nleaves)
    r = Ftree.R_to_root(R)
    # get the leaf vertex names
    # (vertices 0..nleaves-1 are paired with the leading uppercase letters)
    N = dict(zip(range(nleaves), string.uppercase[:nleaves]))
    # copy taken before the internal vertex names are added to N
    N_leaves = dict(N)
    # get the internal vertex names
    # (the sorted, concatenated names of the leaves listed for the vertex)
    v_to_leaves = R_to_v_to_leaves(R)
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            N[v] = ''.join(sorted(N[leaf] for leaf in leaves))
    # get vertex ages
    v_to_age = kingman.RB_to_v_to_age(R, B)
    # sample the rates on the branches
    b_to_rate = sample_b_to_rate(R)
    xycorr = get_correlation(R, b_to_rate)
    # define B_subs in terms of substitutions instead of time
    B_subs = dict((p, t * b_to_rate[p]) for p, t in B.items())
    # sample the alignment
    v_to_seq = sample_v_to_seq(R, B_subs, nsites)
    # get the log likelihood; this is kind of horrible
    # (only the sequences of the leaf vertices go into the alignment)
    pairs = [(N[v], ''.join(v_to_seq[v])) for v in range(nleaves)]
    headers, sequences = zip(*pairs)
    alignment = Fasta.create_alignment(headers, sequences)
    newick_string = FtreeIO.RBN_to_newick(R, B_subs, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    dictionary_rate_matrix = RateMatrix.get_jukes_cantor_rate_matrix() 
    ordered_states = list('ACGT') 
    row_major_rate_matrix = MatrixUtil.dict_to_row_major(
            dictionary_rate_matrix, ordered_states, ordered_states)
    rate_matrix_object = RateMatrix.RateMatrix(
            row_major_rate_matrix, ordered_states) 
    ll = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are all 1.0
    # (the tree is rebuilt from the unscaled branch lengths B)
    newick_string = FtreeIO.RBN_to_newick(R, B, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    ll_unity = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are numerically optimized
    # TODO incorporate the result into the xml file
    # TODO speed up the likelihood evaluation (beagle? C module?)
    #f = Opt(R, B, N_leaves, alignment)
    #X_logs = [0.0] * nbranches
    #result = scipy.optimize.fmin(f, X_logs, full_output=True)
    #print result
    #
    # write the xml header followed by comments recording the sampled values
    print >> out, '<?xml version="1.0"?>'
    print >> out, '<beast>'
    print >> out
    print >> out, '<!-- actual rate autocorrelation', xycorr, '-->'
    print >> out, '<!-- actual root height', v_to_age[r], '-->'
    print >> out, '<!-- actual log likelihood', ll, '-->'
    print >> out, '<!-- ll if rates were unity', ll_unity, '-->'
    print >> out
    print >> out, '<!--'
    print >> out, 'predefine the taxa as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    print >> out, get_leaf_taxon_defn(list(string.uppercase[:nleaves]))
    print >> out
    print >> out, '<!--'
    print >> out, 'define the alignment as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    # NOTE(review): 'leaves' is not assigned anywhere near this line; it is
    # the loop variable left over from the internal vertex naming loop above,
    # so it holds the leaves of the last vertex in sorted order and is
    # undefined when v_to_leaves is empty -- confirm that this is intended
    print >> out, get_alignment_defn(leaves, N, v_to_seq)
    print >> out
    print >> out, '<!--'
    print >> out, 'specify the starting tree as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, get_starting_tree_defn(R, B, N_leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'connect the tree model as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, g_tree_model_defn
    print >> out
    print >> out, g_uncorrelated_relaxed_clock_info
    print >> out
    # the string literal below is a bare expression statement, so the mrca
    # subset and tmrcaStatistic sections it contains are never written
    """
    print >> out, '<!--'
    print >> out, 'create a list of taxa for which to constrain the mrca as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_subset_defn(N, v, leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'create a tmrcaStatistic that will record the height as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_stat_defn(N[v])
    """
    print >> out
    print >> out, g_likelihood_info
    print >> out
    print >> out, '<!--'
    print >> out, 'run the mcmc'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    print >> out, get_mcmc_defn(v_to_leaves, v_to_age, N)
    print >> out
    print >> out, '</beast>'
    # return the response
    return out.getvalue()