def get_response_content(fs):
    """
    Analyze a nucleotide alignment under a three component mixture model.
    @param fs: form data providing the newick tree string, the alignment
    text, three mixture weights, and three 4x4 nucleotide rate matrices
    @return: the string returned by do_analysis followed by a newline
    @raise HandlingError: if a matrix is not 4x4, if the alignment is
    invalid, or if the mixture weights sum to zero
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # get the mixture weights
    weights = [fs.weight_a, fs.weight_b, fs.weight_c]
    # get the matrices
    matrices = [fs.matrix_a, fs.matrix_b, fs.matrix_c]
    for R in matrices:
        if R.shape != (4, 4):
            msg = 'expected each nucleotide rate matrix to be 4x4'
            raise HandlingError(msg)
    # get the nucleotide alignment
    try:
        alignment = Fasta.Alignment(fs.alignment.splitlines())
        alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # create the mixture proportions;
    # the sum is forced to a float so that integer weights are not
    # truncated by integer division, and a zero sum is reported to the user
    # instead of failing inside the division
    weight_sum = float(sum(weights))
    if not weight_sum:
        raise HandlingError('the mixture weights should not sum to zero')
    mixture_proportions = [weight / weight_sum for weight in weights]
    # create the rate matrix objects
    ordered_states = list('ACGT')
    rate_matrix_objects = [
            RateMatrix.RateMatrix(R.tolist(), ordered_states)
            for R in matrices]
    # create the mixture model
    mixture_model = SubModel.MixtureModel(
            mixture_proportions, rate_matrix_objects)
    # normalize the mixture model
    mixture_model.normalize()
    # return the html string
    return do_analysis(mixture_model, alignment, tree) + '\n'
def get_response_content(fs):
    """
    Simulate ancestral sequences on a tree under the Jukes-Cantor model.
    @param fs: form data providing the newick tree string and the fasta text
    @return: the simulated sequences, one entry per tree node in preorder,
    joined by newlines
    @raise HandlingError: if the alignment is invalid or the simulation fails
    """
    # parse and validate the newick tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # parse the observed nucleotide alignment
    try:
        observed = Fasta.Alignment(fs.fasta.splitlines())
        observed.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build the Jukes-Cantor rate matrix object
    jc_rates = RateMatrix.get_jukes_cantor_rate_matrix()
    states = list('ACGT')
    jc_rows = MatrixUtil.dict_to_row_major(jc_rates, states, states)
    jc_model = RateMatrix.RateMatrix(jc_rows, states)
    # extend the observed alignment with simulated ancestral sequences
    try:
        simulated = PhyLikelihood.simulate_ancestral_alignment(
                tree, observed, jc_model)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # the tree preorder defines the order of the output sequences
    records = [
            simulated.get_fasta_sequence(node.name)
            for node in tree.preorder()]
    return '\n'.join(records) + '\n'
def __call__(self, X_logs):
    """
    Evaluate the negative log likelihood for a vector of log branch rates.
    The vth entry of X_logs is the log rate of the branch above vertex v.
    @param X_logs: vector of branch rate logs
    @return: negative log likelihood, the quantity to be minimized
    """
    rates = [math.exp(log_rate) for log_rate in X_logs]
    # scale each stored branch length by the rate indexed by the child vertex
    scaled_lengths = {}
    for parent, child in self.R:
        branch = frozenset([parent, child])
        scaled_lengths[branch] = rates[child] * self.B[branch]
    # go through a newick string to get a tree for the likelihood function
    tree = Newick.parse(
            FtreeIO.RBN_to_newick(self.R, scaled_lengths, self.N_leaves),
            Newick.NewickTree)
    # build the Jukes-Cantor rate matrix object
    jc_rates = RateMatrix.get_jukes_cantor_rate_matrix()
    states = list('ACGT')
    jc_rows = MatrixUtil.dict_to_row_major(jc_rates, states, states)
    jc_model = RateMatrix.RateMatrix(jc_rows, states)
    # negate the log likelihood so that a minimizer maximizes the likelihood
    return -PhyLikelihood.get_log_likelihood(tree, self.alignment, jc_model)
def get_response_content(fs):
    """
    Estimate the maximum likelihood distance between a pair of sequences.
    @param fs: form data providing the fasta text, the rate matrix,
    and the state names, one per line
    @return: a line of text reporting the estimated distance
    @raise HandlingError: if the input is not a sequence pair or if the
    states do not match the rate matrix
    """
    # the alignment must consist of exactly two sequences
    try:
        alignment = Fasta.Alignment(fs.fasta.splitlines())
    except Fasta.AlignmentError as e:
        raise HandlingError('fasta alignment error: ' + str(e))
    if alignment.get_sequence_count() != 2:
        raise HandlingError('expected a sequence pair')
    # the states must be unique and must match the rows of the rate matrix
    R = fs.matrix
    states = Util.get_stripped_lines(fs.states.splitlines())
    if len(states) != len(R):
        raise HandlingError(
                'the number of ordered states must be the same '
                'as the number of rows in the rate matrix')
    if len(set(states)) != len(states):
        raise HandlingError('the ordered states must be unique')
    # define the function of distance that is to be minimized
    model = RateMatrix.RateMatrix(R.tolist(), states)
    objective = Objective(alignment.sequences, model)
    # golden section search; the bracket is only a starting suggestion
    mle_distance = optimize.golden(objective, brack=(0.51, 2.01))
    # write the response
    out = StringIO()
    print >> out, 'maximum likelihood distance:', mle_distance
    return out.getvalue()
def create_rate_matrix(distribution, kappa, f):
    """
    Build a nucleotide rate matrix object from frequencies, kappa, and f.
    The parameter f does not affect the stationary distribution.
    @param distribution: a dictionary mapping a nucleotide to its frequency
    @param kappa: the transition / transversion substitution rate ratio
    @param f: a WAG-like parameter between zero and one
    @return: a nucleotide rate matrix object
    """
    assert len(distribution) == 4
    assert set(distribution) == set('ACGT')
    assert abs(sum(distribution.values()) - 1.0) < .0000001
    # ordered nucleotide pairs whose rate is multiplied by kappa
    transitions = ('AG', 'GA', 'CT', 'TC')
    # fill in the unscaled off-diagonal rates
    rates = {}
    for source, p_source in distribution.items():
        for sink, p_sink in distribution.items():
            if source == sink:
                continue
            if f == 1:
                rate = p_sink
            else:
                rate = (p_sink**f) / (p_source**(1-f))
            if source + sink in transitions:
                rate *= kappa
            rates[(source, sink)] = rate
    # each diagonal entry makes its row sum to zero
    for source in distribution:
        exit_rate = sum(
                rates[(source, sink)]
                for sink in distribution if sink != source)
        rates[(source, source)] = -exit_rate
    # wrap the row major form of the rates in a rate matrix object
    states = list('ACGT')
    rows = MatrixUtil.dict_to_row_major(rates, states, states)
    return RateMatrix.RateMatrix(rows, states)
def get_response_content(fs):
    """
    Normalize a rate matrix.
    @param fs: form data providing the rate matrix
    @return: the normalized matrix as a string followed by a newline
    """
    R = fs.matrix
    # the state names are arbitrary; the row indices are used as labels
    labels = [str(index) for index in range(len(R))]
    model = RateMatrix.RateMatrix(R.tolist(), labels)
    model.normalize()
    return MatrixUtil.m_to_string(model.get_row_major_rate_matrix()) + '\n'
def get_response_content(fs):
    """
    Report the expected rate of a rate matrix.
    @param fs: form data providing the rate matrix
    @return: the expected rate as a string followed by a newline
    @raise HandlingError: if the rate matrix object reports an error
    """
    R = fs.matrix
    # the row indices serve as the state labels
    labels = range(len(R))
    try:
        model = RateMatrix.RateMatrix(R.tolist(), labels)
        expected_rate = model.get_expected_rate()
    except RateMatrix.RateMatrixError as e:
        raise HandlingError('error calculating the expected rate: ' + str(e))
    return '%s\n' % str(expected_rate)
def get_response_content(fs):
    """
    Draw a tree whose branches show a simulated Jukes-Cantor history.
    @param fs: form data providing the newick tree string, the lines that
    assign nucleotides to tip names, and the image format
    @return: the image returned by DrawTreeImage.get_tree_image
    @raise HandlingError: for negative branch lengths, invalid or duplicated
    assignments, unknown or internal node names, or a drawing error
    """
    # get a properly formatted newick tree with branch lengths
    tree = Newick.parse(fs.tree, SpatialTree.SpatialTree)
    tree.assert_valid()
    if tree.has_negative_branch_lengths():
        raise HandlingError(
                'drawing a tree with negative branch lengths '
                'is not implemented')
    tree.add_branch_lengths()
    # read the name to nucleotide assignments;
    # the accepted letters are kept as a sequence of single characters
    # so that only an exact one letter match is accepted
    accepted_letters = tuple('acgtACGT')
    tip_constraints = {}
    for line in iterutils.stripped_lines(fs.column.splitlines()):
        name, letter = SnippetUtil.get_state_value_pair(line)
        if letter not in accepted_letters:
            raise HandlingError('"%s" is not a valid nucleotide' % letter)
        if name in tip_constraints:
            raise HandlingError('the name "%s" was duplicated' % name)
        tip_constraints[name] = letter.upper()
    # attach the nucleotide letters to the tips
    for name, letter in tip_constraints.items():
        try:
            node = tree.get_unique_node(name)
        except Newick.NewickSearchError as e:
            raise HandlingError(e)
        if node.children:
            raise HandlingError(
                    'constraints on internal nodes are not implemented')
        node.state = letter
    # build the Jukes-Cantor rate matrix object
    jc_rates = RateMatrix.get_jukes_cantor_rate_matrix()
    states = list('ACGT')
    jc_rows = MatrixUtil.dict_to_row_major(jc_rates, states, states)
    jc_model = RateMatrix.RateMatrix(jc_rows, states)
    # simulate the ancestral nucleotides
    jc_model.simulate_ancestral_states(tree)
    # simulate a path on each branch;
    # this breaks up the branch into a linear sequence of nodes and adds color
    for node in tree.gen_non_root_nodes():
        simulate_branch_path(tree, node)
    # lay out the tree and draw it
    EqualArcLayout.do_layout(tree)
    try:
        ext = Form.g_imageformat_to_ext[fs.imageformat]
        return DrawTreeImage.get_tree_image(tree, (640, 480), ext)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
def test_simulation(self):
    """
    Run the ancestral alignment simulation on an example tree.
    The result is computed but nothing about it is asserted.
    """
    # parse the example tree
    tree = Newick.parse(
            '(((Human:0.1, Chimpanzee:0.2)to-chimp:0.8, Gorilla:0.3)to-gorilla:0.7, Orangutan:0.4, Gibbon:0.5)all;',
            Newick.NewickTree)
    tree.assert_valid()
    # read the example alignment
    alignment = Fasta.Alignment(StringIO(Fasta.brown_example_alignment))
    # build the Jukes-Cantor rate matrix object
    jc_rates = RateMatrix.get_jukes_cantor_rate_matrix()
    states = list('ACGT')
    jc_rows = MatrixUtil.dict_to_row_major(jc_rates, states, states)
    jc_model = RateMatrix.RateMatrix(jc_rows, states)
    # simulate the ancestral states
    simulated_alignment = simulate_ancestral_alignment(
            tree, alignment, jc_model)
def gen_distance_matrices(self, count, max_steps):
    """
    Yield (ordered sequence list, distance matrix) pairs.
    Sampled matrices with a zero or an infinite off-diagonal entry are
    counted and rejected.  The generator stops when enough samples have been
    accepted or when the complexity reaches the allowed number of steps.
    @param count: the requested number of distance matrices
    @param max_steps: an upper bound on the allowed number of steps
    """
    # build the Jukes-Cantor rate matrix object
    jc_rates = RateMatrix.get_jukes_cantor_rate_matrix()
    states = list('ACGT')
    jc_rows = MatrixUtil.dict_to_row_major(jc_rates, states, states)
    model = RateMatrix.RateMatrix(jc_rows, states)
    # record the requested number of samples
    self.requested_matrix_count = count
    # do some rejection sampling
    while not (
            self.get_complexity() >= max_steps or
            self.accepted_sample_count >= count):
        # simulate an alignment from the tree
        alignment = PhyLikelihood.simulate_alignment(
                self.tree, model, self.sequence_length)
        # put the simulated sequences into the order of the stored names
        by_name = dict(zip(alignment.headers, alignment.sequences))
        sequence_list = [by_name[name] for name in self.ordered_names]
        # estimate the distance matrix
        distance_matrix = JC69.get_ML_distance_matrix(sequence_list)
        # collect the off-diagonal entries to look for degeneracies
        off_diagonal = [
                value
                for i, row in enumerate(distance_matrix)
                for j, value in enumerate(row)
                if i != j]
        # a zero entry takes precedence over an infinite entry
        if any(value == 0.0 for value in off_diagonal):
            self.rejected_zero_sample_count += 1
        elif any(value == float('inf') for value in off_diagonal):
            self.rejected_inf_sample_count += 1
        else:
            self.accepted_sample_count += 1
            yield sequence_list, distance_matrix
def test_likelihood(self):
    """
    Check the Jukes-Cantor log likelihood of the example tree and alignment.
    """
    # parse the example tree
    tree = Newick.parse(Newick.brown_example_tree, Newick.NewickTree)
    tree.assert_valid()
    # read the example alignment
    alignment = Fasta.Alignment(StringIO(Fasta.brown_example_alignment))
    # build the Jukes-Cantor rate matrix object
    jc_rates = RateMatrix.get_jukes_cantor_rate_matrix()
    states = list('ACGT')
    jc_rows = MatrixUtil.dict_to_row_major(jc_rates, states, states)
    jc_model = RateMatrix.RateMatrix(jc_rows, states)
    # compare the log likelihood to the stored reference value
    observed = get_log_likelihood(tree, alignment, jc_model)
    self.assertAlmostEqual(observed, -4146.26547208)
def create_rate_matrix(kappa, nt_distribution):
    """
    Build a nucleotide rate matrix object scaled to an expected rate of one.
    @param kappa: adjusts for the transition rate differing from the
    transversion rate
    @param nt_distribution: ordered ACGT nucleotide probabilities
    @return: a rate matrix object with one expected nucleotide substitution
    per time unit
    """
    # make some assertions about the distribution
    for p in nt_distribution:
        assert p >= 0
    assert len(nt_distribution) == 4
    assert RateMatrix.almost_equal(sum(nt_distribution), 1.0)
    # purine (A + G) and pyrimidine (C + T) totals
    A, C, G, T = nt_distribution
    R = float(A + G)
    Y = float(C + T)
    # make some more assertions about the distribution and about kappa
    assert A + G > 0
    assert C + T > 0
    assert kappa > max(-Y, -R)
    # get the normalization constant
    normalization_constant = 4 * T * C * (1 + kappa / Y) + 4 * A * G * (
            1 + kappa / R) + 4 * Y * R
    # halve the constant to correct what might be an error in the paper
    normalization_constant /= 2
    # define every entry of the dictionary rate matrix;
    # the diagonal entries written here are replaced afterwards
    nt_order = 'ACGT'
    rates = {}
    for source, p_source in zip(nt_order, nt_distribution):
        for sink, p_sink in zip(nt_order, nt_distribution):
            pair = (source, sink)
            if pair in g_transitions:
                coefficient = 1 + kappa / (p_source + p_sink)
            else:
                coefficient = 1.0
            rates[pair] = coefficient * p_sink / normalization_constant
    # each diagonal entry makes its row sum to zero
    for source in nt_order:
        rates[(source, source)] = -sum(
                rates[(source, sink)]
                for sink in nt_order if source != sink)
    # wrap the row major form of the rates in a rate matrix object
    row_major = MatrixUtil.dict_to_row_major(rates, 'ACGT', 'ACGT')
    rate_matrix_object = RateMatrix.RateMatrix(row_major, 'ACGT')
    # verify the scaling before returning the object
    expected_rate = rate_matrix_object.get_expected_rate()
    assert RateMatrix.almost_equal(expected_rate, 1.0), (
            'the rate is %f but should be 1.0' % expected_rate)
    return rate_matrix_object
def get_response_content(fs):
    """
    Estimate a maximum likelihood distance for each pair of sequences.
    @param fs: form data providing the fasta text, the rate matrix,
    and the state names, one per line
    @return: the response text showing the symmetric distance matrix
    @raise HandlingError: if fewer than two sequences are given or if the
    states do not match the rate matrix
    """
    # at least two sequences are needed
    try:
        alignment = Fasta.Alignment(StringIO(fs.fasta))
    except Fasta.AlignmentError as e:
        raise HandlingError('fasta alignment error: ' + str(e))
    if alignment.get_sequence_count() < 2:
        raise HandlingError('expected at least two sequences')
    # the states must be unique and must match the rows of the rate matrix
    R = fs.matrix
    states = Util.get_stripped_lines(StringIO(fs.states))
    if len(states) != len(R):
        raise HandlingError(
                'the number of ordered states must be the same '
                'as the number of rows in the rate matrix')
    if len(set(states)) != len(states):
        raise HandlingError('the ordered states must be unique')
    model = RateMatrix.RateMatrix(R.tolist(), states)
    # start from a zero matrix and fill in both triangles pair by pair
    n = alignment.get_sequence_count()
    distances = [[0] * n for row_index in range(n)]
    # the bracket is only a starting suggestion for the search
    search_bracket = (0.51, 2.01)
    for i, sequence_a in enumerate(alignment.sequences):
        for j, sequence_b in enumerate(alignment.sequences):
            if i >= j:
                continue
            # golden section search on the objective of this sequence pair
            objective = Objective((sequence_a, sequence_b), model)
            mle_distance = optimize.golden(objective, brack=search_bracket)
            distances[i][j] = mle_distance
            distances[j][i] = mle_distance
    # write the response
    out = StringIO()
    print >> out, 'maximum likelihood distance matrix:'
    print >> out, MatrixUtil.m_to_string(distances)
    return out.getvalue()
def get_response_content(fs):
    """
    Draw a tree whose branches show a history simulated from a rate matrix.
    @param fs: form data providing the newick tree string, the lines that
    assign nucleotides to tip names, the rate matrix, and the image format
    @return: the image returned by DrawTreeImage.get_tree_image
    @raise HandlingError: for negative branch lengths, unknown or internal
    node names, or a drawing error
    """
    # get a properly formatted newick tree with branch lengths
    tree = Newick.parse(fs.tree, SpatialTree.SpatialTree)
    tree.assert_valid()
    if tree.has_negative_branch_lengths():
        raise HandlingError(
                'drawing a tree with negative branch lengths '
                'is not implemented')
    tree.add_branch_lengths()
    # read the name to nucleotide assignments, if any were provided
    lines = Util.get_stripped_lines(fs.column.splitlines())
    if lines:
        tip_constraints = SnippetUtil.get_generic_dictionary(
                lines, 'name', 'nucleotide', list('acgtACGT'))
    else:
        tip_constraints = {}
    # attach the upper case nucleotide letters to the tips
    for name, letter in tip_constraints.items():
        try:
            node = tree.get_unique_node(name)
        except Newick.NewickSearchError as e:
            raise HandlingError(e)
        if node.children:
            raise HandlingError(
                    'constraints on internal nodes are not implemented')
        node.state = letter.upper()
    # wrap the rate matrix in a rate matrix object over the nucleotides
    R = fs.matrix
    model = RateMatrix.RateMatrix(R.tolist(), list('ACGT'))
    # simulate the ancestral nucleotides
    model.simulate_ancestral_states(tree)
    # simulate a path on each branch;
    # this breaks up the branch into a linear sequence of nodes and adds color
    for node in tree.gen_non_root_nodes():
        simulate_branch_path(tree, node, model)
    # lay out the tree and draw it
    EqualArcLayout.do_layout(tree)
    try:
        ext = Form.g_imageformat_to_ext[fs.imageformat]
        return DrawTreeImage.get_tree_image(tree, (640, 480), ext)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
def add_colors(tree, selection):
    """
    Add branch colors to a newick tree.
    Each non-root node gets a branch_color attribute, an upper case hex
    string derived from the state distributions at its two endpoints.
    @param tree: a newick tree
    @param selection: a list of taxon names
    """
    states = ('a', 'b')
    # selected tips get state 'a' and the other tips get state 'b'
    for tip in tree.gen_tips():
        tip.state = 'a' if tip.name in selection else 'b'
    # the rate of the two state model is the reciprocal of the tree length
    total_length = sum(node.blen for node in tree.gen_non_root_nodes())
    mu = 1.0 / total_length
    model = RateMatrix.RateMatrix([[-mu, mu], [mu, -mu]], states)
    # reroot at each internal node to get its state distribution;
    # the nodes are listed first because rerooting modifies the tree
    for node in list(tree.gen_internal_nodes()):
        tree.reroot(node)
        model.add_probabilities(tree)
        weights = [node.state_to_subtree_prob[state] for state in states]
        node.state_distribution = Util.weights_to_distribution(weights)
    # each tip puts all of its probability on its own state
    for tip in tree.gen_tips():
        tip.state_distribution = [
                1.0 if state == tip.state else 0.0 for state in states]
    # color each branch by the mean first state probability of its endpoints
    for node in tree.gen_non_root_nodes():
        p = (node.parent.state_distribution[0] +
                node.state_distribution[0]) / 2.0
        r, g, b = HeatMap.blue_red_gradient(p)
        node.branch_color = ('%02x%02x%02x' % (r, g, b)).upper()
def get_response_content(fs):
    """
    Compute the Jukes-Cantor log likelihood of an alignment given a tree.
    @param fs: form data providing the newick tree string and the fasta text
    @return: the log likelihood as a string followed by a newline
    @raise HandlingError: if the alignment is invalid
    """
    # parse and validate the newick tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # parse the nucleotide alignment
    try:
        alignment = Fasta.Alignment(fs.fasta.splitlines())
        alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build the Jukes-Cantor rate matrix object
    jc_rates = RateMatrix.get_jukes_cantor_rate_matrix()
    states = list('ACGT')
    jc_rows = MatrixUtil.dict_to_row_major(jc_rates, states, states)
    jc_model = RateMatrix.RateMatrix(jc_rows, states)
    # report the log likelihood
    return '%s\n' % str(
            PhyLikelihood.get_log_likelihood(tree, alignment, jc_model))
def get_response_content(fs):
    """
    Report the state distribution at each named internal node of a tree.
    @param fs: form data providing the newick tree string, the state names,
    the rate matrix, and the taxon to state assignments
    @return: text with one line per named internal node
    @raise HandlingError: if the rate matrix has fewer than two rows, if the
    states do not match the rate matrix, or if a tip has no assigned state
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # read the ordered states
    ordered_states = Util.get_stripped_lines(StringIO(fs.states))
    # read the matrix from the form data
    R = fs.rate_matrix
    if len(R) < 2:
        raise HandlingError('the rate matrix should have at least two rows')
    if len(ordered_states) != len(R):
        msg_a = 'the number of ordered states should be the same '
        msg_b = 'as the number of rows in the matrix'
        raise HandlingError(msg_a + msg_b)
    # get the dictionary mapping taxa to states
    taxon_to_state = SnippetUtil.get_generic_dictionary(
            StringIO(fs.assignments), 'taxon name', 'state name',
            ordered_states)
    # set the states for each of the tree tips;
    # a tip without an assignment is reported as a handling error
    # instead of escaping as a bare KeyError
    for node in tree.gen_tips():
        try:
            node.state = taxon_to_state[node.name]
        except KeyError:
            msg = 'no state was assigned to the taxon "%s"' % node.name
            raise HandlingError(msg)
    # create the rate matrix object
    rate_matrix_object = RateMatrix.RateMatrix(R.tolist(), ordered_states)
    # repeatedly reroot and calculate root state distributions;
    # the nodes are listed first because rerooting modifies the tree
    internal_nodes = list(tree.gen_internal_nodes())
    for node in internal_nodes:
        tree.reroot(node)
        rate_matrix_object.add_probabilities(tree)
        weights = [
                node.state_to_subtree_prob[state]
                for state in ordered_states]
        node.state_distribution = Util.weights_to_distribution(weights)
    # define the response
    out = StringIO()
    # show the ancestral state distributions of the named internal nodes
    for node in tree.gen_internal_nodes():
        if node.name:
            distribution_string = '\t'.join(
                    str(p) for p in node.state_distribution)
            print >> out, node.name, ':', distribution_string
    # write the response
    return out.getvalue()
def get_response_content(fs):
    """
    Simulate a nucleotide alignment under a three component mixture model.
    @param fs: form data providing the newick tree string, three mixture
    weights, three 4x4 nucleotide rate matrices, and the number of columns
    @return: the simulated sequences of the tree tips joined by newlines
    @raise HandlingError: if a matrix is not 4x4, if the mixture weights
    sum to zero, or if the simulation fails
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # get the mixture weights
    weights = [fs.weight_a, fs.weight_b, fs.weight_c]
    # get the matrices
    matrices = [fs.matrix_a, fs.matrix_b, fs.matrix_c]
    for R in matrices:
        if R.shape != (4, 4):
            msg = 'expected each nucleotide rate matrix to be 4x4'
            raise HandlingError(msg)
    # create the mixture proportions;
    # the sum is forced to a float so that integer weights are not
    # truncated by integer division, and a zero sum is reported to the user
    # instead of failing inside the division
    weight_sum = float(sum(weights))
    if not weight_sum:
        raise HandlingError('the mixture weights should not sum to zero')
    mixture_proportions = [weight / weight_sum for weight in weights]
    # create the rate matrix objects
    ordered_states = list('ACGT')
    rate_matrix_objects = [
            RateMatrix.RateMatrix(R.tolist(), ordered_states)
            for R in matrices]
    # create the mixture model
    mixture_model = SubModel.MixtureModel(
            mixture_proportions, rate_matrix_objects)
    # normalize the mixture model
    mixture_model.normalize()
    # simulate the alignment
    try:
        alignment = PhyLikelihood.simulate_alignment(
                tree, mixture_model, fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # get the simulated sequences of the tips
    arr = [alignment.get_fasta_sequence(node.name) for node in tree.gen_tips()]
    # return the alignment string
    return '\n'.join(arr) + '\n'
def get_response_content(fs):
    """
    Sample a coalescent tree and an alignment and write them as BEAST xml.
    The xml text also carries comments that record the sampled rate
    autocorrelation, the root height, and two log likelihoods.
    @param fs: form data providing the number of leaves (nleaves)
    and the number of sites (nsites)
    @return: the xml text
    """
    # init the response and get the user variables
    out = StringIO()
    nleaves = fs.nleaves
    # nvertices and nbranches are only needed by the commented-out
    # optimization code further down
    nvertices = nleaves * 2 - 1
    nbranches = nvertices - 1
    nsites = fs.nsites
    # sample the coalescent tree with timelike branch lengths
    R, B = kingman.sample(fs.nleaves)
    r = Ftree.R_to_root(R)
    # get the leaf vertex names
    N = dict(zip(range(nleaves), string.uppercase[:nleaves]))
    N_leaves = dict(N)
    # get the internal vertex names;
    # each one is the sorted concatenation of the names of its leaves
    v_to_leaves = R_to_v_to_leaves(R)
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            N[v] = ''.join(sorted(N[leaf] for leaf in leaves))
    # get vertex ages
    v_to_age = kingman.RB_to_v_to_age(R, B)
    # sample the rates on the branches
    b_to_rate = sample_b_to_rate(R)
    xycorr = get_correlation(R, b_to_rate)
    # define B_subs in terms of substitutions instead of time
    B_subs = dict((p, t * b_to_rate[p]) for p, t in B.items())
    # sample the alignment
    v_to_seq = sample_v_to_seq(R, B_subs, nsites)
    # get the log likelihood; this is kind of horrible
    # because the tree goes through a newick string to reach the
    # likelihood function
    pairs = [(N[v], ''.join(v_to_seq[v])) for v in range(nleaves)]
    headers, sequences = zip(*pairs)
    alignment = Fasta.create_alignment(headers, sequences)
    newick_string = FtreeIO.RBN_to_newick(R, B_subs, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    dictionary_rate_matrix = RateMatrix.get_jukes_cantor_rate_matrix()
    ordered_states = list('ACGT')
    row_major_rate_matrix = MatrixUtil.dict_to_row_major(
            dictionary_rate_matrix, ordered_states, ordered_states)
    rate_matrix_object = RateMatrix.RateMatrix(
            row_major_rate_matrix, ordered_states)
    ll = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are all 1.0
    # by using the timelike branch lengths B directly
    newick_string = FtreeIO.RBN_to_newick(R, B, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    ll_unity = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are numerically optimized
    # TODO incorporate the result into the xml file
    # TODO speed up the likelihood evaluation (beagle? C module?)
    #f = Opt(R, B, N_leaves, alignment)
    #X_logs = [0.0] * nbranches
    #result = scipy.optimize.fmin(f, X_logs, full_output=True)
    #print result
    #
    # write the xml header and the comments that record the sampled values
    print >> out, '<?xml version="1.0"?>'
    print >> out, '<beast>'
    print >> out
    print >> out, '<!-- actual rate autocorrelation', xycorr, '-->'
    print >> out, '<!-- actual root height', v_to_age[r], '-->'
    print >> out, '<!-- actual log likelihood', ll, '-->'
    print >> out, '<!-- ll if rates were unity', ll_unity, '-->'
    print >> out
    print >> out, '<!--'
    print >> out, 'predefine the taxa as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    print >> out, get_leaf_taxon_defn(list(string.uppercase[:nleaves]))
    print >> out
    print >> out, '<!--'
    print >> out, 'define the alignment as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    # NOTE(review): leaves is not assigned in this section; it is whatever
    # the earlier loop over sorted(v_to_leaves.items()) bound last.
    # Presumably that is the leaf collection of the root -- confirm, or
    # pass the intended leaves explicitly.
    print >> out, get_alignment_defn(leaves, N, v_to_seq)
    print >> out
    print >> out, '<!--'
    print >> out, 'specify the starting tree as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, get_starting_tree_defn(R, B, N_leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'connect the tree model as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, g_tree_model_defn
    print >> out
    print >> out, g_uncorrelated_relaxed_clock_info
    print >> out
    # the string literal below is disabled code and is never executed
    """
    print >> out, '<!--'
    print >> out, 'create a list of taxa for which to constrain the mrca as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_subset_defn(N, v, leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'create a tmrcaStatistic that will record the height as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_stat_defn(N[v])
    """
    print >> out
    print >> out, g_likelihood_info
    print >> out
    print >> out, '<!--'
    print >> out, 'run the mcmc'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    print >> out, get_mcmc_defn(v_to_leaves, v_to_age, N)
    print >> out
    print >> out, '</beast>'
    # return the response
    return out.getvalue()