コード例 #1
0
def get_response_content(fs):
    """
    Analyze a nucleotide alignment under a three-component mixture model.
    Each component is a 4x4 rate matrix over the ordered states A, C, G, T.
    @param fs: form data with the fields tree, alignment, weight_a,
    weight_b, weight_c, matrix_a, matrix_b and matrix_c
    @return: the string produced by do_analysis followed by a newline
    @raise HandlingError: when a matrix is not 4x4, when the alignment
    is rejected, or when the mixture weights sum to zero
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # get the mixture weights
    weights = [fs.weight_a, fs.weight_b, fs.weight_c]
    # get the matrices
    matrices = [fs.matrix_a, fs.matrix_b, fs.matrix_c]
    if any(R.shape != (4, 4) for R in matrices):
        raise HandlingError('expected each nucleotide rate matrix to be 4x4')
    # get the nucleotide alignment
    try:
        alignment = Fasta.Alignment(fs.alignment.splitlines())
        alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # create the mixture proportions
    # The sum is taken as a float so that integer weights are not
    # truncated by integer division.
    weight_sum = float(sum(weights))
    if not weight_sum:
        # report a user error instead of letting ZeroDivisionError escape
        raise HandlingError('the mixture weights must not sum to zero')
    mixture_proportions = [weight / weight_sum for weight in weights]
    # create the rate matrix objects
    ordered_states = list('ACGT')
    rate_matrix_objects = [
        RateMatrix.RateMatrix(R.tolist(), ordered_states) for R in matrices]
    # create the mixture model
    mixture_model = SubModel.MixtureModel(mixture_proportions,
                                          rate_matrix_objects)
    # normalize the mixture model
    mixture_model.normalize()
    # return the html string
    return do_analysis(mixture_model, alignment, tree) + '\n'
コード例 #2
0
ファイル: 20080123b.py プロジェクト: BIGtigr/xgcode
def get_response_content(fs):
    """
    Return fasta text for an alignment augmented by simulation.
    The augmented alignment comes from
    PhyLikelihood.simulate_ancestral_alignment using a Jukes-Cantor
    rate matrix object.
    @param fs: form data with the fields tree and fasta
    @return: one fasta entry per tree node in preorder, newline terminated
    @raise HandlingError: when the alignment or the simulation is rejected
    """
    # the tree must parse and validate before anything else is attempted
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # read the observed nucleotide alignment
    try:
        observed = Fasta.Alignment(fs.fasta.splitlines())
        observed.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build the Jukes-Cantor rate matrix object over A, C, G, T
    jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
    nucleotides = list('ACGT')
    jc_model = RateMatrix.RateMatrix(
        MatrixUtil.dict_to_row_major(jc_dict, nucleotides, nucleotides),
        nucleotides)
    # extend the alignment with simulated sequences
    try:
        augmented = PhyLikelihood.simulate_ancestral_alignment(
            tree, observed, jc_model)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # the tree preorder defines the order of the fasta entries
    fasta_entries = [
        augmented.get_fasta_sequence(node.name) for node in tree.preorder()]
    return '\n'.join(fasta_entries) + '\n'
コード例 #3
0
ファイル: 20120403a.py プロジェクト: BIGtigr/xgcode
 def __call__(self, X_logs):
     """
     Evaluate the objective for a vector of log branch rates.
     The vth entry of X_logs is the log rate of the branch above v.
     @param X_logs: vector of branch rate logs
     @return: negative log likelihood (the quantity to be minimized)
     """
     # exponentiate every entry up front to recover the branch rates
     rates = [math.exp(log_rate) for log_rate in X_logs]
     # scale each stored branch value by the rate of its child vertex
     scaled_branches = {}
     for v_parent, v_child in self.R:
         edge = frozenset([v_parent, v_child])
         scaled_branches[edge] = rates[v_child] * self.B[edge]
     # round-trip through newick text to get a tree object
     tree = Newick.parse(
             FtreeIO.RBN_to_newick(self.R, scaled_branches, self.N_leaves),
             Newick.NewickTree)
     # the Jukes-Cantor rate matrix object is rebuilt on every call
     jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
     nucleotides = list('ACGT')
     jc_model = RateMatrix.RateMatrix(
             MatrixUtil.dict_to_row_major(jc_dict, nucleotides, nucleotides),
             nucleotides)
     # negate the log likelihood so that a minimizer can be used
     return -PhyLikelihood.get_log_likelihood(
             tree, self.alignment, jc_model)
コード例 #4
0
def get_response_content(fs):
    """
    Report the maximum likelihood distance between a pair of sequences.
    @param fs: form data with the fields fasta, matrix and states
    @return: the response text
    @raise HandlingError: when the alignment is rejected, does not hold
    exactly two sequences, or the states do not fit the rate matrix
    """
    # the alignment must hold exactly one pair of sequences
    try:
        alignment = Fasta.Alignment(fs.fasta.splitlines())
    except Fasta.AlignmentError as e:
        raise HandlingError('fasta alignment error: ' + str(e))
    if alignment.get_sequence_count() != 2:
        raise HandlingError('expected a sequence pair')
    # the states must label the rows of the rate matrix without repeats
    R = fs.matrix
    ordered_states = Util.get_stripped_lines(fs.states.splitlines())
    if len(ordered_states) != len(R):
        raise HandlingError(
            'the number of ordered states must be the same '
            'as the number of rows in the rate matrix')
    if len(set(ordered_states)) != len(ordered_states):
        raise HandlingError('the ordered states must be unique')
    # the objective is minimized over the distance by golden section search
    objective = Objective(
        alignment.sequences,
        RateMatrix.RateMatrix(R.tolist(), ordered_states))
    # the bracket is only a starting suggestion for the search
    mle_distance = optimize.golden(objective, brack=(0.51, 2.01))
    # write the response
    out = StringIO()
    print >> out, 'maximum likelihood distance:', mle_distance
    return out.getvalue()
コード例 #5
0
def create_rate_matrix(distribution, kappa, f):
    """
    Build a nucleotide rate matrix object from frequencies, kappa and f.
    The parameter f does not affect the stationary distribution.
    @param distribution: a dictionary mapping a nucleotide to its frequency
    @param kappa: the transition / transversion substitution rate ratio
    @param f: a WAG-like parameter between zero and one
    @return: a nucleotide rate matrix object
    """
    assert len(distribution) == 4
    assert set(distribution) == set('ACGT')
    assert abs(sum(distribution.values()) - 1.0) < 1e-7
    # these ordered pairs get the extra factor of kappa
    kappa_pairs = ('AG', 'GA', 'CT', 'TC')
    # fill in the unscaled off-diagonal rates
    rates = {}
    for source, p_source in distribution.items():
        for sink, p_sink in distribution.items():
            if source == sink:
                continue
            if f == 1:
                rate = p_sink
            else:
                rate = (p_sink**f) / (p_source**(1-f))
            if source + sink in kappa_pairs:
                rate *= kappa
            rates[(source, sink)] = rate
    # choose each diagonal entry so that its row sums to zero
    for source in distribution:
        rates[(source, source)] = -sum(
                rates[(source, sink)]
                for sink in distribution if sink != source)
    # wrap the row major form of the matrix in a rate matrix object
    nucleotides = list('ACGT')
    return RateMatrix.RateMatrix(
            MatrixUtil.dict_to_row_major(rates, nucleotides, nucleotides),
            nucleotides)
コード例 #6
0
ファイル: 20080225d.py プロジェクト: BIGtigr/xgcode
def get_response_content(fs):
    """
    Return the text of the rate matrix after RateMatrix normalization.
    @param fs: form data with the field matrix
    @return: the matrix string followed by a newline
    """
    R = fs.matrix
    # the state names are placeholders: the stringified row indices
    placeholder_states = list(map(str, range(len(R))))
    model = RateMatrix.RateMatrix(R.tolist(), placeholder_states)
    model.normalize()
    # read the matrix back in row major form and format it
    return MatrixUtil.m_to_string(model.get_row_major_rate_matrix()) + '\n'
コード例 #7
0
ファイル: 20080225c.py プロジェクト: BIGtigr/xgcode
def get_response_content(fs):
    """
    Return the expected rate of the submitted rate matrix.
    @param fs: form data with the field matrix
    @return: the expected rate as a string followed by a newline
    @raise HandlingError: when RateMatrix reports an error
    """
    R = fs.matrix
    # the row indices serve as the state labels
    states = range(len(R))
    try:
        expected_rate = RateMatrix.RateMatrix(
            R.tolist(), states).get_expected_rate()
    except RateMatrix.RateMatrixError as e:
        raise HandlingError('error calculating the expected rate: ' + str(e))
    return str(expected_rate) + '\n'
コード例 #8
0
ファイル: 20080129b.py プロジェクト: BIGtigr/xgcode
def get_response_content(fs):
    """
    Draw a tree whose branches carry simulated nucleotide paths.
    Ancestral states are simulated with a Jukes-Cantor rate matrix object.
    @param fs: form data with the fields tree, column and imageformat
    @return: the result of DrawTreeImage.get_tree_image
    @raise HandlingError: for negative branch lengths, invalid or
    duplicated column entries, failed node lookups, constraints on
    internal nodes, or a drawing error
    """
    # parse the tree and make sure that it can be drawn
    tree = Newick.parse(fs.tree, SpatialTree.SpatialTree)
    tree.assert_valid()
    if tree.has_negative_branch_lengths():
        raise HandlingError(
            'drawing a tree with negative branch lengths is not implemented')
    tree.add_branch_lengths()
    # collect the name to nucleotide constraints from the column text
    accepted_letters = list('acgtACGT')
    name_to_nucleotide = {}
    for line in iterutils.stripped_lines(fs.column.splitlines()):
        name, letter = SnippetUtil.get_state_value_pair(line)
        if letter not in accepted_letters:
            raise HandlingError('"%s" is not a valid nucleotide' % letter)
        if name in name_to_nucleotide:
            raise HandlingError('the name "%s" was duplicated' % name)
        # the nucleotides are stored in upper case
        name_to_nucleotide[name] = letter.upper()
    # only childless nodes may be constrained
    for name, letter in name_to_nucleotide.items():
        try:
            target = tree.get_unique_node(name)
        except Newick.NewickSearchError as e:
            raise HandlingError(e)
        if target.children:
            raise HandlingError(
                'constraints on internal nodes are not implemented')
        target.state = letter
    # build the Jukes-Cantor rate matrix object
    jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
    nucleotides = list('ACGT')
    jc_model = RateMatrix.RateMatrix(
        MatrixUtil.dict_to_row_major(jc_dict, nucleotides, nucleotides),
        nucleotides)
    # simulate the ancestral nucleotides
    jc_model.simulate_ancestral_states(tree)
    # simulate a path on each branch
    # this breaks up the branch into a linear sequence of nodes and adds color
    for branch_node in tree.gen_non_root_nodes():
        simulate_branch_path(tree, branch_node)
    # lay out the tree and render it in the requested format
    EqualArcLayout.do_layout(tree)
    try:
        image_ext = Form.g_imageformat_to_ext[fs.imageformat]
        return DrawTreeImage.get_tree_image(tree, (640, 480), image_ext)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
コード例 #9
0
ファイル: PhyLikelihood.py プロジェクト: BIGtigr/xgcode
 def test_simulation(self):
     """
     Run simulate_ancestral_alignment on the example alignment.
     Nothing is asserted about the simulated alignment itself.
     """
     # the example tree has named internal nodes
     tree_string = '(((Human:0.1, Chimpanzee:0.2)to-chimp:0.8, Gorilla:0.3)to-gorilla:0.7, Orangutan:0.4, Gibbon:0.5)all;'
     tree = Newick.parse(tree_string, Newick.NewickTree)
     tree.assert_valid()
     # read the example alignment
     alignment = Fasta.Alignment(StringIO(Fasta.brown_example_alignment))
     # build the Jukes-Cantor rate matrix object
     jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
     nucleotides = list('ACGT')
     jc_model = RateMatrix.RateMatrix(
         MatrixUtil.dict_to_row_major(jc_dict, nucleotides, nucleotides),
         nucleotides)
     # the call is expected to complete without raising
     simulated_alignment = simulate_ancestral_alignment(
         tree, alignment, jc_model)
コード例 #10
0
ファイル: 20080828a.py プロジェクト: BIGtigr/xgcode
 def gen_distance_matrices(self, count, max_steps):
     """
     Yield (ordered sequence list, distance matrix) pairs.
     Sampling stops once enough samples have been accepted or once
     the complexity reaches the allowed number of steps.
     @param count: the requested number of distance matrices
     @param max_steps: an upper bound on the allowed number of steps
     """
     # build the Jukes-Cantor rate matrix object used for simulation
     jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
     nucleotides = list('ACGT')
     model = RateMatrix.RateMatrix(
         MatrixUtil.dict_to_row_major(jc_dict, nucleotides, nucleotides),
         nucleotides)
     # record the requested number of samples
     self.requested_matrix_count = count
     infinity = float('inf')
     # rejection sampling: degenerate distance matrices are only counted
     while True:
         if (self.get_complexity() >= max_steps or
                 self.accepted_sample_count >= count):
             break
         # simulate an alignment from the tree
         alignment = PhyLikelihood.simulate_alignment(
             self.tree, model, self.sequence_length)
         # put the simulated sequences in the order of self.ordered_names
         name_to_sequence = dict(zip(alignment.headers,
                                     alignment.sequences))
         sequence_list = [name_to_sequence[name]
                          for name in self.ordered_names]
         # estimate the distance matrix
         distance_matrix = JC69.get_ML_distance_matrix(sequence_list)
         # gather the off-diagonal entries to look for degeneracies
         off_diagonal = []
         for i, row in enumerate(distance_matrix):
             off_diagonal.extend(
                 value for j, value in enumerate(row) if i != j)
         # a zero entry takes precedence over an infinite entry
         if any(value == 0.0 for value in off_diagonal):
             self.rejected_zero_sample_count += 1
         elif any(value == infinity for value in off_diagonal):
             self.rejected_inf_sample_count += 1
         else:
             self.accepted_sample_count += 1
             yield sequence_list, distance_matrix
0
ファイル: PhyLikelihood.py プロジェクト: BIGtigr/xgcode
 def test_likelihood(self):
     """
     Check the Jukes-Cantor log likelihood of the example data.
     """
     # parse and validate the example tree
     tree = Newick.parse(Newick.brown_example_tree, Newick.NewickTree)
     tree.assert_valid()
     # read the example alignment
     alignment = Fasta.Alignment(StringIO(Fasta.brown_example_alignment))
     # build the Jukes-Cantor rate matrix object
     jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
     nucleotides = list('ACGT')
     jc_model = RateMatrix.RateMatrix(
         MatrixUtil.dict_to_row_major(jc_dict, nucleotides, nucleotides),
         nucleotides)
     # compare against the stored reference value
     observed = get_log_likelihood(tree, alignment, jc_model)
     self.assertAlmostEqual(observed, -4146.26547208)
コード例 #12
0
ファイル: F84.py プロジェクト: BIGtigr/xgcode
def create_rate_matrix(kappa, nt_distribution):
    """
    Build a rate matrix object from kappa and nucleotide probabilities.
    @param kappa: adjusts for the transition rate differing from the transversion rate
    @param nt_distribution: ordered ACGT nucleotide probabilities
    @return: a rate matrix object with one expected nucleotide substitution per time unit
    """
    nucleotides = 'ACGT'
    # make some assertions about the distribution
    assert all(p >= 0 for p in nt_distribution)
    assert len(nt_distribution) == 4
    assert RateMatrix.almost_equal(sum(nt_distribution), 1.0)
    # R and Y are the summed A + G and C + T probabilities
    A, C, G, T = nt_distribution
    R = float(A + G)
    Y = float(C + T)
    # make some more assertions about the distribution and about kappa
    assert A + G > 0
    assert C + T > 0
    assert kappa > max(-Y, -R)
    # get the normalization constant
    normalization_constant = 4 * T * C * (1 + kappa / Y) + 4 * A * G * (
        1 + kappa / R) + 4 * Y * R
    # adjust the normalization constant to correct what might be an error in the paper
    normalization_constant /= 2
    # fill every entry; the diagonal entries are replaced afterwards
    rates = {}
    for source, p_source in zip(nucleotides, nt_distribution):
        for sink, p_sink in zip(nucleotides, nt_distribution):
            pair = (source, sink)
            if pair in g_transitions:
                coefficient = 1 + kappa / (p_source + p_sink)
            else:
                coefficient = 1.0
            rates[pair] = coefficient * p_sink / normalization_constant
    # each diagonal entry is the negated sum of the rest of its row
    for source in nucleotides:
        rates[(source, source)] = -sum(
            rates[(source, sink)] for sink in nucleotides if source != sink)
    # convert the dictionary rate matrix to a row major rate matrix
    row_major = MatrixUtil.dict_to_row_major(rates, 'ACGT', 'ACGT')
    rate_matrix_object = RateMatrix.RateMatrix(row_major, 'ACGT')
    # the construction is supposed to give an expected rate of one
    expected_rate = rate_matrix_object.get_expected_rate()
    assert RateMatrix.almost_equal(expected_rate, 1.0), (
        'the rate is %f but should be 1.0' % expected_rate)
    return rate_matrix_object
コード例 #13
0
def get_response_content(fs):
    """
    Report pairwise maximum likelihood distances for an alignment.
    @param fs: form data with the fields fasta, matrix and states
    @return: the response text holding the distance matrix
    @raise HandlingError: when the alignment is rejected, holds fewer
    than two sequences, or the states do not fit the rate matrix
    """
    # at least two sequences are needed for a pairwise distance
    try:
        alignment = Fasta.Alignment(StringIO(fs.fasta))
    except Fasta.AlignmentError as e:
        raise HandlingError('fasta alignment error: ' + str(e))
    if alignment.get_sequence_count() < 2:
        raise HandlingError('expected at least two sequences')
    # the states must label the rows of the rate matrix without repeats
    R = fs.matrix
    ordered_states = Util.get_stripped_lines(StringIO(fs.states))
    if len(ordered_states) != len(R):
        raise HandlingError(
            'the number of ordered states must be the same '
            'as the number of rows in the rate matrix')
    if len(set(ordered_states)) != len(ordered_states):
        raise HandlingError('the ordered states must be unique')
    model = RateMatrix.RateMatrix(R.tolist(), ordered_states)
    # start from an all-zero square matrix
    n = alignment.get_sequence_count()
    distances = [[0] * n for _ in range(n)]
    # the bracket is only a starting suggestion for the search
    bracket = (0.51, 2.01)
    for i, sequence_a in enumerate(alignment.sequences):
        for j, sequence_b in enumerate(alignment.sequences):
            if j <= i:
                continue
            # golden section search over the distance for this pair
            objective = Objective((sequence_a, sequence_b), model)
            mle_distance = optimize.golden(objective, brack=bracket)
            # the estimate is stored symmetrically
            distances[i][j] = distances[j][i] = mle_distance
    # write the response
    out = StringIO()
    print >> out, 'maximum likelihood distance matrix:'
    print >> out, MatrixUtil.m_to_string(distances)
    return out.getvalue()
コード例 #14
0
def get_response_content(fs):
    """
    Draw a tree whose branches carry simulated nucleotide paths.
    Ancestral states are simulated with the submitted rate matrix.
    @param fs: form data with the fields tree, column, matrix
    and imageformat
    @return: the result of DrawTreeImage.get_tree_image
    @raise HandlingError: for negative branch lengths, failed node
    lookups, constraints on internal nodes, or a drawing error
    """
    # parse the tree and make sure that it can be drawn
    tree = Newick.parse(fs.tree, SpatialTree.SpatialTree)
    tree.assert_valid()
    if tree.has_negative_branch_lengths():
        raise HandlingError(
            'drawing a tree with negative branch lengths is not implemented')
    tree.add_branch_lengths()
    # an empty column means that no node is constrained
    column_lines = Util.get_stripped_lines(fs.column.splitlines())
    if column_lines:
        name_to_nt = SnippetUtil.get_generic_dictionary(
                column_lines, 'name', 'nucleotide', list('acgtACGT'))
    else:
        name_to_nt = {}
    # only childless nodes may be constrained
    for name, nt in name_to_nt.items():
        try:
            target = tree.get_unique_node(name)
        except Newick.NewickSearchError as e:
            raise HandlingError(e)
        if target.children:
            raise HandlingError(
                'constraints on internal nodes are not implemented')
        # the nucleotides are stored in upper case
        target.state = nt.upper()
    # wrap the submitted matrix using the states A, C, G, T
    R = fs.matrix
    model = RateMatrix.RateMatrix(R.tolist(), list('ACGT'))
    # simulate the ancestral nucleotides
    model.simulate_ancestral_states(tree)
    # simulate a path on each branch
    # this breaks up the branch into a linear sequence of nodes and adds color
    for branch_node in tree.gen_non_root_nodes():
        simulate_branch_path(tree, branch_node, model)
    # lay out the tree and render it in the requested format
    EqualArcLayout.do_layout(tree)
    try:
        image_ext = Form.g_imageformat_to_ext[fs.imageformat]
        return DrawTreeImage.get_tree_image(tree, (640, 480), image_ext)
    except CairoUtil.CairoUtilError as e:
        raise HandlingError(e)
コード例 #15
0
ファイル: 20080617a.py プロジェクト: BIGtigr/xgcode
def add_colors(tree, selection):
    """
    Set a branch_color attribute on every non-root node of the tree.
    Tips named in the selection get state 'a' and the others state 'b'.
    The color of a branch comes from the mean of the probabilities of
    state 'a' at its two endpoints.
    @param tree: a newick tree
    @param selection: a list of taxon names
    """
    states = ('a', 'b')
    # mark each tip according to its membership in the selection
    for tip in tree.gen_tips():
        tip.state = 'a' if tip.name in selection else 'b'
    # the symmetric two-state rate is the reciprocal of the tree length
    total_length = sum(node.blen for node in tree.gen_non_root_nodes())
    mu = 1.0 / total_length
    model = RateMatrix.RateMatrix([[-mu, mu], [mu, -mu]], states)
    # reroot at each internal node to get its state distribution;
    # the nodes are listed before any rerooting takes place
    for internal in list(tree.gen_internal_nodes()):
        tree.reroot(internal)
        model.add_probabilities(tree)
        internal.state_distribution = Util.weights_to_distribution(
            [internal.state_to_subtree_prob[state] for state in states])
    # each tip puts all of its probability on its own state
    for tip in tree.gen_tips():
        tip.state_distribution = [
            1.0 if state == tip.state else 0.0 for state in states]
    # color each branch by the mean probability of state 'a'
    for node in tree.gen_non_root_nodes():
        p_parent = node.parent.state_distribution[0]
        p_node = node.state_distribution[0]
        r, g, b = HeatMap.blue_red_gradient((p_parent + p_node) / 2.0)
        node.branch_color = ('%02x%02x%02x' % (r, g, b)).upper()
コード例 #16
0
ファイル: 20080122a.py プロジェクト: BIGtigr/xgcode
def get_response_content(fs):
    """
    Return the log likelihood from PhyLikelihood.get_log_likelihood.
    The rate matrix object is built from the Jukes-Cantor rate matrix.
    @param fs: form data with the fields tree and fasta
    @return: the log likelihood as a string followed by a newline
    @raise HandlingError: when the alignment is rejected
    """
    # parse and validate the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # read the nucleotide alignment
    try:
        alignment = Fasta.Alignment(fs.fasta.splitlines())
        alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build the Jukes-Cantor rate matrix object
    jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
    nucleotides = list('ACGT')
    jc_model = RateMatrix.RateMatrix(
            MatrixUtil.dict_to_row_major(jc_dict, nucleotides, nucleotides),
            nucleotides)
    # evaluate the likelihood and format the response
    return str(PhyLikelihood.get_log_likelihood(
            tree, alignment, jc_model)) + '\n'
コード例 #17
0
ファイル: 20080624a.py プロジェクト: BIGtigr/xgcode
def get_response_content(fs):
    """
    Report a state distribution for each named internal node of a tree.
    The tree is rerooted at each internal node in turn, and the subtree
    probabilities at that node are converted to a distribution.
    @param fs: form data with the fields tree, states, rate_matrix
    and assignments
    @return: the response text with one line per named internal node
    @raise HandlingError: when the matrix has fewer than two rows,
    when the states do not fit the matrix or are not unique,
    or when a tip of the tree has no assigned state
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # read the ordered states
    ordered_states = Util.get_stripped_lines(StringIO(fs.states))
    # read the matrix from the form data
    R = fs.rate_matrix
    if len(R) < 2:
        raise HandlingError('the rate matrix should have at least two rows')
    if len(ordered_states) != len(R):
        msg_a = 'the number of ordered states should be the same '
        msg_b = 'as the number of rows in the matrix'
        raise HandlingError(msg_a + msg_b)
    # repeated state names would make the rows of the matrix ambiguous
    if len(set(ordered_states)) != len(ordered_states):
        raise HandlingError('the ordered states must be unique')
    # get the dictionary mapping taxa to states
    taxon_to_state = SnippetUtil.get_generic_dictionary(
        StringIO(fs.assignments), 'taxon name', 'state name', ordered_states)
    # set the states for each of the tree tips
    for node in tree.gen_tips():
        try:
            node.state = taxon_to_state[node.name]
        except KeyError:
            # report a user error instead of letting the KeyError escape
            msg = 'no state was assigned to the taxon "%s"' % node.name
            raise HandlingError(msg)
    # create the rate matrix object
    rate_matrix_object = RateMatrix.RateMatrix(R.tolist(), ordered_states)
    # repeatedly reroot and calculate root state distributions;
    # the nodes are listed before any rerooting takes place
    internal_nodes = list(tree.gen_internal_nodes())
    for node in internal_nodes:
        tree.reroot(node)
        rate_matrix_object.add_probabilities(tree)
        weights = [
            node.state_to_subtree_prob[state] for state in ordered_states
        ]
        node.state_distribution = Util.weights_to_distribution(weights)
    # define the response
    out = StringIO()
    # show the ancestral state distributions of the named internal nodes
    for node in tree.gen_internal_nodes():
        if node.name:
            distribution_text = '\t'.join(
                str(p) for p in node.state_distribution)
            print >> out, node.name, ':', distribution_text
    # write the response
    return out.getvalue()
コード例 #18
0
def get_response_content(fs):
    """
    Simulate an alignment on a tree under a three-component mixture model.
    Each component is a 4x4 rate matrix over the ordered states A, C, G, T.
    @param fs: form data with the fields tree, ncols, weight_a, weight_b,
    weight_c, matrix_a, matrix_b and matrix_c
    @return: one fasta entry per tip of the tree, newline terminated
    @raise HandlingError: when a matrix is not 4x4, when the mixture
    weights sum to zero, or when the simulation is rejected
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # get the mixture weights
    weights = [fs.weight_a, fs.weight_b, fs.weight_c]
    # get the matrices
    matrices = [fs.matrix_a, fs.matrix_b, fs.matrix_c]
    if any(R.shape != (4, 4) for R in matrices):
        raise HandlingError('expected each nucleotide rate matrix to be 4x4')
    # create the mixture proportions
    # The sum is taken as a float so that integer weights are not
    # truncated by integer division.
    weight_sum = float(sum(weights))
    if not weight_sum:
        # report a user error instead of letting ZeroDivisionError escape
        raise HandlingError('the mixture weights must not sum to zero')
    mixture_proportions = [weight / weight_sum for weight in weights]
    # create the rate matrix objects
    ordered_states = list('ACGT')
    rate_matrix_objects = [
        RateMatrix.RateMatrix(R.tolist(), ordered_states) for R in matrices]
    # create the mixture model
    mixture_model = SubModel.MixtureModel(mixture_proportions,
                                          rate_matrix_objects)
    # normalize the mixture model
    mixture_model.normalize()
    # simulate the alignment
    try:
        alignment = PhyLikelihood.simulate_alignment(tree, mixture_model,
                                                     fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # get the fasta entries in the order of the tips of the tree
    arr = [alignment.get_fasta_sequence(node.name)
           for node in tree.gen_tips()]
    # return the alignment string
    return '\n'.join(arr) + '\n'
コード例 #19
0
ファイル: 20120403a.py プロジェクト: BIGtigr/xgcode
def get_response_content(fs):
    """
    Build the text of a BEAST xml document from a simulated data set.
    A tree is sampled with kingman.sample, rates are sampled on its
    branches, and an alignment is sampled on the rate-scaled tree.
    The log likelihoods written into the xml comments are computed
    with a Jukes-Cantor rate matrix object.
    @param fs: form data with the fields nleaves and nsites
    @return: the xml text
    """
    # init the response and get the user variables
    out = StringIO()
    nleaves = fs.nleaves
    # nvertices and nbranches are only used by the disabled optimization
    # code further down
    nvertices = nleaves * 2 - 1
    nbranches = nvertices - 1
    nsites = fs.nsites
    # sample the coalescent tree with timelike branch lengths
    R, B = kingman.sample(fs.nleaves)
    r = Ftree.R_to_root(R)
    # get the leaf vertex names
    # the leaves are named with the first nleaves uppercase letters
    N = dict(zip(range(nleaves), string.uppercase[:nleaves]))
    N_leaves = dict(N)
    # get the internal vertex names
    # an internal vertex is named by joining the sorted names of its leaves
    v_to_leaves = R_to_v_to_leaves(R)
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            N[v] = ''.join(sorted(N[leaf] for leaf in leaves))
    # get vertex ages
    v_to_age = kingman.RB_to_v_to_age(R, B)
    # sample the rates on the branches
    b_to_rate = sample_b_to_rate(R)
    xycorr = get_correlation(R, b_to_rate)
    # define B_subs in terms of substitutions instead of time
    B_subs = dict((p, t * b_to_rate[p]) for p, t in B.items())
    # sample the alignment
    v_to_seq = sample_v_to_seq(R, B_subs, nsites)
    # get the log likelihood; this is kind of horrible
    pairs = [(N[v], ''.join(v_to_seq[v])) for v in range(nleaves)]
    headers, sequences = zip(*pairs)
    alignment = Fasta.create_alignment(headers, sequences)
    newick_string = FtreeIO.RBN_to_newick(R, B_subs, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    dictionary_rate_matrix = RateMatrix.get_jukes_cantor_rate_matrix() 
    ordered_states = list('ACGT') 
    row_major_rate_matrix = MatrixUtil.dict_to_row_major(
            dictionary_rate_matrix, ordered_states, ordered_states)
    rate_matrix_object = RateMatrix.RateMatrix(
            row_major_rate_matrix, ordered_states) 
    ll = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are all 1.0
    # (the tree is rebuilt from B, the unscaled branch lengths)
    newick_string = FtreeIO.RBN_to_newick(R, B, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    ll_unity = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are numerically optimized
    # TODO incorporate the result into the xml file
    # TODO speed up the likelihood evaluation (beagle? C module?)
    #f = Opt(R, B, N_leaves, alignment)
    #X_logs = [0.0] * nbranches
    #result = scipy.optimize.fmin(f, X_logs, full_output=True)
    #print result
    #
    # write the xml document, starting with comments that record
    # the quantities of the simulation
    print >> out, '<?xml version="1.0"?>'
    print >> out, '<beast>'
    print >> out
    print >> out, '<!-- actual rate autocorrelation', xycorr, '-->'
    print >> out, '<!-- actual root height', v_to_age[r], '-->'
    print >> out, '<!-- actual log likelihood', ll, '-->'
    print >> out, '<!-- ll if rates were unity', ll_unity, '-->'
    print >> out
    print >> out, '<!--'
    print >> out, 'predefine the taxa as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    print >> out, get_leaf_taxon_defn(list(string.uppercase[:nleaves]))
    print >> out
    print >> out, '<!--'
    print >> out, 'define the alignment as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    # NOTE(review): `leaves` is bound only as the loop variable of the
    # vertex naming loop above, so this call receives the value from the
    # final iteration of that loop -- confirm that this is intended.
    print >> out, get_alignment_defn(leaves, N, v_to_seq)
    print >> out
    print >> out, '<!--'
    print >> out, 'specify the starting tree as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, get_starting_tree_defn(R, B, N_leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'connect the tree model as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, g_tree_model_defn
    print >> out
    print >> out, g_uncorrelated_relaxed_clock_info
    print >> out
    # the string literal below is disabled code: it is evaluated as
    # an expression and discarded, so nothing in it is written to out
    """
    print >> out, '<!--'
    print >> out, 'create a list of taxa for which to constrain the mrca as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_subset_defn(N, v, leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'create a tmrcaStatistic that will record the height as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_stat_defn(N[v])
    """
    print >> out
    print >> out, g_likelihood_info
    print >> out
    print >> out, '<!--'
    print >> out, 'run the mcmc'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    print >> out, get_mcmc_defn(v_to_leaves, v_to_age, N)
    print >> out
    print >> out, '</beast>'
    # return the response
    return out.getvalue()