Ejemplo n.º 1
0
def get_response_content(fs):
    """
    Report a nucleotide stationary distribution and amino acid energies.
    The values come from DirectProtein.get_nt_distribution_and_aa_energies
    applied to the user-supplied nucleotide and amino acid weights.
    @param fs: an object providing the 'nucleotides' and 'aminoacids' fields
    @return: the report as a multi-line string
    """
    # parse the user input into letter -> weight mappings
    nt_weights = SnippetUtil.get_distribution(
            fs.nucleotides, 'nucleotide', nt_letters)
    aa_weights = SnippetUtil.get_distribution(
            fs.aminoacids, 'amino acid', aa_letters)
    # flatten the mappings into lists ordered like the letter sequences
    mutation_distribution = [nt_weights[letter] for letter in nt_letters]
    aa_distribution = [aa_weights[letter] for letter in aa_letters]
    nt_distribution, aa_energies = (
            DirectProtein.get_nt_distribution_and_aa_energies(
                mutation_distribution, aa_distribution))
    # assemble the report one section at a time
    out = StringIO()
    out.write('nucleotide stationary distribution:\n')
    for letter, value in zip(nt_letters, nt_distribution):
        out.write('%s : %s\n' % (letter, value))
    # a blank line separates the two sections
    out.write('\n')
    out.write('amino acid energies:\n')
    for letter, value in zip(aa_letters, aa_energies):
        out.write('%s : %s\n' % (letter, value))
    return out.getvalue()
Ejemplo n.º 2
0
def get_response_content(fs):
    """
    Set up and run a Broyden search over log-ratio nucleotide weights.
    @param fs: an object providing the 'nucleotides' and 'aminoacids' fields
    @raise HandlingError: when the solver raises; the message is the solver
    error followed by the history reported by the objective function
    """
    # get the nucleotide distribution
    nt_to_probability = SnippetUtil.get_distribution(fs.nucleotides,
            'nucleotide', nt_letters)
    # get the amino acid distribution
    aa_to_probability = SnippetUtil.get_distribution(fs.aminoacids,
            'amino acid', aa_letters)
    # convert the dictionaries to lists ordered like the letter sequences
    observed_nt_stationary_distribution = [nt_to_probability[nt]
            for nt in nt_letters]
    aa_distribution = [aa_to_probability[aa] for aa in aa_letters]
    # define the objective function
    objective_function = MyCodonObjective(aa_distribution,
            observed_nt_stationary_distribution)
    # start the search from the halpern_bruno_nt_estimate values,
    # expressed as logs of ratios relative to A
    initial_stationary_guess = halpern_bruno_nt_estimate(nt_to_probability,
            aa_to_probability)
    A, C, G, T = initial_stationary_guess
    initial_guess = (math.log(C/A), math.log(G/A), math.log(T/A))
    iterations = 20
    try:
        best = scipy.optimize.nonlin.broyden2(objective_function,
                initial_guess, iterations)
    except Exception as e:
        # attach the objective function history to help diagnose the failure
        debugging_information = objective_function.get_history()
        raise HandlingError(str(e) + '\n' + debugging_information)
    # NOTE(review): the visible body ends here; `best` is never used and
    # nothing is returned. Presumably the rest of the function is missing
    # from this excerpt -- confirm against the full source.
Ejemplo n.º 3
0
def get_response_content(fs):
    """
    Analyze nexus data under a two-component HKY85 mixture model.
    @param fs: an object providing the 'nexus' string and, for each of the
    components a and b, a weight, a kappa and a nucleotide frequency string
    @return: the text produced by do_analysis followed by a newline
    @raise HandlingError: when the nexus data cannot be loaded
    """
    # parse the nexus input
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # collect the per-component parameters
    mixture_weights = [fs.weight_a, fs.weight_b]
    kappa_values = [fs.kappa_a, fs.kappa_b]
    frequency_strings = (fs.frequency_a, fs.frequency_b)
    # parse both frequency strings before building any rate matrix
    nucleotide_distributions = [
        SnippetUtil.get_distribution(text, 'nucleotide', list('ACGT'))
        for text in frequency_strings]
    # one unscaled HKY85 rate matrix object per component
    rate_matrix_objects = [
        RateMatrix.get_unscaled_hky85_rate_matrix(nt_dist, kappa)
        for nt_dist, kappa in zip(nucleotide_distributions, kappa_values)]
    # rescale the weights so that they sum to one
    total_weight = sum(mixture_weights)
    mixture_proportions = [w / total_weight for w in mixture_weights]
    # build and normalize the mixture model
    mixture_model = SubModel.MixtureModel(mixture_proportions,
                                          rate_matrix_objects)
    mixture_model.normalize()
    analysis = do_analysis(mixture_model, nexus.alignment, nexus.tree)
    return analysis + '\n'
Ejemplo n.º 4
0
def get_response_content(fs):
    """
    Analyze nexus data under a two-component HKY85 mixture model.
    @param fs: an object providing the 'nexus' string and, for each of the
    components a and b, a weight, a kappa and a nucleotide frequency string
    @return: the text produced by do_analysis followed by a newline
    @raise HandlingError: when the nexus data cannot be loaded
    """
    # parse the nexus input
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # collect the per-component parameters
    mixture_weights = [fs.weight_a, fs.weight_b]
    kappa_values = [fs.kappa_a, fs.kappa_b]
    frequency_strings = (fs.frequency_a, fs.frequency_b)
    # parse both frequency strings before building any rate matrix
    nucleotide_distributions = [
            SnippetUtil.get_distribution(text, 'nucleotide', list('ACGT'))
            for text in frequency_strings]
    # one unscaled HKY85 rate matrix object per component
    rate_matrix_objects = [
            RateMatrix.get_unscaled_hky85_rate_matrix(nt_dist, kappa)
            for nt_dist, kappa in zip(nucleotide_distributions, kappa_values)]
    # rescale the weights so that they sum to one
    total_weight = sum(mixture_weights)
    mixture_proportions = [w / total_weight for w in mixture_weights]
    # build and normalize the mixture model
    mixture_model = SubModel.MixtureModel(
            mixture_proportions, rate_matrix_objects)
    mixture_model.normalize()
    analysis = do_analysis(mixture_model, nexus.alignment, nexus.tree)
    return analysis + '\n'
Ejemplo n.º 5
0
def get_response_content(fs):
    """
    Set up and run a Broyden search over log-ratio nucleotide weights.
    @param fs: an object providing the 'nucleotides' and 'aminoacids' fields
    @raise HandlingError: when the solver raises; the message is the solver
    error followed by the history reported by the objective function
    """
    # get the nucleotide distribution
    nt_to_probability = SnippetUtil.get_distribution(fs.nucleotides, "nucleotide", nt_letters)
    # get the amino acid distribution
    aa_to_probability = SnippetUtil.get_distribution(fs.aminoacids, "amino acid", aa_letters)
    # convert the dictionaries to lists ordered like the letter sequences
    observed_nt_stationary_distribution = [nt_to_probability[nt] for nt in nt_letters]
    aa_distribution = [aa_to_probability[aa] for aa in aa_letters]
    # define the objective function
    objective_function = MyCodonObjective(aa_distribution, observed_nt_stationary_distribution)
    # start the search from the halpern_bruno_nt_estimate values,
    # expressed as logs of ratios relative to A
    initial_stationary_guess = halpern_bruno_nt_estimate(nt_to_probability, aa_to_probability)
    A, C, G, T = initial_stationary_guess
    initial_guess = (math.log(C / A), math.log(G / A), math.log(T / A))
    iterations = 20
    try:
        best = scipy.optimize.nonlin.broyden2(objective_function, initial_guess, iterations)
    except Exception as e:
        # attach the objective function history to help diagnose the failure
        debugging_information = objective_function.get_history()
        raise HandlingError(str(e) + "\n" + debugging_information)
    # NOTE(review): the visible body ends here; `best` is never used and
    # nothing is returned. Presumably the rest of the function is missing
    # from this excerpt -- confirm against the full source.
Ejemplo n.º 6
0
def get_response_content(fs):
    """
    Compute a codon distribution from nucleotide and amino acid weights.
    Each non-stop codon gets its amino acid weight multiplied by the
    product of its nucleotide weights and divided by the sum of those
    products over all codons of the same amino acid.
    The weights are then divided by their total.
    @param fs: an object providing the 'nucleotides' and 'amino_acids' fields
    @return: one 'codon : weight' line per codon in sorted codon order,
    followed by one extra newline
    @raise HandlingError: when an amino acid with nonzero weight has a codon
    containing a nucleotide with zero weight
    """
    # get the nucleotide distribution
    nt_distribution = SnippetUtil.get_distribution(fs.nucleotides,
                                                   'nucleotide',
                                                   Codon.g_nt_letters)
    # get the amino acid distribution
    aa_distribution = SnippetUtil.get_distribution(fs.amino_acids,
                                                   'amino acid',
                                                   Codon.g_aa_letters)
    # Assert that the nucleotide distribution
    # is compatible with the amino acid distribution.
    # According to the Halpern-Bruno assumptions, there should be no codon bias.
    # This means that if a nucleotide has a frequency of zero,
    # then the amino acid coded by each codon containing that nucleotide
    # must also have a frequency of zero.
    msg_a = 'the given amino acid and nucleotide distributions '
    msg_b = 'are incompatible with the assumption of no codon bias'
    err = HandlingError(msg_a + msg_b)
    for aa, codons in Codon.g_aa_letter_to_codons.items():
        for codon in codons:
            for nt in codon:
                if aa_distribution[aa] and not nt_distribution[nt]:
                    raise err
    # get the codon distribution
    codon_to_weight = {}
    for codon in Codon.g_non_stop_codons:
        aa = Codon.g_codon_to_aa_letter[codon]
        sibling_codons = Codon.g_aa_letter_to_codons[aa]
        codon_aa_weight = aa_distribution[aa]
        codon_nt_weight = np.prod([nt_distribution[nt] for nt in codon])
        sibling_nt_weight_sum = sum(
            np.prod([nt_distribution[nt] for nt in sibling])
            for sibling in sibling_codons)
        if sibling_nt_weight_sum:
            weight = codon_aa_weight * codon_nt_weight
            weight /= sibling_nt_weight_sum
        else:
            # Every codon of this amino acid contains a zero-weight
            # nucleotide, so the compatibility check above guarantees
            # that the amino acid weight is zero as well.
            # Dividing would give 0/0 (nan with numpy scalars),
            # which would turn the total and every reported weight into nan.
            weight = 0.0
        codon_to_weight[codon] = weight
    total_weight = sum(codon_to_weight.values())
    # return the codon distribution
    out = StringIO()
    for codon, weight in sorted(codon_to_weight.items()):
        out.write('%s : %s\n' % (codon, weight / total_weight))
    return out.getvalue() + '\n'
Ejemplo n.º 7
0
def get_response_content(fs):
    """
    Format a codon rate matrix as tab-separated text.
    The matrix comes from RateMatrix.get_gy94_rate_matrix.
    @param fs: an object providing the 'weights', 'kappa' and 'omega' fields
    @return: one line per codon ca, holding the tab-separated entries
    r[(ca, cb)] for every codon cb
    """
    # rows and columns both follow the sorted non-stop codon order
    codon_order = Codon.g_sorted_non_stop_codons
    codon_weights = SnippetUtil.get_distribution(
            fs.weights, 'codon', codon_order)
    r = RateMatrix.get_gy94_rate_matrix(codon_weights, fs.kappa, fs.omega)
    out = StringIO()
    for ca in codon_order:
        row = [str(r[(ca, cb)]) for cb in codon_order]
        out.write('\t'.join(row) + '\n')
    return out.getvalue()
Ejemplo n.º 8
0
def get_response_content(fs):
    """
    Compute a codon distribution from nucleotide and amino acid weights.
    Each non-stop codon gets its amino acid weight multiplied by the
    product of its nucleotide weights and divided by the sum of those
    products over all codons of the same amino acid.
    The weights are then divided by their total.
    @param fs: an object providing the 'nucleotides' and 'amino_acids' fields
    @return: one 'codon : weight' line per codon in sorted codon order,
    followed by one extra newline
    @raise HandlingError: when an amino acid with nonzero weight has a codon
    containing a nucleotide with zero weight
    """
    # get the nucleotide distribution
    nt_distribution = SnippetUtil.get_distribution(fs.nucleotides,
            'nucleotide', Codon.g_nt_letters)
    # get the amino acid distribution
    aa_distribution = SnippetUtil.get_distribution(fs.amino_acids,
            'amino acid', Codon.g_aa_letters)
    # Assert that the nucleotide distribution
    # is compatible with the amino acid distribution.
    # According to the Halpern-Bruno assumptions, there should be no codon bias.
    # This means that if a nucleotide has a frequency of zero,
    # then the amino acid coded by each codon containing that nucleotide
    # must also have a frequency of zero.
    msg_a = 'the given amino acid and nucleotide distributions '
    msg_b = 'are incompatible with the assumption of no codon bias'
    err = HandlingError(msg_a + msg_b)
    for aa, codons in Codon.g_aa_letter_to_codons.items():
        for codon in codons:
            for nt in codon:
                if aa_distribution[aa] and not nt_distribution[nt]:
                    raise err
    # get the codon distribution
    codon_to_weight = {}
    for codon in Codon.g_non_stop_codons:
        aa = Codon.g_codon_to_aa_letter[codon]
        sibling_codons = Codon.g_aa_letter_to_codons[aa]
        codon_aa_weight = aa_distribution[aa]
        codon_nt_weight = np.prod([nt_distribution[nt] for nt in codon])
        sibling_nt_weight_sum = sum(
                np.prod([nt_distribution[nt] for nt in sibling])
                for sibling in sibling_codons)
        if sibling_nt_weight_sum:
            weight = codon_aa_weight * codon_nt_weight
            weight /= sibling_nt_weight_sum
        else:
            # Every codon of this amino acid contains a zero-weight
            # nucleotide, so the compatibility check above guarantees
            # that the amino acid weight is zero as well.
            # Dividing would give 0/0 (nan with numpy scalars),
            # which would turn the total and every reported weight into nan.
            weight = 0.0
        codon_to_weight[codon] = weight
    total_weight = sum(codon_to_weight.values())
    # return the codon distribution
    out = StringIO()
    for codon, weight in sorted(codon_to_weight.items()):
        out.write('%s : %s\n' % (codon, weight / total_weight))
    return out.getvalue() + '\n'
Ejemplo n.º 9
0
def get_response_content(fs):
    """
    Format an HKY85 nucleotide rate matrix as tab-separated text.
    @param fs: an object providing the 'weights', 'kappa' and 'scaled' fields
    @return: four lines, one per nucleotide in ACGT order, each holding
    the tab-separated matrix entries in ACGT order
    """
    nt_order = 'ACGT'
    # parse the user-supplied nucleotide weights
    nt_weights = SnippetUtil.get_distribution(
            fs.weights, 'nucleotide', list(nt_order))
    # build the rate matrix object, normalizing it only on request
    rate_object = RateMatrix.get_unscaled_hky85_rate_matrix(
            nt_weights, fs.kappa)
    if fs.scaled:
        rate_object.normalize()
    entries = rate_object.get_dictionary_rate_matrix()
    # write one tab-separated line per row nucleotide
    out = StringIO()
    for row_nt in nt_order:
        cells = [str(entries[(row_nt, col_nt)]) for col_nt in nt_order]
        out.write('\t'.join(cells) + '\n')
    return out.getvalue()
Ejemplo n.º 10
0
def get_response_content(fs):
    """
    Format an HKY85 nucleotide rate matrix as tab-separated text.
    @param fs: an object providing the 'weights', 'kappa' and 'scaled' fields
    @return: four lines, one per nucleotide in ACGT order, each holding
    the tab-separated matrix entries in ACGT order
    """
    nt_order = 'ACGT'
    # parse the user-supplied nucleotide weights
    nt_weights = SnippetUtil.get_distribution(
            fs.weights, 'nucleotide', list(nt_order))
    # build the rate matrix object, normalizing it only on request
    rate_object = RateMatrix.get_unscaled_hky85_rate_matrix(
            nt_weights, fs.kappa)
    if fs.scaled:
        rate_object.normalize()
    entries = rate_object.get_dictionary_rate_matrix()
    # write one tab-separated line per row nucleotide
    out = StringIO()
    for row_nt in nt_order:
        cells = [str(entries[(row_nt, col_nt)]) for col_nt in nt_order]
        out.write('\t'.join(cells) + '\n')
    return out.getvalue()
Ejemplo n.º 11
0
def get_response_content(fs):
    """
    Report one aspect of a direct protein rate matrix.
    The aspect is chosen by the first set flag among
    srm, urm, cstat, astat, nstat and sf, tested in that order.
    @param fs: an object providing the 'nucleotides', 'aminoacids' and
    'kappa' fields together with the output selection flags
    @return: the selected report followed by one extra newline;
    only that newline when no flag is set
    """
    # parse the mutation process nucleotide distribution
    nt_weights = SnippetUtil.get_distribution(
            fs.nucleotides, 'nucleotide', nt_ordered)
    # parse the selection process amino acid energies
    aa_energies = SnippetUtil.get_dictionary(
            fs.aminoacids, 'amino acid', 'energy', aa_ordered)
    # build the rate matrix object from the ordered parameter lists
    nt_weight_list = [nt_weights[nt] for nt in nt_ordered]
    aa_energy_list = [aa_energies[aa] for aa in aa_ordered]
    rate_matrix_object = DirectProtein.DirectProteinRateMatrix(
            fs.kappa, nt_weight_list, aa_energy_list)
    out = StringIO()
    want_scaled = fs.srm
    if want_scaled or fs.urm:
        # the scaled and unscaled matrix reports differ only
        # in whether the matrix is normalized first
        if want_scaled:
            rate_matrix_object.normalize()
        matrix = rate_matrix_object.get_row_major_rate_matrix()
        out.write('%s\n' % (MatrixUtil.m_to_string(matrix),))
    elif fs.cstat:
        # codon stationary distribution
        codon_stationary = rate_matrix_object.get_codon_distribution()
        for codon in codons_ordered:
            out.write('%s : %s\n' % (codon, codon_stationary[codon]))
    elif fs.astat:
        # amino acid stationary distribution
        aa_stationary = rate_matrix_object.get_aa_distribution()
        for aa in aa_ordered:
            out.write('%s : %s\n' % (aa, aa_stationary[aa]))
    elif fs.nstat:
        # nucleotide stationary distribution
        nt_stationary = rate_matrix_object.get_nt_distribution()
        for nt in nt_ordered:
            out.write('%s : %s\n' % (nt, nt_stationary[nt]))
    elif fs.sf:
        # rate matrix scaling factor
        out.write('%s\n' % (rate_matrix_object.get_expected_rate(),))
    return out.getvalue() + '\n'
Ejemplo n.º 12
0
def get_response_content(fs):
    """
    Compute rates with get_mle_rates and report them via get_stockholm_string.
    The rate matrix is a normalized HKY85 matrix built from the
    user-supplied nucleotide weights and kappa.
    @param fs: an object providing the 'tree', 'weights', 'alignment'
    and 'kappa' fields
    @return: the string built by get_stockholm_string followed by a newline
    @raise HandlingError: when the alignment cannot be read as nucleotides
    """
    # parse and validate the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # parse the nucleotide weights
    nt_weights = SnippetUtil.get_distribution(
            fs.weights, 'nucleotide', list('ACGT'))
    # read the alignment and require nucleotide data
    try:
        alignment = Fasta.Alignment(StringIO(fs.alignment))
        alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build the HKY85 matrix, then wrap and normalize it
    hky_object = RateMatrix.get_unscaled_hky85_rate_matrix(
            nt_weights, fs.kappa)
    row_major = hky_object.get_row_major_rate_matrix()
    rate_matrix = RateMatrix.FastRateMatrix(row_major, list('ACGT'))
    rate_matrix.normalize()
    # compute the rates and format the response
    mle_rates = get_mle_rates(tree, alignment, rate_matrix)
    report = get_stockholm_string(tree, alignment, mle_rates)
    return report + '\n'
Ejemplo n.º 13
0
def get_response_content(fs):
    """
    Compute rates with get_mle_rates and report them via get_stockholm_string.
    The rate matrix is a normalized HKY85 matrix built from the
    user-supplied nucleotide weights and kappa.
    @param fs: an object providing the 'tree', 'weights', 'alignment'
    and 'kappa' fields
    @return: the string built by get_stockholm_string followed by a newline
    @raise HandlingError: when the alignment cannot be read as nucleotides
    """
    # parse and validate the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # parse the nucleotide weights
    nt_weights = SnippetUtil.get_distribution(fs.weights, 'nucleotide',
                                              list('ACGT'))
    # read the alignment and require nucleotide data
    try:
        alignment = Fasta.Alignment(StringIO(fs.alignment))
        alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build the HKY85 matrix, then wrap and normalize it
    hky_object = RateMatrix.get_unscaled_hky85_rate_matrix(nt_weights,
                                                           fs.kappa)
    row_major = hky_object.get_row_major_rate_matrix()
    rate_matrix = RateMatrix.FastRateMatrix(row_major, list('ACGT'))
    rate_matrix.normalize()
    # compute the rates and format the response
    mle_rates = get_mle_rates(tree, alignment, rate_matrix)
    report = get_stockholm_string(tree, alignment, mle_rates)
    return report + '\n'
Ejemplo n.º 14
0
def get_response(fs):
    """
    Simulate an alignment under a two-component HKY85 mixture model.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text: the simulated alignment as fasta sequences
    when fs.fasta is set, a nexus string when fs.nex is set,
    and an empty string otherwise
    @raise HandlingError: when the tree has a syntax error
    or the simulation fails
    """
    # parse the tree
    try:
        tree = Newick.parse(fs.tree, Newick.NewickTree)
        tree.assert_valid()
    except Newick.NewickSyntaxError as e:
        raise HandlingError(str(e))
    # get the mixture weights
    mixture_weights = [fs.weight_a, fs.weight_b]
    # get the kappa values
    kappa_values = [fs.kappa_a, fs.kappa_b]
    # get the nucleotide distributions
    frequency_strings = (fs.frequency_a, fs.frequency_b)
    nucleotide_distributions = []
    for nt_string in frequency_strings:
        d = SnippetUtil.get_distribution(nt_string, 'nucleotide', list('ACGT'))
        nucleotide_distributions.append(d)
    # create the nucleotide HKY rate matrix objects
    rate_matrix_objects = []
    for nt_distribution, kappa in zip(nucleotide_distributions, kappa_values):
        rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix(
            nt_distribution, kappa)
        rate_matrix_objects.append(rate_matrix_object)
    # create the mixture proportions
    weight_sum = sum(mixture_weights)
    mixture_proportions = [weight / weight_sum for weight in mixture_weights]
    # create the mixture model
    mixture_model = SubModel.MixtureModel(mixture_proportions,
                                          rate_matrix_objects)
    # normalize the mixture model
    mixture_model.normalize()
    # simulate the alignment
    try:
        alignment = PhyLikelihood.simulate_alignment(tree, mixture_model,
                                                     fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # get the output string
    output_string = ''
    if fs.fasta:
        # the output is the alignment, one fasta sequence per tip
        arr = []
        for node in tree.gen_tips():
            arr.append(alignment.get_fasta_sequence(node.name))
        output_string = '\n'.join(arr)
    elif fs.nex:
        # the output is the alignment and the tree
        nexus = Nexus.Nexus()
        nexus.tree = tree
        nexus.alignment = alignment
        # record the parameters of each mixture component as a comment
        for i in range(2):
            arr = []
            arr.append('weight: %s' % mixture_weights[i])
            arr.append('kappa: %s' % kappa_values[i])
            nexus.add_comment('category %d: %s' % (i + 1, ', '.join(arr)))
        output_string = str(nexus)
    #TODO use the correct filename extension in the output
    # (the unused filename locals were removed; only the text is returned)
    return output_string
Ejemplo n.º 15
0
def get_response_content(fs):
    """
    Estimate a codon stationary distribution and the nucleotide
    distribution that it implies.
    The estimate uses a Broyden search when fs.corrected is set,
    and otherwise a direct codon weighting when fs.hb is set.
    @param fs: an object providing the 'nucleotides' and 'aminoacids' fields
    and the 'corrected' and 'hb' flags
    @return: a report listing the codon distribution and then the implied
    nucleotide distribution; both listings are empty when neither flag is set
    """
    # parse the user input into letter -> weight mappings
    nt_to_weight = SnippetUtil.get_distribution(fs.nucleotides, 'nucleotide',
                                                nt_letters)
    aa_to_weight = SnippetUtil.get_distribution(fs.aminoacids, 'amino acid',
                                                aa_letters)
    # flatten the mappings into lists ordered like the letter sequences
    stationary_nt_distribution = [nt_to_weight[nt] for nt in nt_letters]
    aa_distribution = [aa_to_weight[aa] for aa in aa_letters]
    # both results stay empty unless one of the branches below runs
    codon_distribution = []
    implied_stationary_nt_distribution = []
    if fs.corrected:
        # search over three log weights; the first weight is fixed at 1
        objective = MyObjective(aa_distribution, stationary_nt_distribution)
        x, y, z = scipy.optimize.nonlin.broyden2(objective, (0, 0, 0), 20)
        mutation_weights = (1, math.exp(x), math.exp(y), math.exp(z))
        mutation_distribution = normalized(mutation_weights)
        # derive the stationary nucleotide distribution and the energies
        # from the mutation and amino acid distributions
        implied_stationary_nt_distribution, energies = (
            DirectProtein.get_nt_distribution_and_aa_energies(
                mutation_distribution, aa_distribution))
        # kappa doesn't matter because we are only concerned
        # with stationary distributions
        dpm = DirectProtein.DirectProteinRateMatrix(
            1.0, mutation_distribution, energies)
        codon_distribution = dpm.get_stationary_distribution()
    elif fs.hb:
        # weight each codon by its amino acid weight times its share
        # of the nucleotide weight products among sibling codons
        raw_codon_weights = []
        for codon in codons:
            aa = Codon.g_codon_to_aa_letter[codon]
            siblings = Codon.g_aa_letter_to_codons[aa]
            aa_weight = aa_to_weight[aa]
            own_product = np.prod([nt_to_weight[nt] for nt in codon])
            sibling_total = sum(
                np.prod([nt_to_weight[nt] for nt in sibling])
                for sibling in siblings)
            weight = aa_weight * own_product
            weight /= sibling_total
            raw_codon_weights.append(weight)
        codon_distribution = normalized(raw_codon_weights)
        # accumulate codon probabilities per nucleotide occurrence
        nt_totals = dict(zip(nt_letters, [0] * 4))
        for codon, p in zip(codons, codon_distribution):
            for nt in codon:
                nt_totals[nt] += p
        implied_stationary_nt_distribution = normalized(nt_totals[nt]
                                                        for nt in nt_letters)
    # assemble the report
    out = StringIO()
    out.write('estimated codon stationary distribution:\n')
    for codon, p in zip(codons, codon_distribution):
        out.write('%s : %s\n' % (codon, p))
    # a blank line separates the two sections
    out.write('\n')
    out.write('implied nucleotide stationary distribution:\n')
    for nt, p in zip(nt_letters, implied_stationary_nt_distribution):
        out.write('%s : %s\n' % (nt, p))
    return out.getvalue()
Ejemplo n.º 16
0
def get_response_content(fs):
    """
    Estimate a codon stationary distribution and the nucleotide
    distribution that it implies.
    The estimate uses a Broyden search when fs.corrected is set,
    and otherwise a direct codon weighting when fs.hb is set.
    @param fs: an object providing the 'nucleotides' and 'aminoacids' fields
    and the 'corrected' and 'hb' flags
    @return: a report listing the codon distribution and then the implied
    nucleotide distribution; both listings are empty when neither flag is set
    """
    # parse the user input into letter -> weight mappings
    nt_to_weight = SnippetUtil.get_distribution(
            fs.nucleotides, 'nucleotide', nt_letters)
    aa_to_weight = SnippetUtil.get_distribution(
            fs.aminoacids, 'amino acid', aa_letters)
    # flatten the mappings into lists ordered like the letter sequences
    stationary_nt_distribution = [nt_to_weight[nt] for nt in nt_letters]
    aa_distribution = [aa_to_weight[aa] for aa in aa_letters]
    # both results stay empty unless one of the branches below runs
    codon_distribution = []
    implied_stationary_nt_distribution = []
    if fs.corrected:
        # search over three log weights; the first weight is fixed at 1
        objective = MyObjective(aa_distribution, stationary_nt_distribution)
        x, y, z = scipy.optimize.nonlin.broyden2(objective, (0, 0, 0), 20)
        mutation_weights = (1, math.exp(x), math.exp(y), math.exp(z))
        mutation_distribution = normalized(mutation_weights)
        # derive the stationary nucleotide distribution and the energies
        # from the mutation and amino acid distributions
        implied_stationary_nt_distribution, energies = (
                DirectProtein.get_nt_distribution_and_aa_energies(
                    mutation_distribution, aa_distribution))
        # kappa doesn't matter because we are only concerned
        # with stationary distributions
        dpm = DirectProtein.DirectProteinRateMatrix(
                1.0, mutation_distribution, energies)
        codon_distribution = dpm.get_stationary_distribution()
    elif fs.hb:
        # weight each codon by its amino acid weight times its share
        # of the nucleotide weight products among sibling codons
        raw_codon_weights = []
        for codon in codons:
            aa = Codon.g_codon_to_aa_letter[codon]
            siblings = Codon.g_aa_letter_to_codons[aa]
            aa_weight = aa_to_weight[aa]
            own_product = np.prod([nt_to_weight[nt] for nt in codon])
            sibling_total = sum(
                    np.prod([nt_to_weight[nt] for nt in sibling])
                    for sibling in siblings)
            weight = aa_weight * own_product
            weight /= sibling_total
            raw_codon_weights.append(weight)
        codon_distribution = normalized(raw_codon_weights)
        # accumulate codon probabilities per nucleotide occurrence
        nt_totals = dict(zip(nt_letters, [0]*4))
        for codon, p in zip(codons, codon_distribution):
            for nt in codon:
                nt_totals[nt] += p
        implied_stationary_nt_distribution = normalized(nt_totals[nt]
                for nt in nt_letters)
    # assemble the report
    out = StringIO()
    out.write('estimated codon stationary distribution:\n')
    for codon, p in zip(codons, codon_distribution):
        out.write('%s : %s\n' % (codon, p))
    # a blank line separates the two sections
    out.write('\n')
    out.write('implied nucleotide stationary distribution:\n')
    for nt, p in zip(nt_letters, implied_stationary_nt_distribution):
        out.write('%s : %s\n' % (nt, p))
    return out.getvalue()
Ejemplo n.º 17
0
def get_response(fs):
    """
    Simulate an alignment under a two-component HKY85 mixture model.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text: the simulated alignment as fasta sequences
    when fs.fasta is set, a nexus string when fs.nex is set,
    and an empty string otherwise
    @raise HandlingError: when the tree has a syntax error
    or the simulation fails
    """
    # parse the tree
    try:
        tree = Newick.parse(fs.tree, Newick.NewickTree)
        tree.assert_valid()
    except Newick.NewickSyntaxError as e:
        raise HandlingError(str(e))
    # get the mixture weights
    mixture_weights = [fs.weight_a, fs.weight_b]
    # get the kappa values
    kappa_values = [fs.kappa_a, fs.kappa_b]
    # get the nucleotide distributions
    frequency_strings = (fs.frequency_a, fs.frequency_b)
    nucleotide_distributions = []
    for nt_string in frequency_strings:
        d = SnippetUtil.get_distribution(nt_string, 'nucleotide', list('ACGT'))
        nucleotide_distributions.append(d)
    # create the nucleotide HKY rate matrix objects
    rate_matrix_objects = []
    for nt_distribution, kappa in zip(nucleotide_distributions, kappa_values):
        rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix(
                nt_distribution, kappa)
        rate_matrix_objects.append(rate_matrix_object)
    # create the mixture proportions
    weight_sum = sum(mixture_weights)
    mixture_proportions = [weight / weight_sum for weight in mixture_weights]
    # create the mixture model
    mixture_model = SubModel.MixtureModel(
            mixture_proportions, rate_matrix_objects)
    # normalize the mixture model
    mixture_model.normalize()
    # simulate the alignment
    try:
        alignment = PhyLikelihood.simulate_alignment(
                tree, mixture_model, fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # get the output string
    output_string = ''
    if fs.fasta:
        # the output is the alignment, one fasta sequence per tip
        arr = []
        for node in tree.gen_tips():
            arr.append(alignment.get_fasta_sequence(node.name))
        output_string = '\n'.join(arr)
    elif fs.nex:
        # the output is the alignment and the tree
        nexus = Nexus.Nexus()
        nexus.tree = tree
        nexus.alignment = alignment
        # record the parameters of each mixture component as a comment
        for i in range(2):
            arr = []
            arr.append('weight: %s' % mixture_weights[i])
            arr.append('kappa: %s' % kappa_values[i])
            nexus.add_comment('category %d: %s' % (i+1, ', '.join(arr)))
        output_string = str(nexus)
    #TODO use the correct filename extension in the output
    # (the unused filename locals were removed; only the text is returned)
    return output_string