Example #1
0
def get_response_content(fs):
    """
    Simulate an ancestral alignment under Jukes-Cantor and format it as fasta.
    @param fs: an object whose tree and fasta attributes are read
    @return: one fasta record per tree node in preorder, newline terminated
    @raise HandlingError: if the alignment is rejected or the simulation fails
    """
    # parse and validate the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # read the observed alignment and require nucleotide data
    try:
        observed = Fasta.Alignment(fs.fasta.splitlines())
        observed.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build the jukes cantor rate matrix object
    jc_rates = RateMatrix.get_jukes_cantor_rate_matrix()
    nucleotides = list('ACGT')
    jc_row_major = MatrixUtil.dict_to_row_major(
        jc_rates, nucleotides, nucleotides)
    jc_model = RateMatrix.RateMatrix(jc_row_major, nucleotides)
    # simulate the alignment that includes the ancestral sequences
    try:
        simulated = PhyLikelihood.simulate_ancestral_alignment(
            tree, observed, jc_model)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # the order of the records follows a preorder traversal of the tree
    records = [
        simulated.get_fasta_sequence(node.name) for node in tree.preorder()]
    return '\n'.join(records) + '\n'
Example #2
0
 def __call__(self, X_logs):
     """
     Evaluate the objective function for a vector of log branch rates.
     The vth entry of X_logs is the log rate of the branch above vertex v.
     @param X_logs: vector of branch rate logs
     @return: negative log likelihood (the quantity to be minimized)
     """
     rates = [math.exp(log_rate) for log_rate in X_logs]
     # scale each stored branch length by the rate of the branch above the child
     scaled_lengths = {}
     for v_parent, v_child in self.R:
         branch = frozenset((v_parent, v_child))
         scaled_lengths[branch] = rates[v_child] * self.B[branch]
     # round trip through newick to get a tree object for the likelihood code
     tree = Newick.parse(
         FtreeIO.RBN_to_newick(self.R, scaled_lengths, self.N_leaves),
         Newick.NewickTree)
     # build the jukes cantor rate matrix object (rebuilt on every call)
     jc_rates = RateMatrix.get_jukes_cantor_rate_matrix()
     nucleotides = list('ACGT')
     jc_row_major = MatrixUtil.dict_to_row_major(
         jc_rates, nucleotides, nucleotides)
     jc_model = RateMatrix.RateMatrix(jc_row_major, nucleotides)
     # negate because the caller minimizes
     return -PhyLikelihood.get_log_likelihood(tree, self.alignment, jc_model)
Example #3
0
def get_form():
    """
    Define the form, using a simulated data set as the default nexus text.
    @return: the body of a form
    """
    # simulate the default data with a fixed number of columns and a fixed seed
    sample_tree = get_sample_tree()
    sample_model = get_sample_mixture_model()
    sample_alignment = PhyLikelihood.simulate_alignment(
        sample_tree, sample_model, 200, 314159)
    # bundle the tree and the alignment into the default nexus string
    default_nexus = Nexus.Nexus()
    default_nexus.tree = sample_tree
    default_nexus.alignment = sample_alignment
    default_text = str(default_nexus)
    # define the form objects one at a time
    nexus_field = Form.MultiLine('nexus', 'nexus data', default_text)
    category_field = Form.Integer(
        'ncategories', 'use this many categories', 3, low=1, high=5)
    output_items = [
        Form.CheckItem('outdebug', 'show debug info'),
        Form.CheckItem('outmodel', 'show the model'),
        Form.CheckItem('outcheck', 'show the likelihood and rates', True),
    ]
    output_group = Form.CheckGroup('options', 'output options', output_items)
    return [nexus_field, category_field, output_group]
Example #4
0
def get_form():
    """
    Assemble the form objects; the nexus field defaults to simulated data.
    @return: the body of a form
    """
    # simulate an alignment on the sample tree under the sample mixture model
    sample_tree = get_sample_tree()
    alignment = PhyLikelihood.simulate_alignment(
            sample_tree, get_sample_mixture_model(), 200, 314159)
    # serialize the tree and alignment together as the default nexus text
    nexus = Nexus.Nexus()
    nexus.tree = sample_tree
    nexus.alignment = alignment
    default_nexus_text = str(nexus)
    # collect the form objects in display order
    form_objects = []
    form_objects.append(
            Form.MultiLine('nexus', 'nexus data', default_nexus_text))
    form_objects.append(
            Form.Integer('ncategories', 'use this many categories',
                3, low=1, high=5))
    # only the likelihood check is selected by default
    check_items = [
            Form.CheckItem('outdebug', 'show debug info'),
            Form.CheckItem('outmodel', 'show the model'),
            Form.CheckItem('outcheck', 'show the likelihood and rates',
                True)]
    form_objects.append(
            Form.CheckGroup('options', 'output options', check_items))
    return form_objects
Example #5
0
def get_response_content(fs):
    """
    Return a fasta alignment that includes simulated ancestral sequences.
    The simulation uses the Jukes-Cantor rate matrix.
    @param fs: an object whose tree and fasta attributes are read
    @return: fasta text with a record for each node visited in preorder
    @raise HandlingError: on an alignment error or a simulation error
    """
    # get the validated tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # get the nucleotide alignment of the observed sequences
    try:
        tip_alignment = Fasta.Alignment(fs.fasta.splitlines())
        tip_alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # define the jukes cantor model
    rate_dict = RateMatrix.get_jukes_cantor_rate_matrix()
    states = list('ACGT')
    model = RateMatrix.RateMatrix(
            MatrixUtil.dict_to_row_major(rate_dict, states, states),
            states)
    # extend the alignment with simulated ancestral sequences
    try:
        full_alignment = PhyLikelihood.simulate_ancestral_alignment(
                tree, tip_alignment, model)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # write the records in the order defined by the tree
    lines = []
    for node in tree.preorder():
        lines.append(full_alignment.get_fasta_sequence(node.name))
    lines.append('')
    return '\n'.join(lines)
Example #6
0
 def test_likelihood_calculation(self):
     """
     Check that a log likelihood can be computed from the sample data.
     """
     # the tree, the model, and the codon alignment all come from samples
     sample_tree = Newick.parse(sample_tree_string, Newick.NewickTree)
     mixture = deserialize_mixture_model(get_sample_xml_string())
     codon_lines = StringIO(long_sample_codon_alignment_string)
     codon_alignment = Fasta.CodonAlignment(codon_lines)
     # the value is not checked; the test only requires that this completes
     PhyLikelihood.get_log_likelihood(sample_tree, codon_alignment, mixture)
Example #7
0
 def test_likelihood_calculation(self):
     """
     Run the log likelihood calculation on sample inputs as a smoke test.
     """
     # parse the sample tree
     parsed_tree = Newick.parse(sample_tree_string, Newick.NewickTree)
     # rebuild the sample mixture model from its xml form
     xml_text = get_sample_xml_string()
     mixture_model = deserialize_mixture_model(xml_text)
     # read the sample codon alignment from an in-memory file
     alignment_file = StringIO(long_sample_codon_alignment_string)
     codon_alignment = Fasta.CodonAlignment(alignment_file)
     # no value is asserted; an exception here is the only failure mode
     PhyLikelihood.get_log_likelihood(
         parsed_tree, codon_alignment, mixture_model)
Example #8
0
 def gen_distance_matrices(self, count, max_steps):
     """
     Yield (ordered sequence list, distance matrix) pairs by rejection.
     Sampling stops when enough samples have been accepted or when
     the complexity reaches the allowed number of steps.
     Rejected samples are tallied by the kind of degeneracy.
     @param count: the requested number of distance matrices
     @param max_steps: an upper bound on the allowed number of steps
     """
     # build the jukes cantor model used for every simulation
     jc_rates = RateMatrix.get_jukes_cantor_rate_matrix()
     nucleotides = list('ACGT')
     jc_row_major = MatrixUtil.dict_to_row_major(
         jc_rates, nucleotides, nucleotides)
     jc_model = RateMatrix.RateMatrix(jc_row_major, nucleotides)
     # record the requested number of samples
     self.requested_matrix_count = count
     while True:
         # stop when out of steps or when the request has been met
         out_of_steps = self.get_complexity() >= max_steps
         if out_of_steps or self.accepted_sample_count >= count:
             break
         # simulate an alignment and order its sequences by name
         simulated = PhyLikelihood.simulate_alignment(
             self.tree, jc_model, self.sequence_length)
         sequence_by_name = dict(zip(simulated.headers, simulated.sequences))
         sequence_list = [
             sequence_by_name[name] for name in self.ordered_names]
         # estimate the pairwise distances
         distance_matrix = JC69.get_ML_distance_matrix(sequence_list)
         # gather the off-diagonal entries to look for degeneracies
         off_diagonal = [
             value
             for i, row in enumerate(distance_matrix)
             for j, value in enumerate(row)
             if i != j]
         # a zero distance takes precedence over an infinite distance
         if any(value == 0.0 for value in off_diagonal):
             self.rejected_zero_sample_count += 1
         elif any(value == float('inf') for value in off_diagonal):
             self.rejected_inf_sample_count += 1
         else:
             self.accepted_sample_count += 1
             yield sequence_list, distance_matrix
Example #9
0
 def gen_distance_matrices(self, count, max_steps):
     """
     Generate accepted (ordered sequence list, distance matrix) pairs.
     Each candidate is simulated under Jukes-Cantor and is rejected when
     an off-diagonal distance estimate is zero or infinite.
     The generator gives up once the complexity reaches max_steps.
     @param count: the requested number of distance matrices
     @param max_steps: an upper bound on the allowed number of steps
     """
     # define the jukes cantor model
     rate_dict = RateMatrix.get_jukes_cantor_rate_matrix()
     states = list('ACGT')
     model = RateMatrix.RateMatrix(
             MatrixUtil.dict_to_row_major(rate_dict, states, states),
             states)
     # record the requested number of samples
     self.requested_matrix_count = count
     # do some rejection sampling
     while True:
         if self.get_complexity() >= max_steps:
             return
         if self.accepted_sample_count >= count:
             return
         # simulate an alignment from the tree
         alignment = PhyLikelihood.simulate_alignment(
                 self.tree, model, self.sequence_length)
         # put the sequences into the order given by the ordered names
         lookup = dict(zip(alignment.headers, alignment.sequences))
         sequence_list = []
         for name in self.ordered_names:
             sequence_list.append(lookup[name])
         # get the estimated distance matrix
         distance_matrix = JC69.get_ML_distance_matrix(sequence_list)
         # count the degenerate off-diagonal entries of each kind
         nzeros = 0
         ninfs = 0
         for i, row in enumerate(distance_matrix):
             for j, value in enumerate(row):
                 if i == j:
                     continue
                 if value == 0.0:
                     nzeros += 1
                 if value == float('inf'):
                     ninfs += 1
         # a zero entry is reported in preference to an infinite entry
         if nzeros:
             self.rejected_zero_sample_count += 1
         elif ninfs:
             self.rejected_inf_sample_count += 1
         else:
             self.accepted_sample_count += 1
             yield sequence_list, distance_matrix
Example #10
0
def get_response_content(fs):
    """
    Simulate an alignment on a tree under a Direct RNA mixture model.
    @param fs: an object whose tree, model, and ncols attributes are read
    @return: one fasta record per tip of the tree, newline terminated
    @raise HandlingError: if the simulation fails
    """
    # parse and validate the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # deserialize the mixture model and normalize it
    model = DirectRna.deserialize_mixture_model(fs.model)
    model.normalize()
    # simulate the requested number of columns
    try:
        simulated = PhyLikelihood.simulate_alignment(tree, model, fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # only the sequences at the tips are reported
    records = [
        simulated.get_fasta_sequence(tip.name) for tip in tree.gen_tips()]
    return '\n'.join(records) + '\n'
Example #11
0
def get_response_content(fs):
    """
    Return a simulated fasta alignment for the tips of the given tree.
    The substitution process is a normalized Direct RNA mixture model.
    @param fs: an object whose tree, model, and ncols attributes are read
    @return: fasta text ending with a newline
    @raise HandlingError: if the simulation reports an error
    """
    # get the validated tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # get the normalized Direct RNA mixture model
    direct_rna_model = DirectRna.deserialize_mixture_model(fs.model)
    direct_rna_model.normalize()
    # simulate the alignment
    try:
        simulated_alignment = PhyLikelihood.simulate_alignment(
                tree, direct_rna_model, fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # write one record per tip, each followed by a newline
    lines = []
    for tip in tree.gen_tips():
        lines.append(simulated_alignment.get_fasta_sequence(tip.name))
    lines.append('')
    return '\n'.join(lines)
Example #12
0
def get_response_content(fs):
    """
    Compute the Jukes-Cantor log likelihood of an alignment on a tree.
    @param fs: an object whose tree and fasta attributes are read
    @return: the log likelihood as a string followed by a newline
    @raise HandlingError: if the alignment is rejected
    """
    # parse and validate the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # read the alignment and require nucleotide data
    try:
        nt_alignment = Fasta.Alignment(fs.fasta.splitlines())
        nt_alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build the jukes cantor rate matrix object
    jc_rates = RateMatrix.get_jukes_cantor_rate_matrix()
    nucleotides = list('ACGT')
    jc_row_major = MatrixUtil.dict_to_row_major(
            jc_rates, nucleotides, nucleotides)
    jc_model = RateMatrix.RateMatrix(jc_row_major, nucleotides)
    # evaluate the log likelihood and format the response
    value = PhyLikelihood.get_log_likelihood(tree, nt_alignment, jc_model)
    return '%s\n' % str(value)
Example #13
0
def get_response_content(fs):
    """
    Simulate an alignment under a mixture of three nucleotide rate matrices.
    @param fs: an object providing tree, weight_a/b/c, matrix_a/b/c, ncols
    @return: one fasta record per tip of the tree, newline terminated
    @raise HandlingError: if a matrix is not 4x4 or the simulation fails
    """
    # parse and validate the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # collect the per-category weights and matrices
    weights = [fs.weight_a, fs.weight_b, fs.weight_c]
    matrices = [fs.matrix_a, fs.matrix_b, fs.matrix_c]
    # each matrix must have a row and a column per nucleotide
    if any(R.shape != (4, 4) for R in matrices):
        raise HandlingError('expected each nucleotide rate matrix to be 4x4')
    # turn the weights into proportions that sum to one
    total_weight = sum(weights)
    proportions = [weight / total_weight for weight in weights]
    # wrap each matrix as a rate matrix object over the nucleotides
    nucleotides = list('ACGT')
    components = [
        RateMatrix.RateMatrix(R.tolist(), nucleotides) for R in matrices]
    # build the mixture model and normalize it
    mixture = SubModel.MixtureModel(proportions, components)
    mixture.normalize()
    # simulate the requested number of columns
    try:
        simulated = PhyLikelihood.simulate_alignment(tree, mixture, fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # only the sequences at the tips are reported
    records = [
        simulated.get_fasta_sequence(tip.name) for tip in tree.gen_tips()]
    return '\n'.join(records) + '\n'
Example #14
0
def get_response_content(fs):
    """
    Return a fasta alignment simulated under a three-category mixture model.
    Each category has a user supplied weight and a 4x4 nucleotide rate matrix.
    @param fs: an object providing tree, weight_a/b/c, matrix_a/b/c, ncols
    @return: fasta text ending with a newline
    @raise HandlingError: if a matrix has the wrong shape
        or if the simulation reports an error
    """
    # get the validated tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # get the mixture weights and the matrices
    weights = [fs.weight_a, fs.weight_b, fs.weight_c]
    matrices = [fs.matrix_a, fs.matrix_b, fs.matrix_c]
    # reject the request at the first matrix that is not 4x4
    for matrix in matrices:
        if not (matrix.shape != (4, 4)):
            continue
        raise HandlingError('expected each nucleotide rate matrix to be 4x4')
    # create the mixture proportions
    denominator = sum(weights)
    mixture_proportions = []
    for weight in weights:
        mixture_proportions.append(weight / denominator)
    # create the rate matrix objects
    states = list('ACGT')
    rate_matrix_objects = []
    for matrix in matrices:
        rate_matrix_objects.append(
                RateMatrix.RateMatrix(matrix.tolist(), states))
    # create and normalize the mixture model
    mixture_model = SubModel.MixtureModel(
            mixture_proportions, rate_matrix_objects)
    mixture_model.normalize()
    # simulate the alignment
    try:
        simulated_alignment = PhyLikelihood.simulate_alignment(
                tree, mixture_model, fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # write one record per tip, each followed by a newline
    lines = []
    for tip in tree.gen_tips():
        lines.append(simulated_alignment.get_fasta_sequence(tip.name))
    lines.append('')
    return '\n'.join(lines)
Example #15
0
def get_response(fs):
    """
    Simulate an alignment and return it as plain text.
    The text is fasta or nexus depending on which format flag is set,
    and it is empty if neither flag is set.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: a (response_headers, response_text) pair
    @raise HandlingError: on a newick syntax error or a simulation error
    """
    # parse and validate the tree
    try:
        tree = Newick.parse(fs.tree, Newick.NewickTree)
        tree.assert_valid()
    except Newick.NewickSyntaxError as e:
        raise HandlingError(str(e))
    # deserialize the mixture model
    model = deserialize_mixture_model(fs.model)
    # sample the alignment, possibly using a specified seed
    try:
        simulated = PhyLikelihood.simulate_alignment(
            tree, model, fs.ncols, fs.seed)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # format the output according to the selected format
    if fs.fastaformat:
        # fasta output has the tip sequences only, with no final newline
        records = [
            simulated.get_fasta_sequence(tip.name) for tip in tree.gen_tips()]
        response_text = '\n'.join(records)
    elif fs.nexusformat:
        # nexus output has both the tree and the alignment
        nexus = Nexus.Nexus()
        nexus.tree = tree
        nexus.alignment = simulated
        response_text = str(nexus)
    else:
        response_text = ''
    return [('Content-Type', 'text/plain')], response_text
Example #16
0
def get_response(fs):
    """
    Return a simulated alignment as a plain text response.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: a (response_headers, response_text) pair
    @raise HandlingError: if the tree cannot be parsed
        or if the simulation reports an error
    """
    # get the validated tree
    try:
        tree = Newick.parse(fs.tree, Newick.NewickTree)
        tree.assert_valid()
    except Newick.NewickSyntaxError as e:
        raise HandlingError(str(e))
    # get the mixture model
    mixture_model = deserialize_mixture_model(fs.model)
    # sample the alignment, possibly using a specified seed
    simulation_args = (tree, mixture_model, fs.ncols, fs.seed)
    try:
        alignment = PhyLikelihood.simulate_alignment(*simulation_args)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # the text stays empty unless one of the format flags is set
    text = ""
    if fs.fastaformat:
        # write the tip sequences as fasta records
        fasta_records = []
        for tip in tree.gen_tips():
            fasta_records.append(alignment.get_fasta_sequence(tip.name))
        text = "\n".join(fasta_records)
    elif fs.nexusformat:
        # write the alignment together with the tree
        nexus = Nexus.Nexus()
        nexus.tree = tree
        nexus.alignment = alignment
        text = str(nexus)
    # pair the text with the plain text content type header
    headers = [("Content-Type", "text/plain")]
    return headers, text
Example #17
0
def get_response_content(fs):
    """
    Write a BEAST xml document for a simulated coalescent data set.
    A tree is sampled with kingman.sample, a rate is sampled for each
    branch, and sequences are sampled on the tree after scaling each
    branch length by its rate.
    The xml text includes diagnostic comments, the taxa, the alignment,
    the starting tree, and some predefined module-level xml snippets.
    @param fs: an object whose nleaves and nsites attributes are read
    @return: the xml text
    """
    # init the response and get the user variables
    out = StringIO()
    nleaves = fs.nleaves
    nvertices = nleaves * 2 - 1
    # nbranches is used only by the commented-out optimization code below
    nbranches = nvertices - 1
    nsites = fs.nsites
    # sample the coalescent tree with timelike branch lengths
    R, B = kingman.sample(fs.nleaves)
    r = Ftree.R_to_root(R)
    # get the leaf vertex names
    # leaves 0..nleaves-1 are named with consecutive uppercase letters
    N = dict(zip(range(nleaves), string.uppercase[:nleaves]))
    N_leaves = dict(N)
    # get the internal vertex names
    # a vertex above more than one leaf is named by joining the sorted leaf names
    v_to_leaves = R_to_v_to_leaves(R)
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            N[v] = ''.join(sorted(N[leaf] for leaf in leaves))
    # get vertex ages
    v_to_age = kingman.RB_to_v_to_age(R, B)
    # sample the rates on the branches
    b_to_rate = sample_b_to_rate(R)
    xycorr = get_correlation(R, b_to_rate)
    # define B_subs in terms of substitutions instead of time
    B_subs = dict((p, t * b_to_rate[p]) for p, t in B.items())
    # sample the alignment
    v_to_seq = sample_v_to_seq(R, B_subs, nsites)
    # get the log likelihood; this is kind of horrible
    # the leaf sequences become an alignment and the tree goes through newick
    pairs = [(N[v], ''.join(v_to_seq[v])) for v in range(nleaves)]
    headers, sequences = zip(*pairs)
    alignment = Fasta.create_alignment(headers, sequences)
    newick_string = FtreeIO.RBN_to_newick(R, B_subs, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    dictionary_rate_matrix = RateMatrix.get_jukes_cantor_rate_matrix() 
    ordered_states = list('ACGT') 
    row_major_rate_matrix = MatrixUtil.dict_to_row_major(
            dictionary_rate_matrix, ordered_states, ordered_states)
    rate_matrix_object = RateMatrix.RateMatrix(
            row_major_rate_matrix, ordered_states) 
    ll = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are all 1.0
    # this uses the unscaled branch lengths B instead of B_subs
    newick_string = FtreeIO.RBN_to_newick(R, B, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    ll_unity = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are numerically optimized
    # TODO incorporate the result into the xml file
    # TODO speed up the likelihood evaluation (beagle? C module?)
    #f = Opt(R, B, N_leaves, alignment)
    #X_logs = [0.0] * nbranches
    #result = scipy.optimize.fmin(f, X_logs, full_output=True)
    #print result
    #
    # write the xml header followed by the diagnostic values as xml comments
    print >> out, '<?xml version="1.0"?>'
    print >> out, '<beast>'
    print >> out
    print >> out, '<!-- actual rate autocorrelation', xycorr, '-->'
    print >> out, '<!-- actual root height', v_to_age[r], '-->'
    print >> out, '<!-- actual log likelihood', ll, '-->'
    print >> out, '<!-- ll if rates were unity', ll_unity, '-->'
    print >> out
    print >> out, '<!--'
    print >> out, 'predefine the taxa as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    print >> out, get_leaf_taxon_defn(list(string.uppercase[:nleaves]))
    print >> out
    print >> out, '<!--'
    print >> out, 'define the alignment as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    # NOTE(review): 'leaves' is not assigned for this call; it still holds
    # whatever the loop over v_to_leaves above bound on its last iteration,
    # and it is undefined if that loop never ran -- confirm this is intended
    print >> out, get_alignment_defn(leaves, N, v_to_seq)
    print >> out
    print >> out, '<!--'
    print >> out, 'specify the starting tree as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, get_starting_tree_defn(R, B, N_leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'connect the tree model as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, g_tree_model_defn
    print >> out
    print >> out, g_uncorrelated_relaxed_clock_info
    print >> out
    # the string literal below is disabled code; nothing in it is executed
    """
    print >> out, '<!--'
    print >> out, 'create a list of taxa for which to constrain the mrca as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_subset_defn(N, v, leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'create a tmrcaStatistic that will record the height as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_stat_defn(N[v])
    """
    print >> out
    print >> out, g_likelihood_info
    print >> out
    print >> out, '<!--'
    print >> out, 'run the mcmc'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    print >> out, get_mcmc_defn(v_to_leaves, v_to_age, N)
    print >> out
    print >> out, '</beast>'
    # return the response
    return out.getvalue()
Example #18
0
def get_response(fs):
    """
    Run hyphy on the submitted nexus data and report the reformatted results.
    A generated batch file and the nexus data are written into the data
    directory, hyphy is run on the batch file from that directory,
    and its output is parsed and summarized as text.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text
    @raise HandlingError: if the nexus data cannot be loaded
    """
    # read the nexus data
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # move to the data directory
    original_directory = os.getcwd()
    os.chdir(Config.data_path)
    # the working directory is process-wide state,
    # so it is restored even if writing the files or running hyphy fails
    try:
        # create the batch file
        category_suffixes = [
                str(category+1) for category in range(fs.ncategories)]
        hky_hyphy_model = get_hyphy_model_string(hyphy_nexus, fs.ncategories)
        with open(hyphy_bf, 'wt') as fout:
            print >> fout, hky_hyphy_model
        # create the nexus file
        with open(hyphy_nexus, 'wt') as fout:
            print >> fout, nexus
        # run hyphy
        p = subprocess.Popen([Config.hyphy_exe_path, hyphy_bf],
                close_fds=True, stdout=subprocess.PIPE)
        # communicate reads all of stdout and also waits for the child,
        # so the finished process is reaped instead of being left behind
        hyphy_output = p.communicate()[0]
    finally:
        # move back to the original directory
        os.chdir(original_directory)
    # read the hyphy output
    ns = Hyphy.get_hyphy_namespace(StringIO(hyphy_output))
    out = StringIO()
    if fs.outdebug:
        print >> out, get_hyphy_debug_info(hyphy_output)
        print >> out, ''
        print >> out, ''
    if fs.outmodel:
        print >> out, 'hyphy model:'
        print >> out, '---------------------------------------'
        print >> out, hky_hyphy_model
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    # the reformatted hyphy output is always shown
    print >> out, 'reformatted hyphy output:'
    print >> out, '---------------------------------------'
    # show the log likelihood
    print >> out, 'log likelihood :', ns.lnL
    print >> out, ''
    # show the kappa value
    print >> out, 'kappa :', ns.kappa
    print >> out, ''
    # show one block per category, with blank lines between the blocks
    category_blocks = []
    for suffix in category_suffixes:
        block = StringIO()
        print >> block, 'mixing proportion :', getattr(ns, 'catFreq'+suffix)
        print >> block, 'tree :', getattr(ns, 'tree'+suffix).get_newick_string()
        for nt in list('ACGT'):
            print >> block, nt, ':', getattr(ns, 'eqFreq'+nt+suffix)
        category_blocks.append(block.getvalue().strip())
    print >> out, '\n\n'.join(category_blocks)
    print >> out, '---------------------------------------'
    print >> out, ''
    print >> out, ''
    if fs.outcheck:
        # get the raw matrices
        matrices = []
        for suffix in category_suffixes:
            nt_dict = {}
            for nt in list('ACGT'):
                nt_dict[nt] = getattr(ns, 'eqFreq'+nt+suffix)
            # rescale the reported frequencies so that they sum to one
            total = float(sum(nt_dict.values()))
            nt_dict = dict((k, v/total) for k, v in nt_dict.items())
            matrix = RateMatrix.get_unscaled_hky85_rate_matrix(
                    nt_dict, ns.kappa)
            matrices.append(matrix)
        raw_matrix_rates = [matrix.get_expected_rate() for matrix in matrices]
        # rescale the reported category weights so that they sum to one
        category_weights = []
        for suffix in category_suffixes:
            category_weights.append(getattr(ns, 'catFreq'+suffix))
        total = float(sum(category_weights))
        category_distribution = [weight / total for weight in category_weights]
        mixture_model = SubModel.MixtureModel(category_distribution, matrices)
        # the raw rate is recorded before the model is rescaled
        raw_mixture_rate = mixture_model.get_expected_rate()
        # rescale the mixture model
        # 0.75 is the expected rate of the initial model
        r1 = 0.75
        scaling_factor = r1
        mixture_model.rescale(scaling_factor)
        recomputed_log_likelihood = PhyLikelihood.get_log_likelihood(
                nexus.tree, nexus.alignment, mixture_model)
        print >> out, 'recomputed likelihood and rates:'
        print >> out, '---------------------------------------'
        print >> out, 'log likelihood :', recomputed_log_likelihood
        print >> out, ''
        print >> out, 'rate :', raw_mixture_rate
        print >> out, ''
        for rate, suffix in zip(raw_matrix_rates, category_suffixes):
            print >> out, 'rate%s : %s' % (suffix, rate)
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    # return the response
    return out.getvalue()
Example #19
0
def get_response(fs):
    """
    Simulate an alignment under a two-category HKY85 mixture model.
    The text is fasta or nexus depending on which format flag is set,
    and it is empty if neither flag is set.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text
    @raise HandlingError: on a newick syntax error or a simulation error
    """
    # parse and validate the tree
    try:
        tree = Newick.parse(fs.tree, Newick.NewickTree)
        tree.assert_valid()
    except Newick.NewickSyntaxError as e:
        raise HandlingError(str(e))
    # gather the per-category weights and kappa values
    mixture_weights = [fs.weight_a, fs.weight_b]
    kappa_values = [fs.kappa_a, fs.kappa_b]
    # get the nucleotide distribution of each category
    nucleotide_distributions = [
        SnippetUtil.get_distribution(text, 'nucleotide', list('ACGT'))
        for text in (fs.frequency_a, fs.frequency_b)]
    # create one unscaled HKY85 rate matrix object per category
    rate_matrix_objects = [
        RateMatrix.get_unscaled_hky85_rate_matrix(distribution, kappa)
        for distribution, kappa in zip(nucleotide_distributions, kappa_values)]
    # turn the weights into proportions that sum to one
    total_weight = sum(mixture_weights)
    mixture_proportions = [w / total_weight for w in mixture_weights]
    # create and normalize the mixture model
    mixture_model = SubModel.MixtureModel(
        mixture_proportions, rate_matrix_objects)
    mixture_model.normalize()
    # simulate the requested number of columns
    try:
        alignment = PhyLikelihood.simulate_alignment(
            tree, mixture_model, fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # format the output according to the selected format
    if fs.fasta:
        # fasta output has the tip sequences only, with no final newline
        records = [
            alignment.get_fasta_sequence(tip.name) for tip in tree.gen_tips()]
        output_string = '\n'.join(records)
    elif fs.nex:
        # nexus output has the tree, the alignment, and a comment per category
        nexus = Nexus.Nexus()
        nexus.tree = tree
        nexus.alignment = alignment
        category_params = zip(mixture_weights, kappa_values)
        for index, (weight, kappa) in enumerate(category_params):
            details = ['weight: %s' % weight, 'kappa: %s' % kappa]
            nexus.add_comment(
                'category %d: %s' % (index + 1, ', '.join(details)))
        output_string = str(nexus)
    else:
        output_string = ''
    # define the filename; neither of these values is used yet
    if fs.fasta:
        filename_extension = 'fasta'
    elif fs.nex:
        filename_extension = 'nex'
    filename = 'sample.' + fs.fmt
    #TODO use the correct filename extension in the output
    return output_string
Example #20
0
def get_response(fs):
    """
    Run HyPhy on the submitted nexus data and report the results as text.

    A HyPhy batch file and the nexus data are written to disk while the
    working directory is Config.data_path, HyPhy is run on the batch file,
    and its output is reformatted.  The optional sections of the report are
    controlled by fs.outdebug, fs.outmodel and fs.outcheck.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text
    @raise HandlingError: if the nexus data cannot be loaded
    """
    # read the nexus data
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # one suffix per mixture category: '1', '2', ...
    category_suffixes = [
        str(category + 1) for category in range(fs.ncategories)
    ]
    # move to the data directory
    original_directory = os.getcwd()
    os.chdir(Config.data_path)
    try:
        # create the batch file
        hky_hyphy_model = get_hyphy_model_string(hyphy_nexus, fs.ncategories)
        with open(hyphy_bf, 'wt') as fout:
            print >> fout, hky_hyphy_model
        # create the nexus file
        with open(hyphy_nexus, 'wt') as fout:
            print >> fout, nexus
        # run hyphy; communicate() drains stdout and also waits for the
        # child process so that it is reaped
        p = subprocess.Popen([Config.hyphy_exe_path, hyphy_bf],
                             close_fds=True,
                             stdout=subprocess.PIPE)
        hyphy_output = p.communicate()[0]
    finally:
        # always move back to the original directory, even if writing the
        # files or launching hyphy failed
        os.chdir(original_directory)
    # read the hyphy output
    ns = Hyphy.get_hyphy_namespace(StringIO(hyphy_output))
    out = StringIO()
    if fs.outdebug:
        print >> out, get_hyphy_debug_info(hyphy_output)
        print >> out, ''
        print >> out, ''
    if fs.outmodel:
        print >> out, 'hyphy model:'
        print >> out, '---------------------------------------'
        print >> out, hky_hyphy_model
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    # the reformatted hyphy output is always shown
    print >> out, 'reformatted hyphy output:'
    print >> out, '---------------------------------------'
    # show the log likelihood
    print >> out, 'log likelihood :', ns.lnL
    print >> out, ''
    # show the kappa value
    print >> out, 'kappa :', ns.kappa
    print >> out, ''
    # show one block of per-category values for each category suffix
    category_blocks = []
    for suffix in category_suffixes:
        block = StringIO()
        print >> block, 'mixing proportion :', getattr(
            ns, 'catFreq' + suffix)
        print >> block, 'tree :', getattr(ns, 'tree' +
                                          suffix).get_newick_string()
        for nt in list('ACGT'):
            print >> block, nt, ':', getattr(ns, 'eqFreq' + nt + suffix)
        category_blocks.append(block.getvalue().strip())
    print >> out, '\n\n'.join(category_blocks)
    print >> out, '---------------------------------------'
    print >> out, ''
    print >> out, ''
    if fs.outcheck:
        # get the raw matrices, one per category
        matrices = []
        for suffix in category_suffixes:
            nt_dict = {}
            for nt in list('ACGT'):
                nt_dict[nt] = getattr(ns, 'eqFreq' + nt + suffix)
            # normalize the frequencies so that they sum to one
            total = float(sum(nt_dict.values()))
            nt_dict = dict((k, v / total) for k, v in nt_dict.items())
            matrix = RateMatrix.get_unscaled_hky85_rate_matrix(
                nt_dict, ns.kappa)
            matrices.append(matrix)
        raw_matrix_rates = [matrix.get_expected_rate() for matrix in matrices]
        # normalize the category weights so that they sum to one
        category_weights = [
            getattr(ns, 'catFreq' + suffix) for suffix in category_suffixes
        ]
        total = float(sum(category_weights))
        category_distribution = [weight / total for weight in category_weights]
        mixture_model = SubModel.MixtureModel(category_distribution, matrices)
        # the raw rate is recorded before the model is rescaled
        raw_mixture_rate = mixture_model.get_expected_rate()
        # rescale the mixture model
        # 0.75 is the expected rate of the initial model
        r1 = 0.75
        scaling_factor = r1
        mixture_model.rescale(scaling_factor)
        recomputed_log_likelihood = PhyLikelihood.get_log_likelihood(
            nexus.tree, nexus.alignment, mixture_model)
        print >> out, 'recomputed likelihood and rates:'
        print >> out, '---------------------------------------'
        print >> out, 'log likelihood :', recomputed_log_likelihood
        print >> out, ''
        print >> out, 'rate :', raw_mixture_rate
        print >> out, ''
        for rate, suffix in zip(raw_matrix_rates, category_suffixes):
            print >> out, 'rate%s : %s' % (suffix, rate)
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    # return the response
    return out.getvalue()
Example #21
0
def get_response(fs):
    """
    Simulate an alignment on a tree under a two-category HKY85 mixture.

    The output is a fasta alignment when fs.fasta is set, a nexus document
    containing the tree and the alignment when fs.nex is set,
    and the empty string when neither is set.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text
    @raise HandlingError: if the tree cannot be parsed
        or if the simulation fails
    """
    # parse the tree
    try:
        tree = Newick.parse(fs.tree, Newick.NewickTree)
        tree.assert_valid()
    except Newick.NewickSyntaxError as e:
        raise HandlingError(str(e))
    # get the mixture weights
    mixture_weights = [fs.weight_a, fs.weight_b]
    # get the kappa values
    kappa_values = [fs.kappa_a, fs.kappa_b]
    # get the nucleotide distributions
    frequency_strings = (fs.frequency_a, fs.frequency_b)
    nucleotide_distributions = [
        SnippetUtil.get_distribution(nt_string, 'nucleotide', list('ACGT'))
        for nt_string in frequency_strings
    ]
    # create the nucleotide HKY rate matrix objects
    rate_matrix_objects = [
        RateMatrix.get_unscaled_hky85_rate_matrix(nt_distribution, kappa)
        for nt_distribution, kappa in zip(
            nucleotide_distributions, kappa_values)
    ]
    # create the mixture proportions;
    # the sum is converted to a float so that integer weights are not
    # truncated by integer division
    weight_sum = float(sum(mixture_weights))
    mixture_proportions = [weight / weight_sum for weight in mixture_weights]
    # create the mixture model
    mixture_model = SubModel.MixtureModel(
            mixture_proportions, rate_matrix_objects)
    # normalize the mixture model
    mixture_model.normalize()
    # simulate the alignment
    try:
        alignment = PhyLikelihood.simulate_alignment(
                tree, mixture_model, fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # get the output string
    output_string = ''
    if fs.fasta:
        # the output is the alignment, one fasta entry per tip of the tree
        output_string = '\n'.join(
            alignment.get_fasta_sequence(node.name)
            for node in tree.gen_tips())
    elif fs.nex:
        # the output is the alignment and the tree
        nexus = Nexus.Nexus()
        nexus.tree = tree
        nexus.alignment = alignment
        # add one comment per mixture category
        category_parameters = zip(mixture_weights, kappa_values)
        for i, (weight, kappa) in enumerate(category_parameters):
            arr = ['weight: %s' % weight, 'kappa: %s' % kappa]
            nexus.add_comment('category %d: %s' % (i+1, ', '.join(arr)))
        output_string = str(nexus)
    # define the filename
    # NOTE(review): filename_extension and filename are computed but never
    # used; they are kept as-is until the TODO below is resolved.
    if fs.fasta:
        filename_extension = 'fasta'
    elif fs.nex:
        filename_extension = 'nex'
    filename = 'sample.' + fs.fmt
    #TODO use the correct filename extension in the output
    return output_string