def get_response_content(fs):
    """
    Simulate ancestral sequences under the Jukes-Cantor model.
    @param fs: a field storage object providing the tree and fasta strings
    @return: newline-terminated text with one entry per node of the tree
    """
    # parse and validate the newick tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # parse the nucleotide alignment
    try:
        observed = Fasta.Alignment(fs.fasta.splitlines())
        observed.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build the jukes cantor rate matrix object over the ACGT states
    jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
    states = list('ACGT')
    jc_row_major = MatrixUtil.dict_to_row_major(jc_dict, states, states)
    jc_model = RateMatrix.RateMatrix(jc_row_major, states)
    # extend the alignment with simulated ancestral sequences
    try:
        simulated = PhyLikelihood.simulate_ancestral_alignment(
                tree, observed, jc_model)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # the preorder traversal of the tree defines the output order
    records = [
            simulated.get_fasta_sequence(node.name)
            for node in tree.preorder()]
    return '\n'.join(records) + '\n'
def __call__(self, X_logs):
    """
    Evaluate the objective function at a vector of branch rate logs.
    The vth entry of X_logs is the log rate of the branch above v.
    @param X_logs: vector of branch rate logs
    @return: negative log likelihood (the quantity to be minimized)
    """
    rates = [math.exp(x_log) for x_log in X_logs]
    # scale each stored branch length by the rate of the branch
    # above the child vertex
    scaled_lengths = {}
    for v_parent, v_child in self.R:
        edge = frozenset((v_parent, v_child))
        scaled_lengths[edge] = rates[v_child] * self.B[edge]
    # round trip through newick to get a tree the likelihood code accepts
    newick_string = FtreeIO.RBN_to_newick(
            self.R, scaled_lengths, self.N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    # build the jukes cantor rate matrix object
    jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
    states = list('ACGT')
    jc_model = RateMatrix.RateMatrix(
            MatrixUtil.dict_to_row_major(jc_dict, states, states),
            states)
    # negate the log likelihood so that minimizing maximizes likelihood
    return -PhyLikelihood.get_log_likelihood(
            tree, self.alignment, jc_model)
def get_form():
    """
    Build the form, using a simulated nexus data set as the default input.
    @return: the body of a form
    """
    # simulate the default alignment with a fixed seed
    sample_tree = get_sample_tree()
    sample_model = get_sample_mixture_model()
    column_count = 200
    fixed_seed = 314159
    sample_alignment = PhyLikelihood.simulate_alignment(
            sample_tree, sample_model, column_count, fixed_seed)
    # bundle the tree and the alignment into the default nexus string
    default_nexus = Nexus.Nexus()
    default_nexus.tree = sample_tree
    default_nexus.alignment = sample_alignment
    default_nexus_string = str(default_nexus)
    # define the output options
    output_options = [
            Form.CheckItem('outdebug', 'show debug info'),
            Form.CheckItem('outmodel', 'show the model'),
            Form.CheckItem(
                'outcheck', 'show the likelihood and rates', True)]
    # define the form objects
    return [
            Form.MultiLine('nexus', 'nexus data', default_nexus_string),
            Form.Integer(
                'ncategories', 'use this many categories',
                3, low=1, high=5),
            Form.CheckGroup('options', 'output options', output_options)]
def get_form():
    """
    Define the form whose default nexus input is a simulated data set.
    @return: the body of a form
    """
    # the default nexus data is simulated from the sample tree and model
    tree = get_sample_tree()
    model = get_sample_mixture_model()
    simulated = PhyLikelihood.simulate_alignment(tree, model, 200, 314159)
    nexus = Nexus.Nexus()
    nexus.tree = tree
    nexus.alignment = simulated
    # assemble the form objects one at a time
    form_objects = []
    form_objects.append(Form.MultiLine('nexus', 'nexus data', str(nexus)))
    form_objects.append(Form.Integer(
            'ncategories', 'use this many categories',
            3, low=1, high=5))
    check_items = [
            Form.CheckItem('outdebug', 'show debug info'),
            Form.CheckItem('outmodel', 'show the model'),
            Form.CheckItem(
                'outcheck', 'show the likelihood and rates', True)]
    form_objects.append(
            Form.CheckGroup('options', 'output options', check_items))
    return form_objects
def get_response_content(fs):
    """
    Add simulated ancestral sequences to a nucleotide alignment.
    The simulation uses the Jukes-Cantor rate matrix.
    @param fs: a field storage object providing the tree and fasta strings
    @return: newline-terminated text with one entry per node of the tree
    """
    # read the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # read the alignment and require nucleotide data
    try:
        alignment = Fasta.Alignment(fs.fasta.splitlines())
        alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # define the jukes cantor rate matrix object
    nt_states = list('ACGT')
    rate_matrix_object = RateMatrix.RateMatrix(
            MatrixUtil.dict_to_row_major(
                RateMatrix.get_jukes_cantor_rate_matrix(),
                nt_states, nt_states),
            nt_states)
    # simulate the ancestral alignment
    try:
        alignment = PhyLikelihood.simulate_ancestral_alignment(
                tree, alignment, rate_matrix_object)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # order the output entries by a preorder traversal of the tree
    lines = []
    for node in tree.preorder():
        lines.append(alignment.get_fasta_sequence(node.name))
    lines.append('')
    return '\n'.join(lines)
def test_likelihood_calculation(self):
    """
    Evaluate the log likelihood of the sample codon alignment.
    No assertion is made about the computed value;
    the test only checks that the evaluation finishes without raising.
    """
    # parse the sample tree
    tree = Newick.parse(sample_tree_string, Newick.NewickTree)
    # deserialize the sample mixture model from its xml string
    model = deserialize_mixture_model(get_sample_xml_string())
    # read the sample codon alignment from an in-memory file
    alignment_file = StringIO(long_sample_codon_alignment_string)
    alignment = Fasta.CodonAlignment(alignment_file)
    # evaluate the likelihood
    log_likelihood = PhyLikelihood.get_log_likelihood(
            tree, alignment, model)
def test_likelihood_calculation(self):
    """
    Smoke test of the likelihood calculation on the sample codon data.
    The computed log likelihood is not compared against anything.
    """
    tree = Newick.parse(sample_tree_string, Newick.NewickTree)
    xml_string = get_sample_xml_string()
    mixture_model = deserialize_mixture_model(xml_string)
    codon_alignment = Fasta.CodonAlignment(
            StringIO(long_sample_codon_alignment_string))
    # the result is deliberately unused
    PhyLikelihood.get_log_likelihood(tree, codon_alignment, mixture_model)
def gen_distance_matrices(self, count, max_steps):
    """
    Yield (ordered sequence list, distance matrix) pairs.
    Sampled matrices with a zero or an infinite off-diagonal entry
    are counted and rejected instead of being yielded.
    The generator stops when enough samples have been accepted
    or when the complexity reaches the allowed number of steps.
    @param count: the requested number of distance matrices
    @param max_steps: an upper bound on the allowed number of steps
    """
    # define the jukes cantor model
    states = list('ACGT')
    jc_row_major = MatrixUtil.dict_to_row_major(
            RateMatrix.get_jukes_cantor_rate_matrix(), states, states)
    model = RateMatrix.RateMatrix(jc_row_major, states)
    # record the requested number of samples
    self.requested_matrix_count = count
    # do some rejection sampling
    while True:
        out_of_steps = self.get_complexity() >= max_steps
        if out_of_steps or self.accepted_sample_count >= count:
            break
        # simulate an alignment from the tree
        alignment = PhyLikelihood.simulate_alignment(
                self.tree, model, self.sequence_length)
        # put the simulated sequences into the requested name order
        name_to_sequence = dict(
                zip(alignment.headers, alignment.sequences))
        sequence_list = [
                name_to_sequence[name] for name in self.ordered_names]
        # get the estimated distance matrix
        distance_matrix = JC69.get_ML_distance_matrix(sequence_list)
        # collect the off-diagonal entries to look for degeneracies
        off_diagonal = [
                value
                for i, row in enumerate(distance_matrix)
                for j, value in enumerate(row)
                if i != j]
        # a zero takes precedence over an infinity when counting rejections
        if any(value == 0.0 for value in off_diagonal):
            self.rejected_zero_sample_count += 1
        elif any(value == float('inf') for value in off_diagonal):
            self.rejected_inf_sample_count += 1
        else:
            self.accepted_sample_count += 1
            yield sequence_list, distance_matrix
def gen_distance_matrices(self, count, max_steps):
    """
    Generate (ordered sequence list, distance matrix) pairs
    by rejection sampling.
    An alignment is simulated on each step, and its estimated distance
    matrix is yielded only if no off-diagonal entry is zero or infinite.
    The generator gives up when the complexity reaches max_steps.
    @param count: the requested number of distance matrices
    @param max_steps: an upper bound on the allowed number of steps
    """
    # build the jukes cantor rate matrix object
    jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
    nt_states = list('ACGT')
    model = RateMatrix.RateMatrix(
            MatrixUtil.dict_to_row_major(jc_dict, nt_states, nt_states),
            nt_states)
    # remember how many samples were requested
    self.requested_matrix_count = count
    infinity = float('inf')
    while True:
        # stop when the budget is spent or the request is satisfied
        if self.get_complexity() >= max_steps:
            return
        if self.accepted_sample_count >= count:
            return
        # simulate an alignment and reorder its sequences
        alignment = PhyLikelihood.simulate_alignment(
                self.tree, model, self.sequence_length)
        lookup = dict(zip(alignment.headers, alignment.sequences))
        sequence_list = [lookup[name] for name in self.ordered_names]
        # estimate the pairwise distances
        distance_matrix = JC69.get_ML_distance_matrix(sequence_list)
        # scan the off-diagonal entries for degenerate distances
        found_zero = False
        found_inf = False
        for i, row in enumerate(distance_matrix):
            for j, value in enumerate(row):
                if i == j:
                    continue
                found_zero = found_zero or value == 0.0
                found_inf = found_inf or value == infinity
        # update the counters, and yield only the accepted samples
        if found_zero:
            self.rejected_zero_sample_count += 1
            continue
        if found_inf:
            self.rejected_inf_sample_count += 1
            continue
        self.accepted_sample_count += 1
        yield sequence_list, distance_matrix
def get_response_content(fs):
    """
    Simulate an alignment from a normalized Direct RNA mixture model.
    @param fs: a field storage object providing tree, model, and ncols
    @return: newline-terminated text with one entry per tip of the tree
    """
    # parse and validate the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # deserialize and normalize the mixture model
    model = DirectRna.deserialize_mixture_model(fs.model)
    model.normalize()
    # simulate the requested number of columns
    try:
        simulated = PhyLikelihood.simulate_alignment(tree, model, fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # report the sequences in the order of the tips of the tree
    tip_records = [
            simulated.get_fasta_sequence(tip.name)
            for tip in tree.gen_tips()]
    return '\n'.join(tip_records) + '\n'
def get_response_content(fs):
    """
    Compute a Jukes-Cantor log likelihood for a tree and an alignment.
    @param fs: a field storage object providing the tree and fasta strings
    @return: the log likelihood as a newline-terminated string
    """
    # parse and validate the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # parse the nucleotide alignment
    try:
        nt_alignment = Fasta.Alignment(fs.fasta.splitlines())
        nt_alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build the jukes cantor rate matrix object
    jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
    states = list('ACGT')
    jc_row_major = MatrixUtil.dict_to_row_major(jc_dict, states, states)
    jc_model = RateMatrix.RateMatrix(jc_row_major, states)
    # evaluate the log likelihood and format the response
    ll = PhyLikelihood.get_log_likelihood(tree, nt_alignment, jc_model)
    return str(ll) + '\n'
def get_response_content(fs):
    """
    Simulate an alignment from a mixture of three nucleotide rate matrices.
    @param fs: a field storage object providing the tree, the three
    weights, the three matrices, and ncols
    @return: newline-terminated text with one entry per tip of the tree
    @raise HandlingError: if a matrix is not 4x4, if the weights sum
    to zero, or if the simulation fails
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # get the mixture weights
    weights = [fs.weight_a, fs.weight_b, fs.weight_c]
    # get the matrices and check their shapes
    matrices = [fs.matrix_a, fs.matrix_b, fs.matrix_c]
    for R in matrices:
        if R.shape != (4, 4):
            msg = 'expected each nucleotide rate matrix to be 4x4'
            raise HandlingError(msg)
    # Create the mixture proportions.
    # The float conversion avoids python 2 integer division,
    # and a zero sum is reported as a handling error
    # instead of surfacing as a ZeroDivisionError.
    weight_sum = float(sum(weights))
    if not weight_sum:
        raise HandlingError('the mixture weights should not sum to zero')
    mixture_proportions = [weight / weight_sum for weight in weights]
    # create the rate matrix objects
    ordered_states = list('ACGT')
    rate_matrix_objects = [
            RateMatrix.RateMatrix(R.tolist(), ordered_states)
            for R in matrices]
    # create and normalize the mixture model
    mixture_model = SubModel.MixtureModel(
            mixture_proportions, rate_matrix_objects)
    mixture_model.normalize()
    # simulate the alignment
    try:
        alignment = PhyLikelihood.simulate_alignment(
                tree, mixture_model, fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # report the sequences in the order of the tips of the tree
    arr = [alignment.get_fasta_sequence(node.name)
            for node in tree.gen_tips()]
    return '\n'.join(arr) + '\n'
def get_response_content(fs):
    """
    Simulate an alignment using a three-component rate matrix mixture.
    @param fs: a field storage object providing the tree, the three
    weights, the three matrices, and ncols
    @return: newline-terminated text with one entry per tip of the tree
    @raise HandlingError: if a matrix is not 4x4, if the weights sum
    to zero, or if the simulation fails
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # get the mixture weights and the matrices
    weights = [fs.weight_a, fs.weight_b, fs.weight_c]
    matrices = [fs.matrix_a, fs.matrix_b, fs.matrix_c]
    # every matrix should be a nucleotide rate matrix
    for R in matrices:
        if R.shape != (4, 4):
            msg = 'expected each nucleotide rate matrix to be 4x4'
            raise HandlingError(msg)
    # Normalize the weights into mixture proportions.
    # Summing as a float prevents truncating integer division in python 2,
    # and a zero total is rejected explicitly so that the caller sees
    # a HandlingError instead of a ZeroDivisionError.
    weight_sum = float(sum(weights))
    if not weight_sum:
        raise HandlingError('the mixture weights should not sum to zero')
    mixture_proportions = [weight / weight_sum for weight in weights]
    # create the rate matrix objects
    ordered_states = list('ACGT')
    rate_matrix_objects = []
    for R in matrices:
        rate_matrix_objects.append(
                RateMatrix.RateMatrix(R.tolist(), ordered_states))
    # create the normalized mixture model
    mixture_model = SubModel.MixtureModel(
            mixture_proportions, rate_matrix_objects)
    mixture_model.normalize()
    # simulate the alignment
    try:
        alignment = PhyLikelihood.simulate_alignment(
                tree, mixture_model, fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # get the alignment string using the tip order of the tree
    arr = []
    for node in tree.gen_tips():
        arr.append(alignment.get_fasta_sequence(node.name))
    return '\n'.join(arr) + '\n'
def get_response(fs):
    """
    Simulate an alignment and report it in fasta or nexus format.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: a (response_headers, response_text) pair
    """
    # parse and validate the tree
    try:
        tree = Newick.parse(fs.tree, Newick.NewickTree)
        tree.assert_valid()
    except Newick.NewickSyntaxError as e:
        raise HandlingError(str(e))
    # deserialize the mixture model
    model = deserialize_mixture_model(fs.model)
    # sample the alignment, passing along the seed from the form
    try:
        sampled = PhyLikelihood.simulate_alignment(
                tree, model, fs.ncols, fs.seed)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # format the output; it stays empty if no format flag is set
    if fs.fastaformat:
        # the output is the alignment
        response_text = '\n'.join(
                [sampled.get_fasta_sequence(tip.name)
                    for tip in tree.gen_tips()])
    elif fs.nexusformat:
        # the output is the alignment and the tree
        nexus = Nexus.Nexus()
        nexus.tree = tree
        nexus.alignment = sampled
        response_text = str(nexus)
    else:
        response_text = ''
    return [('Content-Type', 'text/plain')], response_text
def get_response(fs):
    """
    Sample an alignment on a tree and format it as plain text.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: a (response_headers, response_text) pair
    """
    # read the tree
    try:
        tree = Newick.parse(fs.tree, Newick.NewickTree)
        tree.assert_valid()
    except Newick.NewickSyntaxError as e:
        raise HandlingError(str(e))
    # read the mixture model
    mixture_model = deserialize_mixture_model(fs.model)
    # simulate the alignment using the seed provided by the form
    try:
        alignment = PhyLikelihood.simulate_alignment(
                tree, mixture_model, fs.ncols, fs.seed)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    response_headers = [("Content-Type", "text/plain")]
    if fs.fastaformat:
        # report only the alignment
        tip_records = []
        for tip in tree.gen_tips():
            tip_records.append(alignment.get_fasta_sequence(tip.name))
        return response_headers, "\n".join(tip_records)
    if fs.nexusformat:
        # report the alignment together with the tree
        nexus = Nexus.Nexus()
        nexus.tree = tree
        nexus.alignment = alignment
        return response_headers, str(nexus)
    # neither format was requested
    return response_headers, ""
def get_response_content(fs):
    """
    Sample a coalescent tree with branch rates and an alignment,
    and write them out as a BEAST xml document.
    @param fs: a field storage object providing nleaves and nsites
    @return: the xml text
    """
    # init the response and get the user variables
    out = StringIO()
    nleaves = fs.nleaves
    nsites = fs.nsites
    nvertices = nleaves * 2 - 1
    # nbranches is used only by the disabled optimization code below
    nbranches = nvertices - 1
    # sample the coalescent tree with timelike branch lengths
    R, B = kingman.sample(fs.nleaves)
    r = Ftree.R_to_root(R)
    # name the leaf vertices with uppercase letters
    leaf_letters = string.uppercase[:nleaves]
    N = dict(zip(range(nleaves), leaf_letters))
    N_leaves = dict(N)
    # name each internal vertex by the sorted names of the leaves below it
    v_to_leaves = R_to_v_to_leaves(R)
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            N[v] = ''.join(sorted(N[leaf] for leaf in leaves))
    # get vertex ages
    v_to_age = kingman.RB_to_v_to_age(R, B)
    # sample the rates on the branches
    b_to_rate = sample_b_to_rate(R)
    xycorr = get_correlation(R, b_to_rate)
    # define B_subs in terms of substitutions instead of time
    B_subs = dict((p, t * b_to_rate[p]) for p, t in B.items())
    # sample the alignment
    v_to_seq = sample_v_to_seq(R, B_subs, nsites)
    # build the alignment object and the jukes cantor rate matrix object
    headers, sequences = zip(*[
            (N[leaf_v], ''.join(v_to_seq[leaf_v]))
            for leaf_v in range(nleaves)])
    alignment = Fasta.create_alignment(headers, sequences)
    states = list('ACGT')
    rate_matrix_object = RateMatrix.RateMatrix(
            MatrixUtil.dict_to_row_major(
                RateMatrix.get_jukes_cantor_rate_matrix(), states, states),
            states)
    # get the log likelihood using the sampled rates
    subs_tree = Newick.parse(
            FtreeIO.RBN_to_newick(R, B_subs, N_leaves), Newick.NewickTree)
    ll = PhyLikelihood.get_log_likelihood(
            subs_tree, alignment, rate_matrix_object)
    # get ll when rates are all 1.0
    unity_tree = Newick.parse(
            FtreeIO.RBN_to_newick(R, B, N_leaves), Newick.NewickTree)
    ll_unity = PhyLikelihood.get_log_likelihood(
            unity_tree, alignment, rate_matrix_object)
    # get ll when rates are numerically optimized
    # TODO incorporate the result into the xml file
    # TODO speed up the likelihood evaluation (beagle? C module?)
    #f = Opt(R, B, N_leaves, alignment)
    #X_logs = [0.0] * nbranches
    #result = scipy.optimize.fmin(f, X_logs, full_output=True)
    #print result
    #
    # write the header and the summary of the sampled values
    print >> out, '<?xml version="1.0"?>'
    print >> out, '<beast>'
    print >> out
    print >> out, '<!-- actual rate autocorrelation', xycorr, '-->'
    print >> out, '<!-- actual root height', v_to_age[r], '-->'
    print >> out, '<!-- actual log likelihood', ll, '-->'
    print >> out, '<!-- ll if rates were unity', ll_unity, '-->'
    print >> out
    # write the taxon definitions
    print >> out, '\n'.join((
            '<!--',
            'predefine the taxa as in',
            'http://beast.bio.ed.ac.uk/Introduction_to_XML_format',
            '-->'))
    print >> out, get_leaf_taxon_defn(list(leaf_letters))
    print >> out
    # write the alignment
    print >> out, '\n'.join((
            '<!--',
            'define the alignment as in',
            'http://beast.bio.ed.ac.uk/Introduction_to_XML_format',
            '-->'))
    # NOTE(review): leaves is whatever the vertex naming loop above
    # bound last, so this call depends on the sorted vertex order;
    # confirm that this is the intended set of leaves.
    print >> out, get_alignment_defn(leaves, N, v_to_seq)
    print >> out
    # write the starting tree
    print >> out, '\n'.join((
            '<!--',
            'specify the starting tree as in',
            'http://beast.bio.ed.ac.uk/Tutorial_4',
            '-->'))
    print >> out, get_starting_tree_defn(R, B, N_leaves)
    print >> out
    # write the tree model and the relaxed clock sections
    print >> out, '\n'.join((
            '<!--',
            'connect the tree model as in',
            'http://beast.bio.ed.ac.uk/Tutorial_4',
            '-->'))
    print >> out, g_tree_model_defn
    print >> out
    print >> out, g_uncorrelated_relaxed_clock_info
    print >> out
    # the mrca sections below are disabled by being wrapped in a string
    """
    print >> out, '<!--'
    print >> out, 'create a list of taxa for which to constrain the mrca as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_subset_defn(N, v, leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'create a tmrcaStatistic that will record the height as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_stat_defn(N[v])
    """
    print >> out
    print >> out, g_likelihood_info
    print >> out
    # write the mcmc section and close the document
    print >> out, '\n'.join((
            '<!--',
            'run the mcmc',
            'http://beast.bio.ed.ac.uk/Tutorial_3.1',
            '-->'))
    print >> out, get_mcmc_defn(v_to_leaves, v_to_age, N)
    print >> out
    print >> out, '</beast>'
    # return the response
    return out.getvalue()
def get_response(fs):
    """
    Run hyphy on the nexus data and report the reformatted output.
    The hyphy batch file and the nexus file are written into the data
    directory, and hyphy is run from that directory.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text
    @raise HandlingError: if the nexus data cannot be loaded
    """
    # read the nexus data
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # Move to the data directory.
    # The try/finally guarantees that the original working directory
    # is restored even if writing the files or running hyphy fails.
    original_directory = os.getcwd()
    os.chdir(Config.data_path)
    try:
        # create the batch file
        category_suffixes = [
                str(category+1) for category in range(fs.ncategories)]
        hky_hyphy_model = get_hyphy_model_string(
                hyphy_nexus, fs.ncategories)
        with open(hyphy_bf, 'wt') as fout:
            print >> fout, hky_hyphy_model
        # create the nexus file
        with open(hyphy_nexus, 'wt') as fout:
            print >> fout, nexus
        # run hyphy; communicate() reads all of stdout and also waits
        # for the process so that it does not linger unreaped
        p = subprocess.Popen(
                [Config.hyphy_exe_path, hyphy_bf],
                close_fds=True, stdout=subprocess.PIPE)
        hyphy_output, _ = p.communicate()
    finally:
        # move back to the original directory
        os.chdir(original_directory)
    # read the hyphy output
    ns = Hyphy.get_hyphy_namespace(StringIO(hyphy_output))
    out = StringIO()
    if fs.outdebug:
        print >> out, get_hyphy_debug_info(hyphy_output)
        print >> out, ''
        print >> out, ''
    if fs.outmodel:
        print >> out, 'hyphy model:'
        print >> out, '---------------------------------------'
        print >> out, hky_hyphy_model
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    # the reformatted hyphy output is always shown
    print >> out, 'reformatted hyphy output:'
    print >> out, '---------------------------------------'
    # show the log likelihood
    print >> out, 'log likelihood :', ns.lnL
    print >> out, ''
    # show the kappa value
    print >> out, 'kappa :', ns.kappa
    print >> out, ''
    # show one block per mixture category
    category_blocks = []
    for suffix in category_suffixes:
        block = StringIO()
        print >> block, 'mixing proportion :', getattr(
                ns, 'catFreq'+suffix)
        print >> block, 'tree :', getattr(
                ns, 'tree'+suffix).get_newick_string()
        for nt in list('ACGT'):
            print >> block, nt, ':', getattr(ns, 'eqFreq'+nt+suffix)
        category_blocks.append(block.getvalue().strip())
    print >> out, '\n\n'.join(category_blocks)
    print >> out, '---------------------------------------'
    print >> out, ''
    print >> out, ''
    if fs.outcheck:
        # get the raw matrices from the normalized equilibrium frequencies
        matrices = []
        for suffix in category_suffixes:
            nt_dict = {}
            for nt in list('ACGT'):
                nt_dict[nt] = getattr(ns, 'eqFreq'+nt+suffix)
            total = float(sum(nt_dict.values()))
            nt_dict = dict((k, v/total) for k, v in nt_dict.items())
            matrix = RateMatrix.get_unscaled_hky85_rate_matrix(
                    nt_dict, ns.kappa)
            matrices.append(matrix)
        raw_matrix_rates = [
                matrix.get_expected_rate() for matrix in matrices]
        # get the normalized category weights
        category_weights = [
                getattr(ns, 'catFreq'+suffix)
                for suffix in category_suffixes]
        total = float(sum(category_weights))
        category_distribution = [
                weight / total for weight in category_weights]
        mixture_model = SubModel.MixtureModel(
                category_distribution, matrices)
        raw_mixture_rate = mixture_model.get_expected_rate()
        # rescale the mixture model
        # 0.75 is the expected rate of the initial model
        scaling_factor = 0.75
        mixture_model.rescale(scaling_factor)
        recomputed_log_likelihood = PhyLikelihood.get_log_likelihood(
                nexus.tree, nexus.alignment, mixture_model)
        print >> out, 'recomputed likelihood and rates:'
        print >> out, '---------------------------------------'
        print >> out, 'log likelihood :', recomputed_log_likelihood
        print >> out, ''
        print >> out, 'rate :', raw_mixture_rate
        print >> out, ''
        for rate, suffix in zip(raw_matrix_rates, category_suffixes):
            print >> out, 'rate%s : %s' % (suffix, rate)
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    # return the response
    return out.getvalue()
def get_response(fs):
    """
    Simulate an alignment from a two-category HKY85 mixture model.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text, which is the alignment if fs.fasta is set,
    the nexus data if fs.nex is set, and otherwise the empty string
    @raise HandlingError: if the tree has a syntax error, if the mixture
    weights sum to zero, or if the simulation fails
    """
    # parse the tree
    try:
        tree = Newick.parse(fs.tree, Newick.NewickTree)
        tree.assert_valid()
    except Newick.NewickSyntaxError as e:
        raise HandlingError(str(e))
    # get the mixture weights
    mixture_weights = [fs.weight_a, fs.weight_b]
    # get the kappa values
    kappa_values = [fs.kappa_a, fs.kappa_b]
    # get the nucleotide distributions
    frequency_strings = (fs.frequency_a, fs.frequency_b)
    nucleotide_distributions = [
            SnippetUtil.get_distribution(
                nt_string, 'nucleotide', list('ACGT'))
            for nt_string in frequency_strings]
    # create the nucleotide HKY rate matrix objects
    rate_matrix_objects = [
            RateMatrix.get_unscaled_hky85_rate_matrix(nt_distribution, kappa)
            for nt_distribution, kappa in zip(
                nucleotide_distributions, kappa_values)]
    # Create the mixture proportions.
    # The float conversion avoids python 2 integer division,
    # and a zero sum is reported as a handling error
    # instead of surfacing as a ZeroDivisionError.
    weight_sum = float(sum(mixture_weights))
    if not weight_sum:
        raise HandlingError('the mixture weights should not sum to zero')
    mixture_proportions = [
            weight / weight_sum for weight in mixture_weights]
    # create and normalize the mixture model
    mixture_model = SubModel.MixtureModel(
            mixture_proportions, rate_matrix_objects)
    mixture_model.normalize()
    # simulate the alignment
    try:
        alignment = PhyLikelihood.simulate_alignment(
                tree, mixture_model, fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    # get the output string
    output_string = ''
    if fs.fasta:
        # the output is the alignment
        output_string = '\n'.join(
                [alignment.get_fasta_sequence(node.name)
                    for node in tree.gen_tips()])
    elif fs.nex:
        # the output is the alignment and the tree,
        # with one comment per mixture category
        nexus = Nexus.Nexus()
        nexus.tree = tree
        nexus.alignment = alignment
        for i, (weight, kappa) in enumerate(
                zip(mixture_weights, kappa_values)):
            arr = ['weight: %s' % weight, 'kappa: %s' % kappa]
            nexus.add_comment('category %d: %s' % (i+1, ', '.join(arr)))
        output_string = str(nexus)
    #TODO use the correct filename extension in the output
    # (only the text is returned, so no filename is computed here)
    return output_string
def get_response(fs):
    """
    Fit the hyphy model to the nexus data and summarize the results.
    The hyphy batch file and the nexus file are written into the data
    directory, and hyphy is run from that directory.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text
    @raise HandlingError: if the nexus data cannot be loaded
    """
    # read the nexus data
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # Do the file system work from inside the data directory,
    # and always return to the original directory afterwards,
    # even when writing the files or running hyphy raises.
    original_directory = os.getcwd()
    os.chdir(Config.data_path)
    try:
        # create the batch file
        category_suffixes = [
                str(category + 1) for category in range(fs.ncategories)]
        hky_hyphy_model = get_hyphy_model_string(
                hyphy_nexus, fs.ncategories)
        with open(hyphy_bf, 'wt') as fout:
            print >> fout, hky_hyphy_model
        # create the nexus file
        with open(hyphy_nexus, 'wt') as fout:
            print >> fout, nexus
        # run hyphy, collecting its stdout and waiting for it to exit
        # so that the child process is reaped
        p = subprocess.Popen(
                [Config.hyphy_exe_path, hyphy_bf],
                close_fds=True, stdout=subprocess.PIPE)
        hyphy_output, _ = p.communicate()
    finally:
        os.chdir(original_directory)
    # read the hyphy output
    ns = Hyphy.get_hyphy_namespace(StringIO(hyphy_output))
    separator = '---------------------------------------'
    out = StringIO()
    if fs.outdebug:
        print >> out, get_hyphy_debug_info(hyphy_output)
        print >> out, ''
        print >> out, ''
    if fs.outmodel:
        print >> out, 'hyphy model:'
        print >> out, separator
        print >> out, hky_hyphy_model
        print >> out, separator
        print >> out, ''
        print >> out, ''
    # this section is shown unconditionally
    print >> out, 'reformatted hyphy output:'
    print >> out, separator
    # show the log likelihood
    print >> out, 'log likelihood :', ns.lnL
    print >> out, ''
    # show the kappa value
    print >> out, 'kappa :', ns.kappa
    print >> out, ''
    # show the per-category estimates
    category_blocks = []
    for suffix in category_suffixes:
        block = StringIO()
        print >> block, 'mixing proportion :', getattr(
                ns, 'catFreq' + suffix)
        print >> block, 'tree :', getattr(
                ns, 'tree' + suffix).get_newick_string()
        for nt in list('ACGT'):
            print >> block, nt, ':', getattr(ns, 'eqFreq' + nt + suffix)
        category_blocks.append(block.getvalue().strip())
    print >> out, '\n\n'.join(category_blocks)
    print >> out, separator
    print >> out, ''
    print >> out, ''
    if fs.outcheck:
        # get the raw matrices from the normalized equilibrium frequencies
        matrices = []
        for suffix in category_suffixes:
            nt_dict = {}
            for nt in list('ACGT'):
                nt_dict[nt] = getattr(ns, 'eqFreq' + nt + suffix)
            total = float(sum(nt_dict.values()))
            nt_dict = dict((k, v / total) for k, v in nt_dict.items())
            matrix = RateMatrix.get_unscaled_hky85_rate_matrix(
                    nt_dict, ns.kappa)
            matrices.append(matrix)
        raw_matrix_rates = [
                matrix.get_expected_rate() for matrix in matrices]
        # get the normalized category weights
        category_weights = []
        for suffix in category_suffixes:
            category_weights.append(getattr(ns, 'catFreq' + suffix))
        total = float(sum(category_weights))
        category_distribution = [
                weight / total for weight in category_weights]
        mixture_model = SubModel.MixtureModel(
                category_distribution, matrices)
        raw_mixture_rate = mixture_model.get_expected_rate()
        # rescale the mixture model
        # 0.75 is the expected rate of the initial model
        scaling_factor = 0.75
        mixture_model.rescale(scaling_factor)
        recomputed_log_likelihood = PhyLikelihood.get_log_likelihood(
                nexus.tree, nexus.alignment, mixture_model)
        print >> out, 'recomputed likelihood and rates:'
        print >> out, separator
        print >> out, 'log likelihood :', recomputed_log_likelihood
        print >> out, ''
        print >> out, 'rate :', raw_mixture_rate
        print >> out, ''
        for rate, suffix in zip(raw_matrix_rates, category_suffixes):
            print >> out, 'rate%s : %s' % (suffix, rate)
        print >> out, separator
        print >> out, ''
        print >> out, ''
    # return the response
    return out.getvalue()
def get_response(fs):
    """
    Sample an alignment from a mixture of two HKY85 rate matrices.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text, which is the alignment if fs.fasta is set,
    the nexus data if fs.nex is set, and otherwise the empty string
    @raise HandlingError: if the tree has a syntax error, if the mixture
    weights sum to zero, or if the simulation fails
    """
    # parse the tree
    try:
        tree = Newick.parse(fs.tree, Newick.NewickTree)
        tree.assert_valid()
    except Newick.NewickSyntaxError as e:
        raise HandlingError(str(e))
    # get the per-category parameters
    mixture_weights = [fs.weight_a, fs.weight_b]
    kappa_values = [fs.kappa_a, fs.kappa_b]
    frequency_strings = (fs.frequency_a, fs.frequency_b)
    # get the nucleotide distributions
    nucleotide_distributions = []
    for nt_string in frequency_strings:
        d = SnippetUtil.get_distribution(
                nt_string, 'nucleotide', list('ACGT'))
        nucleotide_distributions.append(d)
    # create the nucleotide HKY rate matrix objects
    rate_matrix_objects = []
    for nt_distribution, kappa in zip(
            nucleotide_distributions, kappa_values):
        rate_matrix_objects.append(
                RateMatrix.get_unscaled_hky85_rate_matrix(
                    nt_distribution, kappa))
    # Normalize the weights into mixture proportions.
    # Summing as a float prevents truncating integer division in python 2,
    # and a zero total is rejected explicitly so that the caller sees
    # a HandlingError instead of a ZeroDivisionError.
    weight_sum = float(sum(mixture_weights))
    if not weight_sum:
        raise HandlingError('the mixture weights should not sum to zero')
    mixture_proportions = [
            weight / weight_sum for weight in mixture_weights]
    # create the normalized mixture model
    mixture_model = SubModel.MixtureModel(
            mixture_proportions, rate_matrix_objects)
    mixture_model.normalize()
    # simulate the alignment
    try:
        alignment = PhyLikelihood.simulate_alignment(
                tree, mixture_model, fs.ncols)
    except PhyLikelihood.SimulationError as e:
        raise HandlingError(e)
    if fs.fasta:
        # the output is the alignment
        arr = []
        for node in tree.gen_tips():
            arr.append(alignment.get_fasta_sequence(node.name))
        return '\n'.join(arr)
    if fs.nex:
        # the output is the alignment and the tree,
        # annotated with the parameters of each category
        nexus = Nexus.Nexus()
        nexus.tree = tree
        nexus.alignment = alignment
        category_params = zip(mixture_weights, kappa_values)
        for i, (weight, kappa) in enumerate(category_params):
            description = ', '.join(
                    ['weight: %s' % weight, 'kappa: %s' % kappa])
            nexus.add_comment('category %d: %s' % (i+1, description))
        return str(nexus)
    #TODO use the correct filename extension in the output
    # (only the text is returned, so no filename is computed here)
    return ''