def main():
    """Run a small HKY85 rate-estimation demo and print the results.

    Builds the Brown et al. example alignment and tree, constructs a
    normalized HKY85 rate matrix with uniform nucleotide frequencies and
    kappa=2, estimates per-column MLE rates, and prints a Stockholm string.
    """
    # create the alignment object
    print 'creating the alignment...'
    alignment_string = Fasta.brown_example_alignment.strip()
    alignment = Fasta.Alignment(StringIO(alignment_string))
    # create a tree object
    print 'creating the tree...'
    tree_string = Newick.brown_example_tree
    tree = Newick.parse(tree_string, Newick.NewickTree)
    # create a rate matrix object
    print 'creating the rate matrix object...'
    # uniform nucleotide frequencies with transition/transversion ratio 2
    distribution = {'A': .25, 'C': .25, 'G': .25, 'T': .25}
    kappa = 2.0
    row_major_rate_matrix = RateMatrix.get_unscaled_hky85_rate_matrix(
            distribution, kappa).get_row_major_rate_matrix()
    rate_matrix = RateMatrix.FastRateMatrix(
            row_major_rate_matrix, list('ACGT'))
    rate_matrix.normalize()
    # get the mle_rates
    print 'getting the mle rates...'
    mle_rates = get_mle_rates(tree, alignment, rate_matrix)
    print 'mle rates:'
    print mle_rates
    print 'stockholm string:'
    print get_stockholm_string(tree, alignment, mle_rates)
def get_response_content(fs):
    """Analyze a nexus alignment under a two-category HKY85 mixture model.

    @param fs: a FieldStorage-like object with cgi arguments
    @return: the analysis report as a string ending with a newline
    @raise HandlingError: if the nexus data cannot be parsed
    """
    # read the nexus data
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # get the mixture weights
    mixture_weights = [fs.weight_a, fs.weight_b]
    # get the kappa values
    kappa_values = [fs.kappa_a, fs.kappa_b]
    # get the nucleotide distributions
    nucleotide_distributions = []
    for nt_string in (fs.frequency_a, fs.frequency_b):
        distribution = SnippetUtil.get_distribution(nt_string,
                'nucleotide', list('ACGT'))
        nucleotide_distributions.append(distribution)
    # create the nucleotide HKY rate matrix objects
    rate_matrix_objects = []
    for nt_distribution, kappa in zip(nucleotide_distributions, kappa_values):
        rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix(
                nt_distribution, kappa)
        rate_matrix_objects.append(rate_matrix_object)
    # create the mixture proportions
    weight_sum = sum(mixture_weights)
    mixture_proportions = [weight / weight_sum for weight in mixture_weights]
    # create the mixture model
    mixture_model = SubModel.MixtureModel(mixture_proportions,
            rate_matrix_objects)
    # normalize the mixture model
    mixture_model.normalize()
    # return the results
    return do_analysis(mixture_model, nexus.alignment, nexus.tree) + '\n'
def test_hky_nielsen(self):
    """
    Give modified rejection sampling a chance to fail.
    It should give the same results as vanilla rejection sampling.
    """
    # define a normalized HKY85 rate matrix in dictionary form
    distribution = {'A': .2, 'C': .3, 'G': .3, 'T': .2}
    kappa = 2
    rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix(
            distribution, kappa)
    rate_matrix_object.normalize()
    rate_matrix = rate_matrix_object.get_dictionary_rate_matrix()
    # define the endpoint-conditioned path sampling problem
    path_length = 2
    initial_state = 'A'
    terminal_state = 'C'
    states = 'ACGT'
    iterations = 200
    # collect change counts from accepted vanilla rejection samples;
    # rejected draws return None and are not counted
    rejection_changes = []
    i = 0
    while i < iterations:
        rejection_events = get_rejection_sample(initial_state,
                terminal_state, states, path_length, rate_matrix)
        if rejection_events is not None:
            rejection_changes.append(len(rejection_events))
            i += 1
    # collect change counts from accepted modified rejection samples
    nielsen_changes = []
    i = 0
    while i < iterations:
        nielsen_events = get_nielsen_sample(initial_state,
                terminal_state, states, path_length, rate_matrix)
        if nielsen_events is not None:
            nielsen_changes.append(len(nielsen_events))
            i += 1
    # the two samplers should draw path lengths from the same distribution
    t, p = scipy.stats.mannwhitneyu(rejection_changes, nielsen_changes)
    # assertFalse replaces the deprecated failIf alias; pass p as the
    # failure message for consistency with test_hky_uniformization
    self.assertFalse(p < .001, p)
def get_sample_mixture_model():
    """
    @return: a mixture model that is used to generate the default nexus data
    """
    # the transition/transversion rate ratio shared by all categories
    kappa = 2
    # mixing proportions for the three categories
    category_distribution = [.1, .4, .5]
    # nucleotide equilibrium frequencies for each category
    frequency_dicts = [
            {'A': .1, 'C': .4, 'G': .4, 'T': .1},
            {'A': .2, 'C': .3, 'G': .3, 'T': .2},
            {'A': .25, 'C': .25, 'G': .25, 'T': .25}]
    # build one unscaled HKY85 rate matrix per category
    matrices = [
            RateMatrix.get_unscaled_hky85_rate_matrix(d, kappa)
            for d in frequency_dicts]
    # assemble and normalize the mixture model
    model = SubModel.MixtureModel(category_distribution, matrices)
    model.normalize()
    return model
def test_hky_uniformization(self):
    """
    Give uniformization a chance to fail.
    It should give the same results as modified rejection sampling.
    """
    # define a normalized HKY85 rate matrix in dictionary form
    distribution = {'A': .2, 'C': .3, 'G': .3, 'T': .2}
    kappa = 2
    rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix(
            distribution, kappa)
    rate_matrix_object.normalize()
    rate_matrix = rate_matrix_object.get_dictionary_rate_matrix()
    # define the endpoint-conditioned path sampling problem
    path_length = 2
    initial_state = 'A'
    terminal_state = 'C'
    states = 'ACGT'
    iterations = 200
    # get the modified rejection sampling changes, where each change is
    # the number of events on a sampled path; rejected draws return None
    nielsen_changes = []
    i = 0
    while i < iterations:
        nielsen_events = get_nielsen_sample(initial_state,
                terminal_state, states, path_length, rate_matrix)
        if nielsen_events is not None:
            nielsen_changes.append(len(nielsen_events))
            i += 1
    # get the uniformization changes, where each change is the number of
    # events on a sampled path; uniformization always yields a path so a
    # plain for loop suffices
    uniformization_changes = []
    for i in range(iterations):
        uniformization_events = get_uniformization_sample(initial_state,
                terminal_state, states, path_length, rate_matrix)
        uniformization_changes.append(len(uniformization_events))
    # see if there is a statistically significant difference
    # between the sampled path lengths
    t, p = scipy.stats.mannwhitneyu(uniformization_changes, nielsen_changes)
    # assertFalse replaces the deprecated failIf alias
    self.assertFalse(p < .001, p)
def deserialize_mixture_model(xml_string): """ Convert the xml string to a mixture model. @param xml_string: an xml string defining the mixture model @return: an unscaled mixture model object """ # define the variables that define the model kappa = None category_weights = [] nt_dicts = [] # get the variables that define the model element_tree = ET.parse(StringIO(xml_string)) root = element_tree.getroot() kappa = float(root.get("kappa")) for category in root: category_weights.append(float(category.get("weight"))) distribution = category.find("distribution") nt_dict = {} for terminal in distribution: nt_dict[terminal.get("symbol")] = float(terminal.get("weight")) total = sum(nt_dict.values()) for nt in nt_dict: nt_dict[nt] /= total nt_dicts.append(nt_dict) # create a mixture model from the variables that define the model rate_matrix_objects = [] for nt_dict in nt_dicts: rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix(nt_dict, kappa) rate_matrix_objects.append(rate_matrix_object) total = float(sum(category_weights)) category_distribution = [weight / total for weight in category_weights] mixture_model = SubModel.MixtureModel(category_distribution, rate_matrix_objects) mixture_model.normalize() return mixture_model
def main(): # create the alignment object print 'creating the alignment...' alignment_string = Fasta.brown_example_alignment.strip() alignment = Fasta.Alignment(StringIO(alignment_string)) # create a tree object print 'creating the tree...' tree_string = Newick.brown_example_tree tree = Newick.parse(tree_string, Newick.NewickTree) # create a rate matrix object print 'creating the rate matrix object...' distribution = {'A': .25, 'C': .25, 'G': .25, 'T': .25} kappa = 2.0 row_major_rate_matrix = RateMatrix.get_unscaled_hky85_rate_matrix( distribution, kappa).get_row_major_rate_matrix() rate_matrix = RateMatrix.FastRateMatrix(row_major_rate_matrix, list('ACGT')) rate_matrix.normalize() # get the mle_rates print 'getting the mle rates...' mle_rates = get_mle_rates(tree, alignment, rate_matrix) print 'mle rates:' print mle_rates print 'stockholm string:' print get_stockholm_string(tree, alignment, mle_rates)
def get_response_content(fs):
    """Analyze nexus data under a two-category HKY85 mixture model.

    @param fs: a FieldStorage-like object with cgi arguments
    @return: the analysis report as a string ending with a newline
    @raise HandlingError: if the nexus data cannot be parsed
    """
    # parse the nexus data, converting parse errors to handling errors
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # collect the per-category parameters from the form
    mixture_weights = [fs.weight_a, fs.weight_b]
    kappa_values = [fs.kappa_a, fs.kappa_b]
    nucleotide_distributions = [
            SnippetUtil.get_distribution(s, 'nucleotide', list('ACGT'))
            for s in (fs.frequency_a, fs.frequency_b)]
    # build one unscaled HKY85 rate matrix per category
    rate_matrix_objects = [
            RateMatrix.get_unscaled_hky85_rate_matrix(d, kappa)
            for d, kappa in zip(nucleotide_distributions, kappa_values)]
    # convert the weights to proportions that sum to one
    weight_sum = sum(mixture_weights)
    mixture_proportions = [w / weight_sum for w in mixture_weights]
    # assemble and normalize the mixture model
    mixture_model = SubModel.MixtureModel(
            mixture_proportions, rate_matrix_objects)
    mixture_model.normalize()
    # run the analysis and return the report
    return do_analysis(mixture_model, nexus.alignment, nexus.tree) + '\n'
def demo_uniformization(): distribution = {'A':.2,'C':.3,'G':.3,'T':.2} kappa = 2 rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix(distribution, kappa) rate_matrix_object.normalize() rate_matrix = rate_matrix_object.get_dictionary_rate_matrix() path_length = 2 initial_state = 'A' terminal_state = 'C' states = 'ACGT' uniformization_events = get_uniformization_sample(initial_state, terminal_state, states, path_length, rate_matrix) print uniformization_events
def get_response_content(fs):
    """Return the HKY85 rate matrix as tab separated text.

    @param fs: a FieldStorage-like object with cgi arguments
    @return: one line per source nucleotide, ending with a newline
    """
    # get the nucleotide distribution from the form
    distribution = SnippetUtil.get_distribution(
            fs.weights, 'nucleotide', list('ACGT'))
    # build the HKY85 rate matrix, normalizing it if requested
    rate_object = RateMatrix.get_unscaled_hky85_rate_matrix(
            distribution, fs.kappa)
    if fs.scaled:
        rate_object.normalize()
    rate_matrix = rate_object.get_dictionary_rate_matrix()
    # format each matrix row as a tab separated line of rates
    rows = []
    for nta in 'ACGT':
        entries = [str(rate_matrix[(nta, ntb)]) for ntb in 'ACGT']
        rows.append('\t'.join(entries))
    return '\n'.join(rows) + '\n'
def get_response_content(fs):
    """Analyze a nexus alignment using hyphy-fitted mixture parameters.

    The mixture has two HKY85 categories whose frequencies and kappa are
    read from a hyphy output namespace; each category rate matrix is then
    rescaled by the ratio of hyphy to nexus branch lengths at the human tip.
    @param fs: a FieldStorage-like object with cgi arguments
    @return: the analysis report as a string ending with a newline
    @raise HandlingError: on nexus parse or tree search errors
    """
    # read the nexus data
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # read the hyphy variables
    ns = Hyphy.get_hyphy_namespace(StringIO(fs.hyphy))
    # get the mixture weights
    mixture_weights = [ns.P, 1.0 - ns.P]
    # get the nucleotide distributions
    # hyphy variables for the second category carry a "2" suffix
    nucleotide_distributions = []
    for suffix in ("", "2"):
        distribution = {}
        for nt in list("ACGT"):
            var = "eqFreq" + nt + suffix
            proportion = getattr(ns, var)
            distribution[nt] = proportion
        nucleotide_distributions.append(distribution)
    # create the normalized nucleotide HKY rate matrix objects
    rate_matrix_objects = []
    for nt_distribution in nucleotide_distributions:
        rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix(nt_distribution, ns.kappa)
        rate_matrix_object.normalize()
        rate_matrix_objects.append(rate_matrix_object)
    # create the mixture proportions
    weight_sum = sum(mixture_weights)
    mixture_proportions = [weight / weight_sum for weight in mixture_weights]
    # scale each rate matrix object by its branch length ratio
    # NOTE(review): assumes both trees contain exactly one human tip
    # ("Human" in the nexus tree, "HUMAN" in the hyphy tree) — the
    # get_unique_node calls raise otherwise
    for rate_matrix_object, tree_name in zip(rate_matrix_objects, ("givenTree", "otherTree")):
        nexus_tree = nexus.tree
        hyphy_tree = getattr(ns, tree_name)
        try:
            nexus_human_node = nexus_tree.get_unique_node("Human")
        except Newick.NewickSearchError as e:
            raise HandlingError("nexus tree error: %s" % e)
        try:
            hyphy_human_node = hyphy_tree.get_unique_node("HUMAN")
        except Newick.NewickSearchError as e:
            raise HandlingError("hyphy tree error: %s" % e)
        sf = hyphy_human_node.blen / nexus_human_node.blen
        rate_matrix_object.rescale(sf)
    # create the mixture model
    mixture_model = SubModel.MixtureModel(mixture_proportions, rate_matrix_objects)
    # return the results
    return do_analysis(mixture_model, nexus.alignment, nexus.tree) + "\n"
def get_sample_mixture_model(): """ @return: a mixture model that is used to generate the default nexus data """ # define the model kappa = 2 category_distribution = [.1, .4, .5] nt_dicts = [ {'A' : .1, 'C' : .4, 'G' : .4, 'T' : .1}, {'A' : .2, 'C' : .3, 'G' : .3, 'T' : .2}, {'A' : .25, 'C' : .25, 'G' : .25, 'T' : .25} ] # create a mixture model from the variables that define the model rate_matrix_objects = [] for nt_dict in nt_dicts: rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix( nt_dict, kappa) rate_matrix_objects.append(rate_matrix_object) mixture_model = SubModel.MixtureModel( category_distribution, rate_matrix_objects) mixture_model.normalize() return mixture_model
def get_response_content(fs):
    """Estimate per-column MLE rates and return a Stockholm string.

    @param fs: a FieldStorage-like object with cgi arguments
    @return: a Stockholm format string ending with a newline
    @raise HandlingError: if the alignment is invalid
    """
    # parse and validate the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # read the nucleotide distribution from the form
    distribution = SnippetUtil.get_distribution(
            fs.weights, 'nucleotide', list('ACGT'))
    # read the nucleotide alignment, reporting errors to the caller
    try:
        alignment = Fasta.Alignment(StringIO(fs.alignment))
        alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # build a normalized fast HKY85 rate matrix from the form parameters
    hky = RateMatrix.get_unscaled_hky85_rate_matrix(distribution, fs.kappa)
    rate_matrix = RateMatrix.FastRateMatrix(
            hky.get_row_major_rate_matrix(), list('ACGT'))
    rate_matrix.normalize()
    # estimate the rates and format the response
    mle_rates = get_mle_rates(tree, alignment, rate_matrix)
    return get_stockholm_string(tree, alignment, mle_rates) + '\n'
def get_response_content(fs):
    """Compute per-column MLE rates for an alignment on a tree.

    @param fs: a FieldStorage-like object with cgi arguments
    @return: a Stockholm format string ending with a newline
    @raise HandlingError: if the alignment is invalid
    """
    # get the tree
    tree = Newick.parse(fs.tree, Newick.NewickTree)
    tree.assert_valid()
    # get the nucleotide distribution
    distribution = SnippetUtil.get_distribution(fs.weights,
            'nucleotide', list('ACGT'))
    # get the nucleotide alignment
    try:
        alignment = Fasta.Alignment(StringIO(fs.alignment))
        alignment.force_nucleotide()
    except Fasta.AlignmentError as e:
        raise HandlingError(e)
    # get the rate matrix defined by the nucleotide distribution and kappa
    row_major_rate_matrix = RateMatrix.get_unscaled_hky85_rate_matrix(
            distribution, fs.kappa).get_row_major_rate_matrix()
    rate_matrix = RateMatrix.FastRateMatrix(row_major_rate_matrix, list('ACGT'))
    rate_matrix.normalize()
    # get the mle rates
    mle_rates = get_mle_rates(tree, alignment, rate_matrix)
    # return the response
    return get_stockholm_string(tree, alignment, mle_rates) + '\n'
def deserialize_mixture_model(xml_string):
    """
    Convert the xml string to a mixture model.
    @param xml_string: an xml string defining the mixture model
    @return: an unscaled mixture model object
    """
    # parse the xml document and read the shared kappa parameter
    root = ET.parse(StringIO(xml_string)).getroot()
    kappa = float(root.get('kappa'))
    # read each category's weight and normalized nucleotide distribution
    category_weights = []
    nt_dicts = []
    for category in root:
        category_weights.append(float(category.get('weight')))
        distribution = category.find('distribution')
        nt_dict = dict(
                (terminal.get('symbol'), float(terminal.get('weight')))
                for terminal in distribution)
        total = sum(nt_dict.values())
        for nt in nt_dict:
            nt_dict[nt] /= total
        nt_dicts.append(nt_dict)
    # build one unscaled HKY85 rate matrix per category
    rate_matrix_objects = [
            RateMatrix.get_unscaled_hky85_rate_matrix(d, kappa)
            for d in nt_dicts]
    # normalize the category weights and assemble the mixture model
    total = float(sum(category_weights))
    category_distribution = [w / total for w in category_weights]
    mixture_model = SubModel.MixtureModel(
            category_distribution, rate_matrix_objects)
    mixture_model.normalize()
    return mixture_model
def get_response(fs): """ @param fs: a FieldStorage object containing the cgi arguments @return: a (response_headers, response_text) pair """ # parse the tree try: tree = Newick.parse(fs.tree, Newick.NewickTree) tree.assert_valid() except Newick.NewickSyntaxError as e: raise HandlingError(str(e)) # get the mixture weights mixture_weights = [fs.weight_a, fs.weight_b] # get the kappa values kappa_values = [fs.kappa_a, fs.kappa_b] # get the nucleotide distributions frequency_strings = (fs.frequency_a, fs.frequency_b) nucleotide_distributions = [] for nt_string in frequency_strings: d = SnippetUtil.get_distribution(nt_string, 'nucleotide', list('ACGT')) nucleotide_distributions.append(d) # create the nucleotide HKY rate matrix objects rate_matrix_objects = [] for nt_distribution, kappa in zip(nucleotide_distributions, kappa_values): rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix( nt_distribution, kappa) rate_matrix_objects.append(rate_matrix_object) # create the mixture proportions weight_sum = sum(mixture_weights) mixture_proportions = [weight / weight_sum for weight in mixture_weights] # create the mixture model mixture_model = SubModel.MixtureModel(mixture_proportions, rate_matrix_objects) # normalize the mixture model mixture_model.normalize() # simulate the alignment try: alignment = PhyLikelihood.simulate_alignment(tree, mixture_model, fs.ncols) except PhyLikelihood.SimulationError as e: raise HandlingError(e) # get the output string output_string = '' if fs.fasta: # the output is the alignment arr = [] for node in tree.gen_tips(): arr.append(alignment.get_fasta_sequence(node.name)) alignment_string = '\n'.join(arr) output_string = alignment_string elif fs.nex: # the output is the alignment and the tree nexus = Nexus.Nexus() nexus.tree = tree nexus.alignment = alignment for i in range(2): arr = [] arr.append('weight: %s' % mixture_weights[i]) arr.append('kappa: %s' % kappa_values[i]) nexus.add_comment('category %d: %s' % (i + 1, ', '.join(arr))) 
output_string = str(nexus) # define the filename if fs.fasta: filename_extension = 'fasta' elif fs.nex: filename_extension = 'nex' filename = 'sample.' + fs.fmt #TODO use the correct filename extension in the output return output_string
def get_response(fs): """ @param fs: a FieldStorage object containing the cgi arguments @return: a (response_headers, response_text) pair """ # parse the tree try: tree = Newick.parse(fs.tree, Newick.NewickTree) tree.assert_valid() except Newick.NewickSyntaxError as e: raise HandlingError(str(e)) # get the mixture weights mixture_weights = [fs.weight_a, fs.weight_b] # get the kappa values kappa_values = [fs.kappa_a, fs.kappa_b] # get the nucleotide distributions frequency_strings = (fs.frequency_a, fs.frequency_b) nucleotide_distributions = [] for nt_string in frequency_strings: d = SnippetUtil.get_distribution(nt_string, 'nucleotide', list('ACGT')) nucleotide_distributions.append(d) # create the nucleotide HKY rate matrix objects rate_matrix_objects = [] for nt_distribution, kappa in zip(nucleotide_distributions, kappa_values): rate_matrix_object = RateMatrix.get_unscaled_hky85_rate_matrix( nt_distribution, kappa) rate_matrix_objects.append(rate_matrix_object) # create the mixture proportions weight_sum = sum(mixture_weights) mixture_proportions = [weight / weight_sum for weight in mixture_weights] # create the mixture model mixture_model = SubModel.MixtureModel( mixture_proportions, rate_matrix_objects) # normalize the mixture model mixture_model.normalize() # simulate the alignment try: alignment = PhyLikelihood.simulate_alignment( tree, mixture_model, fs.ncols) except PhyLikelihood.SimulationError as e: raise HandlingError(e) # get the output string output_string = '' if fs.fasta: # the output is the alignment arr = [] for node in tree.gen_tips(): arr.append(alignment.get_fasta_sequence(node.name)) alignment_string = '\n'.join(arr) output_string = alignment_string elif fs.nex: # the output is the alignment and the tree nexus = Nexus.Nexus() nexus.tree = tree nexus.alignment = alignment for i in range(2): arr = [] arr.append('weight: %s' % mixture_weights[i]) arr.append('kappa: %s' % kappa_values[i]) nexus.add_comment('category %d: %s' % (i+1, ', '.join(arr))) 
output_string = str(nexus) # define the filename if fs.fasta: filename_extension = 'fasta' elif fs.nex: filename_extension = 'nex' filename = 'sample.' + fs.fmt #TODO use the correct filename extension in the output return output_string
def get_response(fs):
    """
    @param fs: a FieldStorage object containing the cgi arguments
    @return: a (response_headers, response_text) pair
    """
    # read the nexus data
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # move to the data directory
    # NOTE(review): the working directory is not restored if anything
    # below raises before the chdir back — consider try/finally
    original_directory = os.getcwd()
    os.chdir(Config.data_path)
    # create the batch file
    # hyphy variables for category k carry the suffix str(k+1)
    category_suffixes = [str(category+1) for category in range(fs.ncategories)]
    # hyphy_bf and hyphy_nexus are presumably module-level file names — confirm
    hky_hyphy_model = get_hyphy_model_string(hyphy_nexus, fs.ncategories)
    with open(hyphy_bf, 'wt') as fout:
        print >> fout, hky_hyphy_model
    # create the nexus file
    with open(hyphy_nexus, 'wt') as fout:
        print >> fout, nexus
    # run hyphy
    p = subprocess.Popen([Config.hyphy_exe_path, hyphy_bf],
            close_fds=True, stdout=subprocess.PIPE)
    hyphy_output = p.stdout.read()
    # move back to the original directory
    os.chdir(original_directory)
    # read the hyphy output
    ns = Hyphy.get_hyphy_namespace(StringIO(hyphy_output))
    out = StringIO()
    if fs.outdebug:
        # optionally show raw debugging information about the hyphy run
        print >> out, get_hyphy_debug_info(hyphy_output)
        print >> out, ''
        print >> out, ''
    if fs.outmodel:
        # optionally echo the generated hyphy model
        print >> out, 'hyphy model:'
        print >> out, '---------------------------------------'
        print >> out, hky_hyphy_model
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    if True:
        # always show the reformatted hyphy output
        print >> out, 'reformatted hyphy output:'
        print >> out, '---------------------------------------'
        # show the log likelihood
        print >> out, 'log likelihood :', ns.lnL
        print >> out, ''
        # show the kappa value
        print >> out, 'kappa :', ns.kappa
        print >> out, ''
        # show the per-category proportions, trees, and frequencies
        category_blocks = []
        for suffix in category_suffixes:
            block = StringIO()
            print >> block, 'mixing proportion :', getattr(ns, 'catFreq'+suffix)
            print >> block, 'tree :', getattr(ns, 'tree'+suffix).get_newick_string()
            for nt in list('ACGT'):
                print >> block, nt, ':', getattr(ns, 'eqFreq'+nt+suffix)
            category_blocks.append(block.getvalue().strip())
        print >> out, '\n\n'.join(category_blocks)
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    if fs.outcheck:
        # optionally recompute the likelihood and rates as a sanity check
        # get the raw matrices
        matrices = []
        for suffix in category_suffixes:
            nt_dict = {}
            for nt in list('ACGT'):
                nt_dict[nt] = getattr(ns, 'eqFreq'+nt+suffix)
            total = float(sum(nt_dict.values()))
            nt_dict = dict((k, v/total) for k, v in nt_dict.items())
            matrix = RateMatrix.get_unscaled_hky85_rate_matrix(
                    nt_dict, ns.kappa)
            matrices.append(matrix)
        raw_matrix_rates = [matrix.get_expected_rate() for matrix in matrices]
        category_weights = []
        for suffix in category_suffixes:
            category_weights.append(getattr(ns, 'catFreq'+suffix))
        total = float(sum(category_weights))
        category_distribution = [weight / total for weight in category_weights]
        mixture_model = SubModel.MixtureModel(category_distribution, matrices)
        raw_mixture_rate = mixture_model.get_expected_rate()
        # rescale the mixture model
        # 0.75 is the expected rate of the initial model
        r1 = 0.75
        scaling_factor = r1
        mixture_model.rescale(scaling_factor)
        recomputed_log_likelihood = PhyLikelihood.get_log_likelihood(
                nexus.tree, nexus.alignment, mixture_model)
        print >> out, 'recomputed likelihood and rates:'
        print >> out, '---------------------------------------'
        print >> out, 'log likelihood :', recomputed_log_likelihood
        print >> out, ''
        print >> out, 'rate :', raw_mixture_rate
        print >> out, ''
        for rate, suffix in zip(raw_matrix_rates, category_suffixes):
            print >> out, 'rate%s : %s' % (suffix, rate)
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    # return the response
    return out.getvalue()
def get_response(fs):
    """
    Run hyphy on nexus data and report the reformatted results.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: a (response_headers, response_text) pair
    @raise HandlingError: if the nexus data cannot be parsed
    """
    # read the nexus data
    nexus = Nexus.Nexus()
    try:
        nexus.load(StringIO(fs.nexus))
    except Nexus.NexusError as e:
        raise HandlingError(e)
    # move to the data directory
    original_directory = os.getcwd()
    os.chdir(Config.data_path)
    # restore the working directory even if creating the input files
    # or running hyphy fails
    try:
        # create the batch file
        category_suffixes = [
                str(category + 1) for category in range(fs.ncategories)]
        hky_hyphy_model = get_hyphy_model_string(hyphy_nexus, fs.ncategories)
        with open(hyphy_bf, 'wt') as fout:
            print >> fout, hky_hyphy_model
        # create the nexus file
        with open(hyphy_nexus, 'wt') as fout:
            print >> fout, nexus
        # run hyphy
        p = subprocess.Popen([Config.hyphy_exe_path, hyphy_bf],
                close_fds=True, stdout=subprocess.PIPE)
        hyphy_output = p.stdout.read()
    finally:
        # move back to the original directory
        os.chdir(original_directory)
    # read the hyphy output
    ns = Hyphy.get_hyphy_namespace(StringIO(hyphy_output))
    out = StringIO()
    if fs.outdebug:
        # optionally show raw debugging information about the hyphy run
        print >> out, get_hyphy_debug_info(hyphy_output)
        print >> out, ''
        print >> out, ''
    if fs.outmodel:
        # optionally echo the generated hyphy model
        print >> out, 'hyphy model:'
        print >> out, '---------------------------------------'
        print >> out, hky_hyphy_model
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    if True:
        # always show the reformatted hyphy output
        print >> out, 'reformatted hyphy output:'
        print >> out, '---------------------------------------'
        # show the log likelihood
        print >> out, 'log likelihood :', ns.lnL
        print >> out, ''
        # show the kappa value
        print >> out, 'kappa :', ns.kappa
        print >> out, ''
        # show the per-category proportions, trees, and frequencies
        category_blocks = []
        for suffix in category_suffixes:
            block = StringIO()
            print >> block, 'mixing proportion :', getattr(
                    ns, 'catFreq' + suffix)
            print >> block, 'tree :', getattr(ns, 'tree' + suffix).get_newick_string()
            for nt in list('ACGT'):
                print >> block, nt, ':', getattr(ns, 'eqFreq' + nt + suffix)
            category_blocks.append(block.getvalue().strip())
        print >> out, '\n\n'.join(category_blocks)
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    if fs.outcheck:
        # optionally recompute the likelihood and rates as a sanity check
        # get the raw matrices
        matrices = []
        for suffix in category_suffixes:
            nt_dict = {}
            for nt in list('ACGT'):
                nt_dict[nt] = getattr(ns, 'eqFreq' + nt + suffix)
            total = float(sum(nt_dict.values()))
            nt_dict = dict((k, v / total) for k, v in nt_dict.items())
            matrix = RateMatrix.get_unscaled_hky85_rate_matrix(
                    nt_dict, ns.kappa)
            matrices.append(matrix)
        raw_matrix_rates = [matrix.get_expected_rate() for matrix in matrices]
        category_weights = []
        for suffix in category_suffixes:
            category_weights.append(getattr(ns, 'catFreq' + suffix))
        total = float(sum(category_weights))
        category_distribution = [weight / total for weight in category_weights]
        mixture_model = SubModel.MixtureModel(category_distribution, matrices)
        raw_mixture_rate = mixture_model.get_expected_rate()
        # rescale the mixture model
        # 0.75 is the expected rate of the initial model
        r1 = 0.75
        scaling_factor = r1
        mixture_model.rescale(scaling_factor)
        recomputed_log_likelihood = PhyLikelihood.get_log_likelihood(
                nexus.tree, nexus.alignment, mixture_model)
        print >> out, 'recomputed likelihood and rates:'
        print >> out, '---------------------------------------'
        print >> out, 'log likelihood :', recomputed_log_likelihood
        print >> out, ''
        print >> out, 'rate :', raw_mixture_rate
        print >> out, ''
        for rate, suffix in zip(raw_matrix_rates, category_suffixes):
            print >> out, 'rate%s : %s' % (suffix, rate)
        print >> out, '---------------------------------------'
        print >> out, ''
        print >> out, ''
    # return the response
    return out.getvalue()