def estimate_states(tree, outgroup, migration_matrix):
    """Estimate states below the non-outgroup child of the root and score them.

    The first child of ``tree.root`` is inspected: if its name is in
    ``outgroup`` the second child is used as the "true root", otherwise the
    first child is used.  ``ML.EstimateStates`` is then run on that node.

    Parameters:
        tree: object exposing ``root`` with a ``GetKids()`` method.
        outgroup: container of names tested with ``in``.
        migration_matrix: mapping whose values are sized rows
            (presumably habitat -> per-state rates; verify against caller).

    Returns:
        (true_root, lik_score, aic_score), where ``lik_score`` is whatever
        ``ML.EstimateStates`` returns and ``aic_score`` is
        ``2 * k - 2 * lik_score``.
    """
    # estimate the states (by making each node trifurcating...)
    kids = tree.root.GetKids()
    if kids[0].name in outgroup:
        true_root = kids[1]
    else:
        true_root = kids[0]
    lik_score = ML.EstimateStates(true_root)

    # dict.values() is a non-indexable view on Python 3; materialise it so
    # taking the first row works on both Python 2 and 3 (and still raises
    # IndexError for an empty matrix).
    first_row = list(migration_matrix.values())[0]
    # k = 1 + rows * (row length - 1); used as the parameter count in the
    # AIC-style formula below.
    k = 1 + len(migration_matrix) * (len(first_row) - 1)
    aic_score = 2 * k - 2 * lik_score
    return true_root, lik_score, aic_score
# wipe the likelihoods off of the tree (just to be sure) ML.TreeWipe(tree.a_node) # learn the likelihoods print "Learn habitat assignments" ML.LearnLiks(tree, mu, migration_matrix, outgroup) # estimate the states (by making each node trifurcating...) root = tree.root kids = root.GetKids() if kids[0].name in outgroup: true_root = kids[1] else: true_root = kids[0] lik_score = ML.EstimateStates(true_root) k = 1 + len(migration_matrix) * (len(migration_matrix.values()[0]) - 1) aic_score = 2 * k - 2 * lik_score lik_f.write("likelihood: " + str(lik_score) + "\n") lik_f.write("aic: " + str(aic_score) + "\n") # write out the habitat assignment of each leaf for leaf in true_root.leaf_nodes: this_str = str(leaf) + "\t" + leaf.habitat migration_f.write(this_str + "\n") # draw out the tree files = {} files['full'] = full_f if thresh_fn is not None:
stats_str += str(len(habitat_matrix)) + "\t" stats_str += str(score) + "\t" stats_str += str(mu) + "\t" stats_str += str(diff) + "\t" stats_file.write(stats_str + "\n") stats_file.flush() print "\t\t" + str(counter) + "\t" + str(diff) # wipe the likelihoods off of the tree ML.TreeWipe(tree) # learn the likelihoods ML.LearnLiks(tree,mu,habitat_matrix) # estimate the states (by making each node trifurcating...) ML.EstimateStates(tree.a_node,habitat_matrix) # upgrade guesses for mu and habitat matrix this_migrate = habitat_matrix mu, habitat_matrix = ML.LearnRates(tree,mu,habitat_matrix,rateopt) new_migrate = habitat_matrix # stop? old_diff = diff score, diff = ML.CheckConverge(tree,new_migrate,this_migrate) if diff < converge_thresh: break # this should break the loop if you end up bouncing back and # forth between the same values
# wipe the likelihoods off of the tree (just to be sure) ML.TreeWipe(tree.a_node) # learn the likelihoods print "Learn habitat assignments" ML.LearnLiks(tree, mu, migration_matrix, outgroup) # estimate the states (by making each node trifurcating...) root = tree.root kids = root.GetKids() if kids[0].name in outgroup: true_root = kids[1] else: true_root = kids[0] ML.EstimateStates(true_root) # write out the habitat assignment of each leaf for leaf in true_root.leaf_nodes: this_str = str(leaf) + "\t" + leaf.habitat migration_f.write(this_str + "\n") # draw out the tree files = {} files['full'] = full_f if thresh_fn is not None: files['cluster'] = cluster_f files['bar'] = bars_f files['prune'] = prune_f files['cdist'] = cdist_f
def learn_habitats(tree, habitat_matrix, mu, rateopt, converge_thresh, habitat_thresh):
    """Learn habitats

    Alternates two nested loops:

    * inner loop -- wipe the tree, re-learn likelihoods, estimate states and
      update ``mu`` / ``habitat_matrix`` via ``ML.LearnRates`` until
      ``ML.CheckConverge`` reports a ``diff`` below ``converge_thresh``, the
      ``diff`` repeats (to 8 decimal places) or more than 500 steps ran;
    * outer loop -- collapse habitats with ``remove_similar_habitats`` and
      start over, until nothing is collapsed or fewer than two habitats
      remain.

    Progress is written to ``sys.stderr``.

    Parameters:
        tree: tree object passed through to the ``ML`` routines; must expose
            ``a_node``.
        habitat_matrix: sized collection of habitats (supports ``len``).
        mu: current rate estimate handed to / returned by ``ML.LearnRates``.
        rateopt: option forwarded unchanged to ``ML.LearnRates``.
        converge_thresh: inner-loop stop threshold, must satisfy 0 <= x <= 1.
        habitat_thresh: threshold forwarded to ``remove_similar_habitats``,
            must satisfy 0 <= x <= 1.

    Returns:
        (habitat_matrix, mu, stats) where ``stats`` is a list of rows; the
        first row is the column header and each round of refinement adds a
        ``'<n> habitats'`` header row followed by one row per step.

    Raises:
        AssertionError: if either threshold lies outside [0, 1].
    """
    sys.stderr.write('Learning Habitats:\n')

    # setting/checking params
    score = -9999.99999999
    diff = 1.0
    old_diff = 1.0
    msg = 'The convergence threshold ({}) must be between 0 & 1'
    assert 0 <= converge_thresh <= 1, msg.format(converge_thresh)
    msg = 'The collapse threshold ({}) must be between 0 & 1'
    assert 0 <= habitat_thresh <= 1, msg.format(habitat_thresh)

    # scale used to compare successive diffs at a fixed number of decimals
    sig_figs = 8
    scale = math.pow(10, sig_figs)

    stats_header = ['counter', 'habs', 'ML_score', 'mu', 'habitat dist diff']
    stats = [stats_header]
    while True:
        msg = "\t{} habitats\tRefinement Steps [d(Habitat Score)]:\n"
        sys.stderr.write(msg.format(len(habitat_matrix)))
        counter = 0
        stats_line = '{} habitats'.format(len(habitat_matrix))
        stats.append([stats_line] + stats_header)
        while True:
            msg = '\t\t{}\t{}\n'
            sys.stderr.write(msg.format(counter, diff))
            stats.append([counter, len(habitat_matrix), score, mu, diff])

            # wipe the likelihoods off of the tree
            ML.TreeWipe(tree)

            # learn the likelihoods
            ML.LearnLiks(tree, mu, habitat_matrix)

            # estimate the states (by making each node trifurcating...)
            ML.EstimateStates(tree.a_node, habitat_matrix)

            # upgrade guesses for mu and habitat matrix
            this_migrate = habitat_matrix
            mu, habitat_matrix = ML.LearnRates(tree, mu, habitat_matrix, rateopt)
            new_migrate = habitat_matrix

            # stop?
            old_diff = diff
            score, diff = ML.CheckConverge(tree, new_migrate, this_migrate)
            if diff < converge_thresh:
                break

            # this should break the loop if you end up bouncing back and
            # forth between the same values
            diff1 = math.floor(diff * scale)
            diff2 = math.floor(old_diff * scale)
            if diff1 > 0 and diff1 == diff2:
                break

            # hard cap on refinement steps
            if counter > 500:
                break
            counter += 1

        # remove similar habitats
        new_habitats = remove_similar_habitats(habitat_matrix, habitat_thresh)
        if len(new_habitats) == len(habitat_matrix):
            break
        habitat_matrix = new_habitats
        if len(habitat_matrix) < 2:
            break

    # time.clock() was removed in Python 3.8; prefer time.process_time() when
    # it exists and fall back to time.clock() on older interpreters.
    # NOTE(review): this is a process-wide clock reading, not the time spent
    # inside this call -- confirm that is what the message should report.
    clock = getattr(time, 'process_time', None) or time.clock
    msg = 'Learned {} habitats in {} seconds\n'
    sys.stderr.write(msg.format(len(habitat_matrix), clock()))
    return habitat_matrix, mu, stats