def estimate_skyline(base_name, plot=False):
    """Infer a skyline for a simulated data set and pair it with the known truth.

    The simulation parameters are recovered from the underscore-separated
    fields of the file-name part of ``base_name``:

    * third field, minus its first character     -> population size ``N``
    * third-to-last field, minus 3 characters    -> oscillation amplitude
    * second-to-last field, minus 6 characters   -> oscillation period

    Parameters
    ----------
    base_name : str
        Path prefix; ``.opt.nwk`` and ``.nuc.fasta`` are appended to locate
        the tree and the alignment.
    plot : bool
        If true, draw the inferred skyline, the empirical skyline and the
        true population size into a new figure.

    Returns
    -------
    tuple
        ``(period, amp, x, inferred_y, true_pop_size)`` where the three
        arrays are cut to start at the index ``x.searchsorted`` returns for
        the smallest ``numdate`` found when iterating over the tree root.
    """
    tree_path = base_name + ".opt.nwk"
    alignment_path = base_name + ".nuc.fasta"

    # simulation parameters are encoded in the file name
    params = os.path.basename(base_name).split('_')
    print(params)
    period = float(params[-2][6:])
    amp = float(params[-3][3:])
    pop_size = float(params[2][1:])

    sampling_generations = generations_from_ffpopsim_tree(tree_path)[1]
    tt = TreeTime(tree=tree_path, aln=alignment_path,
                  dates=sampling_generations, gtr="JC69", real_dates=False)
    run_options = {
        'Tc': "skyline",
        'max_iter': 3,
        'long_branch': True,
        'resolve_polytomies': True,
        'infer_gtr': True,
        'root': 'best',
    }
    tt.run(**run_options)
    print(tt.gtr)

    inferred = tt.merger_model.skyline_inferred()
    empirical = tt.merger_model.skyline_empirical()
    grid = inferred.x
    true_size = pop_size*(1.0 + amp*np.cos(2.0*np.pi*grid/pop_size/period))

    if plot:
        plt.figure(figsize=onecolumn_figsize)
        for xs, ys in ((grid, inferred.y),
                       (empirical.x, empirical.y),
                       (grid, true_size)):
            plt.plot(xs, ys)

    # drop everything before the earliest date seen when iterating the root
    earliest = np.min([node.numdate for node in tt.tree.root])
    start = grid.searchsorted(earliest)
    return period, amp, grid[start:], inferred.y[start:], true_size[start:]
def tt_from_file(self, infile, root='best', nodefile=None):
    """Load a tree from ``infile`` into a TreeTime object and annotate its nodes.

    Parameters
    ----------
    infile : str
        Path of the tree file handed to TreeTime.
    root : str or None
        Passed to ``TreeTime.reroot``; rerooting is skipped when falsy.
    nodefile : str or None
        Optional pickle file mapping node names to ``{attribute: value}``
        dicts; every entry is set as an attribute on the matching node.

    Side effects
    ------------
    Sets ``self.is_timetree``, ``self.tt`` and ``self.tree`` and assigns an
    ``attr`` dict to every node of the tree.
    """
    self.is_timetree = False
    self.logger('Reading tree from file '+infile, 2)

    # NOTE(review): the filter tests for 'date' but reads 'num_date';
    # presumably both keys are always set together -- confirm.
    dates = {seq.id: seq.attributes['num_date']
             for seq in self.aln if 'date' in seq.attributes}
    self.tt = TreeTime(dates=dates, tree=str(infile), gtr='Jukes-Cantor',
                       aln=self.aln, verbose=self.verbose, fill_overhangs=True)
    if root:
        self.tt.reroot(root=root)
    self.tree = self.tt.tree

    for node in self.tree.find_clades():
        if node.is_terminal() and node.name in self.sequence_lookup:
            seq = self.sequence_lookup[node.name]
            node.attr = seq.attributes
            try:
                node.attr['date'] = node.attr['date'].strftime('%Y-%m-%d')
            except (AttributeError, KeyError, TypeError, ValueError):
                # best effort: the date is missing, already a string, or not
                # formattable -- leave the attributes untouched
                pass
        else:
            node.attr = {}

    if nodefile is not None:
        self.logger('reading node properties from file: '+nodefile, 2)
        try:
            from cPickle import load  # Python 2
        except ImportError:
            from pickle import load  # Python 3
        # SECURITY: unpickling executes arbitrary code; only load node files
        # that come from a trusted source.
        # NOTE(review): mode 'r' is kept from the original; binary mode may be
        # required for pickles on Python 3 -- confirm against myopen.
        with myopen(nodefile, 'r') as node_fh:
            node_props = load(node_fh)
        for n in self.tree.find_clades():
            if n.name in node_props:
                for attr, value in node_props[n.name].items():
                    setattr(n, attr, value)
            else:
                self.logger("No node properties found for "+n.name, 2)
def timetree(tree=None, aln=None, seq_meta=None, keeproot=False, confidence=False,
             resolve_polytomies=True, max_iter=2, infer_gtr=True, Tc=0.01,
             reroot='best', use_marginal=False, **kwarks):
    """Run a TreeTime time-tree inference and copy the dates onto the nodes.

    Parameters
    ----------
    tree, aln
        Passed unchanged to the ``TreeTime`` constructor.
    seq_meta : dict
        Maps sequence name to a dict holding a ``"date"`` entry; each date is
        converted with ``parse_date`` and names whose date parses to ``None``
        are left out.
    keeproot
        Accepted for interface compatibility; not used in this function.
    confidence : bool
        If true, a 90% max-posterior region is stored on every node as
        ``num_date_confidence``.
    use_marginal : bool
        Together with ``confidence`` selects ``time_marginal='assign'``;
        otherwise ``time_marginal`` is set to ``confidence``.
    resolve_polytomies, max_iter, infer_gtr, Tc, reroot, **kwarks
        Forwarded to ``TreeTime.run`` (``reroot`` as ``root``).

    Returns
    -------
    TreeTime
        The TreeTime object after the run.
    """
    from treetime import TreeTime

    dates = {}
    for name, data in seq_meta.items():
        num_date = parse_date(data["date"], date_fmt)
        if num_date is not None:
            dates[name] = num_date

    tt = TreeTime(tree=tree, aln=aln, dates=dates, gtr='JC69')

    if confidence and use_marginal:
        # estimate confidence intervals via marginal ML and assign marginal ML times to nodes
        marginal = 'assign'
    else:
        marginal = confidence

    tt.run(infer_gtr=infer_gtr, root=reroot, Tc=Tc, time_marginal=marginal,
           resolve_polytomies=resolve_polytomies, max_iter=max_iter, **kwarks)

    # Walk the tree owned by the TreeTime object. The original iterated over an
    # undefined name ``T`` here, which raised NameError after the inference.
    for n in tt.tree.find_clades():
        n.num_date = n.numdate  # treetime convention is different from augur...
        if confidence:
            # 90% max posterior region
            n.num_date_confidence = list(tt.get_max_posterior_region(n, 0.9))
    return tt
def run_treetime(config: TreetimeConfig):
    """Run the TreeTime pipeline described by ``config`` and write all outputs.

    Steps, in order: optionally generate a tree from the FASTA input, read
    tree, alignment and date metadata, run TreeTime, then write the config
    JSON, the tree JSON, the newick tree, the nexus tree (after
    ``decorate``), the alignment, the metadata CSV and the GTR model.
    """
    inputs = config.input_filenames
    outputs = config.output_filenames

    if config.generate_tree:
        generate_tree(inputs.FASTA, outputs.NWK_GENERATED)
        newick_path = outputs.NWK_GENERATED
    else:
        newick_path = inputs.NWK

    parsed_tree = Phylo.read(newick_path, "newick")
    alignment = AlignIO.read(inputs.FASTA, "fasta")
    dates, meta = read_metadata_from_file(inputs.DATES, '.cache/log.txt')

    # TODO: in case of config.gtr == "infer" shall we default to "jc" here ?
    # TreeTime.run() has an additional "infer_gtr" option.
    # Why is the same thing configured in two places?
    tt = TreeTime(dates=dates, tree=parsed_tree, aln=alignment, gtr="jc")

    run_options = dict(
        root=config.root,
        infer_gtr=config.gtr == "infer",
        relaxed_clock=config.relaxed_clock,
        resolve_polytomies=config.resolve_polytomies,
        max_iter=config.max_iter,
        Tc=config.coalescent_prior,
        fixed_slope=config.slope,
        do_marginal=config.do_marginal,
    )
    tt.run(**run_options)

    write_json(config._asdict(), outputs.CONFIG_JSON)

    # layout() runs before the tree is serialised to JSON
    layout(tt)
    write_json(node_to_json(tt.tree.root, meta, config), outputs.TREE_JSON)

    # likelihoods_json = likelihoods_to_json(tt)
    # write_json(likelihoods_json, config.output_filenames.LIKELIHOODS_JSON)

    # the newick file is written before decorate(), the nexus file after it
    Phylo.write(tt.tree, outputs.NWK, "newick")
    decorate(tt)
    Phylo.write(tt.tree, outputs.NEX, "nexus")

    save_alignment(tt, config)
    save_metadata_to_csv(tt, meta, config)

    # TODO: Do we need this? It was commented out in the original treetime-web.
    # save_molecular_clock_to_csv(tt, config)

    save_gtr(tt, config)
def timetree(tree=None, aln=None, ref=None, seq_meta=None, keeproot=False,
             confidence=False, resolve_polytomies=True, max_iter=2, dateLimits=None,
             infer_gtr=True, Tc=0.01, reroot='best', use_marginal=False, **kwarks):
    """Run a TreeTime time-tree inference, optionally on VCF-derived input.

    Parameters
    ----------
    tree, aln, ref
        Passed unchanged to the ``TreeTime`` constructor. A non-``None``
        ``ref`` marks VCF input and makes the run use fixed equilibrium
        frequencies.
    seq_meta : dict
        Maps sequence name to a dict holding a ``"date"`` entry; each date is
        converted with ``parse_date`` and names whose date parses to ``None``
        are left out.
    keeproot
        Accepted for interface compatibility; not used in this function.
    dateLimits : iterable or None
        If given, converted to a sorted list of ints and handed to
        ``parse_date``.
    confidence : bool
        If true, a 90% max-posterior region is stored on every node as
        ``num_date_confidence``.
    use_marginal : bool
        Together with ``confidence`` selects ``time_marginal='assign'``;
        otherwise ``time_marginal`` is set to ``confidence``.
    resolve_polytomies, max_iter, infer_gtr, Tc, reroot, **kwarks
        Forwarded to ``TreeTime.run`` (``reroot`` as ``root``).

    Returns
    -------
    TreeTime
        The TreeTime object after the run.
    """
    from treetime import TreeTime

    dL_int = None
    if dateLimits:
        dL_int = sorted(int(x) for x in dateLimits)

    dates = {}
    for name, data in seq_meta.items():
        num_date = parse_date(data["date"], date_fmt, dL_int)
        if num_date is not None:
            dates[name] = num_date

    # send ref, if is None, does no harm
    tt = TreeTime(tree=tree, aln=aln, ref=ref, dates=dates, gtr='JC69')

    if confidence and use_marginal:
        # estimate confidence intervals via marginal ML and assign marginal ML times to nodes
        marginal = 'assign'
    else:
        marginal = confidence

    # Length of VCF files means GTR model with gaps causes overestimation of
    # mutation TO gaps, so gaps appear in internal nodes when no gaps at tips!
    # To prevent that, fix pi when the input is VCF.
    pi = None
    if ref is not None:
        pi = np.array([0.1618, 0.3188, 0.3176, 0.1618, 0.04])  # from real runs (Walker 2018)

    tt.run(infer_gtr=infer_gtr, root=reroot, Tc=Tc, time_marginal=marginal,
           resolve_polytomies=resolve_polytomies, max_iter=max_iter,
           fixed_pi=pi, **kwarks)

    # Walk the tree owned by the TreeTime object. The original iterated over an
    # undefined name ``T`` here, which raised NameError after the inference.
    for n in tt.tree.find_clades():
        n.num_date = n.numdate  # treetime convention is different from augur...
        if confidence:
            # 90% max posterior region
            n.num_date_confidence = list(tt.get_max_posterior_region(n, 0.9))
    return tt
def treetime_from_newick(gtr, infile):
    """
    Build a TreeTime object around a tree parsed from a newick file.

    Args:
     - gtr: handed to the TreeTime constructor as its only argument.

     - infile(str): path to the newick file.

    Returns:
     - TreeTime: the object with its ``tree`` attribute replaced by the parsed
     tree and ``set_additional_tree_params()`` already called on it.
    """
    timetree_obj = TreeTime(gtr)
    newick_tree = Phylo.read(infile, 'newick')
    timetree_obj.tree = newick_tree
    timetree_obj.set_additional_tree_params()
    return timetree_obj
from __future__ import print_function, division
import numpy as np
from Bio import Phylo
from treetime import TreeTime
from treetime.utils import parse_dates
from treetime.node_interpolator import Distribution, NodeInterpolator

# Script entry point: runs a TreeTime inference on the bundled H3N2 NA example
# and then re-initialises the date constraints.
if __name__ == '__main__':
    # common path prefix of the example's metadata, tree and alignment files
    base_name = 'test/treetime_examples/data/h3n2_na/h3n2_na_20'
    dates = parse_dates(base_name + '.metadata.csv')
    tt = TreeTime(gtr='Jukes-Cantor', tree=base_name + '.nwk',
                  aln=base_name + '.fasta', verbose=3, dates=dates, debug=True)

    # rerooting can be done along with the tree time inference
    tt.run(root="best", branch_length_mode='input', max_iter=2, time_marginal=True)

    # initialize date constraints and branch length interpolators
    # this is called in each iteration. 44ms
    tt.init_date_constraints()

    ###########################################################
    # joint inference of node times. done in every generation. 0.7s
#from treetime.utils import numeric_date

# Read "name,date" pairs from <base_name>.csv into a dict of float dates.
# Lines starting with '#' are skipped, as are lines that do not split into
# exactly two comma-separated fields or whose date is not a number.
with open(base_name + '.csv') as date_file:
    dates = {}
    for line in date_file:
        if line.startswith('#'):
            continue
        try:
            name, date = line.strip().split(',')
            dates[name] = float(date)
        except ValueError:
            # wrong number of fields or non-numeric date: ignore the line.
            # Only ValueError is caught so that Ctrl-C and genuine bugs are
            # no longer swallowed by a bare ``except``.
            continue

# instantiate treetime
ebola = TreeTime(gtr='Jukes-Cantor', tree=base_name + '.nwk',
                 aln=base_name + '.fasta', verbose=4, dates=dates)

# infer an ebola time tree while rerooting and resolving polytomies
ebola.run(root='best', relaxed_clock=False, max_iter=2,
          resolve_polytomies=True, Tc='skyline', time_marginal="assign")

# get Skyline and 2-sigma confidence intervals
skyline, confidence = ebola.merger_model.skyline_inferred(gen=50, confidence=2.0)
def refine(tree=None, aln=None, ref=None, dates=None, branch_length_inference='auto',
           confidence=False, resolve_polytomies=True, max_iter=2, precision='auto',
           infer_gtr=True, Tc=0.01, reroot=None, use_marginal=False, fixed_pi=None,
           clock_rate=None, clock_std=None, clock_filter_iqd=None, verbosity=1,
           covariance=True, **kwarks):
    """Infer a time-resolved phylogeny with TreeTime.

    Parameters
    ----------
    tree, aln, ref, dates, precision
        Passed to the ``TreeTime`` constructor. A non-``None`` ``ref`` marks
        VCF input.
    branch_length_inference : str
        Forwarded as ``branch_length_mode``; ``'auto'`` is replaced by
        ``'joint'`` for VCF input.
    confidence : bool
        If true, a 90% max-posterior region is stored on every node as
        ``num_date_confidence``.
    use_marginal : bool
        Together with ``confidence`` selects ``time_marginal='assign'``;
        otherwise ``time_marginal`` is set to ``confidence``.
    Tc : float, str or None
        Converted to float when possible; strings such as ``'opt'`` or
        ``'skyline'`` and ``None`` are passed through unchanged.
    fixed_pi : list or None
        Equilibrium frequencies. When ``None`` and ``ref`` is given they are
        estimated from the base composition of ``ref``.
    clock_rate, clock_std
        ``clock_rate`` is forwarded as ``fixed_clock_rate``; ``clock_std`` is
        used as ``vary_rate`` when confidence intervals are requested.
    clock_filter_iqd
        If truthy, the clock filter is run with this ``n_iqd`` and tips it
        marks as bad are pruned before the inference.
    covariance : bool
        Forwarded as ``use_covariation``.
    resolve_polytomies, max_iter, infer_gtr, reroot, verbosity, **kwarks
        Forwarded to TreeTime (``reroot`` as ``root``, ``verbosity`` as
        ``verbose``).

    Returns
    -------
    TreeTime
        The TreeTime object after the run.
    """
    from treetime import TreeTime

    # Tc could be a number or 'opt' or 'skyline'. TreeTime expects a float or
    # int if a number.
    try:
        Tc = float(Tc)
    except (TypeError, ValueError):
        # let strings remain strings; None (TypeError) is passed through as
        # well instead of crashing before the run
        pass

    if (ref is not None) and (fixed_pi is None):  # if VCF, fix pi
        # Otherwise mutation TO gaps is overestimated b/c of seq length.
        # float() keeps this a true division even without
        # ``from __future__ import division`` on Python 2.
        ref_length = float(len(ref))
        fixed_pi = [ref.count(base) / ref_length for base in ['A', 'C', 'G', 'T', '-']]
        if fixed_pi[-1] == 0:
            # give gaps a 5% pseudo-frequency and take 1% off every entry so
            # the frequencies still sum to one
            fixed_pi[-1] = 0.05
            fixed_pi = [v - 0.01 for v in fixed_pi]

    if ref is not None:  # VCF -> adjust branch length
        # set branch length mode explicitly if auto, as informative-site only
        # trees can have big branch lengths, making this set incorrectly in TreeTime
        if branch_length_inference == 'auto':
            branch_length_inference = 'joint'

    # send ref, if is None, does no harm
    tt = TreeTime(tree=tree, aln=aln, ref=ref, dates=dates,
                  verbose=verbosity, gtr='JC69', precision=precision)

    # conditionally run clock-filter and remove bad tips
    if clock_filter_iqd:
        # treetime clock filter will mark, but not remove bad tips
        tt.clock_filter(reroot=reroot, n_iqd=clock_filter_iqd, plot=False)  # use whatever was specified
        # remove them explicitly; iterate over a copy because pruning
        # changes the tree while we walk its tips
        for n in list(tt.tree.get_terminals()):
            if n.bad_branch:
                tt.tree.prune(n)
                print('pruning leaf ', n.name)
        # fix treetime set-up for new tree topology
        tt.prepare_tree()

    if confidence and use_marginal:
        # estimate confidence intervals via marginal ML and assign
        # marginal ML times to nodes
        marginal = 'assign'
    else:
        marginal = confidence

    # uncertainty of the clock rate is relevant if confidence intervals are estimated
    if confidence and clock_std:
        vary_rate = clock_std  # if standard deviation of clock is specified, use that
    elif (clock_rate is None) and confidence and covariance:
        vary_rate = True  # if run in covariance mode, standard deviation can be estimated
    else:
        vary_rate = False  # otherwise, rate uncertainty will be ignored

    tt.run(infer_gtr=infer_gtr, root=reroot, Tc=Tc, time_marginal=marginal,
           branch_length_mode=branch_length_inference,
           resolve_polytomies=resolve_polytomies, max_iter=max_iter,
           fixed_pi=fixed_pi, fixed_clock_rate=clock_rate,
           vary_rate=vary_rate, use_covariation=covariance, **kwarks)

    if confidence:
        for n in tt.tree.find_clades():
            n.num_date_confidence = list(tt.get_max_posterior_region(n, 0.9))

    print(
        "\nInferred a time resolved phylogeny using TreeTime:"
        "\n\tSagulenko et al. TreeTime: Maximum-likelihood phylodynamic analysis"
        "\n\tVirus Evolution, vol 4, https://academic.oup.com/ve/article/4/1/vex042/4794731\n"
    )
    return tt
def estimate_clock_model(params):
    """
    Estimate a molecular clock by root-to-tip regression (treetime clock).

    Reads dates, tree and alignment (or VCF) as described by ``params``,
    optionally filters tips that violate the clock and reroots the tree, then
    prints the regression, writes ``rtt.csv`` (and ``rerooted.newick`` unless
    ``params.keep_root`` is set) and plots the root-to-tip regression.

    Returns
    -------
    int
        0 on success, 1 if the tree or dates are missing or neither an
        alignment nor a sequence length was given. ``TreeTimeError`` raised
        during setup or rerooting is re-raised after printing a hint.
    """
    if assure_tree(params, tmp_dir='clock_model_tmp'):
        return 1
    dates = utils.parse_dates(params.dates, date_col=params.date_column,
                              name_col=params.name_column)
    if not dates:
        return 1

    outdir = get_outdir(params, '_clock')

    ###########################################################################
    ### READ IN VCF
    ###########################################################################
    # ref is None if the input is not VCF; the fixed_pi value returned
    # alongside is not needed for the clock estimate
    aln, ref, _ = read_if_vcf(params)

    ###########################################################################
    ### ESTIMATE ROOT (if requested) AND DETERMINE TEMPORAL SIGNAL
    ###########################################################################
    if params.aln is None and params.sequence_length is None:
        print("one of arguments '--aln' and '--sequence-length' is required.", file=sys.stderr)
        return 1

    basename = get_basename(params, outdir)
    try:
        myTree = TreeTime(dates=dates, tree=params.tree, aln=aln, gtr='JC69',
                          verbose=params.verbose, seq_len=params.sequence_length,
                          ref=ref)
    except TreeTimeError:
        print("\nTreeTime setup failed. Please see above for error messages and/or rerun with --verbose 4\n")
        raise

    myTree.tip_slack = params.tip_slack
    if params.clock_filter:
        n_bad = [n.name for n in myTree.tree.get_terminals() if n.bad_branch]
        myTree.clock_filter(n_iqd=params.clock_filter, reroot=params.reroot or 'least-squares')
        n_bad_after = [n.name for n in myTree.tree.get_terminals() if n.bad_branch]
        if len(n_bad_after) > len(n_bad):
            print("The following leaves don't follow a loose clock and "
                  "will be ignored in rate estimation:\n\t"
                  + "\n\t".join(set(n_bad_after).difference(n_bad)))

    if not params.keep_root:
        # reroot to optimal root, this assigns clock_model to myTree
        if params.covariation:  # this requires branch length estimates
            myTree.run(root="least-squares", max_iter=0,
                       use_covariation=params.covariation)

        try:
            myTree.reroot(params.reroot,
                          force_positive=not params.allow_negative_rate)
        except TreeTimeError:
            print("ERROR: unknown root or rooting mechanism!")
            raise

    # needed with and without rerooting
    myTree.get_clock_model(covariation=params.covariation)

    d2d = utils.DateConversion.from_regression(myTree.clock_model)
    print('\n',d2d)
    # The adjacent literals are concatenated before fill() wraps them, so each
    # piece needs its own trailing space (the original ran words together,
    # e.g. "variation inroot-to-tip").
    print(fill('The R^2 value indicates the fraction of variation in '
               'root-to-tip distance explained by the sampling times. '
               'Higher values corresponds more clock-like behavior (max 1.0).')+'\n')

    print(fill('The rate is the slope of the best fit of the date to '
               'the root-to-tip distance and provides an estimate of '
               'the substitution rate. The rate needs to be positive! '
               'Negative rates suggest an inappropriate root.')+'\n')

    print('\nThe estimated rate and tree correspond to a root date:')
    if params.covariation:
        reg = myTree.clock_model
        # propagate the regression covariance onto -intercept/slope
        dp = np.array([reg['intercept']/reg['slope']**2, -1./reg['slope']])
        droot = np.sqrt(reg['cov'][:2,:2].dot(dp).dot(dp))
        print('\n--- root-date:\t %3.2f +/- %1.2f (one std-dev)\n\n'%(-d2d.intercept/d2d.clock_rate, droot))
    else:
        print('\n--- root-date:\t %3.2f\n\n'%(-d2d.intercept/d2d.clock_rate))

    if not params.keep_root:
        # write rerooted tree to file
        outtree_name = basename+'rerooted.newick'
        Phylo.write(myTree.tree, outtree_name, 'newick')
        print("--- re-rooted tree written to \n\t%s\n"%outtree_name)

    table_fname = basename+'rtt.csv'
    with open(table_fname, 'w') as ofile:
        ofile.write("#name, date, root-to-tip distance\n")
        ofile.write("#Dates of nodes that didn't have a specified date are inferred from the root-to-tip regression.\n")
        for n in myTree.tree.get_terminals():
            if hasattr(n, "raw_date_constraint") and (n.raw_date_constraint is not None):
                if np.isscalar(n.raw_date_constraint):
                    tmp_str = str(n.raw_date_constraint)
                elif len(n.raw_date_constraint):
                    # interval constraint: written as "lower-upper"
                    tmp_str = str(n.raw_date_constraint[0])+'-'+str(n.raw_date_constraint[1])
                else:
                    tmp_str = ''
                ofile.write("%s, %s, %f\n"%(n.name, tmp_str, n.dist2root))
            else:
                ofile.write("%s, %f, %f\n"%(n.name, d2d.numdate_from_dist2root(n.dist2root), n.dist2root))
        for n in myTree.tree.get_nonterminals(order='preorder'):
            ofile.write("%s, %f, %f\n"%(n.name, d2d.numdate_from_dist2root(n.dist2root), n.dist2root))
    print("--- wrote dates and root-to-tip distances to \n\t%s\n"%table_fname)

    ###########################################################################
    ### PLOT AND SAVE RESULT
    ###########################################################################
    plot_rtt(myTree, outdir+params.plot_rtt)
    return 0
def timetree(params):
    """
    Infer a time tree as configured by the command-line ``params``.

    Parses dates and the relaxed-clock and coalescent options, sets up
    TreeTime from the tree and alignment (or VCF), runs the inference, and
    writes the sequence evolution model (if inferred), the molecular clock,
    the optional skyline, the tree plot, the root-to-tip plot, branch-specific
    rates (if ``params.relax`` is set) and the annotated sequences and tree.

    Returns
    -------
    int
        0 on success, 1 when the inputs or options are invalid or the
        TreeTime setup failed. ``TreeTimeError`` raised by the run is
        re-raised after printing a hint.
    """
    if params.relax is None:
        relaxed_clock_params = None
    elif params.relax == []:
        relaxed_clock_params = True
    elif len(params.relax) == 2:
        relaxed_clock_params = {'slack': params.relax[0], 'coupling': params.relax[1]}
    else:
        # the original left relaxed_clock_params unbound here and failed
        # later with UnboundLocalError; report the problem instead
        print("relaxed clock specification has to be empty or consist of "
              "two values (slack and coupling) -- exiting.", file=sys.stderr)
        return 1

    dates = utils.parse_dates(params.dates, date_col=params.date_column,
                              name_col=params.name_column)
    if len(dates) == 0:
        print("No valid dates -- exiting.")
        return 1

    if assure_tree(params, tmp_dir='timetree_tmp'):
        print("No tree -- exiting.")
        return 1

    outdir = get_outdir(params, '_treetime')

    gtr = create_gtr(params)
    infer_gtr = params.gtr == 'infer'

    ###########################################################################
    ### READ IN VCF
    ###########################################################################
    # sets ref and fixed_pi to None if not VCF
    aln, ref, fixed_pi = read_if_vcf(params)
    is_vcf = ref is not None
    branch_length_mode = params.branch_length_mode
    # variable-site-only trees can have big branch lengths, the auto setting won't work.
    if is_vcf or (params.aln and params.sequence_length):
        if branch_length_mode == 'auto':
            branch_length_mode = 'joint'

    ###########################################################################
    ### SET-UP and RUN
    ###########################################################################
    if params.aln is None and params.sequence_length is None:
        print("one of arguments '--aln' and '--sequence-length' is required.", file=sys.stderr)
        return 1
    myTree = TreeTime(dates=dates, tree=params.tree, ref=ref,
                      aln=aln, gtr=gtr, seq_len=params.sequence_length,
                      verbose=params.verbose, fill_overhangs=not params.keep_overhangs)
    myTree.tip_slack = params.tip_slack
    if not myTree.one_mutation:
        print("TreeTime setup failed, exiting")
        return 1

    # coalescent model options
    try:
        coalescent = float(params.coalescent)
        if coalescent < 10*myTree.one_mutation:
            # time scales below ten mutations switch the coalescent prior off
            coalescent = None
    except (TypeError, ValueError):
        # not a number: must be one of the named models
        if params.coalescent in ['opt', 'const', 'skyline']:
            coalescent = params.coalescent
        else:
            print("unknown coalescent model specification, has to be either "
                  "a float, 'opt', 'const' or 'skyline' -- exiting")
            return 1

    # determine whether confidence intervals are to be computed and how the
    # uncertainty in the rate estimate should be treated
    calc_confidence = params.confidence
    if params.clock_std_dev:
        vary_rate = params.clock_std_dev if calc_confidence else False
    elif params.confidence and params.covariation:
        vary_rate = True
    elif params.confidence:
        print(fill("Outside of covariation aware mode TreeTime cannot estimate confidence intervals "
                   "without specified standard deviation of the clock rate. Please specify '--clock-std-dev' "
                   "or rerun with '--covariation'. Will proceed without confidence estimation"))
        vary_rate = False
        calc_confidence = False
    else:
        vary_rate = False

    # RUN
    root = None if params.keep_root else params.reroot
    try:
        myTree.run(root=root, relaxed_clock=relaxed_clock_params,
                   resolve_polytomies=(not params.keep_polytomies),
                   Tc=coalescent, max_iter=params.max_iter,
                   fixed_clock_rate=params.clock_rate,
                   n_iqd=params.clock_filter,
                   time_marginal="assign" if calc_confidence else False,
                   vary_rate=vary_rate,
                   branch_length_mode=branch_length_mode,
                   reconstruct_tip_states=params.reconstruct_tip_states,
                   fixed_pi=fixed_pi,
                   use_covariation=params.covariation, n_points=params.n_skyline)
    except TreeTimeError:
        print("\nTreeTime run FAILED: please check above for errors and/or rerun with --verbose 4.\n")
        raise

    ###########################################################################
    ### OUTPUT and saving of results
    ###########################################################################
    if infer_gtr:
        fname = outdir+'sequence_evolution_model.txt'
        with open(fname, 'w') as ofile:
            ofile.write(str(myTree.gtr)+'\n')
        print('\nInferred sequence evolution model (saved as %s):'%fname)
        print(myTree.gtr)

    fname = outdir+'molecular_clock.txt'
    with open(fname, 'w') as ofile:
        ofile.write(str(myTree.date2dist)+'\n')
    # this announces the clock model; the original repeated the
    # "sequence evolution model" label used just above
    print('\nInferred molecular clock model (saved as %s):'%fname)
    print(myTree.date2dist)

    basename = get_basename(params, outdir)
    if coalescent in ['skyline', 'opt', 'const']:
        print("Inferred coalescent model")
        if coalescent == 'skyline':
            print_save_plot_skyline(myTree, plot=basename+'skyline.pdf',
                                    save=basename+'skyline.tsv', screen=True)
        else:
            Tc = myTree.merger_model.Tc.y[0]
            print(" --T_c: \t %1.2e \toptimized inverse merger rate in units of substitutions"%Tc)
            print(" --T_c: \t %1.2e \toptimized inverse merger rate in years"%(Tc/myTree.date2dist.clock_rate))
            print(" --N_e: \t %1.2e \tcorresponding 'effective population size' assuming 50 gen/year\n"%(Tc/myTree.date2dist.clock_rate*50))

    # plot
    import matplotlib.pyplot as plt
    from .treetime import plot_vs_years
    leaf_count = myTree.tree.count_terminals()
    # label tips when explicitly requested, or by default on small trees
    # unless tip labels were switched off
    label_func = lambda x: (x.name if x.is_terminal() and ((leaf_count<30
                                        and (not params.no_tip_labels))
                                      or params.tip_labels) else '')

    plot_vs_years(myTree, show_confidence=False, label_func=label_func,
                  confidence=0.9 if calc_confidence else None)
    tree_fname = (outdir + params.plot_tree)
    plt.savefig(tree_fname)
    print("--- saved tree as \n\t %s\n"%tree_fname)

    plot_rtt(myTree, outdir + params.plot_rtt)
    if params.relax:
        fname = outdir+'substitution_rates.tsv'
        print("--- wrote branch specific rates to\n\t %s\n"%fname)
        with open(fname, 'w') as fh:
            fh.write("#node\tclock_length\tmutation_length\trate\tfold_change\n")
            for n in myTree.tree.find_clades(order="preorder"):
                if n == myTree.tree.root:
                    continue
                g = n.branch_length_interpolator.gamma
                fh.write("%s\t%1.3e\t%1.3e\t%1.3e\t%1.2f\n"%(n.name, n.clock_length, n.mutation_length, myTree.date2dist.clock_rate*g, g))

    export_sequences_and_tree(myTree, basename, is_vcf, params.zero_based,
                              timetree=True, confidence=calc_confidence,
                              reconstruct_tip_states=params.reconstruct_tip_states)

    return 0
# PARSING OPTIONS ########################################################################### try: Tc = float(params.Tc) if Tc<1e-5: Tc = None except: if params.Tc in ['opt', 'skyline']: Tc = params.Tc else: Tc = None ########################################################################### ### SET-UP and RUN ########################################################################### myTree = TreeTime(dates=dates, tree=params.tree, aln=params.aln, gtr=gtr, verbose=params.verbose) myTree.run(root=params.reroot, relaxed_clock=params.relax, resolve_polytomies=(not params.keep_polytomies), Tc=Tc, max_iter=params.max_iter, branch_lengths = 'joint' if params.optimize_branch_length else 'input') ########################################################################### ### OUTPUT and saving of results ########################################################################### if infer_gtr: print('\nInferred GTR model:') print(myTree.gtr) print(myTree.date2dist) if Tc=='skyline':
# seaborn is optional and only changes the plot style
try:
    import seaborn as sns
    sns.set_style('whitegrid')
except Exception:
    # Styling stays best effort, but a bare ``except`` would also swallow
    # KeyboardInterrupt and SystemExit.
    print("Seaborn not found. Default style will be used for the plots")

plt.ion()

if __name__ == '__main__':
    base_name = 'data/H3N2_NA_allyears_NA.20'
    dates = read_dates(base_name)

    # instantiate treetime
    myTree = TreeTime(gtr='Jukes-Cantor', tree=base_name + '.nwk',
                      aln=base_name + '.fasta', verbose=4, dates=dates, debug=False)

    # RUN: this example uses a fixed clock rate of 0.003, resolves polytomes, and estimates
    # confidences via a final marginal reconstruction
    myTree.run(root='clock_filter', relaxed_clock=False, max_iter=2,
               fixed_clock_rate=0.003, resolve_polytomies=True,
               Tc=None, n_iqd=2, time_marginal=True)

    #################
# load data and parse dates plt.ion() base_name = 'data/H3N2_NA_allyears_NA.20' dates = read_dates(base_name) # define a multiplicity for each node (using season here, could be anything) seq_multiplicity = { name: 5 * (np.cos(2 * np.pi * (dates[name] + 0.5)) + 1.2) for name in dates } # multiplicity is passed into treetime as a dictionary linking node name to count tt = TreeTime(gtr='Jukes-Cantor', tree=base_name + '.nwk', seq_multiplicity=seq_multiplicity, aln=base_name + '.fasta', verbose=1, dates=dates) tt.reroot(root="best") fig, axs = plt.subplots(1, 2, figsize=(18, 9)) axs[0].set_title("Tree rerooted by treetime", fontsize=18) axs[1].set_title("Optimal divergence-time relationship with weighed nodes", fontsize=18) Phylo.draw(tt.tree, show_confidence=False, axes=axs[0], label_func=lambda x: x.name.split('|')[0] if x.is_terminal() else "") d = np.array([(n.numdate_given, n.dist2root, n.count)
def timetree(tree=None, aln=None, ref=None, dates=None, branch_length_mode='auto',
             confidence=False, resolve_polytomies=True, max_iter=2,
             infer_gtr=True, Tc=0.01, reroot=None, use_marginal=False, fixed_pi=None,
             clock_rate=None, n_iqd=None, verbosity=1, **kwarks):
    """Infer a time-resolved phylogeny with TreeTime.

    Parameters
    ----------
    tree, aln, ref, dates
        Passed to the ``TreeTime`` constructor. A non-``None`` ``ref`` marks
        VCF input.
    branch_length_mode : str
        Forwarded to ``TreeTime.run``; ``'auto'`` is replaced by ``'joint'``
        for VCF input.
    confidence : bool
        If true, a 90% max-posterior region is stored on every node as
        ``num_date_confidence``.
    use_marginal : bool
        Together with ``confidence`` selects ``time_marginal='assign'``;
        otherwise ``time_marginal`` is set to ``confidence``.
    Tc : float, str or None
        Converted to float when possible; strings such as ``'opt'`` or
        ``'skyline'`` and ``None`` are passed through unchanged.
    fixed_pi : list or None
        Equilibrium frequencies. When ``None`` and ``ref`` is given they are
        estimated from the base composition of ``ref``.
    clock_rate
        Forwarded as ``fixed_clock_rate``.
    resolve_polytomies, max_iter, infer_gtr, reroot, n_iqd, verbosity, **kwarks
        Forwarded to TreeTime (``reroot`` as ``root``, ``verbosity`` as
        ``verbose``).

    Returns
    -------
    TreeTime
        The TreeTime object after the run.
    """
    from treetime import TreeTime

    # Tc could be a number or 'opt' or 'skyline'. TreeTime expects a float or
    # int if a number.
    try:
        Tc = float(Tc)
    except (TypeError, ValueError):
        # let strings remain strings; None (TypeError) is passed through as
        # well instead of crashing before the run
        pass

    if ref is not None:  # VCF input
        if fixed_pi is None:
            # Fix pi, otherwise mutation TO gaps is overestimated b/c of seq
            # length. A caller-supplied fixed_pi is now respected (it used to
            # be overwritten), matching the behaviour of ``refine``.
            # float() keeps this a true division even without
            # ``from __future__ import division`` on Python 2.
            ref_length = float(len(ref))
            fixed_pi = [ref.count(base) / ref_length for base in ['A', 'C', 'G', 'T', '-']]
            if fixed_pi[-1] == 0:
                # give gaps a 5% pseudo-frequency and take 1% off every entry
                # so the frequencies still sum to one
                fixed_pi[-1] = 0.05
                fixed_pi = [v - 0.01 for v in fixed_pi]

        # set this explicitly if auto, as informative-site only trees can have
        # big branch lengths, making this set incorrectly in TreeTime
        if branch_length_mode == 'auto':
            branch_length_mode = 'joint'

    # send ref, if is None, does no harm
    tt = TreeTime(tree=tree, aln=aln, ref=ref, dates=dates,
                  verbose=verbosity, gtr='JC69')

    if confidence and use_marginal:
        # estimate confidence intervals via marginal ML and assign marginal ML times to nodes
        marginal = 'assign'
    else:
        marginal = confidence

    tt.run(infer_gtr=infer_gtr, root=reroot, Tc=Tc, time_marginal=marginal,
           branch_length_mode=branch_length_mode,
           resolve_polytomies=resolve_polytomies, max_iter=max_iter,
           fixed_pi=fixed_pi, fixed_clock_rate=clock_rate,
           n_iqd=n_iqd, **kwarks)

    if confidence:
        for n in tt.tree.find_clades():
            n.num_date_confidence = list(tt.get_max_posterior_region(n, 0.9))

    print(
        "\nInferred a time resolved phylogeny using TreeTime:"
        "\n\tSagulenko et al. TreeTime: Maximum-likelihood phylodynamic analysis"
        "\n\tVirus Evolution, vol 4, https://academic.oup.com/ve/article/4/1/vex042/4794731\n"
    )
    return tt
### FAKING ALIGMENT IF NONE GIVEN ########################################################################### if params.aln is None: from Bio import Seq, SeqRecord, Align aln = Align.MultipleSeqAlignment([ SeqRecord.SeqRecord(Seq.Seq("AAA"), id=node, name=node) for node in dates ]) ########################################################################### ### ESTIMATE ROOT (if requested) AND DETERMINE TEMPORAL SIGNAL ########################################################################### base_name = '.'.join(params.tree.split('/')[-1].split('.')[:-1]) myTree = TreeTime(dates=dates, tree=params.tree, aln=aln, gtr='JC69', verbose=params.verbose) if not params.keep_root: myTree.reroot('best') d2d = DateConversion.from_tree(myTree.tree) print('\n', d2d) print('The R^2 value indicates the fraction of variation in' '\nroot-to-tip distance explained by the temporal sampling.' '\nHigher values corresponds more clock-like behavior (max 1.0).') print('\nThe rate is the slope of the best fit of the date to' '\nthe root-to-tip distance and provides an estimate of' '\nthe substitution rate. The rate needs to be positive!'
def timetree(params):
    """
    Infer a time tree as configured by the command-line ``params``.

    Parses dates and the coalescent option, sets up TreeTime from the tree
    and alignment (or VCF), runs the inference, prints the inferred models,
    and writes the optional skyline, the tree plot, the annotated sequences
    and tree, and the root-to-tip plot.

    Note that ``params.relax`` is replaced by ``True`` in place when it is an
    empty list.

    Returns
    -------
    int
        0 on success, 1 when dates or tree are missing, neither an alignment
        nor a sequence length was given, or ``TreeTime.run`` reported an
        error.
    """
    if params.relax == []:
        params.relax = True

    dates = utils.parse_dates(params.dates)
    if len(dates) == 0:
        return 1

    if assure_tree(params, tmp_dir='timetree_tmp'):
        return 1

    outdir = get_outdir(params, '_treetime')

    gtr = create_gtr(params)
    infer_gtr = params.gtr == 'infer'

    ###########################################################################
    ### READ IN VCF
    ###########################################################################
    # sets ref and fixed_pi to None if not VCF
    aln, ref, fixed_pi = read_if_vcf(params)
    is_vcf = ref is not None
    branch_length_mode = params.branch_length_mode
    if is_vcf:
        # variable-site-only trees can have big branch lengths, setting this wrong.
        if branch_length_mode == 'auto':
            branch_length_mode = 'joint'

    ###########################################################################
    ### SET-UP and RUN
    ###########################################################################
    if params.aln is None and params.sequence_length is None:
        print("one of arguments '--aln' and '--sequence-length' is required.", file=sys.stderr)
        return 1
    myTree = TreeTime(dates=dates, tree=params.tree, ref=ref,
                      aln=aln, gtr=gtr, seq_len=params.sequence_length,
                      verbose=params.verbose)

    # coalescent model options
    try:
        coalescent = float(params.coalescent)
        if coalescent < 10 * myTree.one_mutation:
            # time scales below ten mutations switch the coalescent prior off
            coalescent = None
    except (TypeError, ValueError):
        # not a usable number: accept a named model, otherwise warn and
        # continue without a coalescent prior
        if params.coalescent in ['opt', 'const', 'skyline']:
            coalescent = params.coalescent
        else:
            print("unknown coalescent model specification, has to be either "
                  "a float, 'opt', 'const' or 'skyline'")
            coalescent = None

    vary_rate = params.confidence
    if params.clock_std_dev and params.clock_rate:
        vary_rate = params.clock_std_dev

    root = None if params.keep_root else params.reroot
    success = myTree.run(
        root=root,
        relaxed_clock=params.relax,
        resolve_polytomies=(not params.keep_polytomies),
        Tc=coalescent,
        max_iter=params.max_iter,
        fixed_clock_rate=params.clock_rate,
        n_iqd=params.clock_filter,
        time_marginal="assign" if params.confidence else False,
        vary_rate=vary_rate,
        branch_length_mode=branch_length_mode,
        fixed_pi=fixed_pi)
    if success == ttconf.ERROR:  # if TreeTime.run failed, exit
        return 1

    ###########################################################################
    ### OUTPUT and saving of results
    ###########################################################################
    if infer_gtr:
        print('\nInferred GTR model:')
        print(myTree.gtr)

    print(myTree.date2dist)

    basename = get_basename(params, outdir)
    if coalescent in ['skyline', 'opt']:
        print("Inferred coalescent model")
        if coalescent == 'skyline':
            print_save_plot_skyline(myTree, plot=basename + 'skyline.pdf',
                                    save=basename + 'skyline.tsv', screen=True)
        elif coalescent == 'opt':
            Tc = myTree.merger_model.Tc.y[0]
            print(" --T_c: \t %1.4f \toptimized inverse merger rate" % Tc)
            print(
                " --N_e: \t %1.1f \tcorresponding pop size assument 50 gen/year\n"
                % (Tc / myTree.date2dist.clock_rate * 50))

    # plot
    import matplotlib.pyplot as plt
    from .treetime import plot_vs_years
    leaf_count = myTree.tree.count_terminals()
    # Label tips of small trees only. The original used ``&`` here, which
    # binds tighter than ``<``: the test became ``leaf_count < (30 & bool)``,
    # i.e. ``leaf_count < 0``, so no tip was ever labelled.
    label_func = lambda x: x.name[:20] if (leaf_count < 30 and x.is_terminal()) else ''

    plot_vs_years(myTree, show_confidence=False, label_func=label_func,
                  confidence=0.9 if params.confidence else None)
    tree_fname = (outdir + params.plot_tree)
    plt.savefig(tree_fname)
    print("--- saved tree as \n\t %s\n" % tree_fname)

    export_sequences_and_tree(myTree, basename, is_vcf, params.zero_based,
                              timetree=True, confidence=params.confidence)

    plot_rtt(myTree, outdir + params.plot_rtt)
    return 0