def estimate_clock_model(params):
    """Run the clock analysis: a root-to-tip regression on a dated tree.

    Sets up a TreeTime object from the tree, dates and optional alignment
    referenced by ``params``, optionally applies the clock filter and
    re-roots the tree, fits the clock model and prints a summary to stdout.

    Files written:

    * ``<basename>rerooted.newick`` -- the re-rooted tree (skipped when
      ``params.keep_root`` is set),
    * ``<basename>rtt.csv`` -- name, date and root-to-tip distance of every
      terminal and internal node,
    * the root-to-tip plot, via ``plot_rtt(myTree, outdir + params.plot_rtt)``.

    Parameters
    ----------
    params : namespace-like object
        Attributes read directly in this function: ``dates``,
        ``date_column``, ``name_column``, ``aln``, ``sequence_length``,
        ``tree``, ``verbose``, ``tip_slack``, ``clock_filter``, ``reroot``,
        ``keep_root``, ``covariation``, ``allow_negative_rate`` and
        ``plot_rtt``. The object is also handed to ``assure_tree``,
        ``get_outdir``, ``read_if_vcf`` and ``get_basename``.

    Returns
    -------
    int
        0 on success; 1 when ``assure_tree`` reports a problem, no dates
        could be parsed, or neither an alignment nor a sequence length was
        given.

    Raises
    ------
    TreeTimeError
        Re-raised (after printing a hint) when the TreeTime setup or the
        re-rooting fails.
    """
    if assure_tree(params, tmp_dir='clock_model_tmp'):
        return 1

    dates = utils.parse_dates(params.dates, date_col=params.date_column,
                              name_col=params.name_column)
    if len(dates) == 0:
        return 1

    outdir = get_outdir(params, '_clock')

    ###########################################################################
    ### READ IN VCF
    ###########################################################################
    # ref (and the third return value) are None if the input is not VCF;
    # only the alignment and the reference are needed for the clock analysis.
    aln, ref, _ = read_if_vcf(params)

    ###########################################################################
    ### ESTIMATE ROOT (if requested) AND DETERMINE TEMPORAL SIGNAL
    ###########################################################################
    if params.aln is None and params.sequence_length is None:
        print("one of arguments '--aln' and '--sequence-length' is required.", file=sys.stderr)
        return 1

    basename = get_basename(params, outdir)
    try:
        myTree = TreeTime(dates=dates, tree=params.tree, aln=aln, gtr='JC69',
                          verbose=params.verbose, seq_len=params.sequence_length,
                          ref=ref)
    except TreeTimeError:
        print("\nTreeTime setup failed. Please see above for error messages and/or rerun with --verbose 4\n")
        raise

    myTree.tip_slack = params.tip_slack
    if params.clock_filter:
        # Compare the leaves flagged before and after filtering so that only
        # the newly excluded ones are reported.
        n_bad = [n.name for n in myTree.tree.get_terminals() if n.bad_branch]
        myTree.clock_filter(n_iqd=params.clock_filter,
                            reroot=params.reroot or 'least-squares')
        n_bad_after = [n.name for n in myTree.tree.get_terminals() if n.bad_branch]
        if len(n_bad_after) > len(n_bad):
            # sorted() makes the report reproducible between runs
            print("The following leaves don't follow a loose clock and "
                  "will be ignored in rate estimation:\n\t"
                  + "\n\t".join(sorted(set(n_bad_after).difference(n_bad))))

    if not params.keep_root:
        # reroot to optimal root, this assigns clock_model to myTree
        if params.covariation:  # this requires branch length estimates
            myTree.run(root="least-squares", max_iter=0,
                       use_covariation=params.covariation)
        try:
            myTree.reroot(params.reroot,
                          force_positive=not params.allow_negative_rate)
        except TreeTimeError:
            print("ERROR: unknown root or rooting mechanism!")
            raise

    # The clock model is (re)computed in both the rerooted and keep-root case.
    myTree.get_clock_model(covariation=params.covariation)

    d2d = utils.DateConversion.from_regression(myTree.clock_model)
    print('\n', d2d)
    # Adjacent string literals are joined without a separator, so every
    # fragment except the last has to end with a space before fill() wraps it.
    print(fill('The R^2 value indicates the fraction of variation in '
               'root-to-tip distance explained by the sampling times. '
               'Higher values corresponds more clock-like behavior (max 1.0).') + '\n')

    print(fill('The rate is the slope of the best fit of the date to '
               'the root-to-tip distance and provides an estimate of '
               'the substitution rate. The rate needs to be positive! '
               'Negative rates suggest an inappropriate root.') + '\n')

    print('\nThe estimated rate and tree correspond to a root date:')
    root_date = -d2d.intercept/d2d.clock_rate
    if params.covariation:
        # Propagate the regression covariance to the root date through the
        # derivatives of -intercept/slope with respect to slope and intercept.
        reg = myTree.clock_model
        dp = np.array([reg['intercept']/reg['slope']**2, -1./reg['slope']])
        droot = np.sqrt(reg['cov'][:2, :2].dot(dp).dot(dp))
        print('\n--- root-date:\t %3.2f +/- %1.2f (one std-dev)\n\n' % (root_date, droot))
    else:
        print('\n--- root-date:\t %3.2f\n\n' % root_date)

    if not params.keep_root:
        # write rerooted tree to file
        outtree_name = basename+'rerooted.newick'
        Phylo.write(myTree.tree, outtree_name, 'newick')
        print("--- re-rooted tree written to \n\t%s\n" % outtree_name)

    table_fname = basename+'rtt.csv'
    with open(table_fname, 'w') as ofile:
        ofile.write("#name, date, root-to-tip distance\n")
        ofile.write("#Dates of nodes that didn't have a specified date are inferred from the root-to-tip regression.\n")
        for n in myTree.tree.get_terminals():
            if hasattr(n, "raw_date_constraint") and (n.raw_date_constraint is not None):
                # A constraint is either a single value or a (lower, upper)
                # pair; an empty sequence yields an empty date field.
                if np.isscalar(n.raw_date_constraint):
                    tmp_str = str(n.raw_date_constraint)
                elif len(n.raw_date_constraint):
                    tmp_str = str(n.raw_date_constraint[0])+'-'+str(n.raw_date_constraint[1])
                else:
                    tmp_str = ''
                ofile.write("%s, %s, %f\n" % (n.name, tmp_str, n.dist2root))
            else:
                ofile.write("%s, %f, %f\n" % (n.name, d2d.numdate_from_dist2root(n.dist2root), n.dist2root))
        # Internal nodes never carry a date of their own here; use the regression.
        for n in myTree.tree.get_nonterminals(order='preorder'):
            ofile.write("%s, %f, %f\n" % (n.name, d2d.numdate_from_dist2root(n.dist2root), n.dist2root))
        print("--- wrote dates and root-to-tip distances to \n\t%s\n" % table_fname)

    ###########################################################################
    ### PLOT AND SAVE RESULT
    ###########################################################################
    plot_rtt(myTree, outdir+params.plot_rtt)
    return 0
def estimate_clock_model(params):
    """Run the clock analysis: a root-to-tip regression on a dated tree.

    Sets up a TreeTime object from the tree, dates and optional alignment
    referenced by ``params``, optionally applies the clock filter and
    re-roots the tree, fits the clock model and prints a summary to stdout.

    Files written:

    * ``<basename>rerooted.newick`` -- the re-rooted tree (skipped when
      ``params.keep_root`` is set),
    * ``<basename>rtt.csv`` -- name, date and root-to-tip distance of every
      terminal and internal node,
    * the root-to-tip plot, via ``plot_rtt(myTree, outdir + params.plot_rtt)``.

    Parameters
    ----------
    params : namespace-like object
        Attributes read directly in this function: ``dates``, ``aln``,
        ``sequence_length``, ``tree``, ``verbose``, ``tip_slack``,
        ``clock_filter``, ``reroot``, ``keep_root``, ``covariation``,
        ``allow_negative_rate`` and ``plot_rtt``. The object is also handed
        to ``assure_tree``, ``get_outdir``, ``read_if_vcf`` and
        ``get_basename``.

    Returns
    -------
    int
        0 on success; 1 when ``assure_tree`` reports a problem, no dates
        could be parsed, neither an alignment nor a sequence length was
        given, the tree could not be loaded, or re-rooting returned
        ``ttconf.ERROR``.
    """
    if assure_tree(params, tmp_dir='clock_model_tmp'):
        return 1

    dates = utils.parse_dates(params.dates)
    if len(dates) == 0:
        return 1

    outdir = get_outdir(params, '_clock')

    ###########################################################################
    ### READ IN VCF
    ###########################################################################
    # ref (and the third return value) are None if the input is not VCF;
    # only the alignment and the reference are needed for the clock analysis.
    aln, ref, _ = read_if_vcf(params)

    ###########################################################################
    ### ESTIMATE ROOT (if requested) AND DETERMINE TEMPORAL SIGNAL
    ###########################################################################
    if params.aln is None and params.sequence_length is None:
        print("one of arguments '--aln' and '--sequence-length' is required.", file=sys.stderr)
        return 1

    basename = get_basename(params, outdir)
    myTree = TreeTime(dates=dates, tree=params.tree, aln=aln, gtr='JC69',
                      verbose=params.verbose, seq_len=params.sequence_length,
                      ref=ref)
    # Bail out before configuring or using an object that has no tree.
    if myTree.tree is None:
        print("ERROR: tree loading failed. exiting...")
        return 1
    myTree.tip_slack = params.tip_slack

    if params.clock_filter:
        # Compare the leaves flagged before and after filtering so that only
        # the newly excluded ones are reported.
        n_bad = [n.name for n in myTree.tree.get_terminals() if n.bad_branch]
        myTree.clock_filter(n_iqd=params.clock_filter,
                            reroot=params.reroot or 'least-squares')
        n_bad_after = [n.name for n in myTree.tree.get_terminals() if n.bad_branch]
        if len(n_bad_after) > len(n_bad):
            # sorted() makes the report reproducible between runs
            print("The following leaves don't follow a loose clock and "
                  "will be ignored in rate estimation:\n\t"
                  + "\n\t".join(sorted(set(n_bad_after).difference(n_bad))))

    if not params.keep_root:
        # reroot to optimal root, this assigns clock_model to myTree
        if params.covariation:  # this requires branch length estimates
            myTree.run(root="least-squares", max_iter=0,
                       use_covariation=params.covariation)
        res = myTree.reroot(params.reroot,
                            force_positive=not params.allow_negative_rate)
        # Check the outcome before fitting the clock model: running the
        # regression on a tree whose re-rooting failed is wasted work and
        # could hide the actual error behind a secondary failure.
        if res == ttconf.ERROR:
            print("ERROR: unknown root or rooting mechanism!\n"
                  "\tvalid choices are 'least-squares', 'ML', and 'ML-rough'")
            return 1

    # The clock model is (re)computed in both the rerooted and keep-root case.
    myTree.get_clock_model(covariation=params.covariation)

    d2d = utils.DateConversion.from_regression(myTree.clock_model)
    print('\n', d2d)
    print('The R^2 value indicates the fraction of variation in'
          '\nroot-to-tip distance explained by the sampling times.'
          '\nHigher values corresponds more clock-like behavior (max 1.0).')

    print('\nThe rate is the slope of the best fit of the date to'
          '\nthe root-to-tip distance and provides an estimate of'
          '\nthe substitution rate. The rate needs to be positive!'
          '\nNegative rates suggest an inappropriate root.\n')

    print('\nThe estimated rate and tree correspond to a root date:')
    root_date = -d2d.intercept/d2d.clock_rate
    if params.covariation:
        # Propagate the regression covariance to the root date through the
        # derivatives of -intercept/slope with respect to slope and intercept.
        reg = myTree.clock_model
        dp = np.array([reg['intercept']/reg['slope']**2, -1./reg['slope']])
        droot = np.sqrt(reg['cov'][:2, :2].dot(dp).dot(dp))
        print('\n--- root-date:\t %3.2f +/- %1.2f (one std-dev)\n\n' % (root_date, droot))
    else:
        print('\n--- root-date:\t %3.2f\n\n' % root_date)

    if not params.keep_root:
        # write rerooted tree to file
        outtree_name = basename+'rerooted.newick'
        Phylo.write(myTree.tree, outtree_name, 'newick')
        print("--- re-rooted tree written to \n\t%s\n" % outtree_name)

    table_fname = basename+'rtt.csv'
    with open(table_fname, 'w') as ofile:
        ofile.write("#name, date, root-to-tip distance\n")
        ofile.write("#Dates of nodes that didn't have a specified date are inferred from the root-to-tip regression.\n")
        for n in myTree.tree.get_terminals():
            if hasattr(n, "raw_date_constraint") and (n.raw_date_constraint is not None):
                # A constraint is either a single value or a (lower, upper)
                # pair; an empty sequence yields an empty date field.
                if np.isscalar(n.raw_date_constraint):
                    tmp_str = str(n.raw_date_constraint)
                elif len(n.raw_date_constraint):
                    tmp_str = str(n.raw_date_constraint[0])+'-'+str(n.raw_date_constraint[1])
                else:
                    tmp_str = ''
                ofile.write("%s, %s, %f\n" % (n.name, tmp_str, n.dist2root))
            else:
                ofile.write("%s, %f, %f\n" % (n.name, d2d.numdate_from_dist2root(n.dist2root), n.dist2root))
        # Internal nodes never carry a date of their own here; use the regression.
        for n in myTree.tree.get_nonterminals(order='preorder'):
            ofile.write("%s, %f, %f\n" % (n.name, d2d.numdate_from_dist2root(n.dist2root), n.dist2root))
        print("--- wrote dates and root-to-tip distances to \n\t%s\n" % table_fname)

    ###########################################################################
    ### PLOT AND SAVE RESULT
    ###########################################################################
    plot_rtt(myTree, outdir+params.plot_rtt)
    return 0
def timetree(params):
    """Run the full time-tree inference and write all result files.

    Builds a TreeTime object from the tree, dates and alignment referenced
    by ``params``, runs the inference, prints the inferred models and saves
    the results below ``outdir``.

    Files written:

    * ``sequence_evolution_model.txt`` (only when ``params.gtr == 'infer'``),
    * ``molecular_clock.txt``,
    * skyline plot and table (only for the ``'skyline'`` coalescent model),
    * the tree plot ``params.plot_tree`` and the root-to-tip plot
      ``params.plot_rtt``,
    * ``substitution_rates.tsv`` (only when ``params.relax`` is non-empty),
    * whatever ``export_sequences_and_tree`` produces.

    Parameters
    ----------
    params : namespace-like object
        Attributes read directly in this function: ``relax``, ``dates``,
        ``date_column``, ``name_column``, ``gtr``, ``branch_length_mode``,
        ``aln``, ``sequence_length``, ``tree``, ``verbose``,
        ``keep_overhangs``, ``tip_slack``, ``coalescent``, ``confidence``,
        ``clock_std_dev``, ``covariation``, ``keep_root``, ``reroot``,
        ``keep_polytomies``, ``max_iter``, ``clock_rate``, ``clock_filter``,
        ``reconstruct_tip_states``, ``n_skyline``, ``no_tip_labels``,
        ``tip_labels``, ``plot_tree``, ``plot_rtt`` and ``zero_based``.

    Returns
    -------
    int
        0 on success, 1 when the input is unusable (malformed relaxed-clock
        values, no dates, no tree, missing alignment/sequence length, failed
        setup, or an unknown coalescent specification).

    Raises
    ------
    TreeTimeError
        Re-raised (after printing a hint) when ``TreeTime.run`` fails.
    """
    # Translate the relaxed-clock option into what is handed to TreeTime.run:
    # None -> disabled, empty -> True, two values -> slack and coupling.
    # Any other length used to leave the variable unbound and crash much later.
    if params.relax is None:
        relaxed_clock_params = None
    elif len(params.relax) == 0:
        relaxed_clock_params = True
    elif len(params.relax) == 2:
        relaxed_clock_params = {'slack': params.relax[0], 'coupling': params.relax[1]}
    else:
        print("invalid relaxed clock specification: expected either no values or "
              "exactly two (slack and coupling), got %d -- exiting" % len(params.relax),
              file=sys.stderr)
        return 1

    dates = utils.parse_dates(params.dates, date_col=params.date_column,
                              name_col=params.name_column)
    if len(dates) == 0:
        print("No valid dates -- exiting.")
        return 1

    if assure_tree(params, tmp_dir='timetree_tmp'):
        print("No tree -- exiting.")
        return 1

    outdir = get_outdir(params, '_treetime')

    gtr = create_gtr(params)
    infer_gtr = params.gtr == 'infer'

    ###########################################################################
    ### READ IN VCF
    ###########################################################################
    #sets ref and fixed_pi to None if not VCF
    aln, ref, fixed_pi = read_if_vcf(params)
    is_vcf = ref is not None
    branch_length_mode = params.branch_length_mode
    #variable-site-only trees can have big branch lengths, the auto setting won't work.
    if (is_vcf or (params.aln and params.sequence_length)) and branch_length_mode == 'auto':
        branch_length_mode = 'joint'

    ###########################################################################
    ### SET-UP and RUN
    ###########################################################################
    if params.aln is None and params.sequence_length is None:
        print("one of arguments '--aln' and '--sequence-length' is required.", file=sys.stderr)
        return 1
    myTree = TreeTime(dates=dates, tree=params.tree, ref=ref,
                      aln=aln, gtr=gtr, seq_len=params.sequence_length,
                      verbose=params.verbose, fill_overhangs=not params.keep_overhangs)

    myTree.tip_slack = params.tip_slack
    if not myTree.one_mutation:
        print("TreeTime setup failed, exiting")
        return 1

    # coalescent model options: either a number (time scale) or a model name
    try:
        coalescent = float(params.coalescent)
    except (TypeError, ValueError):
        # not a number -- only the named models are acceptable
        if params.coalescent in ['opt', 'const', 'skyline']:
            coalescent = params.coalescent
        else:
            print("unknown coalescent model specification, has to be either "
                  "a float, 'opt', 'const' or 'skyline' -- exiting")
            return 1
    else:
        # values below ten times one_mutation switch the coalescent model off
        if coalescent < 10*myTree.one_mutation:
            coalescent = None

    # determine whether confidence intervals are to be computed and how the
    # uncertainty in the rate estimate should be treated
    calc_confidence = params.confidence
    if params.clock_std_dev:
        vary_rate = params.clock_std_dev if calc_confidence else False
    elif params.confidence and params.covariation:
        vary_rate = True
    elif params.confidence:
        print(fill("Outside of covariation aware mode TreeTime cannot estimate confidence intervals "
                   "without specified standard deviation of the clock rate. Please specify '--clock-std-dev' "
                   "or rerun with '--covariation'. Will proceed without confidence estimation"))
        vary_rate = False
        calc_confidence = False
    else:
        vary_rate = False

    # RUN
    root = None if params.keep_root else params.reroot
    try:
        myTree.run(root=root, relaxed_clock=relaxed_clock_params,
                   resolve_polytomies=(not params.keep_polytomies),
                   Tc=coalescent, max_iter=params.max_iter,
                   fixed_clock_rate=params.clock_rate,
                   n_iqd=params.clock_filter,
                   time_marginal="assign" if calc_confidence else False,
                   vary_rate=vary_rate,
                   branch_length_mode=branch_length_mode,
                   reconstruct_tip_states=params.reconstruct_tip_states,
                   fixed_pi=fixed_pi,
                   use_covariation=params.covariation, n_points=params.n_skyline)
    except TreeTimeError:
        print("\nTreeTime run FAILED: please check above for errors and/or rerun with --verbose 4.\n")
        raise

    ###########################################################################
    ### OUTPUT and saving of results
    ###########################################################################
    if infer_gtr:
        fname = outdir+'sequence_evolution_model.txt'
        with open(fname, 'w') as ofile:
            ofile.write(str(myTree.gtr)+'\n')
        print('\nInferred sequence evolution model (saved as %s):' % fname)
        print(myTree.gtr)

    fname = outdir+'molecular_clock.txt'
    with open(fname, 'w') as ofile:
        ofile.write(str(myTree.date2dist)+'\n')
    # This file holds the clock model, not the sequence evolution model.
    print('\nInferred molecular clock model (saved as %s):' % fname)
    print(myTree.date2dist)

    basename = get_basename(params, outdir)
    if coalescent in ['skyline', 'opt', 'const']:
        print("Inferred coalescent model")
        if coalescent == 'skyline':
            print_save_plot_skyline(myTree, plot=basename+'skyline.pdf',
                                    save=basename+'skyline.tsv', screen=True)
        else:
            Tc = myTree.merger_model.Tc.y[0]
            print(" --T_c: \t %1.2e \toptimized inverse merger rate in units of substitutions" % Tc)
            print(" --T_c: \t %1.2e \toptimized inverse merger rate in years" % (Tc/myTree.date2dist.clock_rate))
            print(" --N_e: \t %1.2e \tcorresponding 'effective population size' assuming 50 gen/year\n" % (Tc/myTree.date2dist.clock_rate*50))

    # plot (imported here so plotting libraries are only loaded when needed)
    import matplotlib.pyplot as plt
    from .treetime import plot_vs_years
    leaf_count = myTree.tree.count_terminals()
    # Tips are labelled on small trees unless suppressed, or when forced.
    show_tip_labels = (leaf_count < 30 and (not params.no_tip_labels)) or params.tip_labels

    def label_func(node):
        return node.name if node.is_terminal() and show_tip_labels else ''

    plot_vs_years(myTree, show_confidence=False, label_func=label_func,
                  confidence=0.9 if calc_confidence else None)
    tree_fname = (outdir + params.plot_tree)
    plt.savefig(tree_fname)
    print("--- saved tree as \n\t %s\n" % tree_fname)

    plot_rtt(myTree, outdir + params.plot_rtt)

    # NOTE(review): an empty ``params.relax`` enables the relaxed clock above
    # (relaxed_clock_params=True) but is falsy here, so no rate table is
    # written in that case -- confirm whether that is intended.
    if params.relax:
        fname = outdir+'substitution_rates.tsv'
        with open(fname, 'w') as fh:
            fh.write("#node\tclock_length\tmutation_length\trate\tfold_change\n")
            for n in myTree.tree.find_clades(order="preorder"):
                if n == myTree.tree.root:
                    continue
                g = n.branch_length_interpolator.gamma
                fh.write("%s\t%1.3e\t%1.3e\t%1.3e\t%1.2f\n" % (n.name, n.clock_length, n.mutation_length, myTree.date2dist.clock_rate*g, g))
        # announce the file only once it has actually been written
        print("--- wrote branch specific rates to\n\t %s\n" % fname)

    export_sequences_and_tree(myTree, basename, is_vcf, params.zero_based,
                              timetree=True, confidence=calc_confidence,
                              reconstruct_tip_states=params.reconstruct_tip_states)

    return 0
def timetree(params):
    """Run the full time-tree inference and write all result files.

    Builds a TreeTime object from the tree, dates and alignment referenced
    by ``params``, runs the inference, prints the inferred models and saves
    the results below ``outdir``.

    Files written:

    * skyline plot and table (only for the ``'skyline'`` coalescent model),
    * the tree plot ``params.plot_tree`` and the root-to-tip plot
      ``params.plot_rtt``,
    * ``substitution_rates.tsv`` (only when ``params.relax`` is non-empty),
    * whatever ``export_sequences_and_tree`` produces.

    Parameters
    ----------
    params : namespace-like object
        Attributes read directly in this function: ``relax``, ``dates``,
        ``gtr``, ``branch_length_mode``, ``aln``, ``sequence_length``,
        ``tree``, ``verbose``, ``tip_slack``, ``coalescent``,
        ``confidence``, ``clock_std_dev``, ``covariation``, ``keep_root``,
        ``reroot``, ``keep_polytomies``, ``max_iter``, ``clock_rate``,
        ``clock_filter``, ``no_tip_labels``, ``tip_labels``, ``plot_tree``,
        ``plot_rtt`` and ``zero_based``.

    Returns
    -------
    int
        0 on success, 1 when the input is unusable (malformed relaxed-clock
        values, no dates, no tree, missing alignment/sequence length, failed
        setup, unknown coalescent specification) or when ``TreeTime.run``
        returns ``ttconf.ERROR``.
    """
    # Translate the relaxed-clock option into what is handed to TreeTime.run:
    # None -> disabled, empty -> True, two values -> slack and coupling.
    # Any other length used to leave the variable unbound and crash much later.
    if params.relax is None:
        relaxed_clock_params = None
    elif len(params.relax) == 0:
        relaxed_clock_params = True
    elif len(params.relax) == 2:
        relaxed_clock_params = {'slack': params.relax[0], 'coupling': params.relax[1]}
    else:
        print("invalid relaxed clock specification: expected either no values or "
              "exactly two (slack and coupling), got %d -- exiting" % len(params.relax),
              file=sys.stderr)
        return 1

    dates = utils.parse_dates(params.dates)
    if len(dates) == 0:
        print("No valid dates -- exiting.")
        return 1

    if assure_tree(params, tmp_dir='timetree_tmp'):
        print("No tree -- exiting.")
        return 1

    outdir = get_outdir(params, '_treetime')

    gtr = create_gtr(params)
    infer_gtr = params.gtr == 'infer'

    ###########################################################################
    ### READ IN VCF
    ###########################################################################
    #sets ref and fixed_pi to None if not VCF
    aln, ref, fixed_pi = read_if_vcf(params)
    is_vcf = ref is not None
    branch_length_mode = params.branch_length_mode
    #variable-site-only trees can have big branch lengths, the auto setting won't work.
    if (is_vcf or (params.aln and params.sequence_length)) and branch_length_mode == 'auto':
        branch_length_mode = 'joint'

    ###########################################################################
    ### SET-UP and RUN
    ###########################################################################
    if params.aln is None and params.sequence_length is None:
        print("one of arguments '--aln' and '--sequence-length' is required.", file=sys.stderr)
        return 1
    myTree = TreeTime(dates=dates, tree=params.tree, ref=ref,
                      aln=aln, gtr=gtr, seq_len=params.sequence_length,
                      verbose=params.verbose)
    myTree.tip_slack = params.tip_slack
    if not myTree.one_mutation:
        print("TreeTime setup failed, exiting")
        return 1

    # coalescent model options: either a number (time scale) or a model name
    try:
        coalescent = float(params.coalescent)
    except (TypeError, ValueError):
        # not a number -- only the named models are acceptable
        if params.coalescent in ['opt', 'const', 'skyline']:
            coalescent = params.coalescent
        else:
            print("unknown coalescent model specification, has to be either "
                  "a float, 'opt', 'const' or 'skyline' -- exiting")
            return 1
    else:
        # values below ten times one_mutation switch the coalescent model off
        if coalescent < 10*myTree.one_mutation:
            coalescent = None

    # determine whether confidence intervals are to be computed and how the
    # uncertainty in the rate estimate should be treated
    calc_confidence = params.confidence
    if params.clock_std_dev:
        vary_rate = params.clock_std_dev if calc_confidence else False
    elif params.confidence and params.covariation:
        vary_rate = True
    elif params.confidence:
        # The option is read as ``params.covariation``, so the hint names
        # '--covariation' (presumably the matching flag -- confirm against
        # the argument parser).
        print("\nOutside of covariance aware mode TreeTime cannot estimate confidence intervals "
              "without specified standard deviation of the clock rate. Please specify '--clock-std-dev' "
              "or rerun with '--covariation'. Will proceed without confidence estimation")
        vary_rate = False
        calc_confidence = False
    else:
        vary_rate = False

    # RUN
    root = None if params.keep_root else params.reroot
    success = myTree.run(root=root, relaxed_clock=relaxed_clock_params,
                         resolve_polytomies=(not params.keep_polytomies),
                         Tc=coalescent, max_iter=params.max_iter,
                         fixed_clock_rate=params.clock_rate,
                         n_iqd=params.clock_filter,
                         time_marginal="assign" if calc_confidence else False,
                         vary_rate=vary_rate,
                         branch_length_mode=branch_length_mode,
                         fixed_pi=fixed_pi,
                         use_covariation=params.covariation)
    if success == ttconf.ERROR:  # if TreeTime.run failed, exit
        print("\nTreeTime run FAILED: please check above for errors and/or rerun with --verbose 4.\n")
        return 1

    ###########################################################################
    ### OUTPUT and saving of results
    ###########################################################################
    if infer_gtr:
        print('\nInferred GTR model:')
        print(myTree.gtr)

    print(myTree.date2dist)

    basename = get_basename(params, outdir)
    if coalescent in ['skyline', 'opt', 'const']:
        print("Inferred coalescent model")
        if coalescent == 'skyline':
            print_save_plot_skyline(myTree, plot=basename+'skyline.pdf',
                                    save=basename+'skyline.tsv', screen=True)
        else:
            Tc = myTree.merger_model.Tc.y[0]
            print(" --T_c: \t %1.2e \toptimized inverse merger rate in units of substitutions" % Tc)
            print(" --T_c: \t %1.2e \toptimized inverse merger rate in years" % (Tc/myTree.date2dist.clock_rate))
            print(" --N_e: \t %1.2e \tcorresponding 'effective population size' assuming 50 gen/year\n" % (Tc/myTree.date2dist.clock_rate*50))

    # plot (imported here so plotting libraries are only loaded when needed)
    import matplotlib.pyplot as plt
    from .treetime import plot_vs_years
    leaf_count = myTree.tree.count_terminals()
    # Tips are labelled on small trees unless suppressed, or when forced.
    show_tip_labels = (leaf_count < 30 and (not params.no_tip_labels)) or params.tip_labels

    def label_func(node):
        return node.name if node.is_terminal() and show_tip_labels else ''

    # Use calc_confidence rather than params.confidence: confidence estimation
    # may have been switched off above even though it was requested.
    plot_vs_years(myTree, show_confidence=False, label_func=label_func,
                  confidence=0.9 if calc_confidence else None)
    tree_fname = (outdir + params.plot_tree)
    plt.savefig(tree_fname)
    print("--- saved tree as \n\t %s\n" % tree_fname)

    plot_rtt(myTree, outdir + params.plot_rtt)

    # NOTE(review): an empty ``params.relax`` enables the relaxed clock above
    # (relaxed_clock_params=True) but is falsy here, so no rate table is
    # written in that case -- confirm whether that is intended.
    if params.relax:
        fname = outdir+'substitution_rates.tsv'
        with open(fname, 'w') as fh:
            fh.write("#node\tclock_length\tmutation_length\trate\tfold_change\n")
            for n in myTree.tree.find_clades(order="preorder"):
                if n == myTree.tree.root:
                    continue
                g = n.branch_length_interpolator.gamma
                fh.write("%s\t%1.3e\t%1.3e\t%1.3e\t%1.2f\n" % (n.name, n.clock_length, n.mutation_length, myTree.date2dist.clock_rate*g, g))
        # announce the file only once it has actually been written
        print("--- wrote branch specific rates to\n\t %s\n" % fname)

    export_sequences_and_tree(myTree, basename, is_vcf, params.zero_based,
                              timetree=True, confidence=calc_confidence)

    return 0