Ejemplo n.º 1
0
def estimate_skyline(base_name, plot=False):
    """Infer a coalescent skyline for a simulated tree and compare it to the truth.

    The simulation parameters are parsed from the underscore-separated file
    name: the period from the second-to-last field, the amplitude from the
    third-to-last field and the population size from the third field (each
    after stripping a fixed-length textual prefix).

    Args:
        base_name: path prefix; ``.opt.nwk`` and ``.nuc.fasta`` are appended
            to locate the tree and the alignment.
        plot: if True, plot the inferred, empirical and true trajectories.

    Returns:
        Tuple ``(period, amp, x, inferred_y, true_pop_size)`` where the three
        arrays are truncated to start at the earliest date among the root's
        children.
    """
    newick_path = base_name + ".opt.nwk"
    fasta_path = base_name + ".nuc.fasta"

    # the simulation parameters are encoded in the file name
    params = os.path.basename(base_name).split('_')
    print(params)
    period = float(params[-2][6:])
    amp = float(params[-3][3:])
    pop_size = float(params[2][1:])

    sampling_generations = generations_from_ffpopsim_tree(newick_path)[1]
    T = TreeTime(tree=newick_path,
                 aln=fasta_path,
                 dates=sampling_generations,
                 gtr="JC69",
                 real_dates=False)
    T.run(Tc="skyline",
          max_iter=3,
          long_branch=True,
          resolve_polytomies=True,
          infer_gtr=True,
          root='best')
    print(T.gtr)

    inferred = T.merger_model.skyline_inferred()
    empirical = T.merger_model.skyline_empirical()

    x = inferred.x
    # population size used in the simulation, evaluated on the skyline grid
    truePopSize = pop_size*(1.0 + amp*np.cos(2.0*np.pi*x/pop_size/period))

    if plot:
        plt.figure(figsize=onecolumn_figsize)
        for xs, ys in ((x, inferred.y), (empirical.x, empirical.y), (x, truePopSize)):
            plt.plot(xs, ys)

    # iterating over the root clade yields its direct children
    earliest = np.min([child.numdate for child in T.tree.root])
    start = x.searchsorted(earliest)
    return period, amp, x[start:], inferred.y[start:], truePopSize[start:]
Ejemplo n.º 2
0
    def tt_from_file(self, infile, root='best', nodefile=None):
        """Build a TreeTime object from a tree file and annotate the tree nodes.

        Sets ``self.tt`` (the TreeTime instance) and ``self.tree`` (its tree).
        Every terminal node whose name is found in ``self.sequence_lookup``
        gets the attributes of the matching sequence as ``node.attr``; every
        other node gets an empty dict.

        Args:
            infile: path of the tree file handed to TreeTime.
            root: rooting specification passed to ``TreeTime.reroot``; a falsy
                value skips rerooting.
            nodefile: optional path to a pickled dict mapping node names to
                ``{attribute: value}`` dicts that are set on the nodes.
        """
        self.is_timetree=False
        self.logger('Reading tree from file '+infile,2)
        # NOTE(review): the filter tests for 'date' while the value read is
        # 'num_date' -- presumably both are always set together; confirm.
        dates  =   {seq.id:seq.attributes['num_date']
                    for seq in self.aln if 'date' in seq.attributes}
        self.tt = TreeTime(dates=dates, tree=str(infile), gtr='Jukes-Cantor',
                            aln = self.aln, verbose=self.verbose, fill_overhangs=True)
        if root:
            self.tt.reroot(root=root)
        self.tree = self.tt.tree

        for node in self.tree.find_clades():
            if node.is_terminal() and node.name in self.sequence_lookup:
                node.attr = self.sequence_lookup[node.name].attributes
                try:
                    node.attr['date'] = node.attr['date'].strftime('%Y-%m-%d')
                except (AttributeError, KeyError, ValueError):
                    # best effort: the date is missing, is not a date object
                    # (e.g. already a string), or cannot be formatted
                    pass
            else:
                node.attr = {}

        if nodefile is not None:
            self.logger('reading node properties from file: '+nodefile,2)
            try:
                from cPickle import load
            except ImportError:  # Python 3 has no cPickle module
                from pickle import load
            # SECURITY: unpickling executes arbitrary code; only load node
            # files that come from a trusted source.
            # NOTE(review): pickle on Python 3 needs a binary file handle;
            # verify what mode myopen() actually uses for 'r'.
            with myopen(nodefile, 'r') as node_fh:
                node_props = load(node_fh)
            for n in self.tree.find_clades():
                if n.name in node_props:
                    for attr, value in node_props[n.name].items():
                        setattr(n, attr, value)
                else:
                    # str() because internal nodes may be unnamed (name None)
                    self.logger("No node properties found for "+str(n.name),2)
Ejemplo n.º 3
0
def timetree(tree=None, aln=None, seq_meta=None, keeproot=False,
             confidence=False, resolve_polytomies=True, max_iter=2,
             infer_gtr=True, Tc=0.01, reroot='best', use_marginal=False, **kwarks):
    """Infer a time-scaled tree with TreeTime and copy the dates onto the nodes.

    Args:
        tree, aln: tree and alignment handed to ``TreeTime``.
        seq_meta: mapping ``name -> metadata dict``; each metadata dict must
            have a ``"date"`` entry that is parsed with ``parse_date``.
            Entries whose date parses to None are ignored.
        keeproot: accepted for interface compatibility; not used here.
        confidence: if truthy, also store a 90% posterior region per node.
        resolve_polytomies, max_iter, infer_gtr, Tc: forwarded to
            ``TreeTime.run``.
        reroot: forwarded to ``TreeTime.run`` as ``root``.
        use_marginal: together with ``confidence`` selects
            ``time_marginal='assign'``.
        **kwarks: extra keyword arguments forwarded to ``TreeTime.run``.

    Returns:
        The ``TreeTime`` instance after the run.
    """
    from treetime import TreeTime
    dates = {}
    for name, data in seq_meta.items():
        num_date = parse_date(data["date"], date_fmt)
        if num_date is not None:
            dates[name] = num_date

    tt = TreeTime(tree=tree, aln=aln, dates=dates, gtr='JC69')

    if confidence and use_marginal:
        # estimate confidence intervals via marginal ML and assign marginal ML times to nodes
        marginal = 'assign'
    else:
        marginal = confidence

    tt.run(infer_gtr=infer_gtr, root=reroot, Tc=Tc, time_marginal=marginal,
           resolve_polytomies=resolve_polytomies, max_iter=max_iter, **kwarks)

    # Walk the tree owned by the TreeTime object. The original code iterated
    # over an undefined name `T`, which raised NameError unless a global of
    # that name happened to exist.
    for n in tt.tree.find_clades():
        n.num_date = n.numdate # treetime convention is different from augur...
        # get 90% max posterior region
        if confidence:
            n.num_date_confidence = list(tt.get_max_posterior_region(n, 0.9))
    return tt
Ejemplo n.º 4
0
def run_treetime(config: TreetimeConfig) -> None:
    """Run a complete TreeTime analysis as described by ``config``.

    Optionally generates a tree from the FASTA input, reads tree, alignment
    and dates, runs TreeTime and writes every output artefact (config JSON,
    tree JSON, Newick, Nexus, alignment, metadata CSV and GTR model) to the
    paths listed in ``config.output_filenames``.

    Args:
        config: analysis settings plus input/output file names.
    """

    if config.generate_tree:
        generate_tree(
            config.input_filenames.FASTA,
            config.output_filenames.NWK_GENERATED,
        )

    # use the freshly generated tree if there is one, the user's tree otherwise
    input_nwk = (config.output_filenames.NWK_GENERATED
                 if config.generate_tree else config.input_filenames.NWK)

    tree = Phylo.read(input_nwk, "newick")
    aln = AlignIO.read(config.input_filenames.FASTA, "fasta")
    dates, meta = read_metadata_from_file(
        config.input_filenames.DATES,
        '.cache/log.txt',
    )

    # TODO: in case of config.gtr == "infer" shall we default to "jc" here ?
    # TreeTime.run() has additional "infer_gtr" option.
    # Why same thing in two places?
    tt = TreeTime(dates=dates, tree=tree, aln=aln, gtr="jc")

    tt.run(
        root=config.root,
        infer_gtr=config.gtr == "infer",
        relaxed_clock=config.relaxed_clock,
        resolve_polytomies=config.resolve_polytomies,
        max_iter=config.max_iter,
        Tc=config.coalescent_prior,
        fixed_slope=config.slope,
        do_marginal=config.do_marginal,
    )

    # persist the configuration that produced these results
    config_json = config._asdict()
    write_json(config_json, config.output_filenames.CONFIG_JSON)

    # NOTE(review): layout() presumably adds plotting coordinates to the
    # nodes before they are serialised -- confirm against its definition.
    layout(tt)

    tree_json = node_to_json(tt.tree.root, meta, config)
    write_json(tree_json, config.output_filenames.TREE_JSON)

    # likelihoods_json = likelihoods_to_json(tt)
    # write_json(likelihoods_json, config.output_filenames.LIKELIHOODS_JSON)

    # the Newick file is written before decorate() is applied, the Nexus
    # file after it
    Phylo.write(tt.tree, config.output_filenames.NWK, "newick")

    decorate(tt)

    Phylo.write(tt.tree, config.output_filenames.NEX, "nexus")

    save_alignment(tt, config)

    save_metadata_to_csv(tt, meta, config)

    # TODO: Do we need this? It was commented out in the original treetime-web.
    # save_molecular_clock_to_csv(tt, config)

    save_gtr(tt, config)
0
def timetree(tree=None,
             aln=None,
             ref=None,
             seq_meta=None,
             keeproot=False,
             confidence=False,
             resolve_polytomies=True,
             max_iter=2,
             dateLimits=None,
             infer_gtr=True,
             Tc=0.01,
             reroot='best',
             use_marginal=False,
             **kwarks):
    """Infer a time-scaled tree with TreeTime and copy the dates onto the nodes.

    Args:
        tree, aln: tree and alignment handed to ``TreeTime``.
        ref: reference sequence for VCF input, or None. When given, the
            equilibrium frequencies ``pi`` are fixed to preset values.
        seq_meta: mapping ``name -> metadata dict``; each metadata dict must
            have a ``"date"`` entry that is parsed with ``parse_date``.
            Entries whose date parses to None are ignored.
        keeproot: accepted for interface compatibility; not used here.
        confidence: if truthy, also store a 90% posterior region per node.
        resolve_polytomies, max_iter, infer_gtr, Tc: forwarded to
            ``TreeTime.run``.
        dateLimits: optional iterable of values convertible to int; the
            sorted integers are passed to ``parse_date``.
        reroot: forwarded to ``TreeTime.run`` as ``root``.
        use_marginal: together with ``confidence`` selects
            ``time_marginal='assign'``.
        **kwarks: extra keyword arguments forwarded to ``TreeTime.run``.

    Returns:
        The ``TreeTime`` instance after the run.
    """
    from treetime import TreeTime

    dL_int = None
    if dateLimits:
        dL_int = sorted(int(x) for x in dateLimits)

    dates = {}
    for name, data in seq_meta.items():
        num_date = parse_date(data["date"], date_fmt, dL_int)
        if num_date is not None:
            dates[name] = num_date

    #send ref, if is None, does no harm
    tt = TreeTime(tree=tree, aln=aln, ref=ref, dates=dates, gtr='JC69')

    if confidence and use_marginal:
        # estimate confidence intervals via marginal ML and assign marginal ML times to nodes
        marginal = 'assign'
    else:
        marginal = confidence

    #Length of VCF files means GTR model with gaps causes overestimation of mutation TO gaps
    #so gaps appear in internal nodes when no gaps at tips! To prevent....
    pi = None
    if ref is not None:  #if VCF, fix pi
        pi = np.array([0.1618, 0.3188, 0.3176, 0.1618,
                       0.04])  #from real runs (Walker 2018)

    tt.run(infer_gtr=infer_gtr,
           root=reroot,
           Tc=Tc,
           time_marginal=marginal,
           resolve_polytomies=resolve_polytomies,
           max_iter=max_iter,
           fixed_pi=pi,
           **kwarks)

    # Walk the tree owned by the TreeTime object. The original code iterated
    # over an undefined name `T`, which raised NameError unless a global of
    # that name happened to exist.
    for n in tt.tree.find_clades():
        n.num_date = n.numdate  # treetime convention is different from augur...
        # get 90% max posterior region
        if confidence:
            n.num_date_confidence = list(tt.get_max_posterior_region(n, 0.9))
    return tt
Ejemplo n.º 6
0
def treetime_from_newick(gtr, infile):
    """
    Build a TreeTime object around a phylogenetic tree read from a newick file.

    Args:
     - gtr: passed as the single positional argument to the TreeTime
     constructor.
     - infile(str): path to the newick file.

    Returns:
     - TreeTime: object with the tree attached and
     ``set_additional_tree_params()`` already applied.
    """
    newick_tree = Phylo.read(infile, 'newick') if False else None
    tree_time = TreeTime(gtr)
    newick_tree = Phylo.read(infile, 'newick')
    tree_time.tree = newick_tree
    tree_time.set_additional_tree_params()
    return tree_time
Ejemplo n.º 7
0
from __future__ import print_function, division
import numpy as np
from Bio import Phylo
from treetime import TreeTime
from treetime.utils import parse_dates
from treetime.node_interpolator import Distribution, NodeInterpolator

# Example script: infer a time tree for the bundled H3N2 NA data set and
# prepare the date constraints for the subsequent node-time inference.
if __name__ == '__main__':

    # common path prefix of the tree (.nwk), alignment (.fasta) and
    # metadata (.metadata.csv) files
    base_name = 'test/treetime_examples/data/h3n2_na/h3n2_na_20'

    dates = parse_dates(base_name + '.metadata.csv')
    tt = TreeTime(gtr='Jukes-Cantor',
                  tree=base_name + '.nwk',
                  aln=base_name + '.fasta',
                  verbose=3,
                  dates=dates,
                  debug=True)

    # rerooting can be done along with the tree time inference
    tt.run(root="best",
           branch_length_mode='input',
           max_iter=2,
           time_marginal=True)

    # initialize date constraints and branch length interpolators
    # this is called in each iteration. 44ms
    tt.init_date_constraints()

    ###########################################################
    # joint inference of node times. done in every generation. 0.7s
Ejemplo n.º 8
0
    #from treetime.utils import numeric_date
    # Read "name,date" pairs; comment lines (starting with '#') and lines
    # that do not parse into exactly one name and one numeric date are skipped.
    with open(base_name + '.csv') as date_file:
        dates = {}
        for line in date_file:
            if line.startswith('#'):
                continue
            try:
                name, date = line.strip().split(',')
                dates[name] = float(date)
            except ValueError:
                # wrong number of fields or a non-numeric date (e.g. a header)
                continue

    # instantiate treetime
    ebola = TreeTime(gtr='Jukes-Cantor',
                     tree=base_name + '.nwk',
                     aln=base_name + '.fasta',
                     verbose=4,
                     dates=dates)

    # infer an ebola time tree while rerooting and resolving polytomies
    ebola.run(root='best',
              relaxed_clock=False,
              max_iter=2,
              resolve_polytomies=True,
              Tc='skyline',
              time_marginal="assign")

    # get Skyline and 2-sigma confidence intervals
    skyline, confidence = ebola.merger_model.skyline_inferred(gen=50,
                                                              confidence=2.0)
Ejemplo n.º 9
0
def refine(tree=None,
           aln=None,
           ref=None,
           dates=None,
           branch_length_inference='auto',
           confidence=False,
           resolve_polytomies=True,
           max_iter=2,
           precision='auto',
           infer_gtr=True,
           Tc=0.01,
           reroot=None,
           use_marginal=False,
           fixed_pi=None,
           clock_rate=None,
           clock_std=None,
           clock_filter_iqd=None,
           verbosity=1,
           covariance=True,
           **kwarks):
    """Infer a time-resolved phylogeny with TreeTime.

    Args:
        tree, aln, dates: tree, alignment and sampling dates for ``TreeTime``.
        ref: reference sequence for VCF input, or None. When given and
            ``fixed_pi`` is None, ``fixed_pi`` is derived from the base
            composition of ``ref``; ``branch_length_inference='auto'`` is
            replaced by ``'joint'``.
        branch_length_inference: forwarded as ``branch_length_mode``.
        confidence: if truthy, store a 90% posterior region on every node
            as ``num_date_confidence``.
        resolve_polytomies, max_iter, infer_gtr: forwarded to ``TreeTime.run``.
        precision: forwarded to the ``TreeTime`` constructor.
        Tc: coalescent setting; anything ``float()`` accepts is converted to
            a float, other values (e.g. ``'opt'``, ``'skyline'``, ``None``)
            are passed through unchanged.
        reroot: forwarded as ``root`` (and to the clock filter).
        use_marginal: together with ``confidence`` selects
            ``time_marginal='assign'``.
        fixed_pi: equilibrium frequencies forwarded to ``TreeTime.run``.
        clock_rate: forwarded as ``fixed_clock_rate``.
        clock_std: standard deviation of the clock rate; used as
            ``vary_rate`` when ``confidence`` is requested.
        clock_filter_iqd: if truthy, run the clock filter with this
            ``n_iqd`` and prune the tips it marks as bad.
        verbosity: forwarded as ``verbose``.
        covariance: forwarded as ``use_covariation``.
        **kwarks: extra keyword arguments forwarded to ``TreeTime.run``.

    Returns:
        The ``TreeTime`` instance after the run.
    """
    from treetime import TreeTime

    #Tc could be a number or 'opt' or 'skyline'. TreeTime expects a float or int if a number.
    try:
        Tc = float(Tc)
    except (TypeError, ValueError):
        # ValueError: non-numeric string, let it remain a string.
        # TypeError: e.g. None, which float() rejects; pass it through too.
        pass

    if (ref is not None) and (fixed_pi is None):  #if VCF, fix pi
        #Otherwise mutation TO gaps is overestimated b/c of seq length
        fixed_pi = [
            ref.count(base) / len(ref) for base in ['A', 'C', 'G', 'T', '-']
        ]
        if fixed_pi[-1] == 0:
            # give gaps a small non-zero frequency; subtracting 0.01 from all
            # five entries keeps the total unchanged (+0.05 - 5*0.01)
            fixed_pi[-1] = 0.05
            fixed_pi = [v - 0.01 for v in fixed_pi]

    if ref is not None:  # VCF -> adjust branch length
        #set branch length mode explicitly if auto, as informative-site only
        #trees can have big branch lengths, making this set incorrectly in TreeTime
        if branch_length_inference == 'auto':
            branch_length_inference = 'joint'

    #send ref, if is None, does no harm
    tt = TreeTime(tree=tree,
                  aln=aln,
                  ref=ref,
                  dates=dates,
                  verbose=verbosity,
                  gtr='JC69',
                  precision=precision)

    # conditionally run clock-filter and remove bad tips
    if clock_filter_iqd:
        # treetime clock filter will mark, but not remove bad tips
        tt.clock_filter(reroot=reroot, n_iqd=clock_filter_iqd,
                        plot=False)  #use whatever was specified
        # remove them explicitly; iterate over a snapshot because pruning
        # changes the tree
        leaves = list(tt.tree.get_terminals())
        for n in leaves:
            if n.bad_branch:
                tt.tree.prune(n)
                print('pruning leaf ', n.name)
        # fix treetime set-up for new tree topology
        tt.prepare_tree()

    if confidence and use_marginal:
        # estimate confidence intervals via marginal ML and assign
        # marginal ML times to nodes
        marginal = 'assign'
    else:
        marginal = confidence

    # uncertainty of the clock rate is relevant if confidence intervals are estimated
    if confidence and clock_std:
        vary_rate = clock_std  # if standard deviation of clock is specified, use that
    elif (clock_rate is None) and confidence and covariance:
        vary_rate = True  # if run in covariance mode, standard deviation can be estimated
    else:
        vary_rate = False  # otherwise, rate uncertainty will be ignored

    tt.run(infer_gtr=infer_gtr,
           root=reroot,
           Tc=Tc,
           time_marginal=marginal,
           branch_length_mode=branch_length_inference,
           resolve_polytomies=resolve_polytomies,
           max_iter=max_iter,
           fixed_pi=fixed_pi,
           fixed_clock_rate=clock_rate,
           vary_rate=vary_rate,
           use_covariation=covariance,
           **kwarks)

    if confidence:
        for n in tt.tree.find_clades():
            n.num_date_confidence = list(tt.get_max_posterior_region(n, 0.9))

    print(
        "\nInferred a time resolved phylogeny using TreeTime:"
        "\n\tSagulenko et al. TreeTime: Maximum-likelihood phylodynamic analysis"
        "\n\tVirus Evolution, vol 4, https://academic.oup.com/ve/article/4/1/vex042/4794731\n"
    )
    return tt
Ejemplo n.º 10
0
def estimate_clock_model(params):
    """
    Implement the ``treetime clock`` command: estimate a molecular clock by
    root-to-tip regression.

    Optionally filters outlier tips and re-roots the tree, prints the
    regression and the implied root date, writes a table of dates and
    root-to-tip distances (``rtt.csv``) and, unless the root is kept, the
    re-rooted tree, and finally plots the regression.

    Args:
        params: parsed command-line arguments (attributes such as ``tree``,
            ``aln``, ``dates``, ``reroot``, ``keep_root``, ``covariation``,
            ``clock_filter`` are read).

    Returns:
        0 on success, 1 if the tree, the dates or the sequence information
        are missing.

    Raises:
        TreeTimeError: re-raised when TreeTime set-up or re-rooting fails.
    """

    if assure_tree(params, tmp_dir='clock_model_tmp'):
        return 1
    dates = utils.parse_dates(params.dates, date_col=params.date_column, name_col=params.name_column)
    if len(dates)==0:
        return 1

    outdir = get_outdir(params, '_clock')

    ###########################################################################
    ### READ IN VCF
    ###########################################################################
    #sets ref and fixed_pi to None if not VCF
    aln, ref, fixed_pi = read_if_vcf(params)

    ###########################################################################
    ### ESTIMATE ROOT (if requested) AND DETERMINE TEMPORAL SIGNAL
    ###########################################################################
    if params.aln is None and params.sequence_length is None:
        print("one of arguments '--aln' and '--sequence-length' is required.", file=sys.stderr)
        return 1

    basename = get_basename(params, outdir)
    try:
        myTree = TreeTime(dates=dates, tree=params.tree, aln=aln, gtr='JC69',
                      verbose=params.verbose, seq_len=params.sequence_length,
                      ref=ref)
    except TreeTimeError as e:
        print("\nTreeTime setup failed. Please see above for error messages and/or rerun with --verbose 4\n")
        raise e

    myTree.tip_slack=params.tip_slack
    if params.clock_filter:
        # compare the bad tips before and after filtering so that only the
        # newly excluded ones are reported
        n_bad = [n.name for n in myTree.tree.get_terminals() if n.bad_branch]
        myTree.clock_filter(n_iqd=params.clock_filter, reroot=params.reroot or 'least-squares')
        n_bad_after = [n.name for n in myTree.tree.get_terminals() if n.bad_branch]
        if len(n_bad_after)>len(n_bad):
            print("The following leaves don't follow a loose clock and "
                  "will be ignored in rate estimation:\n\t"
                  +"\n\t".join(set(n_bad_after).difference(n_bad)))

    if not params.keep_root:
        # reroot to optimal root, this assigns clock_model to myTree
        if params.covariation: # this requires branch length estimates
            myTree.run(root="least-squares", max_iter=0,
                       use_covariation=params.covariation)

        try:
            myTree.reroot(params.reroot,
                          force_positive=not params.allow_negative_rate)
        except TreeTimeError as e:
            print("ERROR: unknown root or rooting mechanism!")
            raise e

        myTree.get_clock_model(covariation=params.covariation)
    else:
        myTree.get_clock_model(covariation=params.covariation)

    d2d = utils.DateConversion.from_regression(myTree.clock_model)
    print('\n',d2d)
    # adjacent string literals are concatenated verbatim, so each piece must
    # end with a space to keep the words apart
    print(fill('The R^2 value indicates the fraction of variation in '
          'root-to-tip distance explained by the sampling times. '
          'Higher values corresponds more clock-like behavior (max 1.0).')+'\n')

    print(fill('The rate is the slope of the best fit of the date to '
          'the root-to-tip distance and provides an estimate of '
          'the substitution rate. The rate needs to be positive! '
          'Negative rates suggest an inappropriate root.')+'\n')

    print('\nThe estimated rate and tree correspond to a root date:')
    if params.covariation:
        # propagate the covariance of (slope, intercept) to the root date
        # -intercept/slope using its gradient dp
        reg = myTree.clock_model
        dp = np.array([reg['intercept']/reg['slope']**2,-1./reg['slope']])
        droot = np.sqrt(reg['cov'][:2,:2].dot(dp).dot(dp))
        print('\n--- root-date:\t %3.2f +/- %1.2f (one std-dev)\n\n'%(-d2d.intercept/d2d.clock_rate, droot))
    else:
        print('\n--- root-date:\t %3.2f\n\n'%(-d2d.intercept/d2d.clock_rate))

    if not params.keep_root:
        # write rerooted tree to file
        outtree_name = basename+'rerooted.newick'
        Phylo.write(myTree.tree, outtree_name, 'newick')
        print("--- re-rooted tree written to \n\t%s\n"%outtree_name)

    table_fname = basename+'rtt.csv'
    with open(table_fname, 'w') as ofile:
        ofile.write("#name, date, root-to-tip distance\n")
        ofile.write("#Dates of nodes that didn't have a specified date are inferred from the root-to-tip regression.\n")
        for n in myTree.tree.get_terminals():
            if hasattr(n, "raw_date_constraint") and (n.raw_date_constraint is not None):
                # a constraint is either a single value or a (lower, upper) pair
                if np.isscalar(n.raw_date_constraint):
                    tmp_str = str(n.raw_date_constraint)
                elif len(n.raw_date_constraint):
                    tmp_str = str(n.raw_date_constraint[0])+'-'+str(n.raw_date_constraint[1])
                else:
                    tmp_str = ''
                ofile.write("%s, %s, %f\n"%(n.name, tmp_str, n.dist2root))
            else:
                ofile.write("%s, %f, %f\n"%(n.name, d2d.numdate_from_dist2root(n.dist2root), n.dist2root))
        for n in myTree.tree.get_nonterminals(order='preorder'):
            ofile.write("%s, %f, %f\n"%(n.name, d2d.numdate_from_dist2root(n.dist2root), n.dist2root))
        print("--- wrote dates and root-to-tip distances to \n\t%s\n"%table_fname)


    ###########################################################################
    ### PLOT AND SAVE RESULT
    ###########################################################################
    plot_rtt(myTree, outdir+params.plot_rtt)
    return 0
Ejemplo n.º 11
0
def timetree(params):
    """
    Implement the ``treetime`` time-tree command.

    Parses dates, tree and (optionally VCF) sequence input, runs TreeTime
    with the requested clock, coalescent and confidence settings, and writes
    the sequence evolution model, the molecular clock, plots, optional
    branch-specific rates and the annotated sequences/tree to the output
    directory.

    Args:
        params: parsed command-line arguments.

    Returns:
        0 on success, 1 when required input is missing or an option is
        invalid.

    Raises:
        TreeTimeError: re-raised when the TreeTime run fails.
    """
    if params.relax is None:
        relaxed_clock_params = None
    elif params.relax==[]:
        relaxed_clock_params=True
    elif len(params.relax)==2:
        relaxed_clock_params={'slack':params.relax[0], 'coupling':params.relax[1]}
    else:
        # previously this case left relaxed_clock_params unbound and crashed
        # with a NameError further down
        print("relaxed clock specification needs either no or exactly two "
              "parameters (slack, coupling) -- exiting")
        return 1


    dates = utils.parse_dates(params.dates, date_col=params.date_column, name_col=params.name_column)
    if len(dates)==0:
        print("No valid dates -- exiting.")
        return 1

    if assure_tree(params, tmp_dir='timetree_tmp'):
        print("No tree -- exiting.")
        return 1

    outdir = get_outdir(params, '_treetime')

    gtr = create_gtr(params)
    infer_gtr = params.gtr=='infer'

    ###########################################################################
    ### READ IN VCF
    ###########################################################################
    #sets ref and fixed_pi to None if not VCF
    aln, ref, fixed_pi = read_if_vcf(params)
    is_vcf = True if ref is not None else False
    branch_length_mode = params.branch_length_mode
    #variable-site-only trees can have big branch lengths, the auto setting won't work.
    if is_vcf or (params.aln and params.sequence_length):
        if branch_length_mode == 'auto':
            branch_length_mode = 'joint'



    ###########################################################################
    ### SET-UP and RUN
    ###########################################################################
    if params.aln is None and params.sequence_length is None:
        print("one of arguments '--aln' and '--sequence-length' is required.", file=sys.stderr)
        return 1
    myTree = TreeTime(dates=dates, tree=params.tree, ref=ref,
                      aln=aln, gtr=gtr, seq_len=params.sequence_length,
                      verbose=params.verbose, fill_overhangs=not params.keep_overhangs)
    myTree.tip_slack=params.tip_slack
    if not myTree.one_mutation:
        print("TreeTime setup failed, exiting")
        return 1

    # coalescent model options
    try:
        coalescent = float(params.coalescent)
        # values below ten times one_mutation switch the coalescent model off
        if coalescent<10*myTree.one_mutation:
            coalescent = None
    except (TypeError, ValueError):
        # not a number: must be one of the named models
        if params.coalescent in ['opt', 'const', 'skyline']:
            coalescent = params.coalescent
        else:
            print("unknown coalescent model specification, has to be either "
                  "a float, 'opt', 'const' or 'skyline' -- exiting")
            return 1

    # determine whether confidence intervals are to be computed and how the
    # uncertainty in the rate estimate should be treated
    calc_confidence = params.confidence
    if params.clock_std_dev:
        vary_rate = params.clock_std_dev if calc_confidence else False
    elif params.confidence and params.covariation:
        vary_rate = True
    elif params.confidence:
        print(fill("Outside of covariation aware mode TreeTime cannot estimate confidence intervals "
                "without specified standard deviation of the clock rate. Please specify '--clock-std-dev' "
                "or rerun with '--covariation'. Will proceed without confidence estimation"))
        vary_rate = False
        calc_confidence = False
    else:
        vary_rate = False

    # RUN
    root = None if params.keep_root else params.reroot
    try:
        myTree.run(root=root, relaxed_clock=relaxed_clock_params,
               resolve_polytomies=(not params.keep_polytomies),
               Tc=coalescent, max_iter=params.max_iter,
               fixed_clock_rate=params.clock_rate,
               n_iqd=params.clock_filter,
               time_marginal="assign" if calc_confidence else False,
               vary_rate = vary_rate,
               branch_length_mode = branch_length_mode,
               reconstruct_tip_states=params.reconstruct_tip_states,
               fixed_pi=fixed_pi,
               use_covariation = params.covariation, n_points=params.n_skyline)
    except TreeTimeError as e:
        print("\nTreeTime run FAILED: please check above for errors and/or rerun with --verbose 4.\n")
        raise e

    ###########################################################################
    ### OUTPUT and saving of results
    ###########################################################################
    if infer_gtr:
        fname = outdir+'sequence_evolution_model.txt'
        with open(fname, 'w') as ofile:
            ofile.write(str(myTree.gtr)+'\n')
        print('\nInferred sequence evolution model (saved as %s):'%fname)
        print(myTree.gtr)

    fname = outdir+'molecular_clock.txt'
    with open(fname, 'w') as ofile:
        ofile.write(str(myTree.date2dist)+'\n')
    # this file holds the clock model, not the sequence evolution model
    print('\nInferred molecular clock model (saved as %s):'%fname)
    print(myTree.date2dist)

    basename = get_basename(params, outdir)
    if coalescent in ['skyline', 'opt', 'const']:
        print("Inferred coalescent model")
        if coalescent=='skyline':
            print_save_plot_skyline(myTree, plot=basename+'skyline.pdf', save=basename+'skyline.tsv', screen=True)
        else:
            Tc = myTree.merger_model.Tc.y[0]
            print(" --T_c: \t %1.2e \toptimized inverse merger rate in units of substitutions"%Tc)
            print(" --T_c: \t %1.2e \toptimized inverse merger rate in years"%(Tc/myTree.date2dist.clock_rate))
            print(" --N_e: \t %1.2e \tcorresponding 'effective population size' assuming 50 gen/year\n"%(Tc/myTree.date2dist.clock_rate*50))

    # plot
    import matplotlib.pyplot as plt
    from .treetime import plot_vs_years
    leaf_count = myTree.tree.count_terminals()
    # label tips only when explicitly requested, or when the tree has fewer
    # than 30 leaves and labels were not switched off
    label_func = lambda x: (x.name if x.is_terminal() and ((leaf_count<30
                                        and (not params.no_tip_labels))
                                      or params.tip_labels) else '')

    plot_vs_years(myTree, show_confidence=False, label_func=label_func,
                  confidence=0.9 if calc_confidence else None)
    tree_fname = (outdir + params.plot_tree)
    plt.savefig(tree_fname)
    print("--- saved tree as \n\t %s\n"%tree_fname)

    plot_rtt(myTree, outdir + params.plot_rtt)
    if params.relax:
        fname = outdir+'substitution_rates.tsv'
        print("--- wrote branch specific rates to\n\t %s\n"%fname)
        with open(fname, 'w') as fh:
            fh.write("#node\tclock_length\tmutation_length\trate\tfold_change\n")
            for n in myTree.tree.find_clades(order="preorder"):
                if n==myTree.tree.root:
                    continue
                # gamma is the branch-specific multiplier of the clock rate
                g = n.branch_length_interpolator.gamma
                fh.write("%s\t%1.3e\t%1.3e\t%1.3e\t%1.2f\n"%(n.name, n.clock_length, n.mutation_length, myTree.date2dist.clock_rate*g, g))

    export_sequences_and_tree(myTree, basename, is_vcf, params.zero_based,
                              timetree=True, confidence=calc_confidence,
                              reconstruct_tip_states=params.reconstruct_tip_states)

    return 0
Ejemplo n.º 12
0
    # PARSING OPTIONS
    ###########################################################################
    # Tc is either a number (values below 1e-5 switch the coalescent model
    # off) or one of the named models 'opt' / 'skyline'; anything else
    # disables the coalescent model.
    try:
        Tc = float(params.Tc)
        if Tc<1e-5:
            Tc = None
    except (TypeError, ValueError):
        # params.Tc is not numeric (TypeError covers None)
        if params.Tc in ['opt', 'skyline']:
            Tc = params.Tc
        else:
            Tc = None

    ###########################################################################
    ### SET-UP and RUN
    ###########################################################################
    myTree = TreeTime(dates=dates, tree=params.tree,
                       aln=params.aln, gtr=gtr, verbose=params.verbose)
    myTree.run(root=params.reroot, relaxed_clock=params.relax,
               resolve_polytomies=(not params.keep_polytomies),
               Tc=Tc, max_iter=params.max_iter,
               branch_lengths = 'joint' if params.optimize_branch_length else 'input')

    ###########################################################################
    ### OUTPUT and saving of results
    ###########################################################################
    if infer_gtr:
        print('\nInferred GTR model:')
        print(myTree.gtr)

    print(myTree.date2dist)

    if Tc=='skyline':
Ejemplo n.º 13
0
# seaborn is optional and only used for plot styling
try:
    import seaborn as sns
    sns.set_style('whitegrid')
except ImportError:
    print("Seaborn not found. Default style will be used for the plots")
plt.ion()

# Example script: infer a time tree for the H3N2 NA data set with a fixed
# clock rate.
if __name__ == '__main__':

    # common path prefix of the tree (.nwk) and alignment (.fasta) files
    base_name = 'data/H3N2_NA_allyears_NA.20'
    dates = read_dates(base_name)

    # instantiate treetime
    myTree = TreeTime(gtr='Jukes-Cantor',
                      tree=base_name + '.nwk',
                      aln=base_name + '.fasta',
                      verbose=4,
                      dates=dates,
                      debug=False)

    # RUN: this example uses a fixed clock rate of 0.003, resolves polytomies, and estimates
    # confidences via a final marginal reconstruction
    myTree.run(root='clock_filter',
               relaxed_clock=False,
               max_iter=2,
               fixed_clock_rate=0.003,
               resolve_polytomies=True,
               Tc=None,
               n_iqd=2,
               time_marginal=True)

    #################
Ejemplo n.º 14
0
    # switch on interactive plotting, then load the sampling dates
    plt.ion()
    base_name = 'data/H3N2_NA_allyears_NA.20'
    dates = read_dates(base_name)

    # give every sequence a weight that depends on its sampling date
    # (a seasonal cosine here; any positive number would do)
    seq_multiplicity = {}
    for name in dates:
        seq_multiplicity[name] = 5 * (np.cos(2 * np.pi * (dates[name] + 0.5)) + 1.2)

    # treetime takes the weights as a dict that maps node name to count
    tt = TreeTime(gtr='Jukes-Cantor',
                  tree=base_name + '.nwk',
                  aln=base_name + '.fasta',
                  dates=dates,
                  seq_multiplicity=seq_multiplicity,
                  verbose=1)

    tt.reroot(root="best")

    def _tip_label(clade):
        # tips are labelled with the part of their name before the first '|';
        # internal nodes stay unlabelled
        if clade.is_terminal():
            return clade.name.split('|')[0]
        return ""

    fig, axs = plt.subplots(1, 2, figsize=(18, 9))
    axs[0].set_title("Tree rerooted by treetime", fontsize=18)
    axs[1].set_title("Optimal divergence-time relationship with weighed nodes",
                     fontsize=18)
    Phylo.draw(tt.tree,
               axes=axs[0],
               label_func=_tip_label,
               show_confidence=False)

    d = np.array([(n.numdate_given, n.dist2root, n.count)
Ejemplo n.º 15
0
def timetree(tree=None,
             aln=None,
             ref=None,
             dates=None,
             branch_length_mode='auto',
             confidence=False,
             resolve_polytomies=True,
             max_iter=2,
             infer_gtr=True,
             Tc=0.01,
             reroot=None,
             use_marginal=False,
             fixed_pi=None,
             clock_rate=None,
             n_iqd=None,
             verbosity=1,
             **kwarks):
    """Infer a time-resolved phylogeny with TreeTime.

    Args:
        tree, aln, dates: tree, alignment and sampling dates for ``TreeTime``.
        ref: reference sequence for VCF input, or None. When given,
            ``fixed_pi`` (if not supplied) is derived from the base
            composition of ``ref`` and ``branch_length_mode='auto'`` is
            replaced by ``'joint'``.
        branch_length_mode, resolve_polytomies, max_iter, infer_gtr, n_iqd:
            forwarded to ``TreeTime.run``.
        confidence: if truthy, store a 90% posterior region on every node
            as ``num_date_confidence``.
        Tc: coalescent setting; anything ``float()`` accepts is converted to
            a float, other values (e.g. ``'opt'``, ``'skyline'``, ``None``)
            are passed through unchanged.
        reroot: forwarded as ``root``.
        use_marginal: together with ``confidence`` selects
            ``time_marginal='assign'``.
        fixed_pi: equilibrium frequencies forwarded to ``TreeTime.run``; a
            value supplied by the caller is used as is.
        clock_rate: forwarded as ``fixed_clock_rate``.
        verbosity: forwarded as ``verbose``.
        **kwarks: extra keyword arguments forwarded to ``TreeTime.run``.

    Returns:
        The ``TreeTime`` instance after the run.
    """
    from treetime import TreeTime

    #Tc could be a number or 'opt' or 'skyline'. TreeTime expects a float or int if a number.
    try:
        Tc = float(Tc)
    except (TypeError, ValueError):
        # ValueError: non-numeric string, let it remain a string.
        # TypeError: e.g. None, which float() rejects; pass it through too.
        pass

    if ref is not None:  #if VCF, fix pi
        # only derive pi from the reference when the caller did not pass one;
        # previously an explicit fixed_pi was silently overwritten
        if fixed_pi is None:
            #Otherwise mutation TO gaps is overestimated b/c of seq length
            fixed_pi = [
                ref.count(base) / len(ref) for base in ['A', 'C', 'G', 'T', '-']
            ]
            if fixed_pi[-1] == 0:
                # give gaps a small non-zero frequency; subtracting 0.01 from
                # all five entries keeps the total unchanged (+0.05 - 5*0.01)
                fixed_pi[-1] = 0.05
                fixed_pi = [v - 0.01 for v in fixed_pi]

        #set this explicitly if auto, as informative-site only trees can have big branch lengths,
        #making this set incorrectly in TreeTime
        if branch_length_mode == 'auto':
            branch_length_mode = 'joint'

    #send ref, if is None, does no harm
    tt = TreeTime(tree=tree,
                  aln=aln,
                  ref=ref,
                  dates=dates,
                  verbose=verbosity,
                  gtr='JC69')

    if confidence and use_marginal:
        # estimate confidence intervals via marginal ML and assign marginal ML times to nodes
        marginal = 'assign'
    else:
        marginal = confidence

    tt.run(infer_gtr=infer_gtr,
           root=reroot,
           Tc=Tc,
           time_marginal=marginal,
           branch_length_mode=branch_length_mode,
           resolve_polytomies=resolve_polytomies,
           max_iter=max_iter,
           fixed_pi=fixed_pi,
           fixed_clock_rate=clock_rate,
           n_iqd=n_iqd,
           **kwarks)

    if confidence:
        for n in tt.tree.find_clades():
            n.num_date_confidence = list(tt.get_max_posterior_region(n, 0.9))

    print(
        "\nInferred a time resolved phylogeny using TreeTime:"
        "\n\tSagulenko et al. TreeTime: Maximum-likelihood phylodynamic analysis"
        "\n\tVirus Evolution, vol 4, https://academic.oup.com/ve/article/4/1/vex042/4794731\n"
    )
    return tt
Ejemplo n.º 16
0
    ### FAKING ALIGNMENT IF NONE GIVEN
    ###########################################################################
    # Without an alignment, build a placeholder one with the sequence "AAA"
    # for every dated name.
    # NOTE(review): `aln` is only assigned in this branch; when params.aln is
    # given it must already be defined by code outside this excerpt -- verify.
    if params.aln is None:
        from Bio import Seq, SeqRecord, Align
        aln = Align.MultipleSeqAlignment([
            SeqRecord.SeqRecord(Seq.Seq("AAA"), id=node, name=node)
            for node in dates
        ])

    ###########################################################################
    ### ESTIMATE ROOT (if requested) AND DETERMINE TEMPORAL SIGNAL
    ###########################################################################
    # file name of the tree without its directory part and last extension
    base_name = '.'.join(params.tree.split('/')[-1].split('.')[:-1])
    myTree = TreeTime(dates=dates,
                      tree=params.tree,
                      aln=aln,
                      gtr='JC69',
                      verbose=params.verbose)

    if not params.keep_root:
        myTree.reroot('best')

    # root-to-tip regression: conversion between dates and distances
    d2d = DateConversion.from_tree(myTree.tree)
    print('\n', d2d)
    print('The R^2 value indicates the fraction of variation in'
          '\nroot-to-tip distance explained by the temporal sampling.'
          '\nHigher values corresponds more clock-like behavior (max 1.0).')

    print('\nThe rate is the slope of the best fit of the date to'
          '\nthe root-to-tip distance and provides an estimate of'
          '\nthe substitution rate. The rate needs to be positive!'
Ejemplo n.º 17
0
def timetree(params):
    """
    Infer a time-scaled tree with TreeTime and write plots and results.

    Reads dates, tree and (optionally) alignment/VCF as described by
    ``params``, runs ``TreeTime.run`` with the requested clock, coalescent
    and rerooting options, prints a summary, and saves the tree plot, the
    root-to-tip plot, and the sequences/tree export.

    Parameters
    ----------
    params : namespace-like object
        Parsed options. Attributes read directly in this function are
        ``relax``, ``dates``, ``gtr``, ``branch_length_mode``, ``aln``,
        ``sequence_length``, ``tree``, ``verbose``, ``coalescent``,
        ``confidence``, ``clock_std_dev``, ``clock_rate``, ``keep_root``,
        ``reroot``, ``keep_polytomies``, ``max_iter``, ``clock_filter``,
        ``plot_tree``, ``zero_based`` and ``plot_rtt``. The helpers that
        receive ``params`` may read further attributes. ``params.relax`` is
        overwritten with ``True`` when it is an empty list.

    Returns
    -------
    int
        0 on success; 1 if no dates were parsed, ``assure_tree`` reported a
        problem, neither an alignment nor a sequence length was given, or
        ``TreeTime.run`` returned ``ttconf.ERROR``.
    """
    # NOTE(review): an empty list presumably means the option was given
    # without values -- confirm against the argument parser.
    if params.relax == []:
        params.relax = True

    dates = utils.parse_dates(params.dates)
    if not dates:
        return 1

    # a truthy return value from assure_tree is treated as failure
    if assure_tree(params, tmp_dir='timetree_tmp'):
        return 1

    outdir = get_outdir(params, '_treetime')

    gtr = create_gtr(params)
    infer_gtr = params.gtr == 'infer'

    ###########################################################################
    ### READ IN VCF
    ###########################################################################
    #sets ref and fixed_pi to None if not VCF
    aln, ref, fixed_pi = read_if_vcf(params)
    is_vcf = ref is not None
    branch_length_mode = params.branch_length_mode
    #variable-site-only trees can have big branch lengths, setting this wrong.
    if is_vcf and branch_length_mode == 'auto':
        branch_length_mode = 'joint'

    ###########################################################################
    ### SET-UP and RUN
    ###########################################################################
    if params.aln is None and params.sequence_length is None:
        print("one of arguments '--aln' and '--sequence-length' is required.",
              file=sys.stderr)
        return 1
    myTree = TreeTime(dates=dates,
                      tree=params.tree,
                      ref=ref,
                      aln=aln,
                      gtr=gtr,
                      seq_len=params.sequence_length,
                      verbose=params.verbose)

    # coalescent model options: either a number (a fixed value, ignored when
    # it is below ten times `one_mutation`) or one of the named models.
    try:
        coalescent = float(params.coalescent)
        if coalescent < 10 * myTree.one_mutation:
            coalescent = None
    except (TypeError, ValueError):
        # Not a usable number; only these failures are expected here, so
        # KeyboardInterrupt and unrelated errors are no longer swallowed.
        if params.coalescent in ['opt', 'const', 'skyline']:
            coalescent = params.coalescent
        else:
            print("unknown coalescent model specification, has to be either "
                  "a float, 'opt', 'const' or 'skyline'")
            coalescent = None

    # vary_rate defaults to the confidence setting; an explicit clock
    # standard deviation takes over when a clock rate is given as well.
    vary_rate = params.confidence
    if params.clock_std_dev and params.clock_rate:
        vary_rate = params.clock_std_dev

    root = None if params.keep_root else params.reroot
    success = myTree.run(
        root=root,
        relaxed_clock=params.relax,
        resolve_polytomies=(not params.keep_polytomies),
        Tc=coalescent,
        max_iter=params.max_iter,
        fixed_clock_rate=params.clock_rate,
        n_iqd=params.clock_filter,
        time_marginal="assign" if params.confidence else False,
        vary_rate=vary_rate,
        branch_length_mode=branch_length_mode,
        fixed_pi=fixed_pi)
    if success == ttconf.ERROR:  # if TreeTime.run failed, exit
        return 1

    ###########################################################################
    ### OUTPUT and saving of results
    ###########################################################################
    if infer_gtr:
        print('\nInferred GTR model:')
        print(myTree.gtr)

    print(myTree.date2dist)

    basename = get_basename(params, outdir)
    if coalescent in ['skyline', 'opt']:
        print("Inferred coalescent model")
    if coalescent == 'skyline':
        print_save_plot_skyline(myTree,
                                plot=basename + 'skyline.pdf',
                                save=basename + 'skyline.tsv',
                                screen=True)
    elif coalescent == 'opt':
        Tc = myTree.merger_model.Tc.y[0]
        print(" --T_c: \t %1.4f \toptimized inverse merger rate" % Tc)
        print(
            " --N_e: \t %1.1f \tcorresponding pop size assument 50 gen/year\n"
            % (Tc / myTree.date2dist.clock_rate * 50))

    # plot
    import matplotlib.pyplot as plt
    from .treetime import plot_vs_years
    leaf_count = myTree.tree.count_terminals()

    def label_func(node):
        """Label tips (name truncated to 20 chars) in trees with < 30 leaves."""
        # Must be the logical `and`: the bitwise `&` binds tighter than `<`,
        # which turned the test into `leaf_count < (30 & bool)`, i.e.
        # `leaf_count < 0`, so no tip was ever labelled.
        if leaf_count < 30 and node.is_terminal():
            # unnamed tips get an empty label instead of failing on None[:20]
            return (node.name or '')[:20]
        return ''

    plot_vs_years(myTree,
                  show_confidence=False,
                  label_func=label_func,
                  confidence=0.9 if params.confidence else None)
    tree_fname = outdir + params.plot_tree
    plt.savefig(tree_fname)
    print("--- saved tree as \n\t %s\n" % tree_fname)

    export_sequences_and_tree(myTree,
                              basename,
                              is_vcf,
                              params.zero_based,
                              timetree=True,
                              confidence=params.confidence)

    plot_rtt(myTree, outdir + params.plot_rtt)
    return 0