tree=base_name + ".nwk",
                     aln=base_name + '.fasta',
                     verbose=0)

    # infer a GTR model and ML ancestral sequences, either marginally or jointly most likely
    myTree.infer_ancestral_sequences(infer_gtr=True, marginal=True)

    # let's examine the properties of a node in the tree after ancestral inference
    node = myTree.tree.get_nonterminals()[7]
    # each node now has an inferred sequence
    if treetime.version < "0.7":
        print("\nthe inferred sequences is an array of characters:",
              node.sequence)
    else:
        print("\nthe inferred sequences is an array of characters:",
              myTree.sequence(node, as_string=False))

    # in addition, each node of the tree now has a `mutations` attribute attached
    # note that the mutation numbering starts at 0 rather than 1
    print("\nmutations on the branch leading to node %s:" % node.name,
          node.mutations)

    # we can readily verify these mutations by checking the inferred sequences
    if node.mutations:
        mut = node.mutations[0]
        if treetime.version < "0.7":
            print(
                "\nmutation %s%d%s corresponds to" % mut,
                "parent state: %s, child state %s\n\n" %
                (node.up.sequence[mut[1]], node.sequence[mut[1]]))
        else:
Ejemplo n.º 2
0
        print("the following features are in the supplied as genes but not the annotation:", set(args.genes).difference(features.keys()))

    T = Phylo.read(args.tree, 'newick')
    leafs = {n.name for n in T.get_terminals()}

    node_data = {}
    root_sequence_translations = {}
    for gene, translation in zip(genes, translations):
        seqs = []
        for s in SeqIO.parse(translation, 'fasta'):
            if s.id in leafs:
                seqs.append(s)


        tt = TreeAnc(tree=T, aln=MultipleSeqAlignment(seqs), alphabet='aa')

        tt.infer_ancestral_sequences(reconstruct_tip_states=True)
        root_sequence_translations[gene] = tt.sequence(tt.tree.root, as_string=True, reconstructed=True)

        with open(translation.replace('.fasta', '_withInternalNodes.fasta'), 'w') as fh:
            for n in tt.tree.find_clades():
                if n.name not in node_data:
                    node_data[n.name] = {"aa_muts":{}}
                if len(n.mutations):
                    node_data[n.name]["aa_muts"][gene] = [f"{a}{p+1}{d}" for a,p,d in n.mutations]
                fh.write(f">{n.name}\n{tt.sequence(n, as_string=True, reconstructed=True)}\n")

    annotations = annotation_json(features, ref)
    with open(args.output, 'w') as fh:
        json.dump({"nodes":node_data, "annotations":annotations, "reference":root_sequence_translations}, fh)