tree=base_name + ".nwk", aln=base_name + '.fasta', verbose=0) # infer a GTR model and ML ancestral sequences, either marginally or jointly most likely myTree.infer_ancestral_sequences(infer_gtr=True, marginal=True) # lets examine the properties of a node in the tree after ancestral inference node = myTree.tree.get_nonterminals()[7] # each node now has an inferred sequence if treetime.version < "0.7": print("\nthe inferred sequences is an array of characters:", node.sequence) else: print("\nthe inferred sequences is an array of characters:", myTree.sequence(node, as_string=False)) # in addition, each node of the tree now has an mutation object attached # note that the mutation numbering starts at 0 rather than 1 print("\nmutations on the branch leading to node %s:" % node.name, node.mutations) # we can readily verify these mutations by checking the inferred sequences if node.mutations: mut = node.mutations[0] if treetime.version < "0.7": print( "\nmutation %s%d%s corresponds to" % mut, "parent state: %s, child state %s\n\n" % (node.up.sequence[mut[1]], node.sequence[mut[1]])) else:
# Report the entries of args.genes that are not keys of `features`.
print(
    "the following features are in the supplied as genes but not the annotation:",
    set(args.genes).difference(features.keys()),
)

T = Phylo.read(args.tree, 'newick')
# Names of all tips; used to restrict each alignment to sequences in the tree.
leafs = {tip.name for tip in T.get_terminals()}

node_data = {}
root_sequence_translations = {}
for gene, translation in zip(genes, translations):
    # Keep only the translated records whose id matches a tip of the tree.
    tip_records = [
        record for record in SeqIO.parse(translation, 'fasta')
        if record.id in leafs
    ]

    # Amino-acid ancestral reconstruction for this gene.
    anc = TreeAnc(tree=T, aln=MultipleSeqAlignment(tip_records), alphabet='aa')
    anc.infer_ancestral_sequences(reconstruct_tip_states=True)
    root_sequence_translations[gene] = anc.sequence(
        anc.tree.root, as_string=True, reconstructed=True
    )

    # NOTE(review): if `translation` does not contain '.fasta', str.replace
    # returns it unchanged and opening it with 'w' would truncate the input
    # file -- confirm inputs always carry that extension.
    out_path = translation.replace('.fasta', '_withInternalNodes.fasta')
    with open(out_path, 'w') as out_fasta:
        for clade in anc.tree.find_clades():
            # Ensure every visited node has an entry, even without mutations.
            entry = node_data.setdefault(clade.name, {"aa_muts": {}})
            if len(clade.mutations) > 0:
                # Positions are shifted by one for the exported labels.
                entry["aa_muts"][gene] = [
                    f"{a}{p+1}{d}" for a, p, d in clade.mutations
                ]
            clade_sequence = anc.sequence(clade, as_string=True, reconstructed=True)
            out_fasta.write(f">{clade.name}\n{clade_sequence}\n")

annotations = annotation_json(features, ref)
export = {
    "nodes": node_data,
    "annotations": annotations,
    "reference": root_sequence_translations,
}
with open(args.output, 'w') as out_json:
    json.dump(export, out_json)