def open_tree(tree_file_path):
    """Load a tree from a 'contree' or 'treefile' path and return it.

    The file kind is decided by substring match on the path:

    * path contains 'contree': the tree is loaded with PhyloTree's default
      newick format and returned unchanged.
    * otherwise, path contains 'treefile': the tree is loaded with
      ``format=1``. Internal node names are expected to hold
      "SH-aLRT support (%) / ultrafast bootstrap support (%)"; the second
      '/'-separated field is stored as the node's ``support``. The first
      field (SH-aLRT) is not stored.
    * anything else: the program exits through ``sys.exit`` with an error
      message.
    """
    is_contree = 'contree' in tree_file_path

    # Reject unknown inputs up front, before any file is opened.
    if not is_contree and 'treefile' not in tree_file_path:
        sys.exit('Error: tree format not recognised')

    if is_contree:
        return PhyloTree(tree_file_path, sp_naming_function=None)

    tree = PhyloTree(tree_file_path, sp_naming_function=None, format=1)
    internal_nodes = (n for n in tree.iter_descendants() if not n.is_leaf())
    for internal in internal_nodes:
        fields = internal.name.split('/')
        if len(fields) > 1:
            internal.support = float(fields[1])
        else:
            # No second field in the label (per the original author: this
            # happens when sequences were identical), so the support is
            # artificially set to 100.0.
            internal.support = 100.0
    return tree
t = PhyloTree('temp/queryCOG.hmmhits.fasta.aln.trimal.treefile') # try to root with largest paralog clade try: m = 0 for clade in t.get_monophyletic(values=["c", "b"], target_attr="annot"): if len([l for l in clade.get_leaves()]) > m: rooting_clade = clade m = len([l for l in clade.get_leaves()]) t.set_outgroup(clade) except: midpoint = t.get_midpoint_outgroup() if midpoint: t.set_outgroup(midpoint) annotate_leaf_proteins(t) # find the clade with the mapped sequence for l in t.iter_descendants(): if l.name == mapped_seq: clade = l n = 0 while n == 0: if clade.up: annotations = [ leaf.annot for sister in clade.get_sisters() for leaf in sister.get_leaves() ] counts = dict(Counter(annotations)) if 'c' not in counts.keys(): counts['c'] = 0 if ('a' in annotations) and ( counts['a'] / (counts['a'] + counts['c']) > 0.15): clade = clade.up