def gamma_deviation_from_clock(tree, shape, rate):
    """Rescale branch lengths with gamma-distributed deviations from a clock.

    A new tree is built with ``Tree(tree)``; every edge except the one above
    the seed (root) node then has its length multiplied by ``rate`` and by an
    independent draw from a gamma distribution with the given ``shape`` and
    scale ``1 / shape`` (so the multiplier has mean 1).

    Args:
        tree: Source tree, handed to ``Tree(...)`` to build the working tree.
            NOTE(review): assumes ``Tree(tree)`` returns an independent copy
            so the input is left untouched -- confirm against the Tree class.
        shape: Shape parameter of the gamma distribution.
        rate: Constant factor applied to every non-root edge length.

    Returns:
        The newly built tree with rescaled edge lengths.
    """
    relaxed = Tree(tree)
    root = relaxed.seed_node
    for nd in relaxed.postorder_node_iter():
        if nd is not root:
            # One fresh multiplier per edge, drawn in post-order.
            multiplier = np.random.gamma(shape, scale=1 / shape)
            nd.edge_length = nd.edge_length * multiplier * rate
    return relaxed
def exp_deviation_from_clock(tree, rate):
    """Rescale branch lengths with exponential deviations from a clock.

    A new tree is built with ``Tree(tree)``; every edge except the one above
    the seed (root) node then has its length multiplied by ``rate`` and by an
    independent draw from ``np.random.exponential()``.

    The exponential distribution is forced to have mean 1 (the default scale),
    which leaves it with no free parameter.

    Args:
        tree: Source tree, handed to ``Tree(...)`` to build the working tree.
            NOTE(review): assumes ``Tree(tree)`` returns an independent copy
            so the input is left untouched -- confirm against the Tree class.
        rate: Constant factor applied to every non-root edge length.

    Returns:
        The newly built tree with rescaled edge lengths.
    """
    relaxed = Tree(tree)
    root = relaxed.seed_node
    for nd in relaxed.postorder_node_iter():
        if nd is not root:
            # One fresh multiplier per edge, drawn in post-order.
            multiplier = np.random.exponential()
            nd.edge_length = nd.edge_length * multiplier * rate
    return relaxed
def load_from_dendropy(cls, tree: dendropy.Tree) -> 'PhyloDM':
    """Build a new instance of ``cls`` from a Dendropy tree object.

    The tree is walked twice in post-order: the first walk registers every
    node (so that all ids exist), the second walk connects each node to its
    parent.

    Args:
        tree: The Dendropy tree object.

    Returns:
        The freshly created instance, populated with one node per tree node
        and one edge per parent/child pair.
    """
    instance = cls()
    ids_by_node = {}

    # Pass 1: create a node for every tree node; only nodes carrying a
    # non-empty taxon label are registered with that label.
    for nd in tree.postorder_node_iter():
        taxon = nd.taxon
        label = taxon.label if taxon else None
        if label:
            ids_by_node[nd] = instance.add_node(taxon=label)
        else:
            ids_by_node[nd] = instance.add_node()

    # Pass 2: link every node that has a parent (i.e. all but the root).
    for nd in tree.postorder_node_iter():
        parent = nd.parent_node
        if parent is None:
            continue
        instance.add_edge(
            parent_id=ids_by_node[parent],
            child_id=ids_by_node[nd],
            length=nd.edge_length,
        )

    return instance
def lnorm_deviation_from_clock(tree, sd, rate):
    """Rescale branch lengths with lognormal deviations from a clock.

    A new tree is built with ``Tree(tree)``; every edge except the one above
    the seed (root) node then has its length multiplied by ``rate`` and by an
    independent lognormal draw.

    The lognormal distribution is forced to have mean 1, so a single
    parameter controls it. ``sd`` is the standard deviation of the lognormal
    distribution itself, NOT of its underlying normal distribution; the
    parameters of the underlying normal are derived from it below.

    Args:
        tree: Source tree, handed to ``Tree(...)`` to build the working tree.
            NOTE(review): assumes ``Tree(tree)`` returns an independent copy
            so the input is left untouched -- confirm against the Tree class.
        sd: Standard deviation of the (mean-1) lognormal multiplier.
        rate: Constant factor applied to every non-root edge length.

    Returns:
        The newly built tree with rescaled edge lengths.
    """
    relaxed = Tree(tree)

    # Variance of the underlying normal: sigma^2 = log(sd^2 + 1).
    # Choosing mu = -sigma^2 / 2 makes the lognormal mean exp(mu + sigma^2/2) = 1.
    normal_variance = log(sd * sd + 1)
    mu = -0.5 * normal_variance
    sigma = sqrt(normal_variance)

    root = relaxed.seed_node
    for nd in relaxed.postorder_node_iter():
        if nd is not root:
            # One fresh multiplier per edge, drawn in post-order.
            multiplier = np.random.lognormal(mean=mu, sigma=sigma)
            nd.edge_length = nd.edge_length * multiplier * rate
    return relaxed
def get_super_tree(self, superTree_method, **args):
    """Build a summary tree from the trees held in ``self.data['trees']``.

    A list of branches is first obtained according to ``superTree_method``
    and then assembled into a new ``Tree`` that is returned wrapped in a
    ``MetaTree``.

    Branches are identified by ``node.barcode``. In ``load_tree`` below a
    leaf's barcode is ``2 ** taxon_id`` and an internal node's barcode is the
    sum of its children's barcodes, i.e. a bitmask of the taxa below it.
    NOTE(review): the trees in ``self.data['trees']`` are assumed to carry
    barcodes, ids and ages assigned the same way elsewhere -- confirm.

    Args:
        superTree_method: One of

            * ``'consensus'`` -- branches are ranked by the fraction of trees
              containing them and added greedily when compatible (nested in
              or disjoint from) every branch already accepted;
            * ``'MCC'`` -- among the trees containing every taxon, the one
              whose branches are, in total, found in the most trees is used;
            * a path to an existing file -- the first tree of that file is
              used (NEXUS if the first line starts with ``#NEXUS``, Newick
              otherwise);
            * anything else -- interpreted as the label of one of the loaded
              trees, which is then used.
        **args: Forwarded unchanged to the internal branch builders.

    Returns:
        ``MetaTree(supertree)`` where every node carries ``barcode``,
        ``posterior`` (fraction of loaded trees containing the branch),
        ``contain`` (``[weight, tree_index, node_id]`` records), ``age``,
        ``id`` and, for non-root nodes, ``edge_length``.

    Raises:
        NotImplementedError: If ``superTree_method`` is ``'ASTRID'``, which
            is recognised but has no implementation.
        ValueError: If no loaded tree has the requested label, or the given
            file contains no tree.
    """

    def parse_trees(**args):
        # Index every node of every loaded tree by barcode. Each record is
        # [weight, tree_index, node]; the weight is the squared fraction of
        # all taxa that the tree contains.
        trees = self.data['trees']
        n_taxa = len(self.data['taxa'])
        n_branch = {}
        for mt_id, mt in enumerate(trees):
            w = (float(len(mt.tre.leaf_nodes())) / n_taxa) ** 2
            for node in mt.tre.preorder_node_iter():
                n_branch.setdefault(node.barcode, []).append([w, mt_id, node])
        return float(len(trees)), n_branch

    def weighted_mean(members, attr):
        # Mean of node.<attr> over [weight, tree_index, node] records,
        # weighted by the record weight.
        weighted_total = np.sum([getattr(m[2], attr) * m[0] for m in members])
        return weighted_total / np.sum([m[0] for m in members])

    def consensus(self, **args):
        n_tree, n_branch = parse_trees(**args)
        # dict.items() works on both Python 2 and 3 (iteritems() is py2-only).
        ranked = sorted(
            [[len(v) / n_tree, k, v] for k, v in n_branch.items()],
            reverse=True)
        consensus_tree = []
        for posterior, branch, nodes in ranked:
            for cbr, _, _ in consensus_tree:
                b1, b2 = sorted([branch, cbr])
                nested = (b1 & b2) == b1
                disjoint = (b1 & (~b2)) == b1
                if not (nested or disjoint):
                    # Conflicts with an accepted branch: flag it as rejected.
                    branch = 0
                    break
            if branch:
                consensus_tree.append([branch, posterior, nodes])
        # Descending barcode order puts every clade before its sub-clades,
        # which the assembly loop below relies on.
        return sorted(consensus_tree, reverse=True)

    def MCC(self, **args):
        n_tree, n_branch = parse_trees(**args)
        n_taxa = len(self.data['taxa'])
        for mt in self.data['trees']:
            if len(mt.tre.leaf_nodes()) == n_taxa:
                mt.score = np.sum([
                    len(n_branch[node.barcode])
                    for node in mt.tre.preorder_node_iter()
                ])
        # NOTE(review): trees lacking some taxa are not scored here; this
        # relies on them already having a ``score`` attribute -- confirm.
        tre = max(self.data['trees'], key=lambda x: x.score).tre
        return [[
            n.barcode,
            len(n_branch[n.barcode]) / n_tree, n_branch[n.barcode]
        ] for n in tre.preorder_node_iter()]

    def load_subtree(self, treeLabel, **args):
        n_tree, n_branch = parse_trees(**args)
        for mt in self.data['trees']:
            if mt.tre.label == treeLabel:
                tre = mt.tre
                break
        else:
            raise ValueError(
                'superTree_method {0!r} is neither a known method, an '
                'existing file, nor the label of a loaded tree'.format(
                    treeLabel))
        return [[
            n.barcode,
            len(n_branch[n.barcode]) / n_tree, n_branch[n.barcode], n.age,
            n.edge_length
        ] for n in tre.preorder_node_iter()]

    #def ASTRID(self, **args) :
    #from dendropy import PhylogeneticDistanceMatrix

    def load_tree(self, consFile=None, **args):
        n_tree, n_branch = parse_trees(**args)
        with open(consFile) as fin:
            schema = 'nexus' if fin.readline().upper().startswith(
                '#NEXUS') else 'newick'
        # Only the first tree of the file is used.
        for tre in Tree.yield_from_files([consFile], schema=schema):
            break
        else:
            raise ValueError(
                'no tree could be read from {0!r}'.format(consFile))
        internal_id = n_taxa = len(self.data['taxa'])
        # Object dtype keeps the powers of two as arbitrary-precision ints.
        digit_code = np.power(2, np.arange(n_taxa, dtype='object'))
        for node in tre.postorder_node_iter():
            if node.is_leaf():
                node.id = self.data['taxa'][node.taxon.label]
                node.barcode = digit_code[node.id]
            else:
                node.id, internal_id = internal_id, internal_id + 1
                node.barcode = sum([c.barcode for c in node.child_nodes()])
        tre.seed_node.age = tre.seed_node.distance_from_tip()
        for node in tre.preorder_node_iter():
            if node.parent_node:
                node.age = node.parent_node.age - node.edge_length
        return [[
            n.barcode,
            len(n_branch.get(n.barcode, [])) / n_tree,
            n_branch.get(n.barcode, []), n.age, n.edge_length
        ] for n in tre.preorder_node_iter()]

    # Pick the branch source. Named methods take precedence over file paths,
    # which take precedence over tree labels.
    if superTree_method == 'ASTRID':
        raise NotImplementedError(
            "superTree_method 'ASTRID' is not implemented")
    if superTree_method in ('MCC', 'consensus'):
        builders = {'MCC': MCC, 'consensus': consensus}
        branches = builders[superTree_method](self, **args)
    elif os.path.isfile(superTree_method):
        branches = load_tree(self, consFile=superTree_method, **args)
    else:
        branches = load_subtree(self, treeLabel=superTree_method, **args)

    # The first branch becomes the root of the new tree.
    supertree = Tree()
    sn = supertree.seed_node
    root_branch = branches[0]
    sn.barcode, sn.posterior = root_branch[0], root_branch[1]
    if len(root_branch) > 3:
        sn.age = root_branch[3]
    else:
        sn.age = weighted_mean(root_branch[2], 'age')
    sn.contain = [[b[0], b[1], b[2].id] for b in root_branch[2]]

    for br in branches[1:]:
        cbr, posterior, nodes = br[:3]
        # Climb until the current node's taxon set contains the new branch.
        while (sn.barcode & cbr) != cbr:
            sn = sn.parent_node
        if len(nodes) == 0 or (not nodes[0][2].taxon):
            new_node = Node()
        else:
            new_node = Node(taxon=Taxon(label=nodes[0][2].taxon.label))
        sn.add_child(new_node)
        sn = new_node
        sn.barcode, sn.posterior = cbr, posterior
        sn.contain = [[b[0], b[1], b[2].id] for b in nodes]
        if len(br) <= 3:
            # No explicit age/length supplied: average over the source nodes,
            # or fall back to a zero-length edge at the parent's age.
            if len(nodes) == 0:
                sn.edge_length = 0.0
                sn.age = sn.parent_node.age
            else:
                sn.edge_length = weighted_mean(nodes, 'edge_length')
                sn.age = weighted_mean(nodes, 'age')
        else:
            sn.age, sn.edge_length = br[3:]

    # Leaves take their taxon id; internal nodes are numbered after the taxa.
    internal_id = len(self.data['taxa'])
    for node in supertree.postorder_node_iter():
        if node.is_leaf():
            node.id = self.data['taxa'][node.taxon.label]
        else:
            node.id = internal_id
            internal_id += 1
    return MetaTree(supertree)